Deep Learning Hands-On Projects
- P171 -- Image classification on the CIFAR-10 dataset
- P172 -- Object detection on the MS COCO dataset
- P173 -- DCGAN image generation on the MNIST handwritten digits dataset
- P174 -- Character recognition with EasyOCR
- P175 -- Variational Autoencoder (VAE) on the Air Quality dataset
Operating system: macOS Sequoia 15.0
Python IDE: PyCharm 2024.1.4 (Community Edition)
Python version: 3.12
TensorFlow version: 2.17.0
PyTorch version: 2.4.1
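A quick way to confirm that your local setup matches the versions above (assuming both frameworks are already installed):

import sys
import tensorflow as tf
import torch

# Print the interpreter and framework versions used for these projects
print(sys.version)        # expect Python 3.12.x
print(tf.__version__)     # expect 2.17.0
print(torch.__version__)  # expect 2.4.1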
Previous posts:
- 1-5 | 6-10 | 11-20 | 21-30 | 31-40 | 41-50
- 51-60: Functions | 61-70: Classes | 71-80: Programming paradigms and design patterns
- 81-90: Python coding conventions | 91-100: Common built-in modules (1)
- 101-105: Built-in modules (2) | 106-110: Built-in modules (3)
- 111-115: Frequently used third-party packages | 116-120: Third-party packages for deep learning
- 121-125: Third-party packages for web scraping | 126-130: Third-party packages for fun
- 131-135: Third-party packages: extra tools (1) | 136-140: Third-party packages: extra tools (2)
Python Project Practice
- 141-145 | 146-150 | 151-155 | 156-160 | 161-165 | 166-170
P171 -- Image Classification on the CIFAR-10 Dataset
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
# Load CIFAR-10 and scale pixel values to [0, 1]
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
# Labels come as shape (N, 1); flatten them to shape (N,)
y_train = y_train.flatten()
y_test = y_test.flatten()
# A small CNN: three conv blocks followed by a dense classifier
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f'\nTest accuracy: {test_acc}')
predictions = model.predict(x_test)
def plot_images(images, labels, predictions=None):
    # Show the first 9 test images with their true labels and predicted classes
    plt.figure(figsize=(10, 10))
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i])
        plt.title(f'Label: {labels[i]}' + (f'\nPred: {np.argmax(predictions[i])}' if predictions is not None else ''))
        plt.axis("off")

plot_images(x_test, y_test, predictions)
plt.show()
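The plot above titles each image with its numeric label. CIFAR-10's ten classes are fixed, so if you prefer human-readable names you could map indices to class names, for example:

# CIFAR-10 class names in label order (0-9)
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Example: print the predicted class name for the first test image
print(class_names[np.argmax(predictions[0])])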
P172 -- Object Detection on the MS COCO Dataset
Download: the pre-trained model and the class label file.
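A rough sketch of how you might fetch these two files. The URLs below are my assumption based on the TF2 Detection Model Zoo and the tensorflow/models repository; verify them before use:

import tarfile
import urllib.request

# Assumed URLs -- check the TF2 Detection Model Zoo / tensorflow/models repo for the current ones
MODEL_URL = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d7_coco17_tpu-32.tar.gz'
LABEL_URL = 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/mscoco_label_map.pbtxt'

urllib.request.urlretrieve(MODEL_URL, 'efficientdet_d7_coco17_tpu-32.tar.gz')
with tarfile.open('efficientdet_d7_coco17_tpu-32.tar.gz') as tar:
    tar.extractall('.')  # creates efficientdet_d7_coco17_tpu-32/saved_model/

urllib.request.urlretrieve(LABEL_URL, 'efficientdet_d7_coco17_tpu-32/mscoco_label_map.pbtxt')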
import numpy as np
import cv2
import tensorflow as tf
MODEL_PATH = 'efficientdet_d7_coco17_tpu-32/saved_model/'
LABEL_MAP_PATH = 'efficientdet_d7_coco17_tpu-32/mscoco_label_map.pbtxt'
def load_label_map():
    # Parse the pbtxt label map into {class_id: display_name}
    category_index = {}
    with open(LABEL_MAP_PATH, 'r') as f:
        for line in f:
            if "id:" in line:
                class_id = int(line.split(":")[1])
            if "display_name:" in line:
                name = line.split(":")[1].strip().replace('"', '')
                category_index[class_id] = name
    return category_index
def load_model():
    # Load the exported SavedModel
    return tf.saved_model.load(MODEL_PATH)

@tf.function
def infer(model, image):
    return model(image)

def run_inference_for_single_image(model, image):
    image = np.asarray(image)
    # The SavedModel expects a uint8 batch of shape (1, H, W, 3)
    input_tensor = tf.convert_to_tensor(image, dtype=tf.uint8)
    input_tensor = input_tensor[tf.newaxis, ...]
    output_dict = infer(model, input_tensor)
    return {key: value.numpy() for key, value in output_dict.items()}
def visualize_results(image_np, output_dict, category_index):
    # Draw a box and label for every detection with score >= 0.5
    num_detections = int(output_dict['num_detections'][0])
    for i in range(num_detections):
        score = float(output_dict['detection_scores'][0][i])
        print(f"Score for detection {i}: {score}")
        if score >= 0.5:
            box = output_dict['detection_boxes'][0][i]
            class_id = int(output_dict['detection_classes'][0][i])
            # Boxes are normalized (ymin, xmin, ymax, xmax); convert to pixel coordinates
            (ymin, xmin, ymax, xmax) = box
            (left, right, top, bottom) = (xmin * image_np.shape[1], xmax * image_np.shape[1],
                                          ymin * image_np.shape[0], ymax * image_np.shape[0])
            cv2.rectangle(image_np, (int(left), int(top)), (int(right), int(bottom)), (0, 255, 0), 2)
            label = f"{category_index[class_id]}: {score:.2f}"
            cv2.putText(image_np, label, (int(left), int(top) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
if __name__ == '__main__':
    model = load_model()
    category_index = load_label_map()
    image_path = '1727248862719.jpg'
    image = cv2.imread(image_path)
    # Check the read result before touching image.shape
    if image is None:
        raise ValueError(f"Image not found or unable to read: {image_path}")
    original_height, original_width = image.shape[:2]
    # Resize to the detector's input size and convert BGR -> RGB
    image = cv2.resize(image, (640, 640))
    image = image.astype(np.uint8)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    output_dict = run_inference_for_single_image(model, image)
    visualize_results(image, output_dict, category_index)
    # Convert back to BGR and restore the original resolution for display
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image_bgr_resize = cv2.resize(image_bgr, (original_width, original_height))
    cv2.imshow('Object Detection', image_bgr_resize)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
P173 -- DCGAN Image Generation on the MNIST Handwritten Digits Dataset
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid
import matplotlib.pyplot as plt
# Hyperparameters
batch_size = 128
learning_rate = 0.0002
num_epochs = 50
latent_dim = 100
image_size = 28
# Scale MNIST images to [-1, 1] to match the generator's Tanh output
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        # Upsample the latent vector to a 28x28 image: 1x1 -> 7x7 -> 13x13 -> 25x25 -> 28x28
        self.model = nn.Sequential(
            nn.ConvTranspose2d(latent_dim, 256, 7, 1, 0, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.ConvTranspose2d(256, 128, 5, 2, 2, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 5, 2, 2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 1, 4, 1, 0, bias=False),
            nn.Tanh()
        )

    def forward(self, z):
        # z has shape (N, latent_dim, 1, 1)
        return self.model(z)
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        # Downsample 28x28 -> 14x14 -> 7x7 -> 4x4 -> 1x1, ending in a real/fake probability
        self.model = nn.Sequential(
            nn.Conv2d(1, 64, 5, 2, 2, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, 5, 2, 2, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 256, 5, 2, 2, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(256, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, img):
        return self.model(img).view(-1, 1)
generator = Generator()
discriminator = Discriminator()
optimizer_g = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
optimizer_d = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
criterion = nn.BCELoss()
for epoch in range(num_epochs):
    for i, (imgs, _) in enumerate(dataloader):
        real_labels = torch.ones(imgs.size(0), 1)
        fake_labels = torch.zeros(imgs.size(0), 1)

        # Train the discriminator on real and generated images
        optimizer_d.zero_grad()
        outputs = discriminator(imgs)
        d_loss_real = criterion(outputs, real_labels)
        z = torch.randn(imgs.size(0), latent_dim, 1, 1)
        fake_images = generator(z)
        outputs = discriminator(fake_images.detach())
        d_loss_fake = criterion(outputs, fake_labels)
        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        optimizer_d.step()

        # Train the generator to make the discriminator output "real"
        optimizer_g.zero_grad()
        outputs = discriminator(fake_images)
        g_loss = criterion(outputs, real_labels)
        g_loss.backward()
        optimizer_g.step()

        if (i + 1) % 100 == 0:
            print(
                f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(dataloader)}], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}')

    if (epoch + 1) % 10 == 0:
        # Rescale from [-1, 1] to [0, 1] and save a sample grid every 10 epochs
        fake_images = (fake_images + 1) / 2
        save_image(fake_images.data, f'fake_images-{epoch + 1}.png', nrow=8, normalize=True)

# Show the last batch of generated images (make_grid, unlike save_image, returns a tensor)
fake_images = fake_images.view(-1, 1, image_size, image_size)
grid = make_grid(fake_images, nrow=8, normalize=True)
plt.imshow(grid.permute(1, 2, 0).detach().numpy(), cmap='gray')
plt.axis('off')
plt.show()
Samples generated by the model:
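For reference, the two updates in the training loop above are the standard binary cross-entropy GAN objectives. The discriminator minimizes

$$\mathcal{L}_D = -\mathbb{E}_{x}\big[\log D(x)\big] - \mathbb{E}_{z}\big[\log\big(1 - D(G(z))\big)\big],$$

and, because the generator is trained against the "real" labels, it minimizes the non-saturating loss

$$\mathcal{L}_G = -\mathbb{E}_{z}\big[\log D(G(z))\big].$$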
P174 -- Character Recognition with EasyOCR
import cv2
import easyocr
import matplotlib.pyplot as plt
# Use a font that can render CJK characters in matplotlib annotations
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
# Recognize Simplified Chinese and English
reader = easyocr.Reader(['ch_sim', 'en'])
image_path = 'test.jpg'
image = cv2.imread(image_path)
results = reader.readtext(image)
# Each result is (bbox, text, confidence); bbox lists the four corner points of the text box
for (bbox, text, prob) in results:
    print(f'Recognized text: {text}, confidence: {prob:.2f}')
for (bbox, text, prob) in results:
    # Draw a rectangle from the top-left to the bottom-right corner of the box
    cv2.rectangle(image, (int(bbox[0][0]), int(bbox[0][1])),
                  (int(bbox[2][0]), int(bbox[2][1])), (0, 255, 0), 2)
plt.figure(figsize=(10, 10))
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
for (bbox, text, prob) in results:
    plt.text(int(bbox[0][0]), int(bbox[0][1]), text, fontsize=12, color='green')
plt.show()
P175 -- Variational Autoencoder (VAE) on the Air Quality Dataset
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import zipfile
import requests
import io
# Hyperparameters
learning_rate = 0.001
num_epochs = 50
latent_dim = 10
# Download and unzip the UCI Air Quality dataset in memory
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00360/AirQualityUCI.zip"
response = requests.get(url)
zip_file = zipfile.ZipFile(io.BytesIO(response.content))
with zip_file.open('AirQualityUCI.csv') as my_file:
    data = pd.read_csv(my_file, sep=';', decimal=',', usecols=range(15), skiprows=0)
# Keep only the sensor/measurement columns
need_column = ['CO(GT)', 'PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)',
               'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)',
               'PT08.S5(O3)', 'T', 'RH']
data = data[need_column]
data.fillna(0, inplace=True)
# Scale every feature to [0, 1] so the Sigmoid decoder output and BCE loss are valid
data = data.values
scaler = MinMaxScaler()
data = scaler.fit_transform(data)
data_tensor = torch.FloatTensor(data)
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        # The encoder outputs 2 * latent_dim values: the mean and log-variance of q(z|x)
        self.encoder = nn.Sequential(
            nn.Linear(data_tensor.size(1), 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, latent_dim * 2)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, data_tensor.size(1)),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        # Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I)
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # Split the encoder output into mu and logvar
        mu_logvar = self.encoder(x).view(-1, 2, latent_dim)
        mu, logvar = mu_logvar[:, 0, :], mu_logvar[:, 1, :]
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar
model = VAE()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
def loss_function(recon_x, x, mu, logvar):
    # Reconstruction term (binary cross-entropy) plus the KL divergence to the prior
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD
for epoch in range(num_epochs):
    # Full-batch training: one optimizer step per epoch on the whole dataset
    model.train()
    train_loss = 0
    optimizer.zero_grad()
    recon_batch, mu, logvar = model(data_tensor)
    loss = loss_function(recon_batch, data_tensor, mu, logvar)
    loss.backward()
    train_loss += loss.item()
    optimizer.step()
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss / len(data_tensor):.4f}')

# Sample 64 latent vectors from the prior and decode them into synthetic records
model.eval()
with torch.no_grad():
    z = torch.randn(64, latent_dim)
    generated_data = model.decoder(z)
real_data_samples = data_tensor[:64].detach().numpy()
generated_data_samples = generated_data.numpy()
# Compare real and generated values feature by feature
plt.figure(figsize=(15, 8))
num_features = min(real_data_samples.shape[1], 12)
for i in range(num_features):
    plt.subplot(4, 3, i + 1)
    # Plot the i-th feature across the 64 samples (column i), matching the title and x-axis
    plt.plot(real_data_samples[:, i], label='Real Data', alpha=0.7)
    plt.plot(generated_data_samples[:, i], label='Generated Data', alpha=0.7)
    plt.title(f'Feature {i + 1}: {need_column[i]}')
    plt.xlabel('Samples')
    plt.ylabel('Values')
    plt.legend()
    plt.grid()
plt.tight_layout()
plt.show()
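For reference, loss_function above is the negative evidence lower bound of a standard VAE: a reconstruction term plus a KL term that keeps the approximate posterior $q(z\mid x)=\mathcal{N}(\mu,\sigma^2)$ close to the prior $\mathcal{N}(0, I)$:

$$\mathcal{L} = \mathrm{BCE}(\hat{x}, x) \;-\; \frac{1}{2}\sum_{j=1}^{d}\left(1 + \log\sigma_j^{2} - \mu_j^{2} - \sigma_j^{2}\right),$$

which is exactly BCE + KLD in the code, with logvar standing for $\log\sigma^2$.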