1 图像基础知识
import numpy as np
import matplotlib.pyplot as plt
def _show(image):
    """Render *image* with matplotlib and open the figure window."""
    plt.imshow(image)
    plt.show()


# Image data: a 200x200 array with 3 channels, every entry set to 255.
# (np.zeros((200, 200, 3)) is the all-zero alternative.)
img = np.full(shape=(200, 200, 3), fill_value=255)

# Visualization
_show(img)

# Image reading: load a file from disk and display it the same way.
img = plt.imread('img.jpg')
_show(img)
2 CNN概述
- 卷积层conv+relu
- 池化层pool
- 全连接层FC/Linear
3 卷积层
import matplotlib.pyplot as plt
import torch
from torch import nn
# Data: read the image and report its shape as loaded.
img = plt.imread('img.jpg')
print(img.shape)

# Conv input: move axis 2 to the front, prepend a batch axis of size 1,
# and cast to float32 before feeding the convolution.
pixels = torch.tensor(img)
channels_first = pixels.permute(2, 0, 1)
batched = channels_first.unsqueeze(0)
img = batched.to(torch.float32)

# Non-square kernel and stride: 3x5 window, step 1 along the first spatial
# axis and 2 along the second, with padding 2.
conv = nn.Conv2d(
    in_channels=3,
    out_channels=5,
    kernel_size=(3, 5),
    stride=(1, 2),
    padding=2,
)

# Processing: run the convolution and report the feature-map shape.
fm = conv(img)
print(fm.shape)
4 池化层
- 下采样:采样点减少(特征图尺寸变小)
- 上采样(升采样):采样点增多(特征图尺寸变大)
- 最大池化相较平均池化使用更多
- 通常kernel_size=(3,3),stride=(2,2),padding=(自定义)
import torch
from torch import nn
# Create data: a seeded 1x3x3 float32 tensor of integer values in [0, 10).
torch.random.manual_seed(22)
data = torch.randint(low=0, high=10, size=[1, 3, 3], dtype=torch.float32)
print(data)

# Both pooling layers share the same 2x2 window, stride 1 and no padding.
_pool_kwargs = dict(kernel_size=(2, 2), stride=(1, 1), padding=0)

# Max pooling first, then average pooling; `pool` ends up bound to the
# average-pooling layer.
for pool in (nn.MaxPool2d(**_pool_kwargs), nn.AvgPool2d(**_pool_kwargs)):
    print(pool(data))
5 图像分类案例(LeNet)
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torchvision.transforms import Compose
import matplotlib.pyplot as plt
from torchsummary import summary
from torch import optim
from torch.utils.data import DataLoader
# Data acquisition
def _cifar10_split(train):
    """Build the CIFAR10 train or test split rooted at 'cnn_net' (download enabled)."""
    return CIFAR10(root='cnn_net', train=train, transform=Compose([ToTensor()]), download=True)


train_dataset = _cifar10_split(True)
test_dataset = _cifar10_split(False)

# Report the class-name -> index mapping and the raw array shape of each split.
print(train_dataset.class_to_idx)
for split in (train_dataset, test_dataset):
    print(split.data.shape)

# Show one raw test image and print its target label.
sample_idx = 100
plt.imshow(test_dataset.data[sample_idx])
plt.show()
print(test_dataset.targets[sample_idx])
# 模型构建
class ImageClassification(nn.Module):
    """LeNet-style image classifier: two conv+ReLU+max-pool stages, then three linear layers.

    The first linear layer expects 576 input features, i.e. 16 channels of
    6x6 after the two conv/pool stages. That matches 3x32x32 inputs
    (32 -> 30 -> 15 -> 13 -> 6); other spatial sizes will not fit ``fc1``.

    Args:
        num_classes: Number of output logits. Defaults to 10, the original
            hard-coded value.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        # Attribute names and registration order are kept so that saved
        # state_dict keys remain compatible.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # 576 = 16 channels * 6 * 6 spatial positions after pool2.
        self.fc1 = nn.Linear(in_features=576, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.out = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape (batch, num_classes) for a batch ``x``."""
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        # Collapse everything except the batch axis; unlike reshape(B, -1)
        # this also works when the batch is empty.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No softmax here: the logits are returned as-is.
        return self.out(x)
# Instantiate the classifier (parameters live on the CPU by default).
model = ImageClassification()
# torchsummary defaults to device="cuda" and would build CUDA inputs on a
# CUDA-capable machine; pin it to the CPU so it matches the model above.
summary(model, (3, 32, 32), batch_size=1, device='cpu')
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [1, 6, 30, 30] 168
MaxPool2d-2 [1, 6, 15, 15] 0
Conv2d-3 [1, 16, 13, 13] 880
MaxPool2d-4 [1, 16, 6, 6] 0
Linear-5 [1, 120] 69,240
Linear-6 [1, 84] 10,164
Linear-7 [1, 10] 850
================================================================
Total params: 81,302
Trainable params: 81,302
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.08
Params size (MB): 0.31
Estimated Total Size (MB): 0.40
----------------------------------------------------------------
# Model training
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.99))
error = nn.CrossEntropyLoss()
epoches = 10

# The loader is loop-invariant: with shuffle=True it draws a fresh order each
# time it is iterated, so one instance serves every epoch.
dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True)

model.train()
for epoch in range(epoches):
    loss_sum = 0.0
    num = 0  # number of batches seen this epoch
    for x, y in dataloader:
        y_ = model(x)
        loss = error(y_, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        num += 1
    # Mean batch loss for the epoch; max() guards the empty-loader case
    # explicitly instead of biasing the count with a fractional start value.
    print(loss_sum / max(num, 1))

# Model saving: persist only the parameters (state_dict), not the module object.
torch.save(model.state_dict(), 'model.pth')
# Model prediction
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# The checkpoint is a plain state_dict, so restrict unpickling to tensor data
# rather than allowing arbitrary pickled objects.
model.load_state_dict(torch.load('model.pth', weights_only=True))
model.eval()

corr = 0  # correctly classified samples
num = 0   # samples seen
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for x, y in test_dataloader:
        y_ = model(x)
        out = torch.argmax(y_, dim=-1)
        corr += (out == y).sum().item()
        num += len(y)

# Accuracy as a plain Python float (previously printed as a 0-dim tensor).
print(corr / num)
优化方向