文章目录
- 卷积神经网络基础
- 卷积层
- 池化层
- 课上代码
- GPU版本代码
卷积神经网络基础
全连接网络破坏了图片本身的二维空间结构,而这些空间结构是有用的。因此需要定义新的、能够直接操作图像的计算节点,于是引入了卷积神经网络:它能够保留图像的空间结构,其组成为:卷积层、池化层和归一化处理。
卷积层
将图像与过滤器进行卷积,即“在空间上滑过图像,计算点积”。
1.过滤器的通道数和输入的通道数相同,输出的通道数和过滤器的数量相同
2. 对于每一次的卷积,可以发现图片的W和H都变小了,为了解决特征图收缩的问题,我们增加了padding,在原始图像的周围添加0(最常用),称作零填充
3. 如果图片的分辨率很大,每次只将过滤器移动一个像素,那么就需要很多次卷积才能把图像收缩,因此引入了超参数Stride,它是过滤器每次移动的步长
池化层
它实际上是一种形式的降采样。有多种不同形式的非线性池化函数,而其中“最大池化(Max pooling)”是最为常见的。它是将输入的图像划分为若干个矩形区域,对每个子区域输出最大值。直觉上,这种机制能够有效的原因在于,在发现一个特征之后,它的精确位置远不及它和其他特征的相对位置的关系重要。池化层会不断地减小数据的空间大小,因此参数的数量和计算量也会下降,这在一定程度上也控制了过拟合。
常见池化层
-
平均池化(average pooling):计算图像区域的平均值作为该区域池化后的值。
-
最大池化(max pooling):选图像区域的最大值作为该区域池化后的值。
课上代码
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
# Mini-batch size shared by the training and the test loader.
batch_size = 64

# Per-image preprocessing: tensor conversion followed by normalisation with
# the given mean / std (presumably the MNIST statistics -- confirm).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# MNIST train / test splits, downloaded into ./dataset/mnist/ when missing.
train_dataset = datasets.MNIST(
    root='./dataset/mnist/',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./dataset/mnist/',
    train=False,
    download=True,
    transform=transform,
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
class Net(torch.nn.Module):
    """Small CNN: two conv + max-pool + ReLU stages, then one linear layer.

    The linear layer expects 320 input features, which is what the two
    stages produce for a single-channel 28x28 image:
    28 -> conv(5) -> 24 -> pool(2) -> 12 -> conv(5) -> 8 -> pool(2) -> 4,
    and 20 channels * 4 * 4 = 320.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # One pooling module is reused by both stages; it has no parameters.
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return the 10 raw class scores for every image in the batch ``x``."""
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(batch_size, -1)
        # No softmax here: the scores are returned unnormalised.
        return self.fc(x)
# Loss applied to the raw scores returned by the network.
criterion = torch.nn.CrossEntropyLoss()

# Network instance and the SGD-with-momentum optimizer over its parameters.
model = Net()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.5,
)
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Prints the mean loss of every 300 consecutive mini-batches.

    Args:
        epoch: Zero-based epoch index; only used (as ``epoch + 1``) in the
            progress message.
    """
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python number, so no graph is kept alive.
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0.0
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    Returns:
        The fraction of correctly classified test samples (``correct / total``).
    """
    correct = 0
    total = 0
    # Evaluation needs no gradients.
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            # Index of the largest score along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, dim=1)
            total += labels.size(0)
            correct += (labels == predicted).sum().item()
    # '%d' truncates the percentage to a whole number.
    print('accuracy on test set: %d %% ' % (100*correct/total))
    return correct / total
if __name__ == '__main__':
    # Train for 10 epochs, evaluating on the test set after each one.
    for epoch in range(10):
        train(epoch)
        test()
GPU版本代码
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
# prepare dataset
# Mini-batch size shared by the training and the test loader.
batch_size = 64

# Per-image preprocessing: tensor conversion followed by normalisation with
# the given mean / std (presumably the MNIST statistics -- confirm).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# MNIST train / test splits, downloaded into ../dataset/mnist/ when missing.
train_dataset = datasets.MNIST(
    root='../dataset/mnist/',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='../dataset/mnist/',
    train=False,
    download=True,
    transform=transform,
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
# design model using class
class Net(torch.nn.Module):
    """Small CNN: two conv + max-pool + ReLU stages, then one linear layer.

    The linear layer expects 320 input features, which is what the two
    stages produce for a single-channel 28x28 image:
    28 -> conv(5) -> 24 -> pool(2) -> 12 -> conv(5) -> 8 -> pool(2) -> 4,
    and 20 channels * 4 * 4 = 320.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # One pooling module is reused by both stages; it has no parameters.
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return the 10 raw class scores for every image in the batch ``x``."""
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        # Flatten the feature maps to (n, 320); the -1 is inferred as 320
        # for (n, 1, 28, 28) inputs.
        x = x.view(batch_size, -1)
        # No softmax here: the scores are returned unnormalised.
        return self.fc(x)
# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network and move it to the selected device; Module.to returns
# the module itself, so `model` is the same object that was constructed.
model = Net().to(device)

# construct loss and optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
criterion = torch.nn.CrossEntropyLoss()
# training cycle forward, backward, update
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Every batch is moved to ``device`` before the forward pass. Prints the
    mean loss of every 300 consecutive mini-batches.

    Args:
        epoch: Zero-based epoch index; only used (as ``epoch + 1``) in the
            progress message.
    """
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Inputs and labels must live on the same device as the model.
        inputs, target = inputs.to(device), target.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python number, so no graph is kept alive.
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0.0
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    Returns:
        The fraction of correctly classified test samples (``correct / total``).
    """
    correct = 0
    total = 0
    # Evaluation needs no gradients.
    with torch.no_grad():
        for images, labels in test_loader:
            # Inputs and labels must live on the same device as the model.
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Index of the largest score along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # '%d' truncates the percentage to a whole number.
    print('accuracy on test set: %d %% ' % (100*correct/total))
    return correct/total
if __name__ == '__main__':
    epoch_list = []
    acc_list = []

    # Train for 10 epochs and record the test accuracy after each one.
    for epoch in range(10):
        train(epoch)
        acc = test()
        epoch_list.append(epoch)
        acc_list.append(acc)

    # Plot test accuracy against the epoch index.
    plt.plot(epoch_list, acc_list)
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.show()