1. Preparing the Dataset
Download the 102 Category Flower Dataset.
flower_data contains a train and a valid folder, each holding 102 subfolders, one per flower category.
cat_to_name.json maps category labels to flower names.
Unzip the archive and place it at the same level as the project directory.
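A quick sanity check of the layout (a minimal sketch; it assumes the archive was extracted to ./flower_data/ next to the project, as described above):
import os

data_dir = './flower_data/'
for split in ['train', 'valid']:
    split_dir = os.path.join(data_dir, split)
    class_dirs = os.listdir(split_dir)
    n_images = sum(len(files) for _, _, files in os.walk(split_dir))
    print(split, '->', len(class_dirs), 'classes,', n_images, 'images')  # expect 102 classes in each split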
2. Imports
If you get an error saying that some package xxx does not exist, just activate the relevant environment and run pip install xxx.
import os
import imageio
import time
import warnings
import random
import sys
import copy
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.optim as optim
import torchvision
from PIL import Image
from torchvision import transforms, models, datasets
%matplotlib inline
%matplotlib inline embeds plots directly in the notebook, so plt.show() can be omitted.
3. Reading the Dataset
train_dir: path to the training set
valid_dir: path to the validation set
data_dir = './flower_data/'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
4. The transforms Module: Data Preprocessing
Data augmentation
The training images are run through random rotation, center cropping, random horizontal and vertical flips, color jitter (brightness, contrast, saturation, hue), random grayscale conversion, and normalization. These transforms are applied on the fly, which effectively increases the diversity of the training data.
The train and valid folders get different preprocessing pipelines, as defined below.
data_transforms = {
    'train': transforms.Compose([transforms.RandomRotation(45),  # random rotation, between -45 and 45 degrees
        transforms.CenterCrop(224),  # crop from the center
        transforms.RandomHorizontalFlip(p=0.5),  # horizontal flip with probability 0.5
        transforms.RandomVerticalFlip(p=0.5),  # vertical flip with probability 0.5
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),  # brightness, contrast, saturation, hue
        transforms.RandomGrayscale(p=0.025),  # convert to grayscale with this probability (still 3 channels, R=G=B)
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # mean, std
    ]),
    'valid': transforms.Compose([transforms.Resize(256),
        transforms.CenterCrop(224),  # crop from the center
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
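To see what one pass through the training pipeline produces, the sketch below applies data_transforms['train'] to a single image (an assumption here: it simply grabs the first file found under train_dir; any image large enough for the 224 center crop works):
from glob import glob

sample_path = sorted(glob(os.path.join(train_dir, '*', '*')))[0]  # first image of the first class folder
sample = Image.open(sample_path).convert('RGB')
sample_tensor = data_transforms['train'](sample)
print(sample_tensor.shape)  # torch.Size([3, 224, 224]), normalized float values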
5. The ImageFolder Module: Building Batch Datasets
Official API: torchvision.datasets.ImageFolder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid']}
This reads the dataset directory structure:
x is the folder name, either train or valid;
data_transforms[x] applies the corresponding preprocessing pipeline (the augmented one for train, the plain one for valid).
batch_size = 8
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True) for x in ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
image_datasets
"""
{'train': Dataset ImageFolder
Number of datapoints: 6552
Root location: ./flower_data/train
StandardTransform
Transform: Compose(
RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)
CenterCrop(size=(224, 224))
RandomHorizontalFlip(p=0.5)
RandomVerticalFlip(p=0.5)
ColorJitter(brightness=(0.8, 1.2), contrast=(0.9, 1.1), saturation=(0.9, 1.1), hue=(-0.1, 0.1))
RandomGrayscale(p=0.025)
ToTensor()
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
),
'valid': Dataset ImageFolder
Number of datapoints: 818
Root location: ./flower_data/valid
StandardTransform
Transform: Compose(
Resize(size=256, interpolation=bilinear, max_size=None, antialias=warn)
CenterCrop(size=(224, 224))
ToTensor()
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)}
"""
dataloaders
"""
{'train': <torch.utils.data.dataloader.DataLoader at 0x1924ed088e0>,
'valid': <torch.utils.data.dataloader.DataLoader at 0x19264e750d0>}
"""
dataset_sizes
"""
{'train': 6552, 'valid': 818}
"""
6. Reading the cat_to_name.json File
cat_to_name.json maps each category label to the corresponding flower name.
At prediction time the model first produces a score for every class and the largest one is selected; that only yields a class index. Translating the index back to a folder label and then looking it up in this JSON file recovers the actual flower name.
with open('cat_to_name.json', 'r') as f:
cat_to_name = json.load(f)
cat_to_name
"""
{'21': 'fire lily',
'3': 'canterbury bells',
'45': 'bolero deep blue',
'1': 'pink primrose',
'34': 'mexican aster',
'27': 'prince of wales feathers',
'7': 'moon orchid',
......
'77': 'passion flower',
'51': 'petunia'}
"""
7. Visualizing the Dataset
Tensors in torch are laid out as [C, H, W], while PIL/matplotlib expect [H, W, C], so image.transpose(1, 2, 0) converts the format.
Preprocessing normalized each image as y = (x - mean) / std; to undo this, multiply by the std and add the mean back, x = y * std + mean, i.e. image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406)).
def im_convert(tensor):
""" 展示数据"""
image = tensor.to("cpu").clone().detach()
image = image.numpy().squeeze()
image = image.transpose(1,2,0)
image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
image = image.clip(0, 1)
return image
fig=plt.figure(figsize=(20, 12))
columns = 4
rows = 2
dataiter = iter(dataloaders['valid'])
inputs, classes = next(dataiter)
for idx in range (columns*rows):
ax = fig.add_subplot(rows, columns, idx+1, xticks=[], yticks=[])
ax.set_title(cat_to_name[str(int(class_names[classes[idx]]))])
plt.imshow(im_convert(inputs[idx]))
plt.show()
8. Transfer Learning
A small dataset easily leads to overfitting. Transfer learning lets us reuse weights that someone else has already trained. As a rule of thumb, when your own dataset is small, freeze the pretrained backbone weights during training and only train the layers you actually add, typically the final fully connected (FC) layer. Frozen weights are never updated, so the feature-extraction ability of the network stays exactly as in the pretrained model; if that model is good, simply reuse its weights.
Official API: Models and pre-trained weights
Here resnet152 is used as the example. Transfer learning does not mean adopting every pretrained weight unchanged:
the original model was trained for a 1000-class task, so its head is (fc): Linear(in_features=2048, out_features=1000, bias=True), i.e. a [2048, 1000] layer;
this project is a 102-class task, so the fully connected layer must be replaced by a [2048, 102] one.
Even though the resnet152 model is reused, the task is different and the model has to be adapted accordingly. Since the only task-specific part is this single fully connected layer, it is the only layer that needs to be modified.
model_name = 'resnet'  # several options are available: ['resnet', 'alexnet', 'vgg', 'squeezenet', 'densenet', 'inception']
# whether to reuse the pretrained features (freeze the backbone)
feature_extract = True
# whether a GPU is available for training
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
print('CUDA is not available. Training on CPU ...')
else:
print('CUDA is available! Training on GPU ...')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Selectively freeze certain layers so that their weight parameters are no longer updated:
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
model_ft = models.resnet152()
model_ft  # take a look at the resnet152 architecture
"""
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
......
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=2048, out_features=1000, bias=True)
)
"""
models.resnet152(pretrained=use_pretrained) controls whether the pretrained weights are used. With use_pretrained=True they are downloaded over the network; the default download location is C:\Users\MyPC\.cache\torch\hub\checkpoints, where MyPC is my machine's user name.
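Note that recent torchvision releases (0.13+) deprecate the pretrained= argument in favor of weights=. If your version warns about it, the roughly equivalent call is sketched below (the weights enum name is taken from the torchvision docs; check your installed version):
model_ft = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)  # instead of pretrained=True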
num_ftrs = model_ft.fc.in_features
grabs the input dimension of the fully connected layer.
model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, 102), nn.LogSoftmax(dim=1))
replaces the fully connected layer: the original 1000-class head becomes a 102-class one.
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # pick the requested model; different architectures are initialized slightly differently
model_ft = None
input_size = 0
if model_name == "resnet":
""" Resnet152
"""
model_ft = models.resnet152(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes),  # num_classes rather than a hard-coded 102
                                    nn.LogSoftmax(dim=1))
input_size = 224
elif model_name == "alexnet":
""" Alexnet
"""
model_ft = models.alexnet(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
input_size = 224
elif model_name == "vgg":
""" VGG11_bn
"""
model_ft = models.vgg16(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
input_size = 224
elif model_name == "squeezenet":
""" Squeezenet
"""
model_ft = models.squeezenet1_0(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
model_ft.num_classes = num_classes
input_size = 224
elif model_name == "densenet":
""" Densenet
"""
model_ft = models.densenet121(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "inception":
""" Inception v3
Be careful, expects (299,299) sized images and has auxiliary output
"""
model_ft = models.inception_v3(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
num_ftrs = model_ft.AuxLogits.fc.in_features
model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs,num_classes)
input_size = 299
else:
print("Invalid model name, exiting...")
exit()
return model_ft, input_size
Summary: transfer learning steps
① Get the network model and pass use_pretrained=True, i.e. load the weights someone else has already trained.
② Decide, based on your own situation, whether to freeze certain layers so that no gradients are computed for them during updates, i.e. param.requires_grad = False.
③ Replace the final fully connected layer so it matches your own task.
Train your own layers first while keeping the pretrained weights fixed; then, starting from that result, continue training the whole network and fine-tune all of its weights. This two-stage scheme usually gives the best results.
9. Model Parameter Setup
Specify which layers should actually be trained.
model_name = 'resnet' selects the resnet152 model; the final output has 102 classes.
feature_extract controls whether the backbone layers are frozen.
use_pretrained controls whether the pretrained weights are loaded.
filename='checkpoint.pth' saves the trained weights as checkpoint.pth in the current directory.
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)
# run on the GPU
model_ft = model_ft.to(device)
# file the model checkpoint is saved to
filename='checkpoint.pth'
# collect the parameters that will actually be trained
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
params_to_update = []
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
params_to_update.append(param)
print("\t",name)
else:
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
print("\t",name)
"""
Params to learn:
fc.0.weight
fc.0.bias
"""
As the model summary below shows, the fully connected layer has been replaced with Linear(in_features=2048, out_features=102, bias=True), matching our 102-class task.
model_ft
"""
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
......
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Sequential(
(0): Linear(in_features=2048, out_features=102, bias=True)
(1): LogSoftmax(dim=1)
)
)
"""
10. Optimizer Setup
Adam generally works well and is widely used.
optim.Adam(params_to_update, lr=1e-2) starts with a fairly large learning rate, lr=0.01;
a learning-rate decay schedule, optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1), then shrinks it gradually over training.
# optimizer setup
optimizer_ft = optim.Adam(params_to_update, lr=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)  # decay the learning rate to 1/10 every 7 epochs
# the last layer already applies LogSoftmax(), so nn.CrossEntropyLoss() cannot be used here; nn.CrossEntropyLoss() is LogSoftmax() plus nn.NLLLoss() combined
criterion = nn.NLLLoss()
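To make the decay schedule concrete, the throwaway sketch below (independent of the real training loop; _opt and _sched are hypothetical names) prints how StepLR(step_size=7, gamma=0.1) shrinks an initial lr of 1e-2:
_opt = optim.Adam([torch.zeros(1, requires_grad=True)], lr=1e-2)  # dummy optimizer, only used to inspect the schedule
_sched = optim.lr_scheduler.StepLR(_opt, step_size=7, gamma=0.1)
for epoch in range(20):
    if epoch % 7 == 0:
        print(epoch, _opt.param_groups[0]['lr'])  # 0: 0.01, 7: 0.001, 14: 0.0001
    _opt.step()
    _sched.step()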
11. Training Function Setup
is_inception indicates whether the model is Inception v3 with its auxiliary output, which needs an extra loss term; for resnet152 it stays False.
Whenever validation accuracy improves, the model is saved, so the best checkpoint is always available for later use.
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False,filename=filename):
since = time.time()
    best_acc = 0  # best validation accuracy seen so far
"""
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
model.class_to_idx = checkpoint['mapping']
"""
model.to(device)
val_acc_history = []
train_acc_history = []
train_losses = []
valid_losses = []
LRs = [optimizer.param_groups[0]['lr']]
best_model_wts = copy.deepcopy(model.state_dict())
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
        # one training phase followed by one validation phase
for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # training mode
            else:
                model.eval()   # evaluation mode
running_loss = 0.0
running_corrects = 0
            # iterate over the whole dataset
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
                # zero the gradients
optimizer.zero_grad()
                # compute and update gradients only in the training phase
with torch.set_grad_enabled(phase == 'train'):
if is_inception and phase == 'train':
outputs, aux_outputs = model(inputs)
loss1 = criterion(outputs, labels)
loss2 = criterion(aux_outputs, labels)
loss = loss1 + 0.4*loss2
                    else:  # resnet takes this branch
outputs = model(inputs)
loss = criterion(outputs, labels)
_, preds = torch.max(outputs, 1)
                    # update weights only in the training phase
if phase == 'train':
loss.backward()
optimizer.step()
                # accumulate loss and correct predictions
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
time_elapsed = time.time() - since
print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # keep track of the best model so far
if phase == 'valid' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
state = {
'state_dict': model.state_dict(),
'best_acc': best_acc,
'optimizer' : optimizer.state_dict(),
}
torch.save(state, filename)
if phase == 'valid':
val_acc_history.append(epoch_acc)
valid_losses.append(epoch_loss)
                scheduler.step()  # StepLR steps once per epoch; it takes no metric argument
if phase == 'train':
train_acc_history.append(epoch_acc)
train_losses.append(epoch_loss)
print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
LRs.append(optimizer.param_groups[0]['lr'])
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
    # after training, load the best weights as the final model
model.load_state_dict(best_model_wts)
return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
12. Model Training
① First freeze the pretrained backbone weights and train only the fully connected layer.
At this stage only the final fully connected layer is trained; all pretrained weights are frozen, so training is comparatively fast.
If your machine is not very powerful, reduce num_epochs=20 to a smaller number of epochs.
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(model_ft, dataloaders, criterion, optimizer_ft, num_epochs=20, is_inception=(model_name=="inception"))
② Then, starting from the model just trained, continue training all layers.
Now every weight in the network gets updated, so the learning rate should be set much smaller: this stage is only a fine-tuning pass over the whole model.
for param in model_ft.parameters():
param.requires_grad = True
# continue training all parameters, with a smaller learning rate
optimizer = optim.Adam(model_ft.parameters(), lr=1e-4)  # all parameters now, not just the old fc-only list
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # schedule the new optimizer, not optimizer_ft
# loss function
criterion = nn.NLLLoss()
This stage should pick up where the previous one left off, so load the checkpoint with the best accuracy and fine-tune from there.
# Load the checkpoint
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
#model_ft.class_to_idx = checkpoint['mapping']
Training the model now takes noticeably longer, because the weights of the entire network are being fine-tuned.
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(model_ft, dataloaders, criterion, optimizer, num_epochs=10, is_inception=(model_name=="inception"))
When training finishes, the best weights have been saved to checkpoint.pth.
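A quick way to see what was saved (a small sketch; the keys match the state dict written inside train_model above):
ckpt = torch.load(filename)
print(list(ckpt.keys()))  # ['state_dict', 'best_acc', 'optimizer']
print(ckpt['best_acc'])   # best validation accuracy reached during training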
13. Loading the Trained Model
Load the model that was just trained from its checkpoint.
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)
# GPU mode
#model_ft = model_ft.to(device)
# name of the saved checkpoint file
filename='checkpoint.pth'
# load the checkpoint
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])
14. Model Testing
Ⅰ. Preprocessing the test data
To test the model, the test images must go through the same preprocessing as the training data.
np.array(img)/255 normalizes the test image, squeezing pixel values from 0-255 into 0-1.
The same mean and standard deviation are then applied as on the training set.
A numpy/PIL image is [H, W, C] while torch expects [C, H, W], so img.transpose((2, 0, 1)) moves the channel dimension to the front.
def process_image(image_path):
    # read the test image
    img = Image.open(image_path)
    # resize: thumbnail can only shrink an image, so check which side is longer
    if img.size[0] > img.size[1]:
        img.thumbnail((10000, 256))
    else:
        img.thumbnail((256, 10000))
    # center crop to 224 x 224
    left_margin = (img.width - 224) / 2
    bottom_margin = (img.height - 224) / 2
    right_margin = left_margin + 224
    top_margin = bottom_margin + 224
    img = img.crop((left_margin, bottom_margin, right_margin, top_margin))
    # same preprocessing as in training
    img = np.array(img) / 255
    mean = np.array([0.485, 0.456, 0.406])  # provided mean
    std = np.array([0.229, 0.224, 0.225])  # provided std
    img = (img - mean) / std
    # the channel dimension must come first
    img = img.transpose((2, 0, 1))
    return img
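As an aside, essentially the same preprocessing can be obtained by reusing the valid pipeline from section 4 directly on a PIL image; this returns a torch tensor rather than a numpy array ('test.jpg' here is just the sample image used below):
img_tensor = data_transforms['valid'](Image.open('test.jpg').convert('RGB'))
print(img_tensor.shape)  # torch.Size([3, 224, 224])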
Ⅱ. Displaying the test image
def imshow(image, ax=None, title=None):
"""展示数据"""
if ax is None:
fig, ax = plt.subplots()
    # move the channel dimension back to the end
image = np.array(image).transpose((1, 2, 0))
    # undo the normalization
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
image = std * image + mean
image = np.clip(image, 0, 1)
ax.imshow(image)
ax.set_title(title)
return ax
Here I simply picked an image from the validation set.
image_path = 'test.jpg'
img = process_image(image_path)
imshow(img)
img.shape # (3, 224, 224)
Ⅲ. Take one batch of test data and feed it to the trained network
Here the batch comes from the valid folder of the dataset.
torch.Size([8, 102]) means the output batch holds 8 samples, each with 102 values, one score per class (log-probabilities, since the head ends in LogSoftmax).
# get one batch of test data
dataiter = iter(dataloaders['valid'])
images, labels = next(dataiter)
model_ft.eval()
if train_on_gpu:
output = model_ft(images.cuda())
else:
output = model_ft(images)
output.shape # torch.Size([8, 102])
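Since the head ends in LogSoftmax, exponentiating the output turns the scores into proper probabilities. A small sketch looking at the top-3 predictions for the first image in the batch:
probs = torch.exp(output[0])  # log-probabilities -> probabilities (they sum to 1)
top_p, top_idx = probs.topk(3)  # three most likely classes for the first sample
for p, i in zip(top_p.tolist(), top_idx.tolist()):
    print(cat_to_name[class_names[i]], round(p, 3))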
Ⅳ. Get the index of the highest-scoring class for each of the 8 samples in the batch
_, preds_tensor = torch.max(output, 1)
preds = np.squeeze(preds_tensor.numpy()) if not train_on_gpu else np.squeeze(preds_tensor.cpu().numpy())
preds
"""
array([71, 89, 73, 54, 83, 12, 59, 78], dtype=int64)
"""
Ⅴ. Display the prediction results
The previous step only gives class indices. Each index is mapped through class_names to its folder label and then through cat_to_name.json to the actual flower name (indexing cat_to_name with the raw class index would mix up classes, because ImageFolder's indices come from string-sorted folder names).
A green title means the prediction is correct; a red title means it is wrong.
Title format: predicted flower name (actual flower name).
fig=plt.figure(figsize=(20, 20))
columns =4
rows = 2
for idx in range (columns*rows):
ax = fig.add_subplot(rows, columns, idx+1, xticks=[], yticks=[])
plt.imshow(im_convert(images[idx]))
    ax.set_title("{} ({})".format(cat_to_name[class_names[preds[idx]]], cat_to_name[class_names[labels[idx].item()]]),
                 color=("green" if preds[idx] == labels[idx].item() else "red"))
plt.show()