基于MobileNetV2的垃圾分类
本文档详细介绍了使用MobileNetV2模型进行垃圾分类的全过程,包括数据准备、模型搭建、模型训练、评估和推理等步骤。MobileNetV2是一种轻量级卷积神经网络,专为移动端和嵌入式设备设计,具有高效、低耗的特点。通过将该模型应用于垃圾分类任务,我们可以自动识别和分类不同类型的垃圾,提高垃圾处理的效率。
本文档介绍了使用MobileNetV2模型进行垃圾分类的代码开发过程。我们将通过读取本地图像数据作为输入,对图像中的垃圾物体进行检测,并将检测结果图片保存到文件中。
1. 实验目的
- 熟悉垃圾分类应用代码的编写(Python语言)。
- 了解Linux操作系统的基本使用。
- 掌握 atc 命令进行模型转换的基本操作。
2. MobileNetV2模型原理介绍
MobileNetV2是Google团队于2018年提出的一种轻量级卷积神经网络,专注于移动端、嵌入式或IoT设备。相比传统的卷积神经网络,MobileNetV2使用深度可分离卷积(Depthwise Separable Convolution),在准确率小幅度降低的前提下,大大减小了模型参数与运算量。
MobileNetV2通过引入倒残差结构(Inverted Residual Block)和线性瓶颈(Linear Bottlenecks)来设计网络,以提高模型的准确率,同时优化后的模型更小。
3. 实验环境
本案例支持Win_x86和Linux系统,CPU/GPU/Ascend均可运行。
4. 数据处理
4.1 数据准备
MobileNetV2的代码默认使用ImageFolder格式管理数据集。每类图片整理成单独的一个文件夹,数据集结构如下:
└─ImageFolder
├─train
│ ├─class1Folder
│ └─......
└─test
├─class1Folder
└─......
4.2 数据加载
import math
import numpy as np
import os
import random
from matplotlib import pyplot as plt
from easydict import EasyDict
from PIL import Image
import mindspore.nn as nn
import mindspore.dataset as de
import mindspore.dataset.vision as C
import mindspore.dataset.transforms as C2
import mindspore as ms
from mindspore import set_context, Tensor
from mindspore.train import Model
from mindspore.train import Callback, LossMonitor, ModelCheckpoint, CheckpointConfig
# Quiet MindSpore's GLOG logging: level 3 = ERROR only, routed to a log file
# (not stderr); only FATAL (threshold 2+) still reaches stderr.
os.environ['GLOG_v'] = '3'
os.environ['GLOG_logtostderr'] = '0'
os.environ['GLOG_log_dir'] = '../../log'
os.environ['GLOG_stderrthreshold'] = '2'
# Run in graph mode on CPU.  NOTE(review): device_id is presumably ignored on
# CPU targets — confirm against the installed MindSpore version.
set_context(mode=ms.GRAPH_MODE, device_target="CPU", device_id=0)
# Dataset labels: Chinese class names grouped by garbage category
# (dry / recyclable / wet / hazardous).  These strings are runtime data and
# must match the dataset's folder names.
garbage_classes = {
    '干垃圾': ['贝壳', '打火机', '旧镜子', '扫把', '陶瓷碗', '牙刷', '一次性筷子', '脏污衣服'],
    '可回收物': ['报纸', '玻璃制品', '篮球', '塑料瓶', '硬纸板', '玻璃瓶', '金属制品', '帽子', '易拉罐', '纸张'],
    '湿垃圾': ['菜叶', '橙皮', '蛋壳', '香蕉皮'],
    '有害垃圾': ['电池', '药片胶囊', '荧光灯', '油漆桶']
}
# Flat list of the 26 Chinese class names, ordered to match class_en/index_en.
class_cn = ['贝壳', '打火机', '旧镜子', '扫把', '陶瓷碗', '牙刷', '一次性筷子', '脏污衣服',
            '报纸', '玻璃制品', '篮球', '塑料瓶', '硬纸板', '玻璃瓶', '金属制品', '帽子', '易拉罐', '纸张',
            '菜叶', '橙皮', '蛋壳', '香蕉皮',
            '电池', '药片胶囊', '荧光灯', '油漆桶']
# English class names in the same order; used for plot titles and inference output.
class_en = ['Seashell', 'Lighter','Old Mirror', 'Broom','Ceramic Bowl', 'Toothbrush','Disposable Chopsticks','Dirty Cloth',
            'Newspaper', 'Glassware', 'Basketball', 'Plastic Bottle', 'Cardboard','Glass Bottle', 'Metalware', 'Hats', 'Cans', 'Paper',
            'Vegetable Leaf','Orange Peel', 'Eggshell','Banana Peel',
            'Battery', 'Tablet capsules','Fluorescent lamp', 'Paint bucket']
# English name -> integer label; passed to ImageFolderDataset as class_indexing,
# so the keys must exactly match the dataset's folder names.
index_en = {'Seashell': 0, 'Lighter': 1, 'Old Mirror': 2, 'Broom': 3, 'Ceramic Bowl': 4, 'Toothbrush': 5, 'Disposable Chopsticks': 6, 'Dirty Cloth': 7,
            'Newspaper': 8, 'Glassware': 9, 'Basketball': 10, 'Plastic Bottle': 11, 'Cardboard': 12, 'Glass Bottle': 13, 'Metalware': 14, 'Hats': 15, 'Cans': 16, 'Paper': 17,
            'Vegetable Leaf': 18, 'Orange Peel': 19, 'Eggshell': 20, 'Banana Peel': 21,
            'Battery': 22, 'Tablet capsules': 23, 'Fluorescent lamp': 24, 'Paint bucket': 25}
# Training hyper-parameters and paths.
config = EasyDict({
    "num_classes": 26,
    "image_height": 224,
    "image_width": 224,
    "backbone_out_channels":1280,
    "batch_size": 16,
    "eval_batch_size": 8,
    "epochs": 10,
    "lr_max": 0.05,
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "save_ckpt_epochs": 1,
    "dataset_path": "./data_en",
    "class_index": index_en,
    "pretrained_ckpt": "./mobilenetV2-200_1067.ckpt"
})
def create_dataset(dataset_path, config, training=True, buffer_size=1000):
    """
    Create the training or evaluation dataset.

    Args:
        dataset_path (str): Root of the ImageFolder dataset (contains
            'train' and 'test' sub-directories).
        config (EasyDict): Configuration with image size, batch sizes and
            the class_index mapping.
        training (bool): If True, build the augmented, shuffled training
            pipeline; otherwise the deterministic evaluation pipeline.
        buffer_size (int): Shuffle buffer size for the training dataset.

    Returns:
        Dataset: Batched MindSpore dataset ready for Model.train/Model.eval.
    """
    data_path = os.path.join(dataset_path, 'train' if training else 'test')
    ds = de.ImageFolderDataset(data_path, num_parallel_workers=4, class_indexing=config.class_index)
    resize_height = config.image_height
    resize_width = config.image_width
    # ImageNet channel statistics scaled to the [0, 255] pixel range.
    normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
    change_swap_op = C.HWC2CHW()
    # Fix: the original referenced an undefined name `mstype`; the dtype is
    # available via the imported `ms` module.
    type_cast_op = C2.TypeCast(ms.int32)
    if training:
        # Decode + random crop/resize, flip and color jitter for augmentation.
        crop_decode_resize = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
        horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)
        color_adjust = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        train_trans = [crop_decode_resize, horizontal_flip_op, color_adjust, normalize_op, change_swap_op]
        train_ds = ds.map(input_columns="image", operations=train_trans, num_parallel_workers=4)
        train_ds = train_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=4)
        train_ds = train_ds.shuffle(buffer_size=buffer_size)
        ds = train_ds.batch(config.batch_size, drop_remainder=True)
    else:
        # Deterministic eval pipeline: resize to size/0.875 then center-crop,
        # the conventional ImageNet evaluation protocol.
        decode_op = C.Decode()
        resize_op = C.Resize((int(resize_width/0.875), int(resize_width/0.875)))
        center_crop = C.CenterCrop(resize_width)
        eval_trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op]
        eval_ds = ds.map(input_columns="image", operations=eval_trans, num_parallel_workers=4)
        eval_ds = eval_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=4)
        ds = eval_ds.batch(config.eval_batch_size, drop_remainder=True)
    return ds
# Display a few preprocessed samples from the evaluation dataset.
ds = create_dataset(dataset_path=config.dataset_path, config=config, training=False)
print(ds.get_dataset_size())
# Fix: use the public iterator protocol instead of the private _get_next().
data = next(ds.create_dict_iterator(output_numpy=True))
images = data['image']
labels = data['label']
for i in range(1, 5):
    plt.subplot(2, 2, i)
    # NOTE(review): images here are normalized CHW floats, so imshow clips
    # values outside [0, 1] and colors look washed out — fine for a sanity check.
    plt.imshow(np.transpose(images[i], (1, 2, 0)))
    plt.title('label: %s' % class_en[labels[i]])
    plt.xticks([])
plt.show()
5. MobileNetV2模型搭建
使用MindSpore定义MobileNetV2网络的各模块时需要继承 mindspore.nn.Cell。Cell 是所有神经网络模块(如 Conv2d 等)的基类。以下是MobileNetV2模型的定义:
# Public API of this module (fix: the literal was garbled across two lines).
__all__ = ['mobilenet_v2']
def conv_bn(inp, oup, stride):
    """3x3 conv (pad 1, given stride) -> BatchNorm -> ReLU6 as one SequentialCell."""
    layers = [
        nn.Conv2d(inp, oup, 3, stride, pad_mode='pad', padding=1, has_bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(),
    ]
    return nn.SequentialCell(layers)
def conv_1x1_bn(inp, oup):
    """Pointwise 1x1 conv -> BatchNorm -> ReLU6 as one SequentialCell."""
    layers = [
        nn.Conv2d(inp, oup, 1, 1, pad_mode='pad', has_bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(),
    ]
    return nn.SequentialCell(layers)
class InvertedResidual(nn.Cell):
    """MobileNetV2 inverted-residual block: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    A residual shortcut is added only for stride-1 blocks whose input and
    output channel counts match.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        expanded = int(round(inp * expand_ratio))
        # Shortcut is valid only when the block preserves both spatial size and width.
        self.use_res_connect = self.stride == 1 and inp == oup
        ops = [] if expand_ratio == 1 else [conv_1x1_bn(inp, expanded)]
        # Depthwise 3x3 (group == channels) followed by a linear 1x1 projection;
        # no activation after the final BatchNorm (the "linear bottleneck").
        ops += [
            nn.Conv2d(expanded, expanded, 3, stride, pad_mode='pad', padding=1, group=expanded, has_bias=False),
            nn.BatchNorm2d(expanded),
            nn.ReLU6(),
            nn.Conv2d(expanded, oup, 1, 1, pad_mode='pad', has_bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.SequentialCell(ops)

    def construct(self, x):
        out = self.conv(x)
        if self.use_res_connect:
            out = x + out
        return out
class MobileNetV2(nn.Cell):
    """MobileNetV2 backbone plus classifier head.

    Args:
        num_classes (int): Output size of the final Dense layer.
        width_mult (float): Channel width multiplier applied to every stage.
    """

    def __init__(self, num_classes=1000, width_mult=1.):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # Each row: (expand_ratio t, output channels c, repeats n, first stride s).
        interverted_residual_setting = [
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]
        assert len(interverted_residual_setting[0]) == 4
        input_channel = int(input_channel * width_mult)
        self.last_channel = int(last_channel * max(1.0, width_mult))
        # Stem: 3x3 stride-2 conv from RGB to input_channel.
        self.features = [conv_bn(3, input_channel, 2)]
        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                # Only the first block of each stage downsamples.
                stride = s if i == 0 else 1
                self.features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        self.features.append(conv_1x1_bn(input_channel, self.last_channel))
        # Average pooling over the 7x7 feature map produced by a 224x224 input.
        self.features.append(nn.AvgPool2d(7))
        self.features = nn.SequentialCell(self.features)
        self.classifier = nn.SequentialCell([
            # NOTE(review): Dropout argument semantics changed between MindSpore
            # versions (keep_prob vs p) — confirm 0.2 means drop probability here.
            nn.Dropout(0.2),
            nn.Dense(self.last_channel, num_classes),
        ])
        self._initialize_weights()

    def construct(self, x):
        x = self.features(x)
        # Flatten (N, C, 1, 1) -> (N, C) before the classifier.
        x = x.view(x.shape[0], -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Apply standard initializations to conv/BN/dense parameters.

        Fix: the original merely constructed initializer objects (e.g.
        `XavierUniform(m.weight)` or `set_data(One())`) without ever
        materializing them; initializers must be realized with
        `initializer(init, shape, dtype)` and applied via `set_data`.
        """
        init = ms.common.initializer
        for _, m in self.cells_and_names():
            if isinstance(m, nn.Conv2d):
                m.weight.set_data(init.initializer(init.XavierUniform(), m.weight.shape, m.weight.dtype))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_data(init.initializer(init.One(), m.gamma.shape, m.gamma.dtype))
                m.beta.set_data(init.initializer(init.Zero(), m.beta.shape, m.beta.dtype))
            elif isinstance(m, nn.Dense):
                m.weight.set_data(init.initializer(init.Normal(0.01), m.weight.shape, m.weight.dtype))
                if m.bias is not None:
                    m.bias.set_data(init.initializer(init.Zero(), m.bias.shape, m.bias.dtype))
def mobilenet_v2(pretrained=False, **kwargs):
    """Construct a MobileNetV2 network.

    Args:
        pretrained (bool): Accepted for API compatibility but currently
            ignored — weights are loaded separately via ms.load_checkpoint.
        **kwargs: Forwarded to MobileNetV2 (e.g. num_classes, width_mult).

    Returns:
        MobileNetV2: A freshly constructed (randomly initialized) network.
    """
    model = MobileNetV2(**kwargs)
    return model
# Build MobileNetV2 with the 26 garbage classes.
network = mobilenet_v2(num_classes=config.num_classes)
# Load pretrained weights from the checkpoint file.
# NOTE(review): the checkpoint name suggests it was trained with a different
# head — confirm load_param_into_net tolerates the classifier-shape mismatch.
param_dict = ms.load_checkpoint(config.pretrained_ckpt)
ms.load_param_into_net(network, param_dict)
print("load pretrained mobilenet_v2 from [{}]".format(config.pretrained_ckpt))
6. 模型训练
模型训练阶段定义如下:
def init_lr(step_size, lr_max=None, epochs=None):
    """
    Build a per-step learning-rate schedule: linear warmup over the first
    10% of steps, then cosine decay to zero.

    Args:
        step_size (int): Number of steps (batches) per epoch.
        lr_max (float, optional): Peak learning rate. Defaults to config.lr_max.
        epochs (int, optional): Total training epochs. Defaults to config.epochs.

    Returns:
        np.ndarray: float32 array of length epochs * step_size, one learning
        rate per training step.
    """
    # Defaults are resolved at call time so the original config-driven
    # behavior is unchanged; explicit arguments allow reuse and testing.
    lr_max = config.lr_max if lr_max is None else lr_max
    epochs = config.epochs if epochs is None else epochs
    total_steps = epochs * step_size
    warmup_steps = int(0.1 * total_steps)
    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            # Linear ramp from lr_max/warmup_steps up to lr_max.
            lr = lr_max * (i + 1) / warmup_steps
        else:
            # Cosine anneal from lr_max down to 0 over the remaining steps.
            lr = lr_max * (0.5 + 0.5 * math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps)))
        lr_each_step.append(lr)
    return np.array(lr_each_step).astype(np.float32)
# Build the augmented training dataset.
train_dataset = create_dataset(dataset_path=config.dataset_path, config=config, training=True)
# Momentum optimizer with Nesterov and the warmup+cosine LR schedule.
lr = init_lr(train_dataset.get_dataset_size())
opt = nn.Momentum(network.trainable_params(), lr, config.momentum, config.weight_decay, use_nesterov=True)
# Sparse softmax cross-entropy: labels are class indices, not one-hot vectors.
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
# Wrap network/loss/optimizer and track accuracy during eval.
model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'})
# Checkpointing: save every 1875 steps, keep at most 10 checkpoints.
config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_mobilenetV2", directory="./", config=config_ck)
# Print the loss value each step.
loss_cb = LossMonitor()
print("============== Starting Training ==============")
# NOTE(review): dataset_sink_mode=True may be unsupported on CPU targets in
# some MindSpore versions — confirm, or use False for CPU runs.
model.train(config.epochs, train_dataset, callbacks=[ckpoint_cb, loss_cb], dataset_sink_mode=True)
7. 模型评估
模型评估阶段代码如下:
# Build the deterministic evaluation dataset and compute accuracy.
eval_dataset = create_dataset(dataset_path=config.dataset_path, config=config, training=False)
acc = model.eval(eval_dataset)
print("============== Acc: {} ==============".format(acc))
8. 模型推理
对于单张图片的推理,可以使用以下代码:
from PIL import Image
def read_img(img_path):
    """Load one image file and preprocess it into a (1, 3, 224, 224) array.

    Mirrors the evaluation pipeline's resize / normalize / channel-swap steps.
    NOTE(review): transforms are composed via de.transforms.Compose and applied
    directly to a PIL image — confirm this module path and PIL-input support
    in the installed MindSpore version.
    """
    image = Image.open(img_path).convert('RGB')
    transform = de.transforms.Compose([
        C.Resize((224, 224)),
        # ImageNet channel statistics scaled to the [0, 255] pixel range.
        C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255]),
        C.HWC2CHW()
    ])
    img = transform(image)
    # Prepend the batch dimension expected by model.predict.
    img = np.expand_dims(img, axis=0)
    return img
def infer(img_path):
    """Run the trained model on one image file and return its English label."""
    batch = Tensor(read_img(img_path))
    logits = model.predict(batch)
    top1 = int(np.argmax(logits.asnumpy(), axis=1)[0])
    return class_en[top1]
# Run inference on one sample image and print the predicted class name.
img_path = "./data_en/test/Seashell/001.jpg"
pred_label = infer(img_path)
print("Predicted label: ", pred_label)
此代码将在指定的图像文件上执行推理,并输出预测的标签。
通过本次实验,我收获了以下几点:
数据预处理的重要性:
数据预处理是模型训练的关键一步。通过数据增强(如随机裁剪、水平翻转和颜色调整),我们能够提升模型的泛化能力,减少过拟合的风险。
模型设计与优化:
MobileNetV2的倒残差结构(Inverted Residual Block)和线性瓶颈(Linear Bottlenecks)在保持模型准确率的同时,显著减少了参数量和计算量,展示了优秀的模型设计理念。
训练策略与技巧:
在训练过程中,学习率的设定和调整(如学习率预热和余弦退火策略)对模型的收敛速度和最终性能有很大影响。此外,使用Momentum优化器结合Nesterov动量,可以加速训练过程并提高模型准确率。
模型评估与推理:
通过对模型进行评估,我们可以了解其在测试集上的表现,及时调整训练策略。对于单张图片的推理,通过预处理步骤和模型预测,我们能够准确输出垃圾的类别。