【PyTorch】基于YOLO的多目标检测项目(一)
【PyTorch】基于YOLO的多目标检测项目(二)
YOLO-v3网络由跨距为2的卷积层、跳跃连接层和上采样层组成,没有池化层。网络接收一幅416 * 416的图像作为输入,并提供三个YOLO输出。
目录
准备配置文件
搭建YOLO模型
搭建PyTorch模块
搭建DarkNet模型
定义损失函数
训练模型
部署模型
准备配置文件
新建一个py文件,写入以下代码并命名为myutils.py。该文件并非配置文件本身,而是一个工具模块:提供解析模型配置文件(.cfg)和构建网络层的辅助函数,用于辅助构建模型。
import torch
from torch import nn
# Module-wide default device: the first CUDA GPU when CUDA is available,
# otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def parse_model_config(path2file):
    """Parse a Darknet-style ``.cfg`` file into a list of block dictionaries.

    Every ``[section]`` header starts a new dictionary whose ``"type"`` entry
    holds the section name. Each following ``key=value`` line is stored in
    that dictionary with surrounding whitespace removed from both sides.
    All values are kept as strings; callers convert them as needed.

    Blank lines, whitespace-only lines and lines whose first non-blank
    character is ``#`` are ignored.

    Args:
        path2file: Path of the configuration file to read.

    Returns:
        A list of dictionaries, one per section, in file order.

    Raises:
        ValueError: If a non-header line does not contain ``=``.
        IndexError: If a ``key=value`` line appears before any section header.
    """
    # The context manager guarantees the handle is closed even on errors.
    with open(path2file, 'r') as cfg_file:
        raw_lines = cfg_file.read().split('\n')

    blocks_list = []
    for raw_line in raw_lines:
        # Strip first so indented comments and whitespace-only lines are
        # recognised and skipped instead of reaching the key/value parser.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        if line.startswith('['):
            blocks_list.append({'type': line[1:-1].rstrip()})
            continue

        # Split on the first "=" only, so values may themselves contain "=".
        key, sep, value = line.partition('=')
        if not sep:
            raise ValueError(
                "expected 'key=value' in config line: {!r}".format(line))
        blocks_list[-1][key.rstrip()] = value.strip()
    return blocks_list
def create_layers(blocks_list):
    """Turn parsed cfg blocks into a list of PyTorch modules.

    The first entry of ``blocks_list`` is treated as the hyper-parameter
    block; every later entry produces one ``nn.Sequential`` that is appended
    to the returned ``nn.ModuleList``. The output channel count of every
    layer is tracked so that later layers can look up their input channels.

    Handled block types:
        * ``convolutional``: ``Conv2d`` with "same"-style padding
          ``(size - 1) // 2``, optional ``BatchNorm2d`` and optional
          ``LeakyReLU(0.1)`` when ``activation`` is ``"leaky"``.
        * ``upsample``: ``nn.Upsample`` with ``scale_factor=stride``.
        * ``shortcut`` / ``route``: an ``EmptyLayer`` placeholder; only the
          resulting channel count is computed here.
        * ``yolo``: a ``YOLOLayer`` built from the anchors selected by
          ``mask``.
    Any other type yields an empty ``nn.Sequential``.

    Args:
        blocks_list: List of dictionaries with string values, such as the
            one returned by ``parse_model_config``.

    Returns:
        A tuple ``(hyperparams, module_list)``.
    """
    hyperparams = blocks_list[0]
    channels_list = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for layer_ind, layer_dict in enumerate(blocks_list[1:]):
        modules = nn.Sequential()
        layer_type = layer_dict["type"]
        # Layers that do not change the channel count (upsample, yolo,
        # unknown types) inherit the previous layer's output channels.
        filters = channels_list[-1]

        if layer_type == "convolutional":
            filters = int(layer_dict["filters"])
            kernel_size = int(layer_dict["size"])
            pad = (kernel_size - 1) // 2
            # cfg values are strings, so "0" must be converted to an int
            # before the truth test; bool("0") would be True.
            bn = bool(int(layer_dict.get("batch_normalize", 0)))
            conv2d = nn.Conv2d(
                in_channels=channels_list[-1],
                out_channels=filters,
                kernel_size=kernel_size,
                stride=int(layer_dict["stride"]),
                padding=pad,
                # A bias is redundant when batch norm follows the conv.
                bias=not bn)
            modules.add_module("conv_{0}".format(layer_ind), conv2d)
            if bn:
                # NOTE(review): 0.9 looks like a Darknet-convention momentum;
                # PyTorch's `momentum` weights the new batch statistic, so
                # confirm this value is intended.
                bn_layer = nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)
                modules.add_module(
                    "batch_norm_{0}".format(layer_ind), bn_layer)
            if layer_dict["activation"] == "leaky":
                activn = nn.LeakyReLU(0.1)
                modules.add_module("leaky_{0}".format(layer_ind), activn)

        elif layer_type == "upsample":
            stride = int(layer_dict["stride"])
            upsample = nn.Upsample(scale_factor=stride)
            modules.add_module("upsample_{}".format(layer_ind), upsample)

        elif layer_type == "shortcut":
            backwards = int(layer_dict["from"])
            # channels_list[0] is the input image, hence the [1:] offset.
            filters = channels_list[1:][backwards]
            modules.add_module(
                "shortcut_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "route":
            layers = [int(x) for x in layer_dict["layers"].split(",")]
            # Routed feature maps are concatenated, so channels add up.
            filters = sum(channels_list[1:][l] for l in layers)
            modules.add_module("route_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "yolo":
            flat = [int(a) for a in layer_dict["anchors"].split(",")]
            # Pair consecutive numbers into (width, height) anchors.
            anchors = [(flat[i], flat[i + 1]) for i in range(0, len(flat), 2)]
            mask = [int(m) for m in layer_dict["mask"].split(",")]
            anchors = [anchors[i] for i in mask]
            num_classes = int(layer_dict["classes"])
            img_size = int(hyperparams["height"])
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module("yolo_{}".format(layer_ind), yolo_layer)

        module_list.append(modules)
        channels_list.append(filters)

    return hyperparams, module_list
class EmptyLayer(nn.Module):
    """Parameter-free placeholder module.

    It defines no ``forward`` of its own and holds no parameters or
    sub-modules; it only occupies a slot in a module list.
    """

    def __init__(self):
        super().__init__()
class YOLOLayer(nn.Module):
    """Detection head that decodes a raw feature map into box predictions.

    The input of shape ``(batch, num_anchors * (num_classes + 5), g, g)`` is
    reshaped to ``(batch, num_anchors, g, g, num_classes + 5)``. For every
    anchor and grid cell the output holds the decoded box
    ``(x, y, w, h)`` scaled by the layer stride ``img_dim / g``, the
    sigmoid objectness score and the sigmoid class scores.

    Args:
        anchors: Sequence of ``(width, height)`` pairs for this head.
        num_classes: Number of object classes.
        img_dim: Side length of the network input used to derive the stride.
    """

    def __init__(self, anchors, num_classes, img_dim=416):
        super().__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.img_dim = img_dim
        # 0 forces the grid offsets to be computed on the first forward.
        self.grid_size = 0

    def forward(self, x_in):
        """Decode ``x_in`` into ``(batch, num_anchors * g * g, 5 + classes)``."""
        batch_size = x_in.size(0)
        grid_size = x_in.size(2)

        prediction = x_in.view(
            batch_size, self.num_anchors,
            self.num_classes + 5, grid_size, grid_size)
        # Move the per-box feature vector to the last axis.
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        obj_score = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Rebuild the cached offsets when the grid size changes or when the
        # cache lives on a different device than the current input.
        cached_grid = getattr(self, "grid_x", None)
        if (grid_size != self.grid_size
                or cached_grid is None
                or cached_grid.device != x_in.device):
            self.compute_grid_offsets(
                grid_size, cuda=x_in.is_cuda, device=x_in.device)

        pred_boxes = self.transform_outputs(prediction)

        output = torch.cat(
            (
                pred_boxes.view(batch_size, -1, 4),
                obj_score.view(batch_size, -1, 1),
                pred_cls.view(batch_size, -1, self.num_classes),
            ),
            -1,
        )
        return output

    def compute_grid_offsets(self, grid_size, cuda=True, device=None):
        """Cache the cell offsets and stride-scaled anchors for ``grid_size``.

        Args:
            grid_size: Number of cells along one side of the feature map.
            cuda: Used only when ``device`` is ``None``; selects ``cuda:0``
                if CUDA is available, otherwise the CPU.
            device: Device on which the cached tensors are created.
        """
        if device is None:
            use_cuda = cuda and torch.cuda.is_available()
            device = torch.device("cuda:0" if use_cuda else "cpu")

        self.grid_size = grid_size
        self.stride = self.img_dim / self.grid_size

        cell_index = torch.arange(grid_size, device=device)
        # grid_x[0, 0, i, j] == j (column index); grid_y is its transpose.
        self.grid_x = cell_index.repeat(
            1, 1, grid_size, 1).type(torch.float32)
        self.grid_y = cell_index.repeat(
            1, 1, grid_size, 1).transpose(3, 2).type(torch.float32)

        # Express the anchors in grid-cell units instead of pixels.
        scaled_anchors = [
            (a_w / self.stride, a_h / self.stride)
            for a_w, a_h in self.anchors
        ]
        self.scaled_anchors = torch.tensor(scaled_anchors, device=device)
        self.anchor_w = self.scaled_anchors[:, 0:1].view(
            (1, self.num_anchors, 1, 1))
        self.anchor_h = self.scaled_anchors[:, 1:2].view(
            (1, self.num_anchors, 1, 1))

    def transform_outputs(self, prediction):
        """Convert raw ``(tx, ty, tw, th)`` into boxes scaled by the stride."""
        x = torch.sigmoid(prediction[..., 0])  # centre x offset in the cell
        y = torch.sigmoid(prediction[..., 1])  # centre y offset in the cell
        w = prediction[..., 2]  # log-space width
        h = prediction[..., 3]  # log-space height

        # NOTE(review): the decoded boxes are detached from the autograd
        # graph, as in the original `.data` usage. Any loss computed from
        # these box values therefore sends no gradient back through them;
        # confirm this is intended.
        pred_boxes = torch.zeros_like(prediction[..., :4])
        pred_boxes[..., 0] = x.detach() + self.grid_x
        pred_boxes[..., 1] = y.detach() + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h
        return pred_boxes * self.stride
搭建YOLO模型
解析配置文件,使用parse_model_config助手读取并打印
# Load the cfg-parsing helper from the local myutils module.
from myutils import parse_model_config
# Path of the YOLOv3 network description file.
path2config="./config/yolov3.cfg"
# Parse the file into a list of per-block dictionaries.
blocks_list = parse_model_config(path2config)
# Bare expression: the first two blocks are only displayed when this runs in
# an interactive / notebook session.
blocks_list[:2]
搭建PyTorch模块
基于解析的配置文件创建PyTorch模块,调用 create_layers 辅助函数进行转换并获取 PyTorch 模块的列表
# Load the layer-building helper from the local myutils module.
from myutils import create_layers
# Convert the parsed blocks into the hyper-parameter dict and the module list.
hy_pa, m_l= create_layers(blocks_list)
# Inspect the generated modules and the hyper-parameters.
print(m_l)
print(hy_pa)
搭建DarkNet模型
from torch import nn
class Darknet(nn.Module):
    """Network assembled from a Darknet cfg file.

    The cfg is parsed with ``parse_model_config`` and converted to modules
    with ``create_layers``. ``forward`` walks the blocks in order and keeps
    every layer's output so that ``shortcut`` and ``route`` blocks can refer
    back to earlier results.
    """

    def __init__(self, config_path, img_size=416):
        super().__init__()
        self.blocks_list = parse_model_config(config_path)
        built = create_layers(self.blocks_list)
        self.hyperparams, self.module_list = built
        self.img_size = img_size

    def forward(self, x):
        """Run the network.

        Returns:
            A tuple ``(yolo_out_cat, yolo_outputs)``: the outputs of all
            ``yolo`` blocks concatenated along dimension 1, and the list of
            the individual outputs.
        """
        img_dim = x.shape[2]  # read from the input but not used further
        history = []      # output of every block, in execution order
        detections = []   # outputs of the "yolo" blocks only
        direct_types = ["convolutional", "upsample", "maxpool"]

        for spec, layer in zip(self.blocks_list[1:], self.module_list):
            kind = spec["type"]
            if kind in direct_types:
                x = layer(x)
            elif kind == "shortcut":
                # Residual sum of the previous output and the one "from"
                # points to.
                source = int(spec["from"])
                x = history[-1] + history[source]
            elif kind == "yolo":
                x = layer[0](x)
                detections.append(x)
            elif kind == "route":
                # Concatenate the referenced outputs along the channel axis.
                picked = [history[int(idx)]
                          for idx in spec["layers"].split(",")]
                x = torch.cat(picked, 1)
            history.append(x)

        merged = torch.cat(detections, 1)
        return merged, detections
# Build the network from the cfg file and move it to the target device.
model = Darknet(path2config)
model = model.to(device)
print(model)

# Random stand-in for a single 3-channel 416x416 image on the same device.
dummy_img = torch.rand(1, 3, 416, 416)
dummy_img = dummy_img.to(device)

# Forward pass with gradient tracking disabled.
with torch.no_grad():
    dummy_out_cat, dummy_out = model.forward(dummy_img)
    # Shape of the concatenated output, then of the first three heads.
    print(dummy_out_cat.shape)
    print(*[dummy_out[head].shape for head in range(3)])
定义损失函数
YOLO通常使用组合损失函数。在下面的实现中,总损失由三部分相加得到:边界框坐标(x、y、w、h)的均方误差损失、目标置信度的二元交叉熵损失(有目标与无目标两项分别加权),以及类别预测的二元交叉熵损失。
def get_loss_batch(output,targets, params_loss, opt=None):
    """Compute the combined YOLO loss for one batch and optionally optimise.

    For each of the ``num_yolos`` heads the loss adds the coordinate losses
    (``mse_loss`` on x, y, w, h), the weighted objectness losses
    (``bce_loss`` on object and no-object cells) and the class loss
    (``bce_loss``).

    Args:
        output: Indexable collection of per-head tensors, each of shape
            ``(batch, num_bbxs, features)`` with box values in the first
            four features, objectness in the fifth and class scores after.
        targets: Ground-truth tensor forwarded to ``get_yolo_targets``.
        params_loss: Dict with the keys ``ignore_thres``,
            ``scaled_anchors``, ``mse_loss``, ``bce_loss``, ``num_yolos``,
            ``num_anchors``, ``obj_scale`` and ``noobj_scale``.
        opt: Optional optimiser. When given, gradients are zeroed, the loss
            is back-propagated and one optimiser step is taken.

    Returns:
        The batch loss as a Python float.
    """
    # Local stdlib import: a scalar square root needs no array library.
    import math

    ignore_thres = params_loss["ignore_thres"]
    scaled_anchors = params_loss["scaled_anchors"]
    mse_loss = params_loss["mse_loss"]
    bce_loss = params_loss["bce_loss"]

    num_yolos = params_loss["num_yolos"]
    num_anchors = params_loss["num_anchors"]
    obj_scale = params_loss["obj_scale"]
    noobj_scale = params_loss["noobj_scale"]

    loss = 0.0
    for yolo_ind in range(num_yolos):
        yolo_out = output[yolo_ind]
        batch_size, num_bbxs, _ = yolo_out.shape

        # Each head emits num_anchors * grid_size**2 boxes; recover the grid.
        gz_2 = num_bbxs / num_anchors
        grid_size = int(math.sqrt(gz_2))

        yolo_out = yolo_out.view(
            batch_size, num_anchors, grid_size, grid_size, -1)

        pred_boxes = yolo_out[:, :, :, :, :4]
        # Map the boxes to the same parametrisation as the targets.
        x, y, w, h = transform_bbox(pred_boxes, scaled_anchors[yolo_ind])
        pred_conf = yolo_out[:, :, :, :, 4]
        pred_cls_prob = yolo_out[:, :, :, :, 5:]

        yolo_targets = get_yolo_targets({
            "pred_cls_prob": pred_cls_prob,
            "pred_boxes": pred_boxes,
            "targets": targets,
            "anchors": scaled_anchors[yolo_ind],
            "ignore_thres": ignore_thres,
        })

        obj_mask = yolo_targets["obj_mask"]
        noobj_mask = yolo_targets["noobj_mask"]
        tx = yolo_targets["tx"]
        ty = yolo_targets["ty"]
        tw = yolo_targets["tw"]
        th = yolo_targets["th"]
        tcls = yolo_targets["tcls"]
        t_conf = yolo_targets["t_conf"]

        # Coordinate losses only on cells responsible for an object.
        loss_x = mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = mse_loss(h[obj_mask], th[obj_mask])

        # Objectness loss, weighted separately for object / no-object cells.
        loss_conf_obj = bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
        loss_conf_noobj = bce_loss(
            pred_conf[noobj_mask], t_conf[noobj_mask])
        loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj

        loss_cls = bce_loss(pred_cls_prob[obj_mask], tcls[obj_mask])

        loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # With no heads the accumulator is still the plain float it started as;
    # there is nothing to back-propagate and no `.item()` to call.
    if isinstance(loss, float):
        return loss

    if opt is not None:
        opt.zero_grad()
        loss.backward()
        opt.step()

    return loss.item()
def transform_bbox(bbox, anchors):
    """Convert boxes in grid units to offsets and log-scale sizes.

    Args:
        bbox: Tensor of shape ``(batch, num_anchors, g, g, 4)`` holding
            ``(x, y, w, h)`` in the last dimension.
        anchors: Tensor of shape ``(num_anchors, 2)`` with anchor
            ``(width, height)``. Any number of anchors is supported.

    Returns:
        A tuple ``(x, y, w, h)`` where ``x`` and ``y`` are the fractional
        parts of the box centres and ``w`` and ``h`` are
        ``log(size / anchor_size + 1e-16)``.
    """
    x = bbox[..., 0]
    y = bbox[..., 1]
    w = bbox[..., 2]
    h = bbox[..., 3]

    # -1 lets the anchor axis follow the actual number of anchors instead of
    # assuming exactly three.
    anchor_w = anchors[:, 0].view((1, -1, 1, 1))
    anchor_h = anchors[:, 1].view((1, -1, 1, 1))

    # Keep only the offset inside the grid cell.
    x = x - x.floor()
    y = y - y.floor()

    # The small constant avoids log(0).
    w = torch.log(w / anchor_w + 1e-16)
    h = torch.log(h / anchor_h + 1e-16)
    return x, y, w, h
def get_yolo_targets(params):
    """Build the per-cell training targets for one YOLO head.

    Args:
        params: Dict with the keys
            ``pred_boxes`` (tensor ``(batch, num_anchors, g, g, 4)``),
            ``pred_cls_prob`` (tensor whose last dimension is the number of
            classes), ``targets`` (2-D tensor whose columns are batch index,
            class label and four box values that are multiplied by the grid
            size here, so presumably normalised ``x, y, w, h``; confirm
            against the dataset), ``anchors`` (tensor ``(num_anchors, 2)``)
            and ``ignore_thres``.

    Returns:
        Dict with boolean ``obj_mask`` / ``noobj_mask``, float targets
        ``tx``, ``ty``, ``tw``, ``th``, one-hot ``tcls`` and ``t_conf``
        (``obj_mask`` as float).
    """
    pred_boxes = params["pred_boxes"]
    pred_cls_prob = params["pred_cls_prob"]
    target = params["targets"]
    anchors = params["anchors"]
    ignore_thres = params["ignore_thres"]

    batch_size = pred_boxes.size(0)
    num_anchors = pred_boxes.size(1)
    grid_size = pred_boxes.size(2)
    num_cls = pred_cls_prob.size(-1)

    # Allocate on the predictions' device so the function does not depend on
    # a module-level device setting.
    device = pred_boxes.device

    sizeT = batch_size, num_anchors, grid_size, grid_size
    # Boolean masks: indexing with uint8 masks is deprecated in PyTorch.
    obj_mask = torch.zeros(sizeT, device=device, dtype=torch.bool)
    noobj_mask = torch.ones(sizeT, device=device, dtype=torch.bool)
    tx = torch.zeros(sizeT, device=device, dtype=torch.float32)
    ty = torch.zeros(sizeT, device=device, dtype=torch.float32)
    tw = torch.zeros(sizeT, device=device, dtype=torch.float32)
    th = torch.zeros(sizeT, device=device, dtype=torch.float32)

    sizeT = batch_size, num_anchors, grid_size, grid_size, num_cls
    tcls = torch.zeros(sizeT, device=device, dtype=torch.float32)

    # Express the ground-truth boxes in grid-cell units.
    target_bboxes = target[:, 2:] * grid_size
    t_xy = target_bboxes[:, :2]
    t_wh = target_bboxes[:, 2:]
    t_x, t_y = t_xy.t()
    t_w, t_h = t_wh.t()

    # Cell that contains each box centre. Clamp so that a centre lying
    # exactly on the right/bottom edge still maps to a valid cell.
    grid_i, grid_j = t_xy.long().t()
    grid_i = grid_i.clamp(0, grid_size - 1)
    grid_j = grid_j.clamp(0, grid_size - 1)

    # IoU of every anchor with every target, based on width/height only.
    iou_with_anchors = [get_iou_WH(anchor, t_wh) for anchor in anchors]
    iou_with_anchors = torch.stack(iou_with_anchors)
    best_iou_wa, best_anchor_ind = iou_with_anchors.max(0)

    batch_inds, target_labels = target[:, :2].long().t()

    # The best-matching anchor in the target's cell is responsible for it.
    obj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = True
    noobj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = False

    # Anchors that overlap well but are not the best are excluded from the
    # no-object loss as well.
    for ind, iou_wa in enumerate(iou_with_anchors.t()):
        noobj_mask[batch_inds[ind], iou_wa > ignore_thres,
                   grid_j[ind], grid_i[ind]] = False

    # Centre targets are the offsets within the cell.
    tx[batch_inds, best_anchor_ind, grid_j, grid_i] = t_x - t_x.floor()
    ty[batch_inds, best_anchor_ind, grid_j, grid_i] = t_y - t_y.floor()

    # Size targets are log-ratios to the responsible anchor.
    anchor_w = anchors[best_anchor_ind][:, 0]
    tw[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(
        t_w / anchor_w + 1e-16)
    anchor_h = anchors[best_anchor_ind][:, 1]
    th[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(
        t_h / anchor_h + 1e-16)

    # One-hot class target.
    tcls[batch_inds, best_anchor_ind, grid_j, grid_i, target_labels] = 1

    output = {
        "obj_mask": obj_mask,
        "noobj_mask": noobj_mask,
        "tx": tx,
        "ty": ty,
        "tw": tw,
        "th": th,
        "tcls": tcls,
        "t_conf": obj_mask.float(),
    }
    return output
def get_iou_WH(wh1, wh2):
    """Return the IoU between one (w, h) pair and a batch of (w, h) pairs.

    Only widths and heights are compared, so the intersection is
    ``min(w) * min(h)``.

    Args:
        wh1: Indexable pair ``(width, height)``.
        wh2: Tensor that is transposed so its first row holds the widths and
            its second row the heights.

    Returns:
        Tensor of intersection-over-union values, one per entry of ``wh2``.
    """
    sizes = wh2.t()
    ref_w = wh1[0]
    ref_h = wh1[1]
    other_w = sizes[0]
    other_h = sizes[1]

    overlap = torch.min(ref_w, other_w) * torch.min(ref_h, other_h)
    # The small constant keeps the union strictly positive.
    ref_area = ref_w * ref_h + 1e-16
    union = ref_area + other_w * other_h - overlap
    return overlap / union
训练模型
在训练数据上训练模型,并在验证数据上对其进行评估。训练过程遵循标准的基于梯度下降的流程:下面的代码使用Adam优化器,并通过ReduceLROnPlateau在验证损失不再下降时降低学习率。
def loss_epoch(model,params_loss,dataset_dl,sanity_check=False,opt=None):
    """Run one pass over ``dataset_dl`` and return the loss per sample.

    Each batch is moved to ``device``, passed through ``model`` and scored
    with ``get_loss_batch``; when ``opt`` is given that call also updates
    the weights. The summed loss is divided by the dataset length.

    Args:
        model: Callable returning a pair whose second element holds the
            per-head outputs.
        params_loss: Loss settings forwarded to ``get_loss_batch``.
        dataset_dl: Loader yielding ``(inputs, targets, extra)`` triples and
            exposing ``.dataset``.
        sanity_check: When exactly ``True``, stop after the first batch.
        opt: Optional optimiser forwarded to ``get_loss_batch``.

    Returns:
        Accumulated loss divided by ``len(dataset_dl.dataset)``.
    """
    total_loss = 0.0
    sample_count = len(dataset_dl.dataset)

    for inputs, labels, _ in dataset_dl:
        labels = labels.to(device)
        _, head_outputs = model(inputs.to(device))
        total_loss += get_loss_batch(head_outputs, labels, params_loss, opt)
        if sanity_check is True:
            break

    return total_loss / float(sample_count)
import copy
def train_val(model, params):
    """Train ``model`` and keep the weights with the lowest validation loss.

    Every epoch runs one training pass and one validation pass (without
    gradients). A new best validation loss stores a copy of the weights and
    saves them to ``path2weights``. After the scheduler step, a change in
    learning rate triggers a reload of the best weights.

    Args:
        model: The network to train.
        params: Dict with the keys ``num_epochs``, ``params_loss``,
            ``optimizer``, ``train_dl``, ``val_dl``, ``sanity_check``,
            ``lr_scheduler`` and ``path2weights``.

    Returns:
        A tuple ``(model, loss_history)`` where the model carries the best
        weights and ``loss_history`` maps ``"train"`` / ``"val"`` to the
        per-epoch losses.
    """
    setting_names = (
        "num_epochs", "params_loss", "optimizer", "train_dl",
        "val_dl", "sanity_check", "lr_scheduler", "path2weights",
    )
    (num_epochs, params_loss, opt, train_dl,
     val_dl, sanity_check, lr_scheduler, path2weights) = [
        params[name] for name in setting_names]

    loss_history = {"train": [], "val": []}

    best_loss = float('inf')
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        lr_at_start = get_lr(opt)
        print('Epoch {}/{}, current lr={}'.format(
            epoch, num_epochs - 1, lr_at_start))

        # Training pass: the optimiser is supplied, so weights are updated.
        model.train()
        train_loss = loss_epoch(
            model, params_loss, train_dl, sanity_check, opt)
        loss_history["train"].append(train_loss)
        print("train loss: %.6f" %(train_loss))

        # Validation pass: no optimiser and no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_loss = loss_epoch(model, params_loss, val_dl, sanity_check)
        loss_history["val"].append(val_loss)
        print("val loss: %.6f" %(val_loss))

        improved = val_loss < best_loss
        if improved:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), path2weights)
            print("Copied best model weights!")

        lr_scheduler.step(val_loss)
        # The scheduler changed the learning rate: restart from the best
        # weights seen so far.
        if lr_at_start != get_lr(opt):
            print("Loading best model weights!")
            model.load_state_dict(best_model_wts)
        print("-"*10)

    model.load_state_dict(best_model_wts)
    return model, loss_history
def get_lr(opt):
    """Return the ``'lr'`` value of the optimiser's first parameter group.

    Returns ``None`` when ``opt.param_groups`` is empty.
    """
    first_lr = next((group['lr'] for group in opt.param_groups), None)
    return first_lr
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
# Local import: `os` is needed below for creating the output directory.
import os

# Adam optimiser over all model parameters; the scheduler halves the learning
# rate after 20 epochs without improvement of the monitored value.
opt = optim.Adam(model.parameters(), lr=1e-3)
lr_scheduler = ReduceLROnPlateau(opt, mode='min',factor=0.5, patience=20,verbose=1)

# Directory for the saved weights. exist_ok avoids the check-then-create race
# of a separate exists()/mkdir() pair.
path2models= "./models/"
os.makedirs(path2models, exist_ok=True)

# Stride-scaled anchors of the three detection heads. `scaled_anchors` is set
# by YOLOLayer.compute_grid_offsets, so the model must already have run a
# forward pass. The indices 82/94/106 are tied to the cfg file in use.
scaled_anchors=[model.module_list[82][0].scaled_anchors,
                model.module_list[94][0].scaled_anchors,
                model.module_list[106][0].scaled_anchors]

# Summed (not averaged) element-wise losses.
mse_loss = nn.MSELoss(reduction="sum")
bce_loss = nn.BCELoss(reduction="sum")

# Settings consumed by get_loss_batch.
params_loss={
    "scaled_anchors" : scaled_anchors,
    "ignore_thres": 0.5,
    "mse_loss": mse_loss,
    "bce_loss": bce_loss,
    "num_yolos": 3,
    "num_anchors": 3,
    "obj_scale": 1,
    "noobj_scale": 100,
}

# Settings consumed by train_val. With sanity_check=True each epoch processes
# only the first batch of every loader.
params_train={
    "num_epochs": 5,
    "optimizer": opt,
    "params_loss": params_loss,
    "train_dl": train_dl,
    "val_dl": val_dl,
    "sanity_check": True,
    "lr_scheduler": lr_scheduler,
    "path2weights": path2models+"weights.pt",
}

model,loss_hist=train_val(model,params_train)
部署模型
将训练后的权重加载到模型中
# Checkpoint location; matches the "path2weights" used for training above.
path2weights="./models/weights.pt"
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU can also be loaded on a CPU-only machine.
model.load_state_dict(torch.load(path2weights, map_location=device))

# Take one sample from the validation set and inspect it.
# NOTE(review): coco_val and show_img_bbox are not defined in this part;
# presumably they come from the earlier parts of the series.
img,tg,_=coco_val[11]
print(img.shape)
print(tg.shape)
show_img_bbox(img,tg)