YOLOv7+单目跟踪
- 1. 目标跟踪
- 2. 测距模块
- 2.1 设置测距模块
- 2.2 添加测距
- 3. 主代码
- 4. 实验效果
相关链接
1. YOLOv5+单目测距(python)
2. YOLOv7+单目测距(python)
3. YOLOv5+单目跟踪(python)
4. 具体效果已在Bilibili发布,点击跳转
工程源码见文章末尾
1. 目标跟踪
用yolov7实现跟踪步骤比较简单,去官网下载yolov7源码,然后下载跟踪模块相关代码,链接:https://download.csdn.net/download/qq_45077760/87712810
将下载的内容全部拖进yolov7-main文件夹里,把环境装好,然后运行代码 detect_or_track.py
此时如果不出问题就完成了普通检测
也可以在终端运行命令 python detect_or_track.py --weights yolov7.pt --no-trace --view-img --source 1.mp4,并可按需追加以下可选参数:
--show-fps #显示fps
--seed 2 #初始数字,直接改变目标方框颜色和序号
--track #每个方框左上角有ID数字
--classes 0 1 # 只显示前两种类型 (总共80种在data/coco.yaml里)
--show-track #显示跟踪轨迹
--unique-track-color # 每条轨迹不同颜色
--nobbox # 不显示检测框
--nolabel # 不显示标签
--nosave # 不保存结果;去掉该参数则结果会保存到 XXX\yolov7\runs\detect\exp
2. 测距模块
2.1 设置测距模块
测距部分之前已经写过了,具体见这篇文章,我们在yolov7-main文件夹里创建一个名为distance.py的文件,或者直接把测距那篇文章里的distance.py文件拖进来也可以
distance.py
# Monocular distance estimation from the pixel height of a detected object.
#
# The formula is: distance = real_height * focal_length / pixel_height.
# The result is multiplied by 2.54 and named "cm" below, so the reference
# heights are treated as inches.

foc = 1990.0  # lens focal length (presumably in pixels -- TODO confirm against calibration)
real_hight_person = 66.9  # reference pedestrian height, inches
real_hight_car = 57.08  # reference car height, inches


def _distance_from_pixel_height(real_height, h):
    """Return the estimated distance in metres for an object ``h`` pixels tall.

    Args:
        real_height: Real-world height of the object class, in inches.
        h: Height of the bounding box in pixels.

    Returns:
        The distance in metres, truncated to whole centimetres, or
        ``float("inf")`` when the box is too small to measure (``h <= 2``),
        instead of dividing by zero or returning a negative distance.
    """
    effective_height = h - 2  # the original formula trims 2 px off the box height
    if effective_height <= 0:
        return float("inf")
    dis_inch = (real_height * foc) / effective_height
    dis_cm = int(dis_inch * 2.54)  # truncate to whole centimetres
    return dis_cm / 100


def person_distance(h):
    """Estimate the distance (metres) to a pedestrian whose box is ``h`` pixels tall."""
    return _distance_from_pixel_height(real_hight_person, h)


def car_distance(h):
    """Estimate the distance (metres) to a car whose box is ``h`` pixels tall."""
    return _distance_from_pixel_height(real_hight_car, h)
2.2 添加测距
接下来调用测距代码到主代码 detect_or_track.py 文件中,先在代码开头导入库,添加
from distance import person_distance,car_distance
与测距那篇文章不同,由于跟踪代码自带画框,我们只需要将测距模块写进画框这里,具体如下(注释部分是我添加修改的)
def draw_boxes(img, bbox, identities=None, categories=None, confidences=None, names=None, colors=None):
    """Draw bounding boxes and their labels onto ``img`` and return it.

    In tracking mode (``identities`` given) the label is
    ``"<id>:<class> dis:<distance>m"``, where the distance is estimated from
    the pixel height of the box. Otherwise the label is ``"<class> <conf>"``.

    Args:
        img: Image to draw on (modified in place by the OpenCV calls).
        bbox: Iterable of ``(x1, y1, x2, y2)`` boxes.
        identities: Per-box track ids, or ``None`` when not tracking.
        categories: Per-box class indices, or ``None`` (class 0 is assumed).
        confidences: Per-box confidences; read only when ``identities`` is ``None``.
        names: Class-index -> class-name lookup.
        colors: Class-index -> colour lookup.

    Returns:
        The same ``img`` object.

    Reads ``opt.thickness``, ``opt.nobbox`` and ``opt.nolabel`` from the
    module-level ``opt``.
    """
    for i, box in enumerate(bbox):
        x1, y1, x2, y2 = [int(v) for v in box]
        tl = opt.thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness

        cat = int(categories[i]) if categories is not None else 0
        track_id = int(identities[i]) if identities is not None else 0
        color = colors[cat]

        if not opt.nobbox:
            cv2.rectangle(img, (x1, y1), (x2, y2), color, tl)

        if not opt.nolabel:
            if identities is not None:
                # Use the car reference height for cars; every other class keeps
                # the pedestrian reference height, so it is only a rough estimate.
                estimate = car_distance if names[cat] == 'car' else person_distance
                try:
                    dis_m = estimate(y2 - y1)  # distance from the pixel height of the box
                except ZeroDivisionError:
                    # A degenerate box must not abort drawing the whole frame.
                    dis_m = float('inf')
                label = f'{track_id}:{names[cat]} dis:{dis_m}m'
            else:
                label = f'{names[cat]} {confidences[i]:.2f}'

            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = x1 + t_size[0], y1 - t_size[1] - 3
            cv2.rectangle(img, (x1, y1), c2, color, -1, cv2.LINE_AA)  # filled label background
            cv2.putText(img, label, (x1, y1 - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    return img
3. 主代码
import argparse
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, \
check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel
from sort import *
from distance import person_distance,car_distance
"""Function to Draw Bounding boxes"""
def draw_boxes(img, bbox, identities=None, categories=None, confidences=None, names=None, colors=None):
    """Draw bounding boxes and their labels onto ``img`` and return it.

    In tracking mode (``identities`` given) the label is
    ``"<id>:<class> dis:<distance>m"``, where the distance is estimated from
    the pixel height of the box. Otherwise the label is ``"<class> <conf>"``.

    Args:
        img: Image to draw on (modified in place by the OpenCV calls).
        bbox: Iterable of ``(x1, y1, x2, y2)`` boxes.
        identities: Per-box track ids, or ``None`` when not tracking.
        categories: Per-box class indices, or ``None`` (class 0 is assumed).
        confidences: Per-box confidences; read only when ``identities`` is ``None``.
        names: Class-index -> class-name lookup.
        colors: Class-index -> colour lookup.

    Returns:
        The same ``img`` object.

    Reads ``opt.thickness``, ``opt.nobbox`` and ``opt.nolabel`` from the
    module-level ``opt``.
    """
    for i, box in enumerate(bbox):
        x1, y1, x2, y2 = [int(v) for v in box]
        tl = opt.thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness

        cat = int(categories[i]) if categories is not None else 0
        track_id = int(identities[i]) if identities is not None else 0
        color = colors[cat]

        if not opt.nobbox:
            cv2.rectangle(img, (x1, y1), (x2, y2), color, tl)

        if not opt.nolabel:
            if identities is not None:
                # Use the car reference height for cars; every other class keeps
                # the pedestrian reference height, so it is only a rough estimate.
                estimate = car_distance if names[cat] == 'car' else person_distance
                try:
                    dis_m = estimate(y2 - y1)  # distance from the pixel height of the box
                except ZeroDivisionError:
                    # A degenerate box must not abort drawing the whole frame.
                    dis_m = float('inf')
                label = f'{track_id}:{names[cat]} dis:{dis_m}m'
            else:
                label = f'{names[cat]} {confidences[i]:.2f}'

            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = x1 + t_size[0], y1 - t_size[1] - 3
            cv2.rectangle(img, (x1, y1), c2, color, -1, cv2.LINE_AA)  # filled label background
            cv2.putText(img, label, (x1, y1 - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    return img
def _draw_track_trails(im0, tracks, colors):
    """Draw each track's centroid history onto ``im0`` as a polyline."""
    for t, track in enumerate(tracks):
        if opt.unique_track_color:
            track_color = sort_tracker.color_list[t]
        else:
            track_color = colors[int(track.detclass)]
        centroids = track.centroidarr
        for k in range(len(centroids) - 1):
            cv2.line(im0,
                     (int(centroids[k][0]), int(centroids[k][1])),
                     (int(centroids[k + 1][0]), int(centroids[k + 1][1])),
                     track_color, thickness=opt.thickness)


def detect(save_img=False):
    """Run detection (and optionally SORT tracking) over ``opt.source``.

    All configuration is read from the module-level ``opt``; the tracker is the
    module-level ``sort_tracker``. For every frame the function runs the model,
    applies NMS, draws boxes via ``draw_boxes``, optionally overlays FPS, shows
    the frame and/or writes it under ``save_dir``.

    Args:
        save_img: Ignored; it is recomputed from ``opt.nosave`` and the source.

    Note:
        ``--save-txt`` only affects which output directory is created here; no
        label files are written by this function.
    """
    source, weights, view_img, save_txt, imgsz, trace = (
        opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace)
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    if not opt.nosave:
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()
    startTime = 0  # timestamp of the previously processed frame, for the FPS overlay

    try:
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Warmup again whenever the input batch/size changes
            if device.type != 'cpu' and (
                    old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
                old_img_b = img.shape[0]
                old_img_h = img.shape[2]
                old_img_w = img.shape[3]
                for _ in range(3):
                    model(img, augment=opt.augment)[0]

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]
            t2 = time_synchronized()

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                       classes=opt.classes, agnostic=opt.agnostic_nms)
            t3 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # img.jpg

                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Rows are (x1, y1, x2, y2, conf, class); the class is passed
                    # to the tracker as well.
                    # NOTE(review): `np` is not imported explicitly in this file;
                    # presumably it arrives via `from sort import *` -- confirm.
                    dets_to_sort = det.cpu().detach().numpy().astype(np.float64)

                    if opt.track:
                        tracked_dets = sort_tracker.update(dets_to_sort, opt.unique_track_color)
                        tracks = sort_tracker.getTrackers()

                        # Draw only when the tracker returned something. Otherwise the
                        # box variables would be unbound (first frame) or would still
                        # hold the previous frame's boxes.
                        if len(tracked_dets) > 0:
                            if opt.show_track:
                                _draw_track_trails(im0, tracks, colors)
                            im0 = draw_boxes(im0, tracked_dets[:, :4], tracked_dets[:, 8],
                                             tracked_dets[:, 4], None, names, colors)
                    else:
                        im0 = draw_boxes(im0, dets_to_sort[:, :4], None,
                                         dets_to_sort[:, 5], dets_to_sort[:, 4], names, colors)

                # Print time (inference + NMS)
                print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')

                # FPS overlay
                if dataset.mode != 'image' and opt.show_fps:
                    currentTime = time.time()
                    elapsed = currentTime - startTime
                    # Two frames can share a timestamp on a coarse clock.
                    display_fps = 1 / elapsed if elapsed > 0 else 0
                    startTime = currentTime
                    cv2.putText(im0, "FPS: " + str(int(display_fps)), (20, 70),
                                cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)

                # Stream results
                if view_img:
                    cv2.namedWindow("Webcam", cv2.WINDOW_NORMAL)
                    cv2.resizeWindow("Webcam", 1280, 720)
                    cv2.moveWindow("Webcam", 0, 100)
                    cv2.imshow("Webcam", im0)
                    cv2.waitKey(1)  # 1 millisecond

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'image':
                        cv2.imwrite(save_path, im0)
                        print(f" The image with the result is saved in: {save_path}")
                    else:  # 'video' or 'stream'
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer
                            if vid_cap:  # video
                                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            else:  # stream
                                fps, w, h = 30, im0.shape[1], im0.shape[0]
                                save_path += '.mp4'
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                        vid_writer.write(im0)
    finally:
        # Release the last writer even if the loop is interrupted, so the
        # output video is closed properly.
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()

    print(f'Done. ({time.time() - t0:.3f}s)')
if __name__ == '__main__':
    # Command-line options, listed in the order they should appear in --help.
    cli_options = (
        ('--weights', dict(nargs='+', type=str, default='yolov7.pt', help='model.pt path(s)')),
        ('--source', dict(type=str, default='street.mp4', help='source')),  # file/folder, 0 for webcam
        ('--img-size', dict(type=int, default=640, help='inference size (pixels)')),
        ('--conf-thres', dict(type=float, default=0.25, help='object confidence threshold')),
        ('--iou-thres', dict(type=float, default=0.45, help='IOU threshold for NMS')),
        ('--device', dict(default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')),
        ('--view-img', dict(action='store_true', help='display results')),
        ('--save-txt', dict(action='store_true', help='save results to *.txt')),
        ('--save-conf', dict(action='store_true', help='save confidences in --save-txt labels')),
        ('--nosave', dict(action='store_true', help='do not save images/videos')),
        ('--classes', dict(nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')),
        ('--agnostic-nms', dict(action='store_true', help='class-agnostic NMS')),
        ('--augment', dict(action='store_true', help='augmented inference')),
        ('--update', dict(action='store_true', help='update all models')),
        ('--project', dict(default='runs/detect', help='save results to project/name')),
        ('--name', dict(default='exp', help='save results to project/name')),
        ('--exist-ok', dict(action='store_true', help='existing project/name ok, do not increment')),
        ('--no-trace', dict(action='store_true', help='don`t trace model')),
        ('--track', dict(action='store_true', help='run tracking')),
        ('--show-track', dict(action='store_true', help='show tracked path')),
        ('--show-fps', dict(action='store_true', help='show fps')),
        ('--thickness', dict(type=int, default=2, help='bounding box and font size thickness')),
        ('--seed', dict(type=int, default=1, help='random seed to control bbox colors')),
        ('--nobbox', dict(action='store_true', help='don`t show bounding box')),
        ('--nolabel', dict(action='store_true', help='don`t show label')),
        ('--unique-track-color', dict(action='store_true', help='show each track in unique color')),
    )

    parser = argparse.ArgumentParser()
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)

    # `opt` and `sort_tracker` are module-level names read by detect() and draw_boxes().
    opt = parser.parse_args()
    print(opt)

    # NOTE(review): `np` is not imported explicitly in this file; presumably it
    # arrives via `from sort import *` -- confirm.
    np.random.seed(opt.seed)

    sort_tracker = Sort(max_age=5, min_hits=2, iou_threshold=0.2)

    # Dependency check left disabled:
    # check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if not opt.update:
            detect()
        else:
            # update all models (to fix SourceChangeWarning)
            opt.weights = 'yolov7.pt'
            detect()
            strip_optimizer(opt.weights)
4. 实验效果
同理,运行 detect_or_track.py,或者在终端运行命令 python detect_or_track.py --weights yolov7.pt --no-trace --view-img --source 1.mp4
代码打包下载
链接1:https://download.csdn.net/download/qq_45077760/87712914
链接2:https://github.com/up-up-up-up/yolov7_Monocular_track(求STAR)
博客主页有更多有关测距的内容