项目地址:https://github.com/ibaiGorordo/ONNX-YOLOv10-Object-Detection
项目依赖:onnxruntime-gpu、opencv-python、imread-from-url、cap-from-youtube、ultralytics
1、代码修改
代码改动说明:yolov10/yolov10.py中的第18行修改为以下代码,明确指出使用cuda
self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'])
同时将代码中的返回值注解 -> tuple[np.ndarray, np.ndarray, np.ndarray]: 修改为 -> tuple: ,也就是删除tuple后面方括号中的类型描述(Python 3.9 以下的版本不支持 tuple[...] 这种写法,运行时会报 TypeError)
2、onnx模型导出
基于以下代码可以导出onnx模型,并放到ONNX-YOLOv10-Object-Detection项目下的models目录下。
from ultralytics import YOLO
if __name__ == '__main__':
    # Load the PyTorch checkpoint, move the model to the GPU and export it
    # in ONNX format.
    weights_file = r"yolov10n.pt"
    yolo_model = YOLO(weights_file).cuda()
    export_result = yolo_model.export(format="onnx")
3、检测图片
import cv2
from yolov10 import YOLOv10, draw_detections
# Initialize YOLOv10 object detector
model_path = "models/yolov10n.onnx"
detector = YOLOv10(model_path, conf_thres=0.2)

# Read image (img_url holds a local file path here, not a URL)
img_url = r"D:\yolo_seq\ultralytics\datasets\coco128\images\train2017\000000000009.jpg"
img = cv2.imread(img_url)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image: {img_url}")

# Detect Objects.
# NOTE(review): the same image is processed 10 times, presumably so the
# printed inference times show warm-up vs. steady state -- confirm.
for _ in range(10):
    class_ids, boxes, confidences = detector(img)

# Draw detections of the last run
combined_img = draw_detections(img, boxes, confidences, class_ids)
cv2.imshow("Detected Objects", combined_img)
cv2.waitKey(0)
再参考 https://hpg123.blog.csdn.net/article/details/141882208?spm=1001.2014.3001.5502 中的rtsp拉流代码,即可基于yolov10模型实现对rtsp视频流的实时检测
4、关键代码
项目代码结构
4.1 __init__.py
from .yolov10 import YOLOv10
from .utils import draw_detections
4.2 utils.py
import os
import cv2
import numpy as np
import tqdm
import requests
# Class labels, indexed by the integer class id of a detection
# (presumably the COCO classes -- confirm against the exported model).
class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush']
# One colour per class: an array of shape (len(class_names), 3) holding float
# values sampled uniformly between 0 and 255 (not integers). The generator is
# seeded with 3, so the same colours are produced on every run.
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(class_names), 3))
# Model names that check_model accepts when a model file has to be downloaded.
available_models = ["yolov10n", "yolov10s", "yolov10m", "yolov10b", "yolov10l", "yolov10x"]
def download_model(url: str, path: str):
    """Download ``url`` to ``path`` while showing a tqdm progress bar.

    The response body is streamed in 1 MiB chunks into a temporary
    ``<path>.part`` file, which is renamed to ``path`` only after the whole
    transfer succeeded. An interrupted or failed download therefore never
    leaves a truncated file at ``path``.

    Args:
        url: Address of the file to download.
        path: Destination file path.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never saved as if it were the model.
    """
    print(f"Downloading model from {url} to {path}")
    chunk_size = 1024 * 1024
    tmp_path = f"{path}.part"

    with requests.get(url, stream=True, timeout=30) as r:
        r.raise_for_status()

        # Content-Length is optional; without it the total stays unknown.
        content_length = r.headers.get('content-length')
        if content_length is None:
            total_chunks = None
        else:
            # Round up so that a trailing partial chunk is counted as well.
            total_chunks = (int(content_length) + chunk_size - 1) // chunk_size

        with open(tmp_path, 'wb') as f:
            for chunk in tqdm.tqdm(r.iter_content(chunk_size=chunk_size), total=total_chunks,
                                   bar_format='{l_bar}{bar:10}'):
                if chunk:
                    f.write(chunk)

    os.replace(tmp_path, path)
def check_model(model_path: str):
    """Make sure a model file exists at ``model_path``, downloading it if missing.

    Nothing happens when ``model_path`` already exists. Otherwise the part of
    the file name before the first ``.`` is taken as the model name; it must be
    one of ``available_models`` and the matching ``.onnx`` release file is
    downloaded to ``model_path``.

    Args:
        model_path: Path where the model file is expected.

    Raises:
        ValueError: If the file is missing and its name is not a known model.
    """
    if os.path.exists(model_path):
        return

    model_name = os.path.basename(model_path).split('.')[0]
    if model_name not in available_models:
        raise ValueError(f"Invalid model name: {model_name}")

    # Create the destination folder (e.g. "models/") first; otherwise the
    # download fails when it tries to open the target file.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    url = f"https://github.com/THU-MIG/yolov10/releases/download/v1.1/{model_name}.onnx"
    download_model(url, model_path)
def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Return a copy of ``image`` annotated with all detections.

    Each detection gets a translucent filled rectangle (blended with weight
    ``mask_alpha``), a box outline and a "<label> <score>%" caption, all in
    the colour assigned to its class. Font scale and text thickness are
    derived from the shorter side of the image.
    """
    rows, cols = image.shape[:2]
    short_side = min(rows, cols)
    font_size = short_side * 0.0006
    text_thickness = int(short_side * 0.001)

    # Blend the filled rectangles in first; outlines and captions go on top.
    annotated = draw_masks(image.copy(), boxes, class_ids, mask_alpha)

    for class_id, box, score in zip(class_ids, boxes, scores):
        class_color = colors[class_id]
        draw_box(annotated, box, class_color)

        caption = f'{class_names[class_id]} {int(score * 100)}%'
        draw_text(annotated, caption, box, class_color, font_size, text_thickness)

    return annotated
def draw_box(image: np.ndarray, box: np.ndarray, color: tuple = (0, 0, 255),
             thickness: int = 2) -> np.ndarray:
    """Draw the outline of ``box`` (x1, y1, x2, y2) on ``image``.

    The coordinates are cast to integers before drawing. Returns the result
    of ``cv2.rectangle``.
    """
    left, top, right, bottom = box.astype(int)
    top_left = (left, top)
    bottom_right = (right, bottom)
    return cv2.rectangle(image, top_left, bottom_right, color, thickness)
def draw_text(image: np.ndarray, text: str, box: np.ndarray, color: tuple = (0, 0, 255),
              font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
    """Write ``text`` in white on a filled ``color`` background at the box corner.

    The text is anchored at the (x1, y1) corner of ``box``; the background
    rectangle spans the text width and 1.2 times the text height from that
    corner. Returns the result of ``cv2.putText``.
    """
    left, top, _right, _bottom = box.astype(int)
    font = cv2.FONT_HERSHEY_SIMPLEX

    (text_w, text_h), _ = cv2.getTextSize(text=text, fontFace=font,
                                          fontScale=font_size, thickness=text_thickness)
    # Make the background a little taller than the measured text height.
    text_h = int(text_h * 1.2)

    anchor = (left, top)
    cv2.rectangle(image, anchor, (left + text_w, top - text_h), color, -1)
    return cv2.putText(image, text, anchor, font, font_size, (255, 255, 255), text_thickness,
                       cv2.LINE_AA)
def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    """Blend class-coloured filled rectangles for ``boxes`` over ``image``.

    The rectangles are painted on a copy of ``image``; that copy is then
    mixed with the untouched ``image`` using weights ``mask_alpha`` and
    ``1 - mask_alpha``. Returns the blended image.
    """
    overlay = image.copy()

    for class_id, box in zip(classes, boxes):
        left, top, right, bottom = box.astype(int)
        # Thickness -1 makes cv2.rectangle fill the rectangle.
        cv2.rectangle(overlay, (left, top), (right, bottom), colors[class_id], -1)

    return cv2.addWeighted(overlay, mask_alpha, image, 1 - mask_alpha, 0)
4.3 yolov10.py
import time
import cv2
import numpy as np
import onnxruntime
from .utils import draw_detections, check_model
class YOLOv10:
    """Run a YOLOv10 ONNX model with ONNX Runtime on BGR images.

    Calling an instance with an image returns ``(class_ids, boxes,
    confidences)`` for all detections whose confidence exceeds the threshold;
    boxes are scaled to the size of the given image.
    """

    def __init__(self, path: str, conf_thres: float = 0.2):
        """Load the model at ``path``.

        Args:
            path: Path of the ONNX model; ``check_model`` is called on it
                before the inference session is created.
            conf_thres: Detections with a confidence not above this value
                are discarded.
        """
        self.conf_threshold = conf_thres

        check_model(path)

        # Initialize model, explicitly requesting the CUDA execution provider
        self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'])

        # Get model info
        self.get_input_details()
        self.get_output_details()

    def __call__(self, image: np.ndarray) -> tuple:
        """Shorthand for :meth:`detect_objects`."""
        return self.detect_objects(image)

    def detect_objects(self, image: np.ndarray) -> tuple:
        """Detect objects in ``image`` and return ``(class_ids, boxes, confidences)``."""
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        return self.process_output(outputs[0])

    def prepare_input(self, image: np.ndarray) -> np.ndarray:
        """Convert a BGR image into the float32 tensor fed to the model.

        The size of ``image`` is stored on the instance because
        ``rescale_boxes`` needs it to map boxes back to the original image.
        The image is converted to RGB, resized to the model input size,
        scaled to the range 0..1 and laid out as (1, channels, height, width).
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def inference(self, input_tensor):
        """Run the session on ``input_tensor``, print the elapsed time and return all outputs."""
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})

        print(f"Inference time: {(time.perf_counter() - start) * 1000:.2f} ms")
        return outputs

    def process_output(self, output):
        """Filter raw detections by confidence and rescale their boxes.

        Args:
            output: Array whose last axis holds one detection per row: the
                box values first, then the confidence, then the class id.

        Returns:
            ``(class_ids, boxes, confidences)`` of the rows whose confidence
            is above ``self.conf_threshold``.
        """
        # Collapse only the leading axes. squeeze() would also drop the
        # detection axis when exactly one row is present, which breaks the
        # 2-D indexing below.
        output = output.reshape(-1, output.shape[-1])

        boxes = output[:, :-2]
        confidences = output[:, -2]
        class_ids = output[:, -1].astype(int)

        mask = confidences > self.conf_threshold
        boxes = boxes[mask, :]
        confidences = confidences[mask]
        class_ids = class_ids[mask]

        # Rescale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)

        return class_ids, boxes, confidences

    def rescale_boxes(self, boxes):
        """Map boxes from model-input coordinates to original-image coordinates."""
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height],
                               dtype=np.float32)
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height],
                          dtype=np.float32)
        return boxes

    def get_input_details(self):
        """Read the input names and the input height/width from the session."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        # Axes 2 and 3 of the first input are used as height and width; an
        # entry that is not an int (e.g. a dynamic axis) falls back to 640.
        input_shape = model_inputs[0].shape
        self.input_height = input_shape[2] if isinstance(input_shape[2], int) else 640
        self.input_width = input_shape[3] if isinstance(input_shape[3], int) else 640

    def get_output_details(self):
        """Read the output names from the session."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]
# https://github.com/ibaiGorordo/ONNX-YOLOv10-Object-Detection/tree/main
if __name__ == '__main__':
    model_path = "yolov10n.onnx"

    # Read the test image. cv2.imread reports a missing or unreadable file by
    # returning None instead of raising, so fail here with a clear message.
    image_path = r"D:\yolo_seq\ultralytics\datasets\coco128\images\train2017\000000000009.jpg"
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Initialize YOLOv10 object detector
    detector = YOLOv10(model_path)

    # Detect Objects.
    # NOTE(review): the same image is processed 10 times, presumably so the
    # printed inference times show warm-up vs. steady state -- confirm.
    for _ in range(10):
        class_ids, boxes, confidences = detector(img)

    # Draw detections of the last run
    combined_img = draw_detections(img, boxes, confidences, class_ids)
    print("img: ",img.shape)
    print("combined_img: ",combined_img.shape)
    cv2.imshow("Output", combined_img)
    cv2.waitKey(0)