基于ONNX-YOLOv10-Object-Detection项目实现yolov10模型onnx-python推理

项目地址：https://github.com/ibaiGorordo/ONNX-YOLOv10-Object-Detection
项目依赖：onnxruntime-gpu、opencv-python、imread-from-url、cap-from-youtube、ultralytics

1、代码修改

代码改动说明：yolov10/yolov10.py中的第18行修改为以下代码，明确指出使用cuda

self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'])

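同时将代码中的 -> tuple[np.ndarray, np.ndarray, np.ndarray]: 修改为 -> tuple:，也就是删除 tuple 后面方括号中的类型描述（tuple[...] 这种写法需要 Python 3.9 及以上版本，在更低版本中会报 TypeError: 'type' object is not subscriptable）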

2、onnx模型导出

基于以下代码可以导出onnx模型，并放到ONNX-YOLOv10-Object-Detection项目下的models目录下。

from ultralytics import YOLO

if __name__ == '__main__':
    # Load the PyTorch checkpoint and move the model to the GPU.
    weights_file = r"yolov10n.pt"
    yolo_model = YOLO(weights_file).cuda()

    # Export the loaded model in ONNX format; the value returned by
    # export() is kept in `success` (presumably the exported file path --
    # confirm against the ultralytics documentation).
    success = yolo_model.export(format="onnx")

3、检测图片

import cv2
from yolov10 import YOLOv10, draw_detections

# Initialize the YOLOv10 object detector
model_path = "models/yolov10n.onnx"
detector = YOLOv10(model_path, conf_thres=0.2)

# Read image
img_url = r"D:\yolo_seq\ultralytics\datasets\coco128\images\train2017\000000000009.jpg"
img = cv2.imread(img_url)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would only fail later with an unrelated-looking AttributeError.
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_url}")

# Detect objects. The detection is repeated 10 times on the same image
# (presumably so the printed inference times show warmed-up runs as well as
# the first, slower one); only the last result is drawn.
for _ in range(10):
    class_ids, boxes, confidences = detector(img)

# Draw detections
combined_img = draw_detections(img, boxes, confidences, class_ids)

cv2.imshow("Detected Objects", combined_img)
cv2.waitKey(0)

（原文此处为一张插图，应为上述代码的检测效果图；图片未随文本保留）

再参考 https://hpg123.blog.csdn.net/article/details/141882208?spm=1001.2014.3001.5502 中的rtsp拉流方法，即可基于yolov10模型实现对rtsp视频流的实时检测。

4、关键代码

项目代码结构
（原文此处为项目代码结构的截图；图片未随文本保留）

4.1 __init__.py

from .yolov10 import YOLOv10
from .utils import draw_detections

4.2 utils.py

import os
import cv2
import numpy as np
import tqdm
import requests

# Human-readable labels for the 80 detection classes; the list index is the
# class id used to look up a label (see draw_detections).
class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
               'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
               'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
               'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
               'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
               'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# One drawing color per class: an array of shape (len(class_names), 3) whose
# values are floats drawn uniformly from [0, 255). The generator is seeded
# with a fixed value so the same colors are produced on every run.
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(class_names), 3))

# Model names that check_model accepts when a model file has to be downloaded.
available_models = ["yolov10n", "yolov10s", "yolov10m", "yolov10b", "yolov10l", "yolov10x"]


def download_model(url: str, path: str):
    """Download the file at *url* to *path* while showing a progress bar.

    The response is streamed in 1 MiB chunks into a temporary
    ``<path>.part`` file which is renamed to *path* only after the transfer
    completed. An interrupted or failed download therefore never leaves a
    truncated file at *path* (check_model only tests whether the file
    exists, so a partial file would otherwise be accepted as a valid model).

    Args:
        url: Address of the file to fetch.
        path: Destination file path.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination cannot be written.
    """
    print(f"Downloading model from {url} to {path}")
    chunk_size = 1024 * 1024
    tmp_path = f"{path}.part"

    try:
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=30) as r:
            # Fail early instead of saving an HTML error page as the model.
            r.raise_for_status()

            # Content-Length is optional; without it the progress bar simply
            # has no known total.
            content_length = r.headers.get('content-length')
            total_chunks = int(content_length) // chunk_size if content_length else None

            with open(tmp_path, 'wb') as f:
                for chunk in tqdm.tqdm(r.iter_content(chunk_size=chunk_size), total=total_chunks,
                                       bar_format='{l_bar}{bar:10}'):
                    if chunk:
                        f.write(chunk)

        os.replace(tmp_path, path)
    except BaseException:
        # Remove the partial file on any failure (including Ctrl+C), then
        # let the original exception propagate.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


def check_model(model_path: str):
    """Make sure a model file exists at *model_path*, downloading it if needed.

    Nothing happens when the path already exists. Otherwise the model name is
    taken from the file name (the part before the first dot), validated
    against ``available_models`` and fetched with ``download_model``.

    Args:
        model_path: Path where the ONNX model is expected.

    Raises:
        ValueError: If the file is missing and its name is not one of
            ``available_models``.
    """
    if not os.path.exists(model_path):
        file_name = os.path.basename(model_path)
        model_name = file_name.partition('.')[0]

        if model_name not in available_models:
            raise ValueError(f"Invalid model name: {model_name}")

        download_model(
            f"https://github.com/THU-MIG/yolov10/releases/download/v1.1/{model_name}.onnx",
            model_path,
        )


def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Return a copy of *image* annotated with the given detections.

    Each detection gets a semi-transparent filled rectangle (via draw_masks),
    a box outline and a "<label> <score>%" caption. The input image itself is
    not modified.

    Args:
        image: Image array; its first two dimensions are read as height, width.
        boxes: Iterable of boxes, each unpacked as (x1, y1, x2, y2).
        scores: Iterable of confidence scores, one per box.
        class_ids: Iterable of class indices into ``class_names`` / ``colors``.
        mask_alpha: Blend weight of the filled rectangles.

    Returns:
        The annotated image.
    """
    canvas = image.copy()

    # Font scale and stroke width grow with the shorter image side.
    height, width = image.shape[:2]
    shorter_side = min([height, width])
    font_size = shorter_side * 0.0006
    text_thickness = int(shorter_side * 0.001)

    canvas = draw_masks(canvas, boxes, class_ids, mask_alpha)

    # Outline and caption for every detection
    for class_id, box, score in zip(class_ids, boxes, scores):
        class_color = colors[class_id]
        draw_box(canvas, box, class_color)

        caption = f'{class_names[class_id]} {int(score * 100)}%'
        draw_text(canvas, caption, box, class_color, font_size, text_thickness)

    return canvas


def draw_box(image: np.ndarray, box: np.ndarray, color: tuple = (0, 0, 255),
             thickness: int = 2) -> np.ndarray:
    """Draw the outline of *box* on *image* and return the result of cv2.rectangle.

    Args:
        image: Image to draw on.
        box: Array of four values (x1, y1, x2, y2); cast to int before drawing.
        color: Outline color passed to cv2.rectangle.
        thickness: Outline thickness passed to cv2.rectangle.
    """
    left, top, right, bottom = box.astype(int)
    top_left = (left, top)
    bottom_right = (right, bottom)
    return cv2.rectangle(image, top_left, bottom_right, color, thickness)


def draw_text(image: np.ndarray, text: str, box: np.ndarray, color: tuple = (0, 0, 255),
              font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
    """Draw *text* in white on a filled background at the top-left corner of *box*.

    Args:
        image: Image to draw on.
        text: Caption to render.
        box: Array of four values (x1, y1, x2, y2); only x1 and y1 are used.
        color: Fill color of the caption background.
        font_size: Font scale passed to OpenCV.
        text_thickness: Stroke thickness passed to OpenCV.

    Returns:
        The result of cv2.putText.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    left, top, _, _ = box.astype(int)

    text_size, _ = cv2.getTextSize(text=text, fontFace=font,
                                   fontScale=font_size, thickness=text_thickness)
    text_width, text_height = text_size
    # Make the background 20% taller than the measured text height.
    text_height = int(text_height * 1.2)

    # Filled background extending upwards from the box corner
    background_corner = (left + text_width, top - text_height)
    cv2.rectangle(image, (left, top), background_corner, color, -1)

    return cv2.putText(image, text, (left, top), font, font_size, (255, 255, 255), text_thickness,
                       cv2.LINE_AA)


def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    """Blend a filled, class-colored rectangle for every box over *image*.

    Args:
        image: Source image; it is not modified.
        boxes: Iterable of boxes, each unpacked as (x1, y1, x2, y2).
        classes: Iterable of class indices into ``colors``, one per box.
        mask_alpha: Weight of the filled rectangles in the blend; the original
            image is weighted with ``1 - mask_alpha``.

    Returns:
        The blended image produced by cv2.addWeighted.
    """
    overlay = image.copy()

    # Paint every box as a solid rectangle on the overlay copy
    for box, class_id in zip(boxes, classes):
        fill_color = colors[class_id]
        left, top, right, bottom = box.astype(int)
        cv2.rectangle(overlay, (left, top), (right, bottom), fill_color, -1)

    original_weight = 1 - mask_alpha
    return cv2.addWeighted(overlay, mask_alpha, image, original_weight, 0)

4.3 yolov10.py

import time
import cv2
import numpy as np
import onnxruntime

from .utils import draw_detections, check_model


class YOLOv10:
    """Object detector that runs a YOLOv10 ONNX model through ONNX Runtime.

    Calling an instance with an image returns ``(class_ids, boxes,
    confidences)`` for the detections whose confidence exceeds the configured
    threshold. Boxes are scaled to the size of the image that was passed in.
    """

    def __init__(self, path: str, conf_thres: float = 0.2):
        """Load the model at *path* and read its input/output metadata.

        Args:
            path: Path of the ONNX model file; check_model is called on it
                first, which may download a missing model.
            conf_thres: Detections with a confidence not greater than this
                value are discarded.
        """
        self.conf_threshold = conf_thres

        check_model(path)

        # Initialize model
        self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'])

        # Get model info
        self.get_input_details()
        self.get_output_details()

    def __call__(self, image: np.ndarray) -> tuple:
        """Shortcut for :meth:`detect_objects`."""
        return self.detect_objects(image)

    def detect_objects(self, image: np.ndarray) -> tuple:
        """Run preprocessing, inference and postprocessing on *image*.

        Returns:
            Tuple ``(class_ids, boxes, confidences)`` as produced by
            :meth:`process_output` from the first model output.
        """
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        return self.process_output(outputs[0])

    def prepare_input(self, image: np.ndarray) -> np.ndarray:
        """Convert *image* into the float32 tensor fed to the model.

        The image is converted with ``cv2.COLOR_BGR2RGB``, resized to the
        model input size, scaled to the 0..1 range, transposed with
        ``(2, 0, 1)`` and given a leading axis of length 1. The original
        image height and width are stored on the instance so that
        :meth:`rescale_boxes` can map boxes back to the image.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def inference(self, input_tensor):
        """Run the session on *input_tensor* and print the elapsed time in ms.

        Returns:
            The list of outputs returned by ``session.run``.
        """
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})

        print(f"Inference time: {(time.perf_counter() - start) * 1000:.2f} ms")
        return outputs

    def process_output(self, output):
        """Split the raw model output and keep confident detections only.

        Args:
            output: Array whose last axis holds one detection per row: box
                coordinates first, then the confidence, then the class id.

        Returns:
            Tuple ``(class_ids, boxes, confidences)`` restricted to rows whose
            confidence is greater than ``self.conf_threshold``; the boxes are
            rescaled to the original image size.
        """
        # Flatten every leading axis instead of calling squeeze(): squeeze()
        # would also drop the detection axis when it has length 1 and break
        # the 2-D indexing below.
        detections = output.reshape(-1, output.shape[-1])
        boxes = detections[:, :-2]
        confidences = detections[:, -2]
        class_ids = detections[:, -1].astype(int)

        mask = confidences > self.conf_threshold
        boxes = boxes[mask, :]
        confidences = confidences[mask]
        class_ids = class_ids[mask]

        # Rescale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)

        return class_ids, boxes, confidences

    def rescale_boxes(self, boxes):
        """Map *boxes* from model-input coordinates to original-image coordinates.

        Each box is divided by (input_width, input_height, input_width,
        input_height) and multiplied by the corresponding original image
        sizes. A new float32 array is returned; *boxes* is not modified.
        """
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        image_shape = np.array([self.img_width, self.img_height, self.img_width, self.img_height])

        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= image_shape
        return boxes

    def get_input_details(self):
        """Store the model input names and the input height/width.

        Dimensions 2 and 3 of the first input's shape are used as height and
        width; when one of them is not an int, 640 is used instead.
        """
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        input_shape = model_inputs[0].shape
        self.input_height = input_shape[2] if isinstance(input_shape[2], int) else 640
        self.input_width = input_shape[3] if isinstance(input_shape[3], int) else 640

    def get_output_details(self):
        """Store the names of all model outputs."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

# https://github.com/ibaiGorordo/ONNX-YOLOv10-Object-Detection/tree/main
if __name__ == '__main__':

    model_path = "yolov10n.onnx"
    # Read the test image
    img_path = r"D:\yolo_seq\ultralytics\datasets\coco128\images\train2017\000000000009.jpg"
    img = cv2.imread(img_path)
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would only fail later with an unrelated-looking AttributeError.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Initialize YOLOv10 object detector
    detector = YOLOv10(model_path)

    # Detect objects. The detection is repeated 10 times on the same image
    # (presumably so the printed inference times show warmed-up runs as well
    # as the first, slower one); only the last result is drawn.
    for _ in range(10):
        class_ids, boxes, confidences = detector(img)

    # Draw detections
    combined_img = draw_detections(img, boxes, confidences, class_ids)
    print("img: ",img.shape)
    print("combined_img: ",combined_img.shape)
    cv2.imshow("Output", combined_img)
    cv2.waitKey(0)