self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'])
同时，将代码中的返回类型注解 `-> tuple[np.ndarray, np.ndarray, np.ndarray]:`
修改为 `-> tuple:`（`tuple[...]` 这种内置泛型下标写法需要 Python 3.9 及以上版本，低版本会报错）。
from ultralytics import YOLO

if __name__ == '__main__':
    # Load the model before exporting it; `model` was otherwise undefined.
    # NOTE(review): the weights file name is assumed to be the nano
    # checkpoint matching the "yolov10n.onnx" file used by the demo code --
    # point this at your own .pt file if it differs.
    model = YOLO("yolov10n.pt")
    # Export the loaded model to ONNX format.
    success = model.export(format="onnx")
import cv2
from yolov10 import YOLOv10, draw_detections

# Initialize YOLOv10 object detector
model_path = "models/yolov10n.onnx"
detector = YOLOv10(model_path, conf_thres=0.2)

# Read image
img_url = r"D:\yolo_seq\ultralytics\datasets\coco128\images\train2017\000000000009.jpg"
img = cv2.imread(img_url)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than deep inside the detector.
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_url}")

# Detect Objects
# The same image is run 10 times; each call prints its inference time, so
# the repeated runs presumably exist to compare warm-up and steady-state
# latency. Only the result of the last run is drawn.
for _ in range(10):
    class_ids, boxes, confidences = detector(img)

# Draw detections
combined_img = draw_detections(img, boxes, confidences, class_ids)
cv2.imshow("Detected Objects", combined_img)
# Without waitKey the HighGUI window is never serviced and the script exits
# immediately; block until a key is pressed.
cv2.waitKey(0)
再参考 https://hpg123.blog.csdn.net/article/details/141882208?spm=1001.2014.3001.5502 中的 RTSP 拉流方法，即可基于 YOLOv10 模型实现对 RTSP 视频流的实时检测。
4.1 __init__.py
from .yolov10 import YOLOv10
from .utils import draw_detections
4.2 utils.py
import os
import cv2
import numpy as np
import tqdm
import requests
# Label for each class id: class_names[class_id] is the text drawn next to a
# detection of that class. The order of the entries therefore matters.
class_names = [
    'person', 'bicycle', 'car', 'motorcycle',
    'airplane', 'bus', 'train', 'truck',
    'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat',
    'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe',
    'backpack', 'umbrella', 'handbag', 'tie',
    'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'wine glass', 'cup', 'fork', 'knife',
    'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# One colour per class: a (num_classes, 3) array of floats drawn uniformly
# from [0, 255). The fixed seed keeps the palette identical between runs.
rng = np.random.default_rng(3)
colors = rng.uniform(low=0, high=255, size=(len(class_names), 3))

# Model names that check_model() accepts for download.
available_models = [f"yolov10{size}" for size in "nsmblx"]
def download_model(url: str, path: str):
    """Download the file at ``url`` and store it at ``path`` with a progress bar.

    The body is streamed in 1 MiB chunks so the whole file is never held in
    memory. Data is first written to ``<path>.part`` and only moved to
    ``path`` once the download has finished, so an interrupted download does
    not leave a truncated file at ``path``.

    Args:
        url: Address to download from.
        path: Destination file path.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the destination cannot be written.
    """
    print(f"Downloading model from {url} to {path}")
    chunk_size = 1024 * 1024
    tmp_path = f"{path}.part"

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=30) as r:
        # Fail before creating any file instead of saving an error page.
        r.raise_for_status()

        # content-length is optional; without it tqdm shows a bar with no total.
        content_length = r.headers.get('content-length')
        total_chunks = int(content_length) // chunk_size if content_length else None

        with open(tmp_path, 'wb') as f:
            for chunk in tqdm.tqdm(r.iter_content(chunk_size=chunk_size), total=total_chunks):
                if chunk:
                    f.write(chunk)

    os.replace(tmp_path, path)
def check_model(model_path: str):
    """Make sure a model file exists at ``model_path``, downloading it if needed.

    If the file already exists nothing is done. Otherwise the model name is
    taken from the file name (the part before the first dot), validated
    against ``available_models`` and downloaded from the THU-MIG/yolov10
    GitHub release.

    Args:
        model_path: Path where the ONNX model is expected.

    Raises:
        ValueError: If the file is missing and its name is not one of
            ``available_models``.
    """
    # Nothing to do when the model is already on disk.
    if os.path.exists(model_path):
        return

    model_name = os.path.basename(model_path).split('.')[0]
    if model_name not in available_models:
        raise ValueError(f"Invalid model name: {model_name}")

    # Create the target directory so the download can open the output file.
    parent_dir = os.path.dirname(model_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    url = f"https://github.com/THU-MIG/yolov10/releases/download/v1.1/{model_name}.onnx"
    download_model(url, model_path)
def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Return a copy of ``image`` annotated with every detection.

    Each detection gets a translucent filled rectangle, a box outline and a
    "<class name> <score>%" caption, all in the colour of its class. The
    input image itself is left untouched.

    Args:
        image: Image to annotate (only ``shape`` and ``copy()`` are used here).
        boxes: One box per detection, passed on to the drawing helpers.
        scores: One confidence per detection; shown as an integer percentage.
        class_ids: One class index per detection, used to look up the colour
            and the class name.
        mask_alpha: Opacity of the filled rectangles.

    Returns:
        The annotated copy of ``image``.
    """
    canvas = image.copy()

    # Text size and stroke scale with the shorter image side.
    height, width = image.shape[:2]
    shorter_side = min([height, width])
    font_size = shorter_side * 0.0006
    text_thickness = int(shorter_side * 0.001)

    canvas = draw_masks(canvas, boxes, class_ids, mask_alpha)

    # Outline and caption for each detection, drawn over the blended fill.
    for class_id, box, score in zip(class_ids, boxes, scores):
        class_color = colors[class_id]
        draw_box(canvas, box, class_color)

        caption = f'{class_names[class_id]} {int(score * 100)}%'
        draw_text(canvas, caption, box, class_color, font_size, text_thickness)

    return canvas
def draw_box(image: np.ndarray, box: np.ndarray, color: tuple = (0, 0, 255),
             thickness: int = 2) -> np.ndarray:
    """Draw the outline of ``box`` on ``image``.

    Args:
        image: Image to draw on.
        box: Four values (x1, y1, x2, y2); they are truncated to integers.
        color: Outline colour passed to ``cv2.rectangle``.
        thickness: Outline thickness passed to ``cv2.rectangle``.

    Returns:
        The result of ``cv2.rectangle``.
    """
    left, top, right, bottom = box.astype(int)
    top_left = (left, top)
    bottom_right = (right, bottom)
    return cv2.rectangle(image, top_left, bottom_right, color, thickness)
def draw_text(image: np.ndarray, text: str, box: np.ndarray, color: tuple = (0, 0, 255),
              font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
    """Draw ``text`` in white on a filled label above the top-left corner of ``box``.

    Args:
        image: Image to draw on.
        text: Caption to render.
        box: Box whose first two values (x1, y1) anchor the label; they are
            truncated to integers.
        color: Fill colour of the label background.
        font_size: Font scale passed to OpenCV.
        text_thickness: Stroke thickness of the text.

    Returns:
        The result of ``cv2.putText``.
    """
    # Only the top-left corner of the box is needed to place the label.
    x1, y1 = box.astype(int)[:2]

    (tw, th), _ = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                  fontScale=font_size, thickness=text_thickness)
    # 20% extra height so the background is a bit taller than the glyphs.
    th = int(th * 1.2)

    # Filled background (thickness -1) extending upwards from the box corner.
    cv2.rectangle(image, (x1, y1),
                  (x1 + tw, y1 - th), color, -1)

    return cv2.putText(image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255), text_thickness,
                       cv2.LINE_AA)
def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    """Blend a filled, class-coloured rectangle for every box over ``image``.

    Args:
        image: Source image; it is not drawn on directly.
        boxes: One (x1, y1, x2, y2) box per detection.
        classes: One class index per detection, used to pick the colour.
        mask_alpha: Weight of the filled overlay in the blend; the source
            image gets ``1 - mask_alpha``.

    Returns:
        The blended image produced by ``cv2.addWeighted``.
    """
    overlay = image.copy()

    for box, class_id in zip(boxes, classes):
        fill_color = colors[class_id]
        left, top, right, bottom = box.astype(int)
        # Thickness -1 fills the rectangle on the overlay copy.
        cv2.rectangle(overlay, (left, top), (right, bottom), fill_color, -1)

    source_weight = 1 - mask_alpha
    return cv2.addWeighted(overlay, mask_alpha, image, source_weight, 0)
4.3 yolov10.py
import time
import cv2
import numpy as np
import onnxruntime
from .utils import draw_detections, check_model
class YOLOv10:
    """Run a YOLOv10 ONNX model on single images through ONNX Runtime.

    Calling an instance with a BGR image returns ``(class_ids, boxes,
    confidences)`` for every detection whose confidence is above the
    threshold, with boxes scaled to the size of the image that was passed in.
    """

    def __init__(self, path: str, conf_thres: float = 0.2):
        """Create the inference session and read the model's input/output info.

        Args:
            path: Path to the ONNX model file.
            conf_thres: Detections with a confidence at or below this value
                are discarded.
        """
        self.conf_threshold = conf_thres

        # Initialize model
        # CUDA is preferred; the CPU provider is listed as a fallback so the
        # session can still be created on machines without a usable GPU.
        self.session = onnxruntime.InferenceSession(
            path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])

        # Get model info
        # These set input_names/input_height/input_width and output_names,
        # which prepare_input(), inference() and rescale_boxes() rely on.
        self.get_input_details()
        self.get_output_details()

    def __call__(self, image: np.ndarray) -> tuple:
        """Shortcut for :meth:`detect_objects`."""
        return self.detect_objects(image)

    def detect_objects(self, image: np.ndarray) -> tuple:
        """Preprocess ``image``, run the model and post-process its first output.

        Returns:
            ``(class_ids, boxes, confidences)`` as produced by
            :meth:`process_output`.
        """
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        return self.process_output(outputs[0])

    def prepare_input(self, image: np.ndarray) -> np.ndarray:
        """Convert a BGR image into the float32 tensor fed to the model.

        The original image size is remembered on the instance so that
        :meth:`rescale_boxes` can map boxes back to it.

        Returns:
            Array of shape ``(1, 3, input_height, input_width)`` with values
            scaled to the 0..1 range.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        # Height-width-channel -> channel-height-width, then add a batch axis.
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def inference(self, input_tensor):
        """Run the session on ``input_tensor`` and print the elapsed time.

        Returns:
            The list of outputs returned by ``session.run``.
        """
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})

        print(f"Inference time: {(time.perf_counter() - start) * 1000:.2f} ms")
        return outputs

    def process_output(self, output):
        """Filter raw detections by confidence and rescale their boxes.

        Each row of ``output`` is read as box values followed by the
        confidence (second-to-last column) and the class id (last column).

        Args:
            output: Raw model output; any leading axes are flattened so that
                one row corresponds to one candidate detection.

        Returns:
            ``(class_ids, boxes, confidences)`` for the rows whose confidence
            is greater than ``self.conf_threshold``.
        """
        output = np.asarray(output)
        # Flatten to (rows, columns). Unlike squeeze(), this keeps the array
        # two-dimensional even when the model returns a single row.
        output = output.reshape(-1, output.shape[-1])

        boxes = output[:, :-2]
        confidences = output[:, -2]
        class_ids = output[:, -1].astype(int)

        mask = confidences > self.conf_threshold
        boxes = boxes[mask, :]
        confidences = confidences[mask]
        class_ids = class_ids[mask]

        # Rescale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)

        return class_ids, boxes, confidences

    def rescale_boxes(self, boxes):
        """Map boxes from model-input coordinates to original-image coordinates.

        Columns are treated as (x, y, x, y): x values are scaled by
        ``img_width / input_width`` and y values by
        ``img_height / input_height``.

        Returns:
            A new float32 array; the argument is not modified.
        """
        input_shape = np.array(
            [self.input_width, self.input_height, self.input_width, self.input_height],
            dtype=np.float32)
        image_shape = np.array(
            [self.img_width, self.img_height, self.img_width, self.img_height],
            dtype=np.float32)

        # Normalise to 0..1 first, then scale up to the original image size.
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= image_shape
        return boxes

    def get_input_details(self):
        """Store the model's input names and its input height/width.

        A dimension that the model does not declare as an integer (for
        example a symbolic/dynamic axis) falls back to 640.
        """
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        input_shape = model_inputs[0].shape
        self.input_height = input_shape[2] if isinstance(input_shape[2], int) else 640
        self.input_width = input_shape[3] if isinstance(input_shape[3], int) else 640

    def get_output_details(self):
        """Store the names of all model outputs."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]
# https://github.com/ibaiGorordo/ONNX-YOLOv10-Object-Detection/tree/main
if __name__ == '__main__':
    import sys

    model_path = "yolov10n.onnx"

    # The image to run on is taken from the first command-line argument;
    # without it there is nothing to feed the detector.
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: {sys.argv[0]} <image_path>")
    img = cv2.imread(sys.argv[1])
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise SystemExit(f"Could not read image: {sys.argv[1]}")

    # Initialize YOLOv10 object detector
    detector = YOLOv10(model_path)

    # Detect Objects
    # The same image is run 10 times; each call prints its inference time, so
    # the repeated runs presumably exist to compare warm-up and steady-state
    # latency. Only the result of the last run is drawn.
    for _ in range(10):
        class_ids, boxes, confidences = detector(img)

    # Draw detections
    combined_img = draw_detections(img, boxes, confidences, class_ids)
    print("img: ",img.shape)
    print("combined_img: ",combined_img.shape)
    cv2.imshow("Output", combined_img)
    # Keep the window open until a key is pressed; without waitKey the
    # window is never serviced and the script exits immediately.
    cv2.waitKey(0)