This post is a summary of what I did after studying another author's excellent work and adapting it to my own needs, with a few code changes along the way. Many thanks to the original author for such a good write-up.
A trained YOLOv8 model is normally saved in .pt format, e.g. best.pt. I wanted to convert it into a format that is easier to deploy, without so obviously calling yolo directly, and it turns out it can be exported to ONNX.
1. What is the ONNX format
ONNX (Open Neural Network Exchange) is an open format for representing machine learning models. That may sound like a mouthful, but the practical point is simple: an ONNX model can be loaded with ONNX Runtime, and the dnn module in newer versions of OpenCV can load it as well.
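As a quick illustration, here is a minimal sketch of both loading paths (the model path is mine; it assumes onnxruntime is installed and a reasonably recent OpenCV build with dnn support):
import cv2
import onnxruntime

model_path = "E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx"

# load the model with ONNX Runtime
session = onnxruntime.InferenceSession(model_path)

# or load the same model with OpenCV's dnn module
net = cv2.dnn.readNetFromONNX(model_path)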
2. Loading the ONNX model with Python
(1) Convert best.pt to best.onnx
from ultralytics import YOLO
# load the trained YOLOv8 model
model = YOLO('E:/skin_yolo/runs/detect/spot_detection60/weights/best.pt')
# export to ONNX format
# model.export(format='onnx')
model.export(format='onnx', imgsz=640)  # my input images are a fixed 640*640, so I hard-coded the size
(2) Load the model in Python and run object detection
First install the onnxruntime, numpy and opencv-python (cv2) packages. If you want to run inference on the GPU, install onnxruntime-gpu as well.
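A quick sanity check (just a sketch) to confirm which execution providers your onnxruntime build actually exposes:
import onnxruntime

# list the execution providers available in the installed onnxruntime build
print(onnxruntime.get_available_providers())
# with onnxruntime-gpu installed, 'CUDAExecutionProvider' should appear in the list;
# the CPU-only onnxruntime package exposes 'CPUExecutionProvider'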
test_detector.py
# Detect targets on skin images with the YOLO model
# 2025-01-06
import cv2
# helper functions defined in the companion files
from targetDetect import TargetDetection
from forDraw import draw_detections

# YOLOv8 ONNX model inference
class YOLOV8NDetector:
    def __init__(self, model_path):
        super(YOLOV8NDetector, self).__init__()
        self.model_path = model_path
        self.detector = TargetDetection(self.model_path, conf_thres=0.5, iou_thres=0.3)

    def detect_image(self, input_image, output_image):
        cv_img = cv2.imread(input_image)
        boxes, scores, class_ids = self.detector.detect_objects(cv_img)
        cv_img = draw_detections(cv_img, boxes, scores, class_ids)
        cv2.namedWindow("output", cv2.WINDOW_NORMAL)
        cv2.imwrite(output_image, cv_img)
        cv2.imshow('output', cv_img)
        cv2.waitKey(0)

    def detect_video(self, input_video, output_video):
        cap = cv2.VideoCapture(input_video)
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        videoWriter = None
        while True:
            _, cv_img = cap.read()
            if cv_img is None:
                break
            boxes, scores, class_ids = self.detector.detect_objects(cv_img)
            cv_img = draw_detections(cv_img, boxes, scores, class_ids)
            # Initialise the video writer on the first frame, using the output path and frame size
            if videoWriter is None:
                fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
                # Once assigned it is no longer None, so this branch is skipped on later frames
                videoWriter = cv2.VideoWriter(output_video, fourcc, fps, (cv_img.shape[1], cv_img.shape[0]))
            videoWriter.write(cv_img)
            cv2.imshow("aod", cv_img)
            cv2.waitKey(5)
            # Stop when the display window has been closed (the user clicked the X)
            if cv2.getWindowProperty("aod", cv2.WND_PROP_AUTOSIZE) < 1:
                break
        cap.release()
        videoWriter.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    modelpath = "E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx"  # model path
    det = YOLOV8NDetector(modelpath)
    # call this to detect an image
    input_image = "E:/Skin_Color/skin_pic/test/12/test.jpg"
    output_image = 'E:/Skin_Color/skin_pic/test/12/test_out.jpg'
    det.detect_image(input_image, output_image)
    # call this to detect a video
    # input_video = r"E:\yolodataset\video\A13.mp4"
    # output_video = "../testdata/fortest.mp4"
    # det.detect_video(input_video, output_video)
As you can see, the code above depends on two other files: targetDetect.py and forDraw.py.
targetDetect.py implements the detection pipeline, and forDraw.py implements drawing the detection boxes.
targetDetect.py
import time
import cv2
import numpy as np
import onnxruntime
# helper functions defined in forDraw.py
from forDraw import xywh2xyxy, draw_detections, nms  # use nms for a single class, multiclass_nms for multiple classes

class TargetDetection:
    def __init__(self, path, conf_thres=0.7, iou_thres=0.5):
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        return self.detect_objects(image)

    def initialize_model(self, path):
        self.session = onnxruntime.InferenceSession(path, providers=onnxruntime.get_available_providers())
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        input_tensor = self.prepare_input(image)
        # Perform inference on the image
        outputs = self.inference(input_tensor)
        self.boxes, self.scores, self.class_ids = self.process_output(outputs)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        self.img_height, self.img_width = image.shape[:2]
        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))
        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
        return input_tensor

    def inference(self, input_tensor):
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        # print(f"Inference time: {(time.perf_counter() - start)*1000:.2f} ms")
        return outputs

    def process_output(self, output):
        # YOLOv8 output has shape (1, 4 + num_classes, num_boxes); transpose to (num_boxes, 4 + num_classes)
        predictions = np.squeeze(output[0]).T
        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        predictions = predictions[scores > self.conf_threshold, :]
        scores = scores[scores > self.conf_threshold]
        if len(scores) == 0:
            return [], [], []
        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)
        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions)
        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = nms(boxes, scores, self.iou_threshold)  # my model has only one class
        # indices = multiclass_nms(boxes, scores, class_ids, self.iou_threshold)  # multiple classes
        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions):
        # Extract boxes from predictions
        boxes = predictions[:, :4]
        # Scale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)
        # Convert boxes to xyxy format
        boxes = xywh2xyxy(boxes)
        return boxes

    def rescale_boxes(self, boxes):
        # Rescale boxes to original image dimensions
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])
        return boxes

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        return draw_detections(image, self.boxes, self.scores,
                               self.class_ids, mask_alpha)

    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]
        print(self.input_width, self.input_height)

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
forDraw.py
import numpy as np
import cv2

class_names = ['spot']  # my class labels (only one class)
# Create one random color per class; the second dimension is the number of BGR channels
rng = np.random.default_rng(1)  # fixed seed so the colors are reproducible
colors = rng.uniform(0, 255, size=(len(class_names), 3))
def nms(boxes, scores, iou_threshold):
    # Sort the detections by score from high to low and keep the sorted indices
    sorted_indices = np.argsort(scores)[::-1]  # [::-1] reverses the sort order
    keep_boxes = []
    while sorted_indices.size > 0:
        # Keep the box with the highest score
        box_id = sorted_indices[0]
        keep_boxes.append(box_id)
        # Compute the IoU between the current highest-scoring box and the remaining boxes
        ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
        # Keep the boxes whose IoU is below the threshold, dropping the heavily overlapping ones
        keep_indices = np.where(ious < iou_threshold)[0]
        # Note: keep_indices is relative to sorted_indices[1:],
        # so shift it by +1 to map back into the original sorted_indices
        sorted_indices = sorted_indices[keep_indices + 1]
    return keep_boxes

def multiclass_nms(boxes, scores, class_ids, iou_threshold):
    # Get all unique class indices
    unique_class_ids = np.unique(class_ids)
    keep_boxes = []  # indices of the boxes that are finally kept
    for class_id in unique_class_ids:
        # Select the indices of the boxes belonging to the current class
        class_indices = np.where(class_ids == class_id)[0]  # np.where returns a tuple
        # Extract the boxes and scores of the current class
        class_boxes = boxes[class_indices, :]
        class_scores = scores[class_indices]
        # Run NMS and get the indices that survive
        class_keep_boxes = nms(class_boxes, class_scores, iou_threshold)
        # Map the kept indices back to the original indices and store them
        keep_boxes.extend(class_indices[class_keep_boxes])
    return keep_boxes

def compute_iou(box, boxes):
    # Intersection coordinates: (xmin, ymin) is the top-left corner, (xmax, ymax) the bottom-right corner
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])
    # Intersection area; if two boxes do not overlap the width/height is negative, so clamp to 0 with np.maximum
    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
    # Area of each bounding box
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    # Union area
    union_area = box_area + boxes_area - intersection_area
    # IoU = intersection area / union area
    iou = intersection_area / union_area
    return iou

def xywh2xyxy(x):
    # Convert bounding boxes from (x_center, y_center, w, h) to (x1, y1, x2, y2)
    y = np.copy(x)
    # Top-left corner x1, y1
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    # Bottom-right corner x2, y2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y

def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    # Draw the detected objects
    det_img = image.copy()
    img_height, img_width = image.shape[:2]
    font_size = min([img_height, img_width]) * 0.0006
    text_thickness = int(min([img_height, img_width]) * 0.001)
    det_img = draw_masks(det_img, boxes, class_ids, mask_alpha)
    # Draw bounding boxes and labels of detections
    for class_id, box, score in zip(class_ids, boxes, scores):
        color = colors[class_id]
        draw_box(det_img, box, color)
        label = class_names[class_id]
        caption = f'{label} {int(score * 100)}%'
        draw_text(det_img, caption, box, color, font_size, text_thickness)
    return det_img

def draw_box(image: np.ndarray, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
             thickness: int = 2) -> np.ndarray:
    x1, y1, x2, y2 = box.astype(int)
    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

def draw_text(image: np.ndarray, text: str, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
              font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
    # Draw the label text above the box
    x1, y1, x2, y2 = box.astype(int)
    (tw, th), _ = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                  fontScale=font_size, thickness=text_thickness)
    th = int(th * 1.2)  # label background height
    cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)  # filled label background
    return cv2.putText(image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255), text_thickness, cv2.LINE_AA)

def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    mask_img = image.copy()
    # Fill the detected boxes
    for box, class_id in zip(boxes, classes):
        color = colors[class_id]
        x1, y1, x2, y2 = box.astype(int)
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
    # return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)  # semi-transparent fill
    return image  # return the image unchanged (no fill drawn)
Here is what the output looks like.
(3) A few reflections
Understand the model's flow
Even after the YOLO model has been exported, loading and using it still requires understanding the model thoroughly, starting with its inputs and outputs.
A handy website for inspecting a model's topology online: https://netron.app/
The topology graph is enormously long; as a beginner, for now I only looked at the input and output nodes.
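The same input/output information can also be read with onnxruntime. A minimal sketch (the path is my model; the exact names and shapes depend on your export):
import onnxruntime

session = onnxruntime.InferenceSession("E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx")
# print the model's input and output signature
for inp in session.get_inputs():
    print("input:", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)
# for a standard YOLOv8 export at 640x640 with a single class this should look roughly like
# input:  images  [1, 3, 640, 640]  tensor(float)
# output: output0 [1, 5, 8400]      tensor(float)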
Preprocessing and the intermediate steps both matter
1) Preprocessing can be a big stumbling block.
2) Read the image and convert its color space from BGR to RGB: OpenCV loads images in BGR order, while the ONNX model expects RGB input.
3) Resize the image: I trained at 640, so the image must be resized to the input size the model requires.
4) Normalize the pixel values to the [0, 1] range.
5) Reorder the image channels, typically from HWC (Height, Width, Channel) to CHW (Channel, Height, Width), and add a batch dimension to get NCHW, where N is the batch size (usually 1). A compact sketch of these steps follows below.
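Putting those steps together, here is a compact sketch of the preprocessing (it mirrors prepare_input in targetDetect.py and assumes a fixed 640x640 input):
import cv2
import numpy as np

def preprocess(bgr_img, input_width=640, input_height=640):
    rgb = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)           # 2) BGR -> RGB
    resized = cv2.resize(rgb, (input_width, input_height))   # 3) resize to the model's input size
    normalized = resized / 255.0                              # 4) scale pixel values to [0, 1]
    chw = normalized.transpose(2, 0, 1)                       # 5) HWC -> CHW
    return chw[np.newaxis, ...].astype(np.float32)            # add the batch dimension -> NCHW, float32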
Finally, special thanks to the original author for the reference: https://blog.csdn.net/MariLN/article/details/144330414