This post is a summary of what I did after studying another author's excellent work and adapting it to my own needs, with a few code changes along the way. Many thanks to the original author for such a good write-up.
A trained YOLOv8 model is normally saved in .pt format, e.g. best.pt. I wanted to convert it into a format that is easier to deploy, without so obviously calling yolo directly, and it turns out it can be exported to ONNX.
1. What is the ONNX format
ONNX (Open Neural Network Exchange) is an open format for representing machine learning models. That may sound like a mouthful, but the practical point is simple: an ONNX model can be loaded with ONNX Runtime, and the dnn module in newer versions of OpenCV can load it as well.
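As a quick illustration, here is a minimal sketch of both loading paths (the model path is mine; it assumes onnxruntime is installed and a reasonably recent OpenCV build with dnn support):
import cv2
import onnxruntime

model_path = "E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx"

# load the model with ONNX Runtime
session = onnxruntime.InferenceSession(model_path)

# or load the same model with OpenCV's dnn module
net = cv2.dnn.readNetFromONNX(model_path)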
2. Loading the ONNX model with Python
(1) Convert best.pt to best.onnx
from ultralytics import YOLO
# load the trained YOLOv8 model
model = YOLO('E:/skin_yolo/runs/detect/spot_detection60/weights/best.pt')
# export to ONNX format
# model.export(format='onnx')
model.export(format='onnx', imgsz=640)  # my input images are a fixed 640*640, so I hard-coded the size
(2) Load the model in Python and run object detection
First install the onnxruntime, numpy and opencv-python (cv2) packages. If you want to run inference on the GPU, install onnxruntime-gpu as well.
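A quick sanity check (just a sketch) to confirm which execution providers your onnxruntime build actually exposes:
import onnxruntime

# list the execution providers available in the installed onnxruntime build
print(onnxruntime.get_available_providers())
# with onnxruntime-gpu installed, 'CUDAExecutionProvider' should appear in the list;
# the CPU-only onnxruntime package exposes 'CPUExecutionProvider'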
test_detector.py
# Detect targets on skin images with the YOLO model
# 2025-01-06
import cv2
# helper functions defined in the companion files
from targetDetect import TargetDetection
from forDraw import draw_detections

# YOLOv8 ONNX model inference
class YOLOV8NDetector:
    def __init__(self, model_path):
        super(YOLOV8NDetector, self).__init__()
        self.model_path = model_path
        self.detector = TargetDetection(self.model_path, conf_thres=0.5, iou_thres=0.3)

    def detect_image(self, input_image, output_image):
        cv_img = cv2.imread(input_image)
        boxes, scores, class_ids = self.detector.detect_objects(cv_img)
        cv_img = draw_detections(cv_img, boxes, scores, class_ids)
        cv2.namedWindow("output", cv2.WINDOW_NORMAL)
        cv2.imwrite(output_image, cv_img)
        cv2.imshow('output', cv_img)
        cv2.waitKey(0)

    def detect_video(self, input_video, output_video):
        cap = cv2.VideoCapture(input_video)
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        videoWriter = None
        while True:
            _, cv_img = cap.read()
            if cv_img is None:
                break
            boxes, scores, class_ids = self.detector.detect_objects(cv_img)
            cv_img = draw_detections(cv_img, boxes, scores, class_ids)
            # Initialise the video writer on the first frame, using the output path and frame size
            if videoWriter is None:
                fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
                # Once assigned it is no longer None, so this branch is skipped on later frames
                videoWriter = cv2.VideoWriter(output_video, fourcc, fps, (cv_img.shape[1], cv_img.shape[0]))
            videoWriter.write(cv_img)
            cv2.imshow("aod", cv_img)
            cv2.waitKey(5)
            # Stop when the display window has been closed (the user clicked the X)
            if cv2.getWindowProperty("aod", cv2.WND_PROP_AUTOSIZE) < 1:
                break
        cap.release()
        videoWriter.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    modelpath = "E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx"  # model path
    det = YOLOV8NDetector(modelpath)
    # call this to detect an image
    input_image = "E:/Skin_Color/skin_pic/test/12/test.jpg"
    output_image = 'E:/Skin_Color/skin_pic/test/12/test_out.jpg'
    det.detect_image(input_image, output_image)
    # call this to detect a video
    # input_video = r"E:\yolodataset\video\A13.mp4"
    # output_video = "../testdata/fortest.mp4"
    # det.detect_video(input_video, output_video)
As you can see, the code above depends on two other files: targetDetect.py and forDraw.py.
targetDetect.py implements the detection pipeline, and forDraw.py implements drawing the detection boxes.
targetDetect.py
import time
import cv2
import numpy as np
import onnxruntime
# helper functions defined in forDraw.py
from forDraw import xywh2xyxy, draw_detections, nms  # use nms for a single class, multiclass_nms for multiple classes

class TargetDetection:
    def __init__(self, path, conf_thres=0.7, iou_thres=0.5):
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        return self.detect_objects(image)

    def initialize_model(self, path):
        self.session = onnxruntime.InferenceSession(path, providers=onnxruntime.get_available_providers())
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        input_tensor = self.prepare_input(image)
        # Perform inference on the image
        outputs = self.inference(input_tensor)
        self.boxes, self.scores, self.class_ids = self.process_output(outputs)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        self.img_height, self.img_width = image.shape[:2]
        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))
        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
        return input_tensor

    def inference(self, input_tensor):
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        # print(f"Inference time: {(time.perf_counter() - start)*1000:.2f} ms")
        return outputs

    def process_output(self, output):
        # YOLOv8 output has shape (1, 4 + num_classes, num_boxes); transpose to (num_boxes, 4 + num_classes)
        predictions = np.squeeze(output[0]).T
        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        predictions = predictions[scores > self.conf_threshold, :]
        scores = scores[scores > self.conf_threshold]
        if len(scores) == 0:
            return [], [], []
        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)
        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions)
        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = nms(boxes, scores, self.iou_threshold)  # my model has only one class
        # indices = multiclass_nms(boxes, scores, class_ids, self.iou_threshold)  # multiple classes
        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions):
        # Extract boxes from predictions
        boxes = predictions[:, :4]
        # Scale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)
        # Convert boxes to xyxy format
        boxes = xywh2xyxy(boxes)
        return boxes

    def rescale_boxes(self, boxes):
        # Rescale boxes to original image dimensions
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])
        return boxes

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        return draw_detections(image, self.boxes, self.scores,
                               self.class_ids, mask_alpha)

    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]
        print(self.input_width, self.input_height)

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
forDraw.py
import numpy as np
import cv2

class_names = ['spot']  # my class labels (only one class)
# Create one random color per class; the second dimension is the number of BGR channels
rng = np.random.default_rng(1)  # fixed seed so the colors are reproducible
colors = rng.uniform(0, 255, size=(len(class_names), 3))
def nms(boxes, scores, iou_threshold):
    # Sort the detections by score from high to low and keep the sorted indices
    sorted_indices = np.argsort(scores)[::-1]  # [::-1] reverses the sort order
    keep_boxes = []
    while sorted_indices.size > 0:
        # Keep the box with the highest score
        box_id = sorted_indices[0]
        keep_boxes.append(box_id)
        # Compute the IoU between the current highest-scoring box and the remaining boxes
        ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
        # Keep the boxes whose IoU is below the threshold, dropping the heavily overlapping ones
        keep_indices = np.where(ious < iou_threshold)[0]
        # Note: keep_indices is relative to sorted_indices[1:],
        # so shift it by +1 to map back into the original sorted_indices
        sorted_indices = sorted_indices[keep_indices + 1]
    return keep_boxes

def multiclass_nms(boxes, scores, class_ids, iou_threshold):
    # Get all unique class indices
    unique_class_ids = np.unique(class_ids)
    keep_boxes = []  # indices of the boxes that are finally kept
    for class_id in unique_class_ids:
        # Select the indices of the boxes belonging to the current class
        class_indices = np.where(class_ids == class_id)[0]  # np.where returns a tuple
        # Extract the boxes and scores of the current class
        class_boxes = boxes[class_indices, :]
        class_scores = scores[class_indices]
        # Run NMS and get the indices that survive
        class_keep_boxes = nms(class_boxes, class_scores, iou_threshold)
        # Map the kept indices back to the original indices and store them
        keep_boxes.extend(class_indices[class_keep_boxes])
    return keep_boxes

def compute_iou(box, boxes):
    # Intersection coordinates: (xmin, ymin) is the top-left corner, (xmax, ymax) the bottom-right corner
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])
    # Intersection area; if two boxes do not overlap the width/height is negative, so clamp to 0 with np.maximum
    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
    # Area of each bounding box
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    # Union area
    union_area = box_area + boxes_area - intersection_area
    # IoU = intersection area / union area
    iou = intersection_area / union_area
    return iou

def xywh2xyxy(x):
    # Convert bounding boxes from (x_center, y_center, w, h) to (x1, y1, x2, y2)
    y = np.copy(x)
    # Top-left corner x1, y1
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    # Bottom-right corner x2, y2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y

def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    # Draw the detected objects
    det_img = image.copy()
    img_height, img_width = image.shape[:2]
    font_size = min([img_height, img_width]) * 0.0006
    text_thickness = int(min([img_height, img_width]) * 0.001)
    det_img = draw_masks(det_img, boxes, class_ids, mask_alpha)
    # Draw bounding boxes and labels of detections
    for class_id, box, score in zip(class_ids, boxes, scores):
        color = colors[class_id]
        draw_box(det_img, box, color)
        label = class_names[class_id]
        caption = f'{label} {int(score * 100)}%'
        draw_text(det_img, caption, box, color, font_size, text_thickness)
    return det_img

def draw_box(image: np.ndarray, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
             thickness: int = 2) -> np.ndarray:
    x1, y1, x2, y2 = box.astype(int)
    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

def draw_text(image: np.ndarray, text: str, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
              font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
    # Draw the label text above the box
    x1, y1, x2, y2 = box.astype(int)
    (tw, th), _ = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                  fontScale=font_size, thickness=text_thickness)
    th = int(th * 1.2)  # label background height
    cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)  # filled label background
    return cv2.putText(image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255), text_thickness, cv2.LINE_AA)

def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    mask_img = image.copy()
    # Fill the detected boxes
    for box, class_id in zip(boxes, classes):
        color = colors[class_id]
        x1, y1, x2, y2 = box.astype(int)
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
    # return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)  # semi-transparent fill
    return image  # return the image unchanged (no fill drawn)
Here is what the output looks like.
(3) A few reflections
Understand the model's flow
Even after the YOLO model has been exported, loading and using it still requires understanding the model thoroughly, starting with its inputs and outputs.
A handy website for inspecting a model's topology online: https://netron.app/
The topology graph is enormously long; as a beginner, for now I only looked at the input and output nodes.
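The same input/output information can also be read with onnxruntime. A minimal sketch (the path is my model; the exact names and shapes depend on your export):
import onnxruntime

session = onnxruntime.InferenceSession("E:/skin_yolo/runs/detect/spot_detection60/weights/best.onnx")
# print the model's input and output signature
for inp in session.get_inputs():
    print("input:", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)
# for a standard YOLOv8 export at 640x640 with a single class this should look roughly like
# input:  images  [1, 3, 640, 640]  tensor(float)
# output: output0 [1, 5, 8400]      tensor(float)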
Preprocessing and the intermediate steps both matter
1) Preprocessing can be a big stumbling block.
2) Read the image and convert its color space from BGR to RGB: OpenCV loads images in BGR order, while the ONNX model expects RGB input.
3) Resize the image: I trained at 640, so the image must be resized to the input size the model requires.
4) Normalize the pixel values to the [0, 1] range.
5) Reorder the image channels, typically from HWC (Height, Width, Channel) to CHW (Channel, Height, Width), and add a batch dimension to get NCHW, where N is the batch size (usually 1). A compact sketch of these steps follows below.
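Putting those steps together, here is a compact sketch of the preprocessing (it mirrors prepare_input in targetDetect.py and assumes a fixed 640x640 input):
import cv2
import numpy as np

def preprocess(bgr_img, input_width=640, input_height=640):
    rgb = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)           # 2) BGR -> RGB
    resized = cv2.resize(rgb, (input_width, input_height))   # 3) resize to the model's input size
    normalized = resized / 255.0                              # 4) scale pixel values to [0, 1]
    chw = normalized.transpose(2, 0, 1)                       # 5) HWC -> CHW
    return chw[np.newaxis, ...].astype(np.float32)            # add the batch dimension -> NCHW, float32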
Finally, special thanks to the original author for the reference: https://blog.csdn.net/MariLN/article/details/144330414