1 环境:
CPU:i5-12500
2 安装Openvino和ONNXRuntime
2.1 Openvino简介
Openvino是由Intel开发的专门用于优化和部署人工智能推理的开源工具包,主要用于对深度学习推理做优化。
Openvino内部集成了Opencv、TensorFlow模块,除此之外它还具有强大的Plugin开发框架,允许开发者在Openvino之上对推理过程做优化。
Openvino整体框架为:Openvino前端→ Plugin中间层→ Backend后端
Openvino的优点在于它屏蔽了后端接口,提供了统一操作的前端API,开发者可以无需关心后端的实现,例如后端可以是TensorFlow、Keras、ARM-NN,通过Plugin提供给前端接口调用,也就意味着一套代码在Openvino之上可以运行在多个推理引擎之上,Openvino像是类似聚合一样的开发包。
2.2 ONNXRuntime简介
ONNXRuntime是微软推出的一款推理框架,用户可以非常便利的用其运行一个onnx模型。ONNXRuntime支持多种运行后端包括CPU,GPU,TensorRT,DML等。可以说ONNXRuntime是对ONNX模型最原生的支持。
虽然大家用ONNX时更多的是作为一个中间表示,从pytorch转到onnx后直接喂到TensorRT或MNN等各种后端框架,但这并不能否认ONNXRuntime是一款非常优秀的推理框架。而且由于其自身只包含推理功能(最新的ONNXRuntime甚至已经可以训练),通过阅读其源码可以了解深度学习框架的一些核心功能原理(op注册,内存管理,运行逻辑等)
总体来看,整个ONNXRuntime的运行可以分为三个阶段,Session构造,模型加载与初始化和运行。和其他所有主流框架相同,ONNXRuntime最常用的语言是python,而实际负责执行框架运行的则是C++。
2.3 安装
pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install onnxruntime -i https://pypi.tuna.tsinghua.edu.cn/simple
3 准备YOLOv8s.onnx文件
YOLOv8官网
.pt文件转.onnx文件示例代码【注意自己转需要安装YOLOv8的环境】:
from ultralytics import YOLO
model = YOLO("yolov8s.pt") # load a pretrained model
path = model.export(format="onnx", dynamic=True) # export the model to ONNX format
4 Openvino和ONNXRuntime推理脚本
4.1 预处理
注:其中pad部分去除能减少预处理时间,且推理精度几乎一致。
def preprocess(image, img_h, img_w):
    """Letterbox preprocessing shared by the YOLO family.

    Resizes `image` so the longer side fits the (img_h, img_w) canvas,
    pads the right/bottom with the value 114, scales to [0, 1] and
    converts to NCHW float32.

    Args:
        image: input image as read by cv2.imread (BGR, HWC, uint8).
        img_h: target network input height.
        img_w: target network input width.

    Returns:
        input_array: (1, 3, img_h, img_w) contiguous float32 tensor.
        scale: factor the original image dimensions were divided by.
        resized_h: height of the resized (un-padded) image.
        resized_w: width of the resized (un-padded) image.

    NOTE(review): the last two values are returned in (h, w) order, but
    the caller in this file unpacks them as (img_w, img_h).  That swap is
    harmless there — they are only used as per-class NMS offsets — but
    worth confirming before reusing this function elsewhere.
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # One scale for both axes keeps the aspect ratio; padding fills the rest.
    scale = max(image.shape[0] / img_h, image.shape[1] / img_w)
    image = cv2.resize(image, (int(image.shape[1] / scale), int(image.shape[0] / scale)))
    resized_h, resized_w = image.shape[0], image.shape[1]
    # Build the pad buffer directly in float32 (the original used
    # `np.ones(...) * 114.0`, i.e. float64) — halves the memory traffic
    # of the normalisation below with identical results after the cast.
    image_pad = np.full((img_h, img_w, 3), 114.0, dtype=np.float32)
    image_pad[:resized_h, :resized_w, :] = image
    image_array = image_pad / 255.0            # normalise to [0, 1]
    image_array = image_array.transpose((2, 0, 1))  # HWC -> CHW
    input_array = np.ascontiguousarray(np.expand_dims(image_array, 0))
    return input_array, scale, resized_h, resized_w
4.2 后处理
注:尝试多种后处理写法,该种写法速度最快。
def postprocess(pred, conf_thres, iou_thres, img_w, img_h):
    """Filter raw YOLOv8 output by confidence and apply class-aware NMS.

    Args:
        pred: raw network output, shape (1, 4 + num_cls, N); rows are
              (x, y, w, h, cls1_conf, cls2_conf, ...).
        conf_thres: confidence threshold; boxes scoring below it are dropped.
        iou_thres: IoU threshold; of two boxes overlapping more than this,
                   the lower-scoring one is suppressed.
        img_w: x-offset used to separate classes during NMS.
        img_h: y-offset used to separate classes during NMS.

    Returns:
        np.array([(x, y, w, h, conf, cls), ...]) of surviving boxes,
        shape (-1, 6), or an empty list when nothing passes the filter.
    """
    pred = np.squeeze(pred).transpose((1, 0))  # (1, 4+C, N) -> (N, 4+C)
    # Confidence filter: keep boxes whose best class score passes the threshold.
    conf = np.max(pred[..., 4:], axis=-1)
    mask = conf >= conf_thres
    box = pred[mask][..., :4]
    confidences = conf[mask]
    clsid = np.argmax(pred[mask][..., 4:], axis=1)
    if box.shape[0] == 0:
        return []
    # Class-aware NMS trick: shift each class into its own coordinate
    # region so a single NMS pass never suppresses across classes.
    bounding_boxes = np.zeros_like(box)
    bounding_boxes[:, 0] = (box[:, 0] - box[:, 2] / 2) + clsid * img_w  # xmin + class offset
    bounding_boxes[:, 1] = (box[:, 1] - box[:, 3] / 2) + clsid * img_h  # ymin + class offset
    bounding_boxes[:, 2] = box[:, 2]  # w
    bounding_boxes[:, 3] = box[:, 3]  # h
    # xywh -> xyxy
    bounding_boxes[:, 2] += bounding_boxes[:, 0]
    bounding_boxes[:, 3] += bounding_boxes[:, 1]
    bounding_boxes = bounding_boxes.astype(np.float32)
    x1, y1, x2, y2 = (bounding_boxes[:, 0], bounding_boxes[:, 1],
                      bounding_boxes[:, 2], bounding_boxes[:, 3])
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(confidences)  # ascending: the best box sits at the end
    pick = []
    while len(idxs) > 0:
        last_idx = len(idxs) - 1
        max_value_idx = idxs[last_idx]  # highest-scoring remaining box
        pick.append(max_value_idx)
        # Intersection of the picked box with every other remaining box.
        xx1 = np.maximum(x1[max_value_idx], x1[idxs[:last_idx]])
        yy1 = np.maximum(y1[max_value_idx], y1[idxs[:last_idx]])
        xx2 = np.minimum(x2[max_value_idx], x2[idxs[:last_idx]])
        yy2 = np.minimum(y2[max_value_idx], y2[idxs[:last_idx]])
        w, h = np.maximum(0, xx2 - xx1 + 1), np.maximum(0, yy2 - yy1 + 1)
        inter = w * h
        # BUGFIX: the original divided the intersection by the *other*
        # box's area only, which is not IoU (it over-suppresses) and does
        # not match what the docstring promises for `iou_thres`.
        # Use the true intersection-over-union.
        iou = inter / (areas[max_value_idx] + areas[idxs[:last_idx]] - inter)
        # Remove the picked box plus everything it suppresses.
        idxs = np.delete(idxs, np.concatenate(([last_idx], np.where(iou > iou_thres)[0])))
    return np.concatenate(
        [box[pick], confidences[pick].reshape(-1, 1), clsid[pick].reshape(-1, 1)], axis=1)
4.3 全部代码
import os
import time
# openvino速度比onnxruntime快一倍
from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
import onnxruntime as rt # 使用onnxruntime推理用上,pip install onnxruntime
import numpy as np
import cv2
def preprocess(image, img_h, img_w):
    """Letterbox preprocessing shared by the YOLO family.

    Resizes `image` so the longer side fits the (img_h, img_w) canvas,
    pads the right/bottom with the value 114, scales to [0, 1] and
    converts to NCHW float32.

    Args:
        image: input image as read by cv2.imread (BGR, HWC, uint8).
        img_h: target network input height.
        img_w: target network input width.

    Returns:
        input_array: (1, 3, img_h, img_w) contiguous float32 tensor.
        scale: factor the original image dimensions were divided by.
        resized_h: height of the resized (un-padded) image.
        resized_w: width of the resized (un-padded) image.

    NOTE(review): the last two values are returned in (h, w) order, but
    the caller in this file unpacks them as (img_w, img_h).  That swap is
    harmless there — they are only used as per-class NMS offsets — but
    worth confirming before reusing this function elsewhere.
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # One scale for both axes keeps the aspect ratio; padding fills the rest.
    scale = max(image.shape[0] / img_h, image.shape[1] / img_w)
    image = cv2.resize(image, (int(image.shape[1] / scale), int(image.shape[0] / scale)))
    resized_h, resized_w = image.shape[0], image.shape[1]
    # Build the pad buffer directly in float32 (the original used
    # `np.ones(...) * 114.0`, i.e. float64) — halves the memory traffic
    # of the normalisation below with identical results after the cast.
    image_pad = np.full((img_h, img_w, 3), 114.0, dtype=np.float32)
    image_pad[:resized_h, :resized_w, :] = image
    image_array = image_pad / 255.0            # normalise to [0, 1]
    image_array = image_array.transpose((2, 0, 1))  # HWC -> CHW
    input_array = np.ascontiguousarray(np.expand_dims(image_array, 0))
    return input_array, scale, resized_h, resized_w
def postprocess(pred, conf_thres, iou_thres, img_w, img_h):
    """Filter raw YOLOv8 output by confidence and apply class-aware NMS.

    Args:
        pred: raw network output, shape (1, 4 + num_cls, N); rows are
              (x, y, w, h, cls1_conf, cls2_conf, ...).
        conf_thres: confidence threshold; boxes scoring below it are dropped.
        iou_thres: IoU threshold; of two boxes overlapping more than this,
                   the lower-scoring one is suppressed.
        img_w: x-offset used to separate classes during NMS.
        img_h: y-offset used to separate classes during NMS.

    Returns:
        np.array([(x, y, w, h, conf, cls), ...]) of surviving boxes,
        shape (-1, 6), or an empty list when nothing passes the filter.
    """
    pred = np.squeeze(pred).transpose((1, 0))  # (1, 4+C, N) -> (N, 4+C)
    # Confidence filter: keep boxes whose best class score passes the threshold.
    conf = np.max(pred[..., 4:], axis=-1)
    mask = conf >= conf_thres
    box = pred[mask][..., :4]
    confidences = conf[mask]
    clsid = np.argmax(pred[mask][..., 4:], axis=1)
    if box.shape[0] == 0:
        return []
    # Class-aware NMS trick: shift each class into its own coordinate
    # region so a single NMS pass never suppresses across classes.
    bounding_boxes = np.zeros_like(box)
    bounding_boxes[:, 0] = (box[:, 0] - box[:, 2] / 2) + clsid * img_w  # xmin + class offset
    bounding_boxes[:, 1] = (box[:, 1] - box[:, 3] / 2) + clsid * img_h  # ymin + class offset
    bounding_boxes[:, 2] = box[:, 2]  # w
    bounding_boxes[:, 3] = box[:, 3]  # h
    # xywh -> xyxy
    bounding_boxes[:, 2] += bounding_boxes[:, 0]
    bounding_boxes[:, 3] += bounding_boxes[:, 1]
    bounding_boxes = bounding_boxes.astype(np.float32)
    x1, y1, x2, y2 = (bounding_boxes[:, 0], bounding_boxes[:, 1],
                      bounding_boxes[:, 2], bounding_boxes[:, 3])
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(confidences)  # ascending: the best box sits at the end
    pick = []
    while len(idxs) > 0:
        last_idx = len(idxs) - 1
        max_value_idx = idxs[last_idx]  # highest-scoring remaining box
        pick.append(max_value_idx)
        # Intersection of the picked box with every other remaining box.
        xx1 = np.maximum(x1[max_value_idx], x1[idxs[:last_idx]])
        yy1 = np.maximum(y1[max_value_idx], y1[idxs[:last_idx]])
        xx2 = np.minimum(x2[max_value_idx], x2[idxs[:last_idx]])
        yy2 = np.minimum(y2[max_value_idx], y2[idxs[:last_idx]])
        w, h = np.maximum(0, xx2 - xx1 + 1), np.maximum(0, yy2 - yy1 + 1)
        inter = w * h
        # BUGFIX: the original divided the intersection by the *other*
        # box's area only, which is not IoU (it over-suppresses) and does
        # not match what the docstring promises for `iou_thres`.
        # Use the true intersection-over-union.
        iou = inter / (areas[max_value_idx] + areas[idxs[:last_idx]] - inter)
        # Remove the picked box plus everything it suppresses.
        idxs = np.delete(idxs, np.concatenate(([last_idx], np.where(iou > iou_thres)[0])))
    return np.concatenate(
        [box[pick], confidences[pick].reshape(-1, 1), clsid[pick].reshape(-1, 1)], axis=1)
def draw(img, xscale, yscale, pred, color=(255, 0, 0), tmp=True):
    """Draw detections onto a copy of `img` and return the copy.

    Args:
        img: original BGR image.
        xscale: horizontal factor mapping network coords back to the image.
        yscale: vertical factor mapping network coords back to the image.
        pred: rows of (x, y, w, h, conf, cls) as produced by postprocess.
        color: BGR rectangle/text colour.
        tmp: when True, also print "conf_cls" above each box.

    Returns:
        The annotated copy; `img` itself is left untouched.
    """
    img_ = img.copy()
    for detect in pred:
        caption = str('{:.2f}_{}'.format(detect[4], int(detect[5])))
        # xywh at network scale -> xyxy at original-image scale.
        xmin = int((detect[0] - detect[2] / 2) * xscale)
        ymin = int((detect[1] - detect[3] / 2) * yscale)
        xmax = int((detect[0] + detect[2] / 2) * xscale)
        ymax = int((detect[1] + detect[3] / 2) * yscale)
        # BUGFIX: the original drew on `img` (mutating the caller's image)
        # and threw the copy away; draw on the copy instead.
        cv2.rectangle(img_, (xmin, ymin), (xmax, ymax), color, 2)
        if tmp:  # optionally label the box with confidence_class
            cv2.putText(img_, caption, (xmin, ymin - 5), 0, 1, color, 2, 16)
    return img_
class OpenvinoInference(object):
    """Thin wrapper compiling an ONNX model with OpenVINO for CPU inference."""

    def __init__(self, onnx_path):
        """Read the ONNX model at `onnx_path` and compile it for the CPU device."""
        self.onnx_path = onnx_path
        ie = Core()
        self.model_onnx = ie.read_model(model=self.onnx_path)
        self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU")
        # Cache the first output port; predictions are fetched through it.
        self.output_layer_onnx = self.compiled_model_onnx.output(0)

    def predirts(self, datas):
        """Run one inference and return the first output tensor.

        Kept under its original (typo'd) name for backward compatibility
        with existing callers — prefer the `predict` alias below.
        """
        predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx]
        return predict_data

    # Correctly spelled alias for the typo'd `predirts`.
    predict = predirts
if __name__ == '__main__':
    # ---- configuration (edit these) -------------------------------------
    height, width = 640, 640              # network input size
    conf, nms_iou = 0.15, 0.6             # confidence threshold / NMS IoU threshold
    openvino_tmp = True                   # True -> OpenVINO backend, False -> ONNXRuntime
    onnx_path = 'D:\\C++\\yolov8s.onnx'   # ONNX model path
    input_path = 'D:\\C++\\bus.jpg'       # input image path
    output_path = 'D:\\C++\\out.jpg'      # output image path

    img = cv2.imread(input_path)

    # Build exactly one backend, selected by the flag above.
    if openvino_tmp:
        model = OpenvinoInference(onnx_path)
    else:
        sess = rt.InferenceSession(onnx_path)

    # Preprocess (timed).
    t1 = time.time()
    data, scale, img_w, img_h = preprocess(img, height, width)
    print('预处理时间:{:.3f}s'.format(time.time() - t1))

    # Inference (timed).
    t2 = time.time()
    if openvino_tmp:
        pred = model.predirts(data)
    else:
        input_name = sess.get_inputs()[0].name
        label_name = sess.get_outputs()[0].name
        pred = sess.run([label_name], {input_name: data.astype(np.float32)})[0]
    print('推理时间:{:.3f}s'.format(time.time() - t2))

    # Postprocess / NMS (timed).
    t3 = time.time()
    result = postprocess(pred, conf, nms_iou, img_w, img_h)
    print('后处理时间:{:.3f}s'.format(time.time() - t3))

    # Render and save the annotated image.
    ret_img = draw(img, scale, scale, result, color=(0, 255, 0), tmp=True)
    cv2.imwrite(output_path, ret_img)
5 结果
具体时间消耗:
预处理时间:0.014s(预处理无Pad为0.007s)
推理时间:0.08s
后处理时间:0.001s
注:640×640下,Openvino和ONNXRuntime推理速度相差不大,1280×1280下,Openvino速度更快。