1.结论
onnx推理比torch快3倍, openvino比onnx快一丢丢。
|
yolov7.pt 转 onnx
python export.py --weights best_31.pt --grid --end2end --simplify --topk-all 10 --iou-thres 0.65 --conf-thres 0.65 --img-size 320 320 --max-wh 200
可以看到yolov7的 onnx是包括nms的
2.onnx推理
# encoding=utf-8
import cv2
cuda = False
w = "best_31.onnx"
img = cv2.imread('3.png')
import cv2
import time
import requests
import random
import numpy as np
import onnxruntime as ort
from PIL import Image
from pathlib import Path
from collections import OrderedDict,namedtuple
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = ort.InferenceSession(w, providers=providers)
def letterbox(im, new_shape=(320, 320), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, r, (dw, dh)
names = ['box', 'box1']
colors = {name:[random.randint(0, 255) for _ in range(3)] for i,name in enumerate(names)}
t1=time.time()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
image = img.copy()
image, ratio, dwdh = letterbox(image, auto=False)
image = image.transpose((2, 0, 1))
image = np.expand_dims(image, 0)
image = np.ascontiguousarray(image)
im = image.astype(np.float32)
im /= 255
outname = [i.name for i in session.get_outputs()]
inname = [i.name for i in session.get_inputs()]
inp = {inname[0]:im}
outputs = session.run(outname, inp)[0]
ori_images = [img.copy()]
for i,(batch_id,x0,y0,x1,y1,cls_id,score) in enumerate(outputs):
image = ori_images[int(batch_id)]
box = np.array([x0,y0,x1,y1])
box -= np.array(dwdh*2)
box /= ratio
box = box.round().astype(np.int32).tolist()
cls_id = int(cls_id)
score = round(float(score),3)
name = names[cls_id]
color = colors[name]
name += ' '+str(score)
cv2.rectangle(image,box[:2],box[2:],color,2)
cv2.putText(image,name,(box[0], box[1] - 2),cv2.FONT_HERSHEY_SIMPLEX,0.75,[225, 255, 255],thickness=2)
print(time.time()-t1)
cv2.imshow('img',image)
cv2.waitKey(0)
你去看export.py会发现好像他没加上nms ,其实他是在end2end的时候加进去的
class End2End(nn.Module):
'''export onnx or tensorrt model with NMS operation.'''
def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
super().__init__()
device = device if device else torch.device('cpu')
assert isinstance(max_wh,(int)) or max_wh is None
self.model = model.to(device)
self.model.model[-1].end2end = True
self.patch_model = ONNX_TRT if max_wh is None else ONNX_ORT
self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device)
self.end2end.eval()
def forward(self, x):
x = self.model(x)
x = self.end2end(x)
return x
其中报错找不到onnxsim
需要安装 pip install onnxsim
这个是把模型缩小的
3. openvino安装报错
安装的连接
https://docs.openvino.ai/2023.0/openvino_docs_install_guides_overview.html?ENVIRONMENT=DEV_TOOLS&OP_SYSTEM=WINDOWS&VERSION=v_2023_0_1&DISTRIBUTION=PIP
我们安装后报错
!!!!!DLL load failed while importing _pyopenvino
解决方法:
1.https://github.com/openvinotoolkit/openvino/issues/18151
他告诉我们要去pypi上看文档
https://pypi.org/project/openvino/
文档上说要安装MSVC runtime 还给了下载连接
https://aka.ms/vs/17/release/vc_redist.x64.exe
我安装了并重启电脑不行
- https://github.com/openvinotoolkit/openvino/issues/15403
他告诉我们要把 openvino的lib路径加在电脑的环境变量
Add the path \envs<your env
name>\Lib\site-packages\openvino\libs into your enviroment path.Reboot your terminal, and everything is ok…
3.重启电脑
4.openvino推理
这个代码是openvino提供的,不过他们把pt转onnx的时候没有加上nms。所以在代码里他们又加上的nms。这里我给去掉了。
4.1 onnx 转 openvino
# encoding=utf-8
from openvino.tools import mo
from openvino.runtime import serialize
model = mo.convert_model('best_tiny.onnx')
# serialize model for saving IR
serialize(model, 'best_tiny.xml')
4.2 openvino推理包括nms
# encoding=utf-8
import time
import numpy as np
import torch
from PIL import Image
from utils.datasets import letterbox
from utils.plots import plot_one_box
def preprocess_image(img0: np.ndarray):
"""
Preprocess image according to YOLOv7 input requirements.
Takes image in np.array format, resizes it to specific size using letterbox resize, converts color space from BGR (default in OpenCV) to RGB and changes data layout from HWC to CHW.
Parameters:
img0 (np.ndarray): image for preprocessing
Returns:
img (np.ndarray): image after preprocessing
img0 (np.ndarray): original image
"""
# resize
img = letterbox(img0,new_shape=(320,320), auto=False)[0]
# Convert
img = img.transpose(2, 0, 1)
img = np.ascontiguousarray(img)
return img, img0
def prepare_input_tensor(image: np.ndarray):
"""
Converts preprocessed image to tensor format according to YOLOv7 input requirements.
Takes image in np.array format with unit8 data in [0, 255] range and converts it to torch.Tensor object with float data in [0, 1] range
Parameters:
image (np.ndarray): image for conversion to tensor
Returns:
input_tensor (torch.Tensor): float tensor ready to use for YOLOv7 inference
"""
input_tensor = image.astype(np.float32) # uint8 to fp16/32
input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0
if input_tensor.ndim == 3:
input_tensor = np.expand_dims(input_tensor, 0)
return input_tensor
# label names for visualization
NAMES = ['box', 'box1']
# colors for visualization
COLORS = {name: [np.random.randint(0, 255) for _ in range(3)]
for i, name in enumerate(NAMES)}
from typing import List, Tuple, Dict
from utils.general import scale_coords, non_max_suppression
from openvino.runtime import Model
def detect(model: Model, image_path, conf_thres: float = 0.25, iou_thres: float = 0.45, classes: List[int] = None, agnostic_nms: bool = False):
"""
OpenVINO YOLOv7 model inference function. Reads image, preprocess it, runs model inference and postprocess results using NMS.
Parameters:
model (Model): OpenVINO compiled model.
image_path (Path): input image path.
conf_thres (float, *optional*, 0.25): minimal accpeted confidence for object filtering
iou_thres (float, *optional*, 0.45): minimal overlap score for remloving objects duplicates in NMS
classes (List[int], *optional*, None): labels for prediction filtering, if not provided all predicted labels will be used
agnostic_nms (bool, *optiona*, False): apply class agnostinc NMS approach or not
Returns:
pred (List): list of detections with (n,6) shape, where n - number of detected boxes in format [x1, y1, x2, y2, score, label]
orig_img (np.ndarray): image before preprocessing, can be used for results visualization
inpjut_shape (Tuple[int]): shape of model input tensor, can be used for output rescaling
"""
output_blob = model.output(0)
img = np.array(Image.open(image_path))
preprocessed_img, orig_img = preprocess_image(img)
input_tensor = prepare_input_tensor(preprocessed_img)
t1 = time.time()
predictions = torch.from_numpy(model(input_tensor)[output_blob])
t2=time.time() - t1
# predictions = predictions.unsqueeze(0)
#pred = non_max_suppression(predictions, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
return predictions, orig_img, input_tensor.shape,t2
def draw_boxes(predictions: np.ndarray, input_shape: Tuple[int], image: np.ndarray, names: List[str], colors: Dict[str, int]):
"""
Utility function for drawing predicted bounding boxes on image
Parameters:
predictions (np.ndarray): list of detections with (n,6) shape, where n - number of detected boxes in format [x1, y1, x2, y2, score, label]
image (np.ndarray): image for boxes visualization
names (List[str]): list of names for each class in dataset
colors (Dict[str, int]): mapping between class name and drawing color
Returns:
image (np.ndarray): box visualization result
"""
if not len(predictions):
return image
# Rescale boxes from input size to original image size
predictions[:, 1:5] = scale_coords(input_shape[2:], predictions[:, 1:5], image.shape).round()
# Write results
for index,x1,y1,x2,y2, cls, conf in predictions:
label = f'{names[int(cls)]} {conf:.2f}'
plot_one_box([x1,y1,x2,y2], image, label=label, color=colors[names[int(cls)]], line_thickness=1)
return image
from openvino.runtime import Core
core = Core()
# read converted model
model = core.read_model('best_tiny.xml')
# load model on CPU device
compiled_model = core.compile_model(model, 'CPU')
t2=0
for i in range(100):
boxes, image, input_shape,t = detect(compiled_model, '3.png')
t2+=t
print(t2/100)
# image_with_boxes = draw_boxes(boxes, input_shape, image, NAMES, COLORS)
# # visualize results
# import cv2
# cv2.imshow('img',image_with_boxes)
# cv2.waitKey(0)
5.yolov5 在转onnx的时候加上nms
修改yolov5的 export.py 中代码如下
import torch.nn as nn
import random
class ORT_NMS(torch.autograd.Function):
'''ONNX-Runtime NMS operation'''
@staticmethod
def forward(ctx,
boxes,
scores,
max_output_boxes_per_class=torch.tensor([100]),
iou_threshold=torch.tensor([0.45]),
score_threshold=torch.tensor([0.25])):
device = boxes.device
batch = scores.shape[0]
num_det = random.randint(0, 100)
batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device)
idxs = torch.arange(100, 100 + num_det).to(device)
zeros = torch.zeros((num_det,), dtype=torch.int64).to(device)
selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous()
selected_indices = selected_indices.to(torch.int64)
return selected_indices
@staticmethod
def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)
class ONNX_ORT(nn.Module):
'''onnx module with ONNX-Runtime NMS operation.'''
def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None):
super().__init__()
self.device = device if device else torch.device("cpu")
self.max_obj = torch.tensor([max_obj]).to(device)
self.iou_threshold = torch.tensor([iou_thres]).to(device)
self.score_threshold = torch.tensor([score_thres]).to(device)
self.max_wh = max_wh # if max_wh != 0 : non-agnostic else : agnostic
self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
dtype=torch.float32,
device=self.device)
def forward(self, x):
boxes = x[:, :, :4]
conf = x[:, :, 4:5]
scores = x[:, :, 5:]
scores *= conf
boxes @= self.convert_matrix
max_score, category_id = scores.max(2, keepdim=True)
dis = category_id.float() * self.max_wh
nmsbox = boxes + dis
max_score_tp = max_score.transpose(1, 2).contiguous()
selected_indices = ORT_NMS.apply(nmsbox, max_score_tp, self.max_obj, self.iou_threshold, self.score_threshold)
X, Y = selected_indices[:, 0], selected_indices[:, 2]
selected_boxes = boxes[X, Y, :]
selected_categories = category_id[X, Y, :].float()
selected_scores = max_score[X, Y, :]
X = X.unsqueeze(1).float()
return torch.cat([X, selected_boxes, selected_categories, selected_scores], 1)
class End2End(nn.Module):
'''export onnx or tensorrt model with NMS operation.'''
def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
super().__init__()
device = device if device else torch.device('cpu')
assert isinstance(max_wh,(int)) or max_wh is None
self.model = model.to(device)
# self.model.model[-1].export = True
self.patch_model = ONNX_ORT
self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device)
self.end2end.eval()
def forward(self, x):
x = self.model(x)
x = self.end2end(x)
return x
@try_export
def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
# YOLOv5 ONNX export
check_requirements('onnx>=1.12.0')
import onnx
LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
f = opt.weights.replace('.pt', '.onnx') # filename
output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0']
if dynamic:
dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}} # shape(1,3,640,640)
if isinstance(model, SegmentationModel):
dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85)
dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'} # shape(1,32,160,160)
elif isinstance(model, DetectionModel):
dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85)
model = End2End(model, opt.topk_all, opt.iou_thres, opt.conf_thres, 200, torch.device('cpu'))
output_names = ['output']
torch.onnx.export(model, im, f, verbose=False, opset_version=12, input_names=['images'],
output_names=output_names,
dynamic_axes=None)
# Checks
model_onnx = onnx.load(f) # load onnx model
onnx.checker.check_model(model_onnx) # check onnx model
# # Metadata
# d = {'stride': int(max(model.stride)), 'names': model.names}
# for k, v in d.items():
# meta = model_onnx.metadata_props.add()
# meta.key, meta.value = k, str(v)
# onnx.save(model_onnx, f)
# Simplify
if simplify:
try:
# cuda = torch.cuda.is_available()
# check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
import onnxsim
LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
model_onnx, check = onnxsim.simplify(model_onnx)
assert check, 'assert check failed'
onnx.save(model_onnx, f)
except Exception as e:
LOGGER.info(f'{prefix} simplifier failure: {e}')
return f, model_onnx
6. v5 v7 推理
def letterbox(
img: Optional[np.arange],
new_shape: List = (320, 320),
color=(114, 114, 114),
) -> None:
"""Resize and pad image while meeting stride-multiple constraints
Args:
img (_type_): _description_
new_shape (tuple, optional): _description_. Defaults to (256, 256).
color (tuple, optional): _description_. Defaults to (114, 114, 114).
Returns:
_type_: _description_
"""
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
# Compute padding
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
) # add border
return img, r, (dw, dh)
class Model:
def __init__(self, model_path: str, cuda: bool) -> None:
"""Load model
Args:
model_path (str): _description_
cuda (str): _description_
"""
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
self.session = ort.InferenceSession(model_path, providers=providers)
def detect(
self,
img: Optional[np.array],
shape: List[int] = None,
) -> Optional[np.array]:
"""检测
Args:
img (Optional[np.array]): 图片
conf_threshold (str, optional): 置信度. Defaults to 0.25.
shape (List[int], optional): 图片大小. Defaults to None.
Returns:
Optional[np.array]: 一个大的box和2个小的box为一组
"""
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
image = img.copy()
if shape is None:
shape = [320, 320]
# 图片缩放
image, ratio, dwdh = letterbox(image, shape)
# Convert
# img=np.repeat(img[:, :, np.newaxis], 3, axis=2)
image = image.transpose((2, 0, 1)) # 3x416x416
image = np.expand_dims(image, 0)
image = np.ascontiguousarray(image)
im = image.astype(np.float32)
im /= 255
outname = ['output']
inp = {'images': im}
outputs = self.session.run(outname, inp)[0]
return outputs
if __name__ == "__main__":
model = Model(model_path="weights/yolov5n.onnx", cuda=False)
import os
names = ['box', 'box1']
colors = {name: [random.randint(0, 255) for _ in range(3)] for i, name in enumerate(names)}
for name in os.listdir("img"):
img = cv2.imread(os.path.join("img", name))
result = model.detect(img, shape=[320, 320])
for datas in result:
boxs = []
for data in datas:
box = data[:4].round().astype(np.int32).tolist()
cls_id = int(data[4])
score = round(float(data[5]), 3)
name = names[cls_id]
color = colors[name]
name += ' ' + str(score)
cv2.rectangle(img, box[:2], box[2:], color, 2)
cv2.putText(img, name, (box[0], box[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.75, [225, 255, 255],
thickness=2)
cv2.imshow("Lines", img)
cv2.waitKey(0)