ONNX Runtime inference
Export the ONNX model:
from ultralytics import YOLO
model = YOLO("yolov8n-obb.pt")
model.export(format="onnx")
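If you need to pin the exported input resolution or the ONNX opset explicitly, the export call accepts additional arguments (a minimal sketch; the exact option set depends on the installed ultralytics version):
from ultralytics import YOLO

model = YOLO("yolov8n-obb.pt")
# imgsz fixes the exported input size, opset selects the ONNX opset, simplify runs onnx-simplifier if installed
model.export(format="onnx", imgsz=1024, opset=12, simplify=True)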
The structure of the exported ONNX model is as follows:
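For a 1024x1024 input the exported graph has a single input images of shape (1, 3, 1024, 1024) and a single output output0 of shape (1, 20, 21504): each of the 21504 candidate boxes (128x128 + 64x64 + 32x32 anchor points at strides 8, 16 and 32) carries 4 box parameters (cx, cy, w, h), 15 class scores and 1 rotation angle in radians. The I/O names and shapes of your own export can be confirmed quickly with ONNX Runtime:
import onnxruntime

session = onnxruntime.InferenceSession("yolov8n-obb.onnx", providers=["CPUExecutionProvider"])
print([(i.name, i.shape) for i in session.get_inputs()])   # expected: [('images', [1, 3, 1024, 1024])]
print([(o.name, o.shape) for o in session.get_outputs()])  # expected: [('output0', [1, 20, 21504])]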
Python inference code:
import cv2
import math
import numpy as np
import onnxruntime
class_names = ["plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor", "bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool"]
input_shape = (1024, 1024)  # model input size (height, width)
score_threshold = 0.1       # minimum class score to keep a box
nms_threshold = 0.4         # IoU threshold for non-maximum suppression
def nms(boxes, scores, score_threshold, nms_threshold):
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)
    keep = []
    index = scores.argsort()[::-1]
    while index.size > 0:
        i = index[0]
        keep.append(i)
        x11 = np.maximum(x1[i], x1[index[1:]])
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)
        h = np.maximum(0, y22 - y11 + 1)
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        idx = np.where(ious <= nms_threshold)[0]
        index = index[idx + 1]
    return keep
def xywh2xyxy(x):
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y
def filter_box(outputs):  # filter out low-score boxes and keep the best-scoring class per candidate
    outputs = np.squeeze(outputs)  # shape: (4 + num_classes + 1, num_boxes)
    rotated_boxes = []
    scores = []
    class_ids = []
    classes_scores = outputs[4:(4 + len(class_names)), ...]
    angles = outputs[-1, ...]  # rotation angle in radians
    for i in range(outputs.shape[1]):
        class_id = np.argmax(classes_scores[..., i])
        score = classes_scores[class_id][i]
        angle = angles[i]
        if 0.5 * math.pi <= angle <= 0.75 * math.pi:
            angle -= math.pi
        if score > score_threshold:
            rotated_boxes.append(np.concatenate([outputs[:4, i], np.array([score, class_id, angle * 180 / math.pi])]))
            scores.append(score)
            class_ids.append(class_id)
    rotated_boxes = np.array(rotated_boxes)
    boxes = xywh2xyxy(rotated_boxes)
    scores = np.array(scores)
    indices = nms(boxes, scores, score_threshold, nms_threshold)
    output = rotated_boxes[indices]
    return output
def letterbox(im, new_shape=(416, 416), color=(114, 114, 114)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2  # wh padding
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im
def scale_boxes(boxes, shape):
    # Rescale boxes (cx, cy, w, h) from input_shape back to the original image shape
    gain = min(input_shape[0] / shape[0], input_shape[1] / shape[1])  # gain = old / new
    pad = (input_shape[1] - shape[1] * gain) / 2, (input_shape[0] - shape[0] * gain) / 2  # wh padding
    boxes[..., [0, 1]] -= pad  # xy padding
    boxes[..., :4] /= gain
    return boxes
def draw(image, box_data):
    box_data = scale_boxes(box_data, image.shape)
    boxes = box_data[..., :4]
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)
    angles = box_data[..., 6]
    for box, score, cl, angle in zip(boxes, scores, classes, angles):
        rotate_box = ((box[0], box[1]), (box[2], box[3]), angle)
        points = cv2.boxPoints(rotate_box)
        points = points.astype(np.int32)  # np.int0 was removed in NumPy 2.0
        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=1)
        cv2.putText(image, '{0} {1:.2f}'.format(class_names[cl], score), (points[0][0], points[0][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
if __name__ == "__main__":
    image = cv2.imread('airport.jpg', -1)
    input = letterbox(image, input_shape)
    input = input[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)  # BGR to RGB and HWC to CHW
    input = input / 255.0
    input_tensor = []
    input_tensor.append(input)
    # execution providers in priority order; falls back to CPU when CUDA is unavailable
    onnx_session = onnxruntime.InferenceSession('yolov8n-obb.onnx', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    input_name = []
    for node in onnx_session.get_inputs():
        input_name.append(node.name)
    output_name = []
    for node in onnx_session.get_outputs():
        output_name.append(node.name)
    inputs = {}
    for name in input_name:
        inputs[name] = np.array(input_tensor)
    outputs = onnx_session.run(None, inputs)[0]
    print(outputs.shape)
    boxes = filter_box(outputs)
    draw(image, boxes)
    cv2.imwrite('result.jpg', image)
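Note that the NMS above operates on the axis-aligned boxes derived from the rotated detections, which can over-suppress neighbouring oblique objects. If your OpenCV build includes the dnn module, cv2.dnn.NMSBoxesRotated is a possible alternative that suppresses on the rotated geometry itself (a sketch, assuming the rotated_boxes layout produced by filter_box above):
def rotated_nms(rotated_boxes, scores, score_threshold, nms_threshold):
    # each row of rotated_boxes is cx, cy, w, h, score, class_id, angle (degrees), as built in filter_box
    rects = [((b[0], b[1]), (b[2], b[3]), b[6]) for b in rotated_boxes]
    indices = cv2.dnn.NMSBoxesRotated(rects, scores.tolist(), score_threshold, nms_threshold)
    return np.array(indices).flatten()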
C++ inference code:
#include <iostream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
const std::vector<std::string> class_names = {
"plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor",
"bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool" };
const int input_width = 1024;
const int input_height = 1024;
const float score_threshold = 0.1;
const float nms_threshold = 0.5;
const int num_classes = class_names.size();
const int output_numprob = 5 + num_classes;
const int output_numbox = input_width / 8 * input_height / 8 + input_width / 16 * input_height / 16 + input_width / 32 * input_height / 32;
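// For a 1024x1024 input: output_numbox = 128*128 + 64*64 + 32*32 = 21504 candidate boxes,
// and output_numprob = 4 box parameters + 15 class scores + 1 rotation angle = 20 values per box.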
// LetterBox preprocessing
void LetterBox(const cv::Mat& image, cv::Mat& outImage,
const cv::Size& newShape = cv::Size(640, 640), const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
cv::Size shape = image.size();
float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);
float ratio[2]{ r, r };
int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) };
auto dw = (float)(newShape.width - new_un_pad[0]) / 2;
auto dh = (float)(newShape.height - new_un_pad[1]) / 2;
if (shape.width != new_un_pad[0] || shape.height != new_un_pad[1])
cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1]));
else
outImage = image.clone();
int top = int(std::round(dh - 0.1f));
int bottom = int(std::round(dh + 0.1f));
int left = int(std::round(dw - 0.1f));
int right = int(std::round(dw + 0.1f));
cv::Vec4d params;
params[0] = ratio[0];
params[1] = ratio[1];
params[2] = left;
params[3] = top;
cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
// Pre-processing
void pre_process(cv::Mat& image, std::vector<float>& inputs)
{
cv::Vec4d params;
cv::Mat letterbox;
LetterBox(image, letterbox, cv::Size(input_width, input_height));
cv::cvtColor(letterbox, letterbox, cv::COLOR_BGR2RGB);
letterbox.convertTo(letterbox, CV_32FC3, 1.0f / 255.0f);
std::vector<cv::Mat> split_images;
cv::split(letterbox, split_images);
for (size_t i = 0; i < letterbox.channels(); ++i)
{
std::vector<float> split_image_data = split_images[i].reshape(1, 1);
inputs.insert(inputs.end(), split_image_data.begin(), split_image_data.end());
}
}
// Network inference
void process(const wchar_t* model, std::vector<float>& inputs, std::vector<Ort::Value>& outputs)
{
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov8n-obb");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(12); // set the number of intra-op threads
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); // enable graph optimizations
//CUDA option set
//OrtCUDAProviderOptions cuda_option;
//cuda_option.device_id = 0;
//cuda_option.arena_extend_strategy = 0;
//cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
//cuda_option.gpu_mem_limit = SIZE_MAX;
//cuda_option.do_copy_in_default_stream = 1;
//session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
//session_options.AppendExecutionProvider_CUDA(cuda_option);
Ort::Session session(env, model, session_options);
std::vector<const char*> input_node_names;
for (size_t i = 0; i < session.GetInputCount(); i++)
{
input_node_names.push_back("images");
}
std::vector<const char*> output_node_names;
for (size_t i = 0; i < session.GetOutputCount(); i++)
{
output_node_names.push_back("output0");
}
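// Note: "images" and "output0" are the default node names produced by the ultralytics ONNX export;
// they can also be queried at runtime via session.GetInputNameAllocated() / session.GetOutputNameAllocated().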
// create input tensor object from data values
std::vector<int64_t> input_node_dims = { 1, 3, input_height, input_width }; // NCHW
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, inputs.data(), inputs.size(), input_node_dims.data(), input_node_dims.size());
std::vector<Ort::Value> ort_inputs;
ort_inputs.push_back(std::move(input_tensor)); // move to avoid an unnecessary copy
// score model & input tensor, get back output tensor
outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
}
//NMS
void nms(std::vector<cv::Rect>& boxes, std::vector<float>& scores, float score_threshold, float nms_threshold, std::vector<int>& indices)
{
struct BoxScore
{
cv::Rect box;
float score;
int id;
};
std::vector<BoxScore> boxes_scores;
for (size_t i = 0; i < boxes.size(); i++)
{
BoxScore box_conf;
box_conf.box = boxes[i];
box_conf.score = scores[i];
box_conf.id = i;
if (scores[i] > score_threshold) boxes_scores.push_back(box_conf);
}
std::sort(boxes_scores.begin(), boxes_scores.end(), [](BoxScore a, BoxScore b) { return a.score > b.score; });
std::vector<float> area(boxes_scores.size());
for (size_t i = 0; i < boxes_scores.size(); ++i)
{
area[i] = boxes_scores[i].box.width * boxes_scores[i].box.height;
}
std::vector<bool> isSuppressed(boxes_scores.size(), false);
for (size_t i = 0; i < boxes_scores.size(); ++i)
{
if (isSuppressed[i]) continue;
for (size_t j = i + 1; j < boxes_scores.size(); ++j)
{
if (isSuppressed[j]) continue;
float x1 = (std::max)(boxes_scores[i].box.x, boxes_scores[j].box.x);
float y1 = (std::max)(boxes_scores[i].box.y, boxes_scores[j].box.y);
float x2 = (std::min)(boxes_scores[i].box.x + boxes_scores[i].box.width, boxes_scores[j].box.x + boxes_scores[j].box.width);
float y2 = (std::min)(boxes_scores[i].box.y + boxes_scores[i].box.height, boxes_scores[j].box.y + boxes_scores[j].box.height);
float w = (std::max)(0.0f, x2 - x1);
float h = (std::max)(0.0f, y2 - y1);
float inter = w * h;
float ovr = inter / (area[i] + area[j] - inter);
if (ovr >= nms_threshold) isSuppressed[j] = true;
}
}
for (int i = 0; i < boxes_scores.size(); ++i)
{
if (!isSuppressed[i]) indices.push_back(boxes_scores[i].id);
}
}
// Rescale boxes to the original image size
void scale_box(cv::Rect& box, cv::Size size)
{
float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
int pad_w = (input_width - size.width * gain) / 2;
int pad_h = (input_height - size.height * gain) / 2;
box.x -= pad_w;
box.y -= pad_h;
box.x /= gain;
box.y /= gain;
box.width /= gain;
box.height /= gain;
}
void scale_box(cv::RotatedRect& rotated_box, cv::Size size)
{
float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
int pad_w = (input_width - size.width * gain) / 2;
int pad_h = (input_height - size.height * gain) / 2;
rotated_box.center -= cv::Point2f(pad_w, pad_h);
rotated_box.center /= gain;
rotated_box.size /= gain;
}
// Visualization
void draw_result(cv::Mat& image, std::string label, cv::RotatedRect rotated_box)
{
cv::Mat points;
cv::boxPoints(rotated_box, points);
cv::Point2f vertices[4];
for (int i = 0; i < 4; i++)
{
vertices[i] = cv::Point2f(points.at<float>(i, 0), points.at<float>(i, 1));
}
for (int i = 0; i < 4; i++)
{
cv::line(image, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1);
}
cv::putText(image, label, cv::Point(rotated_box.center), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 1);
}
// Post-processing
void post_process(cv::Mat& image, cv::Mat& result, std::vector<Ort::Value>& outputs)
{
std::vector<cv::Rect> boxes;
std::vector<cv::RotatedRect> rotated_boxes;
std::vector<float> scores;
std::vector<int> class_ids;
const float* output_data = outputs[0].GetTensorData<float>();
for (int i = 0; i < output_numbox; i++)
{
std::vector<float> classes_scores(num_classes);
for (int j = 0; j < num_classes; j++)
{
classes_scores[j] = output_data[(4 + j) * output_numbox + i];
}
int class_id = std::max_element(classes_scores.begin(), classes_scores.end()) - classes_scores.begin();
float score = classes_scores[class_id];
if (score < score_threshold)
continue;
float x = output_data[0 * output_numbox + i];
float y = output_data[1 * output_numbox + i];
float w = output_data[2 * output_numbox + i];
float h = output_data[3 * output_numbox + i];
float angle = output_data[(4 + num_classes) * output_numbox + i];
int left = int(x - 0.5 * w);
int top = int(y - 0.5 * h);
int width = int(w);
int height = int(h);
cv::Rect box = cv::Rect(left, top, width, height);
scale_box(box, image.size());
boxes.push_back(box);
scores.push_back(score);
class_ids.push_back(class_id);
cv::RotatedRect rotated_box;
rotated_box.center = cv::Point2f(x, y) ;
rotated_box.size = cv::Size2f(w, h);
if (angle >= 0.5 * CV_PI && angle <= 0.75 * CV_PI)
angle -= CV_PI;
rotated_box.angle = angle * 180 / CV_PI;
scale_box(rotated_box, image.size());
rotated_boxes.push_back(rotated_box);
}
std::vector<int> indices;
nms(boxes, scores, score_threshold, nms_threshold, indices);
for (int i = 0; i < indices.size(); i++)
{
int idx = indices[i];
cv::RotatedRect rotated_box = rotated_boxes[idx];
std::string label = class_names[class_ids[idx]] + ":" + cv::format("%.2f", scores[idx]);
draw_result(result, label, rotated_box);
}
}
int main(int argc, char* argv[])
{
cv::Mat image = cv::imread("airport.jpg");
std::vector<float> inputs;
pre_process(image, inputs);
const wchar_t* model = L"yolov8n-obb.onnx";
std::vector<Ort::Value> outputs;
process(model, inputs, outputs);
cv::Mat result = image.clone();
post_process(image, result, outputs);
cv::imwrite("result.jpg", result);
return 0;
}
TensorRT inference
Convert the ONNX model to a TensorRT engine with trtexec:
./trtexec.exe --onnx=yolov8n-obb.onnx --saveEngine=yolov8n-obb.engine
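Adding --fp16 to the trtexec command builds a half-precision engine, which is usually faster on GPUs with good FP16 throughput. The engine can also be built programmatically instead of with trtexec (a minimal sketch against the TensorRT 8.x Python builder API; the FP16 flag is optional):
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)
with open("yolov8n-obb.onnx", "rb") as f:
    if not parser.parse(f.read()):
        raise RuntimeError(parser.get_error(0))
config = builder.create_builder_config()
config.set_flag(trt.BuilderFlag.FP16)  # optional: build a half-precision engine
engine_bytes = builder.build_serialized_network(network, config)
with open("yolov8n-obb.engine", "wb") as f:
    f.write(engine_bytes)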
Python inference code:
import cv2
import math
import numpy as np
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
class_names = ["plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor", "bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool"]
input_shape = (1024, 1024)
score_threshold = 0.1
nms_threshold = 0.4
def nms(boxes, scores, score_threshold, nms_threshold):
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)
    keep = []
    index = scores.argsort()[::-1]
    while index.size > 0:
        i = index[0]
        keep.append(i)
        x11 = np.maximum(x1[i], x1[index[1:]])
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)
        h = np.maximum(0, y22 - y11 + 1)
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        idx = np.where(ious <= nms_threshold)[0]
        index = index[idx + 1]
    return keep
def xywh2xyxy(x):
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y
def filter_box(outputs):  # filter out low-score boxes and keep the best-scoring class per candidate
    outputs = np.squeeze(outputs)  # shape: (4 + num_classes + 1, num_boxes)
    rotated_boxes = []
    scores = []
    class_ids = []
    classes_scores = outputs[4:(4 + len(class_names)), ...]
    angles = outputs[-1, ...]  # rotation angle in radians
    for i in range(outputs.shape[1]):
        class_id = np.argmax(classes_scores[..., i])
        score = classes_scores[class_id][i]
        angle = angles[i]
        if 0.5 * math.pi <= angle <= 0.75 * math.pi:
            angle -= math.pi
        if score > score_threshold:
            rotated_boxes.append(np.concatenate([outputs[:4, i], np.array([score, class_id, angle * 180 / math.pi])]))
            scores.append(score)
            class_ids.append(class_id)
    rotated_boxes = np.array(rotated_boxes)
    boxes = xywh2xyxy(rotated_boxes)
    scores = np.array(scores)
    indices = nms(boxes, scores, score_threshold, nms_threshold)
    output = rotated_boxes[indices]
    return output
def letterbox(im, new_shape=(416, 416), color=(114, 114, 114)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2  # wh padding
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im
def scale_boxes(boxes, shape):
    # Rescale boxes (cx, cy, w, h) from input_shape back to the original image shape
    gain = min(input_shape[0] / shape[0], input_shape[1] / shape[1])  # gain = old / new
    pad = (input_shape[1] - shape[1] * gain) / 2, (input_shape[0] - shape[0] * gain) / 2  # wh padding
    boxes[..., [0, 1]] -= pad  # xy padding
    boxes[..., :4] /= gain
    return boxes
def draw(image, box_data):
    box_data = scale_boxes(box_data, image.shape)
    boxes = box_data[..., :4]
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)
    angles = box_data[..., 6]
    for box, score, cl, angle in zip(boxes, scores, classes, angles):
        rotate_box = ((box[0], box[1]), (box[2], box[3]), angle)
        points = cv2.boxPoints(rotate_box)
        points = points.astype(np.int32)  # np.int0 was removed in NumPy 2.0
        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=1)
        cv2.putText(image, '{0} {1:.2f}'.format(class_names[cl], score), (points[0][0], points[0][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
if __name__ == "__main__":
    logger = trt.Logger(trt.Logger.WARNING)
    with open("yolov8n-obb.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    # one execution context is used for both the shape queries and inference
    context = engine.create_execution_context()
    inputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
    outputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
    inputs_device = cuda.mem_alloc(inputs_host.nbytes)
    outputs_device = cuda.mem_alloc(outputs_host.nbytes)
    stream = cuda.Stream()
    print(trt.volume(context.get_binding_shape(0)), trt.volume(context.get_binding_shape(1)))
    image = cv2.imread('airport.jpg', -1)
    input = letterbox(image, input_shape)
    input = input[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)  # BGR to RGB and HWC to CHW
    input = input / 255.0
    input = np.expand_dims(input, axis=0)
    np.copyto(inputs_host, input.ravel())
    cuda.memcpy_htod_async(inputs_device, inputs_host, stream)
    context.execute_async_v2(bindings=[int(inputs_device), int(outputs_device)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(outputs_host, outputs_device, stream)
    stream.synchronize()
    boxes = filter_box(outputs_host.reshape(context.get_binding_shape(1)))
    draw(image, boxes)
    cv2.imwrite('result.jpg', image)
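The binding-index calls used above (get_binding_shape, execute_async_v2) belong to the TensorRT 8.x API and were removed in TensorRT 10, which addresses I/O tensors by name instead. On TensorRT 10 the transfer-and-execute block would look roughly like this (a sketch; the tensor names "images" and "output0" assume the default ultralytics export), and the host/device buffers would be sized with engine.get_tensor_shape(name) rather than context.get_binding_shape(index):
context.set_tensor_address("images", int(inputs_device))
context.set_tensor_address("output0", int(outputs_device))
cuda.memcpy_htod_async(inputs_device, inputs_host, stream)
context.execute_async_v3(stream_handle=stream.handle)
cuda.memcpy_dtoh_async(outputs_host, outputs_device, stream)
stream.synchronize()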
C++ inference code:
#include <iostream>
#include <fstream>
#include <vector>
#include <opencv2/opencv.hpp>
#include <cuda_runtime.h>
#include <NvInfer.h>
#include <NvInferRuntime.h>
const std::vector<std::string> class_names = {
"plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor",
"bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool" };
const int input_width = 1024;
const int input_height = 1024;
const float score_threshold = 0.1;
const float nms_threshold = 0.5;
const int num_classes = class_names.size();
const int output_numprob = 5 + num_classes;
const int output_numbox = input_width / 8 * input_height / 8 + input_width / 16 * input_height / 16 + input_width / 32 * input_height / 32;
const int input_numel = 1 * 3 * input_width * input_height;
const int output_numel = 1 * output_numprob * output_numbox;
inline const char* severity_string(nvinfer1::ILogger::Severity t)
{
switch (t)
{
case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: return "internal_error";
case nvinfer1::ILogger::Severity::kERROR: return "error";
case nvinfer1::ILogger::Severity::kWARNING: return "warning";
case nvinfer1::ILogger::Severity::kINFO: return "info";
case nvinfer1::ILogger::Severity::kVERBOSE: return "verbose";
default: return "unknown";
}
}
class TRTLogger : public nvinfer1::ILogger
{
public:
virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
{
if (severity <= Severity::kINFO)
{
if (severity == Severity::kWARNING)
printf("\033[33m%s: %s\033[0m\n", severity_string(severity), msg);
else if (severity <= Severity::kERROR)
printf("\033[31m%s: %s\033[0m\n", severity_string(severity), msg);
else
printf("%s: %s\n", severity_string(severity), msg);
}
}
} logger;
std::vector<unsigned char> load_file(const std::string& file)
{
std::ifstream in(file, std::ios::in | std::ios::binary);
if (!in.is_open())
return {};
in.seekg(0, std::ios::end);
size_t length = in.tellg();
std::vector<uint8_t> data;
if (length > 0)
{
in.seekg(0, std::ios::beg);
data.resize(length);
in.read((char*)&data[0], length);
}
in.close();
return data;
}
// LetterBox preprocessing
void LetterBox(const cv::Mat& image, cv::Mat& outImage,
const cv::Size& newShape = cv::Size(640, 640), const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
cv::Size shape = image.size();
float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);
float ratio[2]{ r, r };
int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) };
auto dw = (float)(newShape.width - new_un_pad[0]) / 2;
auto dh = (float)(newShape.height - new_un_pad[1]) / 2;
if (shape.width != new_un_pad[0] || shape.height != new_un_pad[1])
cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1]));
else
outImage = image.clone();
int top = int(std::round(dh - 0.1f));
int bottom = int(std::round(dh + 0.1f));
int left = int(std::round(dw - 0.1f));
int right = int(std::round(dw + 0.1f));
cv::Vec4d params;
params[0] = ratio[0];
params[1] = ratio[1];
params[2] = left;
params[3] = top;
cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
// Pre-processing
void pre_process(cv::Mat& image, float* input_data_host)
{
cv::Mat letterbox;
LetterBox(image, letterbox, cv::Size(input_width, input_height));
letterbox.convertTo(letterbox, CV_32FC3, 1.0f / 255.0f);
int image_area = letterbox.cols * letterbox.rows;
float* pimage = (float*)letterbox.data;
float* phost_b = input_data_host + image_area * 0;
float* phost_g = input_data_host + image_area * 1;
float* phost_r = input_data_host + image_area * 2;
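// Note: despite the pointer names, plane 0 receives the R channel and plane 2 the B channel below,
// so the BGR source image ends up in planar RGB order without an explicit cv::cvtColor call.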
for (int i = 0; i < image_area; ++i, pimage += 3)
{
*phost_r++ = pimage[0];
*phost_g++ = pimage[1];
*phost_b++ = pimage[2];
}
}
// Network inference
void process(std::string model, float* input_data_host, float* output_data_host)
{
TRTLogger logger;
auto engine_data = load_file(model);
auto runtime = nvinfer1::createInferRuntime(logger);
auto engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
cudaStream_t stream = nullptr;
cudaStreamCreate(&stream);
auto execution_context = engine->createExecutionContext();
float* input_data_device = nullptr;
cudaMalloc(&input_data_device, sizeof(float) * input_numel);
cudaMemcpyAsync(input_data_device, input_data_host, sizeof(float) * input_numel, cudaMemcpyHostToDevice, stream);
float* output_data_device = nullptr;
cudaMalloc(&output_data_device, sizeof(float) * output_numel);
float* bindings[] = { input_data_device, output_data_device };
execution_context->enqueueV2((void**)bindings, stream, nullptr);
cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
cudaStreamDestroy(stream);
cudaFree(input_data_device);
cudaFree(output_data_device);
}
//NMS
void nms(std::vector<cv::Rect>& boxes, std::vector<float>& scores, float score_threshold, float nms_threshold, std::vector<int>& indices)
{
assert(boxes.size() == scores.size());
struct BoxScore
{
cv::Rect box;
float score;
int id;
};
std::vector<BoxScore> boxes_scores;
for (size_t i = 0; i < boxes.size(); i++)
{
BoxScore box_conf;
box_conf.box = boxes[i];
box_conf.score = scores[i];
box_conf.id = i;
if (scores[i] > score_threshold) boxes_scores.push_back(box_conf);
}
std::sort(boxes_scores.begin(), boxes_scores.end(), [](BoxScore a, BoxScore b) { return a.score > b.score; });
std::vector<float> area(boxes_scores.size());
for (size_t i = 0; i < boxes_scores.size(); ++i)
{
area[i] = boxes_scores[i].box.width * boxes_scores[i].box.height;
}
std::vector<bool> isSuppressed(boxes_scores.size(), false);
for (size_t i = 0; i < boxes_scores.size(); ++i)
{
if (isSuppressed[i]) continue;
for (size_t j = i + 1; j < boxes_scores.size(); ++j)
{
if (isSuppressed[j]) continue;
float x1 = (std::max)(boxes_scores[i].box.x, boxes_scores[j].box.x);
float y1 = (std::max)(boxes_scores[i].box.y, boxes_scores[j].box.y);
float x2 = (std::min)(boxes_scores[i].box.x + boxes_scores[i].box.width, boxes_scores[j].box.x + boxes_scores[j].box.width);
float y2 = (std::min)(boxes_scores[i].box.y + boxes_scores[i].box.height, boxes_scores[j].box.y + boxes_scores[j].box.height);
float w = (std::max)(0.0f, x2 - x1);
float h = (std::max)(0.0f, y2 - y1);
float inter = w * h;
float ovr = inter / (area[i] + area[j] - inter);
if (ovr >= nms_threshold) isSuppressed[j] = true;
}
}
for (int i = 0; i < boxes_scores.size(); ++i)
{
if (!isSuppressed[i]) indices.push_back(boxes_scores[i].id);
}
}
// Rescale boxes to the original image size
void scale_box(cv::Rect& box, cv::Size size)
{
float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
int pad_w = (input_width - size.width * gain) / 2;
int pad_h = (input_height - size.height * gain) / 2;
box.x -= pad_w;
box.y -= pad_h;
box.x /= gain;
box.y /= gain;
box.width /= gain;
box.height /= gain;
}
void scale_box(cv::RotatedRect& rotated_box, cv::Size size)
{
float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
int pad_w = (input_width - size.width * gain) / 2;
int pad_h = (input_height - size.height * gain) / 2;
rotated_box.center -= cv::Point2f(pad_w, pad_h);
rotated_box.center /= gain;
rotated_box.size /= gain;
}
// Visualization
void draw_result(cv::Mat& image, std::string label, cv::RotatedRect rotated_box)
{
cv::Mat points;
cv::boxPoints(rotated_box, points);
cv::Point2f vertices[4];
for (int i = 0; i < 4; i++)
{
vertices[i] = cv::Point2f(points.at<float>(i, 0), points.at<float>(i, 1));
}
for (int i = 0; i < 4; i++)
{
cv::line(image, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1);
}
cv::putText(image, label, cv::Point(rotated_box.center), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 1);
}
// Post-processing
void post_process(const cv::Mat image, cv::Mat& result, float* output_data_host)
{
std::vector<cv::Rect> boxes;
std::vector<cv::RotatedRect> rotated_boxes;
std::vector<float> scores;
std::vector<int> class_ids;
for (int i = 0; i < output_numbox; i++)
{
std::vector<float> classes_scores(num_classes);
for (int j = 0; j < num_classes; j++)
{
float* ptr = output_data_host + (4 + j) * output_numbox + i;
classes_scores[j] = ptr[0];
}
int class_id = std::max_element(classes_scores.begin(), classes_scores.end()) - classes_scores.begin();
float score = classes_scores[class_id];
if (score < score_threshold)
continue;
float x = (output_data_host + 0 * output_numbox + i)[0];
float y = (output_data_host + 1 * output_numbox + i)[0];
float w = (output_data_host + 2 * output_numbox + i)[0];
float h = (output_data_host + 3 * output_numbox + i)[0];
float angle = (output_data_host + (4 + num_classes) * output_numbox + i)[0];
int left = int(x - 0.5 * w);
int top = int(y - 0.5 * h);
int width = int(w);
int height = int(h);
cv::Rect box = cv::Rect(left, top, width, height);
scale_box(box, image.size());
boxes.push_back(box);
scores.push_back(score);
class_ids.push_back(class_id);
cv::RotatedRect rotated_box;
rotated_box.center = cv::Point2f(x, y);
rotated_box.size = cv::Size2f(w, h);
if (angle >= 0.5 * CV_PI && angle <= 0.75 * CV_PI)
angle -= CV_PI;
rotated_box.angle = angle * 180 / CV_PI;
scale_box(rotated_box, image.size());
rotated_boxes.push_back(rotated_box);
}
std::vector<int> indices;
nms(boxes, scores, score_threshold, nms_threshold, indices);
for (int i = 0; i < indices.size(); i++)
{
int idx = indices[i];
cv::RotatedRect rotated_box = rotated_boxes[idx];
std::string label = class_names[class_ids[idx]] + ":" + cv::format("%.2f", scores[idx]);
draw_result(result, label, rotated_box);
}
}
int main(int argc, char* argv[])
{
float* inputs = nullptr;
float* outputs = nullptr;
cudaMallocHost(&inputs, sizeof(float) * input_numel);
cudaMallocHost(&outputs, sizeof(float) * output_numel);
cv::Mat image = cv::imread("airport.jpg");
pre_process(image, inputs);
std::string model = "yolov8n-obb.engine";
process(model, inputs, outputs);
cv::Mat result = image.clone();
post_process(image, result, outputs);
cv::imwrite("result.jpg", result);
cudaFreeHost(outputs);
cudaFreeHost(inputs);
return 0;
}
Result visualization: