YOLOv8-OBB rotated object detection: ONNX Runtime and TensorRT inference


ONNX Runtime inference

Export the ONNX model:

from ultralytics import YOLO
model = YOLO("yolov8n-obb.pt")  
model.export(format="onnx") 
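
If a fixed input size or a particular opset is needed, the export call accepts additional options (a hedged example; imgsz, opset and simplify are standard Ultralytics export arguments, adjust as needed):

model.export(format="onnx", imgsz=1024, opset=12, simplify=True)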

The ONNX model structure is as follows:
(Figure: ONNX model structure)
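
The input and output names and shapes can also be printed directly with onnxruntime (a minimal sketch, assuming the yolov8n-obb.onnx exported above at the default 1024 input size):

import onnxruntime

session = onnxruntime.InferenceSession("yolov8n-obb.onnx", providers=["CPUExecutionProvider"])
for node in session.get_inputs():
    print("input:", node.name, node.shape)   # expected: images [1, 3, 1024, 1024]
for node in session.get_outputs():
    print("output:", node.name, node.shape)  # expected: output0 [1, 20, 21504]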

Python inference code:

import cv2
import math
import numpy as np
import onnxruntime


class_names = ["plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor", "bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool"]      
input_shape = (1024, 1024) 
score_threshold = 0.1  
nms_threshold = 0.4
confidence_threshold = 0.1   


def nms(boxes, scores, score_threshold, nms_threshold):
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)
    keep = []
    index = scores.argsort()[::-1] 

    while index.size > 0:
        i = index[0]
        keep.append(i)
        x11 = np.maximum(x1[i], x1[index[1:]]) 
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)                              
        h = np.maximum(0, y22 - y11 + 1) 
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        idx = np.where(ious <= nms_threshold)[0]
        index = index[idx + 1]
    return keep


def xywh2xyxy(x):
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y


def filter_box(outputs): # filter out low-confidence boxes and apply NMS
    outputs = np.squeeze(outputs)
     
    rotated_boxes = []
    scores = []
    class_ids = []
    classes_scores = outputs[4:(4+len(class_names)), ...]  
    angles = outputs[-1, ...]   
       
    for i in range(outputs.shape[1]):              
        class_id = np.argmax(classes_scores[...,i])
        score = classes_scores[class_id][i]
        angle = angles[i]
        if 0.5 * math.pi <= angle <= 0.75 * math.pi:
            angle -= math.pi
        if score > score_threshold:
            rotated_boxes.append(np.concatenate([outputs[:4, i], np.array([score, class_id, angle * 180 / math.pi])]))
            scores.append(score)
            class_ids.append(class_id) 
            
    rotated_boxes = np.array(rotated_boxes)
    boxes = xywh2xyxy(rotated_boxes)
    scores = np.array(scores)
    indices = nms(boxes, scores, score_threshold, nms_threshold) 
    output = rotated_boxes[indices]
    return output


def letterbox(im, new_shape=(416, 416), color=(114, 114, 114)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    
    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))    
    dw, dh = (new_shape[1] - new_unpad[0])/2, (new_shape[0] - new_unpad[1])/2  # wh padding 
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    
    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im


def scale_boxes(boxes, shape):
    # Rescale boxes (xyxy) from input_shape to shape
    gain = min(input_shape[0] / shape[0], input_shape[1] / shape[1])  # gain  = old / new
    pad = (input_shape[1] - shape[1] * gain) / 2, (input_shape[0] - shape[0] * gain) / 2  # wh padding
    boxes[..., [0, 1]] -= pad  # xy padding
    boxes[..., :4] /= gain
    return boxes


def draw(image, box_data):
    box_data = scale_boxes(box_data, image.shape)
    boxes = box_data[...,:4]
    scores = box_data[...,4]
    classes = box_data[...,5].astype(np.int32)
    angles = box_data[...,6]
    for box, score, cl, angle in zip(boxes, scores, classes, angles):
        rotate_box = ((box[0], box[1]), (box[2], box[3]), angle)
        points = cv2.boxPoints(rotate_box)
        points = points.astype(np.int32)  # np.int0 was removed in NumPy 2.x
        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=1)
        cv2.putText(image, '{0} {1:.2f}'.format(class_names[cl], score), (points[0][0], points[0][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)


if __name__=="__main__":
    image = cv2.imread('airport.jpg', -1)
    input = letterbox(image, input_shape)
    input = input[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)  # BGR to RGB and HWC to CHW
    input = input / 255.0
    input_tensor = []
    input_tensor.append(input)
    
    onnx_session = onnxruntime.InferenceSession('yolov8n-obb.onnx', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])  # prefer CUDA, fall back to CPU
        
    input_name = []
    for node in onnx_session.get_inputs():
        input_name.append(node.name)

    output_name = []
    for node in onnx_session.get_outputs():
        output_name.append(node.name)

    inputs = {}
    for name in input_name:
        inputs[name] =  np.array(input_tensor)
  
    outputs = onnx_session.run(None, inputs)[0]
    print(outputs.shape)
    
    boxes = filter_box(outputs)
    draw(image, boxes)
    cv2.imwrite('result.jpg', image)
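
For a 1024x1024 input and the 15 DOTA classes, the printed output shape should be (1, 20, 21504): each of the 21504 anchor points carries 4 box parameters (cx, cy, w, h), 15 class scores and 1 angle. A quick sanity check of those two numbers:

num_outputs = 4 + len(class_names) + 1                     # 4 box params + 15 classes + 1 angle = 20
num_anchors = sum((1024 // s) ** 2 for s in (8, 16, 32))   # 16384 + 4096 + 1024 = 21504
print(num_outputs, num_anchors)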

C++ inference code:

#include <iostream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>


const std::vector<std::string> class_names = {
	"plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor", 
	"bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool" };
const int input_width = 1024;
const int input_height = 1024;
const float score_threshold = 0.1;
const float nms_threshold = 0.5;
const int num_classes = class_names.size();
const int output_numprob = 5 + num_classes;
const int output_numbox = input_width / 8 * input_height / 8 + input_width / 16 * input_height / 16 + input_width / 32 * input_height / 32;


//LetterBox: resize with the aspect ratio kept, then pad to the target size
void LetterBox(const cv::Mat& image, cv::Mat& outImage,
	const cv::Size& newShape = cv::Size(640, 640), const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
	cv::Size shape = image.size();
	float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);
	float ratio[2]{ r, r };
	int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) };

	auto dw = (float)(newShape.width - new_un_pad[0]) / 2;
	auto dh = (float)(newShape.height - new_un_pad[1]) / 2;

	if (shape.width != new_un_pad[0] || shape.height != new_un_pad[1])
		cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1]));
	else
		outImage = image.clone();

	int top = int(std::round(dh - 0.1f));
	int bottom = int(std::round(dh + 0.1f));
	int left = int(std::round(dw - 0.1f));
	int right = int(std::round(dw + 0.1f));

	cv::Vec4d params;
	params[0] = ratio[0];
	params[1] = ratio[1];
	params[2] = left;
	params[3] = top;

	cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}


//Preprocessing
void pre_process(cv::Mat& image, std::vector<float>& inputs)
{
	cv::Vec4d params;
	cv::Mat letterbox;
	LetterBox(image, letterbox, cv::Size(input_width, input_height));

	cv::cvtColor(letterbox, letterbox, cv::COLOR_BGR2RGB);
	letterbox.convertTo(letterbox, CV_32FC3, 1.0f / 255.0f);
	std::vector<cv::Mat> split_images;
	cv::split(letterbox, split_images);
	for (size_t i = 0; i < letterbox.channels(); ++i)
	{
		std::vector<float> split_image_data = split_images[i].reshape(1, 1);
		inputs.insert(inputs.end(), split_image_data.begin(), split_image_data.end());
	}
}


//Network inference
void process(const wchar_t* model, std::vector<float>& inputs, std::vector<Ort::Value>& outputs)
{
	Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov8n-obb");
	Ort::SessionOptions session_options;
	session_options.SetIntraOpNumThreads(12);//number of intra-op threads
	session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);//enable graph optimizations

	//CUDA option set
	//OrtCUDAProviderOptions cuda_option;
	//cuda_option.device_id = 0;
	//cuda_option.arena_extend_strategy = 0;
	//cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
	//cuda_option.gpu_mem_limit = SIZE_MAX;
	//cuda_option.do_copy_in_default_stream = 1;
	//session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
	//session_options.AppendExecutionProvider_CUDA(cuda_option);
	Ort::Session session(env, model, session_options);

	std::vector<const char*>  input_node_names;
	for (size_t i = 0; i < session.GetInputCount(); i++)
	{
		input_node_names.push_back("images");
	}

	std::vector<const char*> output_node_names;
	for (size_t i = 0; i < session.GetOutputCount(); i++)
	{
		output_node_names.push_back("output0");
	}

	// create input tensor object from data values
	std::vector<int64_t> input_node_dims = { 1, 3, input_height, input_width };//NCHW
	auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, inputs.data(), inputs.size(), input_node_dims.data(), input_node_dims.size());

	std::vector<Ort::Value> ort_inputs;
	ort_inputs.push_back(std::move(input_tensor));//move to avoid an unnecessary copy

	// score model & input tensor, get back output tensor	
	outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
}


//NMS
void nms(std::vector<cv::Rect>& boxes, std::vector<float>& scores, float score_threshold, float nms_threshold, std::vector<int>& indices)
{
	struct BoxScore
	{
		cv::Rect box;
		float score;
		int id;
	};
	std::vector<BoxScore> boxes_scores;
	for (size_t i = 0; i < boxes.size(); i++)
	{
		BoxScore box_conf;
		box_conf.box = boxes[i];
		box_conf.score = scores[i];
		box_conf.id = i;
		if (scores[i] > score_threshold)	boxes_scores.push_back(box_conf);
	}

	std::sort(boxes_scores.begin(), boxes_scores.end(), [](BoxScore a, BoxScore b) { return a.score > b.score; });

	std::vector<float> area(boxes_scores.size());
	for (size_t i = 0; i < boxes_scores.size(); ++i)
	{
		area[i] = boxes_scores[i].box.width * boxes_scores[i].box.height;
	}

	std::vector<bool> isSuppressed(boxes_scores.size(), false);
	for (size_t i = 0; i < boxes_scores.size(); ++i)
	{
		if (isSuppressed[i])  continue;
		for (size_t j = i + 1; j < boxes_scores.size(); ++j)
		{
			if (isSuppressed[j])  continue;

			float x1 = (std::max)(boxes_scores[i].box.x, boxes_scores[j].box.x);
			float y1 = (std::max)(boxes_scores[i].box.y, boxes_scores[j].box.y);
			float x2 = (std::min)(boxes_scores[i].box.x + boxes_scores[i].box.width, boxes_scores[j].box.x + boxes_scores[j].box.width);
			float y2 = (std::min)(boxes_scores[i].box.y + boxes_scores[i].box.height, boxes_scores[j].box.y + boxes_scores[j].box.height);
			float w = (std::max)(0.0f, x2 - x1);
			float h = (std::max)(0.0f, y2 - y1);
			float inter = w * h;
			float ovr = inter / (area[i] + area[j] - inter);

			if (ovr >= nms_threshold)  isSuppressed[j] = true;
		}
	}

	for (int i = 0; i < boxes_scores.size(); ++i)
	{
		if (!isSuppressed[i])	indices.push_back(boxes_scores[i].id);
	}
}


//Scale boxes back to the original image size
void scale_box(cv::Rect& box, cv::Size size)
{
	float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
	int pad_w = (input_width - size.width * gain) / 2;
	int pad_h = (input_height - size.height * gain) / 2;
	box.x -= pad_w;
	box.y -= pad_h;
	box.x /= gain;
	box.y /= gain;
	box.width /= gain;
	box.height /= gain;
}

void scale_box(cv::RotatedRect& rotated_box, cv::Size size)
{
	float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
	int pad_w = (input_width - size.width * gain) / 2;
	int pad_h = (input_height - size.height * gain) / 2;
	rotated_box.center -= cv::Point2f(pad_w, pad_h);
	rotated_box.center /= gain;
	rotated_box.size /= gain;
}


//Visualization
void draw_result(cv::Mat& image, std::string label, cv::RotatedRect rotated_box)
{
	cv::Mat points;
	cv::boxPoints(rotated_box, points);
	cv::Point2f vertices[4];
	for (int i = 0; i < 4; i++)
	{
		vertices[i] = cv::Point2f(points.at<float>(i, 0), points.at<float>(i, 1));
	}
	for (int i = 0; i < 4; i++) 
	{
		cv::line(image, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1);
	}
	cv::putText(image, label, cv::Point(rotated_box.center), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 1);
}


//Post-processing
void post_process(cv::Mat& image, cv::Mat& result, std::vector<Ort::Value>& outputs)
{
	std::vector<cv::Rect> boxes;
	std::vector<cv::RotatedRect> rotated_boxes;
	std::vector<float> scores;
	std::vector<int> class_ids;

	for (int i = 0; i < output_numbox; i++)
	{
		std::vector<float> classes_scores(num_classes);
		for (int j = 0; j < num_classes; j++)
		{
			float* ptr = const_cast<float*> (outputs[0].GetTensorData<float>()) + (4 + j) * output_numbox + i;
			classes_scores[j] = ptr[0];
		}		
		int class_id = std::max_element(classes_scores.begin(), classes_scores.end()) - classes_scores.begin();
		float score = classes_scores[class_id];
		if (score < score_threshold)
			continue;

		float x = (const_cast<float*> (outputs[0].GetTensorData<float>()) + 0 * output_numbox + i)[0];
		float y = (const_cast<float*> (outputs[0].GetTensorData<float>()) + 1 * output_numbox + i)[0];
		float w = (const_cast<float*> (outputs[0].GetTensorData<float>()) + 2 * output_numbox + i)[0];
		float h = (const_cast<float*> (outputs[0].GetTensorData<float>()) + 3 * output_numbox + i)[0];
		float angle = (const_cast<float*> (outputs[0].GetTensorData<float>()) + (4 + num_classes) * output_numbox + i)[0];
		int left = int(x - 0.5 * w);
		int top = int(y - 0.5 * h);
		int width = int(w);
		int height = int(h);
		cv::Rect box = cv::Rect(left, top, width, height);
		scale_box(box, image.size());
		boxes.push_back(box);
		scores.push_back(score);
		class_ids.push_back(class_id);

		cv::RotatedRect rotated_box;
		rotated_box.center = cv::Point2f(x, y) ;
		rotated_box.size = cv::Size2f(w, h);
		if (angle >= 0.5 * CV_PI && angle <= 0.75 * CV_PI)
			angle -= CV_PI;
		rotated_box.angle = angle * 180 / CV_PI;
		scale_box(rotated_box, image.size());
		rotated_boxes.push_back(rotated_box);
	}

	std::vector<int> indices;
	nms(boxes, scores, score_threshold, nms_threshold, indices);
	for (int i = 0; i < indices.size(); i++)
	{
		int idx = indices[i];
		cv::RotatedRect  rotated_box = rotated_boxes[idx];
		std::string label = class_names[class_ids[idx]] + ":" + cv::format("%.2f", scores[idx]);
		draw_result(result, label, rotated_box);
	}
}


int main(int argc, char* argv[])
{
	cv::Mat image = cv::imread("airport.jpg");
	std::vector<float> inputs;
	pre_process(image, inputs);

	const wchar_t* model = L"yolov8n-obb.onnx";
	std::vector<Ort::Value> outputs;
	process(model, inputs, outputs);

	cv::Mat result = image.clone();
	post_process(image, result, outputs);

	cv::imwrite("result.jpg", result);
	return 0;
}

TensorRT inference

Convert the ONNX model to an engine with trtexec:

./trtexec.exe --onnx=yolov8n-obb.onnx --saveEngine=yolov8n-obb.engine 
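
trtexec also accepts precision flags; for example, adding --fp16 builds a half-precision engine (the speed/accuracy trade-off depends on the GPU):

./trtexec.exe --onnx=yolov8n-obb.onnx --saveEngine=yolov8n-obb.engine --fp16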

Python inference code:

import cv2
import math
import numpy as np
import tensorrt as trt
import pycuda.autoinit 
import pycuda.driver as cuda  


class_names = ["plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor", "bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool"]      
input_shape = (1024, 1024) 
score_threshold = 0.1  
nms_threshold = 0.4


def nms(boxes, scores, score_threshold, nms_threshold):
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)
    keep = []
    index = scores.argsort()[::-1] 

    while index.size > 0:
        i = index[0]
        keep.append(i)
        x11 = np.maximum(x1[i], x1[index[1:]]) 
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)                              
        h = np.maximum(0, y22 - y11 + 1) 
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        idx = np.where(ious <= nms_threshold)[0]
        index = index[idx + 1]
    return keep


def xywh2xyxy(x):
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y


def filter_box(outputs): # filter out low-confidence boxes and apply NMS
    outputs = np.squeeze(outputs)
     
    rotated_boxes = []
    scores = []
    class_ids = []
    classes_scores = outputs[4:(4+len(class_names)), ...]  
    angles = outputs[-1, ...]   
       
    for i in range(outputs.shape[1]):              
        class_id = np.argmax(classes_scores[...,i])
        score = classes_scores[class_id][i]
        angle = angles[i]
        if 0.5 * math.pi <= angle <= 0.75 * math.pi:
            angle -= math.pi
        if score > score_threshold:
            rotated_boxes.append(np.concatenate([outputs[:4, i], np.array([score, class_id, angle * 180 / math.pi])]))
            scores.append(score)
            class_ids.append(class_id) 
            
    rotated_boxes = np.array(rotated_boxes)
    boxes = xywh2xyxy(rotated_boxes)
    scores = np.array(scores)
    indices = nms(boxes, scores, score_threshold, nms_threshold) 
    output = rotated_boxes[indices]
    return output


def letterbox(im, new_shape=(416, 416), color=(114, 114, 114)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    
    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))    
    dw, dh = (new_shape[1] - new_unpad[0])/2, (new_shape[0] - new_unpad[1])/2  # wh padding 
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    
    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im


def scale_boxes(boxes, shape):
    # Rescale boxes (xyxy) from input_shape to shape
    gain = min(input_shape[0] / shape[0], input_shape[1] / shape[1])  # gain  = old / new
    pad = (input_shape[1] - shape[1] * gain) / 2, (input_shape[0] - shape[0] * gain) / 2  # wh padding
    boxes[..., [0, 1]] -= pad  # xy padding
    boxes[..., :4] /= gain
    return boxes


def draw(image, box_data):
    box_data = scale_boxes(box_data, image.shape)
    boxes = box_data[...,:4]
    scores = box_data[...,4]
    classes = box_data[...,5].astype(np.int32)
    angles = box_data[...,6]
    for box, score, cl, angle in zip(boxes, scores, classes, angles):
        rotate_box = ((box[0], box[1]), (box[2], box[3]), angle)
        points = cv2.boxPoints(rotate_box)
        points = points.astype(np.int32)  # np.int0 was removed in NumPy 2.x
        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=1)
        cv2.putText(image, '{0} {1:.2f}'.format(class_names[cl], score), (points[0][0], points[0][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)


if __name__=="__main__":
    logger = trt.Logger(trt.Logger.WARNING)
    with open("yolov8n-obb.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
    inputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
    outputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
    inputs_device = cuda.mem_alloc(inputs_host.nbytes)
    outputs_device = cuda.mem_alloc(outputs_host.nbytes)
    stream = cuda.Stream()
    print(trt.volume(context.get_binding_shape(0)), trt.volume(context.get_binding_shape(1)))
    
    image = cv2.imread('airport.jpg', -1)
    input = letterbox(image, input_shape)
    input = input[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)  # BGR to RGB and HWC to CHW
    input = input / 255.0
    input = np.expand_dims(input, axis=0)     
    np.copyto(inputs_host, input.ravel())
    
    # reuse the execution context created above
    cuda.memcpy_htod_async(inputs_device, inputs_host, stream)
    context.execute_async_v2(bindings=[int(inputs_device), int(outputs_device)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(outputs_host, outputs_device, stream)
    stream.synchronize()
    boxes = filter_box(outputs_host.reshape(context.get_binding_shape(1)))
    draw(image, boxes)
    cv2.imwrite('result.jpg', image)
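
Note that get_binding_shape and execute_async_v2 belong to the TensorRT 8.x binding API; newer releases (8.5 and later, and exclusively in 10.x) use the named-tensor API. A hedged sketch of the equivalent calls, assuming a single input and a single output as in the engine above:

input_name = engine.get_tensor_name(0)
output_name = engine.get_tensor_name(1)
print(context.get_tensor_shape(input_name), context.get_tensor_shape(output_name))
context.set_tensor_address(input_name, int(inputs_device))
context.set_tensor_address(output_name, int(outputs_device))
cuda.memcpy_htod_async(inputs_device, inputs_host, stream)
context.execute_async_v3(stream_handle=stream.handle)
cuda.memcpy_dtoh_async(outputs_host, outputs_device, stream)
stream.synchronize()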

C++ inference code:

#include <iostream>
#include <fstream>
#include <vector>
#include <opencv2/opencv.hpp>
#include <cuda_runtime.h>
#include <NvInfer.h>
#include <NvInferRuntime.h>


const std::vector<std::string> class_names = {
	"plane", "ship", "storage tank", "baseball diamond", "tennis court", "basketball court", "ground track field", "harbor",
	"bridge", "large vehicle", "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool" };
const int input_width = 1024;
const int input_height = 1024;
const float score_threshold = 0.1;
const float nms_threshold = 0.5;
const int num_classes = class_names.size();
const int output_numprob = 5 + num_classes;
const int output_numbox = input_width / 8 * input_height / 8 + input_width / 16 * input_height / 16 + input_width / 32 * input_height / 32;
const int input_numel = 1 * 3 * input_width * input_height;
const int output_numel = 1 * output_numprob * output_numbox;


inline const char* severity_string(nvinfer1::ILogger::Severity t)
{
	switch (t)
	{
	case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: return "internal_error";
	case nvinfer1::ILogger::Severity::kERROR:   return "error";
	case nvinfer1::ILogger::Severity::kWARNING: return "warning";
	case nvinfer1::ILogger::Severity::kINFO:    return "info";
	case nvinfer1::ILogger::Severity::kVERBOSE: return "verbose";
	default: return "unknow";
	}
}


class TRTLogger : public nvinfer1::ILogger
{
public:
	virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
	{
		if (severity <= Severity::kINFO)
		{
			if (severity == Severity::kWARNING)
				printf("\033[33m%s: %s\033[0m\n", severity_string(severity), msg);
			else if (severity <= Severity::kERROR)
				printf("\033[31m%s: %s\033[0m\n", severity_string(severity), msg);
			else
				printf("%s: %s\n", severity_string(severity), msg);
		}
	}
} logger;


std::vector<unsigned char> load_file(const std::string& file)
{
	std::ifstream in(file, std::ios::in | std::ios::binary);
	if (!in.is_open())
		return {};

	in.seekg(0, std::ios::end);
	size_t length = in.tellg();

	std::vector<uint8_t> data;
	if (length > 0)
	{
		in.seekg(0, std::ios::beg);
		data.resize(length);
		in.read((char*)&data[0], length);
	}
	in.close();
	return data;
}


//LetterBox: resize with the aspect ratio kept, then pad to the target size
void LetterBox(const cv::Mat& image, cv::Mat& outImage,
	const cv::Size& newShape = cv::Size(640, 640), const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
	cv::Size shape = image.size();
	float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);
	float ratio[2]{ r, r };
	int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) };

	auto dw = (float)(newShape.width - new_un_pad[0]) / 2;
	auto dh = (float)(newShape.height - new_un_pad[1]) / 2;

	if (shape.width != new_un_pad[0] || shape.height != new_un_pad[1])
		cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1]));
	else
		outImage = image.clone();

	int top = int(std::round(dh - 0.1f));
	int bottom = int(std::round(dh + 0.1f));
	int left = int(std::round(dw - 0.1f));
	int right = int(std::round(dw + 0.1f));

	cv::Vec4d params;
	params[0] = ratio[0];
	params[1] = ratio[1];
	params[2] = left;
	params[3] = top;

	cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}


//Preprocessing
void pre_process(cv::Mat& image, float* input_data_host)
{
	cv::Mat letterbox;
	LetterBox(image, letterbox, cv::Size(input_width, input_height));
	letterbox.convertTo(letterbox, CV_32FC3, 1.0f / 255.0f);

	int image_area = letterbox.cols * letterbox.rows;
	float* pimage = (float*)letterbox.data;             //interleaved BGR
	float* phost_r = input_data_host + image_area * 0;  //plane 0 = R
	float* phost_g = input_data_host + image_area * 1;  //plane 1 = G
	float* phost_b = input_data_host + image_area * 2;  //plane 2 = B
	for (int i = 0; i < image_area; ++i, pimage += 3)
	{
		//BGR -> RGB while converting HWC -> CHW
		*phost_r++ = pimage[2];
		*phost_g++ = pimage[1];
		*phost_b++ = pimage[0];
	}
}


//Network inference
void process(std::string model, float* input_data_host, float* output_data_host)
{
	TRTLogger logger;
	auto engine_data = load_file(model);
	auto runtime = nvinfer1::createInferRuntime(logger);
	auto engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());

	cudaStream_t stream = nullptr;
	cudaStreamCreate(&stream);
	auto execution_context = engine->createExecutionContext();

	float* input_data_device = nullptr;
	cudaMalloc(&input_data_device, sizeof(float) * input_numel);
	cudaMemcpyAsync(input_data_device, input_data_host, sizeof(float) * input_numel, cudaMemcpyHostToDevice, stream);

	float* output_data_device = nullptr;
	cudaMalloc(&output_data_device, sizeof(float) * output_numel);

	float* bindings[] = { input_data_device, output_data_device };
	execution_context->enqueueV2((void**)bindings, stream, nullptr);
	cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream);
	cudaStreamSynchronize(stream);

	cudaStreamDestroy(stream);
	cudaFree(input_data_device);
	cudaFree(output_data_device);
}


//NMS
void nms(std::vector<cv::Rect>& boxes, std::vector<float>& scores, float score_threshold, float nms_threshold, std::vector<int>& indices)
{
	assert(boxes.size() == scores.size());

	struct BoxScore
	{
		cv::Rect box;
		float score;
		int id;
	};
	std::vector<BoxScore> boxes_scores;
	for (size_t i = 0; i < boxes.size(); i++)
	{
		BoxScore box_conf;
		box_conf.box = boxes[i];
		box_conf.score = scores[i];
		box_conf.id = i;
		if (scores[i] > score_threshold)	boxes_scores.push_back(box_conf);
	}

	std::sort(boxes_scores.begin(), boxes_scores.end(), [](BoxScore a, BoxScore b) { return a.score > b.score; });

	std::vector<float> area(boxes_scores.size());
	for (size_t i = 0; i < boxes_scores.size(); ++i)
	{
		area[i] = boxes_scores[i].box.width * boxes_scores[i].box.height;
	}

	std::vector<bool> isSuppressed(boxes_scores.size(), false);
	for (size_t i = 0; i < boxes_scores.size(); ++i)
	{
		if (isSuppressed[i])  continue;
		for (size_t j = i + 1; j < boxes_scores.size(); ++j)
		{
			if (isSuppressed[j])  continue;

			float x1 = (std::max)(boxes_scores[i].box.x, boxes_scores[j].box.x);
			float y1 = (std::max)(boxes_scores[i].box.y, boxes_scores[j].box.y);
			float x2 = (std::min)(boxes_scores[i].box.x + boxes_scores[i].box.width, boxes_scores[j].box.x + boxes_scores[j].box.width);
			float y2 = (std::min)(boxes_scores[i].box.y + boxes_scores[i].box.height, boxes_scores[j].box.y + boxes_scores[j].box.height);
			float w = (std::max)(0.0f, x2 - x1);
			float h = (std::max)(0.0f, y2 - y1);
			float inter = w * h;
			float ovr = inter / (area[i] + area[j] - inter);

			if (ovr >= nms_threshold)  isSuppressed[j] = true;
		}
	}

	for (int i = 0; i < boxes_scores.size(); ++i)
	{
		if (!isSuppressed[i])	indices.push_back(boxes_scores[i].id);
	}
}


//Scale boxes back to the original image size
void scale_box(cv::Rect& box, cv::Size size)
{
	float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
	int pad_w = (input_width - size.width * gain) / 2;
	int pad_h = (input_height - size.height * gain) / 2;
	box.x -= pad_w;
	box.y -= pad_h;
	box.x /= gain;
	box.y /= gain;
	box.width /= gain;
	box.height /= gain;
}

void scale_box(cv::RotatedRect& rotated_box, cv::Size size)
{
	float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
	int pad_w = (input_width - size.width * gain) / 2;
	int pad_h = (input_height - size.height * gain) / 2;
	rotated_box.center -= cv::Point2f(pad_w, pad_h);
	rotated_box.center /= gain;
	rotated_box.size /= gain;
}


//Visualization
void draw_result(cv::Mat& image, std::string label, cv::RotatedRect rotated_box)
{
	cv::Mat points;
	cv::boxPoints(rotated_box, points);
	cv::Point2f vertices[4];
	for (int i = 0; i < 4; i++)
	{
		vertices[i] = cv::Point2f(points.at<float>(i, 0), points.at<float>(i, 1));
	}
	for (int i = 0; i < 4; i++)
	{
		cv::line(image, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1);
	}
	cv::putText(image, label, cv::Point(rotated_box.center), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 1);
}


//Post-processing
void post_process(const cv::Mat image, cv::Mat& result, float* output_data_host)
{
	std::vector<cv::Rect> boxes;
	std::vector<cv::RotatedRect> rotated_boxes;
	std::vector<float> scores;
	std::vector<int> class_ids;

	for (int i = 0; i < output_numbox; i++)
	{
		std::vector<float> classes_scores(num_classes);
		for (int j = 0; j < num_classes; j++)
		{
			float* ptr = output_data_host + (4 + j) * output_numbox + i;
			classes_scores[j] = ptr[0];
		}
		int class_id = std::max_element(classes_scores.begin(), classes_scores.end()) - classes_scores.begin();
		float score = classes_scores[class_id];
		if (score < score_threshold)
			continue;

		float x = (output_data_host + 0 * output_numbox + i)[0];
		float y = (output_data_host + 1 * output_numbox + i)[0];
		float w = (output_data_host + 2 * output_numbox + i)[0];
		float h = (output_data_host + 3 * output_numbox + i)[0];
		float angle = (output_data_host + (4 + num_classes) * output_numbox + i)[0];
		int left = int(x - 0.5 * w);
		int top = int(y - 0.5 * h);
		int width = int(w);
		int height = int(h);
		cv::Rect box = cv::Rect(left, top, width, height);
		scale_box(box, image.size());
		boxes.push_back(box);
		scores.push_back(score);
		class_ids.push_back(class_id);

		cv::RotatedRect rotated_box;
		rotated_box.center = cv::Point2f(x, y);
		rotated_box.size = cv::Size2f(w, h);
		if (angle >= 0.5 * CV_PI && angle <= 0.75 * CV_PI)
			angle -= CV_PI;
		rotated_box.angle = angle * 180 / CV_PI;
		scale_box(rotated_box, image.size());
		rotated_boxes.push_back(rotated_box);
	}

	std::vector<int> indices;
	nms(boxes, scores, score_threshold, nms_threshold, indices);
	for (int i = 0; i < indices.size(); i++)
	{
		int idx = indices[i];
		cv::RotatedRect  rotated_box = rotated_boxes[idx];
		std::string label = class_names[class_ids[idx]] + ":" + cv::format("%.2f", scores[idx]);
		draw_result(result, label, rotated_box);
	}
}


int main(int argc, char* argv[])
{
	float* inputs = nullptr;
	float* outputs = nullptr;
	cudaMallocHost(&inputs, sizeof(float) * input_numel);
	cudaMallocHost(&outputs, sizeof(float) * output_numel);
	cv::Mat image = cv::imread("airport.jpg");
	pre_process(image, inputs);

	std::string model = "yolov8n-obb.engine";
	process(model, inputs, outputs);

	cv::Mat result = image.clone();
	post_process(image, result, outputs);

	cv::imwrite("result.jpg", result);
	cudaFreeHost(outputs);
	cudaFreeHost(inputs);
	return 0;
}

Result visualization:
(Figure: detection results drawn on the test image)
