目录
效果
4K视频 CPU前处理效果
4K视频 CUDA核函数前处理效果
2K视频 CUDA核函数前处理效果
1080P 视频 CUDA核函数前处理效果
模型
电脑环境
项目
代码
下载
效果
C++ TensorRT yolov8推理 CUDA核函数加速前处理
4K视频 CPU前处理效果
4K视频 CUDA核函数前处理效果
2K视频 CUDA核函数前处理效果
1080P 视频 CUDA核函数前处理效果
模型
Model Properties
-------------------------
date:2023-09-05T13:17:15.396588
description:Ultralytics YOLOv8n model trained on coco.yaml
author:Ultralytics
task:detect
license:AGPL-3.0 https://ultralytics.com/license
version:8.0.170
stride:32
batch:1
imgsz:[640, 640]
names:{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
---------------------------------------------------------------
Inputs
-------------------------
name:images
tensor:Float[1, 3, 640, 640]
---------------------------------------------------------------
Outputs
-------------------------
name:output0
tensor:Float[1, 84, 8400]
---------------------------------------------------------------
电脑环境
处理器:AMD Ryzen 7 7735H with Radeon Graphics 3.20 GHz
内存:16.0 GB
显卡:NVIDIA GeForce RTX 4060 Laptop GPU
操作系统:Windows 10 企业版
opencv-4.8.1
CUDA12.4
TensorRT-8.6.1.6
VS2022
项目
包含目录
库目录
附加依赖项
cublas.lib
cublasLt.lib
cuda.lib
cudadevrt.lib
cudart.lib
cudart_static.lib
cudnn.lib
cudnn64_8.lib
cudnn_adv_infer.lib
cudnn_adv_infer64_8.lib
cudnn_adv_train.lib
cudnn_adv_train64_8.lib
cudnn_cnn_infer.lib
cudnn_cnn_infer64_8.lib
cudnn_cnn_train.lib
cudnn_cnn_train64_8.lib
cudnn_ops_infer.lib
cudnn_ops_infer64_8.lib
cudnn_ops_train.lib
cudnn_ops_train64_8.lib
cufft.lib
cufftw.lib
cufilt.lib
curand.lib
cusolver.lib
cusolverMg.lib
cusparse.lib
nppc.lib
nppial.lib
nppicc.lib
nppidei.lib
nppif.lib
nppig.lib
nppim.lib
nppist.lib
nppisu.lib
nppitc.lib
npps.lib
nvblas.lib
nvJitLink.lib
nvJitLink_static.lib
nvjpeg.lib
nvml.lib
nvptxcompiler_static.lib
nvrtc-builtins_static.lib
nvrtc.lib
nvrtc_static.lib
OpenCL.lib
nvinfer.lib
nvinfer_dispatch.lib
nvinfer_lean.lib
nvinfer_plugin.lib
nvinfer_vc_plugin.lib
nvonnxparser.lib
nvparsers.lib
opencv_world481.lib
代码
#define _CRT_SECURE_NO_DEPRECATE
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <numeric>
#include "NvInfer.h"
#include "kernel_function.h"
#include "utils.h"
// ---- Detection configuration ----
// Class names, one per non-empty line of the label file; filled by init().
std::vector<std::string> labels;
// Minimum best-class score for a candidate to be kept in postprocess().
float score_threshold{ 0.3f };
// Overlap threshold handed to cv::dnn::NMSBoxes in postprocess().
float nms_threshold{ 0.5f };
// File locations; assigned in main() before init() runs.
std::string lable_path;
std::string engin_path;
std::string video_path;

// ---- TensorRT state ----
// Holds the runtime / engine / context pointers created in init().
utils::NvinferStruct* p{ nullptr };
utils::Logger logger;
// Passed to normDevice() in preprocess().
utils::InitParameter m_param;
// Inverse of the source->network affine transform; computed in init().
utils::AffineMat m_dst2src;

// ---- Geometry ----
// Source video frame size (set in main() from the capture properties).
int src_w{ 0 };
int src_h{ 0 };
// Network input size.
int dst_w{ 640 };
int dst_h{ 640 };
// Frame rate reported by the video container.
double fps{ 0.0 };
// Number of floats in the network output buffer.
int output_size{ 0 };

// ---- Per-frame timing, in seconds (multiplied by 1000 when drawn) ----
double preprocessTime{ 0.0 };
double inferTime{ 0.0 };
double postprocessTime{ 0.0 };
double totalTime{ 0.0 };
double detFps{ 0.0 };

// ---- Device buffers: input side ----
unsigned char* m_input_src_device{ nullptr };
float* m_input_resize_device{ nullptr };
float* m_input_rgb_device{ nullptr };
float* m_input_norm_device{ nullptr };
float* m_input_hwc_device{ nullptr };

// ---- Output side: device buffer and its host copy ----
float* m_output_src_device{ nullptr };
float* output_data{ nullptr };
// Initialization.
//
// Loads the class labels from lable_path, deserializes the TensorRT engine
// stored at engin_path, creates the execution context, computes the
// letterbox transform between the source frame (src_w x src_h) and the
// network input (dst_w x dst_h), and allocates the host/device buffers.
//
// src_w / src_h must be set before calling this function.
// Returns 0 on success and -1 on any failure (the reason is written to std::cerr).
int init() {
    // The scale factors below divide by the source size.
    if (src_w <= 0 || src_h <= 0) {
        std::cerr << "Invalid source frame size: " << src_w << "x" << src_h << std::endl;
        return -1;
    }

    // ---- Labels: one class name per non-empty line ----
    std::ifstream lable_file(lable_path);
    if (!lable_file.is_open())
    {
        std::cerr << "Error opening file: " << lable_path << std::endl;
        return -1;
    }
    std::string line;
    while (std::getline(lable_file, line))
    {
        if (!line.empty())
        {
            labels.push_back(line);
        }
    }
    lable_file.close();
    if (labels.empty()) {
        // The output buffer size and the class-score slice both depend on the label count.
        std::cerr << "No labels found in file: " << lable_path << std::endl;
        return -1;
    }

    // ---- Read the serialized engine as raw bytes ----
    std::ifstream engin_file(engin_path.data(), std::ios::binary);
    if (!engin_file.good()) {
        std::cerr << "文件无法打开,请确定文件是否可用!" << std::endl;
        return -1;
    }
    engin_file.seekg(0, engin_file.end);                 // jump to the end of the file ...
    const std::streamoff end_pos = engin_file.tellg();   // ... so the read position equals the byte count
    engin_file.seekg(0, engin_file.beg);                 // rewind for the actual read
    if (end_pos <= 0) {
        // tellg() reports -1 on failure; an empty file cannot hold an engine either.
        std::cerr << "Engine file is empty or unreadable: " << engin_path << std::endl;
        return -1;
    }
    const size_t size = static_cast<size_t>(end_pos);
    std::vector<char> modelStream(size);
    engin_file.read(modelStream.data(), static_cast<std::streamsize>(size));
    if (!engin_file) {
        std::cerr << "Failed to read engine file: " << engin_path << std::endl;
        return -1;
    }
    engin_file.close();

    // ---- TensorRT objects ----
    p = new utils::NvinferStruct();
    p->runtime = nvinfer1::createInferRuntime(logger);
    if (!p->runtime) {
        std::cerr << "Failed to create TensorRT runtime" << std::endl;
        return -1;
    }
    p->engine = p->runtime->deserializeCudaEngine(modelStream.data(), size);
    if (!p->engine) {
        std::cerr << "Failed to deserialize TensorRT engine: " << engin_path << std::endl;
        return -1;
    }
    p->context = p->engine->createExecutionContext();
    if (!p->context) {
        std::cerr << "Failed to create TensorRT execution context" << std::endl;
        return -1;
    }

    // ---- Host output buffer: (4 box values + one score per class) x 8400 candidates ----
    output_size = static_cast<int>(1 * (labels.size() + 4) * 8400);
    output_data = new float[output_size];   // released with delete[] in destroy()

    // ---- Letterbox transform: uniform scale, centred in the network input ----
    const float a = float(dst_h) / src_h;
    const float b = float(dst_w) / src_w;
    const float scale = a < b ? a : b;
    // An alternative offset, (-scale * src + dst + scale - 1) * 0.5, was used
    // here previously and is intentionally not applied.
    cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * src_w + dst_w) * 0.5,
        0.f, scale, (-scale * src_h + dst_h) * 0.5);
    cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
    cv::invertAffineTransform(src2dst, dst2src);
    // Flatten the 2x3 inverse matrix row by row into the struct given to resizeDevice().
    m_dst2src.v0 = dst2src.ptr<float>(0)[0];
    m_dst2src.v1 = dst2src.ptr<float>(0)[1];
    m_dst2src.v2 = dst2src.ptr<float>(0)[2];
    m_dst2src.v3 = dst2src.ptr<float>(1)[0];
    m_dst2src.v4 = dst2src.ptr<float>(1)[1];
    m_dst2src.v5 = dst2src.ptr<float>(1)[2];

    // ---- Device buffers: one per preprocessing stage plus the network output ----
    CHECK(cudaMalloc(&m_input_src_device, sizeof(unsigned char) * 3 * src_h * src_w));
    CHECK(cudaMalloc(&m_input_resize_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_input_rgb_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_input_norm_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_input_hwc_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_output_src_device, sizeof(float) * output_size));
    return 0;
}
// Pre-processing.
//
// Uploads one frame to the GPU and calls resizeDevice, bgr2rgbDevice,
// normDevice and hwc2chwDevice in that order; the final result is written to
// m_input_hwc_device, which main() binds as the network input.
//
// frame must be an 8-bit, 3-channel image of exactly src_w x src_h pixels,
// because the upload copies src_w * src_h * 3 bytes into a buffer of that
// size. A mismatch raises cv::Exception (via CV_Assert) instead of reading
// past the end of the frame.
void preprocess(cv::Mat& frame) {
    CV_Assert(!frame.empty());
    CV_Assert(frame.cols == src_w && frame.rows == src_h && frame.type() == CV_8UC3);

    // cudaMemcpy needs a single dense buffer; clone() produces one when the
    // frame has row padding or is a view into a larger image.
    const cv::Mat dense = frame.isContinuous() ? frame : frame.clone();

    CHECK(cudaMemcpy(m_input_src_device, dense.data, sizeof(unsigned char) * 3 * src_h * src_w, cudaMemcpyHostToDevice));
    // NOTE(review): the leading 1 is presumably the batch size and 114 the
    // letterbox padding value; confirm against kernel_function.h.
    resizeDevice(1, m_input_src_device, src_w, src_h, m_input_resize_device, dst_w, dst_h, 114, m_dst2src);
    bgr2rgbDevice(1, m_input_resize_device, dst_w, dst_h, m_input_rgb_device, dst_w, dst_h);
    normDevice(1, m_input_rgb_device, dst_w, dst_h, m_input_norm_device, dst_w, dst_h, m_param);
    hwc2chwDevice(1, m_input_norm_device, dst_w, dst_h, m_input_hwc_device, dst_w, dst_h);
}
// Post-processing.
//
// Copies the raw network output back to the host, keeps every candidate
// whose best class score exceeds score_threshold, maps its box from network
// input coordinates back to source-frame coordinates, runs non-maximum
// suppression and appends the surviving detections to detectionResult.
//
// detectionResult is appended to, not cleared.
void postprocess(std::vector<utils::detresult>& detectionResult) {
    CHECK(cudaMemcpy(output_data, m_output_src_device, output_size * sizeof(float), cudaMemcpyDeviceToHost));

    // The output is laid out as (4 + classes) rows x 8400 candidates;
    // transposing gives one candidate per row: cx, cy, w, h, then class scores.
    const int num_classes = static_cast<int>(labels.size());
    cv::Mat dout(num_classes + 4, 8400, CV_32F, output_data);
    cv::Mat det_output = dout.t();

    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    std::vector<float> confidences;

    // Undo the letterbox applied in init(): a uniform scale (the smaller of
    // the two ratios) plus centring padding on the other axis.
    const float ratio_h = dst_h / static_cast<float>(src_h);
    const float ratio_w = dst_w / static_cast<float>(src_w);
    const float scale = std::min(ratio_h, ratio_w);
    const float pad_x = (dst_w - scale * src_w) * 0.5f;
    const float pad_y = (dst_h - scale * src_h) * 0.5f;
    const cv::Rect image_bounds(0, 0, src_w, src_h);

    for (int i = 0; i < det_output.rows; i++)
    {
        cv::Mat classes_scores = det_output.row(i).colRange(4, num_classes + 4);
        cv::Point classIdPoint;
        double score = 0.0;
        cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &classIdPoint);
        if (score <= score_threshold)
        {
            continue;
        }

        const float* row = det_output.ptr<float>(i);
        const float cx = row[0];
        const float cy = row[1];
        const float ow = row[2];
        const float oh = row[3];

        // Stay in floating point until the very end: truncating before the
        // division by `scale` would magnify the rounding error by 1 / scale
        // (several pixels on a 4K source).
        const float left = (cx - 0.5f * ow - pad_x) / scale;
        const float top = (cy - 0.5f * oh - pad_y) / scale;
        cv::Rect box(cvRound(left), cvRound(top), cvRound(ow / scale), cvRound(oh / scale));

        // Clip to the frame. Intersection shrinks the box instead of shifting
        // it and can never produce a negative width or height.
        box &= image_bounds;
        if (box.empty())
        {
            continue;   // entirely outside the frame
        }

        boxes.push_back(box);
        classIds.push_back(classIdPoint.x);   // column index within the score slice == class id
        confidences.push_back(static_cast<float>(score));
    }

    std::vector<int> indexes;
    cv::dnn::NMSBoxes(boxes, confidences, score_threshold, nms_threshold, indexes);
    for (const int index : indexes)
    {
        utils::detresult box(labels[classIds[index]], classIds[index], confidences[index], boxes[index]);
        detectionResult.push_back(box);
    }
}
// Rendering.
//
// Draws every detection (red rectangle plus a green "name:score" caption just
// above it), overlays the timing statistics in the top-left corner and shows
// the frame in the "detresult" window.
void draw(cv::Mat& frame, std::vector<utils::detresult>& detectionResult) {
    const cv::Scalar red(0, 0, 255);
    const cv::Scalar green(0, 255, 0);

    for (const utils::detresult& det : detectionResult)
    {
        cv::rectangle(frame, det.rect, red, 2);
        const std::string caption = det.className + ":" + cv::format("%.2f", det.confidence);
        cv::putText(frame, caption, cv::Point(det.rect.x, det.rect.y - 5), cv::FONT_HERSHEY_SIMPLEX, 1, green, 2);
    }

    // Timing overlay: one line every 40 px, starting at y = 30.
    int line_y = 30;
    const auto put_stat = [&](const std::string& text)
    {
        cv::putText(frame, text, cv::Point(10, line_y), cv::FONT_HERSHEY_SIMPLEX, 1, green, 2);
        line_y += 40;
    };
    // Times are stored in seconds and displayed in milliseconds.
    put_stat("preprocessTime:" + std::to_string(preprocessTime * 1000) + "ms");
    put_stat("inferTime:" + std::to_string(inferTime * 1000) + "ms");
    put_stat("postprocessTime:" + std::to_string(postprocessTime * 1000) + "ms");
    put_stat("totalTime:" + std::to_string(totalTime * 1000) + "ms");
    put_stat("detFps:" + std::to_string(detFps));

    cv::imshow("detresult", frame);
}
// Cleanup.
//
// Releases the TensorRT objects, the device buffers and the host output
// buffer created by init(). Every pointer is reset to nullptr afterwards, so
// calling this after a failed or partial init(), or calling it twice, does
// not dereference or free a stale pointer.
void destroy() {
    if (p != nullptr)
    {
        // Release in reverse order of creation: context, engine, runtime.
        if (p->context) { p->context->destroy(); }
        if (p->engine) { p->engine->destroy(); }
        if (p->runtime) { p->runtime->destroy(); }
        delete p;
        p = nullptr;
    }

    // input
    CHECK(cudaFree(m_input_src_device));
    m_input_src_device = nullptr;
    CHECK(cudaFree(m_input_resize_device));
    m_input_resize_device = nullptr;
    CHECK(cudaFree(m_input_rgb_device));
    m_input_rgb_device = nullptr;
    CHECK(cudaFree(m_input_norm_device));
    m_input_norm_device = nullptr;
    CHECK(cudaFree(m_input_hwc_device));
    m_input_hwc_device = nullptr;

    // output
    CHECK(cudaFree(m_output_src_device));
    m_output_src_device = nullptr;

    // output_data comes from `new float[]` in init(), so it needs the array form of delete.
    delete[] output_data;
    output_data = nullptr;
}
// Entry point: opens the video, initializes the detector, then for each
// frame runs preprocess -> inference -> postprocess, measures each stage and
// displays the annotated frame. ESC or end of video stops the loop.
// Returns 0 on normal completion, -1 if the video or the detector cannot be set up.
int main()
{
    lable_path = "model/lable.txt";
    engin_path = "model/yolov8n.engine";
    video_path = "test/VID_4K.mp4"; //3840x2160

    cv::VideoCapture capture(video_path);
    // Make sure the video was opened.
    if (!capture.isOpened())
    {
        std::cout << "无法读取视频文件" << std::endl;
        return -1;
    }

    fps = capture.get(cv::CAP_PROP_FPS);
    src_w = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_WIDTH));
    src_h = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_HEIGHT));
    dst_w = 640;
    dst_h = 640;
    score_threshold = 0.3f;
    nms_threshold = 0.5f;

    // Initialize; nothing below is usable if this fails, so stop here.
    // (The process exits immediately, so partially created resources are
    // left for the OS to reclaim.)
    if (init() != 0)
    {
        std::cerr << "Initialization failed" << std::endl;
        return -1;
    }

    // Create the window once; cv::WINDOW_NORMAL lets the user resize it.
    cv::namedWindow("detresult", cv::WINDOW_NORMAL);
    // Optional fixed size: cv::resizeWindow("detresult", src_w / 2, src_h / 2);

    cv::Mat frame;
    while (true)
    {
        // Read one frame; a failed read means the video is finished.
        if (!capture.read(frame))
        {
            std::cout << "读取完毕" << std::endl;
            break;
        }

        // Pre-processing
        double start = static_cast<double>(cv::getTickCount());
        preprocess(frame);
        preprocessTime = (static_cast<double>(cv::getTickCount()) - start) / cv::getTickFrequency();

        // Inference
        // NOTE(review): assumes binding 0 is the input and binding 1 the
        // output of the engine; confirm against the engine's binding order.
        start = static_cast<double>(cv::getTickCount());
        void* bindings[] = { m_input_hwc_device, m_output_src_device };
        const bool inferred = p->context->executeV2(bindings);
        inferTime = (static_cast<double>(cv::getTickCount()) - start) / cv::getTickFrequency();
        if (!inferred)
        {
            // The output buffer holds no valid result for this frame.
            std::cerr << "TensorRT inference failed" << std::endl;
            break;
        }

        // Post-processing
        start = static_cast<double>(cv::getTickCount());
        std::vector<utils::detresult> detectionResult;
        postprocess(detectionResult);
        postprocessTime = (static_cast<double>(cv::getTickCount()) - start) / cv::getTickFrequency();

        // Totals
        totalTime = preprocessTime + inferTime + postprocessTime;
        detFps = totalTime > 0.0 ? 1.0 / totalTime : 0.0;

        // Draw and display
        draw(frame, detectionResult);
        if (cv::waitKey(1) == 27) // ESC leaves the loop
        {
            break;
        }
    }

    cv::destroyAllWindows();
    destroy();
    return 0;
}
#define _CRT_SECURE_NO_DEPRECATE
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <numeric>
#include "NvInfer.h"
#include "kernel_function.h"
#include "utils.h"
// ---- Detection configuration ----
// Class names, one per non-empty line of the label file; filled by init().
std::vector<std::string> labels;
// Minimum best-class score for a candidate to be kept in postprocess().
float score_threshold{ 0.3f };
// Overlap threshold handed to cv::dnn::NMSBoxes in postprocess().
float nms_threshold{ 0.5f };
// File locations; assigned in main() before init() runs.
std::string lable_path;
std::string engin_path;
std::string video_path;

// ---- TensorRT state ----
// Holds the runtime / engine / context pointers created in init().
utils::NvinferStruct* p{ nullptr };
utils::Logger logger;
// Passed to normDevice() in preprocess().
utils::InitParameter m_param;
// Inverse of the source->network affine transform; computed in init().
utils::AffineMat m_dst2src;

// ---- Geometry ----
// Source video frame size (set in main() from the capture properties).
int src_w{ 0 };
int src_h{ 0 };
// Network input size.
int dst_w{ 640 };
int dst_h{ 640 };
// Frame rate reported by the video container.
double fps{ 0.0 };
// Number of floats in the network output buffer.
int output_size{ 0 };

// ---- Per-frame timing, in seconds (multiplied by 1000 when drawn) ----
double preprocessTime{ 0.0 };
double inferTime{ 0.0 };
double postprocessTime{ 0.0 };
double totalTime{ 0.0 };
double detFps{ 0.0 };

// ---- Device buffers: input side ----
unsigned char* m_input_src_device{ nullptr };
float* m_input_resize_device{ nullptr };
float* m_input_rgb_device{ nullptr };
float* m_input_norm_device{ nullptr };
float* m_input_hwc_device{ nullptr };

// ---- Output side: device buffer and its host copy ----
float* m_output_src_device{ nullptr };
float* output_data{ nullptr };
// Initialization.
//
// Loads the class labels from lable_path, deserializes the TensorRT engine
// stored at engin_path, creates the execution context, computes the
// letterbox transform between the source frame (src_w x src_h) and the
// network input (dst_w x dst_h), and allocates the host/device buffers.
//
// src_w / src_h must be set before calling this function.
// Returns 0 on success and -1 on any failure (the reason is written to std::cerr).
int init() {
    // The scale factors below divide by the source size.
    if (src_w <= 0 || src_h <= 0) {
        std::cerr << "Invalid source frame size: " << src_w << "x" << src_h << std::endl;
        return -1;
    }

    // ---- Labels: one class name per non-empty line ----
    std::ifstream lable_file(lable_path);
    if (!lable_file.is_open())
    {
        std::cerr << "Error opening file: " << lable_path << std::endl;
        return -1;
    }
    std::string line;
    while (std::getline(lable_file, line))
    {
        if (!line.empty())
        {
            labels.push_back(line);
        }
    }
    lable_file.close();
    if (labels.empty()) {
        // The output buffer size and the class-score slice both depend on the label count.
        std::cerr << "No labels found in file: " << lable_path << std::endl;
        return -1;
    }

    // ---- Read the serialized engine as raw bytes ----
    std::ifstream engin_file(engin_path.data(), std::ios::binary);
    if (!engin_file.good()) {
        std::cerr << "文件无法打开,请确定文件是否可用!" << std::endl;
        return -1;
    }
    engin_file.seekg(0, engin_file.end);                 // jump to the end of the file ...
    const std::streamoff end_pos = engin_file.tellg();   // ... so the read position equals the byte count
    engin_file.seekg(0, engin_file.beg);                 // rewind for the actual read
    if (end_pos <= 0) {
        // tellg() reports -1 on failure; an empty file cannot hold an engine either.
        std::cerr << "Engine file is empty or unreadable: " << engin_path << std::endl;
        return -1;
    }
    const size_t size = static_cast<size_t>(end_pos);
    std::vector<char> modelStream(size);
    engin_file.read(modelStream.data(), static_cast<std::streamsize>(size));
    if (!engin_file) {
        std::cerr << "Failed to read engine file: " << engin_path << std::endl;
        return -1;
    }
    engin_file.close();

    // ---- TensorRT objects ----
    p = new utils::NvinferStruct();
    p->runtime = nvinfer1::createInferRuntime(logger);
    if (!p->runtime) {
        std::cerr << "Failed to create TensorRT runtime" << std::endl;
        return -1;
    }
    p->engine = p->runtime->deserializeCudaEngine(modelStream.data(), size);
    if (!p->engine) {
        std::cerr << "Failed to deserialize TensorRT engine: " << engin_path << std::endl;
        return -1;
    }
    p->context = p->engine->createExecutionContext();
    if (!p->context) {
        std::cerr << "Failed to create TensorRT execution context" << std::endl;
        return -1;
    }

    // ---- Host output buffer: (4 box values + one score per class) x 8400 candidates ----
    output_size = static_cast<int>(1 * (labels.size() + 4) * 8400);
    output_data = new float[output_size];   // released with delete[] in destroy()

    // ---- Letterbox transform: uniform scale, centred in the network input ----
    const float a = float(dst_h) / src_h;
    const float b = float(dst_w) / src_w;
    const float scale = a < b ? a : b;
    // An alternative offset, (-scale * src + dst + scale - 1) * 0.5, was used
    // here previously and is intentionally not applied.
    cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * src_w + dst_w) * 0.5,
        0.f, scale, (-scale * src_h + dst_h) * 0.5);
    cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
    cv::invertAffineTransform(src2dst, dst2src);
    // Flatten the 2x3 inverse matrix row by row into the struct given to resizeDevice().
    m_dst2src.v0 = dst2src.ptr<float>(0)[0];
    m_dst2src.v1 = dst2src.ptr<float>(0)[1];
    m_dst2src.v2 = dst2src.ptr<float>(0)[2];
    m_dst2src.v3 = dst2src.ptr<float>(1)[0];
    m_dst2src.v4 = dst2src.ptr<float>(1)[1];
    m_dst2src.v5 = dst2src.ptr<float>(1)[2];

    // ---- Device buffers: one per preprocessing stage plus the network output ----
    CHECK(cudaMalloc(&m_input_src_device, sizeof(unsigned char) * 3 * src_h * src_w));
    CHECK(cudaMalloc(&m_input_resize_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_input_rgb_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_input_norm_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_input_hwc_device, sizeof(float) * 3 * dst_h * dst_w));
    CHECK(cudaMalloc(&m_output_src_device, sizeof(float) * output_size));
    return 0;
}
// Pre-processing.
//
// Uploads one frame to the GPU and calls resizeDevice, bgr2rgbDevice,
// normDevice and hwc2chwDevice in that order; the final result is written to
// m_input_hwc_device, which main() binds as the network input.
//
// frame must be an 8-bit, 3-channel image of exactly src_w x src_h pixels,
// because the upload copies src_w * src_h * 3 bytes into a buffer of that
// size. A mismatch raises cv::Exception (via CV_Assert) instead of reading
// past the end of the frame.
void preprocess(cv::Mat& frame) {
    CV_Assert(!frame.empty());
    CV_Assert(frame.cols == src_w && frame.rows == src_h && frame.type() == CV_8UC3);

    // cudaMemcpy needs a single dense buffer; clone() produces one when the
    // frame has row padding or is a view into a larger image.
    const cv::Mat dense = frame.isContinuous() ? frame : frame.clone();

    CHECK(cudaMemcpy(m_input_src_device, dense.data, sizeof(unsigned char) * 3 * src_h * src_w, cudaMemcpyHostToDevice));
    // NOTE(review): the leading 1 is presumably the batch size and 114 the
    // letterbox padding value; confirm against kernel_function.h.
    resizeDevice(1, m_input_src_device, src_w, src_h, m_input_resize_device, dst_w, dst_h, 114, m_dst2src);
    bgr2rgbDevice(1, m_input_resize_device, dst_w, dst_h, m_input_rgb_device, dst_w, dst_h);
    normDevice(1, m_input_rgb_device, dst_w, dst_h, m_input_norm_device, dst_w, dst_h, m_param);
    hwc2chwDevice(1, m_input_norm_device, dst_w, dst_h, m_input_hwc_device, dst_w, dst_h);
}
// Post-processing.
//
// Copies the raw network output back to the host, keeps every candidate
// whose best class score exceeds score_threshold, maps its box from network
// input coordinates back to source-frame coordinates, runs non-maximum
// suppression and appends the surviving detections to detectionResult.
//
// detectionResult is appended to, not cleared.
void postprocess(std::vector<utils::detresult>& detectionResult) {
    CHECK(cudaMemcpy(output_data, m_output_src_device, output_size * sizeof(float), cudaMemcpyDeviceToHost));

    // The output is laid out as (4 + classes) rows x 8400 candidates;
    // transposing gives one candidate per row: cx, cy, w, h, then class scores.
    const int num_classes = static_cast<int>(labels.size());
    cv::Mat dout(num_classes + 4, 8400, CV_32F, output_data);
    cv::Mat det_output = dout.t();

    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    std::vector<float> confidences;

    // Undo the letterbox applied in init(): a uniform scale (the smaller of
    // the two ratios) plus centring padding on the other axis.
    const float ratio_h = dst_h / static_cast<float>(src_h);
    const float ratio_w = dst_w / static_cast<float>(src_w);
    const float scale = std::min(ratio_h, ratio_w);
    const float pad_x = (dst_w - scale * src_w) * 0.5f;
    const float pad_y = (dst_h - scale * src_h) * 0.5f;
    const cv::Rect image_bounds(0, 0, src_w, src_h);

    for (int i = 0; i < det_output.rows; i++)
    {
        cv::Mat classes_scores = det_output.row(i).colRange(4, num_classes + 4);
        cv::Point classIdPoint;
        double score = 0.0;
        cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &classIdPoint);
        if (score <= score_threshold)
        {
            continue;
        }

        const float* row = det_output.ptr<float>(i);
        const float cx = row[0];
        const float cy = row[1];
        const float ow = row[2];
        const float oh = row[3];

        // Stay in floating point until the very end: truncating before the
        // division by `scale` would magnify the rounding error by 1 / scale
        // (several pixels on a 4K source).
        const float left = (cx - 0.5f * ow - pad_x) / scale;
        const float top = (cy - 0.5f * oh - pad_y) / scale;
        cv::Rect box(cvRound(left), cvRound(top), cvRound(ow / scale), cvRound(oh / scale));

        // Clip to the frame. Intersection shrinks the box instead of shifting
        // it and can never produce a negative width or height.
        box &= image_bounds;
        if (box.empty())
        {
            continue;   // entirely outside the frame
        }

        boxes.push_back(box);
        classIds.push_back(classIdPoint.x);   // column index within the score slice == class id
        confidences.push_back(static_cast<float>(score));
    }

    std::vector<int> indexes;
    cv::dnn::NMSBoxes(boxes, confidences, score_threshold, nms_threshold, indexes);
    for (const int index : indexes)
    {
        utils::detresult box(labels[classIds[index]], classIds[index], confidences[index], boxes[index]);
        detectionResult.push_back(box);
    }
}
// Rendering.
//
// Draws every detection (red rectangle plus a green "name:score" caption just
// above it), overlays the timing statistics in the top-left corner and shows
// the frame in the "detresult" window.
void draw(cv::Mat& frame, std::vector<utils::detresult>& detectionResult) {
    const cv::Scalar red(0, 0, 255);
    const cv::Scalar green(0, 255, 0);

    for (const utils::detresult& det : detectionResult)
    {
        cv::rectangle(frame, det.rect, red, 2);
        const std::string caption = det.className + ":" + cv::format("%.2f", det.confidence);
        cv::putText(frame, caption, cv::Point(det.rect.x, det.rect.y - 5), cv::FONT_HERSHEY_SIMPLEX, 1, green, 2);
    }

    // Timing overlay: one line every 40 px, starting at y = 30.
    int line_y = 30;
    const auto put_stat = [&](const std::string& text)
    {
        cv::putText(frame, text, cv::Point(10, line_y), cv::FONT_HERSHEY_SIMPLEX, 1, green, 2);
        line_y += 40;
    };
    // Times are stored in seconds and displayed in milliseconds.
    put_stat("preprocessTime:" + std::to_string(preprocessTime * 1000) + "ms");
    put_stat("inferTime:" + std::to_string(inferTime * 1000) + "ms");
    put_stat("postprocessTime:" + std::to_string(postprocessTime * 1000) + "ms");
    put_stat("totalTime:" + std::to_string(totalTime * 1000) + "ms");
    put_stat("detFps:" + std::to_string(detFps));

    cv::imshow("detresult", frame);
}
// Cleanup.
//
// Releases the TensorRT objects, the device buffers and the host output
// buffer created by init(). Every pointer is reset to nullptr afterwards, so
// calling this after a failed or partial init(), or calling it twice, does
// not dereference or free a stale pointer.
void destroy() {
    if (p != nullptr)
    {
        // Release in reverse order of creation: context, engine, runtime.
        if (p->context) { p->context->destroy(); }
        if (p->engine) { p->engine->destroy(); }
        if (p->runtime) { p->runtime->destroy(); }
        delete p;
        p = nullptr;
    }

    // input
    CHECK(cudaFree(m_input_src_device));
    m_input_src_device = nullptr;
    CHECK(cudaFree(m_input_resize_device));
    m_input_resize_device = nullptr;
    CHECK(cudaFree(m_input_rgb_device));
    m_input_rgb_device = nullptr;
    CHECK(cudaFree(m_input_norm_device));
    m_input_norm_device = nullptr;
    CHECK(cudaFree(m_input_hwc_device));
    m_input_hwc_device = nullptr;

    // output
    CHECK(cudaFree(m_output_src_device));
    m_output_src_device = nullptr;

    // output_data comes from `new float[]` in init(), so it needs the array form of delete.
    delete[] output_data;
    output_data = nullptr;
}
// Entry point: opens the video, initializes the detector, then for each
// frame runs preprocess -> inference -> postprocess, measures each stage and
// displays the annotated frame. ESC or end of video stops the loop.
// Returns 0 on normal completion, -1 if the video or the detector cannot be set up.
int main()
{
    lable_path = "model/lable.txt";
    engin_path = "model/yolov8n.engine";
    video_path = "test/VID_4K.mp4"; //3840x2160

    cv::VideoCapture capture(video_path);
    // Make sure the video was opened.
    if (!capture.isOpened())
    {
        std::cout << "无法读取视频文件" << std::endl;
        return -1;
    }

    fps = capture.get(cv::CAP_PROP_FPS);
    src_w = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_WIDTH));
    src_h = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_HEIGHT));
    dst_w = 640;
    dst_h = 640;
    score_threshold = 0.3f;
    nms_threshold = 0.5f;

    // Initialize; nothing below is usable if this fails, so stop here.
    // (The process exits immediately, so partially created resources are
    // left for the OS to reclaim.)
    if (init() != 0)
    {
        std::cerr << "Initialization failed" << std::endl;
        return -1;
    }

    // Create the window once; cv::WINDOW_NORMAL lets the user resize it.
    cv::namedWindow("detresult", cv::WINDOW_NORMAL);
    // Optional fixed size: cv::resizeWindow("detresult", src_w / 2, src_h / 2);

    cv::Mat frame;
    while (true)
    {
        // Read one frame; a failed read means the video is finished.
        if (!capture.read(frame))
        {
            std::cout << "读取完毕" << std::endl;
            break;
        }

        // Pre-processing
        double start = static_cast<double>(cv::getTickCount());
        preprocess(frame);
        preprocessTime = (static_cast<double>(cv::getTickCount()) - start) / cv::getTickFrequency();

        // Inference
        // NOTE(review): assumes binding 0 is the input and binding 1 the
        // output of the engine; confirm against the engine's binding order.
        start = static_cast<double>(cv::getTickCount());
        void* bindings[] = { m_input_hwc_device, m_output_src_device };
        const bool inferred = p->context->executeV2(bindings);
        inferTime = (static_cast<double>(cv::getTickCount()) - start) / cv::getTickFrequency();
        if (!inferred)
        {
            // The output buffer holds no valid result for this frame.
            std::cerr << "TensorRT inference failed" << std::endl;
            break;
        }

        // Post-processing
        start = static_cast<double>(cv::getTickCount());
        std::vector<utils::detresult> detectionResult;
        postprocess(detectionResult);
        postprocessTime = (static_cast<double>(cv::getTickCount()) - start) / cv::getTickFrequency();

        // Totals
        totalTime = preprocessTime + inferTime + postprocessTime;
        detFps = totalTime > 0.0 ? 1.0 / totalTime : 0.0;

        // Draw and display
        draw(frame, detectionResult);
        if (cv::waitKey(1) == 27) // ESC leaves the loop
        {
            break;
        }
    }

    cv::destroyAllWindows();
    destroy();
    return 0;
}
下载
源码下载