利用OpenCV的DNN模块加载onnx模型文件进行图片检测。
1、使用yolov5工程代码,调用export.py导出onnx模型。
2、下载opencv版本,https://opencv.org/releases/
使用opencv版本4.5.3或以上,本文使用的opencv4.6.0
3、使用vc2015编写使用代码。
// dnnUseOnnx.cpp : 定义控制台应用程序的入口点。
#include <fstream>
#include <iostream>
#include <string>
#include <map>
#include <opencv2/opencv.hpp>
/// One detection produced by YOLOv5Detector::detect.
struct DetectResult
{
    /// Column index of the best class score for this box (used as the key into classNames).
    int classId = -1;
    /// Combined confidence: objectness multiplied by the best class score.
    float score = 0.0f;
    /// Bounding box in pixel coordinates of the frame passed to detect().
    cv::Rect box;
};
/// Small wrapper around cv::dnn::Net that runs a YOLOv5 ONNX model on an image.
class YOLOv5Detector
{
public:
    /// Loads the ONNX model at `onnxpath`, stores the network input size (`iw` x `ih`)
    /// and the class-score threshold, and selects the CUDA or CPU backend.
    void initConfig(std::string onnxpath, int iw, int ih, float threshold, bool bIsEnableCuda);

    /// Runs inference on `frame`, draws the surviving boxes and a timing overlay on it,
    /// and appends one DetectResult per surviving box to `result`.
    void detect(cv::Mat& frame, std::vector<DetectResult>& result);

private:
    int input_w = 640;   // network input width in pixels
    int input_h = 640;   // network input height in pixels
    cv::dnn::Net net;    // loaded by initConfig
    // Must be a floating-point type: declared as int, 0.25 (and any threshold in
    // (0, 1) passed to initConfig) truncates to 0 and the class-score filter in
    // detect() degenerates to "score > 0".
    float threshold_score = 0.25f;
};
void YOLOv5Detector::initConfig(std::string onnxpath, int iw, int ih, float threshold, bool bIsEnableCuda)
{
this->input_w = iw;
this->input_h = ih;
this->threshold_score = threshold;
try
{
this->net = cv::dnn::readNetFromONNX(onnxpath);
//依据情况选定是否使用CUDA
if (bIsEnableCuda)
{
std::cout << "Attempty to use CUDA\n";
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
}
else
{
std::cout << "Running on CPU\n";
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
}
catch (cv::Exception & e) {
printf("exception %s\n", e.err.c_str());
}
}
/// Runs the loaded network on `frame`, draws the detections on it and appends
/// them to `results`.
///
/// @param frame    Input image. A single-channel image is converted to BGR in
///                 place. Boxes, label backgrounds and an FPS/time overlay are
///                 drawn onto it.
/// @param results  Receives one DetectResult per box that survives NMS. Entries
///                 are appended; the vector is not cleared first.
///
/// Returns without touching `results` when the frame is empty, when no network
/// has been loaded, or when the network output is not a 3-D tensor with more
/// than five values per row.
void YOLOv5Detector::detect(cv::Mat& frame, std::vector<DetectResult>& results)
{
    if (frame.empty())
    {
        std::cerr << "YOLOv5Detector::detect: input frame is empty\n";
        return;
    }
    if (this->net.empty())
    {
        std::cerr << "YOLOv5Detector::detect: network is not loaded\n";
        return;
    }

    // Candidate filtering thresholds used by this function.
    const double kMinObjectness = 0.45;     // rows below this objectness are skipped
    const float kNmsScoreThreshold = 0.25f; // minimum combined score kept by NMS
    const float kNmsIouThreshold = 0.45f;   // overlap threshold passed to NMS

    // Pre-processing: paste the frame into the top-left corner of a black
    // square so the resize done by blobFromImage keeps the aspect ratio.
    const int w = frame.cols;
    const int h = frame.rows;
    const int side = std::max(h, w);
    if (frame.channels() == 1)
    {
        cv::cvtColor(frame, frame, cv::COLOR_GRAY2BGR);
    }
    cv::Mat image = cv::Mat::zeros(cv::Size(side, side), CV_8UC3);
    frame.copyTo(image(cv::Rect(0, 0, w, h)));

    // Factors that map network-input coordinates back to pixels of the padded
    // image. They must follow the configured input size rather than a fixed
    // 640, otherwise boxes are misplaced for any other input size.
    const float x_factor = image.cols / static_cast<float>(this->input_w);
    const float y_factor = image.rows / static_cast<float>(this->input_h);

    cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(this->input_w, this->input_h),
                                          cv::Scalar(0, 0, 0), true, false);
    this->net.setInput(blob);

    // Output layer name: check it with Netron; usually "output0" or "output".
    cv::Mat preds = this->net.forward("output0");

    // For a Mat with more than two dimensions rows/cols are -1, so the extents
    // have to be read through `size`.
    if (preds.dims != 3 || preds.size[2] <= 5)
    {
        std::cerr << "YOLOv5Detector::detect: unexpected network output shape\n";
        return;
    }
    // The original author observed "output:1,25200,85" for their model.
    printf("output:%d,%d,%d\n", preds.size[0], preds.size[1], preds.size[2]);

    // View the output as a 2-D matrix with one candidate box per row. Per the
    // original author's notes each row holds: centre x, centre y, width,
    // height, objectness, followed by one score per class.
    cv::Mat det_output(preds.size[1], preds.size[2], CV_32F, preds.ptr<float>());

    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    std::vector<float> confidences;

    for (int i = 0; i < det_output.rows; i++)
    {
        const float objectness = det_output.at<float>(i, 4);
        if (objectness < kMinObjectness)
        {
            continue;
        }

        // Class scores occupy every column after the first five; the range end
        // is the total column count, not the number of classes.
        cv::Mat classes_scores = det_output.row(i).colRange(5, det_output.cols);
        cv::Point classIdPoint;
        double score = 0.0;
        cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &classIdPoint);

        if (score > this->threshold_score)
        {
            const float cx = det_output.at<float>(i, 0);
            const float cy = det_output.at<float>(i, 1);
            const float ow = det_output.at<float>(i, 2);
            const float oh = det_output.at<float>(i, 3);

            // Convert centre/size to a top-left anchored box in frame pixels.
            cv::Rect box;
            box.x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            box.y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            box.width = static_cast<int>(ow * x_factor);
            box.height = static_cast<int>(oh * y_factor);

            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(static_cast<float>(score * objectness));
        }
    }

    // Non-maximum suppression over the collected candidates.
    std::vector<int> indexes;
    cv::dnn::NMSBoxes(boxes, confidences, kNmsScoreThreshold, kNmsIouThreshold, indexes);
    for (const int index : indexes)
    {
        DetectResult dr;
        dr.box = boxes[index];
        dr.classId = classIds[index];
        dr.score = confidences[index];

        // Red outline plus a filled yellow strip above the box for the label.
        cv::rectangle(frame, dr.box, cv::Scalar(0, 0, 255), 2, 8);
        cv::rectangle(frame, cv::Point(dr.box.tl().x, dr.box.tl().y - 20),
                      cv::Point(dr.box.br().x, dr.box.tl().y), cv::Scalar(0, 255, 255), -1);
        results.push_back(dr);
    }

    // Overlay the inference time reported by the network's performance profile.
    std::ostringstream ss;
    std::vector<double> layersTimings;
    const double freq = cv::getTickFrequency() / 1000.0;
    const double time = net.getPerfProfile(layersTimings) / freq;
    ss << "FPS: " << 1000 / time << " ; time : " << time << " ms";
    cv::putText(frame, ss.str(), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
}
// Class-id to label table (80 entries, ids 0-79) used to caption detections.
// Entry 11 previously read "stop sign'" with a stray trailing apostrophe.
std::map<int, std::string> classNames = {
    { 0, "person" }, { 1, "bicycle" }, { 2, "car" }, { 3, "motorcycle" }, { 4, "airplane" },
    { 5, "bus" }, { 6, "train" }, { 7, "truck" }, { 8, "boat" }, { 9, "traffic light" },
    { 10, "fire hydrant" }, { 11, "stop sign" }, { 12, "parking meter" }, { 13, "bench" }, { 14, "bird" },
    { 15, "cat" }, { 16, "dog" }, { 17, "horse" }, { 18, "sheep" }, { 19, "cow" },
    { 20, "elephant" }, { 21, "bear" }, { 22, "zebra" }, { 23, "giraffe" }, { 24, "backpack" },
    { 25, "umbrella" }, { 26, "handbag" }, { 27, "tie" }, { 28, "suitcase" }, { 29, "frisbee" },
    { 30, "skis" }, { 31, "snowboard" }, { 32, "sports ball" }, { 33, "kite" }, { 34, "baseball bat" },
    { 35, "baseball glove" }, { 36, "skateboard" }, { 37, "surfboard" }, { 38, "tennis racket" }, { 39, "bottle" },
    { 40, "wine glass" }, { 41, "cup" }, { 42, "fork" }, { 43, "knife" }, { 44, "spoon" },
    { 45, "bowl" }, { 46, "banana" }, { 47, "apple" }, { 48, "sandwich" }, { 49, "orange" },
    { 50, "broccoli" }, { 51, "carrot" }, { 52, "hot dog" }, { 53, "pizza" }, { 54, "donut" },
    { 55, "cake" }, { 56, "chair" }, { 57, "couch" }, { 58, "potted plant" }, { 59, "bed" },
    { 60, "dining table" }, { 61, "toilet" }, { 62, "tv" }, { 63, "laptop" }, { 64, "mouse" },
    { 65, "remote" }, { 66, "keyboard" }, { 67, "cell phone" }, { 68, "microwave" }, { 69, "oven" },
    { 70, "toaster" }, { 71, "sink" }, { 72, "refrigerator" }, { 73, "book" }, { 74, "clock" },
    { 75, "vase" }, { 76, "scissors" }, { 77, "teddy bear" }, { 78, "hair drier" }, { 79, "toothbrush" }
};
int main(int argc, char* argv[])
{
std::shared_ptr<YOLOv5Detector> detector = std::make_shared<YOLOv5Detector>();
detector->initConfig(R"(D:\python-project\yolov5\yolov5s.onnx)", 640, 640, 0.25f, false);
cv::Mat frame = cv::imread(R"(D:\python-project\yolov5\data\images\bus.jpg)");
std::vector<DetectResult> results;
detector->detect(frame, results);
for (DetectResult& dr : results)
{
cv::Rect box = dr.box;
cv::putText(frame, classNames[dr.classId]+ " "+ std::to_string(dr.score), cv::Point(box.tl().x, box.tl().y - 10), cv::FONT_HERSHEY_SIMPLEX,
.5, cv::Scalar(0, 0, 0));
}
cv::imshow("OpenCV-DNN-yolov5", frame);
cv::waitKey();
results.clear();
}
运行效果:
注意事项:
1)、readNetFromONNX加载onnx模型出错。 interp_mode != "asymmetric",这个错误信息表明你在使用OpenCV的readNetFromONNX函数加载ONNX模型时,模型中的某些节点的插值模式(interp_mode)不是"asymmetric"。
解决方法:使用opencv版本4.5.3或以上,本文使用的opencv4.6.0
2)、检测到的目标比yolov5工程detect.py推理出来的目标少。
解决方法:取类别分数时不要写成det_output.row(i).colRange(5, num_class)(num_class:最大分类数)。colRange的第二个参数是结束列(不包含),应传入输出的总列数preds.size[2](即5 + 类别数),否则最后几个类别的分数会被漏掉:
cv::Mat classes_scores = det_output.row(i).colRange(5, preds.size[2]);