使用Openvino部署C++的Yolov5时类别信息混乱问题记录
简单记录一下。
一、问题描述
问题描述:在使用C++版Openvino部署Yolov5的onnx格式模型时,通过读取classes.txt获得类别信息,出现模型类别混乱,或者根本不给图像赋予类别标签的情况。
二、解决办法
通过debug程序,发现是读取txt文件时的问题,读入txt文件时,会读出几行空白的数据。
最终发现原因是classes.txt文件中,多了几个空白的换行。需要尽量留意,删除掉空白的换行。
三、部署测试代码
三个部分,需要提前配置好opencv和openvino
1.main.cpp
#include <opencv2/opencv.hpp>
#include <boost/filesystem.hpp>
#include <iostream>
#include <string>
#include "yolov5vino.h"
namespace fs = boost::filesystem;
YOLOv5VINO yolov5VINO;
// Batch-detects every image in sourceDir with YOLOv5 and writes the
// annotated copies to destDir under their original file names.
// Returns 0 on success, 1 if the source directory is missing.
int main() {
    std::string sourceDir = "D:/work/C++Code/Yolov5Openvino/Test/testimgs";
    std::string destDir = "D:/work/C++Code/Yolov5Openvino/Test/output";
    std::string ModelPath = "D:/work/C++Code/Yolov5Openvino/Test/best.onnx";
    std::string ClassesPath = "D:/work/C++Code/Yolov5Openvino/Test/classes.txt";
    yolov5VINO.init(ModelPath, ClassesPath);
    // boost's directory_iterator throws on a missing/non-directory path,
    // so validate the source before iterating.
    if (!fs::exists(sourceDir) || !fs::is_directory(sourceDir)) {
        std::cerr << "Source directory not found: " << sourceDir << std::endl;
        return 1;
    }
    // Make sure the destination directory exists.
    if (!fs::exists(destDir)) {
        fs::create_directories(destDir);
    }
    // Walk every regular file in the source directory.
    for (fs::directory_iterator end, dir(sourceDir); dir != end; ++dir) {
        if (!fs::is_regular_file(dir->status())) {
            continue;
        }
        std::string filePath = dir->path().string();
        std::string fileName = dir->path().filename().string();
        cv::Mat img = cv::imread(filePath, cv::IMREAD_COLOR);
        if (img.empty()) {
            // Unreadable or not an image -- skip it rather than abort the batch.
            std::cerr << "Failed to load image: " << filePath << std::endl;
            continue;
        }
        std::vector<Detection> outputs;
        yolov5VINO.detect(img, outputs);
        yolov5VINO.drawRect(img, outputs);
        // Write the annotated image next to its siblings in destDir.
        std::string destFilePath = destDir + "/" + fileName;
        if (!cv::imwrite(destFilePath, img)) {
            std::cerr << "Failed to save image: " << destFilePath << std::endl;
        }
        else {
            std::cout << "Saved image: " << destFilePath << std::endl;
        }
    }
    return 0;
}
2.yolov5vino.h
#pragma once
#ifndef YOLOV5VINO_H
#define YOLOV5VINO_H
#include <fstream>
#include <opencv2/opencv.hpp>
#include <inference_engine.hpp>
#define NOT_NCS2
using namespace std;
using namespace InferenceEngine;
// One detection result produced by YOLOv5VINO::detect().
struct Detection
{
int class_id; // index into YOLOv5VINO::m_classNames
float confidence; // objectness score of the candidate box
cv::Rect box; // bounding box in original-image pixel coordinates
};
// Thin wrapper around the OpenVINO Inference Engine for a YOLOv5 ONNX model:
// init() loads the network and class list, detect() runs inference and
// post-processing (thresholding + NMS), drawRect() renders the results.
class YOLOv5VINO
{
public:
YOLOv5VINO();
~YOLOv5VINO();
// Loads the model from modelPath and the class names from classPath.
void init(string modelPath, string classPath);
// Pads the source image to a black square (top-left aligned) so resizing
// to the network input preserves aspect ratio.
cv::Mat formatYolov5(const cv::Mat& source);
// Runs inference on image and appends NMS-filtered detections to outputs.
void detect(cv::Mat& image, vector<Detection>& outputs);
// Draws bounding boxes and class labels from outputs onto image.
void drawRect(cv::Mat& image, vector<Detection>& outputs);
// Reads class names (one per line) from classPath into m_classNames.
void loadClassList(string classPath);
private:
// Post-processing thresholds: per-class score, NMS IoU, objectness.
float m_scoreThreshold = 0.5;
float m_nmsThreshold = 0.6;
float m_confThreshold = 0.5;
//"CPU","GPU","MYRIAD"
#ifdef NCS2
const std::string m_deviceName = "MYRIAD";
const std::string m_modelFilename = "configFiles/yolov5sNCS2.xml";
#else
const std::string m_deviceName = "CPU";
const std::string m_modelFilename = "best.onnx";
#endif // NCS2
const std::string m_classfile = "classes.txt";
// Input tensor geometry; defaults are overwritten in init() from the model.
size_t m_numChannels = 3;
size_t m_inputH = 416;
size_t m_inputW = 416;
size_t m_imageSize = 0;
std::string m_inputName = "";
std::string m_outputName = "";
// Box colors, picked per class id modulo the palette size.
const vector<cv::Scalar> colors = { cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0) };
InferRequest m_inferRequest;
Blob::Ptr m_inputData;
public:
// Labels loaded from classes.txt; index order must match the model's class ids.
vector<std::string> m_classNames;
//const vector<std::string> m_classNames = { "138A","8151","B1100","JH8161","RE85","S22","KA90","T105","TX2295" };
// const vector<string> m_classNames = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light" };
};
#endif
3.yolov5vino.cpp
#include "yolov5vino.h"
// Nothing to configure at construction time; all setup happens in init().
YOLOv5VINO::YOLOv5VINO() = default;
// Members (InferRequest, Blob::Ptr, vectors) clean themselves up via RAII.
YOLOv5VINO::~YOLOv5VINO() = default;
/**
 * Loads the network from modelPath, forces FP32 NCHW input / FP32 output,
 * compiles it for m_deviceName, creates the infer request, caches the
 * input tensor dimensions, and finally loads class names from classPath.
 *
 * @param modelPath path to the ONNX (or IR) model file
 * @param classPath path to the classes.txt label file
 */
void YOLOv5VINO::init(string modelPath, string classPath)
{
	InferenceEngine::Core ie;
	InferenceEngine::CNNNetwork network = ie.ReadNetwork(modelPath);
	InputsDataMap inputs = network.getInputsInfo();
	OutputsDataMap outputs = network.getOutputsInfo();
	// const& avoids copying each map entry (a pair of string and shared_ptr).
	for (const auto& item : inputs)
	{
		m_inputName = item.first;
		auto input_data = item.second;
		input_data->setPrecision(Precision::FP32);
		input_data->setLayout(Layout::NCHW);
		input_data->getPreProcess().setColorFormat(ColorFormat::RGB);
		std::cout << "input name = " << m_inputName << std::endl;
	}
	for (const auto& item : outputs)
	{
		auto output_data = item.second;
		output_data->setPrecision(Precision::FP32);
		m_outputName = item.first;
		std::cout << "output name = " << m_outputName << std::endl;
	}
	auto executable_network = ie.LoadNetwork(network, m_deviceName);
	m_inferRequest = executable_network.CreateInferRequest();
	m_inputData = m_inferRequest.GetBlob(m_inputName);
	// Layout was set to NCHW above, so dims are {N, C, H, W}.
	const auto dims = m_inputData->getTensorDesc().getDims();
	m_numChannels = dims[1];
	m_inputH = dims[2];
	m_inputW = dims[3];
	m_imageSize = m_inputH * m_inputW;
	loadClassList(classPath);
}
/**
 * Reads one class name per line from classPath into m_classNames.
 *
 * Blank lines (including whitespace-only lines) are skipped and a trailing
 * '\r' from CRLF-encoded files is stripped: a stray empty line in
 * classes.txt shifts every subsequent class index and scrambles the labels
 * assigned to detections, which was the original failure described above.
 *
 * @param classPath path to the classes.txt label file
 */
void YOLOv5VINO::loadClassList(string classPath)
{
	std::ifstream ifs(classPath);
	if (!ifs.is_open())
	{
		std::cerr << "Failed to open class file: " << classPath << std::endl;
		return;
	}
	std::string line;
	while (getline(ifs, line))
	{
		// Strip the carriage return left over from Windows CRLF line endings.
		if (!line.empty() && line.back() == '\r')
			line.pop_back();
		// Drop trailing spaces/tabs so "name " and "name" are the same label.
		while (!line.empty() && (line.back() == ' ' || line.back() == '\t'))
			line.pop_back();
		// Skip blank lines instead of registering them as (empty) classes.
		if (!line.empty())
			m_classNames.push_back(line);
	}
	// ifstream closes itself on destruction (RAII); no explicit close needed.
}
/**
 * Letterbox-style preprocessing: copies the source image into the top-left
 * corner of a black square canvas whose side is the larger of the two image
 * dimensions, so the later resize to the network input keeps aspect ratio.
 *
 * @param source input BGR image
 * @return square CV_8UC3 canvas containing the source at (0, 0)
 */
cv::Mat YOLOv5VINO::formatYolov5(const cv::Mat& source)
{
	const int side = MAX(source.cols, source.rows);
	cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
	source.copyTo(squared(cv::Rect(0, 0, source.cols, source.rows)));
	return squared;
}
/**
 * Runs inference on image and fills outputs with NMS-filtered detections,
 * sorted by descending confidence. Boxes are reported in the coordinate
 * space of the (square-padded) original image.
 *
 * @param image   input BGR image (not modified)
 * @param outputs receives the surviving detections
 */
void YOLOv5VINO::detect(cv::Mat& image, vector<Detection>& outputs)
{
	// Pad to square, resize to network input, convert BGR -> RGB.
	cv::Mat input_image = formatYolov5(image);
	cv::Mat blob_image;
	cv::resize(input_image, blob_image, cv::Size(m_inputW, m_inputH));
	cv::cvtColor(blob_image, blob_image, cv::COLOR_BGR2RGB);
	// Repack HWC uint8 pixels into the CHW float input blob;
	// the non-NCS2 path also normalizes to [0, 1].
	float* data = static_cast<float*>(m_inputData->buffer());
	for (size_t row = 0; row < m_inputH; row++) {
		for (size_t col = 0; col < m_inputW; col++) {
			for (size_t ch = 0; ch < m_numChannels; ch++) {
#ifdef NCS2
				data[m_imageSize * ch + row * m_inputW + col] = float(blob_image.at<cv::Vec3b>(row, col)[ch]);
#else
				data[m_imageSize * ch + row * m_inputW + col] = float(blob_image.at<cv::Vec3b>(row, col)[ch] / 255.0);
#endif // NCS2
			}
		}
	}
	m_inferRequest.Infer();
	auto output = m_inferRequest.GetBlob(m_outputName);
	// Output layout: [1, rows, dimensions] = [1, candidates, 5 + num_classes].
	const float* dataout = static_cast<PrecisionTrait<Precision::FP32>::value_type*>(output->buffer());
	const SizeVector outputDims = output->getTensorDesc().getDims();
	// Scale factors map network-input coordinates back to the padded image.
	float x_factor = float(input_image.cols) / m_inputW;
	float y_factor = float(input_image.rows) / m_inputH;
	const int dimensions = outputDims[2];
	const int rows = outputDims[1];
	vector<int> class_ids;
	vector<float> confidences;
	vector<cv::Rect> boxes;
	for (int i = 0; i < rows; ++i)
	{
		// Candidate row layout: [cx, cy, w, h, objectness, class scores...].
		float confidence = dataout[4];
		if (confidence >= m_confThreshold)
		{
			const float* classes_scores = dataout + 5;
			// const_cast needed only because cv::Mat's data ctor is not
			// const-correct; the scores are never written through it.
			cv::Mat scores(1, static_cast<int>(m_classNames.size()), CV_32FC1, const_cast<float*>(classes_scores));
			cv::Point class_id;
			double max_class_score;
			minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
			if (max_class_score > m_scoreThreshold)
			{
				confidences.push_back(confidence);
				class_ids.push_back(class_id.x);
				// Convert center/size to a top-left rectangle in image space.
				float x = dataout[0];
				float y = dataout[1];
				float w = dataout[2];
				float h = dataout[3];
				int left = int((x - 0.5 * w) * x_factor);
				int top = int((y - 0.5 * h) * y_factor);
				int width = int(w * x_factor);
				int height = int(h * y_factor);
				boxes.push_back(cv::Rect(left, top, width, height));
			}
		}
		dataout += dimensions;
	}
	// Suppress overlapping boxes, then emit survivors sorted by confidence.
	vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, m_scoreThreshold, m_nmsThreshold, nms_result);
	for (size_t i = 0; i < nms_result.size(); i++)
	{
		int idx = nms_result[i];
		Detection result;
		result.class_id = class_ids[idx];
		result.confidence = confidences[idx];
		result.box = boxes[idx];
		outputs.push_back(result);
	}
	std::sort(outputs.begin(), outputs.end(), [](const Detection& a, const Detection& b) {
		return a.confidence > b.confidence; // descending
	});
}
/**
 * Draws each detection's bounding box and class label onto image.
 *
 * A class_id without a matching entry in m_classNames falls back to the
 * numeric id instead of indexing out of bounds -- a short or mis-loaded
 * classes.txt must not crash rendering.
 *
 * @param image   image to annotate in place
 * @param outputs detections produced by detect()
 */
void YOLOv5VINO::drawRect(cv::Mat& image, vector<Detection>& outputs)
{
	for (size_t i = 0; i < outputs.size(); ++i)
	{
		// const& avoids copying the Detection (and its cv::Rect) per iteration.
		const auto& detection = outputs[i];
		const auto& box = detection.box;
		const int classId = detection.class_id;
		const auto color = colors[classId % colors.size()];
		// Guard against a class id past the end of the loaded name list.
		const std::string label =
			(classId >= 0 && classId < static_cast<int>(m_classNames.size()))
				? m_classNames[classId]
				: std::to_string(classId);
		rectangle(image, box, color, 3);
		// Filled banner above the box as a background for the label text.
		rectangle(image, cv::Point(box.x, box.y - 40), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
		putText(image, label, cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 1.5, cv::Scalar(0, 0, 0), 2);
	}
}