一、吐槽

但是我打开该项目阅读readme.txt时候，发现这些示例网站一个都打不开！

而且readme.txt很不详细，你想训练自己的模型都无从下手。

二、ONXX格式导出

先安装super-gradients

pip install super-gradients

然后写个python文件，运行

from super_gradients.training import models
from super_gradients.common.object_names import Models

# Load model with pretrained weights
net = models.get(Models.YOLO_NAS_S, pretrained_weights='coco')
# net.predict("/home/jason/work/01-img/dog.png").show()



models.convert_to_onnx(model=net, input_shape=(3, 640, 640), out_path='yolo-nas-s.onnx')

用netron 查看下输出：

需要注意以下两点，通过官方工具导出的"bboxes"已经是"xyxy"格式了，所以不需要再执行make_grid, distance2bbox等操作了，直接进行"NMS"即可。但是，OpenCV的NMS要求输入的BBOXES格式为xywh，所以还需要再改一下

可以看到有两个输出

1135：float32[1,8400,4]。8400 是框子的数量，4是位置信息

1126：float32[1,8400,80]。 80是每个类别的概率。

三、onnx runtime C++部署

最大类别概率是178,索引是78,人就在摄像头面前。。。。位置信息也不对。。。

这是关于后处理的那个函数，有大佬试过这个网络的部署吗，望指点一二！

bool Yolov8Onnx::OnnxBatchDetect(std::vector<cv::Mat> &srcImgs, std::vector<std::vector<OutputDet> > &output)
{
    vector<Vec4d> params;
    vector<Mat> input_images;
    cv::Size input_size(_netWidth, _netHeight);

    //preprocessing (信封处理)
    Preprocessing(srcImgs, input_images, params);
    // [0~255] --> [0~1]; BGR2RGB
    Mat blob = cv::dnn::blobFromImages(input_images, 1 / 255.0, input_size, Scalar(0,0,0), true, false);

    // 前向传播得到推理结果
    int64_t input_tensor_length = VectorProduct(_inputTensorShape);// ?
    std::vector<Ort::Value> input_tensors;
    std::vector<Ort::Value> output_tensors;
    input_tensors.push_back(Ort::Value::CreateTensor<float>(_OrtMemoryInfo, (float*)blob.data,
                                                            input_tensor_length, _inputTensorShape.data(),
                                                            _inputTensorShape.size()));

    output_tensors = _OrtSession->Run(Ort::RunOptions{ nullptr },
        _inputNodeNames.data(),
        input_tensors.data(),
        _inputNodeNames.size(),
        _outputNodeNames.data(),
        _outputNodeNames.size()
    );

    //post-process
//    int net_width = _className.size() + 4; // 不用加4
    int net_width = _className.size();
    float* all_data0 = output_tensors[0].GetTensorMutableData<float>(); // 位置信息
    float* all_data1 = output_tensors[1].GetTensorMutableData<float>(); // 80类别概率

    _output0TensorShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape(); //  [1, 8400, 4]
    _output1TensorShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape(); // [1,8400, 80]


    int64_t one_output1_length = VectorProduct(_output1TensorShape) / _output1TensorShape[0]; // 一张图片输出所占内存长度 8400*80
    int64_t one_output0_length = VectorProduct(_output0TensorShape) / _output0TensorShape[0]; // 一张图片输出所占内存长度 8400*4

    int net_height = _output0TensorShape[1]; // 8400

    for (int img_index = 0; img_index < srcImgs.size(); ++img_index){

        // 一张图片的预测框
        vector<int> class_ids;
        vector<float> confidences;
        vector<Rect> boxes;
        for (int r=0; r<net_height; ++r){//如果是yolov5则需要做修改
            Mat scores(1, _className.size(), CV_32F, all_data1); // 80个类别的概率

            // 得到最大类别概率、类别索引
            Point classIdPoint;
            double max_class_soces;
            minMaxLoc(scores, 0, &max_class_soces, 0, &classIdPoint);
            max_class_soces = (float)max_class_soces;
            printf("max_clasee:%d id:%d\n",(int)max_class_soces,(int)classIdPoint.x);

            // 预测框坐标映射到原图上
            if (max_class_soces >= _classThreshold){
                // rect [x,y,w,h]

                float x = all_data0[0];
                float y = all_data0[1];
                float w = all_data0[2] - x;
                float h = all_data0[3] - y;



                x = (x - params[img_index][2]) / params[img_index][0]; //x
                y = (y - params[img_index][3]) / params[img_index][1]; //y
                w = w / params[img_index][0]; //w
                h = h / params[img_index][1]; //h

                int left = MAX(int(x - 0.5 *w +0.5), 0);
                int top = MAX(int(y - 0.5*h + 0.5), 0);

                class_ids.push_back(classIdPoint.x);
                confidences.push_back(max_class_soces);
                boxes.push_back(Rect(left, top, int(w + 0.5), int(h + 0.5)));
            }

        }
        all_data0 += one_output0_length; //指针指向下一个图片的地址
        all_data1 += one_output1_length;

        // 对一张图的预测框执行NMS处理
        vector<int> nms_result;
        cv::dnn::NMSBoxes(boxes, confidences, _classThreshold, _nmsThrehold, nms_result); // 还需要classThreshold？

        // 对一张图片：依据NMS处理得到的索引，得到类别id、confidence、box，并置于结构体OutputDet的容器中
        vector<OutputDet> temp_output;
        for (size_t i=0; i<nms_result.size(); ++i){
            int idx = nms_result[i];
            OutputDet result;
            result.id = class_ids[idx];
            result.confidence = confidences[idx];
            result.box = boxes[idx];
            temp_output.push_back(result);
        }
        output.push_back(temp_output); // 多张图片的输出；添加一张图片的输出置于此容器中

    }
    if (output.size())
        return true;
    else
        return false;

}

参考：

YOLO-NAS_whaosoft143的博客-CSDN博客

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.coloradmin.cn/o/539941.html

如若内容造成侵权/违法违规/事实不符，请联系多彩编程网进行投诉反馈，一经查实，立即删除！