ncnn vulkan 以类的方式推理示例

flyfish

环境
ncnn:ncnn-android-vulkan.zip(20230517 版)
OpenCV:4.6.0(CMake 中使用的是 opencv-mobile-4.6.0-android)
开发环境:Qt 6.2.4
模型:YOLOv5 6.2(yolov5s_6.2.param / yolov5s_6.2.bin)

在这里插入图片描述
构建套件 Clang arm64-v8a

在这里插入图片描述

NDK 25 和 API 28 的版本配置如下
在这里插入图片描述
头文件

#ifndef YOLOV5GPU_H
#define YOLOV5GPU_H

#include <string>
#include <vector>
#include <algorithm>
// ncnn
#include "layer.h"
#include "net.h"
#include "benchmark.h"
#include <opencv2/opencv.hpp>

/// One detection: a bounding box plus the winning class and its confidence.
/// Scalar members are default-initialized so that a default-constructed
/// Object never carries indeterminate values.
struct Object
{
    cv::Rect_<float> rect; ///< bounding box (x, y, width, height)
    int label = 0;         ///< class index
    float prob = 0.f;      ///< detection confidence
};

/// YOLOv5 detector running on ncnn, optionally through Vulkan compute.
///
/// Typical use: construct, call load_model() once, then call inference()
/// for each image and optionally draw_objects() to visualise the result.
class Yolov5GPU
{
public:
    int RGB_;      ///< channel order of the input image: 0 = BGR, 1 = RGB
    bool use_gpu_; ///< forwarded to ncnn as the Vulkan-compute switch

    /// Loads the network parameter and weight files.
    /// @return 0 on success, -1 on failure.
    int load_model();

    /// Runs detection on one image and stores the detections in @p objects.
    /// @param bgr     input image (channel order is described by RGB_).
    /// @param objects receives the detections, boxes in input-image coordinates.
    /// @return 0 on success.
    int inference(const cv::Mat &bgr, std::vector<Object> &objects);

    /// Prints every detection to stderr and draws its rectangle onto @p bgr.
    void draw_objects(const cv::Mat &bgr, const std::vector<Object> &objects);

public:
    Yolov5GPU();
    ~Yolov5GPU();

    // The destructor tears down the ncnn GPU instance, so an accidental copy
    // would tear it down twice. Make the non-copyability explicit.
    Yolov5GPU(const Yolov5GPU &) = delete;
    Yolov5GPU &operator=(const Yolov5GPU &) = delete;

private:
    ncnn::Net yolov5_;
    ncnn::UnlockedPoolAllocator g_blob_pool_allocator; // blob allocator handed to the net options
    ncnn::PoolAllocator g_workspace_pool_allocator;    // workspace allocator handed to the net options

    /// Area of the overlap between the rectangles of @p a and @p b.
    inline float
    intersection_area(const Object &a, const Object &b);

    /// Sorts faceobjects[left..right] by descending prob (quicksort).
    void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right);

    /// Sorts the whole vector by descending prob; no-op when it is empty.
    void qsort_descent_inplace(std::vector<Object> &faceobjects);

    /// Non-maximum suppression over boxes already sorted by descending prob;
    /// @p picked receives the indices of the boxes that survive.
    void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked, float nms_threshold);

    /// Logistic function 1 / (1 + e^-x).
    inline float sigmoid(float x);

    /// Decodes one detection head into candidate boxes and appends every
    /// candidate whose confidence reaches @p prob_threshold to @p objects.
    void generate_proposals(const ncnn::Mat &anchors, int stride, const ncnn::Mat &in_pad, const ncnn::Mat &feat_blob, float prob_threshold, std::vector<Object> &objects);
};

#endif // YOLOV5GPU_H

实现文件

#include "yolov5gpu.h"

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <iostream>

// Creates the ncnn GPU instance and configures the network options.
// Vulkan compute is enabled only when at least one Vulkan device is reported;
// use_gpu_ reflects that decision so later per-extractor settings never ask
// for a GPU that does not exist.
Yolov5GPU::Yolov5GPU()
{
    ncnn::create_gpu_instance();

    RGB_ = 0; // images loaded by OpenCV are in BGR order by default

    // Query once and reuse the value for both the option and the log line.
    const int gpu_count = ncnn::get_gpu_count();
    use_gpu_ = gpu_count > 0;

    ncnn::Option opt;
    opt.lightmode = true;
    opt.num_threads = 4;
    opt.blob_allocator = &(g_blob_pool_allocator);
    opt.workspace_allocator = &(g_workspace_pool_allocator);
    opt.use_packing_layout = true;
    // Set explicitly in both cases instead of relying on the library default.
    opt.use_vulkan_compute = use_gpu_;

    yolov5_.opt = opt;

    std::cout << "get_gpu_count():" << gpu_count << std::endl;
}
// Clears both pool allocators and the network, then destroys the ncnn GPU
// instance that the constructor created.
// NOTE(review): destroy_gpu_instance() is process-wide; with more than one
// Yolov5GPU alive, the first one destroyed presumably invalidates the GPU
// state of the others — confirm before creating several instances.
Yolov5GPU::~Yolov5GPU()
{
    g_blob_pool_allocator.clear();
    g_workspace_pool_allocator.clear();
    yolov5_.clear();

    // Must come last: everything above has to be released first.
    ncnn::destroy_gpu_instance();
}

// Loads the network description ("yolov5s_6.2.param") followed by its
// weights ("yolov5s_6.2.bin"), both given as relative paths.
// Returns 0 when both files load, -1 as soon as either one fails.
int Yolov5GPU::load_model()
{
    // network structure
    if (yolov5_.load_param("yolov5s_6.2.param") != 0)
    {
        std::cout << "load_param error" << std::endl;
        return -1;
    }

    // network weights
    if (yolov5_.load_model("yolov5s_6.2.bin") != 0)
    {
        std::cout << "load_model error" << std::endl;
        return -1;
    }

    return 0;
}
// Runs YOLOv5 on one image.
//
// Steps: resize so the longer side becomes 640 (aspect ratio kept), pad each
// side up to a multiple of 32 with the value 114, normalise to [0, 1], run
// the three detection heads, sort the candidates, apply NMS, and map the
// surviving boxes back to the coordinates of the input image.
//
// bgr     : 8-bit, 3-channel image; channel order is selected by RGB_.
// objects : cleared first, then filled with the final detections.
// Returns 0 on success, -1 when the input is unusable or ncnn reports an
// error for the input or one of the output blobs.
int Yolov5GPU::inference(const cv::Mat &bgr, std::vector<Object> &objects)
{
    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    objects.clear();

    // An empty Mat (e.g. a failed imread) would otherwise cause a division by
    // zero below and a null pixel pointer being handed to ncnn.
    if (bgr.empty() || bgr.type() != CV_8UC3)
    {
        std::cout << "inference error: input must be a non-empty 8-bit 3-channel image" << std::endl;
        return -1;
    }

    // img_w / img_h: size of the original image.
    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    // w / h: size after resizing so that the longer side equals target_size.
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = std::max(1, static_cast<int>(h * scale)); // never collapse to 0 rows
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = std::max(1, static_cast<int>(w * scale)); // never collapse to 0 columns
    }

    // Pass the row stride so that non-continuous Mats (ROI views, padded rows)
    // are read correctly instead of being treated as tightly packed.
    const int pixel_type = (RGB_ == 0) ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_RGB;
    const int row_stride = static_cast<int>(bgr.step[0]);
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_type, img_w, img_h, row_stride, w, h);

    // Letterbox: pad each side up to the next multiple of 32
    // (see yolov5/utils/datasets.py letterbox).
    const int wpad = (w + 31) / 32 * 32 - w;
    const int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov5_.create_extractor();
    ex.set_vulkan_compute(use_gpu_);

    if (ex.input("images", in_pad) != 0)
    {
        std::cout << "inference error: cannot set input blob images" << std::endl;
        return -1;
    }

    // One entry per detection head: output blob name, stride, and the three
    // (w, h) anchor pairs from yolov5/models/yolov5s.yaml.
    struct Head
    {
        const char *blob;
        int stride;
        float anchors[6];
    };
    const Head heads[] = {
        {"output", 8, {10.f, 13.f, 16.f, 30.f, 33.f, 23.f}},
        {"353", 16, {30.f, 61.f, 62.f, 45.f, 59.f, 119.f}},
        {"367", 32, {116.f, 90.f, 156.f, 198.f, 373.f, 326.f}},
    };

    std::vector<Object> proposals;
    for (const Head &head : heads)
    {
        ncnn::Mat out;
        if (ex.extract(head.blob, out) != 0)
        {
            std::cout << "inference error: cannot extract blob " << head.blob << std::endl;
            return -1;
        }

        ncnn::Mat anchors(6);
        for (int k = 0; k < 6; k++)
            anchors[k] = head.anchors[k];

        // generate_proposals appends, so all heads accumulate in one vector.
        generate_proposals(anchors, head.stride, in_pad, out, prob_threshold, proposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = static_cast<int>(picked.size());
    objects.resize(count);

    std::cout << "count:" << count << std::endl;

    const float x_offset = static_cast<float>(wpad / 2);
    const float y_offset = static_cast<float>(hpad / 2);
    const float max_x = (float)(img_w - 1);
    const float max_y = (float)(img_h - 1);

    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];
        cv::Rect_<float> &box = objects[i].rect;

        // Undo the letterbox padding and the resize to get back to the
        // coordinate system of the original image.
        float x0 = (box.x - x_offset) / scale;
        float y0 = (box.y - y_offset) / scale;
        float x1 = (box.x + box.width - x_offset) / scale;
        float y1 = (box.y + box.height - y_offset) / scale;

        // Clip to the image bounds.
        x0 = std::max(std::min(x0, max_x), 0.f);
        y0 = std::max(std::min(y0, max_y), 0.f);
        x1 = std::max(std::min(x1, max_x), 0.f);
        y1 = std::max(std::min(y1, max_y), 0.f);

        box.x = x0;
        box.y = y0;
        box.width = x1 - x0;
        box.height = y1 - y0;
    }

    return 0;
}
// Logs every detection to stderr (label, confidence, box) and outlines it
// on the image with a 1-pixel rectangle in colour (255, 0, 0).
void Yolov5GPU::draw_objects(const cv::Mat &bgr, const std::vector<Object> &objects)
{
    for (const Object &detection : objects)
    {
        const cv::Rect_<float> &box = detection.rect;

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", detection.label, detection.prob,
                box.x, box.y, box.width, box.height);

        cv::rectangle(bgr, box, cv::Scalar(255, 0, 0), 1);
    }
}

inline float Yolov5GPU::intersection_area(const Object &a, const Object &b)
{
    cv::Rect_<float> inter = a.rect & b.rect;
    return inter.area();
}

// Quicksort of faceobjects[left..right] (both ends inclusive) into descending
// order of prob, using the middle element's prob as the pivot. The two
// partitions are sorted recursively inside OpenMP sections.
void Yolov5GPU::qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // advance past elements already on the correct side of the pivot
        while (faceobjects[lo].prob > pivot)
            ++lo;

        while (faceobjects[hi].prob < pivot)
            --hi;

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        ++lo;
        --hi;
    }

#pragma omp parallel sections
    {
#pragma omp section
        {
            if (left < hi)
                qsort_descent_inplace(faceobjects, left, hi);
        }
#pragma omp section
        {
            if (lo < right)
                qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}

void Yolov5GPU::qsort_descent_inplace(std::vector<Object> &faceobjects)
{
    if (faceobjects.empty())
        return;

    qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
}

// Greedy, class-agnostic non-maximum suppression.
//
// faceobjects   : candidates, expected to be sorted by descending prob so
//                 that earlier (stronger) boxes suppress later ones.
// picked        : cleared, then filled with the indices of the kept boxes.
// nms_threshold : a candidate is dropped when its IoU with any already kept
//                 box is strictly greater than this value.
void Yolov5GPU::nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked, float nms_threshold)
{
    picked.clear();

    const int n = static_cast<int>(faceobjects.size());

    // Box areas are reused for every pair, so compute them once.
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object &a = faceobjects[i];

        bool keep = true;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object &b = faceobjects[picked[j]];

            // intersection over union
            const float inter_area = intersection_area(a, b);
            const float union_area = areas[i] + areas[picked[j]] - inter_area;
            if (inter_area / union_area > nms_threshold)
            {
                keep = false;
                break; // one overlapping kept box is enough to suppress
            }
        }

        if (keep)
            picked.push_back(i);
    }
}

// Logistic function: 1 / (1 + e^(-x)).
inline float Yolov5GPU::sigmoid(float x)
{
    const auto denominator = 1.f + exp(-x);
    return static_cast<float>(1.f / denominator);
}

// Decodes one YOLOv5 detection head into candidate boxes.
//
// anchors        : flat list of (w, h) pairs, one pair per anchor.
// stride         : stride of this head relative to the padded input.
// in_pad         : padded network input; only its w / h are read, to recover
//                  the grid dimensions.
// feat_blob      : head output; one channel per anchor, one row per grid
//                  cell, each row holding x, y, w, h, objectness followed by
//                  the class scores.
// prob_threshold : minimum objectness and minimum final confidence.
// objects        : accepted candidates are appended (existing entries kept).
//
// Returns without appending anything when the blob is empty or too small to
// decode, e.g. after a failed extract.
void Yolov5GPU::generate_proposals(const ncnn::Mat &anchors, int stride, const ncnn::Mat &in_pad, const ncnn::Mat &feat_blob, float prob_threshold, std::vector<Object> &objects)
{
    // A row needs at least x, y, w, h and objectness.
    if (stride <= 0 || feat_blob.empty() || feat_blob.w < 5)
        return;

    const int num_grid = feat_blob.h;

    // The blob stores the grid flattened; recover its two dimensions from
    // the longer side of the padded input.
    int num_grid_x;
    int num_grid_y;
    if (in_pad.w > in_pad.h)
    {
        num_grid_x = in_pad.w / stride;
        if (num_grid_x <= 0)
            return;
        num_grid_y = num_grid / num_grid_x;
    }
    else
    {
        num_grid_y = in_pad.h / stride;
        if (num_grid_y <= 0)
            return;
        num_grid_x = num_grid / num_grid_y;
    }

    const int num_class = feat_blob.w - 5;

    // Never index past the channels the blob actually has.
    const int num_anchors = std::min(anchors.w / 2, feat_blob.c);

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];

        const ncnn::Mat feat = feat_blob.channel(q);

        for (int i = 0; i < num_grid_y; i++)
        {
            for (int j = 0; j < num_grid_x; j++)
            {
                const float *featptr = feat.row(i * num_grid_x + j);

                // Cheap rejection on objectness before looking at classes.
                const float box_confidence = sigmoid(featptr[4]);
                if (box_confidence < prob_threshold)
                    continue;

                // Find the class with the highest raw score; sigmoid is
                // monotonic, so it only has to be applied to the winner.
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int k = 0; k < num_class; k++)
                {
                    const float score = featptr[5 + k];
                    if (score > class_score)
                    {
                        class_index = k;
                        class_score = score;
                    }
                }

                const float confidence = box_confidence * sigmoid(class_score);
                if (confidence < prob_threshold)
                    continue;

                // yolov5/models/yolo.py Detect forward
                // y = x[i].sigmoid()
                // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                const float dx = sigmoid(featptr[0]);
                const float dy = sigmoid(featptr[1]);
                const float dw = sigmoid(featptr[2]);
                const float dh = sigmoid(featptr[3]);

                const float pb_cx = (dx * 2.f - 0.5f + j) * stride;
                const float pb_cy = (dy * 2.f - 0.5f + i) * stride;

                // Plain multiplication instead of pow(x, 2).
                const float scaled_w = dw * 2.f;
                const float scaled_h = dh * 2.f;
                const float pb_w = scaled_w * scaled_w * anchor_w;
                const float pb_h = scaled_h * scaled_h * anchor_h;

                const float x0 = pb_cx - pb_w * 0.5f;
                const float y0 = pb_cy - pb_h * 0.5f;
                const float x1 = pb_cx + pb_w * 0.5f;
                const float y1 = pb_cy + pb_h * 0.5f;

                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0;
                obj.rect.height = y1 - y0;
                obj.label = class_index;
                obj.prob = confidence;
                objects.push_back(obj);
            }
        }
    }
}

调用方式

#include <iostream>

#include "yolov5gpu.h"

// Demo driver: loads the model, runs detection on ./test.jpg five times while
// printing the time taken by each run, then writes the image with the last
// run's detections drawn on it to ./result.jpg.
// Returns 0 on success and 1 when the model, the image or inference fails.
int main()
{
    std::cout << "begin" << std::endl;
    Yolov5GPU model;

    // Do not report success (or continue) when the model could not be loaded.
    if (model.load_model() != 0)
    {
        std::cout << "load_model failed" << std::endl;
        return 1;
    }
    std::cout << "load_model succeed" << std::endl;

    cv::Mat bgr = cv::imread("./test.jpg", cv::IMREAD_COLOR);
    if (bgr.empty())
    {
        // Running inference on an empty image is meaningless; stop here.
        std::cout << " cv::imread failed" << std::endl;
        return 1;
    }
    std::cout << " cv::imread succeed" << std::endl;

    std::vector<Object> objects;
    for (int i = 0; i < 5; i++) // repeated runs for timing
    {
        const double start_time = ncnn::get_current_time();
        const int ret = model.inference(bgr, objects);
        // Take the timestamp before any console output so only inference is measured.
        const double elapsed = ncnn::get_current_time() - start_time;

        if (ret != 0)
        {
            std::cout << "inference failed" << std::endl;
            return 1;
        }
        std::cout << "inference succeed" << std::endl;

        std::cout << "result:" << elapsed << std::endl;
    }

    model.draw_objects(bgr, objects);
    cv::imwrite("./result.jpg", bgr);

    std::cout << "end" << std::endl;

    return 0;
}

编译配置 CMakeLists.txt

# Build script for the yolov5_gpu_test executable (ncnn + OpenCV).
cmake_minimum_required(VERSION 3.5) 

project(yolov5_gpu_test LANGUAGES CXX)

# Require C++17 for all targets and add the current source/build directory
# to the include path.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_INCLUDE_CURRENT_DIR ON)

# Locate the ncnn CMake package under <source dir>/ncnn_gpu.
# NOTE(review): ncnn_gpu presumably holds the unpacked ncnn-android-vulkan
# package for the target ABI — confirm the directory layout.
set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn_gpu/lib/cmake/ncnn)
find_package(ncnn REQUIRED)


# Allow both <ncnn/net.h> and plain "net.h" style includes.
include_directories(
    ${CMAKE_SOURCE_DIR}/ncnn_gpu/include
    ${CMAKE_SOURCE_DIR}/ncnn_gpu/include/ncnn
)


# NOTE(review): this is a relative path; it presumably only resolves when the
# opencv-mobile package sits where CMake expects it — consider anchoring it
# to ${CMAKE_SOURCE_DIR} like ncnn_DIR above, after verifying the layout.
set(OpenCV_DIR opencv-mobile-4.6.0-android/sdk/native/jni)


find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

# Print what find_package(OpenCV) resolved, to help debug configuration issues.
message(STATUS "version: ${OpenCV_VERSION}")
message(STATUS "libraries: ${OpenCV_LIBS}")
message(STATUS "include path: ${OpenCV_INCLUDE_DIRS}")



add_executable(yolov5_gpu_test yolov5gpu.h yolov5gpu.cpp main.cpp)

# z/dl/m/log/android are system libraries named directly; ncnn and OpenCV
# come from the packages found above.
target_link_libraries(yolov5_gpu_test z dl m log android

      ncnn ${OpenCV_LIBS}

)