# Build script for the DBNet demo: a single executable built from main.cpp and linked against
# OpenCV and a prebuilt MNN shared library located in <source dir>/lib.
cmake_minimum_required(VERSION 3.16)
project(untitled22)

# Request C++11 through the standard variables instead of overwriting CMAKE_CXX_FLAGS, which
# would throw away any flags supplied by the user or the toolchain file.
# Extensions are disabled so the compiler still receives -std=c++11 rather than -std=gnu++11.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# OpenMP flag for C sources, appended to whatever is already configured.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")

find_package(OpenCV REQUIRED)

# MNN is consumed as a prebuilt shared object, wrapped in an imported target.
add_library(libmnn SHARED IMPORTED)
set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)

add_executable(untitled22 main.cpp)
target_link_libraries(untitled22 ${OpenCV_LIBS} libmnn )
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace std;
/**
 * Text detector that post-processes a DBNet probability map into quadrilateral text boxes.
 *
 * The constructor and detect() form the public surface (main() uses both); the tuning values
 * and geometry helpers are implementation details.
 */
class DBNet {
public:
    /**
     * @param binaryThreshold  Threshold applied to the probability map to obtain the binary map.
     * @param polygonThreshold Minimum contourScore() a contour is compared against in detect().
     * @param unclipRatio      Expansion factor used by unclip().
     * @param maxCandidates    Upper bound on the number of contours examined; values <= 0 mean
     *                         "no limit" in detect().
     */
    DBNet(const float binaryThreshold = 0.5, const float polygonThreshold = 0.7, const float unclipRatio = 1.5,
          const int maxCandidates = 1000);

    /** Runs detection on srcimg and draws the resulting boxes onto it. */
    void detect(Mat &srcimg);

private:
    float binaryThreshold;
    float polygonThreshold;
    float unclipRatio;
    int maxCandidates;

    // Fixed network input resolution; detect() resizes every image to this size.
    const int inpWidth = 736;
    const int inpHeight = 736;

    // Per-channel normalisation coefficients on a 0..1 pixel scale. detect() declares its own
    // copies pre-multiplied by 255, so these two arrays are not read by the visible code.
    const float meanValues[3] = {0.485, 0.456, 0.406};
    const float normValues[3] = {0.229, 0.224, 0.225};

    /** Mean of `binary` over the area enclosed by `contour`. */
    float contourScore(const Mat &binary, const vector<Point> &contour);

    /** Expands `inPoly` outwards and appends the expanded vertices to `outPoly`. */
    void unclip(const vector<Point2f> &inPoly, vector<Point2f> &outPoly);
};
/**
 * Stores the post-processing parameters and prints a start-up banner.
 *
 * @param binaryThreshold  Threshold for binarising the probability map.
 * @param polygonThreshold Score threshold contours are compared against.
 * @param unclipRatio      Expansion factor for detected boxes.
 * @param maxCandidates    Maximum number of contours to examine.
 */
DBNet::DBNet(const float binaryThreshold, const float polygonThreshold, const float unclipRatio,
             const int maxCandidates)
        : binaryThreshold(binaryThreshold),
          polygonThreshold(polygonThreshold),
          unclipRatio(unclipRatio),
          maxCandidates(maxCandidates) {
    cout << "run dbnet" << endl;
    // Commented-out ONNX Runtime call kept from the original source; it is not used by this code:
    //OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); gpu
}
// Runs text detection on srcimg: resizes it to the network resolution, feeds it through an MNN
// session, thresholds the output map, extracts contours, turns each contour into an expanded
// rotated box, and draws the boxes in `results` onto srcimg in place.
//
// NOTE(review): as it appears here this function is not well-formed. No closing braces are
// present and several statements look incomplete or absent; each spot is flagged inline.
// Restore them from the original source before building.
void DBNet::detect(Mat &srcimg) {
// Original image size, used below to map contour coordinates back from the resized map.
int h = srcimg.rows;
int w = srcimg.cols;
Mat dst;
// Stretch to the fixed inpWidth x inpHeight input size (aspect ratio is not preserved).
resize(srcimg, dst, Size(this->inpWidth, this->inpHeight));
// NOTE(review): this statement is unterminated; the constructor argument (presumably an
// interpreter created from a model file) and the closing `);` are missing. TODO restore.
auto mnnNet = std::shared_ptr<MNN::Interpreter>(
// NOTE(review): t1 is not read anywhere in the visible code.
auto t1 = std::chrono::steady_clock::now();
// CPU backend with four threads.
MNN::ScheduleConfig netConfig;
netConfig.type = MNN_FORWARD_CPU;
netConfig.numThread = 4;
auto session = mnnNet->createSession(netConfig);
auto input = mnnNet->getSessionInput(session, nullptr);
// Width is passed before height here; the order is harmless only because both are 736.
// NOTE(review): confirm the dimension order the model expects.
mnnNet->resizeTensor(input, {1, 3, (int) inpWidth, (int) inpHeight});
// NOTE(review): config is declared but not used in the visible code.
MNN::CV::ImageProcess::Config config;
// Same coefficients as meanValues / normValues, rescaled for pixels in the 0..255 range.
const float mean_vals[3] = {255 * 0.485, 255 * 0.456, 255 * 0.406};
const float norm_255[3] = {1 / (255 * 0.229), 1 / (255 * 0.224), 1 / (255 * 0.225)};
// Pre-processing pipeline created with BGR and RGB as its two format arguments plus the
// per-channel mean and scale above.
std::shared_ptr<MNN::CV::ImageProcess> pretreat(
MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::RGB, mean_vals, 3,
norm_255, 3));
// Writes the pre-processed image into the session input tensor.
pretreat->convert(dst.data, (int) inpWidth, (int) inpHeight, dst.step[0], input);
// NOTE(review): inputHost is not used afterwards in the visible code.
MNN::Tensor inputHost(input, input->getDimensionType());
auto output = mnnNet->getSessionOutput(session, "output");
// NOTE(review): no call that runs the session or copies `output` into outputHost is visible
// between here and the read of outputHost below; presumably those lines are missing. Verify.
MNN::Tensor outputHost(output, output->getDimensionType());
int shape_h = outputHost.height();
int shape_c = outputHost.channel();
int shape_w = outputHost.width();
int shape_s = outputHost.size();
printf("---c= %d w= %d h= %d s= %d ----\n", shape_c, shape_w, shape_h, shape_s);
// for (int i = 0; i < shape_s; i++) { outputCount.push_back(outputHost.host<float>()[i]); }
const float *floatArray = outputHost.host<float>();
// for (int i = 0; i < shape_s; i++){
// std::cout<<floatArray[i]<<" ";
// if(i==100) break;
// }
// Copy the network output into a float map with the same size as the resized input.
// NOTE(review): the byte count is shape_s/4 * sizeof(float); this assumes it does not exceed
// the dst.rows * dst.cols floats allocated for `binary`. Confirm the unit of size().
Mat binary(dst.rows, dst.cols, CV_32FC1);
memcpy(binary.data, floatArray, shape_s/4* sizeof(float));
// Threshold: values above binaryThreshold become 255, everything else 0.
Mat bitmap;
threshold(binary, bitmap, binaryThreshold, 255, THRESH_BINARY);
//cv::imshow("", binary);
// Scale ratio from the map back to the original image size.
float scaleHeight = (float) (h) / (float) (binary.size[0]);
float scaleWidth = (float) (w) / (float) (binary.size[1]);
// Find contours (the bitmap is converted to 8-bit single-channel first).
vector<vector<Point> > contours;
bitmap.convertTo(bitmap, CV_8UC1);
findContours(bitmap, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
// Candidate number limitation: a non-positive maxCandidates means no limit.
size_t numCandidate = min(contours.size(), (size_t) (maxCandidates > 0 ? maxCandidates : INT_MAX));
// NOTE(review): confidences is only assigned below and not read in the visible code.
vector<float> confidences;
vector<vector<Point2f> > results;
for (size_t i = 0; i < numCandidate; i++) {
vector<Point> &contour = contours[i];
// Calculate text contour score
// NOTE(review): the statement controlled by this `if` (presumably `continue;`, to skip
// low-scoring contours) is missing; as written it governs the declaration that follows.
if (contourScore(binary, contour) < polygonThreshold)
// Rescale contour points from map coordinates to original-image coordinates.
vector<Point> contourScaled;
for (size_t j = 0; j < contour.size(); j++) {
contourScaled.push_back(Point(int(contour[j].x * scaleWidth),
int(contour[j].y * scaleHeight)));
// Unclip
RotatedRect box = minAreaRect(contourScaled);
// minArea() rect is not normalized, it may return rectangles with angle=-90 or height < width
const float angle_threshold = 60; // do not expect vertical text, TODO detection algo property
bool swap_size = false;
if (box.size.width < box.size.height) // horizontal-wide text area is expected
swap_size = true;
else if (fabs(box.angle) >= angle_threshold) // don't work with vertical rectangles
swap_size = true;
// Swapping width and height is paired with a 90 degree shift of the angle towards zero.
if (swap_size) {
swap(box.size.width, box.size.height);
if (box.angle < 0)
box.angle += 90;
else if (box.angle > 0)
box.angle -= 90;
Point2f vertex[4];
box.points(vertex); // order: bl, tl, tr, br
vector<Point2f> approx;
// NOTE(review): the body of this loop (presumably appending vertex[j] to approx) is missing;
// as written the loop governs the declaration that follows and approx stays empty.
for (int j = 0; j < 4; j++)
vector<Point2f> polygon;
unclip(approx, polygon);
// NOTE(review): nothing visible appends polygon to results; presumably that line is missing.
// Every contour gets a confidence of 1.0.
confidences = vector<float>(contours.size(), 1.0f);
// Draw each result as four corner dots, Scalar(0, 0, 255), joined by a closed outline,
// Scalar(0, 255, 0); the last corner connects back to the first.
for (int i = 0; i < results.size(); i++) {
for (int j = 0; j < 4; j++) {
circle(srcimg, Point((int) results[i][j].x, (int) results[i][j].y), 2, Scalar(0, 0, 255), -1);
if (j < 3) {
line(srcimg, Point((int) results[i][j].x, (int) results[i][j].y),
Point((int) results[i][j + 1].x, (int) results[i][j + 1].y), Scalar(0, 255, 0));
} else {
line(srcimg, Point((int) results[i][j].x, (int) results[i][j].y),
Point((int) results[i][0].x, (int) results[i][0].y), Scalar(0, 255, 0));
float DBNet::contourScore(const Mat &binary, const vector<Point> &contour) {
Rect rect = boundingRect(contour);
int xmin = max(rect.x, 0);
int xmax = min(rect.x + rect.width, binary.cols - 1);
int ymin = max(rect.y, 0);
int ymax = min(rect.y + rect.height, binary.rows - 1);
Mat binROI = binary(Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1));
Mat mask = Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8U);
vector<Point> roiContour;
for (size_t i = 0; i < contour.size(); i++) {
Point pt = Point(contour[i].x - xmin, contour[i].y - ymin);
vector<vector<Point>> roiContours = {roiContour};
fillPoly(mask, roiContours, Scalar(1));
float score = mean(binROI, mask).val[0];
return score;
void DBNet::unclip(const vector<Point2f> &inPoly, vector<Point2f> &outPoly) {
float area = contourArea(inPoly);
float length = arcLength(inPoly, true);
float distance = area * unclipRatio / length;
size_t numPoints = inPoly.size();
vector<vector<Point2f>> newLines;
for (size_t i = 0; i < numPoints; i++) {
vector<Point2f> newLine;
Point pt1 = inPoly[i];
Point pt2 = inPoly[(i - 1) % numPoints];
Point vec = pt1 - pt2;
float unclipDis = (float) (distance / norm(vec));
Point2f rotateVec = Point2f(vec.y * unclipDis, -vec.x * unclipDis);
newLine.push_back(Point2f(pt1.x + rotateVec.x, pt1.y + rotateVec.y));
newLine.push_back(Point2f(pt2.x + rotateVec.x, pt2.y + rotateVec.y));
size_t numLines = newLines.size();
for (size_t i = 0; i < numLines; i++) {
Point2f a = newLines[i][0];
Point2f b = newLines[i][1];
Point2f c = newLines[(i + 1) % numLines][0];
Point2f d = newLines[(i + 1) % numLines][1];
Point2f pt;
Point2f v1 = b - a;
Point2f v2 = d - c;
float cosAngle = (v1.x * v2.x + v1.y * v2.y) / (norm(v1) * norm(v2));
if (fabs(cosAngle) > 0.7) {
pt.x = (b.x + c.x) * 0.5;
pt.y = (b.y + c.y) * 0.5;
} else {
float denom = a.x * (float) (d.y - c.y) + b.x * (float) (c.y - d.y) +
d.x * (float) (b.y - a.y) + c.x * (float) (a.y - b.y);
float num = a.x * (float) (d.y - c.y) + c.x * (float) (a.y - d.y) + d.x * (float) (c.y - a.y);
float s = num / denom;
pt.x = a.x + s * (b.x - a.x);
pt.y = a.y + s * (b.y - a.y);
int main() {
DBNet mynet(0.3, 0.3, 4.5, 1000);
string imgpath = "../testimgs/3.jpg";
Mat srcimg = imread(imgpath);
imshow("kWinName", srcimg);
