C# PaddleInference OCR识别 学习研究Demo

news2024/11/19 15:15:56

说明

基于开源项目 https://github.com/sdcb/PaddleSharp

VS2022+.net4.8+ OpenCvSharp4+Sdcb.PaddleInference

效果

项目

 代码

using Sdcb.PaddleInference.Native;
using Sdcb.PaddleInference;
using System;
using System.Collections.Generic;
using OpenCvSharp.Extensions;
using OpenCvSharp;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.Globalization;
using System.IO;

namespace PaddleInference_OCR识别
{
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form; all setup is delegated to <c>InitializeComponent</c>.
        /// </summary>
        public Form1() => InitializeComponent();

        // Bitmap of the most recently opened image (assigned in button1_Click).
        Bitmap bmp;
        // Filter string handed to the open-file dialog.
        // NOTE(review): "*.tiff" is listed twice; one entry was possibly meant to be "*.tif".
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
        // Path of the currently selected image file; empty until a file has been chosen.
        string img = "";
        // NOTE(review): never assigned in the visible code (Form1_Load works with a local instead).
        string startupPath = "";

        // Longest image edge allowed before the detector input is down-scaled (see MatResize).
        int MaxSize = 1536;
        // Binarisation threshold applied to the detector's probability map; null skips thresholding.
        float? BoxThreshold = 0.3f;
        // Minimum mean probability (see GetScore) a contour needs to be kept; null keeps every contour.
        float? BoxScoreThreahold = 0.7f;
        // Side length of the square dilation kernel; null skips dilation.
        int? DilatedSize = 2;
        // Boxes whose width or height is not larger than this are dropped.
        int MinSize = 3;
        // Factor of the shorter box edge that is added to both box dimensions.
        float UnclipRatio = 2.0f;

        // Source image loaded by button2_Click; the crops made in button4_Click are taken from it.
        Mat src;
        // Text-detection predictor (created in Form1_Load).
        PaddlePredictor det_predictor;

        // Text boxes found by the last detection run, in source-image coordinates.
        RotatedRect[] rects;

        // When true, button4_Click passes every crop through the 180-degree classifier.
        bool Enable180Classification { get; set; } = true;
        // When true, crops are rectified with GetRotateCropImage instead of a plain bounding-rect crop.
        bool AllowRotateDetection { get; set; } = true;
        // Minimum classifier score required before a crop is rotated (see ShouldRotate180).
        double RotateThreshold { get; } = 0.75;
        // Cropped text-line images produced by button4_Click and consumed by button3_Click.
        Mat[] mats;
        // Direction-classification predictor (created in Form1_Load).
        PaddlePredictor cls_predictor;

        // Input shape of the recognition model; only its Height is read in the visible code.
        OcrShape recShape = new OcrShape(3, 320, 48);
        // Text-recognition predictor (created in Form1_Load).
        PaddlePredictor rec_predictor;

        // Character dictionary read from ppocr_keys.txt; looked up by GetLabelByIndex.
        public IReadOnlyList<string> Labels;

        // Start / end timestamps used to report the elapsed time of each step.
        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;

        /// <summary>
        /// Creates the detection, direction-classification and recognition predictors and loads
        /// the label dictionary, all from folders next to the executable.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private unsafe void Form1_Load(object sender, EventArgs e)
        {
            // A distinct local name avoids shadowing the startupPath field.
            string baseDir = Application.StartupPath;

            // Paths are handed to the native API as narrow strings: ANSI code page on Windows, UTF-8 elsewhere.
            Encoding PaddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage) : Encoding.UTF8;

            // ---- Detection model -------------------------------------------------------------
            // Loaded from in-memory buffers rather than by path; this is the hook for reading an
            // encrypted model file and decrypting it into a byte[] before handing it to Paddle.
            IntPtr det_ptr = PaddleNative.PD_ConfigCreate();

            string det_programPath = Path.Combine(baseDir, "ch_PP-OCRv3_det", "inference.pdmodel");
            string det_paramsPath = Path.Combine(baseDir, "ch_PP-OCRv3_det", "inference.pdiparams");

            // File.ReadAllBytes reads the whole file and closes it; a single Stream.Read call is
            // allowed to return fewer bytes than requested and the streams were never disposed.
            byte[] programBuffer = File.ReadAllBytes(det_programPath);
            byte[] paramsBuffer = File.ReadAllBytes(det_paramsPath);

            fixed (byte* pprogram = programBuffer)
            fixed (byte* pparams = paramsBuffer)
            {
                PaddleNative.PD_ConfigSetModelBuffer(det_ptr,
                    (IntPtr)pprogram, programBuffer.Length,
                    (IntPtr)pparams, paramsBuffer.Length);
            }

            det_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(det_ptr));

            // ---- Direction-classification model ----------------------------------------------
            IntPtr cls_ptr = PaddleNative.PD_ConfigCreate();

            string cls_programPath = Path.Combine(baseDir, "ch_ppocr_mobile_v2.0_cls", "inference.pdmodel");
            string cls_paramsPath = Path.Combine(baseDir, "ch_ppocr_mobile_v2.0_cls", "inference.pdiparams");

            // The native side reads C strings, so the byte arrays must carry an explicit NUL
            // terminator; Encoding.GetBytes does not append one.
            byte[] programBytes = PaddleEncoding.GetBytes(cls_programPath + "\0");
            byte[] paramsBytes = PaddleEncoding.GetBytes(cls_paramsPath + "\0");
            fixed (byte* programPtr = programBytes)
            fixed (byte* paramsPtr = paramsBytes)
            {
                PaddleNative.PD_ConfigSetModel(cls_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
            }

            cls_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(cls_ptr));

            // ---- Recognition model -----------------------------------------------------------
            IntPtr rec_ptr = PaddleNative.PD_ConfigCreate();

            string rec_programPath = Path.Combine(baseDir, "ch_PP-OCRv3_rec", "inference.pdmodel");
            string rec_paramsPath = Path.Combine(baseDir, "ch_PP-OCRv3_rec", "inference.pdiparams");

            byte[] rec_programBytes = PaddleEncoding.GetBytes(rec_programPath + "\0");
            byte[] rec_paramsBytes = PaddleEncoding.GetBytes(rec_paramsPath + "\0");
            fixed (byte* rec_programPtr = rec_programBytes)
            fixed (byte* rec_paramsPtr = rec_paramsBytes)
            {
                PaddleNative.PD_ConfigSetModel(rec_ptr, (IntPtr)rec_programPtr, (IntPtr)rec_paramsPtr);
            }

            rec_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(rec_ptr));

            // ---- Label dictionary: one entry per line ----------------------------------------
            string labelsPath = Path.Combine(baseDir, "ppocr_keys.txt");
            Labels = new List<string>(File.ReadAllLines(labelsPath));
        }

        /// <summary>
        /// Lets the user pick an image file, remembers its path in <c>img</c> and shows it in the
        /// picture box. Nothing changes when the dialog is cancelled.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog wraps a native handle, so dispose it as soon as the choice is known.
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK) return;
                img = ofd.FileName;
            }

            // Release the GDI+ bitmaps of the previous selection instead of leaving them to the finalizer.
            System.Drawing.Image previousPreview = pictureBox1.Image;
            pictureBox1.Image = null;
            previousPreview?.Dispose();
            bmp?.Dispose();

            bmp = new Bitmap(img);
            pictureBox1.Image = new Bitmap(img);
            textBox1.Text = "";
        }

        /// <summary>
        /// Runs text detection on the selected image: preprocesses it, feeds the detector,
        /// turns the probability map into rotated boxes (stored in <c>rects</c>), reports the
        /// elapsed time and draws the boxes into the picture box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <exception cref="Exception">The detector predictor reported a failed run.</exception>
        private void button2_Click(object sender, EventArgs e)
        {
            textBox1.Text = "";
            Application.DoEvents();
            if (img == "")
            {
                return;
            }
            dt1 = DateTime.Now;

            // ImRead hands back an empty Mat when the file cannot be decoded; stop early instead
            // of failing somewhere inside the preprocessing chain.
            Mat loaded = Cv2.ImRead(img);
            if (loaded.Empty())
            {
                loaded.Dispose();
                textBox1.Text = "图片读取失败: " + img;
                return;
            }

            // Release the image of the previous run before replacing it.
            if (src != null)
            {
                src.Dispose();
            }
            src = loaded;

            // ---- Preprocess and upload; the intermediate Mats are only needed until SetData ----
            OpenCvSharp.Size resizedSize;
            using (Mat resized = MatResize(src, MaxSize))
            using (Mat padded = MatPadding32(resized))
            using (Mat normalized = Normalize(padded))
            {
                Cv2.ImShow("normalized", normalized);
                resizedSize = resized.Size();
                using (PaddleTensor input = det_predictor.GetInputTensor(det_predictor.InputNames[0]))
                {
                    input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
                    float[] setData = ExtractMat(normalized);
                    input.SetData(setData);
                }
            }

            if (!det_predictor.Run())
            {
                throw new Exception("PaddlePredictor(Detector) run failed.");
            }

            // ---- Post-process the probability map into rotated boxes ----
            using (PaddleTensor output = det_predictor.GetOutputTensor(det_predictor.OutputNames[0]))
            {
                float[] data = output.GetData<float>();
                int[] shape = output.Shape;

                using (Mat pred = new Mat(shape[2], shape[3], MatType.CV_32FC1, data))
                using (Mat cbuf = new Mat())
                using (Mat dilated = new Mat())
                {
                    Cv2.ImShow("pred", pred);

                    // Only the un-padded region of the map corresponds to real image content.
                    using (Mat roi = pred[0, resizedSize.Height, 0, resizedSize.Width])
                    {
                        roi.ConvertTo(cbuf, MatType.CV_8UC1, 255);
                    }

                    // When thresholding is disabled 'binary' aliases 'cbuf'; disposing a Mat twice is harmless.
                    using (Mat binary = BoxThreshold != null ?
                        cbuf.Threshold((int)(BoxThreshold * 255), 255, ThresholdTypes.Binary) :
                        cbuf)
                    {
                        if (DilatedSize != null)
                        {
                            using (Mat ones = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(DilatedSize.Value, DilatedSize.Value)))
                            {
                                Cv2.Dilate(binary, dilated, ones);
                            }
                        }
                        else
                        {
                            Cv2.CopyTo(binary, dilated);
                        }
                    }

                    OpenCvSharp.Point[][] contours = dilated.FindContoursAsArray(RetrievalModes.List, ContourApproximationModes.ApproxSimple);

                    // Boxes are found on the resized image; scale them back to source coordinates.
                    double scaleRate = 1.0 * src.Width / resizedSize.Width;

                    rects = contours
                        .Where(x => BoxScoreThreahold == null || GetScore(x, pred) > BoxScoreThreahold)
                        .Select(x => Cv2.MinAreaRect(x))
                        .Where(x => x.Size.Width > MinSize && x.Size.Height > MinSize)
                        .Select(rect =>
                        {
                            // Grow each box by UnclipRatio times its shorter edge.
                            float minEdge = Math.Min(rect.Size.Width, rect.Size.Height);
                            Size2f newSize = new Size2f(
                                (rect.Size.Width + UnclipRatio * minEdge) * scaleRate,
                                (rect.Size.Height + UnclipRatio * minEdge) * scaleRate);
                            return new RotatedRect(rect.Center * scaleRate, newSize, rect.Angle);
                        })
                        .OrderBy(v => v.Center.Y)
                        .ThenBy(v => v.Center.X)
                        .ToArray();
                }
            }

            dt2 = DateTime.Now;
            StringBuilder sb = new StringBuilder();
            sb.AppendLine("-----------------------------------\n");
            sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");

            textBox1.Text = sb.ToString();

            // ---- Draw the boxes on a copy so that 'src' stays clean for the later crop step ----
            using (Mat canvas = src.Clone())
            {
                for (int i = 0; i < rects.Length; i++)
                {
                    Scalar scalar = Scalar.RandomColor();
                    OpenCvSharp.Point[] corners = rects[i].Points()
                        .Select(p => new OpenCvSharp.Point(p.X, p.Y))
                        .ToArray();
                    Cv2.Polylines(canvas, new[] { corners }, true, scalar);
                }

                if (pictureBox1.Image != null)
                {
                    pictureBox1.Image.Dispose();
                }

                pictureBox1.Image = BitmapConverter.ToBitmap(canvas);
            }
        }

        /// <summary>
        /// Computes the mean value of <paramref name="pred"/> inside the polygon described by
        /// <paramref name="contour"/>.
        /// </summary>
        /// <param name="contour">Polygon vertices in the coordinate system of <paramref name="pred"/>.</param>
        /// <param name="pred">Single-channel map the score is sampled from.</param>
        /// <returns>Mean of the map values covered by the polygon.</returns>
        private float GetScore(OpenCvSharp.Point[] contour, Mat pred)
        {
            int width = pred.Width;
            int height = pred.Height;
            int[] boxX = contour.Select(v => v.X).ToArray();
            int[] boxY = contour.Select(v => v.Y).ToArray();

            // Bounding box of the contour, clamped to the map.
            int xmin = Clamp(boxX.Min(), 0, width - 1);
            int xmax = Clamp(boxX.Max(), 0, width - 1);
            int ymin = Clamp(boxY.Min(), 0, height - 1);
            int ymax = Clamp(boxY.Max(), 0, height - 1);

            // Contour expressed relative to the bounding box origin, used to paint the mask.
            OpenCvSharp.Point[] rootPoints = contour
                .Select(v => new OpenCvSharp.Point(v.X - xmin, v.Y - ymin))
                .ToArray();

            // This runs once per contour, so the temporaries must be released deterministically.
            using (Mat mask = new Mat(ymax - ymin + 1, xmax - xmin + 1, MatType.CV_8UC1, Scalar.Black))
            using (Mat croppedMat = pred[ymin, ymax + 1, xmin, xmax + 1])
            {
                mask.FillPoly(new[] { rootPoints }, new Scalar(1));
                return (float)croppedMat.Mean(mask).Val0;
            }
        }

        /// <summary>
        /// Limits <paramref name="val"/> to the range [<paramref name="min"/>, <paramref name="max"/>].
        /// The lower bound is tested first.
        /// </summary>
        /// <param name="val">Value to limit.</param>
        /// <param name="min">Lower bound.</param>
        /// <param name="max">Upper bound.</param>
        /// <returns><paramref name="min"/>, <paramref name="max"/> or the unchanged value.</returns>
        public int Clamp(int val, int min, int max)
        {
            return val < min
                ? min
                : (val > max ? max : val);
        }

        /// <summary>
        /// Copies the channels of <paramref name="src"/> into one planar float array
        /// (channel 0 first, then channel 1, then channel 2).
        /// </summary>
        /// <param name="src">Image with at most three channels. NOTE(review): assumed to be
        /// 32-bit float, as produced by Normalize — confirm for other callers.</param>
        /// <returns>Array of length rows * cols * 3; planes of missing channels stay zero.</returns>
        /// <exception cref="ArgumentException"><paramref name="src"/> has more than three channels.</exception>
        float[] ExtractMat(Mat src)
        {
            int rows = src.Rows;
            int cols = src.Cols;
            int channels = src.Channels();

            // The buffer only has room for three planes; a fourth would be written past its end.
            if (channels > 3)
            {
                throw new ArgumentException("src must have at most 3 channels, provided " + channels + ".", nameof(src));
            }

            float[] array = new float[rows * cols * 3];

            // Pin before entering the try block so that Free() is only reached for an allocated handle.
            GCHandle gCHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
            try
            {
                IntPtr intPtr = gCHandle.AddrOfPinnedObject();
                for (int i = 0; i < channels; i++)
                {
                    // Mat header that points at plane i of the pinned array.
                    using (Mat dest = new Mat(rows, cols, MatType.CV_32FC1, intPtr + i * rows * cols * sizeof(float), 0L))
                    {
                        Cv2.ExtractChannel(src, dest, i);
                    }
                }
                return array;
            }
            finally
            {
                gCHandle.Free();
            }
        }

        /// <summary>
        /// Returns a copy of <paramref name="src"/> that is scaled down, keeping the aspect ratio,
        /// when its longer edge exceeds <paramref name="maxSize"/>; otherwise returns a plain clone.
        /// </summary>
        /// <param name="src">Image to copy.</param>
        /// <param name="maxSize">Longest allowed edge; null disables scaling.</param>
        /// <returns>A new Mat; the caller owns it.</returns>
        private Mat MatResize(Mat src, int? maxSize)
        {
            if (!maxSize.HasValue)
            {
                return src.Clone();
            }

            OpenCvSharp.Size dims = src.Size();
            double factor = 1.0 * maxSize.Value / Math.Max(dims.Width, dims.Height);

            // Images are only ever shrunk, never enlarged.
            if (factor < 1.0)
            {
                return src.Resize(OpenCvSharp.Size.Zero, factor, factor);
            }

            return src.Clone();
        }

        /// <summary>
        /// Pads <paramref name="src"/> with black pixels on the right and bottom so that both
        /// dimensions become multiples of 32.
        /// </summary>
        /// <param name="src">Image to pad.</param>
        /// <returns>A new, padded Mat; the caller owns it.</returns>
        private Mat MatPadding32(Mat src)
        {
            OpenCvSharp.Size current = src.Size();

            // Round each dimension up to the next multiple of 32.
            int paddedWidth = (int)(32 * Math.Ceiling(1.0 * current.Width / 32));
            int paddedHeight = (int)(32 * Math.Ceiling(1.0 * current.Height / 32));

            int extraBottom = paddedHeight - current.Height;
            int extraRight = paddedWidth - current.Width;

            return src.CopyMakeBorder(0, extraBottom, 0, extraRight, BorderTypes.Constant, Scalar.Black);
        }

        /// <summary>
        /// Converts <paramref name="src"/> to float in [0, 1] and then applies, per channel,
        /// <c>(value - mean) / std</c> using the fixed mean / std tables below.
        /// </summary>
        /// <param name="src">Image to normalize; it is not modified.</param>
        /// <returns>A new float Mat; the caller owns it.</returns>
        private Mat Normalize(Mat src)
        {
            // Per-channel reciprocal of the standard deviation, and the mean, indexed by channel position.
            float[] scales = new[] { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
            float[] means = new[] { 0.485f, 0.456f, 0.406f };

            Mat scaled = new Mat();
            src.ConvertTo(scaled, MatType.CV_32FC3, 1.0 / 255);
            Mat[] planes = scaled.Split();

            int planeIndex = 0;
            foreach (Mat plane in planes)
            {
                // alpha * x + beta  ==  (x - mean) / std
                double alpha = 1.0 * scales[planeIndex];
                double beta = (0.0 - means[planeIndex]) * scales[planeIndex];
                plane.ConvertTo(plane, MatType.CV_32FC1, alpha, beta);
                planeIndex++;
            }
            scaled.Dispose();

            Mat merged = new Mat();
            Cv2.Merge(planes, merged);
            for (int k = 0; k < planes.Length; k++)
            {
                planes[k].Dispose();
            }
            return merged;
        }

        /// <summary>
        /// Cuts the rotated rectangle <paramref name="rect"/> out of <paramref name="src"/> and
        /// warps it into an upright image. Parts of the rectangle that lie outside the image are
        /// filled by replicating the border pixels.
        /// </summary>
        /// <param name="src">Image to crop from; it is not modified.</param>
        /// <param name="rect">Rotated box in the coordinate system of <paramref name="src"/>.</param>
        /// <returns>A new Mat holding the rectified crop; the caller owns it.</returns>
        private Mat GetRotateCropImage(Mat src, RotatedRect rect)
        {
            bool wider = rect.Size.Width > rect.Size.Height;
            float angle = rect.Angle;
            OpenCvSharp.Size srcSize = src.Size();
            Rect boundingRect = rect.BoundingRect();

            // How far the bounding box sticks out of the image on each side.
            int expTop = Math.Max(0, 0 - boundingRect.Top);
            int expBottom = Math.Max(0, boundingRect.Bottom - srcSize.Height);
            int expLeft = Math.Max(0, 0 - boundingRect.Left);
            int expRight = Math.Max(0, boundingRect.Right - srcSize.Width);

            // Part of the bounding box that actually lies inside the image.
            Rect roiRect = Rect.FromLTRB(
                boundingRect.Left + expLeft,
                boundingRect.Top + expTop,
                boundingRect.Right - expRight,
                boundingRect.Bottom - expBottom);

            using (Mat boundingMat = src[roiRect])
            using (Mat expanded = boundingMat.CopyMakeBorder(expTop, expBottom, expLeft, expRight, BorderTypes.Replicate))
            {
                // 'expanded' is the clipped ROI padded by exactly the clipped amounts, so its
                // pixel (0, 0) is the top-left corner of the unclipped bounding box. The corners
                // therefore have to be translated by boundingRect's origin; the previous extra
                // offset of (expTop, expLeft) misplaced boxes crossing the top or left edge.
                Point2f[] rp = rect.Points()
                    .Select(v => new Point2f(v.X - boundingRect.X, v.Y - boundingRect.Y))
                    .ToArray();

                // Corner order depends on how the rotated rect reports its angle.
                Point2f[] srcPoints = (wider && angle >= 0 && angle < 45)
                    ? new[] { rp[1], rp[2], rp[3], rp[0] }
                    : new[] { rp[0], rp[3], rp[2], rp[1] };

                Point2f[] dstPoints = new[]
                {
                    new Point2f(0, 0),
                    new Point2f(rect.Size.Width, 0),
                    new Point2f(rect.Size.Width, rect.Size.Height),
                    new Point2f(0, rect.Size.Height),
                };

                using (Mat matrix = Cv2.GetPerspectiveTransform(srcPoints, dstPoints))
                {
                    Mat dest = expanded.WarpPerspective(matrix, new OpenCvSharp.Size(rect.Size.Width, rect.Size.Height), InterpolationFlags.Nearest, BorderTypes.Replicate);

                    if (!wider)
                    {
                        Cv2.Transpose(dest, dest);
                    }
                    else if (angle > 45)
                    {
                        Cv2.Flip(dest, dest, FlipMode.X);
                    }

                    return dest;
                }
            }
        }

        /// <summary>
        /// Clips <paramref name="rect"/> so that all four edges fall within
        /// [0, width] x [0, height] of <paramref name="size"/>.
        /// </summary>
        /// <param name="rect">Rectangle to clip.</param>
        /// <param name="size">Size of the image the rectangle must fit into.</param>
        /// <returns>The clipped rectangle.</returns>
        private Rect GetCropedRect(Rect rect, OpenCvSharp.Size size)
        {
            int left = Clamp(rect.Left, 0, size.Width);
            int top = Clamp(rect.Top, 0, size.Height);
            int right = Clamp(rect.Right, 0, size.Width);
            int bottom = Clamp(rect.Bottom, 0, size.Height);

            return Rect.FromLTRB(left, top, right, bottom);
        }

        /// <summary>
        /// Releases the native resources held by the form when it closes: all three predictors,
        /// the cached crops and the source image.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_FormClosing(object sender, FormClosingEventArgs e)
        {
            // Null-conditional calls: any of these is still null if Form1_Load failed part-way.
            det_predictor?.Dispose();
            cls_predictor?.Dispose();
            rec_predictor?.Dispose();

            if (mats != null)
            {
                foreach (Mat mat in mats)
                {
                    mat.Dispose();
                }
                mats = null;
            }

            src?.Dispose();
        }

        /// <summary>
        /// Direction detection: crops every detected text box out of the source image and, when
        /// enabled, runs the 180-degree classifier on each crop. The crops are stored in
        /// <c>mats</c> and the elapsed time is shown in the text box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button4_Click(object sender, EventArgs e)
        {
            textBox1.Text = "";
            Application.DoEvents();
            if (rects == null || src == null)
            {
                return;
            }

            // Release the crops of a previous run before they are replaced.
            if (mats != null)
            {
                foreach (Mat old in mats)
                {
                    old.Dispose();
                }
                mats = null;
            }

            dt1 = DateTime.Now;
            mats =
               rects.Select(rect =>
               {
                   Mat roi = AllowRotateDetection ? GetRotateCropImage(src, rect) : src[GetCropedRect(rect.BoundingRect(), src.Size())];
                   try
                   {
                       return Enable180Classification ? CLSPredictorRun(roi) : roi;
                   }
                   catch
                   {
                       // Do not leak the crop when classification fails.
                       roi.Dispose();
                       throw;
                   }
               })
               .ToArray();

            dt2 = DateTime.Now;
            StringBuilder sb = new StringBuilder();
            sb.AppendLine("-----------------------------------\n");
            sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");

            textBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Runs text recognition on the crops prepared by the direction-detection step and lists
        /// every result as "(score)text", followed by the elapsed time. The crops are consumed:
        /// they are disposed afterwards and must be regenerated before recognising again.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            textBox1.Text = "";
            Application.DoEvents();
            if (rects == null || mats == null)
            {
                return;
            }
            dt1 = DateTime.Now;
            try
            {
                // 0 lets RecognizerRun choose the batch size itself.
                int recognizeBatchSize = 0;
                PaddleOcrRecognizerResult[] porr = RecognizerRun(mats, recognizeBatchSize);
                dt2 = DateTime.Now;
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < porr.Length; i++)
                {
                    sb.AppendLine("("+ porr[i].Score+")"+ porr[i].Text );
                }
                sb.AppendLine("-----------------------------------\n");
                sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
                textBox1.Text = sb.ToString();
            }
            finally
            {
                foreach (Mat mat in mats)
                {
                    mat.Dispose();
                }
                // Drop the reference as well; otherwise a second click would run on disposed Mats.
                mats = null;
            }
        }

        /// <summary>
        /// Recognises the text in every image of <paramref name="srcs"/>. Images are sorted by
        /// width and processed in batches so that similarly sized crops share one inference call;
        /// the results are returned in the original input order.
        /// </summary>
        /// <param name="srcs">Cropped text-line images.</param>
        /// <param name="batchSize">Images per inference call; 0 selects
        /// min(8, processor count).</param>
        /// <returns>One result per input image, in input order.</returns>
        public PaddleOcrRecognizerResult[] RecognizerRun(Mat[] srcs, int batchSize = 0)
        {
            if (srcs.Length == 0)
            {
                return new PaddleOcrRecognizerResult[0];
            }

            int chooseBatchSize = batchSize != 0 ? batchSize : Math.Min(8, Environment.ProcessorCount);

            return srcs
                .Select((x, i) => (mat: x, i))
                .OrderBy(x => x.mat.Width)
                .Chunk(chooseBatchSize)
                .Select(x => (result: RunMulti(x.Select(x2 => x2.mat).ToArray()), ids: x.Select(x2 => x2.i).ToArray()))
                .SelectMany(x => x.result.Zip(x.ids, (result, i) => (result, i)))
                // Restore the caller's ordering after the width-sorted batching.
                .OrderBy(x => x.i)
                .Select(x => x.result)
                .ToArray();
        }

        /// <summary>
        /// Resizes <paramref name="src"/> to <paramref name="height"/> while keeping its aspect
        /// ratio and pads it on the right with gray up to <paramref name="targetWidth"/>.
        /// </summary>
        /// <param name="src">Image to resize; it is not modified.</param>
        /// <param name="height">Output height in pixels.</param>
        /// <param name="targetWidth">Output width in pixels.</param>
        /// <returns>A new Mat of size targetWidth x height; the caller owns it.</returns>
        private Mat ResizePadding(Mat src, int height, int targetWidth)
        {
            OpenCvSharp.Size size = src.Size();
            float whRatio = 1.0f * size.Width / size.Height;

            // The caller derives targetWidth in double precision while this ratio is single
            // precision, so the rounded width can overshoot by a pixel. Clamp it, otherwise the
            // padding amount below would be negative.
            int width = Math.Min((int)Math.Ceiling(height * whRatio), targetWidth);

            if (width == targetWidth)
            {
                return src.Resize(new OpenCvSharp.Size(width, height));
            }

            using (Mat resized = src.Resize(new OpenCvSharp.Size(width, height)))
            {
                return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
            }
        }

        /// <summary>
        /// Recognises a single image by running it through <c>RunMulti</c> as a batch of one.
        /// </summary>
        /// <param name="src">Cropped text-line image.</param>
        /// <returns>The recognition result for <paramref name="src"/>.</returns>
        private PaddleOcrRecognizerResult Run(Mat src)
        {
            Mat[] batch = new[] { src };
            return RunMulti(batch).Single();
        }

        /// <summary>
        /// Recognises one batch of text-line images: converts each to three channels, resizes and
        /// pads them to a common size, runs the recognition predictor and decodes the output by
        /// taking the best label per step, skipping index 0 and collapsing repeated labels.
        /// </summary>
        /// <param name="srcs">Images of one batch; they are not modified or disposed.</param>
        /// <returns>One result per input image, in input order. The score is the mean of the
        /// per-character maxima, or 0 when no character was decoded.</returns>
        /// <exception cref="ArgumentException">An input image is empty.</exception>
        /// <exception cref="Exception">Unsupported channel count, or the predictor run failed.</exception>
        private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
        {
            if (srcs.Length == 0)
            {
                return new PaddleOcrRecognizerResult[0];
            }

            for (int i = 0; i < srcs.Length; ++i)
            {
                if (srcs[i].Empty())
                {
                    throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
                }
            }

            // All images of the batch are brought to the model height and to the widest scaled width.
            int modelHeight = recShape.Height;
            int maxWidth = (int)Math.Ceiling(srcs.Max(item =>
            {
                OpenCvSharp.Size size = item.Size();
                return 1.0 * size.Width / size.Height * modelHeight;
            }));

            int index = 0;

            Mat[] normalizeds = srcs
                .Select(item =>
                {
                    Mat channel3;
                    switch (item.Channels())
                    {
                        case 4:
                            channel3 = item.CvtColor(ColorConversionCodes.RGBA2BGR);
                            break;
                        case 3:
                            channel3 = item.Clone();
                            break;
                        case 1:
                            channel3 = item.CvtColor(ColorConversionCodes.GRAY2RGB);
                            break;
                        default:
                            throw new Exception("Unexpect src channel: {" + item.Channels() + "}, allow: (1/3/4)");
                    }

                    // Only the normalized copy leaves this lambda; the intermediates are released here.
                    using (channel3)
                    using (Mat resized = ResizePadding(channel3, modelHeight, maxWidth))
                    {
                        Cv2.ImShow("resized" + index.ToString(), resized);
                        index++;
                        return Normalize(resized);
                    }
                })
                .ToArray();

            try
            {
                using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
                {
                    int channel = normalizeds[0].Channels();
                    input.Shape = new[] { normalizeds.Length, channel, modelHeight, maxWidth };
                    float[] data = ExtractMat(normalizeds, channel, modelHeight, maxWidth);
                    input.SetData(data);
                }
            }
            finally
            {
                // The pixel data now lives in the tensor; the normalized Mats are no longer needed.
                foreach (Mat normalized in normalizeds)
                {
                    normalized.Dispose();
                }
            }

            if (!rec_predictor.Run())
            {
                throw new Exception($"PaddlePredictor(Recognizer) run failed.");
            }

            using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
            {
                float[] data = output.GetData<float>();
                int[] shape = output.Shape;

                // Pin before the try block so that Free() is only reached for an allocated handle.
                GCHandle dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
                try
                {
                    IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
                    int labelCount = shape[2];
                    int charCount = shape[1];

                    return Enumerable.Range(0, shape[0])
                        .Select(i =>
                        {
                            StringBuilder sb = new StringBuilder();
                            int lastIndex = 0;
                            float score = 0;
                            for (int n = 0; n < charCount; ++n)
                            {
                                // 1 x labelCount view onto the scores of step n of image i.
                                using (Mat mat = new Mat(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float)))
                                {
                                    int[] maxIdx = new int[2];
                                    mat.MinMaxIdx(out double _, out double maxVal, new int[0], maxIdx);

                                    // Index 0 is skipped, and so is a label identical to the previous step's.
                                    if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
                                    {
                                        score += (float)maxVal;
                                        sb.Append(GetLabelByIndex(maxIdx[1]));
                                    }
                                    lastIndex = maxIdx[1];
                                }
                            }

                            // Avoid 0 / 0 = NaN when nothing was decoded.
                            float meanScore = sb.Length > 0 ? score / sb.Length : 0f;
                            return new PaddleOcrRecognizerResult(sb.ToString(), meanScore);
                        })
                        .ToArray();
                }
                finally
                {
                    dataHandle.Free();
                }
            }
        }

        /// <summary>
        /// Packs a batch of images into one planar float array laid out as
        /// [image][channel][row][column].
        /// </summary>
        /// <param name="srcs">Images of the batch. NOTE(review): assumed to be 32-bit float,
        /// as produced by Normalize — confirm for other callers.</param>
        /// <param name="channel">Channel count every image must have.</param>
        /// <param name="height">Height every image must have.</param>
        /// <param name="width">Width every image must have.</param>
        /// <returns>Array of length srcs.Length * channel * height * width.</returns>
        /// <exception cref="Exception">An image has a different channel count or size.</exception>
        private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
        {
            float[] result = new float[srcs.Length * channel * width * height];
            GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
            IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
            try
            {
                for (int i = 0; i < srcs.Length; ++i)
                {
                    Mat src = srcs[i];
                    if (src.Channels() != channel)
                    {
                        throw new Exception($"src[{i}] channel={src.Channels()}, expected {channel}");
                    }

                    // With a mismatching size OpenCV would reallocate the destination header and
                    // the pinned buffer would silently stay zero, so fail loudly instead.
                    if (src.Rows != height || src.Cols != width)
                    {
                        throw new Exception($"src[{i}] size={src.Cols}x{src.Rows}, expected {width}x{height}");
                    }

                    for (int c = 0; c < channel; ++c)
                    {
                        // Mat header that points at plane c of image i inside the pinned array.
                        using (Mat dest = new Mat(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * height * width * sizeof(float)))
                        {
                            Cv2.ExtractChannel(src, dest, c);
                        }
                    }
                }
                return result;
            }
            finally
            {
                resultHandle.Free();
            }
        }

        /// <summary>
        /// Maps a 1-based class index to its dictionary entry. The index one past the end of the
        /// dictionary maps to the empty string.
        /// </summary>
        /// <param name="x">Class index.</param>
        /// <returns>The label text, or "" for index Labels.Count + 1.</returns>
        /// <exception cref="Exception">The index is outside both of these cases.</exception>
        string GetLabelByIndex(int x)
        {
            bool isDictionaryEntry = x > 0 && x <= Labels.Count;
            if (isDictionaryEntry)
            {
                return Labels[x - 1];
            }

            if (x == Labels.Count + 1)
            {
                return "";
            }

            throw new Exception("Unable to GetLabelByIndex: index {" + x + "} out of range {" + Labels.Count + "}, OCR model or labels not matched?");
        }

        /// <summary>
        /// Fits <paramref name="src"/> into <paramref name="shape"/>: an image that is relatively
        /// wider than the shape is cropped on the right, the result is scaled to the shape's
        /// height, and any remaining width is padded with black.
        /// </summary>
        /// <param name="src">Image to fit; it is not modified.</param>
        /// <param name="shape">Target shape; its Width and Height are used.</param>
        /// <returns>A new Mat; the caller owns it.</returns>
        private Mat ResizePadding(Mat src, OcrShape shape)
        {
            OpenCvSharp.Size srcSize = src.Size();

            // Compare the aspect ratios by cross-multiplication. The previous int / int division
            // truncated both sides, so e.g. a 4.5:1 image was not recognised as wider than 4:1.
            bool widerThanShape = (long)srcSize.Width * shape.Height > (long)shape.Width * srcSize.Height;

            using (Mat roi = widerThanShape ?
                src[0, srcSize.Height, 0, (int)Math.Floor(1.0 * srcSize.Height * shape.Width / shape.Height)] :
                src.Clone())
            {
                double scaleRate = 1.0 * shape.Height / srcSize.Height;
                Mat resized = roi.Resize(new OpenCvSharp.Size(Math.Floor(roi.Width * scaleRate), shape.Height));
                if (resized.Width < shape.Width)
                {
                    Cv2.CopyMakeBorder(resized, resized, 0, 0, 0, shape.Width - resized.Width, BorderTypes.Constant, Scalar.Black);
                }
                return resized;
            }
        }

        /// <summary>
        /// Rotates <paramref name="src"/> by 180 degrees in place when the direction classifier
        /// says it is upside down, and logs the decision to the console.
        /// </summary>
        /// <param name="src">Image with one or three channels; it may be modified in place.</param>
        /// <returns>The same Mat instance that was passed in.</returns>
        /// <exception cref="ArgumentException"><paramref name="src"/> is empty.</exception>
        /// <exception cref="NotSupportedException">The channel count is neither 1 nor 3.</exception>
        public Mat CLSPredictorRun(Mat src)
        {
            if (src.Empty())
            {
                throw new ArgumentException("src size should not be 0, wrong input picture provided?");
            }

            if (!(src.Channels() == 3 || src.Channels() == 1))
            {
                throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
            }

            bool rotate = ShouldRotate180(src);
            if (rotate)
            {
                Cv2.Rotate(src, src, RotateFlags.Rotate180);
            }

            // The negative branch used to print the misspelt "Flase".
            Console.WriteLine(rotate ? "ShouldRotate180:True" : "ShouldRotate180:False");
            return src;
        }

        /// <summary>
        /// Runs the direction classifier on <paramref name="src"/> and reports whether the image
        /// should be rotated by 180 degrees.
        /// </summary>
        /// <param name="src">Image with one or three channels; it is not modified.</param>
        /// <returns>True when the best-scoring class has an odd index and its score exceeds
        /// <c>RotateThreshold</c>.</returns>
        /// <exception cref="ArgumentException"><paramref name="src"/> is empty.</exception>
        /// <exception cref="NotSupportedException">The channel count is neither 1 nor 3.</exception>
        /// <exception cref="Exception">The classifier predictor reported a failed run.</exception>
        public bool ShouldRotate180(Mat src)
        {
            if (src.Empty())
            {
                throw new ArgumentException("src size should not be 0, wrong input picture provided?");
            }

            if (!(src.Channels() == 3 || src.Channels() == 1))
            {
                throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
            }

            // The default OcrShape used in the classification model
            OcrShape shape = new OcrShape(3, 192, 48);

            // The preprocessed copies are only needed until the tensor is filled; 'using' also
            // releases them when anything below throws.
            using (Mat resized = ResizePadding(src, shape))
            using (Mat normalized = Normalize(resized))
            using (PaddleTensor input = cls_predictor.GetInputTensor(cls_predictor.InputNames[0]))
            {
                input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
                float[] data = ExtractMat(normalized);
                input.SetData(data);
            }

            if (!cls_predictor.Run())
            {
                throw new Exception("PaddlePredictor(Classifier) run failed.");
            }

            using (PaddleTensor output = cls_predictor.GetOutputTensor(cls_predictor.OutputNames[0]))
            {
                float[] softmax = output.GetData<float>();

                // Arg-max over the class scores.
                float score = 0;
                int label = 0;
                for (int i = 0; i < softmax.Length; ++i)
                {
                    if (softmax[i] > score)
                    {
                        score = softmax[i];
                        label = i;
                    }
                }

                return label % 2 == 1 && score > RotateThreshold;
            }
        }

    }
}

Demo下载

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/726511.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

15年前的手机并没有jvm虚拟机,为何可以运行Java游戏

2000年代初期,随着移动通信技术的发展,手机逐渐普及。那个时代的手机功能相对比较单一,主要用于打电话和发送短信。但是,随着技术的进步,人们开始在手机上玩游戏,而其中最受欢迎的游戏就是Java游戏。在那个…

ChatLaw,开源了!

公众号关注 “GitHubDaily” 设为 “星标”,每天带你逛 GitHub! 最近这段时间,AI 的整体热度有所下降,但是 AI 技术在各行各业的探索脚步,却一直没有停止。 在 ChatGPT 刚发布时,有不少业内人士认为…

【浏览器篇】记录下浏览器保存PDF文件不同方式的小区别

【浏览器篇】记录下浏览器保存PDF文件不同方式的小区别 以前不太注意这些,最近搞文档比较多才发现为何保存的一部分PDF文件里面字体可以复制可以搜索,一部分保存的PDF里面的字体却无法复制、无法搜索等,发现是不同保存方式得到的文档权限不一…

SQL注入攻击原理 实战

我来进行实战了,总在看教程。 文章目录 前言一,网站是否存在sql漏洞二、判断一下字段3. 判断显点4.查找相关信息1.查询数据库2.版本3.数据库表名4.字段名5,查询 总结 前言 提示:这里可以添加本文要记录的大概内容: 前言…

华为OD机试真题 Python 实现【学校的位置】【2023Q1 100分】,附详细解题思路

目录 一、题目描述二、输入描述三、输出描述四、Python算法源码五、效果展示1、输入2、输出3、说明 一、题目描述 为了解决新学期学生暴涨的问题,小乐村要建所新学校。考虑到学生上学安全问题,需要所有学生家到学校距离最短。假设学校和所有的学生家…

unity+pico neo3入门教程1-基础传送

tips:之前入门教程如果没有左手柄,查看一下自己的手柄设置,左右手柄, Helloworld型 1.基础传送,调式地面传送功能,通过手柄默认的“握手键”,瞬移, VR头显,添加Teleport…

Go语言远程调试

Go语言远程调试 1、安装dlv # 安装dlv $ go install github.com/go-delve/delve/cmd/dlvlatest$ dlv version Delve Debugger Version: 1.20.1 Build: $Id: 96e65b6c615845d42e0e31d903f6475b0e4ece6e $2、命令行远程调试 我们远程(Linux服务器)有如下代码: [ro…

(四)Kafka 消费者

文章目录 1. Kafka 消费者相关概念消费者和消费者组(1)横向伸缩消费者(2)横向伸缩消费者组 分区再平衡再均衡的类型(1)主动再均衡(2)协作再均衡(增量再均衡) …

MyBatisAnnotationSqlInjection.ql学习

源码位置 java\ql\src\experimental\Security\CWE\CWE-089 源代码 /*** name SQL injection in MyBatis annotation* description Constructing a dynamic SQL statement with input that comes from an* untrusted source could allow an attacker to modify …

【UE5 Cesium】14-Cesium for Unreal 加载服务器上的倾斜摄影

目录 前言 步骤 一、下载安装tomcat 10 二、下载安装JDK 三、启动Tomcat 四、Tomcat加载倾斜摄影 五、UE中加载Tomcat上的倾斜摄影 前言 上一篇文章(【UE5 Cesium】13-Cesium for Unreal 加载本地倾斜摄影)介绍了如何在UE中加载本地倾斜摄影…

链表专题1—24. 两两交换链表中的节点 234.回文链表 143.重排链表 141.环形链表 142.环形链表II 160.链表相交 C++实现

文章目录 24. 两两交换链表中的节点234.回文链表链表转数组统计长度反转后半部分链表 快慢指针 143. 重排链表数组 双指针 超时双队列反转和插入链表 141. 环形链表142.环形链表II160.链表相交 24. 两两交换链表中的节点 迭代法,时间复杂度: $O(n)$…

App store里简单好用的便签app有哪些?

作为一个打工人,我经常需要一个简单而又好用的便签应用来记录我的各种事务和备忘。我曾在App Store里尝试了许多便签应用,但有一款应用真正让我留下了深刻的印象——敬业签。 敬业签的简单和易用性让我爱不释手。无论是添加新的便签,设置提醒…

基础大模型能像人类一样标注数据吗?

自从 ChatGPT 出现以来,我们见证了大语言模型 (LLM) 领域前所未有的发展,尤其是对话类模型,经过微调以后可以根据给出的提示语 (prompt) 来完成相关要求和命令。然而,直到如今我们也无法对比这些大模型的性能,因为缺乏…

为什么程序员更容易抑郁?是因为...

【1】 前段时间,有一位朋友,在后台留言: 《谢谢你听我吐槽,说出来感觉好了很多》 这位程序员朋友在深圳大厂,35岁,10年研发经验,倍感抑郁,吐露了自己的近况: (…

IDE /skipping incompatible xxx_d.dll when searching for -lxxx_d

文章目录 概述场景复现用以测试的代码编译器位数不匹配导致?保持编译器类型一致再验证编译器位数的影响MingW下调用OS的库咋不告警?以mingW下使用winSocket为例MingW下网络编程的头文件分析该环境下链接的ws2_32库文件在哪里?mingW为啥可以兼容window下的动态库 概…

MySQL自治平台建设的内核原理及实践(下)

总第566篇 2023年 第018篇 本文整理自美团技术沙龙第75期的主题分享《美团数据库攻防演练建设实践》,系超大规模数据库集群保稳系列(内含4个议题的PPT及视频)的第4篇文章。 本文作者在演讲后根据同学们的反馈,补充了很多技术细节…

【Web狗自虐系列1】Pwn入门之初级ROP

0x0 栈介绍 栈式一种典型的后进先出的数据结构,其操作主要有压栈(push)与出栈(pop)两种操作 压栈与出栈都是操作的栈顶 高级语言在运行时都会被转换为汇编程序,在汇编程序运行过程中,充分利用了这一数据结构。每个程序在运行时都有虚拟地址…

国产化适配再进一步,融云完成欧拉、TDSQL、优炫等多方适配

近期,融云完成了与开源操作系统欧拉(openEuler)、企业级数据库 TDSQL 和优炫的适配工作,国产化上下游生态适配之路再次迈进坚实一步。关注【融云 RongCloud】,了解协同办公平台更多干货。 欧拉(openEuler…

DoTween 学习

部分参考:DOTween中文详解(持续更新)_一条爱玩游戏的咸鱼的博客-CSDN博客 官方文档:DOTween - Documentation (demigiant.com) 什么是Tween(补间) 补间,一般指补间动画,例如uni…

Ceph集群的部署

一、Ceph集群的部署 1、集群环境 1.1 集群架构 主机名业务IP存储IP服务器配置系统类型集群角色ceph-mon1-deploy172.17.10.61/16192.168.10.61/242C/4GUbuntu1804mondeploy(部署节点)ceph-mon2172.17.10.62/16192.168.10.62/242C/4GUbuntu1804mon(监控节点)ceph-mon3172.17.…