当前位置:   article > 正文

C# PaddleInference OCR识别 学习研究Demo_paddle ocr c#

paddle ocr c#

目录

说明

效果

项目

代码

下载 


说明

基于开源项目 https://github.com/sdcb/PaddleSharp

开发环境:VS2022 + .NET Framework 4.8 + OpenCvSharp4 + Sdcb.PaddleInference

效果

项目

代码

using Sdcb.PaddleInference.Native;
using Sdcb.PaddleInference;
using System;
using System.Collections.Generic;
using OpenCvSharp.Extensions;
using OpenCvSharp;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.Globalization;
using System.IO;

namespace PaddleInference_OCR识别
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        Bitmap bmp;
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
        string img = "";
        string startupPath = "";

        int MaxSize = 1536;
        float? BoxThreshold = 0.3f;
        float? BoxScoreThreahold = 0.7f;
        int? DilatedSize = 2;
        int MinSize = 3;
        float UnclipRatio = 2.0f;

        Mat src;
        PaddlePredictor det_predictor;

        RotatedRect[] rects;

        bool Enable180Classification { get; set; } = true;
        bool AllowRotateDetection { get; set; } = true;
        double RotateThreshold { get; } = 0.75;
        Mat[] mats;
        PaddlePredictor cls_predictor;

        OcrShape recShape = new OcrShape(3, 320, 48);
        PaddlePredictor rec_predictor;

        public IReadOnlyList<string> Labels;

        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;

        private unsafe void Form1_Load(object sender, EventArgs e)
        {

            string startupPath = Application.StartupPath;

            IntPtr det_ptr = PaddleNative.PD_ConfigCreate();

            Encoding PaddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage) : Encoding.UTF8;

            //检测模型路径
            String det_programPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdmodel";
            String det_paramsPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdiparams";

            //方式一
            //byte[] programBytes = PaddleEncoding.GetBytes(det_programPath);
            //byte[] paramsBytes = PaddleEncoding.GetBytes(det_paramsPath);
            //fixed (byte* programPtr = programBytes)
            //fixed (byte* paramsPtr = paramsBytes)
            //{
            //    PaddleNative.PD_ConfigSetModel(det_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
            //}

            //方式二  读自己加密后的模型文件,解密后写入byte[]
            Stream Steam = new FileStream(det_programPath, FileMode.Open, FileAccess.Read, FileShare.Read);
            byte[] programBuffer = new byte[Steam.Length];
            Steam.Read(programBuffer, 0, programBuffer.Length);

            Steam = new FileStream(det_paramsPath, FileMode.Open, FileAccess.Read, FileShare.Read);
            byte[] paramsBuffer = new byte[Steam.Length];
            Steam.Read(paramsBuffer, 0, paramsBuffer.Length);

            fixed (byte* pprogram = programBuffer)
            fixed (byte* pparams = paramsBuffer)
            {
                PaddleNative.PD_ConfigSetModelBuffer(det_ptr,
                    (IntPtr)pprogram, programBuffer.Length,
                    (IntPtr)pparams, paramsBuffer.Length);
            }

            det_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(det_ptr));

            //方向分类模型
            IntPtr cls_ptr = PaddleNative.PD_ConfigCreate();

            String cls_programPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdmodel";
            String cls_paramsPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdiparams";

            byte[] programBytes = PaddleEncoding.GetBytes(cls_programPath);
            byte[] paramsBytes = PaddleEncoding.GetBytes(cls_paramsPath);
            fixed (byte* programPtr = programBytes)
            fixed (byte* paramsPtr = paramsBytes)
            {
                PaddleNative.PD_ConfigSetModel(cls_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
            }

            cls_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(cls_ptr));

            //识别模型
            IntPtr rec_ptr = PaddleNative.PD_ConfigCreate();

            String rec_programPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdmodel";
            String rec_paramsPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdiparams";

            byte[] rec_programBytes = PaddleEncoding.GetBytes(rec_programPath);
            byte[] rec_paramsBytes = PaddleEncoding.GetBytes(rec_paramsPath);
            fixed (byte* rec_programPtr = rec_programBytes)
            fixed (byte* rec_paramsPtr = rec_paramsBytes)
            {
                PaddleNative.PD_ConfigSetModel(rec_ptr, (IntPtr)rec_programPtr, (IntPtr)rec_paramsPtr);
            }

            rec_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(rec_ptr));

            //Labels
            String labelsPath = startupPath + "\\ppocr_keys.txt";
            Steam = new FileStream(labelsPath, FileMode.Open, FileAccess.Read, FileShare.Read);
            StreamReader reader = new StreamReader(Steam);
            List<string> tempList = new List<string>();
            while (!reader.EndOfStream)
            {
                tempList.Add(reader.ReadLine());
            }
            reader.Dispose();
            Steam.Dispose();
            Labels = tempList;
        }

        private void button1_Click(object sender, EventArgs e)
        {
            OpenFileDialog ofd = new OpenFileDialog();
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK) return;

            pictureBox1.Image = null;

            img = ofd.FileName;
            bmp = new Bitmap(img);
            pictureBox1.Image = new Bitmap(img);
            textBox1.Text = "";
        }

        private void button2_Click(object sender, EventArgs e)
        {
            textBox1.Text = "";
            Application.DoEvents();
            if (img == "")
            {
                return;
            }
            dt1 = DateTime.Now;
            src = Cv2.ImRead(img);

            Mat resized = MatResize(src, MaxSize);
            //Cv2.ImShow("resized", resized);
            Mat padded = MatPadding32(resized);
            //Cv2.ImShow("padded", padded);
            Mat normalized = Normalize(padded);
            Cv2.ImShow("normalized", normalized);
            OpenCvSharp.Size resizedSize = resized.Size();
            using (PaddleTensor input = det_predictor.GetInputTensor(det_predictor.InputNames[0]))
            {
                input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
                float[] setData = ExtractMat(normalized);
                input.SetData(setData);
            }

            if (!det_predictor.Run())
            {
                throw new Exception("PaddlePredictor(Detector) run failed.");
            }

            using (PaddleTensor output = det_predictor.GetOutputTensor(det_predictor.OutputNames[0]))
            {
                float[] data = output.GetData<float>();
                int[] shape = output.Shape;

                Mat pred = new Mat(shape[2], shape[3], MatType.CV_32FC1, data);
                Cv2.ImShow("pred", pred);
                Mat cbuf = new Mat();

                Mat roi = pred[0, resizedSize.Height, 0, resizedSize.Width];
                roi.ConvertTo(cbuf, MatType.CV_8UC1, 255);
                //Cv2.ImShow("roi", roi);
                Mat dilated = new Mat();
                Mat binary = BoxThreshold != null ?
                   cbuf.Threshold((int)(BoxThreshold * 255), 255, ThresholdTypes.Binary) :
                   cbuf;
                //Cv2.ImShow("binary", binary);
                if (DilatedSize != null)
                {
                    Mat ones = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(DilatedSize.Value, DilatedSize.Value));
                    Cv2.Dilate(binary, dilated, ones);
                    ones.Dispose();
                }
                else
                {
                    Cv2.CopyTo(binary, dilated);
                }
                //Cv2.ImShow("dilated", dilated);
                OpenCvSharp.Point[][] contours = dilated.FindContoursAsArray(RetrievalModes.List, ContourApproximationModes.ApproxSimple);

                OpenCvSharp.Size size = src.Size();
                double scaleRate = 1.0 * src.Width / resizedSize.Width;

                rects = contours
                    .Where(x => BoxScoreThreahold == null || GetScore(x, pred) > BoxScoreThreahold)
                    .Select(x => Cv2.MinAreaRect(x))
                    .Where(x => x.Size.Width > MinSize && x.Size.Height > MinSize)
                    .Select(rect =>
                    {
                        float minEdge = Math.Min(rect.Size.Width, rect.Size.Height);
                        Size2f newSize = new Size2f(
                            (rect.Size.Width + UnclipRatio * minEdge) * scaleRate,
                            (rect.Size.Height + UnclipRatio * minEdge) * scaleRate);
                        RotatedRect largerRect = new RotatedRect(rect.Center * scaleRate, newSize, rect.Angle);
                        return largerRect;
                    })
                    .OrderBy(v => v.Center.Y)
                    .ThenBy(v => v.Center.X)
                    .ToArray();

                binary.Dispose();
                roi.Dispose();
                cbuf.Dispose();
                pred.Dispose();
                dilated.Dispose();


                dt2 = DateTime.Now;
                StringBuilder sb = new StringBuilder();
                sb.AppendLine("-----------------------------------\n");
                sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");

                textBox1.Text = sb.ToString();

                //绘图
                Mat src2 = Cv2.ImRead(img);
                for (int i = 0; i < rects.Length; i++)
                {
                    Scalar scalar = Scalar.RandomColor();
                    List<OpenCvSharp.Point> temp = new List<OpenCvSharp.Point>();
                    foreach (var item2 in rects[i].Points())
                    {
                        temp.Add(new OpenCvSharp.Point(item2.X, item2.Y));
                    }
                    List<List<OpenCvSharp.Point>> lltemp = new List<List<OpenCvSharp.Point>>();
                    lltemp.Add(temp);
                    Cv2.Polylines(src2, lltemp, true, scalar);
                }

                if (pictureBox1.Image != null)
                {
                    pictureBox1.Image.Dispose();
                }

                pictureBox1.Image = BitmapConverter.ToBitmap(src2);
                src2.Dispose();

            }
        }

        private float GetScore(OpenCvSharp.Point[] contour, Mat pred)
        {
            int width = pred.Width;
            int height = pred.Height;
            int[] boxX = contour.Select(v => v.X).ToArray();
            int[] boxY = contour.Select(v => v.Y).ToArray();

            int xmin = Clamp(boxX.Min(), 0, width - 1);
            int xmax = Clamp(boxX.Max(), 0, width - 1);
            int ymin = Clamp(boxY.Min(), 0, height - 1);
            int ymax = Clamp(boxY.Max(), 0, height - 1);

            OpenCvSharp.Point[] rootPoints = contour
                .Select(v => new OpenCvSharp.Point(v.X - xmin, v.Y - ymin))
                .ToArray();
            Mat mask = new Mat(ymax - ymin + 1, xmax - xmin + 1, MatType.CV_8UC1, Scalar.Black);
            mask.FillPoly(new[] { rootPoints }, new Scalar(1));

            Mat croppedMat = pred[ymin, ymax + 1, xmin, xmax + 1];
            float score = (float)croppedMat.Mean(mask).Val0;
            return score;
        }

        public int Clamp(int val, int min, int max)
        {
            if (val < min)
            {
                return min;
            }
            else if (val > max)
            {
                return max;
            }
            return val;
        }

        float[] ExtractMat(Mat src)
        {
            int rows = src.Rows;
            int cols = src.Cols;
            float[] array = new float[rows * cols * 3];
            GCHandle gCHandle = default(GCHandle);
            try
            {
                gCHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
                IntPtr intPtr = gCHandle.AddrOfPinnedObject();
                for (int i = 0; i < src.Channels(); i++)
                {
                    Mat dest = new Mat(rows, cols, MatType.CV_32FC1, intPtr + i * rows * cols * 4, 0L);
                    Cv2.ExtractChannel(src, dest, i);
                    dest.Dispose();
                }
                return array;
            }
            finally
            {
                gCHandle.Free();
            }
        }

        private Mat MatResize(Mat src, int? maxSize)
        {
            if (maxSize == null) return src.Clone();

            OpenCvSharp.Size size = src.Size();
            int longEdge = Math.Max(size.Width, size.Height);
            double scaleRate = 1.0 * maxSize.Value / longEdge;

            return scaleRate < 1.0 ?
                src.Resize(OpenCvSharp.Size.Zero, scaleRate, scaleRate) :
                src.Clone();
        }

        private Mat MatPadding32(Mat src)
        {
            OpenCvSharp.Size size = src.Size();
            OpenCvSharp.Size newSize = new OpenCvSharp.Size(
                32 * Math.Ceiling(1.0 * size.Width / 32),
                32 * Math.Ceiling(1.0 * size.Height / 32));

            return src.CopyMakeBorder(0, newSize.Height - size.Height, 0, newSize.Width - size.Width, BorderTypes.Constant, Scalar.Black);
        }

        private Mat Normalize(Mat src)
        {
            Mat normalized = new Mat();
            src.ConvertTo(normalized, MatType.CV_32FC3, 1.0 / 255);
            Mat[] bgr = normalized.Split();
            float[] scales = new[] { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
            float[] means = new[] { 0.485f, 0.456f, 0.406f };
            for (int i = 0; i < bgr.Length; ++i)
            {
                bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1.0 * scales[i], (0.0 - means[i]) * scales[i]);
            }
            normalized.Dispose();
            Mat dest = new Mat();
            Cv2.Merge(bgr, dest);
            foreach (Mat channel in bgr)
            {
                channel.Dispose();
            }
            return dest;
        }

        /// <summary>
        /// Cuts the region described by <paramref name="rect"/> out of
        /// <paramref name="src"/> and warps it into an upright image of the
        /// rectangle's size.
        /// </summary>
        /// <param name="src">Image to crop from.</param>
        /// <param name="rect">Rotated rectangle in the coordinate system of <paramref name="src"/>.</param>
        /// <returns>A new Mat containing the rectified crop.</returns>
        private Mat GetRotateCropImage(Mat src, RotatedRect rect)
        {
            bool wider = rect.Size.Width > rect.Size.Height;
            float angle = rect.Angle;
            OpenCvSharp.Size srcSize = src.Size();
            Rect boundingRect = rect.BoundingRect();

            // How far the bounding rectangle sticks out of the image on each side (0 when it does not).
            int expTop = Math.Max(0, 0 - boundingRect.Top);
            int expBottom = Math.Max(0, boundingRect.Bottom - srcSize.Height);
            int expLeft = Math.Max(0, 0 - boundingRect.Left);
            int expRight = Math.Max(0, boundingRect.Right - srcSize.Width);

            // NOTE(review): the offset added here is (x: expTop, y: expLeft). Since the padded
            // image below starts at the bounding rectangle's own top-left corner, this looks
            // swapped / unnecessary whenever the rectangle crosses the top or left image edge
            // - confirm against the upstream implementation before changing.
            Rect rectToExp = boundingRect + new OpenCvSharp.Point(expTop, expLeft);
            // Bounding rectangle clipped to the part that lies inside the image.
            Rect roiRect = Rect.FromLTRB(
                boundingRect.Left + expLeft,
                boundingRect.Top + expTop,
                boundingRect.Right - expRight,
                boundingRect.Bottom - expBottom);
            Mat boundingMat = src[roiRect];
            // Pad the clipped region back out by replicating the border pixels.
            Mat expanded = boundingMat.CopyMakeBorder(expTop, expBottom, expLeft, expRight, BorderTypes.Replicate);
            // Corner points of the rotated rectangle, translated by the origin of rectToExp.
            Point2f[] rp = rect.Points()
                .Select(v => new Point2f(v.X - rectToExp.X, v.Y - rectToExp.Y))
                .ToArray();

            // Default corner order used as the source quadrilateral of the warp.
            Point2f[] srcPoints = new[] { rp[0], rp[3], rp[2], rp[1] };

            // Wide rectangles with an angle in [0, 45) use a different corner order.
            if (wider == true && angle >= 0 && angle < 45)
            {
                srcPoints = new[] { rp[1], rp[2], rp[3], rp[0] };
            }

            // Destination: an axis-aligned rectangle of the rotated rectangle's size.
            var ptsDst0 = new Point2f(0, 0);
            var ptsDst1 = new Point2f(rect.Size.Width, 0);
            var ptsDst2 = new Point2f(rect.Size.Width, rect.Size.Height);
            var ptsDst3 = new Point2f(0, rect.Size.Height);

            Mat matrix = Cv2.GetPerspectiveTransform(srcPoints, new[] { ptsDst0, ptsDst1, ptsDst2, ptsDst3 });

            Mat dest = expanded.WarpPerspective(matrix, new OpenCvSharp.Size(rect.Size.Width, rect.Size.Height), InterpolationFlags.Nearest, BorderTypes.Replicate);

            // Tall results are transposed; wide results with an angle above 45 are flipped around the X axis.
            if (!wider)
            {
                Cv2.Transpose(dest, dest);
            }
            else if (angle > 45)
            {
                Cv2.Flip(dest, dest, FlipMode.X);
            }

            boundingMat.Dispose();
            expanded.Dispose();
            matrix.Dispose();

            return dest;
        }

        private Rect GetCropedRect(Rect rect, OpenCvSharp.Size size)
        {
            return Rect.FromLTRB(
                Clamp(rect.Left, 0, size.Width),
                Clamp(rect.Top, 0, size.Height),
                Clamp(rect.Right, 0, size.Width),
                Clamp(rect.Bottom, 0, size.Height));
        }

        private void Form1_FormClosing(object sender, FormClosingEventArgs e)
        {
            det_predictor.Dispose();
            rec_predictor.Dispose();
        }

        /// <summary>
        /// 方向检测
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void button4_Click(object sender, EventArgs e)
        {
            textBox1.Text = "";
            Application.DoEvents();
            if (rects == null)
            {
                return;
            }
            if (rects == null)
            {
                return;
            }
            dt1 = DateTime.Now;
            mats =
               rects.Select(rect =>
               {
                   Mat roi = AllowRotateDetection ? GetRotateCropImage(src, rect) : src[GetCropedRect(rect.BoundingRect(), src.Size())];
                   return Enable180Classification ? CLSPredictorRun(roi) : roi;
               })
               .ToArray();

            dt2 = DateTime.Now;
            StringBuilder sb = new StringBuilder();
            sb.AppendLine("-----------------------------------\n");
            sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");

            textBox1.Text = sb.ToString();
        }

        private void button3_Click(object sender, EventArgs e)
        {
            textBox1.Text = "";
            Application.DoEvents();
            if (rects == null || mats == null)
            {
                return;
            }
            dt1 = DateTime.Now;
            try
            {
                int recognizeBatchSize = 0;
                //Cv2.ImShow("mats", mats[0]);
                PaddleOcrRecognizerResult[] porr = RecognizerRun(mats, recognizeBatchSize);
                dt2 = DateTime.Now;
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < porr.Length; i++)
                {
                    sb.AppendLine("("+ porr[i].Score+")"+ porr[i].Text );
                }
                sb.AppendLine("-----------------------------------\n");
                sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
                textBox1.Text = sb.ToString();
            }
            finally
            {
                foreach (Mat mat in mats)
                {
                    mat.Dispose();
                }
            }
        }

        public PaddleOcrRecognizerResult[] RecognizerRun(Mat[] srcs, int batchSize = 0)
        {
            if (srcs.Length == 0)
            {
                return new PaddleOcrRecognizerResult[0];
            }

            int chooseBatchSize = batchSize != 0 ? batchSize : Math.Min(8, Environment.ProcessorCount);
            PaddleOcrRecognizerResult[] allResult = new PaddleOcrRecognizerResult[srcs.Length];

            return srcs
                .Select((x, i) => (mat: x, i))
                .OrderBy(x => x.mat.Width)
                .Chunk(chooseBatchSize)
                .Select(x => (result: RunMulti(x.Select(x2 => x2.mat).ToArray()), ids: x.Select(x2 => x2.i).ToArray()))
                .SelectMany(x => x.result.Zip(x.ids, (result, i) => (result, i)))
                .OrderBy(x => x.i)
                .Select(x => x.result)
                .ToArray();
        }

        private Mat ResizePadding(Mat src, int height, int targetWidth)
        {
            OpenCvSharp.Size size = src.Size();
            float whRatio = 1.0f * size.Width / size.Height;
            int width = (int)Math.Ceiling(height * whRatio);

            if (width == targetWidth)
            {
                return src.Resize(new OpenCvSharp.Size(width, height));
            }
            else
            {
                //using Mat resized = src.Resize(new OpenCvSharp.Size(width, height));
                Mat resized = src.Resize(new OpenCvSharp.Size(width, height));
                return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
            }
        }

        private PaddleOcrRecognizerResult Run(Mat src) => RunMulti(new[] { src }).Single();

        private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
        {
            if (srcs.Length == 0)
            {
                return new PaddleOcrRecognizerResult[0];
            }

            for (int i = 0; i < srcs.Length; ++i)
            {
                Mat src = srcs[i];
                if (src.Empty())
                {
                    throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
                }
            }

            int modelHeight = recShape.Height;
            int maxWidth = (int)Math.Ceiling(srcs.Max(src =>
            {
                OpenCvSharp.Size size = src.Size();
                return 1.0 * size.Width / size.Height * modelHeight;
            }));

            int index = 0;

            Mat[] normalizeds = srcs
                .Select(src =>
                {
                   
                    Mat channel3 = new Mat();
                    if (src.Channels() == 4)
                    {
                        channel3 = src.CvtColor(ColorConversionCodes.RGBA2BGR);
                    }
                    else if (src.Channels() == 3)
                    {
                        channel3 = src.Clone();
                    }
                    else if (src.Channels() == 1)
                    {
                        channel3 = src.CvtColor(ColorConversionCodes.GRAY2RGB);
                    }
                    else
                    {
                        throw new Exception("Unexpect src channel: {" + src.Channels() + "}, allow: (1/3/4)");
                    }

                    Mat resized = ResizePadding(channel3, modelHeight, maxWidth);
                    Cv2.ImShow("resized"+index.ToString(), resized);
                    index++;
                    channel3.Dispose();
                    return Normalize(resized);
                })
                .ToArray();

            using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
            {
                int channel = normalizeds[0].Channels();
                input.Shape = new[] { normalizeds.Length, channel, modelHeight, maxWidth };
                float[] data = ExtractMat(normalizeds, channel, modelHeight, maxWidth);
                input.SetData(data);
            }
            if (!rec_predictor.Run())
            {
                throw new Exception($"PaddlePredictor(Recognizer) run failed.");
            }

            using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
            {
                float[] data = output.GetData<float>();
                int[] shape = output.Shape;

                GCHandle dataHandle = default;
                try
                {
                    dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
                    IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
                    int labelCount = shape[2];
                    int charCount = shape[1];

                    return Enumerable.Range(0, shape[0])
                        .Select(i =>
                        {
                            StringBuilder sb = new StringBuilder();
                            int lastIndex = 0;
                            float score = 0;
                            for (int n = 0; n < charCount; ++n)
                            {
                                Mat mat = new Mat(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float));
                                int[] maxIdx = new int[2];
                                mat.MinMaxIdx(out double _, out double maxVal, new int[0], maxIdx);

                                if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
                                {
                                    score += (float)maxVal;
                                    sb.Append(GetLabelByIndex(maxIdx[1]));
                                }
                                lastIndex = maxIdx[1];
                                mat.Dispose();
                            }
                            return new PaddleOcrRecognizerResult(sb.ToString(), score / sb.Length);
                        })
                        .ToArray();
                }
                finally
                {
                    dataHandle.Free();
                }
            }
        }

        private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
        {
            float[] result = new float[srcs.Length * channel * width * height];
            GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
            IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
            try
            {
                for (int i = 0; i < srcs.Length; ++i)
                {
                    Mat src = srcs[i];
                    if (src.Channels() != channel)
                    {
                        throw new Exception($"src[{i}] channel={src.Channels()}, expected {channel}");
                    }
                    for (int c = 0; c < channel; ++c)
                    {
                        //using Mat dest = new Mat(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * height * width * sizeof(float));
                        Mat dest = new Mat(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * height * width * sizeof(float));
                        Cv2.ExtractChannel(src, dest, c);
                        //dest.Dispose();
                    }
                }
                return result;
            }
            finally
            {
                resultHandle.Free();
            }
        }

        string GetLabelByIndex(int x)
        {
            if (x > 0 && x <= Labels.Count)
            {
                return Labels[x - 1];
            }
            else if (x == Labels.Count + 1)
            {
                return "";
            }
            else
            {
                throw new Exception("Unable to GetLabelByIndex: index {" + x + "} out of range {" + Labels.Count + "}, OCR model or labels not matched?");
            }
        }

        private Mat ResizePadding(Mat src, OcrShape shape)
        {
            OpenCvSharp.Size srcSize = src.Size();
            Mat roi = srcSize.Width / srcSize.Height > shape.Width / shape.Height ?
                src[0, srcSize.Height, 0, (int)Math.Floor(1.0 * srcSize.Height * shape.Width / shape.Height)] :
                src.Clone();
            double scaleRate = 1.0 * shape.Height / srcSize.Height;
            Mat resized = roi.Resize(new OpenCvSharp.Size(Math.Floor(roi.Width * scaleRate), shape.Height));
            if (resized.Width < shape.Width)
            {
                Cv2.CopyMakeBorder(resized, resized, 0, 0, 0, shape.Width - resized.Width, BorderTypes.Constant, Scalar.Black);
            }
            roi.Dispose();
            return resized;
        }

        public Mat CLSPredictorRun(Mat src)
        {
            if (src.Empty())
            {
                throw new ArgumentException("src size should not be 0, wrong input picture provided?");
            }

            if (!(src.Channels() == 3 || src.Channels() == 1))
            {
                throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
            }

            if (ShouldRotate180(src))
            {
                Cv2.Rotate(src, src, RotateFlags.Rotate180);
                Console.WriteLine("ShouldRotate180:True");
                return src;
            }
            else
            {
                Console.WriteLine("ShouldRotate180:Flase");
                return src;
            }
        }

        public bool ShouldRotate180(Mat src)
        {
            if (src.Empty())
            {
                throw new ArgumentException("src size should not be 0, wrong input picture provided?");
            }

            if (!(src.Channels() == 3 || src.Channels() == 1))
            {
                throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
            }

            //The default OcrShape used in the classification model
            OcrShape shape = new OcrShape(3, 192, 48);
            Mat resized = ResizePadding(src, shape);
            Mat normalized = Normalize(resized);

            using (PaddleTensor input = cls_predictor.GetInputTensor(cls_predictor.InputNames[0]))
            {
                input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
                float[] data = ExtractMat(normalized);
                input.SetData(data);
            }
            if (!cls_predictor.Run())
            {
                throw new Exception("PaddlePredictor(Classifier) run failed.");
            }

            using (PaddleTensor output = cls_predictor.GetOutputTensor(cls_predictor.OutputNames[0]))
            {
                float[] softmax = output.GetData<float>();
                float score = 0;
                int label = 0;
                for (int i = 0; i < softmax.Length; ++i)
                {
                    if (softmax[i] > score)
                    {
                        score = softmax[i];
                        label = i;
                    }
                }

                resized.Dispose();
                normalized.Dispose();

                return label % 2 == 1 && score > RotateThreshold;
            }
        }

    }
}

  1. using Sdcb.PaddleInference.Native;
  2. using Sdcb.PaddleInference;
  3. using System;
  4. using System.Collections.Generic;
  5. using OpenCvSharp.Extensions;
  6. using OpenCvSharp;
  7. using System.Data;
  8. using System.Drawing;
  9. using System.Linq;
  10. using System.Text;
  11. using System.Windows.Forms;
  12. using System.Runtime.InteropServices;
  13. using System.Globalization;
  14. using System.IO;
  15. namespace PaddleInference_OCR识别
  16. {
  17. public partial class Form1 : Form
  18. {
  /// <summary>Creates the form and builds its designer-generated controls.</summary>
  public Form1()
  {
      InitializeComponent();
  }

  // Bitmap created from the image most recently chosen in button1_Click.
  Bitmap bmp;
  // Open-file dialog filter. The original listed "*.tiff" twice; the short
  // ".tif" spelling is included instead of the duplicate.
  string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
  // Path of the currently selected image; empty until one is chosen.
  string img = "";
  // Start-up directory placeholder; Form1_Load works with Application.StartupPath.
  string startupPath = "";

  // ---- Detection settings (read by button2_Click) ----
  // Upper bound for the long edge of the image passed to MatResize.
  int MaxSize = 1536;
  // Binarisation threshold (scaled by 255); null skips thresholding.
  float? BoxThreshold = 0.3f;
  // Minimum GetScore value a contour needs to be kept; null keeps every contour.
  // (The misspelled name is kept because other members reference it.)
  float? BoxScoreThreahold = 0.7f;
  // Side length of the square dilation kernel; null skips dilation.
  int? DilatedSize = 2;
  // Boxes whose width or height is not larger than this are discarded.
  int MinSize = 3;
  // Each box is enlarged by UnclipRatio * (its shorter edge) in both dimensions.
  float UnclipRatio = 2.0f;

  // Image decoded by the detection step and reused by the later steps.
  Mat src;
  PaddlePredictor det_predictor;

  // Text boxes found by the detection step.
  RotatedRect[] rects;

  // ---- Direction classification settings (read by button4_Click) ----
  bool Enable180Classification { get; set; } = true;
  bool AllowRotateDetection { get; set; } = true;
  // Classifier score that must be exceeded before a crop is rotated by 180 degrees.
  double RotateThreshold { get; } = 0.75;
  // Cropped text regions produced by button4_Click and consumed by button3_Click.
  Mat[] mats;
  PaddlePredictor cls_predictor;

  // ---- Recognition ----
  OcrShape recShape = new OcrShape(3, 320, 48);
  PaddlePredictor rec_predictor;

  // Recognition dictionary, one entry per line of ppocr_keys.txt.
  public IReadOnlyList<string> Labels;

  // Start / end timestamps used to report the elapsed time of each step.
  DateTime dt1 = DateTime.Now;
  DateTime dt2 = DateTime.Now;
  /// <summary>
  /// Loads the detection, direction-classification and recognition models and the
  /// label dictionary from the application start-up directory.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event data (unused).</param>
  private unsafe void Form1_Load(object sender, EventArgs e)
  {
      // Assign the field rather than shadowing it with a same-named local.
      startupPath = Application.StartupPath;

      // Paths are passed to the native library as ANSI bytes on Windows and UTF-8 elsewhere.
      Encoding PaddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT
          ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage)
          : Encoding.UTF8;

      // Detection model: loaded from in-memory buffers. This is the variant to use
      // when the model files are stored encrypted and decrypted into a byte[] first.
      string detDirectory = Path.Combine(startupPath, "ch_PP-OCRv3_det");
      // File.ReadAllBytes reads the whole file and closes it; a single Stream.Read
      // call is not guaranteed to fill the buffer and the streams were never closed.
      byte[] programBuffer = File.ReadAllBytes(Path.Combine(detDirectory, "inference.pdmodel"));
      byte[] paramsBuffer = File.ReadAllBytes(Path.Combine(detDirectory, "inference.pdiparams"));
      IntPtr det_ptr = PaddleNative.PD_ConfigCreate();
      fixed (byte* pprogram = programBuffer)
      fixed (byte* pparams = paramsBuffer)
      {
          PaddleNative.PD_ConfigSetModelBuffer(det_ptr,
              (IntPtr)pprogram, programBuffer.Length,
              (IntPtr)pparams, paramsBuffer.Length);
      }
      det_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(det_ptr));

      // Direction-classification model, loaded by file path.
      cls_predictor = CreatePredictorFromDirectory(
          Path.Combine(startupPath, "ch_ppocr_mobile_v2.0_cls"), PaddleEncoding);

      // Recognition model, loaded by file path.
      rec_predictor = CreatePredictorFromDirectory(
          Path.Combine(startupPath, "ch_PP-OCRv3_rec"), PaddleEncoding);

      // Labels: one dictionary entry per line.
      Labels = new List<string>(File.ReadAllLines(Path.Combine(startupPath, "ppocr_keys.txt")));
  }

  /// <summary>
  /// Creates a predictor from the inference.pdmodel / inference.pdiparams pair in
  /// <paramref name="modelDirectory"/>, passing the paths to the native library.
  /// </summary>
  private static unsafe PaddlePredictor CreatePredictorFromDirectory(string modelDirectory, Encoding pathEncoding)
  {
      // The native API takes C strings, so the encoded bytes need an explicit
      // NUL terminator; Encoding.GetBytes does not append one.
      byte[] programBytes = pathEncoding.GetBytes(Path.Combine(modelDirectory, "inference.pdmodel") + "\0");
      byte[] paramsBytes = pathEncoding.GetBytes(Path.Combine(modelDirectory, "inference.pdiparams") + "\0");

      IntPtr config = PaddleNative.PD_ConfigCreate();
      fixed (byte* programPtr = programBytes)
      fixed (byte* paramsPtr = paramsBytes)
      {
          PaddleNative.PD_ConfigSetModel(config, (IntPtr)programPtr, (IntPtr)paramsPtr);
      }
      return new PaddlePredictor(PaddleNative.PD_PredictorCreate(config));
  }
  /// <summary>
  /// Lets the user pick an image file, remembers its path in <c>img</c> and shows it
  /// in the picture box.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event data (unused).</param>
  private void button1_Click(object sender, EventArgs e)
  {
      // The dialog is a disposable component; release it once a path has been read.
      using (OpenFileDialog ofd = new OpenFileDialog())
      {
          ofd.Filter = fileFilter;
          if (ofd.ShowDialog() != DialogResult.OK)
          {
              return;
          }
          img = ofd.FileName;
      }

      // Release the bitmaps of the previously selected image before replacing them.
      System.Drawing.Image previousImage = pictureBox1.Image;
      pictureBox1.Image = null;
      if (previousImage != null)
      {
          previousImage.Dispose();
      }
      if (bmp != null)
      {
          bmp.Dispose();
      }

      bmp = new Bitmap(img);
      pictureBox1.Image = new Bitmap(img);
      textBox1.Text = "";
  }
  /// <summary>
  /// Text detection: runs the detection model on the selected image, converts the
  /// prediction map into rotated boxes (stored in <c>rects</c>), reports the elapsed
  /// time and draws the boxes onto the picture box.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event data (unused).</param>
  /// <exception cref="Exception">The detection predictor failed to run.</exception>
  private void button2_Click(object sender, EventArgs e)
  {
      textBox1.Text = "";
      Application.DoEvents();
      if (img == "")
      {
          return;
      }

      dt1 = DateTime.Now;

      // Release the previously decoded image before loading the new one.
      if (src != null)
      {
          src.Dispose();
      }
      src = Cv2.ImRead(img);
      if (src.Empty())
      {
          // The file could not be decoded; stale boxes must not be applied to it.
          rects = null;
          textBox1.Text = "Unable to read image: " + img;
          return;
      }

      // Pre-processing: limit the long edge, pad to a multiple of 32, normalise.
      OpenCvSharp.Size resizedSize;
      using (Mat resized = MatResize(src, MaxSize))
      using (Mat padded = MatPadding32(resized))
      using (Mat normalized = Normalize(padded))
      {
          Cv2.ImShow("normalized", normalized);
          resizedSize = resized.Size();

          using (PaddleTensor input = det_predictor.GetInputTensor(det_predictor.InputNames[0]))
          {
              input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
              input.SetData(ExtractMat(normalized));
          }
      }

      if (!det_predictor.Run())
      {
          throw new Exception("PaddlePredictor(Detector) run failed.");
      }

      using (PaddleTensor output = det_predictor.GetOutputTensor(det_predictor.OutputNames[0]))
      {
          float[] data = output.GetData<float>();
          int[] shape = output.Shape;

          using (Mat pred = new Mat(shape[2], shape[3], MatType.CV_32FC1, data))
          // Only the un-padded part of the prediction map is analysed.
          using (Mat roi = pred[0, resizedSize.Height, 0, resizedSize.Width])
          using (Mat cbuf = new Mat())
          using (Mat dilated = new Mat())
          {
              Cv2.ImShow("pred", pred);
              roi.ConvertTo(cbuf, MatType.CV_8UC1, 255);

              Mat binary = BoxThreshold != null
                  ? cbuf.Threshold((int)(BoxThreshold.Value * 255), 255, ThresholdTypes.Binary)
                  : cbuf;
              try
              {
                  if (DilatedSize != null)
                  {
                      using (Mat ones = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(DilatedSize.Value, DilatedSize.Value)))
                      {
                          Cv2.Dilate(binary, dilated, ones);
                      }
                  }
                  else
                  {
                      Cv2.CopyTo(binary, dilated);
                  }

                  OpenCvSharp.Point[][] contours = dilated.FindContoursAsArray(RetrievalModes.List, ContourApproximationModes.ApproxSimple);

                  // Boxes are found on the resized image; scale them back to the source image.
                  double scaleRate = 1.0 * src.Width / resizedSize.Width;
                  rects = contours
                      .Where(x => BoxScoreThreahold == null || GetScore(x, pred) > BoxScoreThreahold)
                      .Select(x => Cv2.MinAreaRect(x))
                      .Where(x => x.Size.Width > MinSize && x.Size.Height > MinSize)
                      .Select(rect =>
                      {
                          float minEdge = Math.Min(rect.Size.Width, rect.Size.Height);
                          Size2f newSize = new Size2f(
                              (rect.Size.Width + UnclipRatio * minEdge) * scaleRate,
                              (rect.Size.Height + UnclipRatio * minEdge) * scaleRate);
                          return new RotatedRect(rect.Center * scaleRate, newSize, rect.Angle);
                      })
                      .OrderBy(v => v.Center.Y)
                      .ThenBy(v => v.Center.X)
                      .ToArray();
              }
              finally
              {
                  // When thresholding is disabled, binary is cbuf itself and is released by its using block.
                  if (!ReferenceEquals(binary, cbuf))
                  {
                      binary.Dispose();
                  }
              }
          }
      }

      dt2 = DateTime.Now;

      StringBuilder sb = new StringBuilder();
      sb.AppendLine("-----------------------------------\n");
      sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
      textBox1.Text = sb.ToString();

      // Draw the detected boxes on a copy so that src stays untouched for the later steps.
      using (Mat canvas = src.Clone())
      {
          for (int i = 0; i < rects.Length; i++)
          {
              Scalar scalar = Scalar.RandomColor();
              List<OpenCvSharp.Point> temp = new List<OpenCvSharp.Point>();
              foreach (var item2 in rects[i].Points())
              {
                  temp.Add(new OpenCvSharp.Point(item2.X, item2.Y));
              }
              List<List<OpenCvSharp.Point>> lltemp = new List<List<OpenCvSharp.Point>>();
              lltemp.Add(temp);
              Cv2.Polylines(canvas, lltemp, true, scalar);
          }

          if (pictureBox1.Image != null)
          {
              pictureBox1.Image.Dispose();
          }
          pictureBox1.Image = BitmapConverter.ToBitmap(canvas);
      }
  }
  /// <summary>
  /// Returns the mean value of <paramref name="pred"/> inside the polygon described
  /// by <paramref name="contour"/>.
  /// </summary>
  /// <param name="contour">Polygon vertices in the coordinate system of <paramref name="pred"/>.</param>
  /// <param name="pred">Prediction map the mean is taken from.</param>
  /// <returns>The mean of the prediction values covered by the polygon.</returns>
  private float GetScore(OpenCvSharp.Point[] contour, Mat pred)
  {
      int width = pred.Width;
      int height = pred.Height;
      int[] boxX = contour.Select(v => v.X).ToArray();
      int[] boxY = contour.Select(v => v.Y).ToArray();

      // Bounding box of the contour, clamped to the prediction map.
      int xmin = Clamp(boxX.Min(), 0, width - 1);
      int xmax = Clamp(boxX.Max(), 0, width - 1);
      int ymin = Clamp(boxY.Min(), 0, height - 1);
      int ymax = Clamp(boxY.Max(), 0, height - 1);

      // Contour shifted so that the bounding box starts at (0, 0).
      OpenCvSharp.Point[] rootPoints = contour
          .Select(v => new OpenCvSharp.Point(v.X - xmin, v.Y - ymin))
          .ToArray();

      // Both temporaries wrap native memory and were previously leaked on every call.
      using (Mat mask = new Mat(ymax - ymin + 1, xmax - xmin + 1, MatType.CV_8UC1, Scalar.Black))
      using (Mat croppedMat = pred[ymin, ymax + 1, xmin, xmax + 1])
      {
          mask.FillPoly(new[] { rootPoints }, new Scalar(1));
          return (float)croppedMat.Mean(mask).Val0;
      }
  }
  /// <summary>
  /// Restricts <paramref name="val"/> to the range [<paramref name="min"/>, <paramref name="max"/>].
  /// </summary>
  /// <param name="val">Value to restrict.</param>
  /// <param name="min">Lower bound; checked first.</param>
  /// <param name="max">Upper bound.</param>
  /// <returns><paramref name="min"/> if the value is below it, <paramref name="max"/> if above it, otherwise the value itself.</returns>
  public int Clamp(int val, int min, int max)
  {
      // The lower bound is tested before the upper bound, exactly as before.
      return val < min
          ? min
          : (val > max ? max : val);
  }
  /// <summary>
  /// Copies the channels of <paramref name="src"/> one after another (planar layout)
  /// into a float array sized for three channels.
  /// </summary>
  /// <param name="src">Source image; assumed to be 32-bit float, as produced by Normalize — TODO confirm for other callers.</param>
  /// <returns>An array of rows * cols * 3 floats; planes beyond the source's channel count stay zero.</returns>
  /// <exception cref="ArgumentException"><paramref name="src"/> has more than three channels.</exception>
  float[] ExtractMat(Mat src)
  {
      const int planeCount = 3;
      int rows = src.Rows;
      int cols = src.Cols;
      int channels = src.Channels();

      // The buffer holds three planes; writing a fourth would run past the end
      // of the pinned array in native code.
      if (channels > planeCount)
      {
          throw new ArgumentException("src has " + channels + " channels, at most " + planeCount + " are supported.", nameof(src));
      }

      float[] array = new float[rows * cols * planeCount];

      // Pin before entering the try block so that finally never frees an unallocated handle.
      GCHandle gCHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
      try
      {
          IntPtr basePtr = gCHandle.AddrOfPinnedObject();
          for (int i = 0; i < channels; i++)
          {
              // Mat header over the i-th plane of the managed array.
              using (Mat dest = new Mat(rows, cols, MatType.CV_32FC1, basePtr + i * rows * cols * sizeof(float), 0L))
              {
                  Cv2.ExtractChannel(src, dest, i);
              }
          }
          return array;
      }
      finally
      {
          gCHandle.Free();
      }
  }
  /// <summary>
  /// Returns a copy of <paramref name="src"/> whose long edge does not exceed
  /// <paramref name="maxSize"/>; the image is only ever shrunk, never enlarged.
  /// </summary>
  /// <param name="src">Image to resize.</param>
  /// <param name="maxSize">Maximum long-edge length, or null to skip resizing.</param>
  /// <returns>A new Mat: either a scaled-down image or a clone of the source.</returns>
  private Mat MatResize(Mat src, int? maxSize)
  {
      if (!maxSize.HasValue)
      {
          return src.Clone();
      }

      OpenCvSharp.Size dims = src.Size();
      int longest = Math.Max(dims.Width, dims.Height);
      double factor = 1.0 * maxSize.Value / longest;

      if (factor < 1.0)
      {
          return src.Resize(OpenCvSharp.Size.Zero, factor, factor);
      }
      return src.Clone();
  }
  /// <summary>
  /// Pads <paramref name="src"/> with black pixels on the right and bottom so that
  /// both dimensions become multiples of 32.
  /// </summary>
  /// <param name="src">Image to pad.</param>
  /// <returns>A new, padded Mat.</returns>
  private Mat MatPadding32(Mat src)
  {
      OpenCvSharp.Size dims = src.Size();

      // Round each dimension up to the next multiple of 32.
      int paddedWidth = (dims.Width + 31) / 32 * 32;
      int paddedHeight = (dims.Height + 31) / 32 * 32;

      int extraBottom = paddedHeight - dims.Height;
      int extraRight = paddedWidth - dims.Width;
      return src.CopyMakeBorder(0, extraBottom, 0, extraRight, BorderTypes.Constant, Scalar.Black);
  }
  /// <summary>
  /// Converts <paramref name="src"/> to 32-bit float scaled by 1/255 and then applies
  /// (value - mean) * scale to each channel using the per-channel constants below.
  /// </summary>
  /// <param name="src">Image to normalise.</param>
  /// <returns>A new Mat holding the merged, normalised channels.</returns>
  private Mat Normalize(Mat src)
  {
      float[] scales = new[] { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
      float[] means = new[] { 0.485f, 0.456f, 0.406f };

      Mat scaled = new Mat();
      src.ConvertTo(scaled, MatType.CV_32FC3, 1.0 / 255);
      Mat[] planes = scaled.Split();

      int planeIndex = 0;
      foreach (Mat plane in planes)
      {
          // alpha * x + beta  ==  (x - mean) * scale
          double alpha = 1.0 * scales[planeIndex];
          double beta = (0.0 - means[planeIndex]) * scales[planeIndex];
          plane.ConvertTo(plane, MatType.CV_32FC1, alpha, beta);
          ++planeIndex;
      }
      scaled.Dispose();

      Mat merged = new Mat();
      Cv2.Merge(planes, merged);
      for (int i = 0; i < planes.Length; ++i)
      {
          planes[i].Dispose();
      }
      return merged;
  }
  /// <summary>
  /// Cuts the rotated rectangle <paramref name="rect"/> out of <paramref name="src"/>
  /// and warps it into an upright image. Parts of the rectangle that lie outside the
  /// source are filled by replicating the border pixels.
  /// </summary>
  /// <param name="src">Source image.</param>
  /// <param name="rect">Rotated rectangle to extract, in source coordinates.</param>
  /// <returns>A new Mat containing the warped region.</returns>
  private Mat GetRotateCropImage(Mat src, RotatedRect rect)
  {
      bool wider = rect.Size.Width > rect.Size.Height;
      float angle = rect.Angle;
      OpenCvSharp.Size srcSize = src.Size();
      Rect boundingRect = rect.BoundingRect();

      // How far the bounding box sticks out of the image on each side.
      int expTop = Math.Max(0, 0 - boundingRect.Top);
      int expBottom = Math.Max(0, boundingRect.Bottom - srcSize.Height);
      int expLeft = Math.Max(0, 0 - boundingRect.Left);
      int expRight = Math.Max(0, boundingRect.Right - srcSize.Width);

      // NOTE(review): Point takes (x, y) but is given (expTop, expLeft) here; kept
      // as in the original — confirm whether this offset is intended.
      Rect rectToExp = boundingRect + new OpenCvSharp.Point(expTop, expLeft);
      Rect roiRect = Rect.FromLTRB(
          boundingRect.Left + expLeft,
          boundingRect.Top + expTop,
          boundingRect.Right - expRight,
          boundingRect.Bottom - expBottom);

      // using blocks release the temporaries even when an OpenCV call throws.
      using (Mat boundingMat = src[roiRect])
      using (Mat expanded = boundingMat.CopyMakeBorder(expTop, expBottom, expLeft, expRight, BorderTypes.Replicate))
      {
          Point2f[] rp = rect.Points()
              .Select(v => new Point2f(v.X - rectToExp.X, v.Y - rectToExp.Y))
              .ToArray();

          Point2f[] srcPoints = new[] { rp[0], rp[3], rp[2], rp[1] };
          if (wider == true && angle >= 0 && angle < 45)
          {
              srcPoints = new[] { rp[1], rp[2], rp[3], rp[0] };
          }

          Point2f[] dstPoints = new[]
          {
              new Point2f(0, 0),
              new Point2f(rect.Size.Width, 0),
              new Point2f(rect.Size.Width, rect.Size.Height),
              new Point2f(0, rect.Size.Height),
          };

          using (Mat matrix = Cv2.GetPerspectiveTransform(srcPoints, dstPoints))
          {
              Mat dest = expanded.WarpPerspective(matrix, new OpenCvSharp.Size(rect.Size.Width, rect.Size.Height), InterpolationFlags.Nearest, BorderTypes.Replicate);

              if (!wider)
              {
                  Cv2.Transpose(dest, dest);
              }
              else if (angle > 45)
              {
                  Cv2.Flip(dest, dest, FlipMode.X);
              }
              return dest;
          }
      }
  }
  /// <summary>
  /// Clips <paramref name="rect"/> so that every edge lies within
  /// [0, <paramref name="size"/>] on its axis.
  /// </summary>
  /// <param name="rect">Rectangle to clip.</param>
  /// <param name="size">Bounds to clip against.</param>
  /// <returns>The clipped rectangle.</returns>
  private Rect GetCropedRect(Rect rect, OpenCvSharp.Size size)
  {
      int left = Clamp(rect.Left, 0, size.Width);
      int top = Clamp(rect.Top, 0, size.Height);
      int right = Clamp(rect.Right, 0, size.Width);
      int bottom = Clamp(rect.Bottom, 0, size.Height);
      return Rect.FromLTRB(left, top, right, bottom);
  }
  /// <summary>
  /// Releases the native resources held by the form when it closes.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event data (unused).</param>
  private void Form1_FormClosing(object sender, FormClosingEventArgs e)
  {
      // Null-conditional calls: any of these may be unset if loading failed or a
      // step was never run. The classifier predictor was previously never released.
      det_predictor?.Dispose();
      cls_predictor?.Dispose();
      rec_predictor?.Dispose();

      src?.Dispose();
      bmp?.Dispose();
  }
  /// <summary>
  /// Direction detection: crops every detected box out of the source image and,
  /// when enabled, rotates crops that the classifier reports as upside down.
  /// The crops are stored in <c>mats</c>.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event data (unused).</param>
  private void button4_Click(object sender, EventArgs e)
  {
      textBox1.Text = "";
      Application.DoEvents();

      // Both the boxes and the image they refer to are required
      // (the original tested rects twice).
      if (rects == null || src == null)
      {
          return;
      }

      // Release crops left over from a previous run before replacing them.
      if (mats != null)
      {
          foreach (Mat previous in mats)
          {
              if (previous != null)
              {
                  previous.Dispose();
              }
          }
          mats = null;
      }

      dt1 = DateTime.Now;
      mats = rects
          .Select(rect =>
          {
              Mat roi = AllowRotateDetection ? GetRotateCropImage(src, rect) : src[GetCropedRect(rect.BoundingRect(), src.Size())];
              return Enable180Classification ? CLSPredictorRun(roi) : roi;
          })
          .ToArray();
      dt2 = DateTime.Now;

      StringBuilder sb = new StringBuilder();
      sb.AppendLine("-----------------------------------\n");
      sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
      textBox1.Text = sb.ToString();
  }
  /// <summary>
  /// Text recognition: runs the recogniser on the crops in <c>mats</c> and shows
  /// every score and text together with the elapsed time. The crops are released
  /// afterwards.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event data (unused).</param>
  private void button3_Click(object sender, EventArgs e)
  {
      textBox1.Text = "";
      Application.DoEvents();
      if (rects == null || mats == null)
      {
          return;
      }

      dt1 = DateTime.Now;
      try
      {
          // 0 lets RecognizerRun choose the batch size.
          int recognizeBatchSize = 0;
          PaddleOcrRecognizerResult[] porr = RecognizerRun(mats, recognizeBatchSize);
          dt2 = DateTime.Now;

          StringBuilder sb = new StringBuilder();
          for (int i = 0; i < porr.Length; i++)
          {
              sb.AppendLine("(" + porr[i].Score + ")" + porr[i].Text);
          }
          sb.AppendLine("-----------------------------------\n");
          sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
          textBox1.Text = sb.ToString();
      }
      finally
      {
          foreach (Mat mat in mats)
          {
              mat.Dispose();
          }
          // Drop the disposed crops so that a second click returns early instead of
          // running the recogniser on released Mats.
          mats = null;
      }
  }
  /// <summary>
  /// Recognises the text in every image of <paramref name="srcs"/>. Images are
  /// sorted by width, processed in batches, and the results are returned in the
  /// order of the input array.
  /// </summary>
  /// <param name="srcs">Images to recognise.</param>
  /// <param name="batchSize">Images per batch; 0 (or a negative value) selects min(8, processor count).</param>
  /// <returns>One result per input image, in input order.</returns>
  public PaddleOcrRecognizerResult[] RecognizerRun(Mat[] srcs, int batchSize = 0)
  {
      if (srcs.Length == 0)
      {
          return new PaddleOcrRecognizerResult[0];
      }

      // A non-positive size would be rejected by Chunk, so it falls back to the default.
      int chooseBatchSize = batchSize > 0 ? batchSize : Math.Min(8, Environment.ProcessorCount);

      return srcs
          .Select((x, i) => (mat: x, i))
          // Batching images of similar width keeps the padding per batch small.
          .OrderBy(x => x.mat.Width)
          .Chunk(chooseBatchSize)
          .Select(x => (result: RunMulti(x.Select(x2 => x2.mat).ToArray()), ids: x.Select(x2 => x2.i).ToArray()))
          .SelectMany(x => x.result.Zip(x.ids, (result, i) => (result, i)))
          // Restore the caller's ordering.
          .OrderBy(x => x.i)
          .Select(x => x.result)
          .ToArray();
  }
  /// <summary>
  /// Resizes <paramref name="src"/> to <paramref name="height"/> while keeping its
  /// aspect ratio, then pads it on the right with gray up to <paramref name="targetWidth"/>.
  /// </summary>
  /// <param name="src">Image to resize.</param>
  /// <param name="height">Height of the result.</param>
  /// <param name="targetWidth">Width of the result.</param>
  /// <returns>A new Mat of size <paramref name="targetWidth"/> x <paramref name="height"/>.</returns>
  private Mat ResizePadding(Mat src, int height, int targetWidth)
  {
      OpenCvSharp.Size size = src.Size();
      float whRatio = 1.0f * size.Width / size.Height;

      // Single-precision rounding can push the width one pixel past the target,
      // which would request a negative border below; cap it.
      int width = Math.Min((int)Math.Ceiling(height * whRatio), targetWidth);

      Mat resized = src.Resize(new OpenCvSharp.Size(width, height));
      if (width == targetWidth)
      {
          return resized;
      }

      // The intermediate image is only needed to build the padded result.
      using (resized)
      {
          return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
      }
  }
  /// <summary>Recognises a single image by running it as a batch of one.</summary>
  /// <param name="src">Image to recognise.</param>
  /// <returns>The only result of that batch.</returns>
  private PaddleOcrRecognizerResult Run(Mat src)
  {
      Mat[] batch = new[] { src };
      PaddleOcrRecognizerResult[] results = RunMulti(batch);
      return results.Single();
  }
  /// <summary>
  /// Runs the recognition model on one batch of images and decodes the output into
  /// text and an average score per image.
  /// </summary>
  /// <param name="srcs">Images of the batch; 1, 3 or 4 channels each.</param>
  /// <returns>One result per input image, in input order.</returns>
  /// <exception cref="ArgumentException">An input image is empty.</exception>
  /// <exception cref="Exception">An image has an unsupported channel count, or the predictor failed to run.</exception>
  private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
  {
      if (srcs.Length == 0)
      {
          return new PaddleOcrRecognizerResult[0];
      }
      for (int i = 0; i < srcs.Length; ++i)
      {
          Mat src = srcs[i];
          if (src.Empty())
          {
              throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
          }
      }

      int modelHeight = recShape.Height;
      // Width of the widest image once every image is scaled to the model height.
      int maxWidth = (int)Math.Ceiling(srcs.Max(src =>
      {
          OpenCvSharp.Size size = src.Size();
          return 1.0 * size.Width / size.Height * modelHeight;
      }));

      Mat[] normalizeds = new Mat[srcs.Length];
      try
      {
          for (int i = 0; i < srcs.Length; ++i)
          {
              Mat src = srcs[i];

              // Bring every input to three channels.
              Mat channel3;
              switch (src.Channels())
              {
                  case 4:
                      channel3 = src.CvtColor(ColorConversionCodes.RGBA2BGR);
                      break;
                  case 3:
                      channel3 = src.Clone();
                      break;
                  case 1:
                      channel3 = src.CvtColor(ColorConversionCodes.GRAY2RGB);
                      break;
                  default:
                      throw new Exception("Unexpect src channel: {" + src.Channels() + "}, allow: (1/3/4)");
              }

              using (channel3)
              using (Mat resized = ResizePadding(channel3, modelHeight, maxWidth))
              {
                  Cv2.ImShow("resized" + i.ToString(), resized);
                  normalizeds[i] = Normalize(resized);
              }
          }

          using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
          {
              int channel = normalizeds[0].Channels();
              input.Shape = new[] { normalizeds.Length, channel, modelHeight, maxWidth };
              float[] data = ExtractMat(normalizeds, channel, modelHeight, maxWidth);
              input.SetData(data);
          }
      }
      finally
      {
          // The normalised images are only needed to fill the input tensor.
          foreach (Mat normalized in normalizeds)
          {
              if (normalized != null)
              {
                  normalized.Dispose();
              }
          }
      }

      if (!rec_predictor.Run())
      {
          throw new Exception($"PaddlePredictor(Recognizer) run failed.");
      }

      using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
      {
          float[] data = output.GetData<float>();
          int[] shape = output.Shape;

          // Pin the output so that Mat headers can be laid over slices of it.
          GCHandle dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
          try
          {
              IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
              int labelCount = shape[2];
              int charCount = shape[1];

              return Enumerable.Range(0, shape[0])
                  .Select(i =>
                  {
                      StringBuilder sb = new StringBuilder();
                      int lastIndex = 0;
                      float score = 0;
                      for (int n = 0; n < charCount; ++n)
                      {
                          using (Mat mat = new Mat(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float)))
                          {
                              int[] maxIdx = new int[2];
                              mat.MinMaxIdx(out double _, out double maxVal, new int[0], maxIdx);

                              // Index 0 is skipped, and so is a repeat of the previous step's index.
                              if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
                              {
                                  score += (float)maxVal;
                                  sb.Append(GetLabelByIndex(maxIdx[1]));
                              }
                              lastIndex = maxIdx[1];
                          }
                      }

                      // Empty text would otherwise produce 0 / 0 = NaN.
                      float averageScore = sb.Length > 0 ? score / sb.Length : 0f;
                      return new PaddleOcrRecognizerResult(sb.ToString(), averageScore);
                  })
                  .ToArray();
          }
          finally
          {
              dataHandle.Free();
          }
      }
  }
  /// <summary>
  /// Copies a batch of images into one float array laid out as
  /// [image][channel][row][column].
  /// </summary>
  /// <param name="srcs">Images of the batch; assumed to be 32-bit float, as produced by Normalize — TODO confirm for other callers.</param>
  /// <param name="channel">Channel count every image must have.</param>
  /// <param name="height">Row count every image must have.</param>
  /// <param name="width">Column count every image must have.</param>
  /// <returns>An array of srcs.Length * channel * height * width floats.</returns>
  /// <exception cref="Exception">An image has a different channel count or size.</exception>
  private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
  {
      float[] result = new float[srcs.Length * channel * width * height];
      GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
      try
      {
          IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
          for (int i = 0; i < srcs.Length; ++i)
          {
              Mat src = srcs[i];
              if (src.Channels() != channel)
              {
                  throw new Exception($"src[{i}] channel={src.Channels()}, expected {channel}");
              }
              // With a different size the destination header would be reallocated by
              // OpenCV and the pixels would never reach the result array.
              if (src.Rows != height || src.Cols != width)
              {
                  throw new Exception($"src[{i}] size={src.Cols}x{src.Rows}, expected {width}x{height}");
              }

              for (int c = 0; c < channel; ++c)
              {
                  // Mat header over the plane of image i, channel c; disposing it
                  // releases only the native header, not the pinned array.
                  using (Mat dest = new Mat(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * height * width * sizeof(float)))
                  {
                      Cv2.ExtractChannel(src, dest, c);
                  }
              }
          }
          return result;
      }
      finally
      {
          resultHandle.Free();
      }
  }
  /// <summary>
  /// Maps a model output index to its text. Indices 1..Labels.Count map to
  /// Labels[index - 1]; index Labels.Count + 1 maps to the empty string.
  /// </summary>
  /// <param name="x">Index reported by the recognition model.</param>
  /// <returns>The label text for <paramref name="x"/>.</returns>
  /// <exception cref="Exception"><paramref name="x"/> is outside the range described above.</exception>
  string GetLabelByIndex(int x)
  {
      int labelTotal = Labels.Count;

      bool isDictionaryEntry = x > 0 && x <= labelTotal;
      if (isDictionaryEntry)
      {
          return Labels[x - 1];
      }

      if (x == labelTotal + 1)
      {
          return "";
      }

      throw new Exception("Unable to GetLabelByIndex: index {" + x + "} out of range {" + Labels.Count + "}, OCR model or labels not matched?");
  }
  /// <summary>
  /// Fits <paramref name="src"/> to <paramref name="shape"/>: an image wider than
  /// the shape's aspect ratio is cropped on the right, the result is scaled to the
  /// shape height, and any missing width is padded with black.
  /// </summary>
  /// <param name="src">Image to fit.</param>
  /// <param name="shape">Target shape; its Width and Height are used.</param>
  /// <returns>A new Mat with the shape's height.</returns>
  private Mat ResizePadding(Mat src, OcrShape shape)
  {
      OpenCvSharp.Size srcSize = src.Size();

      // Compare the aspect ratios by cross-multiplication. The original divided
      // integers, so a ratio such as 4.9 was truncated to 4 and never cropped.
      bool widerThanShape = (long)srcSize.Width * shape.Height > (long)shape.Width * srcSize.Height;

      using (Mat roi = widerThanShape ?
          src[0, srcSize.Height, 0, (int)Math.Floor(1.0 * srcSize.Height * shape.Width / shape.Height)] :
          src.Clone())
      {
          double scaleRate = 1.0 * shape.Height / srcSize.Height;
          // Very narrow images would otherwise round down to a width of 0.
          int scaledWidth = Math.Max(1, (int)Math.Floor(roi.Width * scaleRate));

          Mat resized = roi.Resize(new OpenCvSharp.Size(scaledWidth, shape.Height));
          if (resized.Width < shape.Width)
          {
              Cv2.CopyMakeBorder(resized, resized, 0, 0, 0, shape.Width - resized.Width, BorderTypes.Constant, Scalar.Black);
          }
          return resized;
      }
  }
  /// <summary>
  /// Rotates <paramref name="src"/> by 180 degrees in place when the direction
  /// classifier reports it as upside down, and logs the decision to the console.
  /// </summary>
  /// <param name="src">Cropped text image with 1 or 3 channels.</param>
  /// <returns>The same Mat instance that was passed in.</returns>
  /// <exception cref="ArgumentException"><paramref name="src"/> is empty.</exception>
  /// <exception cref="NotSupportedException"><paramref name="src"/> has neither 1 nor 3 channels.</exception>
  public Mat CLSPredictorRun(Mat src)
  {
      if (src.Empty())
      {
          throw new ArgumentException("src size should not be 0, wrong input picture provided?");
      }
      if (!(src.Channels() == 3 || src.Channels() == 1))
      {
          throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
      }

      bool rotate = ShouldRotate180(src);
      if (rotate)
      {
          Cv2.Rotate(src, src, RotateFlags.Rotate180);
      }

      // Fixes the misspelled "Flase" in the original log output.
      Console.WriteLine(rotate ? "ShouldRotate180:True" : "ShouldRotate180:False");
      return src;
  }
  /// <summary>
  /// Runs the direction classifier on <paramref name="src"/> and reports whether
  /// the image should be rotated by 180 degrees.
  /// </summary>
  /// <param name="src">Cropped text image with 1 or 3 channels.</param>
  /// <returns>
  /// True when the highest-scoring class has an odd index and its score exceeds
  /// <c>RotateThreshold</c>.
  /// </returns>
  /// <exception cref="ArgumentException"><paramref name="src"/> is empty.</exception>
  /// <exception cref="NotSupportedException"><paramref name="src"/> has neither 1 nor 3 channels.</exception>
  /// <exception cref="Exception">The classifier predictor failed to run.</exception>
  public bool ShouldRotate180(Mat src)
  {
      if (src.Empty())
      {
          throw new ArgumentException("src size should not be 0, wrong input picture provided?");
      }
      if (!(src.Channels() == 3 || src.Channels() == 1))
      {
          throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
      }

      // The default OcrShape used in the classification model.
      OcrShape shape = new OcrShape(3, 192, 48);

      // using blocks release the temporaries even if filling the tensor throws;
      // the original only disposed them on the success path.
      using (Mat resized = ResizePadding(src, shape))
      using (Mat normalized = Normalize(resized))
      using (PaddleTensor input = cls_predictor.GetInputTensor(cls_predictor.InputNames[0]))
      {
          input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
          float[] data = ExtractMat(normalized);
          input.SetData(data);
      }

      if (!cls_predictor.Run())
      {
          throw new Exception("PaddlePredictor(Classifier) run failed.");
      }

      using (PaddleTensor output = cls_predictor.GetOutputTensor(cls_predictor.OutputNames[0]))
      {
          float[] softmax = output.GetData<float>();

          // Arg-max over the class scores.
          float score = 0;
          int label = 0;
          for (int i = 0; i < softmax.Length; ++i)
          {
              if (softmax[i] > score)
              {
                  score = softmax[i];
                  label = i;
              }
          }

          return label % 2 == 1 && score > RotateThreshold;
      }
  }
  699. }
  700. }

下载 

Demo下载

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/从前慢现在也慢/article/detail/692803
推荐阅读
相关标签
  

闽ICP备14008679号