// 基于开源项目 https://github.com/sdcb/PaddleSharp
// 环境:VS2022 + .NET Framework 4.8 + OpenCvSharp4 + Sdcb.PaddleInference
using Sdcb.PaddleInference.Native;
using Sdcb.PaddleInference;
using System;
using System.Collections.Generic;
using OpenCvSharp.Extensions;
using OpenCvSharp;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.Globalization;
using System.IO;
namespace PaddleInference_OCR识别
public partial class Form1 : Form
public Form1()
// Bitmap of the most recently opened image; assigned in button1_Click.
Bitmap bmp;
// OpenFileDialog filter: a single entry labelled "*.*" that matches the listed image extensions.
// Both TIFF spellings (.tif and .tiff) are listed so either one can be selected.
string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
// Path of the currently selected image file; empty until a file has been chosen.
string img = "";
// NOTE(review): not assigned in the visible code (Form1_Load declares a local with the same name).
string startupPath = "";
// Passed to MatResize as the cap for the longer image edge before detection.
int MaxSize = 1536;
// Binarisation threshold applied (scaled by 255) to the detector output when not null.
float? BoxThreshold = 0.3f;
// Contours whose GetScore result is not above this value are dropped; null disables the filter.
float? BoxScoreThreahold = 0.7f;
// Side length of the rectangular structuring element used for dilation when not null.
int? DilatedSize = 2;
// Candidate rectangles must be wider and taller than this (in resized-image pixels).
int MinSize = 3;
// Multiplied by the shorter rectangle edge and added to both edges to enlarge each box.
float UnclipRatio = 2.0f;
// Source image read with Cv2.ImRead in button2_Click.
Mat src;
// Predictor for the text detection model.
PaddlePredictor det_predictor;
// Text regions produced by button2_Click, in source-image coordinates.
RotatedRect[] rects;
// When true, button4_Click passes each crop through CLSPredictorRun.
bool Enable180Classification { get; set; } = true;
// When true, button4_Click crops with GetRotateCropImage instead of the clamped bounding rectangle.
bool AllowRotateDetection { get; set; } = true;
// Minimum classifier score required by ShouldRotate180 before it reports a rotation.
double RotateThreshold { get; } = 0.75;
// Per-region crops produced by button4_Click and consumed by button3_Click.
Mat[] mats;
// Predictor for the direction classification model.
PaddlePredictor cls_predictor;
// Recognition input shape; RunMulti reads its Height as the model input height.
OcrShape recShape = new OcrShape(3, 320, 48);
// Predictor for the text recognition model.
PaddlePredictor rec_predictor;
// Label dictionary assigned at the end of Form1_Load and indexed by GetLabelByIndex.
public IReadOnlyList<string> Labels;
// Start and end timestamps used to report elapsed time in the text box.
DateTime dt1 = DateTime.Now;
DateTime dt2 = DateTime.Now;
// Form load handler: creates the detection, classification and recognition predictors from the
// model folders under Application.StartupPath and assigns the label list to Labels.
// NOTE(review): this listing contains no brace lines, so statement grouping is inferred from order only.
private unsafe void Form1_Load(object sender, EventArgs e)
// NOTE(review): this local has the same name as the startupPath field, so the field is not assigned here.
string startupPath = Application.StartupPath;
// ---- Detection model ----
IntPtr det_ptr = PaddleNative.PD_ConfigCreate();
// Encoding used to turn model paths into bytes for the native API:
// the current culture's ANSI code page on Windows NT, UTF-8 on other platforms.
Encoding PaddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage) : Encoding.UTF8;
String det_programPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdmodel";
String det_paramsPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdiparams";
// Disabled alternative: configure the detection model from file paths
// (the same approach the classifier and recognizer use below).
//byte[] programBytes = PaddleEncoding.GetBytes(det_programPath);
//byte[] paramsBytes = PaddleEncoding.GetBytes(det_paramsPath);
//fixed (byte* programPtr = programBytes)
//fixed (byte* paramsPtr = paramsBytes)
// PaddleNative.PD_ConfigSetModel(det_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
// Option 2: read your own encrypted model file, decrypt it, and write the result into a byte[].
// In the visible code the files are read as-is; no decryption step is present.
// NOTE(review): neither FileStream is closed or disposed, and the return value of Read is ignored,
// so a short read would go unnoticed.
Stream Steam = new FileStream(det_programPath, FileMode.Open, FileAccess.Read, FileShare.Read);
byte[] programBuffer = new byte[Steam.Length];
Steam.Read(programBuffer, 0, programBuffer.Length);
Steam = new FileStream(det_paramsPath, FileMode.Open, FileAccess.Read, FileShare.Read);
byte[] paramsBuffer = new byte[Steam.Length];
Steam.Read(paramsBuffer, 0, paramsBuffer.Length);
// Pin both buffers so their addresses can be handed to native code.
fixed (byte* pprogram = programBuffer)
fixed (byte* pparams = paramsBuffer)
// NOTE(review): the start of the call these two argument lines belong to is missing from this listing;
// presumably a native "set model from buffer" call on det_ptr - confirm against the original source.
(IntPtr)pprogram, programBuffer.Length,
(IntPtr)pparams, paramsBuffer.Length);
det_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(det_ptr));
// ---- Direction classification model (configured from file paths) ----
IntPtr cls_ptr = PaddleNative.PD_ConfigCreate();
String cls_programPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdmodel";
String cls_paramsPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdiparams";
byte[] programBytes = PaddleEncoding.GetBytes(cls_programPath);
byte[] paramsBytes = PaddleEncoding.GetBytes(cls_paramsPath);
fixed (byte* programPtr = programBytes)
fixed (byte* paramsPtr = paramsBytes)
PaddleNative.PD_ConfigSetModel(cls_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
cls_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(cls_ptr));
// ---- Recognition model (configured from file paths) ----
IntPtr rec_ptr = PaddleNative.PD_ConfigCreate();
String rec_programPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdmodel";
String rec_paramsPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdiparams";
byte[] rec_programBytes = PaddleEncoding.GetBytes(rec_programPath);
byte[] rec_paramsBytes = PaddleEncoding.GetBytes(rec_paramsPath);
fixed (byte* rec_programPtr = rec_programBytes)
fixed (byte* rec_paramsPtr = rec_paramsBytes)
PaddleNative.PD_ConfigSetModel(rec_ptr, (IntPtr)rec_programPtr, (IntPtr)rec_paramsPtr);
rec_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(rec_ptr));
// ---- Label dictionary ----
String labelsPath = startupPath + "\\ppocr_keys.txt";
Steam = new FileStream(labelsPath, FileMode.Open, FileAccess.Read, FileShare.Read);
// NOTE(review): the reader (and its underlying stream) is never disposed in the visible code.
StreamReader reader = new StreamReader(Steam);
List<string> tempList = new List<string>();
// NOTE(review): the loop body is missing from this listing; presumably each line read is added to tempList - confirm.
while (!reader.EndOfStream)
Labels = tempList;
/// <summary>
/// Lets the user pick an image file, remembers its path in <c>img</c>, loads it into <c>bmp</c>,
/// shows it in the picture box and clears the result text box.
/// Does nothing when the dialog is cancelled.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // The dialog owns native resources, so dispose it as soon as the selection is known.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Filter = fileFilter;
        if (ofd.ShowDialog() != DialogResult.OK) return;
        img = ofd.FileName;
    }

    // Detach the old preview before disposing it so the control never paints a disposed bitmap.
    System.Drawing.Image previousPreview = pictureBox1.Image;
    pictureBox1.Image = null;
    previousPreview?.Dispose();

    // Release the previously opened bitmap; otherwise each file opened leaks GDI+ memory
    // and keeps the old file locked until finalization.
    bmp?.Dispose();

    bmp = new Bitmap(img);
    pictureBox1.Image = new Bitmap(img);
    textBox1.Text = "";
}
// Text detection handler: runs the detection predictor on the selected image, turns the output
// map into rotated rectangles (stored in rects), reports the elapsed time and draws the outlines.
// NOTE(review): this listing contains no brace lines and several statements are missing; see notes below.
private void button2_Click(object sender, EventArgs e)
textBox1.Text = "";
// NOTE(review): the statement guarded by this check is missing from the listing (presumably an early return).
if (img == "")
dt1 = DateTime.Now;
src = Cv2.ImRead(img);
// Pre-processing: limit the longer edge to MaxSize, pad to multiples of 32, then normalise.
Mat resized = MatResize(src, MaxSize);
//Cv2.ImShow("resized", resized);
Mat padded = MatPadding32(resized);
//Cv2.ImShow("padded", padded);
Mat normalized = Normalize(padded);
// Debug window showing the normalised input.
Cv2.ImShow("normalized", normalized);
OpenCvSharp.Size resizedSize = resized.Size();
using (PaddleTensor input = det_predictor.GetInputTensor(det_predictor.InputNames[0]))
input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
// NOTE(review): no visible statement uploads setData into the input tensor - confirm against the original.
float[] setData = ExtractMat(normalized);
if (!det_predictor.Run())
throw new Exception("PaddlePredictor(Detector) run failed.");
using (PaddleTensor output = det_predictor.GetOutputTensor(det_predictor.OutputNames[0]))
float[] data = output.GetData<float>();
int[] shape = output.Shape;
// Wrap the output values as a shape[2] x shape[3] single-channel float matrix.
Mat pred = new Mat(shape[2], shape[3], MatType.CV_32FC1, data);
// Debug window showing the raw prediction map.
Cv2.ImShow("pred", pred);
Mat cbuf = new Mat();
// Keep only the area of the resized image (drops the bottom/right padding), then scale by 255 into 8-bit.
Mat roi = pred[0, resizedSize.Height, 0, resizedSize.Width];
roi.ConvertTo(cbuf, MatType.CV_8UC1, 255);
//Cv2.ImShow("roi", roi);
Mat dilated = new Mat();
// NOTE(review): the else operand of this conditional expression is missing from the listing.
Mat binary = BoxThreshold != null ?
cbuf.Threshold((int)(BoxThreshold * 255), 255, ThresholdTypes.Binary) :
//Cv2.ImShow("binary", binary);
// NOTE(review): Dilate and CopyTo both follow this check; presumably they were the if/else branches
// (dilate when DilatedSize is set, plain copy otherwise) - the else keyword is not in the listing.
if (DilatedSize != null)
Mat ones = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(DilatedSize.Value, DilatedSize.Value));
Cv2.Dilate(binary, dilated, ones);
Cv2.CopyTo(binary, dilated);
//Cv2.ImShow("dilated", dilated);
OpenCvSharp.Point[][] contours = dilated.FindContoursAsArray(RetrievalModes.List, ContourApproximationModes.ApproxSimple);
OpenCvSharp.Size size = src.Size();
// Ratio used to map resized-image coordinates back to the source image (derived from the widths).
double scaleRate = 1.0 * src.Width / resizedSize.Width;
// Contours -> score filter -> minimum-area rectangles -> size filter -> enlarge and rescale -> sort by centre Y, then X.
// NOTE(review): the lambda braces and the end of this query (presumably .ToArray();) are missing from the listing.
rects = contours
.Where(x => BoxScoreThreahold == null || GetScore(x, pred) > BoxScoreThreahold)
.Select(x => Cv2.MinAreaRect(x))
.Where(x => x.Size.Width > MinSize && x.Size.Height > MinSize)
.Select(rect =>
float minEdge = Math.Min(rect.Size.Width, rect.Size.Height);
// Grow both edges by UnclipRatio times the shorter edge, then scale to source-image coordinates.
Size2f newSize = new Size2f(
(rect.Size.Width + UnclipRatio * minEdge) * scaleRate,
(rect.Size.Height + UnclipRatio * minEdge) * scaleRate);
RotatedRect largerRect = new RotatedRect(rect.Center * scaleRate, newSize, rect.Angle);
return largerRect;
.OrderBy(v => v.Center.Y)
.ThenBy(v => v.Center.X)
dt2 = DateTime.Now;
// Show the elapsed time in milliseconds.
StringBuilder sb = new StringBuilder();
sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
textBox1.Text = sb.ToString();
// Draw every detected rectangle on a fresh copy of the image using a random colour.
Mat src2 = Cv2.ImRead(img);
for (int i = 0; i < rects.Length; i++)
Scalar scalar = Scalar.RandomColor();
List<OpenCvSharp.Point> temp = new List<OpenCvSharp.Point>();
foreach (var item2 in rects[i].Points())
temp.Add(new OpenCvSharp.Point(item2.X, item2.Y));
// NOTE(review): no visible statement adds temp to lltemp before Polylines is called - confirm against the original.
List<List<OpenCvSharp.Point>> lltemp = new List<List<OpenCvSharp.Point>>();
Cv2.Polylines(src2, lltemp, true, scalar);
// NOTE(review): in the visible code the previous picture box image is replaced without being disposed.
if (pictureBox1.Image != null)
pictureBox1.Image = BitmapConverter.ToBitmap(src2);
/// <summary>
/// Computes the mean value of <paramref name="pred"/> over the pixels covered by a contour.
/// </summary>
/// <param name="contour">Contour points in the coordinate system of <paramref name="pred"/>.</param>
/// <param name="pred">Prediction map to average.</param>
/// <returns>The mean of <paramref name="pred"/> inside the filled contour polygon.</returns>
private float GetScore(OpenCvSharp.Point[] contour, Mat pred)
{
    int width = pred.Width;
    int height = pred.Height;
    int[] boxX = contour.Select(v => v.X).ToArray();
    int[] boxY = contour.Select(v => v.Y).ToArray();

    // Axis-aligned bounds of the contour, kept inside the prediction map.
    int xmin = Clamp(boxX.Min(), 0, width - 1);
    int xmax = Clamp(boxX.Max(), 0, width - 1);
    int ymin = Clamp(boxY.Min(), 0, height - 1);
    int ymax = Clamp(boxY.Max(), 0, height - 1);

    // Contour translated so that the bounds' top-left corner becomes the origin.
    OpenCvSharp.Point[] rootPoints = contour
        .Select(v => new OpenCvSharp.Point(v.X - xmin, v.Y - ymin))
        .ToArray();

    // Both Mats are temporary; dispose them so their native memory is released immediately
    // instead of waiting for finalization (this method runs once per contour).
    using (Mat mask = new Mat(ymax - ymin + 1, xmax - xmin + 1, MatType.CV_8UC1, Scalar.Black))
    using (Mat croppedMat = pred[ymin, ymax + 1, xmin, xmax + 1])
    {
        // Non-zero mask pixels select which prediction values contribute to the mean.
        mask.FillPoly(new[] { rootPoints }, new Scalar(1));
        return (float)croppedMat.Mean(mask).Val0;
    }
}
/// <summary>
/// Restricts <paramref name="val"/> to the range [<paramref name="min"/>, <paramref name="max"/>].
/// The lower bound is tested first.
/// </summary>
/// <param name="val">Value to restrict.</param>
/// <param name="min">Lower bound.</param>
/// <param name="max">Upper bound.</param>
/// <returns><paramref name="min"/> when below it, otherwise <paramref name="max"/> when above it, otherwise the value itself.</returns>
public int Clamp(int val, int min, int max)
{
    return val < min ? min
         : val > max ? max
         : val;
}
/// <summary>
/// Copies the channels of <paramref name="src"/> into a planar float array: channel 0 first,
/// then channel 1, and so on, each plane holding rows * cols values.
/// </summary>
/// <param name="src">Source matrix; each channel is extracted into a CV_32FC1 plane.</param>
/// <returns>An array of rows * cols * 3 floats containing the extracted planes.</returns>
float[] ExtractMat(Mat src)
{
    int rows = src.Rows;
    int cols = src.Cols;
    float[] array = new float[rows * cols * 3];

    // Pin the array so native code can write into it; the handle must be freed afterwards,
    // otherwise the array stays pinned and can never be collected.
    GCHandle gCHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
    try
    {
        IntPtr intPtr = gCHandle.AddrOfPinnedObject();
        for (int i = 0; i < src.Channels(); i++)
        {
            // Mat header over the i-th plane of the managed array (4 bytes per float).
            // Disposing it releases only the header, not the array memory.
            using (Mat dest = new Mat(rows, cols, MatType.CV_32FC1, intPtr + i * rows * cols * 4, 0L))
            {
                Cv2.ExtractChannel(src, dest, i);
            }
        }
    }
    finally
    {
        gCHandle.Free();
    }

    return array;
}
// Scales src down when its longer edge exceeds maxSize; returns a clone when maxSize is null.
// src:     image to resize.
// maxSize: cap for the longer edge, or null to skip resizing.
private Mat MatResize(Mat src, int? maxSize)
if (maxSize == null) return src.Clone();
OpenCvSharp.Size size = src.Size();
int longEdge = Math.Max(size.Width, size.Height);
// Factor that would bring the longer edge to exactly maxSize; values below 1 mean the image is too large.
double scaleRate = 1.0 * maxSize.Value / longEdge;
// NOTE(review): the else operand of this conditional expression (the result when no shrinking is needed)
// is missing from the listing.
return scaleRate < 1.0 ?
src.Resize(OpenCvSharp.Size.Zero, scaleRate, scaleRate) :
/// <summary>
/// Pads <paramref name="src"/> with black pixels on the bottom and right so that both
/// dimensions become multiples of 32.
/// </summary>
/// <param name="src">Image to pad.</param>
/// <returns>A new padded image; the original content stays at the top-left corner.</returns>
private Mat MatPadding32(Mat src)
{
    OpenCvSharp.Size original = src.Size();

    // Round each dimension up to the next multiple of 32.
    int paddedWidth = (int)(32 * Math.Ceiling(1.0 * original.Width / 32));
    int paddedHeight = (int)(32 * Math.Ceiling(1.0 * original.Height / 32));

    int extraBottom = paddedHeight - original.Height;
    int extraRight = paddedWidth - original.Width;
    return src.CopyMakeBorder(0, extraBottom, 0, extraRight, BorderTypes.Constant, Scalar.Black);
}
// Converts src to 32-bit float scaled by 1/255, then applies (value - mean) * scale to each channel
// and merges the channels back into a single image.
// NOTE(review): the intermediate Mat "normalized" is not disposed in the visible code.
private Mat Normalize(Mat src)
Mat normalized = new Mat();
src.ConvertTo(normalized, MatType.CV_32FC3, 1.0 / 255);
Mat[] bgr = normalized.Split();
// Per-channel constants, applied by channel index in the order returned by Split().
// NOTE(review): the array is named bgr while these constants resemble the commonly used RGB-order
// values - confirm the intended channel order.
float[] scales = new[] { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
float[] means = new[] { 0.485f, 0.456f, 0.406f };
for (int i = 0; i < bgr.Length; ++i)
// alpha = scale, beta = -mean * scale  =>  out = (in - mean) * scale
bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1.0 * scales[i], (0.0 - means[i]) * scales[i]);
Mat dest = new Mat();
Cv2.Merge(bgr, dest);
// NOTE(review): the loop body is missing from this listing (presumably it releases each channel Mat - confirm).
foreach (Mat channel in bgr)
return dest;
// Cuts the area described by a rotated rectangle out of src and warps it into an upright image of
// rect.Size; regions that extend beyond src are filled by replicating the border pixels.
// src:  source image.
// rect: rotated rectangle in src coordinates.
private Mat GetRotateCropImage(Mat src, RotatedRect rect)
bool wider = rect.Size.Width > rect.Size.Height;
float angle = rect.Angle;
OpenCvSharp.Size srcSize = src.Size();
Rect boundingRect = rect.BoundingRect();
// How far the bounding rectangle sticks out of the image on each side (0 when it does not).
int expTop = Math.Max(0, 0 - boundingRect.Top);
int expBottom = Math.Max(0, boundingRect.Bottom - srcSize.Height);
int expLeft = Math.Max(0, 0 - boundingRect.Left);
int expRight = Math.Max(0, boundingRect.Right - srcSize.Width);
// NOTE(review): the offset point is built as (expTop, expLeft), i.e. the top overflow is used as the
// X offset and the left overflow as the Y offset. This only matters when the rectangle leaves the
// image at the top or left; confirm whether the order (and the offset itself) is intended.
Rect rectToExp = boundingRect + new OpenCvSharp.Point(expTop, expLeft);
// Part of the bounding rectangle that lies inside the image.
Rect roiRect = Rect.FromLTRB(
boundingRect.Left + expLeft,
boundingRect.Top + expTop,
boundingRect.Right - expRight,
boundingRect.Bottom - expBottom);
Mat boundingMat = src[roiRect];
// Re-create the out-of-image margins by replicating edge pixels.
// NOTE(review): boundingMat, expanded and matrix are not disposed in the visible code.
Mat expanded = boundingMat.CopyMakeBorder(expTop, expBottom, expLeft, expRight, BorderTypes.Replicate);
// Rectangle corners translated by rectToExp.
// NOTE(review): the end of this query (presumably .ToArray();) is missing from the listing.
Point2f[] rp = rect.Points()
.Select(v => new Point2f(v.X - rectToExp.X, v.Y - rectToExp.Y))
// Corner order fed to the perspective transform; a different order is used for wide rectangles
// whose angle lies in [0, 45).
Point2f[] srcPoints = new[] { rp[0], rp[3], rp[2], rp[1] };
if (wider == true && angle >= 0 && angle < 45)
srcPoints = new[] { rp[1], rp[2], rp[3], rp[0] };
// Destination corners: an axis-aligned rectangle of rect.Size anchored at the origin.
var ptsDst0 = new Point2f(0, 0);
var ptsDst1 = new Point2f(rect.Size.Width, 0);
var ptsDst2 = new Point2f(rect.Size.Width, rect.Size.Height);
var ptsDst3 = new Point2f(0, rect.Size.Height);
Mat matrix = Cv2.GetPerspectiveTransform(srcPoints, new[] { ptsDst0, ptsDst1, ptsDst2, ptsDst3 });
Mat dest = expanded.WarpPerspective(matrix, new OpenCvSharp.Size(rect.Size.Width, rect.Size.Height), InterpolationFlags.Nearest, BorderTypes.Replicate);
// Tall rectangles are transposed; otherwise, rectangles with an angle above 45 are flipped with FlipMode.X.
if (!wider)
Cv2.Transpose(dest, dest);
else if (angle > 45)
Cv2.Flip(dest, dest, FlipMode.X);
return dest;
/// <summary>
/// Limits a rectangle to the area of an image of the given size.
/// </summary>
/// <param name="rect">Rectangle to limit.</param>
/// <param name="size">Image size; X edges are clamped to [0, Width], Y edges to [0, Height].</param>
/// <returns>The rectangle rebuilt from the clamped left, top, right and bottom edges.</returns>
private Rect GetCropedRect(Rect rect, OpenCvSharp.Size size)
{
    int left = Clamp(rect.Left, 0, size.Width);
    int top = Clamp(rect.Top, 0, size.Height);
    int right = Clamp(rect.Right, 0, size.Width);
    int bottom = Clamp(rect.Bottom, 0, size.Height);
    return Rect.FromLTRB(left, top, right, bottom);
}
// Form closing handler.
// NOTE(review): no body is present in this listing, so what it does (if anything) cannot be determined here.
private void Form1_FormClosing(object sender, FormClosingEventArgs e)
/// <summary>
/// Orientation detection: crops every detected region out of the source image, optionally runs the
/// 180-degree classifier on each crop, stores the results in <c>mats</c> and shows the elapsed time.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
textBox1.Text = "";
// NOTE(review): the same null check appears twice and the guarded statements are missing from the
// listing (presumably early returns, and possibly a different condition in one of them) - confirm.
if (rects == null)
if (rects == null)
dt1 = DateTime.Now;
// One crop per detected rectangle.
// NOTE(review): the lambda braces and the end of this query (presumably .ToArray();) are missing from the listing.
mats =
rects.Select(rect =>
// Rotation-aware crop when AllowRotateDetection is set, otherwise the bounding rectangle clamped to the image.
Mat roi = AllowRotateDetection ? GetRotateCropImage(src, rect) : src[GetCropedRect(rect.BoundingRect(), src.Size())];
// CLSPredictorRun rotates the crop in place when the classifier reports it as upside down.
return Enable180Classification ? CLSPredictorRun(roi) : roi;
dt2 = DateTime.Now;
// Show the elapsed time in milliseconds.
StringBuilder sb = new StringBuilder();
sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
textBox1.Text = sb.ToString();
// Recognition handler: runs the recognizer over the crops in mats and writes one "(score)text" line
// per crop, followed by the elapsed time, into the text box.
private void button3_Click(object sender, EventArgs e)
textBox1.Text = "";
// NOTE(review): the statement guarded by this check is missing from the listing (presumably an early return).
if (rects == null || mats == null)
dt1 = DateTime.Now;
// 0 lets RecognizerRun choose its own batch size.
int recognizeBatchSize = 0;
//Cv2.ImShow("mats", mats[0]);
PaddleOcrRecognizerResult[] porr = RecognizerRun(mats, recognizeBatchSize);
dt2 = DateTime.Now;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < porr.Length; i++)
sb.AppendLine("("+ porr[i].Score+")"+ porr[i].Text );
sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
textBox1.Text = sb.ToString();
// NOTE(review): the loop body is missing from this listing (presumably each crop is released - confirm).
foreach (Mat mat in mats)
// Recognizes text in every image of srcs and returns the results in the same order as the input.
// srcs:      images to recognize; an empty array yields an empty result.
// batchSize: number of images per batch; 0 selects min(8, processor count).
public PaddleOcrRecognizerResult[] RecognizerRun(Mat[] srcs, int batchSize = 0)
if (srcs.Length == 0)
return new PaddleOcrRecognizerResult[0];
int chooseBatchSize = batchSize != 0 ? batchSize : Math.Min(8, Environment.ProcessorCount);
// NOTE(review): allResult is allocated but never used in the visible code.
PaddleOcrRecognizerResult[] allResult = new PaddleOcrRecognizerResult[srcs.Length];
// Pair every image with its original index and sort by width, so that images of similar width
// end up next to each other.
// NOTE(review): the step that splits the sorted sequence into groups is missing from this listing -
// the following Select treats each element x as a sequence, and chooseBatchSize is otherwise unused.
// The end of the query (presumably .ToArray();) is missing as well.
return srcs
.Select((x, i) => (mat: x, i))
.OrderBy(x => x.mat.Width)
// Run one group through RunMulti and remember the original indices of its members.
.Select(x => (result: RunMulti(x.Select(x2 => x2.mat).ToArray()), ids: x.Select(x2 => x2.i).ToArray()))
// Flatten to (result, original index) pairs and restore the input order.
.SelectMany(x => x.result.Zip(x.ids, (result, i) => (result, i)))
.OrderBy(x => x.i)
.Select(x => x.result)
/// <summary>
/// Resizes <paramref name="src"/> to the given height while keeping its aspect ratio, then pads the
/// right side with gray pixels up to <paramref name="targetWidth"/>.
/// </summary>
/// <param name="src">Image to resize.</param>
/// <param name="height">Height of the returned image.</param>
/// <param name="targetWidth">Width of the returned image after padding.</param>
/// <returns>A new image of size targetWidth x height.</returns>
private Mat ResizePadding(Mat src, int height, int targetWidth)
{
    OpenCvSharp.Size size = src.Size();
    float whRatio = 1.0f * size.Width / size.Height;
    int width = (int)Math.Ceiling(height * whRatio);

    if (width == targetWidth)
    {
        // Already the requested width: no padding needed.
        return src.Resize(new OpenCvSharp.Size(width, height));
    }

    // The resized image is only an intermediate; dispose it once the padded copy exists so its
    // native memory is not left to the finalizer.
    using (Mat resized = src.Resize(new OpenCvSharp.Size(width, height)))
    {
        return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
    }
}
/// <summary>
/// Recognizes the text of a single image by delegating to <c>RunMulti</c> with a one-element batch.
/// </summary>
/// <param name="src">Image to recognize.</param>
/// <returns>The only result produced for the one-element batch.</returns>
private PaddleOcrRecognizerResult Run(Mat src)
{
    PaddleOcrRecognizerResult[] results = RunMulti(new[] { src });
    return results.Single();
}
// Recognizes a batch of images: brings all of them to a common size, runs the recognition predictor
// once and decodes one text/score pair per image from the output.
// NOTE(review): this listing contains no brace lines and several statements are missing; see notes below.
private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
if (srcs.Length == 0)
return new PaddleOcrRecognizerResult[0];
// Reject empty images up front.
for (int i = 0; i < srcs.Length; ++i)
Mat src = srcs[i];
if (src.Empty())
throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
int modelHeight = recShape.Height;
// Widest image after scaling every image to modelHeight; used as the common batch width.
// NOTE(review): the lambda braces and the closing of this expression are missing from the listing.
int maxWidth = (int)Math.Ceiling(srcs.Max(src =>
OpenCvSharp.Size size = src.Size();
return 1.0 * size.Width / size.Height * modelHeight;
// NOTE(review): index is never changed in the visible code, so every debug window below gets the same title.
int index = 0;
// Convert each image to 3 channels, resize/pad it to modelHeight x maxWidth and normalise it.
// NOTE(review): the lambda braces and the end of this query (presumably .ToArray();) are missing from the listing.
Mat[] normalizeds = srcs
.Select(src =>
Mat channel3 = new Mat();
if (src.Channels() == 4)
channel3 = src.CvtColor(ColorConversionCodes.RGBA2BGR);
else if (src.Channels() == 3)
channel3 = src.Clone();
else if (src.Channels() == 1)
channel3 = src.CvtColor(ColorConversionCodes.GRAY2RGB);
// NOTE(review): as listed, this throw is not guarded by an else; presumably it was the final else branch - confirm.
throw new Exception("Unexpect src channel: {" + src.Channels() + "}, allow: (1/3/4)");
Mat resized = ResizePadding(channel3, modelHeight, maxWidth);
// Debug window showing the resized input.
Cv2.ImShow("resized"+index.ToString(), resized);
return Normalize(resized);
using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
int channel = normalizeds[0].Channels();
input.Shape = new[] { normalizeds.Length, channel, modelHeight, maxWidth };
// NOTE(review): no visible statement uploads data into the input tensor - confirm against the original.
float[] data = ExtractMat(normalizeds, channel, modelHeight, maxWidth);
if (!rec_predictor.Run())
throw new Exception($"PaddlePredictor(Recognizer) run failed.");
using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
float[] data = output.GetData<float>();
int[] shape = output.Shape;
// Pin the output array so Mat headers can be created over slices of it.
// NOTE(review): the handle is not freed in the visible code.
GCHandle dataHandle = default;
dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
// Output is indexed as [image, position, label]: shape[1] positions per image, shape[2] label scores per position.
int labelCount = shape[2];
int charCount = shape[1];
// Decode one result per image.
// NOTE(review): the lambda braces and the end of this query (presumably .ToArray();) are missing from the listing.
return Enumerable.Range(0, shape[0])
.Select(i =>
StringBuilder sb = new StringBuilder();
int lastIndex = 0;
float score = 0;
for (int n = 0; n < charCount; ++n)
// Row of label scores for position n of image i.
Mat mat = new Mat(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float));
int[] maxIdx = new int[2];
mat.MinMaxIdx(out double _, out double maxVal, new int[0], maxIdx);
// Count a position only when its best label index is above 0 and differs from the previous position's.
// NOTE(review): no visible statement appends a character to sb here (presumably via GetLabelByIndex) - confirm.
if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
score += (float)maxVal;
lastIndex = maxIdx[1];
// Average score over the characters appended to sb.
return new PaddleOcrRecognizerResult(sb.ToString(), score / sb.Length);
/// <summary>
/// Copies a batch of images into one planar float array laid out as
/// [image][channel][row][column].
/// </summary>
/// <param name="srcs">Images to copy; each must have exactly <paramref name="channel"/> channels.</param>
/// <param name="channel">Number of channels per image.</param>
/// <param name="height">Height of every plane.</param>
/// <param name="width">Width of every plane.</param>
/// <returns>An array of srcs.Length * channel * height * width floats.</returns>
/// <exception cref="Exception">An image has a different channel count than <paramref name="channel"/>.</exception>
private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
{
    float[] result = new float[srcs.Length * channel * width * height];

    // Pin the array so native code can write into it. The finally block guarantees the handle is
    // released even when the channel check throws; otherwise the array would stay pinned forever.
    GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
    try
    {
        IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
        for (int i = 0; i < srcs.Length; ++i)
        {
            Mat src = srcs[i];
            if (src.Channels() != channel)
            {
                throw new Exception($"src[{i}] channel={src.Channels()}, expected {channel}");
            }

            for (int c = 0; c < channel; ++c)
            {
                // Mat header over the plane for image i, channel c. Disposing it releases only the
                // header, not the array memory.
                using (Mat dest = new Mat(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * height * width * sizeof(float)))
                {
                    Cv2.ExtractChannel(src, dest, c);
                }
            }
        }
    }
    finally
    {
        resultHandle.Free();
    }

    return result;
}
/// <summary>
/// Maps a 1-based label index to its text in <c>Labels</c>.
/// </summary>
/// <param name="x">Label index: 1..Labels.Count selects a label, Labels.Count + 1 yields an empty string.</param>
/// <returns>The label text, or an empty string for index Labels.Count + 1.</returns>
/// <exception cref="Exception">The index is outside 1..Labels.Count + 1.</exception>
string GetLabelByIndex(int x)
{
    bool isDictionaryEntry = x > 0 && x <= Labels.Count;
    if (isDictionaryEntry)
    {
        return Labels[x - 1];
    }

    if (x != Labels.Count + 1)
    {
        throw new Exception("Unable to GetLabelByIndex: index {" + x + "} out of range {" + Labels.Count + "}, OCR model or labels not matched?");
    }

    return "";
}
// Brings src to shape.Height, cropping the right side when the image is too wide for the shape and
// padding the right side with black pixels when the resized image is narrower than shape.Width.
private Mat ResizePadding(Mat src, OcrShape shape)
OpenCvSharp.Size srcSize = src.Size();
// When the image is proportionally wider than the shape, keep only the left part that matches the shape's aspect ratio.
// NOTE(review): srcSize.Width / srcSize.Height is an integer division; if shape.Width and shape.Height
// are integers too, both ratios are truncated before being compared - confirm this is intended.
// NOTE(review): the else operand of this conditional expression is missing from the listing.
Mat roi = srcSize.Width / srcSize.Height > shape.Width / shape.Height ?
src[0, srcSize.Height, 0, (int)Math.Floor(1.0 * srcSize.Height * shape.Width / shape.Height)] :
double scaleRate = 1.0 * shape.Height / srcSize.Height;
Mat resized = roi.Resize(new OpenCvSharp.Size(Math.Floor(roi.Width * scaleRate), shape.Height));
// Pad on the right up to shape.Width.
if (resized.Width < shape.Width)
Cv2.CopyMakeBorder(resized, resized, 0, 0, 0, shape.Width - resized.Width, BorderTypes.Constant, Scalar.Black);
return resized;
/// <summary>
/// Rotates <paramref name="src"/> by 180 degrees in place when <c>ShouldRotate180</c> reports that
/// it should be rotated, and returns the same instance.
/// </summary>
/// <param name="src">Image with 1 or 3 channels; modified in place when rotated.</param>
/// <returns><paramref name="src"/> itself.</returns>
/// <exception cref="ArgumentException">The image is empty.</exception>
/// <exception cref="NotSupportedException">The image has neither 1 nor 3 channels.</exception>
public Mat CLSPredictorRun(Mat src)
{
    if (src.Empty())
    {
        throw new ArgumentException("src size should not be 0, wrong input picture provided?");
    }

    int channels = src.Channels();
    if (channels != 3 && channels != 1)
    {
        throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {channels}.");
    }

    bool upsideDown = ShouldRotate180(src);
    if (upsideDown)
    {
        Cv2.Rotate(src, src, RotateFlags.Rotate180);
    }

    return src;
}
// Runs the classification predictor on src and reports whether the image should be rotated by 180 degrees.
// Throws ArgumentException for an empty image and NotSupportedException unless it has 1 or 3 channels.
public bool ShouldRotate180(Mat src)
if (src.Empty())
throw new ArgumentException("src size should not be 0, wrong input picture provided?");
if (!(src.Channels() == 3 || src.Channels() == 1))
throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
//The default OcrShape used in the classification model
OcrShape shape = new OcrShape(3, 192, 48);
// NOTE(review): resized and normalized are not disposed in the visible code.
Mat resized = ResizePadding(src, shape);
Mat normalized = Normalize(resized);
using (PaddleTensor input = cls_predictor.GetInputTensor(cls_predictor.InputNames[0]))
input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
// NOTE(review): no visible statement uploads data into the input tensor - confirm against the original.
float[] data = ExtractMat(normalized);
if (!cls_predictor.Run())
throw new Exception("PaddlePredictor(Classifier) run failed.");
using (PaddleTensor output = cls_predictor.GetOutputTensor(cls_predictor.OutputNames[0]))
float[] softmax = output.GetData<float>();
// Find the index (label) and value (score) of the largest output.
float score = 0;
int label = 0;
for (int i = 0; i < softmax.Length; ++i)
if (softmax[i] > score)
score = softmax[i];
label = i;
// Rotate only when the winning index is odd and its score exceeds RotateThreshold.
return label % 2 == 1 && score > RotateThreshold;
