赞
踩
目录
基于开源项目 https://github.com/sdcb/PaddleSharp
VS2022 + .NET Framework 4.8 + OpenCvSharp4 + Sdcb.PaddleInference
using Sdcb.PaddleInference.Native;
using Sdcb.PaddleInference;
using System;
using System.Collections.Generic;
using OpenCvSharp.Extensions;
using OpenCvSharp;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.Globalization;
using System.IO;
namespace PaddleInference_OCR识别
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form and builds its designer-generated controls.
/// </summary>
public Form1() => InitializeComponent();
// Bitmap loaded from the chosen file by button1_Click; nothing else in this class reads it.
Bitmap bmp;
// Filter string handed to the OpenFileDialog in button1_Click.
string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
// Path of the currently selected image; "" means no image has been chosen yet.
string img = "";
// NOTE(review): Form1_Load declares a local with the same name, so this field keeps its "" value in the visible code.
string startupPath = "";
// Passed to MatResize as the maximum allowed long edge before detection.
int MaxSize = 1536;
// Binarization threshold for the detection map (multiplied by 255 in button2_Click); null skips thresholding.
float? BoxThreshold = 0.3f;
// Minimum GetScore value a contour needs to be kept; null disables the score filter.
float? BoxScoreThreahold = 0.7f;
// Side length of the square structuring element used for dilation; null skips dilation.
int? DilatedSize = 2;
// Rectangles whose width or height is not greater than this are dropped.
int MinSize = 3;
// Multiplied by the rectangle's shorter edge and added to both sides to enlarge each detected box.
float UnclipRatio = 2.0f;
// Image read from img by button2_Click; button4_Click crops text regions out of it.
Mat src;
// Predictor for the text detection model (created in Form1_Load).
PaddlePredictor det_predictor;
// Detected text boxes in src coordinates, produced by button2_Click.
RotatedRect[] rects;
// When true, button4_Click passes every crop through CLSPredictorRun.
bool Enable180Classification { get; set; } = true;
// When true, crops are produced by GetRotateCropImage; otherwise by the clamped bounding rectangle.
bool AllowRotateDetection { get; set; } = true;
// Minimum classifier score required by ShouldRotate180 before it reports a rotation.
double RotateThreshold { get; } = 0.75;
// Cropped text-line images produced by button4_Click and consumed by button3_Click.
Mat[] mats;
// Predictor for the direction classification model (created in Form1_Load).
PaddlePredictor cls_predictor;
// Shape used by the recognizer; only its Height is read in the visible code (RunMulti).
OcrShape recShape = new OcrShape(3, 320, 48);
// Predictor for the text recognition model (created in Form1_Load).
PaddlePredictor rec_predictor;
// Recognition labels, one per line of ppocr_keys.txt, loaded in Form1_Load.
public IReadOnlyList<string> Labels;
// Start and end timestamps used to report the elapsed time of each button action.
DateTime dt1 = DateTime.Now;
DateTime dt2 = DateTime.Now;
/// <summary>
/// Creates the detection, direction-classification and recognition predictors and loads
/// the recognition label list from the application's start-up directory.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private unsafe void Form1_Load(object sender, EventArgs e)
{
    string startupPath = Application.StartupPath;

    // Native Paddle expects paths in the ANSI code page on Windows and UTF-8 elsewhere.
    Encoding PaddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage) : Encoding.UTF8;

    // Detection model: loaded from in-memory buffers, which is the hook for reading an
    // encrypted model file and decrypting it before handing it to Paddle. The path-based
    // alternative (PD_ConfigSetModel) is what the two models below use.
    IntPtr det_ptr = PaddleNative.PD_ConfigCreate();
    String det_programPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdmodel";
    String det_paramsPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdiparams";

    // File.ReadAllBytes reads the whole file and closes it; a single Stream.Read call is
    // not guaranteed to fill the buffer and the streams were previously never disposed.
    byte[] programBuffer = File.ReadAllBytes(det_programPath);
    byte[] paramsBuffer = File.ReadAllBytes(det_paramsPath);
    fixed (byte* pprogram = programBuffer)
    fixed (byte* pparams = paramsBuffer)
    {
        PaddleNative.PD_ConfigSetModelBuffer(det_ptr,
            (IntPtr)pprogram, programBuffer.Length,
            (IntPtr)pparams, paramsBuffer.Length);
    }
    det_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(det_ptr));

    // Direction classification model, loaded by path.
    IntPtr cls_ptr = PaddleNative.PD_ConfigCreate();
    String cls_programPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdmodel";
    String cls_paramsPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdiparams";
    // The native side reads these as C strings, so append an explicit NUL terminator.
    byte[] programBytes = PaddleEncoding.GetBytes(cls_programPath + "\0");
    byte[] paramsBytes = PaddleEncoding.GetBytes(cls_paramsPath + "\0");
    fixed (byte* programPtr = programBytes)
    fixed (byte* paramsPtr = paramsBytes)
    {
        PaddleNative.PD_ConfigSetModel(cls_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
    }
    cls_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(cls_ptr));

    // Recognition model, loaded by path.
    IntPtr rec_ptr = PaddleNative.PD_ConfigCreate();
    String rec_programPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdmodel";
    String rec_paramsPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdiparams";
    byte[] rec_programBytes = PaddleEncoding.GetBytes(rec_programPath + "\0");
    byte[] rec_paramsBytes = PaddleEncoding.GetBytes(rec_paramsPath + "\0");
    fixed (byte* rec_programPtr = rec_programBytes)
    fixed (byte* rec_paramsPtr = rec_paramsBytes)
    {
        PaddleNative.PD_ConfigSetModel(rec_ptr, (IntPtr)rec_programPtr, (IntPtr)rec_paramsPtr);
    }
    rec_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(rec_ptr));

    // Labels: one entry per line of the key file.
    String labelsPath = startupPath + "\\ppocr_keys.txt";
    Labels = File.ReadLines(labelsPath).ToList();
}
/// <summary>
/// Lets the user pick an image file, remembers its path in <c>img</c> and shows it in the picture box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // The dialog owns native resources, so dispose it as soon as the path is known.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Filter = fileFilter;
        if (ofd.ShowDialog() != DialogResult.OK) return;
        img = ofd.FileName;
    }

    // Detach the old picture before disposing it so the control never paints a disposed image.
    Image previousImage = pictureBox1.Image;
    pictureBox1.Image = null;
    previousImage?.Dispose();

    // Release the bitmap kept from the previous selection before replacing it.
    bmp?.Dispose();
    bmp = new Bitmap(img);
    pictureBox1.Image = new Bitmap(img);
    textBox1.Text = "";
}
/// <summary>
/// Runs text detection on the selected image: preprocesses it, runs the detection
/// predictor, turns the prediction map into rotated rectangles (stored in <c>rects</c>),
/// reports the elapsed time and draws the rectangles into the picture box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();
    if (img == "")
    {
        return;
    }
    dt1 = DateTime.Now;

    // Release the image kept from the previous run before replacing it.
    src?.Dispose();
    src = Cv2.ImRead(img);

    OpenCvSharp.Size resizedSize;
    // The preprocessing Mats are only needed until the input tensor has been filled.
    using (Mat resized = MatResize(src, MaxSize))
    using (Mat padded = MatPadding32(resized))
    using (Mat normalized = Normalize(padded))
    {
        Cv2.ImShow("normalized", normalized);
        resizedSize = resized.Size();
        using (PaddleTensor input = det_predictor.GetInputTensor(det_predictor.InputNames[0]))
        {
            input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
            float[] setData = ExtractMat(normalized);
            input.SetData(setData);
        }
    }

    if (!det_predictor.Run())
    {
        throw new Exception("PaddlePredictor(Detector) run failed.");
    }

    using (PaddleTensor output = det_predictor.GetOutputTensor(det_predictor.OutputNames[0]))
    {
        float[] data = output.GetData<float>();
        int[] shape = output.Shape;

        using (Mat pred = new Mat(shape[2], shape[3], MatType.CV_32FC1, data))
        using (Mat cbuf = new Mat())
        using (Mat dilated = new Mat())
        {
            Cv2.ImShow("pred", pred);

            // Only the un-padded part of the prediction map corresponds to the image.
            using (Mat roi = pred[0, resizedSize.Height, 0, resizedSize.Width])
            {
                roi.ConvertTo(cbuf, MatType.CV_8UC1, 255);
            }

            Mat binary = BoxThreshold != null ?
                cbuf.Threshold((int)(BoxThreshold * 255), 255, ThresholdTypes.Binary) :
                cbuf;
            try
            {
                if (DilatedSize != null)
                {
                    using (Mat ones = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(DilatedSize.Value, DilatedSize.Value)))
                    {
                        Cv2.Dilate(binary, dilated, ones);
                    }
                }
                else
                {
                    Cv2.CopyTo(binary, dilated);
                }
            }
            finally
            {
                // binary aliases cbuf when thresholding is disabled; cbuf is disposed by its own using.
                if (!ReferenceEquals(binary, cbuf))
                {
                    binary.Dispose();
                }
            }

            OpenCvSharp.Point[][] contours = dilated.FindContoursAsArray(RetrievalModes.List, ContourApproximationModes.ApproxSimple);

            // Factor that maps coordinates of the resized image back to the source image.
            double scaleRate = 1.0 * src.Width / resizedSize.Width;

            // ToArray() forces evaluation here, while pred (used by GetScore) is still alive.
            rects = contours
                .Where(x => BoxScoreThreahold == null || GetScore(x, pred) > BoxScoreThreahold)
                .Select(x => Cv2.MinAreaRect(x))
                .Where(x => x.Size.Width > MinSize && x.Size.Height > MinSize)
                .Select(rect =>
                {
                    float minEdge = Math.Min(rect.Size.Width, rect.Size.Height);
                    Size2f newSize = new Size2f(
                        (rect.Size.Width + UnclipRatio * minEdge) * scaleRate,
                        (rect.Size.Height + UnclipRatio * minEdge) * scaleRate);
                    return new RotatedRect(rect.Center * scaleRate, newSize, rect.Angle);
                })
                .OrderBy(v => v.Center.Y)
                .ThenBy(v => v.Center.X)
                .ToArray();
        }
    }

    dt2 = DateTime.Now;
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("-----------------------------------\n");
    sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
    textBox1.Text = sb.ToString();

    // Draw every detected box on a fresh copy of the image.
    using (Mat src2 = Cv2.ImRead(img))
    {
        for (int i = 0; i < rects.Length; i++)
        {
            Scalar scalar = Scalar.RandomColor();
            List<OpenCvSharp.Point> corners = new List<OpenCvSharp.Point>();
            foreach (var corner in rects[i].Points())
            {
                corners.Add(new OpenCvSharp.Point(corner.X, corner.Y));
            }
            List<List<OpenCvSharp.Point>> polygons = new List<List<OpenCvSharp.Point>>();
            polygons.Add(corners);
            Cv2.Polylines(src2, polygons, true, scalar);
        }

        // Swap in the new picture first, then release the old one.
        Image previousImage = pictureBox1.Image;
        pictureBox1.Image = BitmapConverter.ToBitmap(src2);
        previousImage?.Dispose();
    }
}
/// <summary>
/// Computes the mean value of <paramref name="pred"/> inside the polygon described by
/// <paramref name="contour"/>.
/// </summary>
/// <param name="contour">Polygon vertices in the coordinate system of <paramref name="pred"/>.</param>
/// <param name="pred">Prediction map to sample.</param>
/// <returns>Mean of the first channel over the filled polygon.</returns>
private float GetScore(OpenCvSharp.Point[] contour, Mat pred)
{
    int width = pred.Width;
    int height = pred.Height;

    // Axis-aligned bounds of the contour, clamped to the prediction map.
    int xmin = Clamp(contour.Min(v => v.X), 0, width - 1);
    int xmax = Clamp(contour.Max(v => v.X), 0, width - 1);
    int ymin = Clamp(contour.Min(v => v.Y), 0, height - 1);
    int ymax = Clamp(contour.Max(v => v.Y), 0, height - 1);

    // Contour translated so that the bounding box starts at (0, 0).
    OpenCvSharp.Point[] rootPoints = contour
        .Select(v => new OpenCvSharp.Point(v.X - xmin, v.Y - ymin))
        .ToArray();

    // Both Mats are temporary; this method is called once per contour, so dispose them promptly.
    using (Mat mask = new Mat(ymax - ymin + 1, xmax - xmin + 1, MatType.CV_8UC1, Scalar.Black))
    using (Mat croppedMat = pred[ymin, ymax + 1, xmin, xmax + 1])
    {
        mask.FillPoly(new[] { rootPoints }, new Scalar(1));
        return (float)croppedMat.Mean(mask).Val0;
    }
}
/// <summary>
/// Restricts <paramref name="val"/> to the range [<paramref name="min"/>, <paramref name="max"/>].
/// </summary>
/// <param name="val">Value to restrict.</param>
/// <param name="min">Lower bound; checked first.</param>
/// <param name="max">Upper bound; checked only when the value is not below <paramref name="min"/>.</param>
/// <returns>The bound that was violated, or <paramref name="val"/> itself.</returns>
public int Clamp(int val, int min, int max)
{
    return val < min
        ? min
        : (val > max ? max : val);
}
/// <summary>
/// Copies the channels of <paramref name="src"/> into one planar float array: all values
/// of channel 0, then channel 1, then channel 2.
/// </summary>
/// <param name="src">Image with at most 3 channels. NOTE(review): presumably expected to be 32-bit float already (callers pass the output of Normalize) — confirm.</param>
/// <returns>An array of <c>rows * cols * 3</c> floats; planes for missing channels stay zero.</returns>
/// <exception cref="ArgumentException">The image has more than 3 channels.</exception>
float[] ExtractMat(Mat src)
{
    int rows = src.Rows;
    int cols = src.Cols;
    int channels = src.Channels();

    // The buffer below holds exactly 3 planes; a 4th channel would be written past its end.
    if (channels > 3)
    {
        throw new ArgumentException($"src has {channels} channels, at most 3 are supported.", nameof(src));
    }

    float[] array = new float[rows * cols * 3];

    // Pin before entering try so that Free() only runs on an allocated handle.
    GCHandle gCHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
    try
    {
        IntPtr basePtr = gCHandle.AddrOfPinnedObject();
        for (int i = 0; i < channels; i++)
        {
            // Mat header over the i-th plane of the pinned array; ExtractChannel writes into it.
            using (Mat dest = new Mat(rows, cols, MatType.CV_32FC1, basePtr + i * rows * cols * sizeof(float), 0L))
            {
                Cv2.ExtractChannel(src, dest, i);
            }
        }
        return array;
    }
    finally
    {
        gCHandle.Free();
    }
}
/// <summary>
/// Returns a copy of <paramref name="src"/> that is scaled down so its longer edge does
/// not exceed <paramref name="maxSize"/>; images that already fit are cloned unchanged.
/// </summary>
/// <param name="src">Image to resize.</param>
/// <param name="maxSize">Maximum long-edge length, or null to skip resizing.</param>
/// <returns>A new Mat owned by the caller.</returns>
private Mat MatResize(Mat src, int? maxSize)
{
    if (maxSize.HasValue)
    {
        OpenCvSharp.Size dims = src.Size();
        double factor = 1.0 * maxSize.Value / Math.Max(dims.Width, dims.Height);
        if (factor < 1.0)
        {
            // Only ever shrink; the target size is derived from the scale factors.
            return src.Resize(OpenCvSharp.Size.Zero, factor, factor);
        }
    }

    return src.Clone();
}
/// <summary>
/// Pads <paramref name="src"/> with black pixels on the bottom and right so that both
/// dimensions become multiples of 32.
/// </summary>
/// <param name="src">Image to pad.</param>
/// <returns>A new, padded Mat owned by the caller.</returns>
private Mat MatPadding32(Mat src)
{
    OpenCvSharp.Size original = src.Size();

    // Round each dimension up to the next multiple of 32.
    double paddedWidth = 32 * Math.Ceiling(1.0 * original.Width / 32);
    double paddedHeight = 32 * Math.Ceiling(1.0 * original.Height / 32);
    OpenCvSharp.Size target = new OpenCvSharp.Size(paddedWidth, paddedHeight);

    int extraBottom = target.Height - original.Height;
    int extraRight = target.Width - original.Width;
    return src.CopyMakeBorder(0, extraBottom, 0, extraRight, BorderTypes.Constant, Scalar.Black);
}
/// <summary>
/// Converts <paramref name="src"/> to 32-bit float scaled by 1/255 and then applies
/// <c>(value - mean) * scale</c> to each channel, using the per-channel constants below
/// in channel-index order.
/// </summary>
/// <param name="src">Image to normalize. The constant tables hold 3 entries, so more than 3 channels is not supported.</param>
/// <returns>A new float Mat owned by the caller.</returns>
private Mat Normalize(Mat src)
{
    float[] means = new[] { 0.485f, 0.456f, 0.406f };
    float[] scales = new[] { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };

    Mat scaled = new Mat();
    src.ConvertTo(scaled, MatType.CV_32FC3, 1.0 / 255);
    Mat[] planes = scaled.Split();

    int planeIndex = 0;
    foreach (Mat plane in planes)
    {
        // value * alpha + beta  ==  (value - mean) * scale
        double alpha = 1.0 * scales[planeIndex];
        double beta = (0.0 - means[planeIndex]) * scales[planeIndex];
        plane.ConvertTo(plane, MatType.CV_32FC1, alpha, beta);
        planeIndex++;
    }
    scaled.Dispose();

    Mat merged = new Mat();
    Cv2.Merge(planes, merged);
    Array.ForEach(planes, plane => plane.Dispose());
    return merged;
}
/// <summary>
/// Cuts the rotated rectangle <paramref name="rect"/> out of <paramref name="src"/> and
/// warps it into an upright image of the rectangle's size. Parts of the rectangle that lie
/// outside the image are filled by replicating the border pixels.
/// </summary>
/// <param name="src">Source image.</param>
/// <param name="rect">Rotated rectangle in <paramref name="src"/> coordinates.</param>
/// <returns>A new Mat owned by the caller.</returns>
private Mat GetRotateCropImage(Mat src, RotatedRect rect)
{
    bool wider = rect.Size.Width > rect.Size.Height;
    float angle = rect.Angle;
    OpenCvSharp.Size srcSize = src.Size();
    Rect boundingRect = rect.BoundingRect();

    // How far the bounding rectangle sticks out of the image on each side.
    int expTop = Math.Max(0, 0 - boundingRect.Top);
    int expBottom = Math.Max(0, boundingRect.Bottom - srcSize.Height);
    int expLeft = Math.Max(0, 0 - boundingRect.Left);
    int expRight = Math.Max(0, boundingRect.Right - srcSize.Width);

    // Part of the bounding rectangle that actually lies inside the image.
    Rect roiRect = Rect.FromLTRB(
        boundingRect.Left + expLeft,
        boundingRect.Top + expTop,
        boundingRect.Right - expRight,
        boundingRect.Bottom - expBottom);

    using (Mat boundingMat = src[roiRect])
    using (Mat expanded = boundingMat.CopyMakeBorder(expTop, expBottom, expLeft, expRight, BorderTypes.Replicate))
    {
        // After padding, pixel (0, 0) of expanded corresponds to the bounding rectangle's
        // top-left corner in src, so that corner is the offset for the rectangle's points.
        // (The previous code added (expTop, expLeft) to it, which shifted the corners
        // whenever the rectangle crossed the top or left image border.)
        Point2f[] rp = rect.Points()
            .Select(v => new Point2f(v.X - boundingRect.X, v.Y - boundingRect.Y))
            .ToArray();

        Point2f[] srcPoints = new[] { rp[0], rp[3], rp[2], rp[1] };
        if (wider == true && angle >= 0 && angle < 45)
        {
            srcPoints = new[] { rp[1], rp[2], rp[3], rp[0] };
        }

        Point2f[] dstPoints = new[]
        {
            new Point2f(0, 0),
            new Point2f(rect.Size.Width, 0),
            new Point2f(rect.Size.Width, rect.Size.Height),
            new Point2f(0, rect.Size.Height),
        };

        using (Mat matrix = Cv2.GetPerspectiveTransform(srcPoints, dstPoints))
        {
            Mat dest = expanded.WarpPerspective(matrix, new OpenCvSharp.Size(rect.Size.Width, rect.Size.Height), InterpolationFlags.Nearest, BorderTypes.Replicate);

            if (!wider)
            {
                Cv2.Transpose(dest, dest);
            }
            else if (angle > 45)
            {
                Cv2.Flip(dest, dest, FlipMode.X);
            }

            return dest;
        }
    }
}
/// <summary>
/// Clamps every edge of <paramref name="rect"/> into the area spanned by <paramref name="size"/>.
/// </summary>
/// <param name="rect">Rectangle to clamp.</param>
/// <param name="size">Bounds; edges are limited to [0, Width] and [0, Height].</param>
/// <returns>The clamped rectangle.</returns>
private Rect GetCropedRect(Rect rect, OpenCvSharp.Size size)
{
    int left = Clamp(rect.Left, 0, size.Width);
    int top = Clamp(rect.Top, 0, size.Height);
    int right = Clamp(rect.Right, 0, size.Width);
    int bottom = Clamp(rect.Bottom, 0, size.Height);
    return Rect.FromLTRB(left, top, right, bottom);
}
/// <summary>
/// Releases the three native predictors when the form closes.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_FormClosing(object sender, FormClosingEventArgs e)
{
    // Null-conditional: a predictor is still null if Form1_Load failed before creating it.
    det_predictor?.Dispose();
    cls_predictor?.Dispose();
    rec_predictor?.Dispose();
}
/// <summary>
/// Direction classification: crops every detected box out of the source image and,
/// when enabled, rotates crops that the classifier reports as upside down. The crops are
/// stored in <c>mats</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();

    // Both the detection result and the image it was computed on are required.
    if (rects == null || src == null)
    {
        return;
    }

    // Release the crops of a previous run before they are replaced.
    if (mats != null)
    {
        foreach (Mat previous in mats)
        {
            previous?.Dispose();
        }
        mats = null;
    }

    dt1 = DateTime.Now;
    mats =
        rects.Select(rect =>
        {
            Mat roi = AllowRotateDetection ? GetRotateCropImage(src, rect) : src[GetCropedRect(rect.BoundingRect(), src.Size())];
            return Enable180Classification ? CLSPredictorRun(roi) : roi;
        })
        .ToArray();
    dt2 = DateTime.Now;

    StringBuilder sb = new StringBuilder();
    sb.AppendLine("-----------------------------------\n");
    sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
    textBox1.Text = sb.ToString();
}
/// <summary>
/// Runs text recognition on the crops in <c>mats</c> and writes one "(score)text" line per
/// crop, followed by the elapsed time, into the text box. The crops are released afterwards.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();
    if (rects == null || mats == null)
    {
        return;
    }
    dt1 = DateTime.Now;
    try
    {
        // 0 lets RecognizerRun pick its own batch size.
        int recognizeBatchSize = 0;
        PaddleOcrRecognizerResult[] porr = RecognizerRun(mats, recognizeBatchSize);
        dt2 = DateTime.Now;

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < porr.Length; i++)
        {
            sb.AppendLine("(" + porr[i].Score + ")" + porr[i].Text);
        }
        sb.AppendLine("-----------------------------------\n");
        sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
        textBox1.Text = sb.ToString();
    }
    finally
    {
        foreach (Mat mat in mats)
        {
            mat.Dispose();
        }
        // Drop the reference so a second click returns early instead of running on disposed Mats.
        mats = null;
    }
}
/// <summary>
/// Recognizes all crops in <paramref name="srcs"/> in batches. Crops are sorted by width
/// so that images of similar width share a batch, and the results are returned in the
/// original input order.
/// </summary>
/// <param name="srcs">Text-line crops to recognize.</param>
/// <param name="batchSize">Crops per batch; 0 selects min(8, processor count).</param>
/// <returns>One result per input crop, in input order.</returns>
public PaddleOcrRecognizerResult[] RecognizerRun(Mat[] srcs, int batchSize = 0)
{
    if (srcs.Length == 0)
    {
        return new PaddleOcrRecognizerResult[0];
    }

    int chooseBatchSize = batchSize != 0 ? batchSize : Math.Min(8, Environment.ProcessorCount);

    return srcs
        .Select((x, i) => (mat: x, i))
        .OrderBy(x => x.mat.Width)
        .Chunk(chooseBatchSize)
        .Select(x => (result: RunMulti(x.Select(x2 => x2.mat).ToArray()), ids: x.Select(x2 => x2.i).ToArray()))
        // Pair every result with the original index of its crop, then restore input order.
        .SelectMany(x => x.result.Zip(x.ids, (result, i) => (result, i)))
        .OrderBy(x => x.i)
        .Select(x => x.result)
        .ToArray();
}
/// <summary>
/// Resizes <paramref name="src"/> to the given height while keeping its aspect ratio and
/// pads it on the right with gray pixels up to <paramref name="targetWidth"/>.
/// </summary>
/// <param name="src">Image to resize.</param>
/// <param name="height">Height of the result.</param>
/// <param name="targetWidth">Width of the result.</param>
/// <returns>A new Mat of size targetWidth x height, owned by the caller.</returns>
private Mat ResizePadding(Mat src, int height, int targetWidth)
{
    OpenCvSharp.Size size = src.Size();
    float whRatio = 1.0f * size.Width / size.Height;

    // RunMulti derives targetWidth in double precision while this uses float, so the
    // rounded width can come out one pixel larger; cap it to avoid a negative border.
    int width = Math.Min((int)Math.Ceiling(height * whRatio), targetWidth);

    if (width == targetWidth)
    {
        return src.Resize(new OpenCvSharp.Size(width, height));
    }

    // The intermediate image is only needed to build the padded copy.
    using (Mat resized = src.Resize(new OpenCvSharp.Size(width, height)))
    {
        return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
    }
}
/// <summary>
/// Recognizes a single crop by running it through <c>RunMulti</c> as a batch of one.
/// </summary>
/// <param name="src">Text-line crop to recognize.</param>
/// <returns>The recognition result for <paramref name="src"/>.</returns>
private PaddleOcrRecognizerResult Run(Mat src)
{
    Mat[] batchOfOne = new[] { src };
    return RunMulti(batchOfOne).Single();
}
/// <summary>
/// Runs the recognition model on one batch of text-line crops: converts each crop to
/// 3 channels, resizes and pads all of them to a common size, normalizes them, feeds them to
/// the predictor and decodes the output into text and a score.
/// </summary>
/// <param name="srcs">Non-empty crops with 1, 3 or 4 channels.</param>
/// <returns>One result per crop, in input order.</returns>
/// <exception cref="ArgumentException">One of the crops is empty.</exception>
private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
{
    if (srcs.Length == 0)
    {
        return new PaddleOcrRecognizerResult[0];
    }

    for (int i = 0; i < srcs.Length; ++i)
    {
        if (srcs[i].Empty())
        {
            throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
        }
    }

    int modelHeight = recShape.Height;

    // Width of the widest crop once it is scaled to the model height.
    int maxWidth = (int)Math.Ceiling(srcs.Max(m =>
    {
        OpenCvSharp.Size size = m.Size();
        return 1.0 * size.Width / size.Height * modelHeight;
    }));

    Mat[] normalizeds = new Mat[srcs.Length];
    try
    {
        for (int i = 0; i < srcs.Length; ++i)
        {
            Mat src = srcs[i];
            Mat channel3;
            switch (src.Channels())
            {
                case 4:
                    channel3 = src.CvtColor(ColorConversionCodes.RGBA2BGR);
                    break;
                case 3:
                    channel3 = src.Clone();
                    break;
                case 1:
                    channel3 = src.CvtColor(ColorConversionCodes.GRAY2RGB);
                    break;
                default:
                    throw new Exception("Unexpect src channel: {" + src.Channels() + "}, allow: (1/3/4)");
            }

            // Both intermediates are only needed to produce the normalized image.
            using (channel3)
            using (Mat resized = ResizePadding(channel3, modelHeight, maxWidth))
            {
                Cv2.ImShow("resized" + i.ToString(), resized);
                normalizeds[i] = Normalize(resized);
            }
        }

        using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
        {
            int channel = normalizeds[0].Channels();
            input.Shape = new[] { normalizeds.Length, channel, modelHeight, maxWidth };
            float[] data = ExtractMat(normalizeds, channel, modelHeight, maxWidth);
            input.SetData(data);
        }
    }
    finally
    {
        // The tensor has its own copy of the data by now (or an error occurred); release the images.
        foreach (Mat normalized in normalizeds)
        {
            normalized?.Dispose();
        }
    }

    if (!rec_predictor.Run())
    {
        throw new Exception($"PaddlePredictor(Recognizer) run failed.");
    }

    using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
    {
        float[] data = output.GetData<float>();
        int[] shape = output.Shape;
        int charCount = shape[1];
        int labelCount = shape[2];

        // Pin the output so that Mat headers can point into it; allocated before try so
        // Free() never runs on an unallocated handle.
        GCHandle dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
        try
        {
            IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
            PaddleOcrRecognizerResult[] results = new PaddleOcrRecognizerResult[shape[0]];
            for (int i = 0; i < results.Length; ++i)
            {
                StringBuilder sb = new StringBuilder();
                int lastIndex = 0;
                float score = 0;
                for (int n = 0; n < charCount; ++n)
                {
                    int[] maxIdx = new int[2];
                    double maxVal;
                    // One row of label scores for position n of sample i.
                    using (Mat mat = new Mat(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float)))
                    {
                        mat.MinMaxIdx(out double _, out maxVal, new int[0], maxIdx);
                    }

                    // Index 0 is skipped, and so is an index repeated from the previous position.
                    if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
                    {
                        score += (float)maxVal;
                        sb.Append(GetLabelByIndex(maxIdx[1]));
                    }
                    lastIndex = maxIdx[1];
                }

                // When nothing was decoded sb.Length is 0 and the score is NaN (0f / 0).
                results[i] = new PaddleOcrRecognizerResult(sb.ToString(), score / sb.Length);
            }
            return results;
        }
        finally
        {
            dataHandle.Free();
        }
    }
}
/// <summary>
/// Packs a batch of images into one planar float array laid out as
/// [image][channel][row][column].
/// </summary>
/// <param name="srcs">Images to pack. NOTE(review): each is presumably a height x width 32-bit float image (callers pass the output of Normalize) — confirm.</param>
/// <param name="channel">Channel count every image must have.</param>
/// <param name="height">Row count of each plane.</param>
/// <param name="width">Column count of each plane.</param>
/// <returns>An array of <c>srcs.Length * channel * height * width</c> floats.</returns>
/// <exception cref="Exception">An image does not have <paramref name="channel"/> channels.</exception>
private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
{
    float[] result = new float[srcs.Length * channel * width * height];
    GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
    IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
    try
    {
        for (int i = 0; i < srcs.Length; ++i)
        {
            Mat src = srcs[i];
            if (src.Channels() != channel)
            {
                throw new Exception($"src[{i}] channel={src.Channels()}, expected {channel}");
            }
            for (int c = 0; c < channel; ++c)
            {
                // Mat header over plane (i, c) of the pinned array; dispose it once the
                // channel has been copied so the native header does not leak.
                using (Mat dest = new Mat(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * height * width * sizeof(float)))
                {
                    Cv2.ExtractChannel(src, dest, c);
                }
            }
        }
        return result;
    }
    finally
    {
        resultHandle.Free();
    }
}
/// <summary>
/// Maps a 1-based model output index to its label text.
/// </summary>
/// <param name="x">Index from the model output; 1..Labels.Count selects a label and Labels.Count + 1 maps to the empty string.</param>
/// <returns>The label text, or "" for index Labels.Count + 1.</returns>
/// <exception cref="Exception">The index is outside 1..Labels.Count + 1.</exception>
string GetLabelByIndex(int x)
{
    int labelCount = Labels.Count;

    // The one index past the label list carries no text.
    if (x == labelCount + 1)
    {
        return "";
    }

    if (x <= 0 || x > labelCount)
    {
        throw new Exception("Unable to GetLabelByIndex: index {" + x + "} out of range {" + labelCount + "}, OCR model or labels not matched?");
    }

    return Labels[x - 1];
}
/// <summary>
/// Fits <paramref name="src"/> into <paramref name="shape"/>: an image wider than the
/// shape's aspect ratio is cropped on the right, the result is scaled to the shape's height
/// and, if it is still narrower than the shape, padded on the right with black pixels.
/// </summary>
/// <param name="src">Image to fit.</param>
/// <param name="shape">Target shape; its Width and Height are used.</param>
/// <returns>A new Mat owned by the caller.</returns>
private Mat ResizePadding(Mat src, OcrShape shape)
{
    OpenCvSharp.Size srcSize = src.Size();

    // Compare the aspect ratios by cross-multiplication in floating point. The previous
    // integer division truncated both ratios, so e.g. 4.5:1 was not seen as wider than 4:1.
    bool widerThanShape = 1.0 * srcSize.Width * shape.Height > 1.0 * shape.Width * srcSize.Height;

    using (Mat roi = widerThanShape ?
        src[0, srcSize.Height, 0, (int)Math.Floor(1.0 * srcSize.Height * shape.Width / shape.Height)] :
        src.Clone())
    {
        double scaleRate = 1.0 * shape.Height / srcSize.Height;
        Mat resized = roi.Resize(new OpenCvSharp.Size(Math.Floor(roi.Width * scaleRate), shape.Height));
        if (resized.Width < shape.Width)
        {
            Cv2.CopyMakeBorder(resized, resized, 0, 0, 0, shape.Width - resized.Width, BorderTypes.Constant, Scalar.Black);
        }
        return resized;
    }
}
/// <summary>
/// Rotates <paramref name="src"/> by 180 degrees in place when the direction classifier
/// reports that it is upside down, and logs the decision to the console.
/// </summary>
/// <param name="src">Non-empty image with 1 or 3 channels; may be modified in place.</param>
/// <returns>The same Mat instance that was passed in.</returns>
/// <exception cref="ArgumentException">The image is empty.</exception>
/// <exception cref="NotSupportedException">The image does not have 1 or 3 channels.</exception>
public Mat CLSPredictorRun(Mat src)
{
    if (src.Empty())
    {
        throw new ArgumentException("src size should not be 0, wrong input picture provided?");
    }
    if (!(src.Channels() == 3 || src.Channels() == 1))
    {
        throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
    }

    bool rotate = ShouldRotate180(src);
    if (rotate)
    {
        Cv2.Rotate(src, src, RotateFlags.Rotate180);
    }
    Console.WriteLine(rotate ? "ShouldRotate180:True" : "ShouldRotate180:False");
    return src;
}
/// <summary>
/// Runs the direction classifier on <paramref name="src"/> and decides whether the image
/// should be rotated by 180 degrees.
/// </summary>
/// <param name="src">Non-empty image with 1 or 3 channels; not modified.</param>
/// <returns>
/// True when the highest-scoring output has an odd index and its score exceeds
/// <c>RotateThreshold</c>.
/// </returns>
/// <exception cref="ArgumentException">The image is empty.</exception>
/// <exception cref="NotSupportedException">The image does not have 1 or 3 channels.</exception>
public bool ShouldRotate180(Mat src)
{
    if (src.Empty())
    {
        throw new ArgumentException("src size should not be 0, wrong input picture provided?");
    }
    if (!(src.Channels() == 3 || src.Channels() == 1))
    {
        throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
    }

    // The default OcrShape used in the classification model
    OcrShape shape = new OcrShape(3, 192, 48);

    // using guarantees both temporaries are released even if filling the tensor throws.
    using (Mat resized = ResizePadding(src, shape))
    using (Mat normalized = Normalize(resized))
    using (PaddleTensor input = cls_predictor.GetInputTensor(cls_predictor.InputNames[0]))
    {
        input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
        float[] data = ExtractMat(normalized);
        input.SetData(data);
    }

    if (!cls_predictor.Run())
    {
        throw new Exception("PaddlePredictor(Classifier) run failed.");
    }

    using (PaddleTensor output = cls_predictor.GetOutputTensor(cls_predictor.OutputNames[0]))
    {
        float[] softmax = output.GetData<float>();

        // Arg-max over the outputs; ties keep the first index.
        float score = 0;
        int label = 0;
        for (int i = 0; i < softmax.Length; ++i)
        {
            if (softmax[i] > score)
            {
                score = softmax[i];
                label = i;
            }
        }

        return label % 2 == 1 && score > RotateThreshold;
    }
}
}
}
- using Sdcb.PaddleInference.Native;
- using Sdcb.PaddleInference;
- using System;
- using System.Collections.Generic;
- using OpenCvSharp.Extensions;
- using OpenCvSharp;
- using System.Data;
- using System.Drawing;
- using System.Linq;
- using System.Text;
- using System.Windows.Forms;
- using System.Runtime.InteropServices;
- using System.Globalization;
- using System.IO;
-
- namespace PaddleInference_OCR识别
- {
- public partial class Form1 : Form
- {
- public Form1()
- {
- InitializeComponent();
- }
-
- Bitmap bmp;
- string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
- string img = "";
- string startupPath = "";
-
- int MaxSize = 1536;
- float? BoxThreshold = 0.3f;
- float? BoxScoreThreahold = 0.7f;
- int? DilatedSize = 2;
- int MinSize = 3;
- float UnclipRatio = 2.0f;
-
- Mat src;
- PaddlePredictor det_predictor;
-
- RotatedRect[] rects;
-
- bool Enable180Classification { get; set; } = true;
- bool AllowRotateDetection { get; set; } = true;
- double RotateThreshold { get; } = 0.75;
- Mat[] mats;
- PaddlePredictor cls_predictor;
-
- OcrShape recShape = new OcrShape(3, 320, 48);
- PaddlePredictor rec_predictor;
-
- public IReadOnlyList<string> Labels;
-
- DateTime dt1 = DateTime.Now;
- DateTime dt2 = DateTime.Now;
-
- private unsafe void Form1_Load(object sender, EventArgs e)
- {
-
- string startupPath = Application.StartupPath;
-
- IntPtr det_ptr = PaddleNative.PD_ConfigCreate();
-
- Encoding PaddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage) : Encoding.UTF8;
-
- //检测模型路径
- String det_programPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdmodel";
- String det_paramsPath = startupPath + "\\ch_PP-OCRv3_det\\inference.pdiparams";
-
- //方式一
- //byte[] programBytes = PaddleEncoding.GetBytes(det_programPath);
- //byte[] paramsBytes = PaddleEncoding.GetBytes(det_paramsPath);
- //fixed (byte* programPtr = programBytes)
- //fixed (byte* paramsPtr = paramsBytes)
- //{
- // PaddleNative.PD_ConfigSetModel(det_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
- //}
-
- //方式二 读自己加密后的模型文件,解密后写入byte[]
- Stream Steam = new FileStream(det_programPath, FileMode.Open, FileAccess.Read, FileShare.Read);
- byte[] programBuffer = new byte[Steam.Length];
- Steam.Read(programBuffer, 0, programBuffer.Length);
-
- Steam = new FileStream(det_paramsPath, FileMode.Open, FileAccess.Read, FileShare.Read);
- byte[] paramsBuffer = new byte[Steam.Length];
- Steam.Read(paramsBuffer, 0, paramsBuffer.Length);
-
- fixed (byte* pprogram = programBuffer)
- fixed (byte* pparams = paramsBuffer)
- {
- PaddleNative.PD_ConfigSetModelBuffer(det_ptr,
- (IntPtr)pprogram, programBuffer.Length,
- (IntPtr)pparams, paramsBuffer.Length);
- }
-
- det_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(det_ptr));
-
- //方向分类模型
- IntPtr cls_ptr = PaddleNative.PD_ConfigCreate();
-
- String cls_programPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdmodel";
- String cls_paramsPath = startupPath + "\\ch_ppocr_mobile_v2.0_cls\\inference.pdiparams";
-
- byte[] programBytes = PaddleEncoding.GetBytes(cls_programPath);
- byte[] paramsBytes = PaddleEncoding.GetBytes(cls_paramsPath);
- fixed (byte* programPtr = programBytes)
- fixed (byte* paramsPtr = paramsBytes)
- {
- PaddleNative.PD_ConfigSetModel(cls_ptr, (IntPtr)programPtr, (IntPtr)paramsPtr);
- }
-
- cls_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(cls_ptr));
-
- //识别模型
- IntPtr rec_ptr = PaddleNative.PD_ConfigCreate();
-
- String rec_programPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdmodel";
- String rec_paramsPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdiparams";
-
- byte[] rec_programBytes = PaddleEncoding.GetBytes(rec_programPath);
- byte[] rec_paramsBytes = PaddleEncoding.GetBytes(rec_paramsPath);
- fixed (byte* rec_programPtr = rec_programBytes)
- fixed (byte* rec_paramsPtr = rec_paramsBytes)
- {
- PaddleNative.PD_ConfigSetModel(rec_ptr, (IntPtr)rec_programPtr, (IntPtr)rec_paramsPtr);
- }
-
- rec_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(rec_ptr));
-
- //Labels
- String labelsPath = startupPath + "\\ppocr_keys.txt";
- Steam = new FileStream(labelsPath, FileMode.Open, FileAccess.Read, FileShare.Read);
- StreamReader reader = new StreamReader(Steam);
- List<string> tempList = new List<string>();
- while (!reader.EndOfStream)
- {
- tempList.Add(reader.ReadLine());
- }
- reader.Dispose();
- Steam.Dispose();
- Labels = tempList;
- }
-
- private void button1_Click(object sender, EventArgs e)
- {
- OpenFileDialog ofd = new OpenFileDialog();
- ofd.Filter = fileFilter;
- if (ofd.ShowDialog() != DialogResult.OK) return;
-
- pictureBox1.Image = null;
-
- img = ofd.FileName;
- bmp = new Bitmap(img);
- pictureBox1.Image = new Bitmap(img);
- textBox1.Text = "";
- }
-
- private void button2_Click(object sender, EventArgs e)
- {
- textBox1.Text = "";
- Application.DoEvents();
- if (img == "")
- {
- return;
- }
- dt1 = DateTime.Now;
- src = Cv2.ImRead(img);
-
- Mat resized = MatResize(src, MaxSize);
- //Cv2.ImShow("resized", resized);
- Mat padded = MatPadding32(resized);
- //Cv2.ImShow("padded", padded);
- Mat normalized = Normalize(padded);
- Cv2.ImShow("normalized", normalized);
- OpenCvSharp.Size resizedSize = resized.Size();
- using (PaddleTensor input = det_predictor.GetInputTensor(det_predictor.InputNames[0]))
- {
- input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
- float[] setData = ExtractMat(normalized);
- input.SetData(setData);
- }
-
- if (!det_predictor.Run())
- {
- throw new Exception("PaddlePredictor(Detector) run failed.");
- }
-
- using (PaddleTensor output = det_predictor.GetOutputTensor(det_predictor.OutputNames[0]))
- {
- float[] data = output.GetData<float>();
- int[] shape = output.Shape;
-
- Mat pred = new Mat(shape[2], shape[3], MatType.CV_32FC1, data);
- Cv2.ImShow("pred", pred);
- Mat cbuf = new Mat();
-
- Mat roi = pred[0, resizedSize.Height, 0, resizedSize.Width];
- roi.ConvertTo(cbuf, MatType.CV_8UC1, 255);
- //Cv2.ImShow("roi", roi);
- Mat dilated = new Mat();
- Mat binary = BoxThreshold != null ?
- cbuf.Threshold((int)(BoxThreshold * 255), 255, ThresholdTypes.Binary) :
- cbuf;
- //Cv2.ImShow("binary", binary);
- if (DilatedSize != null)
- {
- Mat ones = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(DilatedSize.Value, DilatedSize.Value));
- Cv2.Dilate(binary, dilated, ones);
- ones.Dispose();
- }
- else
- {
- Cv2.CopyTo(binary, dilated);
- }
- //Cv2.ImShow("dilated", dilated);
- OpenCvSharp.Point[][] contours = dilated.FindContoursAsArray(RetrievalModes.List, ContourApproximationModes.ApproxSimple);
-
- OpenCvSharp.Size size = src.Size();
- double scaleRate = 1.0 * src.Width / resizedSize.Width;
-
- rects = contours
- .Where(x => BoxScoreThreahold == null || GetScore(x, pred) > BoxScoreThreahold)
- .Select(x => Cv2.MinAreaRect(x))
- .Where(x => x.Size.Width > MinSize && x.Size.Height > MinSize)
- .Select(rect =>
- {
- float minEdge = Math.Min(rect.Size.Width, rect.Size.Height);
- Size2f newSize = new Size2f(
- (rect.Size.Width + UnclipRatio * minEdge) * scaleRate,
- (rect.Size.Height + UnclipRatio * minEdge) * scaleRate);
- RotatedRect largerRect = new RotatedRect(rect.Center * scaleRate, newSize, rect.Angle);
- return largerRect;
- })
- .OrderBy(v => v.Center.Y)
- .ThenBy(v => v.Center.X)
- .ToArray();
-
- binary.Dispose();
- roi.Dispose();
- cbuf.Dispose();
- pred.Dispose();
- dilated.Dispose();
-
-
- dt2 = DateTime.Now;
- StringBuilder sb = new StringBuilder();
- sb.AppendLine("-----------------------------------\n");
- sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
-
- textBox1.Text = sb.ToString();
-
- //绘图
- Mat src2 = Cv2.ImRead(img);
- for (int i = 0; i < rects.Length; i++)
- {
- Scalar scalar = Scalar.RandomColor();
- List<OpenCvSharp.Point> temp = new List<OpenCvSharp.Point>();
- foreach (var item2 in rects[i].Points())
- {
- temp.Add(new OpenCvSharp.Point(item2.X, item2.Y));
- }
- List<List<OpenCvSharp.Point>> lltemp = new List<List<OpenCvSharp.Point>>();
- lltemp.Add(temp);
- Cv2.Polylines(src2, lltemp, true, scalar);
- }
-
- if (pictureBox1.Image != null)
- {
- pictureBox1.Image.Dispose();
- }
-
- pictureBox1.Image = BitmapConverter.ToBitmap(src2);
- src2.Dispose();
-
- }
- }
-
- private float GetScore(OpenCvSharp.Point[] contour, Mat pred)
- {
- int width = pred.Width;
- int height = pred.Height;
- int[] boxX = contour.Select(v => v.X).ToArray();
- int[] boxY = contour.Select(v => v.Y).ToArray();
-
- int xmin = Clamp(boxX.Min(), 0, width - 1);
- int xmax = Clamp(boxX.Max(), 0, width - 1);
- int ymin = Clamp(boxY.Min(), 0, height - 1);
- int ymax = Clamp(boxY.Max(), 0, height - 1);
-
- OpenCvSharp.Point[] rootPoints = contour
- .Select(v => new OpenCvSharp.Point(v.X - xmin, v.Y - ymin))
- .ToArray();
- Mat mask = new Mat(ymax - ymin + 1, xmax - xmin + 1, MatType.CV_8UC1, Scalar.Black);
- mask.FillPoly(new[] { rootPoints }, new Scalar(1));
-
- Mat croppedMat = pred[ymin, ymax + 1, xmin, xmax + 1];
- float score = (float)croppedMat.Mean(mask).Val0;
- return score;
- }
-
- public int Clamp(int val, int min, int max)
- {
- if (val < min)
- {
- return min;
- }
- else if (val > max)
- {
- return max;
- }
- return val;
- }
-
- float[] ExtractMat(Mat src)
- {
- int rows = src.Rows;
- int cols = src.Cols;
- float[] array = new float[rows * cols * 3];
- GCHandle gCHandle = default(GCHandle);
- try
- {
- gCHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
- IntPtr intPtr = gCHandle.AddrOfPinnedObject();
- for (int i = 0; i < src.Channels(); i++)
- {
- Mat dest = new Mat(rows, cols, MatType.CV_32FC1, intPtr + i * rows * cols * 4, 0L);
- Cv2.ExtractChannel(src, dest, i);
- dest.Dispose();
- }
- return array;
- }
- finally
- {
- gCHandle.Free();
- }
- }
-
- private Mat MatResize(Mat src, int? maxSize)
- {
- if (maxSize == null) return src.Clone();
-
- OpenCvSharp.Size size = src.Size();
- int longEdge = Math.Max(size.Width, size.Height);
- double scaleRate = 1.0 * maxSize.Value / longEdge;
-
- return scaleRate < 1.0 ?
- src.Resize(OpenCvSharp.Size.Zero, scaleRate, scaleRate) :
- src.Clone();
- }
-
- private Mat MatPadding32(Mat src)
- {
- OpenCvSharp.Size size = src.Size();
- OpenCvSharp.Size newSize = new OpenCvSharp.Size(
- 32 * Math.Ceiling(1.0 * size.Width / 32),
- 32 * Math.Ceiling(1.0 * size.Height / 32));
-
- return src.CopyMakeBorder(0, newSize.Height - size.Height, 0, newSize.Width - size.Width, BorderTypes.Constant, Scalar.Black);
- }
-
/// <summary>
/// Converts <paramref name="src"/> to 32-bit float, scales it by 1/255 and then applies
/// <c>(value - mean) * scale</c> to each channel with the per-channel constants below.
/// </summary>
/// <param name="src">Image to normalize; it is not modified. NOTE(review): presumably an 8-bit image with at most three channels — confirm.</param>
/// <returns>A new Mat holding the normalized channels merged back together; the caller owns it.</returns>
private Mat Normalize(Mat src)
{
    // Channel i uses scales[i] and means[i]; scales are the reciprocals of 0.229, 0.224 and 0.225.
    float[] scales = { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
    float[] means = { 0.485f, 0.456f, 0.406f };

    Mat[] channels;
    using (Mat scaled = new Mat())
    {
        src.ConvertTo(scaled, MatType.CV_32FC3, 1.0 / 255);
        channels = scaled.Split();
    }

    try
    {
        for (int i = 0; i < channels.Length; ++i)
        {
            // In-place: out = in * scale - mean * scale, i.e. (in - mean) * scale.
            channels[i].ConvertTo(channels[i], MatType.CV_32FC1, 1.0 * scales[i], (0.0 - means[i]) * scales[i]);
        }

        Mat dest = new Mat();
        try
        {
            Cv2.Merge(channels, dest);
        }
        catch
        {
            // Do not leave the half-built result behind when merging fails.
            dest.Dispose();
            throw;
        }
        return dest;
    }
    finally
    {
        // The split planes are temporary; release them on success and on failure alike.
        foreach (Mat channel in channels)
        {
            channel.Dispose();
        }
    }
}
-
/// <summary>
/// Cuts the area described by the rotated rectangle <paramref name="rect"/> out of <paramref name="src"/>
/// and warps it into an axis-aligned image using a perspective transform.
/// </summary>
/// <param name="src">Image to cut from; it is not modified.</param>
/// <param name="rect">Rotated rectangle in the coordinates of <paramref name="src"/>; it may reach past the image border.</param>
/// <returns>
/// A new Mat with the warped crop. It is transposed when the rectangle is not wider than tall,
/// and flipped with <c>FlipMode.X</c> when it is wider and its angle is above 45.
/// </returns>
private Mat GetRotateCropImage(Mat src, RotatedRect rect)
{
    float boxWidth = rect.Size.Width;
    float boxHeight = rect.Size.Height;
    float angle = rect.Angle;
    bool wider = boxWidth > boxHeight;

    OpenCvSharp.Size imageSize = src.Size();
    Rect bounds = rect.BoundingRect();

    // How far the bounding box reaches past each image edge (0 when it stays inside).
    int padTop = Math.Max(0, -bounds.Top);
    int padBottom = Math.Max(0, bounds.Bottom - imageSize.Height);
    int padLeft = Math.Max(0, -bounds.Left);
    int padRight = Math.Max(0, bounds.Right - imageSize.Width);

    // NOTE(review): the offset is built as Point(top padding, left padding), so X receives the top
    // padding and Y the left padding. Kept exactly as in the original; confirm this order is intended.
    Rect shifted = bounds + new OpenCvSharp.Point(padTop, padLeft);

    // Part of the bounding box that lies inside the image.
    Rect visible = Rect.FromLTRB(
        bounds.Left + padLeft,
        bounds.Top + padTop,
        bounds.Right - padRight,
        bounds.Bottom - padBottom);

    Mat cropped = src[visible];
    // Re-grow the crop to the full bounding-box size by repeating the edge pixels.
    Mat padded = cropped.CopyMakeBorder(padTop, padBottom, padLeft, padRight, BorderTypes.Replicate);

    // Rectangle corners expressed relative to the shifted bounding box.
    Point2f[] corners = rect.Points();
    Point2f[] rp = new Point2f[corners.Length];
    for (int k = 0; k < corners.Length; k++)
    {
        rp[k] = new Point2f(corners[k].X - shifted.X, corners[k].Y - shifted.Y);
    }

    // The corner order fed to the transform depends on the rectangle's orientation.
    Point2f[] from = (wider && angle >= 0 && angle < 45)
        ? new[] { rp[1], rp[2], rp[3], rp[0] }
        : new[] { rp[0], rp[3], rp[2], rp[1] };

    Point2f[] to =
    {
        new Point2f(0, 0),
        new Point2f(boxWidth, 0),
        new Point2f(boxWidth, boxHeight),
        new Point2f(0, boxHeight),
    };

    Mat matrix = Cv2.GetPerspectiveTransform(from, to);
    Mat dest = padded.WarpPerspective(
        matrix,
        new OpenCvSharp.Size(boxWidth, boxHeight),
        InterpolationFlags.Nearest,
        BorderTypes.Replicate);

    if (wider)
    {
        if (angle > 45)
        {
            Cv2.Flip(dest, dest, FlipMode.X);
        }
    }
    else
    {
        Cv2.Transpose(dest, dest);
    }

    // Temporaries are no longer needed once the warped image exists.
    cropped.Dispose();
    padded.Dispose();
    matrix.Dispose();

    return dest;
}
-
/// <summary>
/// Limits <paramref name="rect"/> to the area of an image of the given <paramref name="size"/>.
/// </summary>
/// <param name="rect">Rectangle that may reach past the image border.</param>
/// <param name="size">Image size; horizontal edges are clamped to [0, Width], vertical edges to [0, Height].</param>
/// <returns>The rectangle built from the clamped left, top, right and bottom edges.</returns>
private Rect GetCropedRect(Rect rect, OpenCvSharp.Size size)
{
    int left = Clamp(rect.Left, 0, size.Width);
    int top = Clamp(rect.Top, 0, size.Height);
    int right = Clamp(rect.Right, 0, size.Width);
    int bottom = Clamp(rect.Bottom, 0, size.Height);

    return Rect.FromLTRB(left, top, right, bottom);
}
-
/// <summary>
/// Releases the three Paddle predictors (detection, classification, recognition) when the form closes.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Closing event data (unused).</param>
private void Form1_FormClosing(object sender, FormClosingEventArgs e)
{
    // A predictor field is still null if it was never created, so each one is disposed
    // only when present. The classifier is released together with the other two.
    det_predictor?.Dispose();
    cls_predictor?.Dispose();
    rec_predictor?.Dispose();
}
-
/// <summary>
/// Orientation detection: crops every detected text box out of the source image, optionally
/// runs the 180-degree classifier on each crop, stores the crops in <c>mats</c> and shows the elapsed time.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Click event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();
    if (rects == null || src == null)
    {
        return;
    }

    // Release the crops of a previous run before they are replaced.
    if (mats != null)
    {
        foreach (Mat previous in mats)
        {
            previous?.Dispose();
        }
        mats = null;
    }

    dt1 = DateTime.Now;
    mats = rects
        .Select(rect =>
        {
            Mat roi;
            if (AllowRotateDetection)
            {
                roi = GetRotateCropImage(src, rect);
            }
            else
            {
                // Copy the region instead of keeping a view into src: CLSPredictorRun rotates
                // its argument in place, which would otherwise change the source image itself.
                using (Mat view = src[GetCropedRect(rect.BoundingRect(), src.Size())])
                {
                    roi = view.Clone();
                }
            }
            return Enable180Classification ? CLSPredictorRun(roi) : roi;
        })
        .ToArray();
    dt2 = DateTime.Now;

    StringBuilder sb = new StringBuilder();
    sb.AppendLine("-----------------------------------\n");
    sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");

    textBox1.Text = sb.ToString();
}
-
/// <summary>
/// Text recognition: runs the recognizer on the crops stored in <c>mats</c>, lists each
/// result as "(score)text" followed by the elapsed time, and then releases the crops.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Click event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();
    if (rects == null || mats == null)
    {
        return;
    }

    Mat[] crops = mats;
    dt1 = DateTime.Now;
    try
    {
        // 0 lets RecognizerRun choose the batch size itself.
        int recognizeBatchSize = 0;
        PaddleOcrRecognizerResult[] porr = RecognizerRun(crops, recognizeBatchSize);
        dt2 = DateTime.Now;

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < porr.Length; i++)
        {
            sb.AppendLine("(" + porr[i].Score + ")" + porr[i].Text);
        }
        sb.AppendLine("-----------------------------------\n");
        sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
        textBox1.Text = sb.ToString();
    }
    finally
    {
        foreach (Mat mat in crops)
        {
            mat.Dispose();
        }
        // The crops are disposed now; drop the reference so that another click returns early
        // instead of handing disposed Mats to the recognizer.
        mats = null;
    }
}
-
/// <summary>
/// Recognizes the text of every image in <paramref name="srcs"/>. The images are sorted by width,
/// processed in batches through RunMulti, and the results are put back into the input order.
/// </summary>
/// <param name="srcs">Images to recognize; they are not disposed here.</param>
/// <param name="batchSize">
/// Number of images per batch; 0 selects <c>Math.Min(8, Environment.ProcessorCount)</c>.
/// </param>
/// <returns>One result per input image, at the same index as its image.</returns>
/// <exception cref="ArgumentNullException"><paramref name="srcs"/> is null.</exception>
public PaddleOcrRecognizerResult[] RecognizerRun(Mat[] srcs, int batchSize = 0)
{
    if (srcs == null)
    {
        throw new ArgumentNullException(nameof(srcs));
    }

    if (srcs.Length == 0)
    {
        return new PaddleOcrRecognizerResult[0];
    }

    int chooseBatchSize = batchSize != 0 ? batchSize : Math.Min(8, Environment.ProcessorCount);

    return srcs
        // Remember the original position of every image.
        .Select((x, i) => (mat: x, i))
        // Images of similar width end up in the same batch.
        .OrderBy(x => x.mat.Width)
        .Chunk(chooseBatchSize)
        .Select(x => (result: RunMulti(x.Select(x2 => x2.mat).ToArray()), ids: x.Select(x2 => x2.i).ToArray()))
        .SelectMany(x => x.result.Zip(x.ids, (result, i) => (result, i)))
        // Restore the input order.
        .OrderBy(x => x.i)
        .Select(x => x.result)
        .ToArray();
}
-
/// <summary>
/// Resizes <paramref name="src"/> to the given <paramref name="height"/> while keeping its aspect
/// ratio, then pads the right side with gray so that the result is <paramref name="targetWidth"/> wide.
/// </summary>
/// <param name="src">Image to resize; it is not modified.</param>
/// <param name="height">Height of the returned image.</param>
/// <param name="targetWidth">Width of the returned image.</param>
/// <returns>
/// A new Mat of size <paramref name="targetWidth"/> x <paramref name="height"/>. When the
/// aspect-preserving width would reach or exceed <paramref name="targetWidth"/>, the image is
/// resized directly to the target size without padding.
/// </returns>
private Mat ResizePadding(Mat src, int height, int targetWidth)
{
    OpenCvSharp.Size size = src.Size();
    float whRatio = 1.0f * size.Width / size.Height;
    int width = (int)Math.Ceiling(height * whRatio);

    if (width >= targetWidth)
    {
        // Single-precision rounding can push width one pixel beyond a target that was derived
        // in double precision; a negative border size would make CopyMakeBorder fail.
        return src.Resize(new OpenCvSharp.Size(targetWidth, height));
    }

    // The intermediate image is only needed to build the padded one.
    using (Mat resized = src.Resize(new OpenCvSharp.Size(width, height)))
    {
        return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
    }
}
-
/// <summary>
/// Recognizes a single image by sending it through RunMulti as a batch of one.
/// </summary>
/// <param name="src">Image to recognize.</param>
/// <returns>The only result of the batch.</returns>
private PaddleOcrRecognizerResult Run(Mat src)
{
    Mat[] batch = { src };
    PaddleOcrRecognizerResult[] results = RunMulti(batch);
    return results.Single();
}
-
/// <summary>
/// Runs the recognition predictor on one batch of images and decodes the output into text.
/// Every image is converted to three channels, resized to the model height, padded to the width
/// of the widest image of the batch, normalized and packed into a single input tensor.
/// </summary>
/// <param name="srcs">Images of one batch; they are neither modified nor disposed.</param>
/// <returns>
/// One result per image. The score is the mean of the maximum values of the accepted output
/// positions, or 0 when no position was accepted.
/// </returns>
/// <exception cref="ArgumentException">An image is empty.</exception>
/// <exception cref="Exception">An image has an unsupported channel count, or the predictor run failed.</exception>
private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
{
    if (srcs.Length == 0)
    {
        return new PaddleOcrRecognizerResult[0];
    }

    for (int i = 0; i < srcs.Length; ++i)
    {
        Mat src = srcs[i];
        if (src.Empty())
        {
            throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
        }
    }

    int modelHeight = recShape.Height;
    // Width of the widest image once every image is scaled to the model height.
    int maxWidth = (int)Math.Ceiling(srcs.Max(src =>
    {
        OpenCvSharp.Size size = src.Size();
        return 1.0 * size.Width / size.Height * modelHeight;
    }));

    Mat[] normalizeds = new Mat[srcs.Length];
    try
    {
        for (int i = 0; i < srcs.Length; ++i)
        {
            Mat src = srcs[i];
            Mat channel3;
            switch (src.Channels())
            {
                case 4:
                    channel3 = src.CvtColor(ColorConversionCodes.RGBA2BGR);
                    break;
                case 3:
                    channel3 = src.Clone();
                    break;
                case 1:
                    channel3 = src.CvtColor(ColorConversionCodes.GRAY2RGB);
                    break;
                default:
                    throw new Exception("Unexpect src channel: {" + src.Channels() + "}, allow: (1/3/4)");
            }

            // Both intermediates are released as soon as the normalized image exists.
            using (channel3)
            using (Mat resized = ResizePadding(channel3, modelHeight, maxWidth))
            {
                // Visual check of the resized input; opens one window per image of the batch.
                Cv2.ImShow("resized" + i.ToString(), resized);
                normalizeds[i] = Normalize(resized);
            }
        }

        using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
        {
            int channel = normalizeds[0].Channels();
            input.Shape = new[] { normalizeds.Length, channel, modelHeight, maxWidth };
            float[] data = ExtractMat(normalizeds, channel, modelHeight, maxWidth);
            input.SetData(data);
        }
    }
    finally
    {
        // The pixel data now lives in the managed array handed to the tensor (or the call failed);
        // either way the normalized images are no longer needed.
        foreach (Mat normalized in normalizeds)
        {
            normalized?.Dispose();
        }
    }

    if (!rec_predictor.Run())
    {
        throw new Exception($"PaddlePredictor(Recognizer) run failed.");
    }

    using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
    {
        float[] data = output.GetData<float>();
        int[] shape = output.Shape;
        int charCount = shape[1];
        int labelCount = shape[2];

        // Pin outside the try block so that Free() is only reached for an allocated handle.
        GCHandle dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
        try
        {
            IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
            PaddleOcrRecognizerResult[] results = new PaddleOcrRecognizerResult[shape[0]];

            for (int i = 0; i < results.Length; ++i)
            {
                StringBuilder sb = new StringBuilder();
                int lastIndex = 0;
                float score = 0;
                int accepted = 0;

                for (int n = 0; n < charCount; ++n)
                {
                    int[] maxIdx = new int[2];
                    double maxVal;
                    // 1 x labelCount header over the values of output position n of image i.
                    using (Mat row = new Mat(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float)))
                    {
                        row.MinMaxIdx(out double _, out maxVal, new int[0], maxIdx);
                    }

                    // Index 0 is skipped, and so is a repeat of the index chosen at the previous position.
                    if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
                    {
                        score += (float)maxVal;
                        accepted++;
                        sb.Append(GetLabelByIndex(maxIdx[1]));
                    }
                    lastIndex = maxIdx[1];
                }

                // Average over the accepted positions: dividing by the string length would give
                // NaN for empty text and a wrong mean whenever a label is not exactly one char long.
                float meanScore = accepted > 0 ? score / accepted : 0f;
                results[i] = new PaddleOcrRecognizerResult(sb.ToString(), meanScore);
            }

            return results;
        }
        finally
        {
            dataHandle.Free();
        }
    }
}
-
/// <summary>
/// Packs a batch of equally sized images into one float array, image after image and,
/// within each image, channel plane after channel plane.
/// </summary>
/// <param name="srcs">Images to pack. NOTE(review): presumably 32-bit float images produced by Normalize — confirm.</param>
/// <param name="channel">Number of channels every image must have.</param>
/// <param name="height">Number of rows every image must have.</param>
/// <param name="width">Number of columns every image must have.</param>
/// <returns>An array of <c>srcs.Length * channel * height * width</c> floats.</returns>
/// <exception cref="Exception">An image has a different channel count.</exception>
/// <exception cref="ArgumentException">An image has a different size.</exception>
private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
{
    float[] result = new float[srcs.Length * channel * width * height];
    GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
    try
    {
        IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
        int planeBytes = height * width * sizeof(float);

        for (int i = 0; i < srcs.Length; ++i)
        {
            Mat src = srcs[i];
            if (src.Channels() != channel)
            {
                throw new Exception($"src[{i}] channel={src.Channels()}, expected {channel}");
            }
            if (src.Rows != height || src.Cols != width)
            {
                // A plane of another size would not match the slot reserved for it in the array.
                throw new ArgumentException($"src[{i}] size={src.Cols}x{src.Rows}, expected {width}x{height}");
            }

            for (int c = 0; c < channel; ++c)
            {
                // Mat header over the slot of channel c of image i; it does not own the memory
                // and is released before the array is unpinned.
                using (Mat dest = new Mat(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * planeBytes))
                {
                    Cv2.ExtractChannel(src, dest, c);
                }
            }
        }
        return result;
    }
    finally
    {
        resultHandle.Free();
    }
}
-
/// <summary>
/// Maps a 1-based index to its entry in <c>Labels</c>.
/// </summary>
/// <param name="x">Index to look up.</param>
/// <returns>
/// <c>Labels[x - 1]</c> for 1 &lt;= x &lt;= Labels.Count, or an empty string for x == Labels.Count + 1.
/// </returns>
/// <exception cref="Exception"><paramref name="x"/> is any other value.</exception>
string GetLabelByIndex(int x)
{
    int count = Labels.Count;

    bool insideTable = x >= 1 && x <= count;
    if (insideTable)
    {
        // Indices are shifted by one relative to the label list.
        return Labels[x - 1];
    }

    if (x == count + 1)
    {
        // The slot directly after the last label yields no text.
        return "";
    }

    throw new Exception($"Unable to GetLabelByIndex: index {{{x}}} out of range {{{count}}}, OCR model or labels not matched?");
}
-
/// <summary>
/// Fits <paramref name="src"/> into <paramref name="shape"/>: an image that is relatively wider
/// than the shape is cropped on the right, the result is scaled to the shape's height, and any
/// remaining width is filled with black on the right.
/// </summary>
/// <param name="src">Image to fit; it is not modified.</param>
/// <param name="shape">Target shape; only its Width and Height are used.</param>
/// <returns>A new Mat with the shape's height and at most the shape's width; the caller owns it.</returns>
private Mat ResizePadding(Mat src, OcrShape shape)
{
    OpenCvSharp.Size srcSize = src.Size();

    // Compare the aspect ratios by cross-multiplication. Integer division would truncate both
    // ratios, so that e.g. a 200x48 image would not count as wider than a 192x48 shape.
    bool widerThanShape = (long)srcSize.Width * shape.Height > (long)shape.Width * srcSize.Height;

    using (Mat roi = widerThanShape ?
        src[0, srcSize.Height, 0, (int)Math.Floor(1.0 * srcSize.Height * shape.Width / shape.Height)] :
        src.Clone())
    {
        double scaleRate = 1.0 * shape.Height / srcSize.Height;
        Mat resized = roi.Resize(new OpenCvSharp.Size(Math.Floor(roi.Width * scaleRate), shape.Height));
        try
        {
            if (resized.Width < shape.Width)
            {
                Cv2.CopyMakeBorder(resized, resized, 0, 0, 0, shape.Width - resized.Width, BorderTypes.Constant, Scalar.Black);
            }
        }
        catch
        {
            // Do not leave the resized image behind when padding fails.
            resized.Dispose();
            throw;
        }
        return resized;
    }
}
-
/// <summary>
/// Asks the classifier whether <paramref name="src"/> is upside down and, if so, rotates it
/// by 180 degrees in place. The decision is written to the console.
/// </summary>
/// <param name="src">Image with 1 or 3 channels; it is modified when a rotation is applied.</param>
/// <returns>The same <paramref name="src"/> instance, rotated or untouched.</returns>
/// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="src"/> is empty.</exception>
/// <exception cref="NotSupportedException"><paramref name="src"/> has neither 1 nor 3 channels.</exception>
public Mat CLSPredictorRun(Mat src)
{
    if (src == null)
    {
        throw new ArgumentNullException(nameof(src));
    }

    if (src.Empty())
    {
        throw new ArgumentException("src size should not be 0, wrong input picture provided?");
    }

    if (!(src.Channels() == 3 || src.Channels() == 1))
    {
        throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
    }

    bool rotate = ShouldRotate180(src);
    if (rotate)
    {
        Cv2.Rotate(src, src, RotateFlags.Rotate180);
    }

    Console.WriteLine(rotate ? "ShouldRotate180:True" : "ShouldRotate180:False");
    return src;
}
-
/// <summary>
/// Runs the classification predictor on <paramref name="src"/> and reports whether the image
/// should be rotated by 180 degrees.
/// </summary>
/// <param name="src">Image with 1 or 3 channels; it is not modified.</param>
/// <returns>
/// True when the output entry with the highest value has an odd index and that value is
/// greater than <c>RotateThreshold</c>.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="src"/> is empty.</exception>
/// <exception cref="NotSupportedException"><paramref name="src"/> has neither 1 nor 3 channels.</exception>
/// <exception cref="Exception">The predictor run failed.</exception>
public bool ShouldRotate180(Mat src)
{
    if (src.Empty())
    {
        throw new ArgumentException("src size should not be 0, wrong input picture provided?");
    }

    if (!(src.Channels() == 3 || src.Channels() == 1))
    {
        throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
    }

    // The default OcrShape used in the classification model
    OcrShape shape = new OcrShape(3, 192, 48);

    // Both intermediates are released on every path, including when the tensor setup throws.
    using (Mat resized = ResizePadding(src, shape))
    using (Mat normalized = Normalize(resized))
    using (PaddleTensor input = cls_predictor.GetInputTensor(cls_predictor.InputNames[0]))
    {
        input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
        float[] data = ExtractMat(normalized);
        input.SetData(data);
    }

    if (!cls_predictor.Run())
    {
        throw new Exception("PaddlePredictor(Classifier) run failed.");
    }

    using (PaddleTensor output = cls_predictor.GetOutputTensor(cls_predictor.OutputNames[0]))
    {
        float[] softmax = output.GetData<float>();

        // Find the entry with the highest value; label stays 0 when no entry is above 0.
        float score = 0;
        int label = 0;
        for (int i = 0; i < softmax.Length; ++i)
        {
            if (softmax[i] > score)
            {
                score = softmax[i];
                label = i;
            }
        }

        return label % 2 == 1 && score > RotateThreshold;
    }
}
-
- }
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。