当前位置:   article > 正文

C# PaddleInference OCR 表格识别_sdcb.paddleinference

sdcb.paddleinference

目录

效果

项目

测试图片

代码

下载 


效果

左图是对表格图片进行识别后，将各单元格位置标注出来的效果

右图是利用webBrowser1呈现的HTML效果

文本框中是表格识别后生成的HTML内容

项目

VS2022

.NET Framework 4.8

OpenCvSharp4

Sdcb.PaddleInference

Sdcb.PaddleOCR

测试图片

代码

using OpenCvSharp.Extensions;
using OpenCvSharp;
using Sdcb.PaddleInference;
using Sdcb.PaddleOCR;
using Sdcb.PaddleOCR.Models;
using Sdcb.PaddleOCR.Models.Details;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Windows.Forms;


namespace PaddleInference_OCR_表格识别
{
    /// <summary>
    /// Demo form: loads an image of a table, runs table-structure recognition and
    /// full OCR on it, shows the detected cell boxes in <c>pictureBox1</c>, and shows
    /// the rebuilt HTML table in <c>textBox1</c> and <c>webBrowser1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();

            // Dispose(bool) is normally generated in the designer half of this partial
            // class, so the native OCR resources are released from FormClosed instead.
            FormClosed += (s, args) => ReleaseResources();
        }


        // Filter for the open-file dialog. "*.tif" was missing (and "*.tiff" was
        // listed twice), so TIFF files with the short extension could not be picked.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";

        // Full path of the currently selected image; empty until one is chosen.
        string img = "";

        // Application start-up directory; models and output file live next to the exe.
        string startupPath = "";


        /// <summary>
        /// Lets the user pick an image file and shows it in <c>pictureBox1</c>.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                Bitmap preview;
                try
                {
                    // A Bitmap created from a path keeps the file locked for its whole
                    // lifetime; copy it into an independent Bitmap and release the file.
                    using (Bitmap fromFile = new Bitmap(ofd.FileName))
                    {
                        preview = new Bitmap(fromFile);
                    }
                }
                catch (Exception ex) when (ex is ArgumentException || ex is System.IO.IOException)
                {
                    MessageBox.Show("Cannot open image: " + ofd.FileName);
                    return;
                }

                img = ofd.FileName;
                ShowPreview(preview);
            }
        }

        TableRecognitionModel tableModel;
        PaddleOcrTableRecognizer tableRec;

        FullOcrModel model;
        PaddleOcrAll paddleOcr;

        /// <summary>
        /// Loads the table-structure model and the detection / classification /
        /// recognition OCR models from folders located in the start-up directory.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            startupPath = System.Windows.Forms.Application.StartupPath;
            string table_directoryPath = System.IO.Path.Combine(startupPath, "ch_ppstructure_mobile_v2.0_SLANet");
            string table_labelFilePath = System.IO.Path.Combine(startupPath, "table_structure_dict_ch.txt");

            tableModel = new FileTableRecognizationModel(table_directoryPath, table_labelFilePath);
            tableRec = new PaddleOcrTableRecognizer(tableModel);

            string detectionModelDir = System.IO.Path.Combine(startupPath, "ch_PP-OCRv3_det");
            string classificationModelDir = System.IO.Path.Combine(startupPath, "ch_ppocr_mobile_v2.0_cls");
            string recognitionModelDir = System.IO.Path.Combine(startupPath, "ch_PP-OCRv3_rec");
            string labelFilePath = System.IO.Path.Combine(startupPath, "ppocr_keys_v1.txt");

            model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);

            paddleOcr = new PaddleOcrAll(model, PaddleDevice.Mkldnn());
            paddleOcr.AllowRotateDetection = true; /* allow detection of text at an angle */
            paddleOcr.Enable180Classification = false; /* 180-degree classification (text rotated by more than 90 degrees) is left disabled */
        }

        TableDetectionResult tableResult;

        /// <summary>
        /// Runs table recognition and OCR on the selected image, previews the detected
        /// cell boxes, and outputs the rebuilt HTML table (text box, browser control and
        /// <c>table.html</c> in the start-up directory).
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (pictureBox1.Image == null || string.IsNullOrEmpty(img))
            {
                return;
            }

            string html;

            // "using" guarantees the native Mat is released even if recognition throws.
            using (Mat src = Cv2.ImRead(img))
            {
                // ImRead does not throw on failure; it returns an empty Mat.
                if (src.Empty())
                {
                    MessageBox.Show("Cannot read image: " + img);
                    return;
                }

                tableResult = tableRec.Run(src);

                // Draw the cell boxes on a copy so the colored rectangles do not end
                // up in the pixels that are handed to the OCR pass below.
                using (Mat annotated = src.Clone())
                {
                    foreach (TableCellBox cell in tableResult.StructureBoxes)
                    {
                        Cv2.Rectangle(annotated, cell.Rect, Scalar.RandomColor());
                    }

                    ShowPreview(BitmapConverter.ToBitmap(annotated));
                }

                paddleOcr.Detector.UnclipRatio = 1.2f;
                PaddleOcrResult result = paddleOcr.Run(src);

                html = tableResult.RebuildTable(result);
            }

            textBox1.Text = html;

            // OpenFileDialog may change the process's current directory, so anchor the
            // output file to the start-up directory instead of using a relative path.
            System.IO.File.WriteAllText(System.IO.Path.Combine(startupPath, "table.html"), html);

            webBrowser1.DocumentText = html;
        }

        /// <summary>
        /// Replaces the image shown in <c>pictureBox1</c> and disposes the previous one.
        /// </summary>
        private void ShowPreview(Image next)
        {
            Image previous = pictureBox1.Image;
            pictureBox1.Image = next;
            if (previous != null && !ReferenceEquals(previous, next))
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// Releases the preview image and the OCR objects created in <c>Form1_Load</c>.
        /// </summary>
        private void ReleaseResources()
        {
            ShowPreview(null);

            paddleOcr?.Dispose();
            paddleOcr = null;

            tableRec?.Dispose();
            tableRec = null;
        }
    }
}
 

  1. using OpenCvSharp.Extensions;
  2. using OpenCvSharp;
  3. using Sdcb.PaddleInference;
  4. using Sdcb.PaddleOCR;
  5. using Sdcb.PaddleOCR.Models;
  6. using Sdcb.PaddleOCR.Models.Details;
  7. using System;
  8. using System.Collections.Generic;
  9. using System.Drawing;
  10. using System.Windows.Forms;
  11. namespace PaddleInference_OCR_表格识别
  12. {
  13. public partial class Form1 : Form
  14. {
  15. public Form1()
  16. {
  17. InitializeComponent();
  18. }
  19. Bitmap bmp;
  20. string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
  21. string img = "";
  22. string startupPath = "";
  23. private void button1_Click(object sender, EventArgs e)
  24. {
  25. OpenFileDialog ofd = new OpenFileDialog();
  26. ofd.Filter = fileFilter;
  27. if (ofd.ShowDialog() != DialogResult.OK) return;
  28. pictureBox1.Image = null;
  29. img = ofd.FileName;
  30. bmp = new Bitmap(img);
  31. pictureBox1.Image = new Bitmap(img);
  32. }
  33. TableRecognitionModel tableModel;
  34. PaddleOcrTableRecognizer tableRec;
  35. FullOcrModel model;
  36. PaddleOcrAll paddleOcr;
  37. private void Form1_Load(object sender, EventArgs e)
  38. {
  39. startupPath = System.Windows.Forms.Application.StartupPath;
  40. string table_directoryPath = startupPath + "\\ch_ppstructure_mobile_v2.0_SLANet";
  41. string table_labelFilePath = startupPath + "\\table_structure_dict_ch.txt";
  42. tableModel = new FileTableRecognizationModel(table_directoryPath, table_labelFilePath);
  43. tableRec = new PaddleOcrTableRecognizer(tableModel);
  44. string detectionModelDir = startupPath + "\\ch_PP-OCRv3_det";
  45. string classificationModelDir = startupPath + "\\ch_ppocr_mobile_v2.0_cls";
  46. string recognitionModelDir = startupPath + "\\ch_PP-OCRv3_rec";
  47. string labelFilePath = startupPath + "\\ppocr_keys_v1.txt";
  48. model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);
  49. paddleOcr = new PaddleOcrAll(model, PaddleDevice.Mkldnn());
  50. paddleOcr.AllowRotateDetection = true; /* 允许识别有角度的文字 */
  51. paddleOcr.Enable180Classification = false; /* 允许识别旋转角度大于90度的文字 */
  52. }
  53. TableDetectionResult tableResult;
  54. private void button2_Click(object sender, EventArgs e)
  55. {
  56. if (pictureBox1.Image == null)
  57. {
  58. return;
  59. }
  60. Mat src = Cv2.ImRead(img);
  61. tableResult = tableRec.Run(src);
  62. List<TableCellBox> ltCellBox = tableResult.StructureBoxes;
  63. foreach (TableCellBox item in ltCellBox)
  64. {
  65. Scalar scalar = Scalar.RandomColor();
  66. Cv2.Rectangle(src, item.Rect, scalar);
  67. }
  68. //Cv2.ImShow("src", src);
  69. //Cv2.ImWrite("src.jpg", src);
  70. pictureBox1.Image = BitmapConverter.ToBitmap(src);
  71. //List<string> ltTags = tableResult.HtmlTags;
  72. //float score = tableResult.Score;
  73. paddleOcr.Detector.UnclipRatio = 1.2f;
  74. PaddleOcrResult result = paddleOcr.Run(src);
  75. src.Dispose();
  76. string html = tableResult.RebuildTable(result);
  77. textBox1.Text = html;
  78. System.IO.File.WriteAllText("table.html", html);
  79. webBrowser1.DocumentText = html;
  80. }
  81. }
  82. }

下载 

Demo下载

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号