当前位置:   article > 正文

C#使用Spire.OCR框架识别图片中的字母,数字,文字等

spire.ocr

OCR

OCR(optical character recognition),光学字符识别。

      OCR文字识别是指电子设备(例如扫描仪或数码相机)检查纸上打印的字符,然后用字符识别方法将形状翻译成计算机文字的过程;即,对文本资料进行扫描,然后对图像文件进行分析处理,获取文字及版面信息的过程。
      如何除错或利用辅助信息提高识别正确率,是OCR最重要的课题。
      衡量一个OCR系统性能好坏的主要指标有:拒识率、误识率、识别速度、用户界面的友好性,产品的稳定性,易用性及可行性等。
      OCR技术的实现,总体上可以分为五步:预处理图片、切割字符、识别字符、恢复版面、后处理文字。中间的三步是核心,头尾两步最难。

C#中使用Spire.OCR框架可以识别图片中的字母、文字、数字等。

一、Demo测试

VS2019中新建窗体应用程序ImageRecognitionTextDemo,将默认的Form1重命名为FormImageRecognitionText。

右键项目属性

在【生成】选项卡中,将目标平台选择为x64

右键项目 ImageRecognitionTextDemo,选择【管理NuGet程序包】 ,搜索Spire.OCR

安装Nuget包完成

将下载得到的如下非托管dll放到应用程序的Debug输出目录下;或者复制到项目根目录下,并将其"复制到输出目录"属性设置为"始终复制"。

注意:托管的Spire.OCR.dll在运行时需要自动加载如上六个框架包,因此Spire.OCR.dll和这六个框架包必须放在同一路径下。

二、窗体FormImageRecognitionText设计器代码如下

文件 FormImageRecognitionText.Designer.cs源程序如下:

  1. namespace ImageRecognitionTextDemo
  2. {
  3. partial class FormImageRecognitionText
  4. {
  5. /// <summary>
  6. /// Required designer variable; holds the components disposed together with the form.
  7. /// </summary>
  8. private System.ComponentModel.IContainer components = null;
  9. /// <summary>
  10. /// Cleans up any resources being used.
  11. /// </summary>
  12. /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
  13. protected override void Dispose(bool disposing)
  14. {
  15. if (disposing && (components != null))
  16. {
  17. components.Dispose();
  18. }
  19. base.Dispose(disposing);
  20. }
  21. #region Windows 窗体设计器生成的代码
  22. /// <summary>
  23. /// Required method for Designer support - do not modify
  24. /// the contents of this method with the code editor.
  25. /// </summary>
  26. private void InitializeComponent()
  27. {
  28. this.pictureBox1 = new System.Windows.Forms.PictureBox();
  29. this.rtxbDisplay = new System.Windows.Forms.RichTextBox();
  30. this.btnOpen = new System.Windows.Forms.Button();
  31. this.btnRecognize = new System.Windows.Forms.Button();
  32. ((System.ComponentModel.ISupportInitialize)(this.pictureBox1)).BeginInit();
  33. this.SuspendLayout();
  34. //
  35. // pictureBox1 - bordered 400x400 picture area at (12, 12), excluded from the tab order
  36. //
  37. this.pictureBox1.BorderStyle = System.Windows.Forms.BorderStyle.FixedSingle;
  38. this.pictureBox1.Location = new System.Drawing.Point(12, 12);
  39. this.pictureBox1.Name = "pictureBox1";
  40. this.pictureBox1.Size = new System.Drawing.Size(400, 400);
  41. this.pictureBox1.TabIndex = 0;
  42. this.pictureBox1.TabStop = false;
  43. //
  44. // rtxbDisplay - read-only rich text box, 562x715 at (418, 12)
  45. //
  46. this.rtxbDisplay.Location = new System.Drawing.Point(418, 12);
  47. this.rtxbDisplay.Name = "rtxbDisplay";
  48. this.rtxbDisplay.ReadOnly = true;
  49. this.rtxbDisplay.Size = new System.Drawing.Size(562, 715);
  50. this.rtxbDisplay.TabIndex = 1;
  51. this.rtxbDisplay.Text = "";
  52. //
  53. // btnOpen - button whose Click event is wired to btnOpen_Click
  54. //
  55. this.btnOpen.Font = new System.Drawing.Font("宋体", 13F, System.Drawing.FontStyle.Bold);
  56. this.btnOpen.Location = new System.Drawing.Point(51, 439);
  57. this.btnOpen.Name = "btnOpen";
  58. this.btnOpen.Size = new System.Drawing.Size(104, 56);
  59. this.btnOpen.TabIndex = 2;
  60. this.btnOpen.Text = "打开图片";
  61. this.btnOpen.UseVisualStyleBackColor = true;
  62. this.btnOpen.Click += new System.EventHandler(this.btnOpen_Click);
  63. //
  64. // btnRecognize - button whose Click event is wired to btnRecognize_Click
  65. //
  66. this.btnRecognize.Font = new System.Drawing.Font("宋体", 13F, System.Drawing.FontStyle.Bold);
  67. this.btnRecognize.Location = new System.Drawing.Point(217, 439);
  68. this.btnRecognize.Name = "btnRecognize";
  69. this.btnRecognize.Size = new System.Drawing.Size(104, 56);
  70. this.btnRecognize.TabIndex = 3;
  71. this.btnRecognize.Text = "识别图片";
  72. this.btnRecognize.UseVisualStyleBackColor = true;
  73. this.btnRecognize.Click += new System.EventHandler(this.btnRecognize_Click);
  74. //
  75. // FormImageRecognitionText - 982x739 client area; Load is wired to FormImageRecognitionText_Load
  76. //
  77. this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 12F);
  78. this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
  79. this.ClientSize = new System.Drawing.Size(982, 739);
  80. this.Controls.Add(this.btnRecognize);
  81. this.Controls.Add(this.btnOpen);
  82. this.Controls.Add(this.rtxbDisplay);
  83. this.Controls.Add(this.pictureBox1);
  84. this.Name = "FormImageRecognitionText";
  85. this.Text = "图像识别文本框架OCR";
  86. this.Load += new System.EventHandler(this.FormImageRecognitionText_Load);
  87. ((System.ComponentModel.ISupportInitialize)(this.pictureBox1)).EndInit();
  88. this.ResumeLayout(false);
  89. }
  90. #endregion
  91. private System.Windows.Forms.PictureBox pictureBox1;
  92. private System.Windows.Forms.RichTextBox rtxbDisplay;
  93. private System.Windows.Forms.Button btnOpen;
  94. private System.Windows.Forms.Button btnRecognize;
  95. }
  96. }

三、FormImageRecognitionText窗体源程序如下:

程序文件FormImageRecognitionText.cs

  1. using System;
  2. using System.Collections.Generic;
  3. using System.ComponentModel;
  4. using System.Data;
  5. using System.Drawing;
  6. using System.IO;
  7. using System.Linq;
  8. using System.Text;
  9. using System.Threading.Tasks;
  10. using System.Windows.Forms;
  11. using Spire.OCR;
  12. namespace ImageRecognitionTextDemo
  13. {
  14.     /// <summary>
  15.     /// Demo form: previews a picture, runs Spire.OCR on it off the UI thread and logs every recognized text block.
  16.     /// </summary>
  17.     public partial class FormImageRecognitionText : Form
  18.     {
  19.         /// <summary>
  20.         /// Creates the form and its designer-generated controls.
  21.         /// </summary>
  22.         public FormImageRecognitionText()
  23.         {
  24.             InitializeComponent();
  25.         }
  26.         /// <summary>
  27.         /// Load handler: switches the preview picture box to the Zoom background layout.
  28.         /// </summary>
  29.         private void FormImageRecognitionText_Load(object sender, EventArgs e)
  30.         {
  31.             // Recognizing text in pictures from C# / .NET with Spire.OCR.
  32.             /*
  33.              * OCR (optical character recognition) is the process in which a device such as a scanner or a
  34.              * digital camera examines characters printed on paper and translates their shapes into computer
  35.              * text: the document is scanned, then the image file is analyzed to obtain text and layout.
  36.              * Correcting errors, or using auxiliary information to raise accuracy, is the central OCR problem.
  37.              * Main quality indicators of an OCR system: rejection rate, misrecognition rate, recognition
  38.              * speed, user-interface friendliness, product stability, ease of use and feasibility.
  39.              * An OCR pipeline has five steps: image preprocessing, character segmentation, character
  40.              * recognition, layout restoration and text post-processing. The middle three are the core;
  41.              * the first and last are the hardest.
  42.              * Source article:
  43.              * https://baijiahao.baidu.com/s?id=1744946979174786023&wfr=spider&for=pc
  44.              */
  45.             pictureBox1.BackgroundImageLayout = ImageLayout.Zoom;
  46.         }
  47.         /// <summary>
  48.         /// Appends a timestamped line to the log box; safe to call from any thread.
  49.         /// </summary>
  50.         /// <param name="message">Text to log.</param>
  51.         private void DisplayContent(string message)
  52.         {
  53.             // Capture the time on the calling thread so the log shows when the event happened,
  54.             // not when the UI thread processed the posted update.
  55.             string line = $"{DateTime.Now.ToString("HH:mm:ss.fff")}->{message}\n";
  56.             // Without a live window handle BeginInvoke throws, and there is nothing left to log to.
  57.             if (IsDisposed || !IsHandleCreated)
  58.             {
  59.                 return;
  60.             }
  61.             try
  62.             {
  63.                 this.BeginInvoke(new Action(() =>
  64.                 {
  65.                     // Keep the log bounded: start over once it grows past 10240 characters.
  66.                     if (rtxbDisplay.TextLength > 10240)
  67.                     {
  68.                         rtxbDisplay.Clear();
  69.                     }
  70.                     rtxbDisplay.AppendText(line);
  71.                     rtxbDisplay.ScrollToCaret();
  72.                 }));
  73.             }
  74.             catch (InvalidOperationException)
  75.             {
  76.                 // The form was closed between the check above and the post; dropping the line is intended.
  77.             }
  78.             // No Application.DoEvents() here: this method is called from worker threads, where pumping
  79.             // messages is useless, and on the UI thread it would only invite re-entrant click handlers.
  80.         }
  81.         /// <summary>
  82.         /// Logs the outcome of a finished scan (elapsed time, result flag, every text block) and then shows the full text in a message box.
  83.         /// </summary>
  84.         /// <param name="ocrScanner">Scanner whose Scan call has already returned.</param>
  85.         /// <param name="stopwatch">Stopwatch started before the scan; it is stopped here.</param>
  86.         /// <param name="result">Value returned by the Scan call.</param>
  87.         private void DisplayProcessResultOCR(OcrScanner ocrScanner, System.Diagnostics.Stopwatch stopwatch, bool result)
  88.         {
  89.             IOCRText text = ocrScanner.Text;
  90.             // Defensive: treat a missing result as "no blocks". TODO confirm whether Spire.OCR can return null here.
  91.             IOCRTextBlock[] ocrTextBlocks = text?.Blocks ?? new IOCRTextBlock[0];
  92.             stopwatch.Stop();
  93.             DisplayContent($"识别图片完成,耗时【{stopwatch.Elapsed.TotalMilliseconds}】ms:识别结果【{result}】,文本块个数【{ocrTextBlocks.Length}】");
  94.             for (int i = 0; i < ocrTextBlocks.Length; i++)
  95.             {
  96.                 IOCRTextBlock block = ocrTextBlocks[i];
  97.                 DisplayContent($"第【{(i + 1).ToString("D2")}】个文本块:");
  98.                 DisplayContent($" Text:【{block.Text}】");
  99.                 DisplayContent($" Confidence:【{block.Confidence}】");
  100.                 DisplayContent($" Level:【{block.Level}】");
  101.                 DisplayContent($" IsTruncated:【{block.IsTruncated}】");
  102.                 DisplayContent($" Box:【{block.Box}】");
  103.             }
  104.             string fullText = text == null ? string.Empty : text.ToString();
  105.             if (IsDisposed || !IsHandleCreated)
  106.             {
  107.                 return;
  108.             }
  109.             // This method runs on a worker thread; show the box on the UI thread, owned by the form,
  110.             // so it is modal to the window instead of floating free on a thread-pool thread.
  111.             this.Invoke(new Action(() => MessageBox.Show(this, fullText)));
  112.         }
  113.         /// <summary>
  114.         /// Runs one OCR pass on a thread-pool thread, keeping both buttons disabled while it is in flight.
  115.         /// </summary>
  116.         /// <param name="startMessage">Line logged right before scanning starts.</param>
  117.         /// <param name="scan">Invokes the wanted Scan overload on the supplied scanner and returns its result.</param>
  118.         private async Task RunRecognitionAsync(string startMessage, Func<OcrScanner, bool> scan)
  119.         {
  120.             // One scan at a time: a second click would otherwise start a concurrent scan.
  121.             btnOpen.Enabled = false;
  122.             btnRecognize.Enabled = false;
  123.             try
  124.             {
  125.                 await Task.Run(() =>
  126.                 {
  127.                     OcrScanner ocrScanner = new OcrScanner();
  128.                     System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
  129.                     DisplayContent(startMessage);
  130.                     bool result = scan(ocrScanner);
  131.                     DisplayProcessResultOCR(ocrScanner, stopwatch, result);
  132.                 });
  133.             }
  134.             catch (Exception ex)
  135.             {
  136.                 // The callers are async void event handlers, so anything escaping from here would take
  137.                 // the whole application down; report the failure in the log instead.
  138.                 DisplayContent($"识别图片失败:【{ex.Message}】");
  139.             }
  140.             finally
  141.             {
  142.                 if (!IsDisposed)
  143.                 {
  144.                     btnOpen.Enabled = true;
  145.                     btnRecognize.Enabled = true;
  146.                 }
  147.             }
  148.         }
  149.         /// <summary>
  150.         /// Open-button handler: lets the user pick an image file, previews it and runs OCR on that file.
  151.         /// </summary>
  152.         private async void btnOpen_Click(object sender, EventArgs e)
  153.         {
  154.             string fileName;
  155.             using (OpenFileDialog openFileDialog = new OpenFileDialog())
  156.             {
  157.                 openFileDialog.Filter = "jpeg|*.jpg|bmp|*.bmp|gif|*.gif|png|*.png|tiff|*.tiff|All|*.*";
  158.                 if (openFileDialog.ShowDialog() != DialogResult.OK)
  159.                 {
  160.                     return;
  161.                 }
  162.                 fileName = openFileDialog.FileName;
  163.             }
  164.             Image loaded;
  165.             try
  166.             {
  167.                 loaded = Image.FromFile(fileName);
  168.             }
  169.             catch (Exception ex) when (ex is OutOfMemoryException || ex is IOException || ex is ArgumentException)
  170.             {
  171.                 // Image.FromFile reports an unsupported or corrupt file as OutOfMemoryException;
  172.                 // the "All" filter entry makes picking such a file possible.
  173.                 DisplayContent($"打开图片失败:【{ex.Message}】");
  174.                 return;
  175.             }
  176.             // Swap in the new picture, then release the GDI+ resources of the one it replaces.
  177.             Image previous = pictureBox1.BackgroundImage;
  178.             pictureBox1.BackgroundImage = loaded;
  179.             if (previous != null)
  180.             {
  181.                 previous.Dispose();
  182.             }
  183.             await RunRecognitionAsync($"开始识别图片:【{fileName}】", scanner => scanner.Scan(fileName));
  184.         }
  185.         /// <summary>
  186.         /// Recognize-button handler: re-encodes the previewed image into memory and runs OCR on that stream.
  187.         /// </summary>
  188.         private async void btnRecognize_Click(object sender, EventArgs e)
  189.         {
  190.             Image image = pictureBox1.BackgroundImage;
  191.             if (image == null)
  192.             {
  193.                 // Nothing has been opened yet, so there is no image to scan.
  194.                 DisplayContent("请先打开一张图片");
  195.                 return;
  196.             }
  197.             // Touch the GDI+ image only here on the UI thread; the worker receives plain values.
  198.             System.Drawing.Imaging.ImageFormat rawFormat = image.RawFormat;
  199.             OCRImageFormat imageFormat;
  200.             if (!Enum.TryParse(rawFormat.ToString(), true, out imageFormat))
  201.             {
  202.                 // Scanning still proceeds with the enum's default value, but the mismatch is now logged.
  203.                 DisplayContent($"无法识别的图片格式【{rawFormat}】,将使用默认格式【{imageFormat}】");
  204.             }
  205.             using (MemoryStream memoryStream = new MemoryStream())
  206.             {
  207.                 try
  208.                 {
  209.                     image.Save(memoryStream, rawFormat);
  210.                 }
  211.                 catch (Exception ex) when (ex is System.Runtime.InteropServices.ExternalException || ex is ArgumentException)
  212.                 {
  213.                     DisplayContent($"保存图片到内存失败:【{ex.Message}】");
  214.                     return;
  215.                 }
  216.                 // Save leaves the position at the end; rewind so the scanner reads from the first byte.
  217.                 memoryStream.Position = 0;
  218.                 long sizeInKb = memoryStream.Length / 1024;
  219.                 await RunRecognitionAsync($"开始识别图片:图片大小【{sizeInKb}】KB", scanner => scanner.Scan(memoryStream, imageFormat));
  220.             }
  221.         }
  222.     }
  223. }

四、测试运行如下:

 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/知新_RL/article/detail/219359
推荐阅读
相关标签
  

闽ICP备14008679号