当前位置:   article > 正文

C#检查目录下所有文件的编码格式并转换为UTF8格式_c# 更改默认编码格式不是“utf8”

c# 更改默认编码格式不是“utf8”

1、检查目录下所有文件的编码格式(递归)

2、将不是utf8的格式转换为UTF8格式


App.config 配置(ExceptList:需要跳过的目录名或文件名后缀,以逗号分隔;isChangeToUTF8:是否将非 UTF-8 文件转换为 UTF-8):

  <!-- Settings read by Form1's static constructor. -->
  <appSettings>
    <!-- Comma-separated entries; each entry is trimmed before use. An entry
         excludes a sub-directory whose name equals it, and excludes any file
         whose name ends with it. -->
    <add key="ExceptList" value=".git,.nuget,.vs,.dll,.pdb, .png,.jpg,.gif ,.log,.eot, .ttf, .woff, .swf, packages , ReferenceDLL ,.cache,.xls, .xlsx,.doc,.docx,favicon.ico,_references.js,.exe" />
    <!-- "true" (any letter case) makes the tool rewrite non-UTF-8 files as UTF-8;
         any other value only reports them. -->
    <add key="isChangeToUTF8" value="true" />
  </appSettings>


  /// <summary>
  /// Main form of the encoding checker. Recursively collects the files under the
  /// directory entered in <c>txt_url</c>, detects each file's encoding through
  /// <c>EncodingType.GetType</c>, optionally rewrites non-UTF-8 files as UTF-8,
  /// and writes a text report.
  /// </summary>
  public partial class Form1 : Form
  {
      /// <summary>Directory the report files are written to.</summary>
      private const string ReportDirectory = "D:\\";

      /// <summary>Directory names / file-name suffixes to skip (from the "ExceptList" setting).</summary>
      private static readonly List<string> ExceptList = new List<string>();

      /// <summary>Whether non-UTF-8 files are rewritten as UTF-8 (from the "isChangeToUTF8" setting).</summary>
      private static bool isChangeToUTF8 = false;

      /// <summary>Report lines collected during the current run.</summary>
      private List<string> ResultList = new List<string>();

      /// <summary>
      /// Creates the form and pre-fills the path box with a default directory.
      /// </summary>
      public Form1()
      {
          InitializeComponent();
          this.txt_url.Text = "G:\\WorkSpace\\messagecenter";
      }

      /// <summary>
      /// Loads the exclusion list and the conversion switch from the application settings.
      /// </summary>
      static Form1()
      {
          System.Configuration.AppSettingsReader appReader = new System.Configuration.AppSettingsReader();
          string strExcept = ReadSetting(appReader, "ExceptList");
          string strIsChange = ReadSetting(appReader, "isChangeToUTF8");

          if (!string.IsNullOrEmpty(strExcept))
          {
              foreach (var raw in strExcept.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
              {
                  string entry = raw.Trim();
                  // An entry that trims to "" must be dropped: EndsWith("") is true
                  // for every file name, which would exclude every file.
                  if (entry.Length > 0)
                  {
                      ExceptList.Add(entry);
                  }
              }
          }

          isChangeToUTF8 = string.Equals(strIsChange, "true", StringComparison.OrdinalIgnoreCase);
      }

      /// <summary>
      /// Reads one string setting, returning null when it cannot be read.
      /// </summary>
      /// <param name="reader">Settings reader to query.</param>
      /// <param name="key">Name of the setting.</param>
      /// <returns>The setting value, or null when the key is missing or not a string.</returns>
      private static string ReadSetting(System.Configuration.AppSettingsReader reader, string key)
      {
          try
          {
              return Convert.ToString(reader.GetValue(key, typeof(string)));
          }
          catch (InvalidOperationException)
          {
              // GetValue throws for a missing key. Letting that escape a static
              // constructor would make the whole type unusable, so fall back to
              // "not configured" instead.
              return null;
          }
      }

      /// <summary>
      /// Click handler of the start button: scans the directory, converts files when
      /// enabled, writes the report and tells the user where it is.
      /// </summary>
      /// <param name="sender">Event source (unused).</param>
      /// <param name="e">Event data (unused).</param>
      private void btn_start_Click(object sender, EventArgs e)
      {
          ResultList = new List<string>();
          string myPath = txt_url.Text;
          if (string.IsNullOrEmpty(myPath))
          {
              MessageBox.Show("PATH?");
              return;
          }

          List<string> pathList = new List<string>();
          // Counted separately because ResultList also receives warning lines
          // from the encoding detector, which are not non-UTF-8 files.
          int nonUtf8Count = 0;
          try
          {
              // Collect every file below the directory.
              GetDirectory(myPath, pathList);
              foreach (var path in pathList)
              {
                  // Detect the file's encoding.
                  var type = EncodingType.GetType(path, ResultList);
                  if (type != Encoding.UTF8)
                  {
                      nonUtf8Count++;
                      ResultList.Add($"{path},原格式{type.EncodingName}");
                      if (isChangeToUTF8)
                      {
                          // Rewrite the file as UTF-8.
                          ChangeEncoding(path, type);
                      }
                  }
              }

              // Any collected line (non-UTF-8 file or warning) yields a "result"
              // report; a clean run yields a "success" report.
              bool hasFindings = ResultList.Any();
              string reportPath = WriteReport(hasFindings ? "result" : "success", myPath, pathList.Count, nonUtf8Count);
              MessageBox.Show(hasFindings ? reportPath : "finish!");
          }
          catch (Exception exception)
          {
              MessageBox.Show(exception.ToString());
          }
      }

      /// <summary>
      /// Prepends the summary header to <c>ResultList</c> and writes it to a
      /// timestamped UTF-8 text file.
      /// </summary>
      /// <param name="prefix">File-name prefix ("result" or "success").</param>
      /// <param name="rootPath">Directory that was scanned.</param>
      /// <param name="fileCount">Number of files examined.</param>
      /// <param name="nonUtf8Count">Number of files whose encoding was not UTF-8.</param>
      /// <returns>Full path of the report file that was written.</returns>
      private string WriteReport(string prefix, string rootPath, int fileCount, int nonUtf8Count)
      {
          ResultList.Insert(0, rootPath);
          ResultList.Insert(1, $"共计文件{fileCount}个");
          ResultList.Insert(2, $"非UTF-8文件共{nonUtf8Count}个");

          string reportPath = $"{ReportDirectory}{prefix}{DateTime.Now:yyyyMMddHHmmss}.txt";
          byte[] content = System.Text.Encoding.UTF8.GetBytes(string.Join(Environment.NewLine, ResultList.ToArray()));
          using (FileStream fsWrite = new FileStream(reportPath, FileMode.Append))
          {
              fsWrite.Write(content, 0, content.Length);
          }
          return reportPath;
      }

      /// <summary>
      /// Adds the files of <paramref name="path"/> to <paramref name="list"/>, then
      /// recurses into every sub-directory whose name is not in the exclusion list.
      /// </summary>
      /// <param name="path">Directory to scan.</param>
      /// <param name="list">Receives the full paths of the files found.</param>
      private void GetDirectory(string path, List<string> list)
      {
          DirectoryInfo folder = new DirectoryInfo(path);
          GetFile(path, list);
          foreach (var directory in folder.GetDirectories())
          {
              // Ordinal, case-insensitive: these are file-system names, not linguistic text.
              if (!ExceptList.Contains(directory.Name, StringComparer.OrdinalIgnoreCase))
              {
                  GetDirectory(Path.Combine(path, directory.Name), list);
              }
          }
      }

      /// <summary>
      /// Adds every file directly inside <paramref name="path"/> whose name does not
      /// end with an excluded suffix.
      /// </summary>
      /// <param name="path">Directory to list.</param>
      /// <param name="list">Receives the full paths of the accepted files.</param>
      private void GetFile(string path, List<string> list)
      {
          DirectoryInfo folder = new DirectoryInfo(path);
          foreach (FileInfo file in folder.GetFiles())
          {
              bool excluded = ExceptList.Any(suffix => file.Name.EndsWith(suffix, StringComparison.OrdinalIgnoreCase));
              if (!excluded)
              {
                  list.Add(Path.Combine(path, file.Name));
              }
          }
      }

      /// <summary>
      /// Rewrites a file in place as UTF-8 (with byte order mark).
      /// </summary>
      /// <param name="filename">Path of the file to convert.</param>
      /// <param name="encoding">Encoding the file is currently stored in.</param>
      private void ChangeEncoding(string filename, System.Text.Encoding encoding)
      {
          // ReadAllText consumes a leading byte order mark instead of returning it
          // as a U+FEFF character, so the rewritten file does not end up with two
          // BOMs. It also reads the whole file and closes the handle even on failure.
          string text = File.ReadAllText(filename, encoding);
          File.WriteAllText(filename, text, System.Text.Encoding.UTF8);
      }
  }


  /// <summary>
  /// Detects the text encoding of a file from its bytes: BOM-less or BOM-prefixed
  /// UTF-8, UTF-16 and UTF-32 by byte order mark, otherwise <see cref="Encoding.Default"/>.
  /// </summary>
  public class EncodingType
  {
      /// <summary>
      /// Opens the file at the given path and determines its encoding.
      /// </summary>
      /// <param name="FILE_NAME">Path of the file to inspect.</param>
      /// <param name="ResultList">Receives warning lines; may be null.</param>
      /// <returns>The detected encoding.</returns>
      public static System.Text.Encoding GetType(string FILE_NAME, List<string> ResultList)
      {
          // The using block releases the file handle even when detection throws.
          using (FileStream fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read))
          {
              return GetType(fs, FILE_NAME, ResultList);
          }
      }

      /// <summary>
      /// Determines the encoding of the data in the given stream. The stream is
      /// closed before this method returns.
      /// </summary>
      /// <param name="fs">Stream to read the file content from.</param>
      /// <param name="FILE_NAME">File name used in warning lines.</param>
      /// <param name="ResultList">Receives warning lines; may be null.</param>
      /// <returns>
      /// <see cref="Encoding.UTF8"/> for valid UTF-8 content or a UTF-8 BOM, the
      /// matching UTF-32/UTF-16 encoding for those BOMs, otherwise <see cref="Encoding.Default"/>.
      /// </returns>
      public static System.Text.Encoding GetType(FileStream fs, string FILE_NAME, List<string> ResultList)
      {
          // Disposing the reader also closes fs.
          using (BinaryReader r = new BinaryReader(fs, System.Text.Encoding.Default))
          {
              if (fs.Length > int.MaxValue)
              {
                  // Too large to load into one array. Report it and return UTF-8 so
                  // callers leave the file untouched, instead of silently
                  // classifying an empty read.
                  if (ResultList != null)
                  {
                      ResultList.Add($"{FILE_NAME},异常:文件过大,无法判断编码格式,已跳过");
                  }
                  return Encoding.UTF8;
              }

              byte[] ss = r.ReadBytes((int)fs.Length);

              if (IsUTF8Bytes(ss, FILE_NAME, ResultList) || StartsWith(ss, 0xEF, 0xBB, 0xBF))
              {
                  return Encoding.UTF8;
              }
              // The UTF-32 marks must be tested before UTF-16: the UTF-32 LE mark
              // begins with the UTF-16 LE mark.
              if (StartsWith(ss, 0xFF, 0xFE, 0x00, 0x00))
              {
                  return Encoding.UTF32;
              }
              if (StartsWith(ss, 0x00, 0x00, 0xFE, 0xFF))
              {
                  return new UTF32Encoding(true, true);
              }
              // A UTF-16 mark is exactly two bytes; no particular byte has to follow it.
              if (StartsWith(ss, 0xFE, 0xFF))
              {
                  return Encoding.BigEndianUnicode;
              }
              if (StartsWith(ss, 0xFF, 0xFE))
              {
                  return Encoding.Unicode;
              }
              return Encoding.Default;
          }
      }

      /// <summary>
      /// Tells whether <paramref name="data"/> begins with the given bytes.
      /// </summary>
      /// <param name="data">Bytes to inspect.</param>
      /// <param name="prefix">Expected leading bytes.</param>
      /// <returns>True when data is at least as long as prefix and starts with it.</returns>
      private static bool StartsWith(byte[] data, params byte[] prefix)
      {
          if (data.Length < prefix.Length)
          {
              return false;
          }
          for (int i = 0; i < prefix.Length; i++)
          {
              if (data[i] != prefix[i])
              {
                  return false;
              }
          }
          return true;
      }

      /// <summary>
      /// Checks whether the bytes are structurally valid UTF-8 (pure ASCII included).
      /// </summary>
      /// <param name="data">File content.</param>
      /// <param name="FILE_NAME">File name used in the warning line.</param>
      /// <param name="ResultList">Receives a warning when the data ends inside a multi-byte sequence; may be null.</param>
      /// <returns>
      /// False on the first byte that breaks the lead/continuation structure;
      /// true otherwise, including when the data ends in the middle of a sequence.
      /// </returns>
      private static bool IsUTF8Bytes(byte[] data, string FILE_NAME, List<string> ResultList)
      {
          int charByteCounter = 1; // bytes still expected for the current character (1 = expecting a lead byte)
          byte curByte;            // byte currently being examined
          for (int i = 0; i < data.Length; i++)
          {
              curByte = data[i];
              if (charByteCounter == 1)
              {
                  if (curByte >= 0x80)
                  {
                      // Count the leading 1 bits of the lead byte: each one after
                      // the first announces one more byte in the sequence.
                      while (((curByte <<= 1) & 0x80) != 0)
                      {
                          charByteCounter++;
                      }
                      // A lead byte must start with at least two 1 bits (110xxxxx ... 1111110x).
                      // A lone 10xxxxxx, or more than six 1 bits, is not UTF-8.
                      if (charByteCounter == 1 || charByteCounter > 6)
                      {
                          return false;
                      }
                  }
              }
              else
              {
                  // Continuation bytes must look like 10xxxxxx.
                  if ((curByte & 0xC0) != 0x80)
                  {
                      return false;
                  }
                  charByteCounter--;
              }
          }
          if (charByteCounter > 1)
          {
              // Data ended inside a multi-byte sequence: record it, but still treat
              // the file as UTF-8 so that callers skip it.
              if (ResultList != null)
              {
                  ResultList.Add($"{FILE_NAME},异常:非预期的byte格式,无法判断是否是UTF8(不带BOM)格式,已跳过");
              }
          }
          return true;
      }
  }


声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Monodyee/article/detail/253389
推荐阅读
  

闽ICP备14008679号