赞
踩
1、递归检查目录下所有文件的编码格式
2、将非 UTF-8 编码的文件转换为 UTF-8 编码
.config 配置文件(appSettings 节):
<appSettings>
  <!-- Comma-separated list of file-name suffixes and directory names that the scan skips.
       Each entry is trimmed by the reader, so surrounding spaces are not significant. -->
  <add key="ExceptList" value=".git,.nuget,.vs,.dll,.pdb,.png,.jpg,.gif,.log,.eot,.ttf,.woff,.swf,packages,ReferenceDLL,.cache,.xls,.xlsx,.doc,.docx,favicon.ico,_references.js,.exe" />
  <!-- "true" (compared case-insensitively) makes the tool rewrite every non-UTF-8 file as UTF-8;
       any other value only produces the report. -->
  <add key="isChangeToUTF8" value="true" />
</appSettings>
/// <summary>
/// Main form of the encoding checker. It recursively collects the files below a directory,
/// reports every file whose detected encoding is not UTF-8 and, when enabled through the
/// "isChangeToUTF8" app setting, rewrites those files as UTF-8.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
        this.txt_url.Text = "G:\\WorkSpace\\messagecenter";
    }

    /// <summary>
    /// Entries of the "ExceptList" app setting: directory names that are not descended into
    /// and file-name suffixes that are not scanned.
    /// </summary>
    private static readonly List<string> ExceptList = new List<string>();

    /// <summary>True when the "isChangeToUTF8" app setting equals "true" (ignoring case).</summary>
    private static readonly bool isChangeToUTF8 = false;

    /// <summary>Report lines collected during the most recent scan.</summary>
    private List<string> ResultList = new List<string>();

    /// <summary>
    /// Loads the exclusion list and the conversion switch from the application settings.
    /// </summary>
    static Form1()
    {
        string strExcept = ReadSetting("ExceptList");
        string strIsChange = ReadSetting("isChangeToUTF8");

        if (!string.IsNullOrEmpty(strExcept))
        {
            var tempExcept = strExcept.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (var temp in tempExcept)
            {
                // A whitespace-only entry trims to "", and every file name ends with "",
                // which would silently exclude all files - so such entries are dropped.
                string entry = temp.Trim();
                if (entry.Length > 0)
                {
                    ExceptList.Add(entry);
                }
            }
        }

        isChangeToUTF8 = string.Equals(strIsChange, "true", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Reads one app setting as a string.
    /// </summary>
    /// <param name="key">Name of the app setting.</param>
    /// <returns>The configured value, or an empty string when the key is missing.</returns>
    private static string ReadSetting(string key)
    {
        try
        {
            System.Configuration.AppSettingsReader appReader = new System.Configuration.AppSettingsReader();
            return Convert.ToString(appReader.GetValue(key, typeof(string)));
        }
        catch (InvalidOperationException)
        {
            // GetValue throws when the key does not exist; letting that escape from the
            // static constructor would make the whole form type unusable.
            return string.Empty;
        }
    }

    /// <summary>
    /// Runs the scan for the directory entered in the path text box, optionally converts the
    /// non-UTF-8 files, and writes a timestamped report file to drive D:.
    /// </summary>
    private void btn_start_Click(object sender, EventArgs e)
    {
        ResultList = new List<string>();
        string myPath = txt_url.Text;
        if (string.IsNullOrEmpty(myPath))
        {
            MessageBox.Show("PATH?");
            return;
        }

        List<string> pathList = new List<string>();
        try
        {
            // Collect every file below the directory (recursively).
            GetDirectory(myPath, pathList);
            foreach (var path in pathList)
            {
                // Detect the encoding of the file.
                var type = EncodingType.GetType(path, ResultList);
                if (type != Encoding.UTF8)
                {
                    ResultList.Add($"{path},原格式{type.EncodingName}");
                    if (isChangeToUTF8)
                    {
                        // Rewrite the file as UTF-8.
                        ChangeEncoding(path, type);
                    }
                }
            }
        }
        catch (Exception exception)
        {
            MessageBox.Show(exception.ToString());
            return;
        }

        // Decide the report kind before the header lines are inserted.
        bool hasFindings = ResultList.Any();
        ResultList.Insert(0, myPath);
        ResultList.Insert(1, $"共计文件{pathList.Count}个");
        // At this point the list holds the two header lines plus the collected entries.
        ResultList.Insert(2, $"非UTF-8文件共{ResultList.Count - 2}个");

        string prefix = hasFindings ? "result" : "success";
        string reportPath = $"D:\\{prefix}{DateTime.Now.ToString("yyyyMMddHHmmss")}.txt";
        try
        {
            // UTF-8 without BOM, appended to the file - same bytes the report always had.
            File.AppendAllText(reportPath, string.Join(Environment.NewLine, ResultList), new UTF8Encoding(false));
        }
        catch (Exception exception) when (exception is IOException || exception is UnauthorizedAccessException)
        {
            // E.g. the machine has no D: drive or the location is not writable.
            MessageBox.Show(exception.ToString());
            return;
        }

        // Show the real (timestamped) report file name when there is something to look at.
        MessageBox.Show(hasFindings ? reportPath : "finish!");
    }

    /// <summary>
    /// Adds the files of <paramref name="path"/> and of all its non-excluded sub-directories
    /// to <paramref name="list"/>.
    /// </summary>
    /// <param name="path">Directory to scan.</param>
    /// <param name="list">Receives the full paths of the files found.</param>
    private void GetDirectory(string path, List<string> list)
    {
        DirectoryInfo folder = new DirectoryInfo(path);
        GetFile(path, list);
        foreach (var directory in folder.GetDirectories())
        {
            // Directory names are matched as a whole, ignoring case.
            if (!ExceptList.Contains(directory.Name, StringComparer.OrdinalIgnoreCase))
            {
                GetDirectory(Path.Combine(path, directory.Name), list);
            }
        }
    }

    /// <summary>
    /// Adds the files located directly in <paramref name="path"/> to <paramref name="list"/>,
    /// skipping every file whose name ends with an entry of the exclusion list.
    /// </summary>
    /// <param name="path">Directory whose files are listed.</param>
    /// <param name="list">Receives the full paths of the files found.</param>
    private void GetFile(string path, List<string> list)
    {
        DirectoryInfo folder = new DirectoryInfo(path);
        foreach (FileInfo file in folder.GetFiles())
        {
            // Ordinal comparison: suffix matching must not depend on the current culture.
            bool excluded = ExceptList.Any(suffix => file.Name.EndsWith(suffix, StringComparison.OrdinalIgnoreCase));
            if (!excluded)
            {
                list.Add(Path.Combine(path, file.Name));
            }
        }
    }

    /// <summary>
    /// Rewrites a file in place as UTF-8 (with BOM), decoding its current content with
    /// <paramref name="encoding"/>.
    /// </summary>
    /// <param name="filename">Path of the file to convert.</param>
    /// <param name="encoding">Encoding the file is currently stored in.</param>
    private void ChangeEncoding(string filename, System.Text.Encoding encoding)
    {
        byte[] fileBytes = File.ReadAllBytes(filename);

        // Skip the source BOM: decoding it would produce a U+FEFF character that ends up
        // as a second BOM right after the one written below.
        byte[] preamble = encoding.GetPreamble();
        int offset = StartsWith(fileBytes, preamble) ? preamble.Length : 0;
        string text = encoding.GetString(fileBytes, offset, fileBytes.Length - offset);

        // Encoding.UTF8 emits a BOM, as the previous StreamWriter-based code did.
        File.WriteAllText(filename, text, Encoding.UTF8);
    }

    /// <summary>
    /// Returns true when <paramref name="data"/> begins with the non-empty byte sequence
    /// <paramref name="prefix"/>.
    /// </summary>
    private static bool StartsWith(byte[] data, byte[] prefix)
    {
        if (prefix.Length == 0 || data.Length < prefix.Length)
        {
            return false;
        }
        for (int i = 0; i < prefix.Length; i++)
        {
            if (data[i] != prefix[i])
            {
                return false;
            }
        }
        return true;
    }
}
/// <summary>
/// Detects the text encoding of a file from its raw bytes.
/// </summary>
public class EncodingType
{
    /// <summary>
    /// Opens the file at the given path, reads its bytes and determines its encoding.
    /// </summary>
    /// <param name="FILE_NAME">Path of the file to inspect.</param>
    /// <param name="ResultList">Receives a warning line when the file ends inside a multi-byte sequence.</param>
    /// <returns>The detected encoding; <see cref="Encoding.Default"/> when nothing more specific matches.</returns>
    public static System.Text.Encoding GetType(string FILE_NAME, List<string> ResultList)
    {
        using (FileStream fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read))
        {
            return GetType(fs, FILE_NAME, ResultList);
        }
    }

    /// <summary>
    /// Determines the encoding of the content of an open file stream. The stream is closed
    /// before this method returns.
    /// </summary>
    /// <param name="fs">File stream to read from.</param>
    /// <param name="FILE_NAME">File name used in the warning line.</param>
    /// <param name="ResultList">Receives a warning line when the data ends inside a multi-byte sequence.</param>
    /// <returns>The detected encoding; <see cref="Encoding.Default"/> when nothing more specific matches.</returns>
    public static System.Text.Encoding GetType(FileStream fs, string FILE_NAME, List<string> ResultList)
    {
        byte[] ss;
        using (BinaryReader r = new BinaryReader(fs, System.Text.Encoding.Default))
        {
            // A length beyond int.MaxValue cannot be buffered in one array; such a file is
            // read as empty (and therefore reported as UTF-8), exactly as before.
            int length = fs.Length > int.MaxValue ? 0 : (int)fs.Length;
            ss = r.ReadBytes(length);
        }

        // Structurally valid UTF-8 (with or without BOM), or at least a UTF-8 BOM.
        if (IsUTF8Bytes(ss, FILE_NAME, ResultList) || HasPrefix(ss, 0xEF, 0xBB, 0xBF))
        {
            return Encoding.UTF8;
        }
        // UTF-16 big endian BOM.
        if (HasPrefix(ss, 0xFE, 0xFF))
        {
            return Encoding.BigEndianUnicode;
        }
        // UTF-32 little endian BOM; must be tested before UTF-16 LE, whose BOM is its prefix.
        if (HasPrefix(ss, 0xFF, 0xFE, 0x00, 0x00))
        {
            return Encoding.UTF32;
        }
        // UTF-16 little endian BOM.
        if (HasPrefix(ss, 0xFF, 0xFE))
        {
            return Encoding.Unicode;
        }
        return Encoding.Default;
    }

    /// <summary>
    /// Returns true when <paramref name="data"/> begins with the bytes in <paramref name="prefix"/>.
    /// </summary>
    private static bool HasPrefix(byte[] data, params byte[] prefix)
    {
        if (data.Length < prefix.Length)
        {
            return false;
        }
        for (int i = 0; i < prefix.Length; i++)
        {
            if (data[i] != prefix[i])
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Checks whether the bytes are structured like UTF-8: every lead byte announces between
    /// two and six bytes and is followed by the matching number of continuation bytes.
    /// Pure ASCII data and empty data pass the check.
    /// </summary>
    /// <param name="data">Bytes to check.</param>
    /// <param name="FILE_NAME">File name used in the warning line.</param>
    /// <param name="ResultList">Receives a warning line when the data ends inside a multi-byte sequence.</param>
    /// <returns>False as soon as a byte violates the structure; otherwise true.</returns>
    private static bool IsUTF8Bytes(byte[] data, string FILE_NAME, List<string> ResultList)
    {
        int charByteCounter = 1; // number of bytes still expected for the current character
        byte curByte;            // byte currently being examined
        for (int i = 0; i < data.Length; i++)
        {
            curByte = data[i];
            if (charByteCounter == 1)
            {
                if (curByte >= 0x80)
                {
                    // Count the leading 1 bits of the lead byte; that is the sequence length.
                    while (((curByte <<= 1) & 0x80) != 0)
                    {
                        charByteCounter++;
                    }
                    // A lead byte starts with at least two 1 bits: 110XXXXX ... 1111110X.
                    if (charByteCounter == 1 || charByteCounter > 6)
                    {
                        return false;
                    }
                }
            }
            else
            {
                // A continuation byte must look like 10XXXXXX.
                if ((curByte & 0xC0) != 0x80)
                {
                    return false;
                }
                charByteCounter--;
            }
        }
        if (charByteCounter > 1)
        {
            // The data ends in the middle of a multi-byte sequence: record it, but still
            // treat the file as UTF-8 so that it is left untouched.
            ResultList.Add($"{FILE_NAME},异常:非预期的byte格式,无法判断是否是UTF8(不带BOM)格式,已跳过");
        }
        return true;
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。