// 赞 / 踩  (page "like" / "dislike" button labels captured with the listing; not part of Coder.cpp)
// Coder.cpp: implementation of the Coder class.
//
//
#include "stdafx.h"
#include "Coder.h"
#include "Encoding.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//
// Construction/Destruction
//
// Definitions of the byte-order-mark (file header) constants.
// GetCodeType compares the first bytes of a file against these, and the
// file converters write them at the start of UTF-16 output files.
// UNICODEBOM: two-byte header matched for the UNICODE (little-endian) type.
/*static*/ const byte Coder::UNICODEBOM[2]={0xFF,0xFE};
// UNICODEBEBOM: two-byte header matched for the UNICODEBIGENDIAN type.
/*static*/ const byte Coder::UNICODEBEBOM[2]={0xFE,0xFF};
// UTF8BOM: three-byte header matched for the UTF8 type.
/*static*/ const byte Coder::UTF8BOM[3]={0xEF,0xBB,0xBF};
Coder::Coder()
{
    // Default number of bytes converted per chunk: 2 MiB.
    PREDEFINEDSIZE = 2 * 1024 * 1024;
}
// Destructor: the body is empty, nothing is released here.
Coder::~Coder()
{
}
//繁体中文BIG5 转换成 简体中文 GB2312
char* Coder::BIG5ToGB2312(const char* szBIG5Str)
{
CString msg;
LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);
wchar_t* szUnicodeBuff =MByteToWChar(CP_BIG5,szBIG5Str);
char* szGB2312Buff =WCharToMByte(CP_GB2312,szUnicodeBuff);
int nLength = LCMapString(lcid,LCMAP_SIMPLIFIED_CHINESE, szGB2312Buff,-1,NULL,0);
char* pBuffer = new char[nLength + 1];
if(!pBuffer)
return NULL;
memset(pBuffer,0,sizeof(char)*(nLength+1));
LCMapString(0x0804,LCMAP_SIMPLIFIED_CHINESE,szGB2312Buff,-1,pBuffer,nLength);
delete[] szUnicodeBuff;
delete[] szGB2312Buff;
return pBuffer;
}
// GB2312 转 GBK
char* Coder::GB2312ToGBK(const char *szGB2312Str)
{
int nStrLen = strlen(szGB2312Str);
if(!nStrLen)
return NULL;
LCID wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
int nReturn = LCMapString(wLCID, LCMAP_TRADITIONAL_CHINESE, szGB2312Str, nStrLen, NULL, 0);
if(!nReturn)
return NULL;
char *pcBuf = new char[nReturn + 1];
if(!pcBuf)
return NULL;
memset(pcBuf,0,sizeof(char)*(nReturn + 1));
wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
LCMapString(wLCID, LCMAP_TRADITIONAL_CHINESE, szGB2312Str, nReturn, pcBuf, nReturn);
return pcBuf;
}
// GBK 转换成 GB2312
char* Coder::GBKToGB2312(const char *szGBKStr)
{
int nStrLen = strlen(szGBKStr);
if(!nStrLen)
return NULL;
LCID wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_BIG5);
int nReturn = LCMapString(wLCID, LCMAP_SIMPLIFIED_CHINESE, szGBKStr, nStrLen, NULL, 0);
if(!nReturn)
return NULL;
char *pcBuf = new char[nReturn + 1];
memset(pcBuf,0,sizeof(char)*(nReturn + 1));
wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_BIG5);
LCMapString(wLCID, LCMAP_SIMPLIFIED_CHINESE, szGBKStr, nReturn, pcBuf, nReturn);
return pcBuf;
}
// GBK (simplified + traditional) -> BIG5 (traditional), using GB2312 as the
// intermediate form. The returned buffer comes from GB2312ToBIG5 and must be
// released by the caller with delete[].
char* Coder::GBKToBIG5(const char *szGBKStr)
{
    // Stage 1: GBK -> GB2312.
    char* const pGB2312 = GBKToGB2312(szGBKStr);

    // Stage 2: GB2312 -> BIG5.
    char* const pBig5 = GB2312ToBIG5(pGB2312);

    delete[] pGB2312;
    return pBig5;
}
// BIG5 (traditional) -> GBK (simplified + traditional), using GB2312 as the
// intermediate form.
//
// Returns a buffer allocated with new[] (caller must delete[]), or NULL when
// either stage yields no result.
char* Coder::BIG5ToGBK(const char *szBIG5Str)
{
    // Stage 1: BIG5 -> GB2312.
    char *pTemp = BIG5ToGB2312(szBIG5Str);
    if (!pTemp)
        return NULL;    // GB2312ToGBK calls strlen on its argument

    // Stage 2: GB2312 -> GBK.
    char *pBuffer = GB2312ToGBK(pTemp);
    delete[] pTemp;
    return pBuffer;
}
//简体中文 GB2312 转换成 繁体中文BIG5
char* Coder::GB2312ToBIG5(const char* szGB2312Str)
{
LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);
int nLength = LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGB2312Str,-1,NULL,0);
char* pBuffer=new char[nLength+1];
if(!pBuffer)
return NULL;
LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGB2312Str,-1,pBuffer,nLength);
pBuffer[nLength]=0;
wchar_t* pUnicodeBuff = MByteToWChar(CP_GB2312,pBuffer);
char* pBIG5Buff = WCharToMByte(CP_BIG5,pUnicodeBuff);
delete[] pBuffer;
delete[] pUnicodeBuff;
return pBIG5Buff;
}
// Detects the encoding type of a text file.
//
// UTF-8 / UTF-16 files are recognised by the byte-order mark at the start of
// the file. Otherwise up to 30 two-byte samples are taken (skipping 100 bytes
// after each one) and classified by byte range as GB2312, BIG5 or GBK; the
// classification of the last matching sample is returned. This heuristic can
// misdetect.
//
// Returns GB2312 when the file cannot be opened or when no sample matches any
// of the ranges.
TextCode Coder::GetCodeType(CString filepath)
{
    CFile file;
    if (!file.Open(filepath, CFile::modeRead))
        return GB2312;

    // Zero-filled so a short file never leaves stale bytes to compare against.
    byte buf[3] = {0, 0, 0};
    const UINT nHead = file.Read(buf, 3);

    if (nHead >= 3 && buf[0]==UTF8BOM[0] && buf[1]==UTF8BOM[1] && buf[2]==UTF8BOM[2])
        return UTF8;
    if (nHead >= 2 && buf[0]==UNICODEBOM[0] && buf[1]==UNICODEBOM[1])
        return UNICODE;
    if (nHead >= 2 && buf[0]==UNICODEBEBOM[0] && buf[1]==UNICODEBEBOM[1])
        return UNICODEBIGENDIAN;

    // Same default as for an unreadable file, so the result is always defined.
    TextCode tctemp = GB2312;
    for (int nSamples = 30; nSamples > 0 && file.Read(buf, 2) == 2; --nSamples)
    {
        if ( (buf[0]>=176 && buf[0]<=247) && (buf[1]>=160 && buf[1]<=254) )
            tctemp = GB2312;
        else if ( (buf[0]>=129 && buf[0]<=255) && ( (buf[1]>=64 && buf[1]<=126) || (buf[1]>=161 && buf[1]<=254) ) )
            tctemp = BIG5;
        else if ( (buf[0]>=129 && buf[0]<=254) && (buf[1]>=64 && buf[1]<=254) )
            tctemp = GBK;

        // Skip ahead so the samples are spread over the file.
        file.Seek(100, CFile::current);
    }
    return tctemp;
}
// Converts a multi-byte file (GB2312 / GBK / BIG5 / UTF-8) to a UTF-16 file
// (UNICODE or UNICODEBIGENDIAN).
//
// filesourcepath  file to read
// filesavepath    file to create / overwrite
// tcTo            target type; must be one of the two UTF-16 types
// tcCur           source type, or DefaultCodeType to auto-detect with
//                 GetCodeType (detection is heuristic, hence the override)
//
// Returns FALSE when the type combination is invalid, the source cannot be
// opened or is empty, the target cannot be created, or a chunk fails to
// convert; TRUE otherwise.
//
// The file is processed in chunks of at most PREDEFINEDSIZE bytes and each
// chunk is converted as a NUL-terminated string, so a chunk is cut at an
// embedded NUL byte and a multi-byte character that straddles a chunk
// boundary is converted in two halves.
BOOL Coder::MBFileToUnicodeFile(CString filesourcepath, CString filesavepath,TextCode tcTo,TextCode tcCur)
{
    const TextCode curtc = (tcCur != DefaultCodeType) ? tcCur : GetCodeType(filesourcepath);

    // Source must be a multi-byte type and the target a UTF-16 type.
    if (curtc > UTF8 || tcTo <= UTF8 || curtc == tcTo)
        return FALSE;

    // Code page used to decode the source text.
    UINT CodePage;
    switch (curtc)
    {
    case GB2312:
    case GBK:               // GBK text is first mapped to GB2312, see below
        CodePage = CP_GB2312;
        break;
    case BIG5:
        CodePage = CP_BIG5;
        break;
    case UTF8:
        CodePage = CP_UTF8;
        break;
    default:
        return FALSE;       // no code page known for this type
    }

    CFile filesource, filesave;
    DWORD filelength = 0;
    if (!filesource.Open(filesourcepath, CFile::modeRead) ||
        0 == (filelength = static_cast<DWORD>(filesource.GetLength())))
        return FALSE;
    if (!filesave.Open(filesavepath, CFile::modeCreate | CFile::modeWrite))
        return FALSE;

    // One reusable read buffer, never larger than the file itself.
    const DWORD chunkSize = (filelength < static_cast<DWORD>(PREDEFINEDSIZE))
                                ? filelength : static_cast<DWORD>(PREDEFINEDSIZE);
    char *pChSrc = new char[chunkSize + 1];
    if (!pChSrc)
        return FALSE;

    // Skip the UTF-8 BOM only when it is really there (the caller may have
    // forced UTF8 for a file that has none).
    if (UTF8 == curtc)
    {
        byte bom[3] = {0, 0, 0};
        if (filesource.Read(bom, 3) == 3 &&
            bom[0] == UTF8BOM[0] && bom[1] == UTF8BOM[1] && bom[2] == UTF8BOM[2])
            filelength -= 3;
        else
            filesource.Seek(0, CFile::begin);
    }

    // Output header.
    if (UNICODEBIGENDIAN == tcTo)
        filesave.Write(UNICODEBEBOM, 2 * sizeof(byte));
    else
        filesave.Write(UNICODEBOM, 2 * sizeof(byte));

    BOOL bConverted = TRUE;
    while (filelength > 0)
    {
        memset(pChSrc, 0, sizeof(char) * (chunkSize + 1));
        const DWORD len = (filelength > chunkSize) ? chunkSize : filelength;
        const DWORD readlen = filesource.Read(pChSrc, len);
        if (!readlen)
            break;          // end of file

        // GBK is converted through GB2312; the read buffer itself is left
        // untouched so it can be reused for the next chunk.
        char *pChGB2312 = NULL;
        const char *pChText = pChSrc;
        if (GBK == curtc)
        {
            pChGB2312 = GBKToGB2312(pChSrc);
            pChText = pChGB2312;
        }

        wchar_t *pwChDes = pChText ? MByteToWChar(CodePage, pChText) : NULL;
        delete[] pChGB2312;
        if (!pwChDes)
        {
            bConverted = FALSE;
            break;
        }

        if (UNICODEBIGENDIAN == tcTo)
            UnicodeEndianConvert(pwChDes);

        // Length is in bytes and excludes the terminator.
        filesave.Write(pwChDes, static_cast<UINT>(wcslen(pwChDes) * sizeof(wchar_t)));
        filesave.Flush();
        delete[] pwChDes;
        filelength -= readlen;
    }

    delete[] pChSrc;
    return bConverted;
}
//
// Converts a NUL-terminated multi-byte string in the given code page to a
// wide string using MultiByteToWideChar.
// Returns a zero-initialised buffer allocated with new[] (caller must
// delete[]), or NULL when the conversion reports zero characters written.
wchar_t* Coder::MByteToWChar(UINT CodePage,LPCSTR lpcszSrcStr)
{
    // Pass 1: ask how many wide characters are needed.
    int cchWide = MultiByteToWideChar(CodePage, 0, lpcszSrcStr, -1, NULL, 0);

    LPWSTR pWide = new wchar_t[cchWide + 1];
    if (NULL == pWide)
        return NULL;
    memset(pWide, 0, sizeof(wchar_t) * (cchWide + 1));

    // Pass 2: perform the conversion into the buffer.
    cchWide = MultiByteToWideChar(CodePage, 0, lpcszSrcStr, -1, pWide, cchWide);
    if (0 == cchWide)
    {
        delete[] pWide;
        return NULL;
    }
    return pWide;
}
// Converts a NUL-terminated wide string to a multi-byte string in the given
// code page using WideCharToMultiByte.
// Returns a zero-initialised buffer allocated with new[] (caller must
// delete[]), or NULL when the size query or the conversion fails.
char* Coder::WCharToMByte(UINT CodePage,LPCWSTR lpcwszSrcStr)
{
    // Pass 1: ask how many bytes are needed (terminator included).
    int len = WideCharToMultiByte(CodePage, 0, lpcwszSrcStr, -1, NULL, 0, NULL, NULL);
    if (len <= 0)
        return NULL;

    char* lpszDesStr = new char[len + 1];
    if (!lpszDesStr)            // check before the buffer is touched
        return NULL;
    memset(lpszDesStr, 0, sizeof(char) * (len + 1));

    // Pass 2: perform the conversion into the buffer.
    len = WideCharToMultiByte(CodePage, 0, lpcwszSrcStr, -1, lpszDesStr, len, NULL, NULL);
    if (!len)
    {
        delete[] lpszDesStr;
        return NULL;
    }
    return lpszDesStr;
}
// Swaps the high and low byte of every character of a NUL-terminated wide
// string in place (conversion between the UNICODE and UNICODEBIGENDIAN byte
// orders). Processing stops at the first zero character. A NULL pointer is
// ignored.
void Coder::UnicodeEndianConvert(LPWSTR lpwszstr)
{
    if (!lpwszstr)
        return;

    // Each character is tested for the terminator before it is swapped, so
    // the swap can never hide or create the end of the string.
    for (wchar_t* pCh = lpwszstr; *pCh != 0; ++pCh)
    {
        const unsigned int unit = static_cast<unsigned int>(*pCh);
        const unsigned int high = (unit & 0xFF00) >> 8;
        const unsigned int low  = unit & 0x00FF;
        *pCh = static_cast<wchar_t>((low << 8) | high);
    }
}
// Converts a UTF-16 file (UNICODE or UNICODEBIGENDIAN, detected with
// GetCodeType) to a multi-byte file (GB2312 / GBK / BIG5 / UTF-8).
//
// filesourcepath  file to read
// filesavepath    file to create / overwrite
// tcTo            target type; must be a multi-byte type
//
// Returns FALSE when the type combination is invalid, the source cannot be
// opened or is empty, the target cannot be created, or a chunk fails to
// convert; TRUE otherwise. No byte-order mark is written to the output.
//
// The file is processed in chunks of at most PREDEFINEDSIZE bytes (rounded
// down to whole UTF-16 units) and each chunk is converted as a NUL-terminated
// string, so a chunk is cut at an embedded zero character.
BOOL Coder::UnicodeFileToMBFile(CString filesourcepath, CString filesavepath,TextCode tcTo)
{
    const TextCode curtc = GetCodeType(filesourcepath);

    // Source must be a UTF-16 type and the target a multi-byte type.
    if (curtc <= UTF8 || tcTo > UTF8 || curtc == tcTo)
        return FALSE;

    // GBK and BIG5 are produced from GB2312 in a second step, so all three
    // Chinese targets first encode to CP_GB2312.
    UINT CodePage;
    switch (tcTo)
    {
    case GB2312:
    case GBK:
    case BIG5:
        CodePage = CP_GB2312;
        break;
    case UTF8:
        CodePage = CP_UTF8;
        break;
    default:
        return FALSE;       // no code page known for this type
    }

    CFile filesource, filesave;
    DWORD filelength = 0;
    if (!filesource.Open(filesourcepath, CFile::modeRead) ||
        0 == (filelength = static_cast<DWORD>(filesource.GetLength())))
        return FALSE;
    if (!filesave.Open(filesavepath, CFile::modeCreate | CFile::modeWrite))
        return FALSE;

    // Chunk size in bytes, a non-zero multiple of sizeof(wchar_t).
    DWORD chunkBytes = (filelength < static_cast<DWORD>(PREDEFINEDSIZE))
                           ? filelength : static_cast<DWORD>(PREDEFINEDSIZE);
    if (chunkBytes < sizeof(wchar_t))
        chunkBytes = sizeof(wchar_t);
    chunkBytes -= chunkBytes % sizeof(wchar_t);

    // One extra element keeps every chunk zero-terminated.
    const DWORD cchBuffer = chunkBytes / sizeof(wchar_t) + 1;
    wchar_t *pwChSrc = new wchar_t[cchBuffer];
    if (!pwChSrc)
        return FALSE;

    // Skip the two-byte byte-order mark; it is not part of the text.
    const DWORD kBomBytes = 2;
    filesource.Seek(kBomBytes, CFile::begin);
    filelength = (filelength > kBomBytes) ? filelength - kBomBytes : 0;

    BOOL bConverted = TRUE;
    while (filelength > 0)
    {
        memset(pwChSrc, 0, sizeof(wchar_t) * cchBuffer);
        const DWORD len = (filelength > chunkBytes) ? chunkBytes : filelength;
        const DWORD readlen = filesource.Read(pwChSrc, len);
        if (!readlen)
            break;          // end of file

        if (UNICODEBIGENDIAN == curtc)
            UnicodeEndianConvert(pwChSrc);

        char *pChDes = WCharToMByte(CodePage, pwChSrc);

        // Second step for GBK / BIG5: convert the GB2312 intermediate and
        // release it right away.
        if (pChDes && (GBK == tcTo || BIG5 == tcTo))
        {
            char *pChGB2312 = pChDes;
            pChDes = (GBK == tcTo) ? GB2312ToGBK(pChGB2312) : GB2312ToBIG5(pChGB2312);
            delete[] pChGB2312;
        }

        if (!pChDes)
        {
            bConverted = FALSE;
            break;
        }

        filesave.Write(pChDes, static_cast<UINT>(strlen(pChDes)));
        filesave.Flush();
        delete[] pChDes;
        filelength -= readlen;
    }

    delete[] pwChSrc;
    return bConverted;
}
// Converts one multi-byte file (GB2312 / GBK / BIG5 / UTF-8) to another
// multi-byte type. Each chunk is converted by MByteToMByte.
//
// filesourcepath  file to read
// filesavepath    file to create / overwrite
// tcTo            target type; must be a multi-byte type
// tcCur           source type, or DefaultCodeType to auto-detect with
//                 GetCodeType (detection is heuristic, hence the override)
//
// Returns FALSE when the type combination is invalid, the source cannot be
// opened or is empty, the target cannot be created, or a chunk fails to
// convert; TRUE otherwise. No byte-order mark is written to the output.
//
// The file is processed in chunks of at most PREDEFINEDSIZE bytes and each
// chunk is converted as a NUL-terminated string, so a chunk is cut at an
// embedded NUL byte and a multi-byte character that straddles a chunk
// boundary is converted in two halves.
BOOL Coder::MBFileToMBFile(CString filesourcepath, CString filesavepath,TextCode tcTo,TextCode tcCur)
{
    const TextCode curtc = (DefaultCodeType != tcCur) ? tcCur : GetCodeType(filesourcepath);

    // Both sides must be multi-byte types, and they must differ.
    if (curtc > UTF8 || tcTo > UTF8 || curtc == tcTo)
        return FALSE;

    CFile filesource, filesave;
    DWORD filelength = 0;
    if (!filesource.Open(filesourcepath, CFile::modeRead) ||
        0 == (filelength = static_cast<DWORD>(filesource.GetLength())))
        return FALSE;
    if (!filesave.Open(filesavepath, CFile::modeCreate | CFile::modeWrite))
        return FALSE;

    // One reusable read buffer, never larger than the file itself.
    const DWORD chunkSize = (filelength < static_cast<DWORD>(PREDEFINEDSIZE))
                                ? filelength : static_cast<DWORD>(PREDEFINEDSIZE);
    char *pChSrc = new char[chunkSize + 1];
    if (!pChSrc)
        return FALSE;

    // Skip the UTF-8 BOM only when it is really there (the caller may have
    // forced UTF8 for a file that has none).
    if (UTF8 == curtc)
    {
        byte bom[3] = {0, 0, 0};
        if (filesource.Read(bom, 3) == 3 &&
            bom[0] == UTF8BOM[0] && bom[1] == UTF8BOM[1] && bom[2] == UTF8BOM[2])
            filelength -= 3;
        else
            filesource.Seek(0, CFile::begin);
    }

    const UINT CodePageCur = GetCodePage(curtc);
    const UINT CodePageTo = GetCodePage(tcTo);

    BOOL bConverted = TRUE;
    while (filelength > 0)
    {
        memset(pChSrc, 0, sizeof(char) * (chunkSize + 1));
        const DWORD len = (filelength > chunkSize) ? chunkSize : filelength;
        const DWORD readlen = filesource.Read(pChSrc, len);
        if (!readlen)
            break;          // end of file

        char *pChDes = MByteToMByte(CodePageCur, CodePageTo, pChSrc);
        if (!pChDes)
        {
            bConverted = FALSE;
            break;
        }

        filesave.Write(pChDes, static_cast<UINT>(strlen(pChDes)));
        delete[] pChDes;    // released per chunk
        filelength -= readlen;
    }

    delete[] pChSrc;
    return bConverted;
}
// Converts a UTF-16 file between the UNICODE and UNICODEBIGENDIAN byte orders.
//
// filesourcepath  file to read; its type is detected with GetCodeType
// filesavepath    file to create / overwrite
// tcTo            target type; must be UNICODE or UNICODEBIGENDIAN and differ
//                 from the detected source type
//
// Returns FALSE when the source is not a UTF-16 file, tcTo is not a UTF-16
// type or equals the source type, a file cannot be opened, or the source is
// shorter than a byte-order mark; TRUE otherwise.
//
// The whole file is read into memory. The text is handled as a
// NUL-terminated wide string, so output stops at an embedded zero character.
BOOL Coder::UnicodeEndianFileConvert(CString filesourcepath, CString filesavepath,TextCode tcTo)
{
    const TextCode curtc = GetCodeType(filesourcepath);
    if (curtc != UNICODE && curtc != UNICODEBIGENDIAN)
        return FALSE;
    if (tcTo != UNICODE && tcTo != UNICODEBIGENDIAN)
        return FALSE;
    if (curtc == tcTo)
        return FALSE;

    CFile filesource, filesave;
    if (!filesource.Open(filesourcepath, CFile::modeRead) ||
        !filesave.Open(filesavepath, CFile::modeCreate | CFile::modeWrite))
        return FALSE;

    const DWORD kBomBytes = 2;
    const DWORD length = static_cast<DWORD>(filesource.GetLength());
    if (length < kBomBytes)
        return FALSE;

    // Only the text after the source BOM is converted; swapping the BOM as
    // well would put a second mark right behind the one written below.
    const DWORD payload = length - kBomBytes;
    const DWORD cchBuffer = payload / sizeof(wchar_t) + 1;
    wchar_t *pwChDes = new wchar_t[cchBuffer];
    if (!pwChDes)
        return FALSE;
    memset(pwChDes, 0, sizeof(wchar_t) * cchBuffer);

    filesource.Seek(kBomBytes, CFile::begin);
    filesource.Read(pwChDes, payload);
    UnicodeEndianConvert(pwChDes);

    // Header of the target byte order, then the swapped text.
    if (UNICODE == tcTo)
        filesave.Write(UNICODEBOM, 2 * sizeof(byte));
    else
        filesave.Write(UNICODEBEBOM, 2 * sizeof(byte));
    filesave.Write(pwChDes, static_cast<UINT>(wcslen(pwChDes) * sizeof(wchar_t)));
    filesave.Flush();

    delete[] pwChDes;
    return TRUE;
}
// Converts a file from one encoding type to another by dispatching to the
// matching converter. With six types there are 30 ordered pairs:
//   UTF-16   -> UTF-16     (2)   UnicodeEndianFileConvert
//   multi-byte -> UTF-16   (8)   MBFileToUnicodeFile
//   UTF-16   -> multi-byte (8)   UnicodeFileToMBFile
//   multi-byte -> multi-byte (12) MBFileToMBFile
//
// filesourcepath  file to read
// filesavepath    file to create / overwrite
// tcTo            target type
// tcCur           source type, or DefaultCodeType to auto-detect with GetCodeType
//
// Returns FALSE when source and target type are equal, otherwise the result
// of the selected converter.
BOOL Coder::FileToOtherFile(CString filesourcepath, CString filesavepath, TextCode tcTo,TextCode tcCur)
{
    const TextCode curtc = (DefaultCodeType != tcCur) ? tcCur : GetCodeType(filesourcepath);
    if (curtc == tcTo)
        return FALSE;

    // Types from UNICODE upwards are the UTF-16 types; those below are multi-byte.
    const bool bSrcUnicode = (curtc >= UNICODE);
    const bool bDstUnicode = (tcTo >= UNICODE);

    if (bSrcUnicode && bDstUnicode)
        return UnicodeEndianFileConvert(filesourcepath, filesavepath, tcTo);
    if (bDstUnicode)
        return MBFileToUnicodeFile(filesourcepath, filesavepath, tcTo, curtc);
    if (bSrcUnicode)
        return UnicodeFileToMBFile(filesourcepath, filesavepath, tcTo);
    return MBFileToMBFile(filesourcepath, filesavepath, tcTo, curtc);
}
// Returns the display name of an encoding type; the result is an empty
// string for a type that has no name here.
CString Coder::CodeTypeToString(TextCode tc)
{
    LPCTSTR pszName = NULL;
    switch (tc)
    {
    case GB2312:            pszName = _T("GB2312");             break;
    case BIG5:              pszName = _T("Big5");               break;
    case GBK:               pszName = _T("GBK");                break;
    case UTF8:              pszName = _T("UTF-8");              break;
    case UNICODE:           pszName = _T("Unicode");            break;
    case UNICODEBIGENDIAN:  pszName = _T("Unicode big endian"); break;
    default:                                                    break;
    }

    CString strtype;
    if (pszName != NULL)
        strtype = pszName;
    return strtype;
}
// Converts a NUL-terminated multi-byte string from one code page to another.
//
// CodePageCur  code page of szSrcStr
// CodePageTo   code page of the result
// szSrcStr     text to convert
//
// Between the three Chinese code pages the dedicated converters are used.
// From UTF-8 the text goes through UTF-16 to GB2312, and from there to GBK or
// BIG5 when requested. To UTF-8 the text goes through UTF-16, with GBK first
// mapped to GB2312.
//
// Returns a buffer allocated with new[] (caller must delete[]), or NULL when
// szSrcStr is NULL or any conversion stage yields no result.
char* Coder::MByteToMByte(UINT CodePageCur, UINT CodePageTo, const char* szSrcStr)
{
    if (!szSrcStr)
        return NULL;

    char *pchDes = NULL;
    char *pchTemp = NULL;
    wchar_t *pwchtemp = NULL;

    if (CodePageCur != CP_UTF8 && CodePageTo != CP_UTF8)
    {
        // Conversion among the three Chinese encodings.
        switch (CodePageCur)
        {
        case CP_GB2312:
            pchDes = (CP_BIG5 == CodePageTo) ? GB2312ToBIG5(szSrcStr)
                                             : GB2312ToGBK(szSrcStr);
            break;
        case CP_BIG5:
            pchDes = (CP_GB2312 == CodePageTo) ? BIG5ToGB2312(szSrcStr)
                                               : BIG5ToGBK(szSrcStr);
            break;
        case CP_GBK:
            pchDes = (CP_GB2312 == CodePageTo) ? GBKToGB2312(szSrcStr)
                                               : GBKToBIG5(szSrcStr);
            break;
        }
    }
    else if (CP_UTF8 == CodePageCur)
    {
        // UTF-8 -> GB2312 directly; GBK / BIG5 use GB2312 as intermediate.
        pwchtemp = MByteToWChar(CodePageCur, szSrcStr);
        if (pwchtemp)
        {
            if (CP_GB2312 == CodePageTo)
            {
                pchDes = WCharToMByte(CP_GB2312, pwchtemp);
            }
            else
            {
                pchTemp = WCharToMByte(CP_GB2312, pwchtemp);
                // GB2312ToGBK calls strlen, so a failed stage must stop here.
                if (pchTemp)
                    pchDes = (CP_GBK == CodePageTo) ? GB2312ToGBK(pchTemp)
                                                    : GB2312ToBIG5(pchTemp);
            }
        }
    }
    else
    {
        // Any other multi-byte encoding -> UTF-8.
        if (CP_GBK == CodePageCur)
        {
            pchTemp = GBKToGB2312(szSrcStr);
            if (pchTemp)
                pwchtemp = MByteToWChar(CP_GB2312, pchTemp);
        }
        else
        {
            pwchtemp = MByteToWChar(CodePageCur, szSrcStr);
        }
        if (pwchtemp)
            pchDes = WCharToMByte(CodePageTo, pwchtemp);
    }

    delete[] pchTemp;
    delete[] pwchtemp;
    return pchDes;
}
// Returns the code page that corresponds to a multi-byte encoding type.
// UNICODE, UNICODEBIGENDIAN and any other value have no code page of their
// own; CP_ACP is returned for them so the result is always a defined value.
UINT Coder::GetCodePage(TextCode tccur)
{
    switch (tccur)
    {
    case GB2312:
        return CP_GB2312;
    case BIG5:
        return CP_BIG5;
    case GBK:
        return CP_GBK;
    case UTF8:
        return CP_UTF8;
    default:
        return CP_ACP;
    }
}
// Sets the number of bytes converted per chunk. A value of 0 is ignored and
// the current setting is kept.
void Coder::SetDefaultConvertSize(UINT nCount)
{
    if (0 == nCount)
        return;
    PREDEFINEDSIZE = nCount;
}
// Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.