赞
踩
- //GB2312到UTF-8的转换
- static int GB2312ToUtf8(const char* gb2312, char* utf8)
- {
- int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
- wchar_t* wstr = new wchar_t[len+1];
- memset(wstr, 0, len+1);
- MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len);
- len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
- WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);
- if(wstr) delete[] wstr;
- return len;
- }
-
// Heuristically decides whether a byte buffer holds UTF-8 encoded text.
//
// str:    buffer to inspect (does not need to be NUL-terminated).
// length: number of bytes of str to inspect.
//
// Returns true only when the buffer is structurally well-formed as
// multi-byte sequences (a lead byte followed by the right number of
// 10xxxxxx continuation bytes) AND contains at least one non-ASCII byte.
// A buffer that is empty, NULL, or pure 7-bit ASCII yields false, because
// such data gives no evidence of being UTF-8 specifically.
//
// The legacy 5- and 6-byte forms (lead bytes 0xF8..0xFD) are still accepted.
// Overlong encodings and surrogate code points are not checked.
bool IsTextUTF8(const char* str, long length)
{
    if (!str || length <= 0)
    {
        return false;
    }

    int pending = 0;        // continuation bytes still owed by the current character
    bool allAscii = true;   // stays true until a byte with the high bit set is seen

    for (long i = 0; i < length; i++)
    {
        // Work on unsigned values so bytes >= 0x80 compare correctly even
        // where plain char is signed.
        unsigned char chr = (unsigned char)str[i];

        if ((chr & 0x80) != 0)
        {
            allAscii = false;
        }

        if (pending == 0)
        {
            // Start of a character: ASCII bytes need nothing further, any
            // other byte must be a valid lead byte announcing its length.
            if (chr >= 0x80)
            {
                if (chr >= 0xFE)
                    return false;   // 0xFE and 0xFF never occur in UTF-8
                else if (chr >= 0xFC)
                    pending = 6;
                else if (chr >= 0xF8)
                    pending = 5;
                else if (chr >= 0xF0)
                    pending = 4;
                else if (chr >= 0xE0)
                    pending = 3;
                else if (chr >= 0xC0)
                    pending = 2;
                else
                    return false;   // 0x80..0xBF: continuation byte with no lead byte

                pending--;          // the lead byte itself has been consumed
            }
        }
        else
        {
            // Inside a multi-byte character: every byte must look like 10xxxxxx.
            if ((chr & 0xC0) != 0x80)
            {
                return false;
            }
            pending--;
        }
    }

    if (pending > 0)
    {
        return false;   // buffer ended in the middle of a character
    }

    return !allAscii;
}
-
-
- //UTF-8到GB2312的转换
- static int Utf8ToGB2312(const char* utf8, char* gb2312)
- {
- int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
- wchar_t* wstr = new wchar_t[len+1];
- memset(wstr, 0, len+1);
- MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
- len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
- WideCharToMultiByte(CP_ACP, 0, wstr, -1, gb2312, len, NULL, NULL);
- if(wstr) delete[] wstr;
- return len;
- }
-
-
- //GB2312到Unicode的转换
- static int GB2312ToUnicode(const char* gb2312, char* unicode)
- {
- UINT nCodePage = 936; //GB2312
- int len = MultiByteToWideChar(nCodePage, 0, gb2312, -1, NULL, 0);
- wchar_t* wstr = new wchar_t[len+1];
- memset(wstr, 0, len+1);
- MultiByteToWideChar(nCodePage, 0, gb2312, -1, wstr, len);
- len = len*sizeof(wchar_t);
- memcpy(unicode, wstr, len);
- if(wstr) delete[] wstr;
- return len;
- }
-
-
- //Unicode到GB2312的转换
- static int UnicodeToGB2312(const char* unicode, int size, char*gb2312)
- {
- UINT nCodePage = 936; //GB2312
- wchar_t* wstr = new wchar_t[size/2+1];
- memcpy(wstr, unicode, size);
- int len = WideCharToMultiByte(nCodePage, 0, wstr, -1, NULL, 0, NULL, NULL);
- WideCharToMultiByte(nCodePage, 0, wstr, -1, gb2312, len, NULL, NULL);
- if(wstr) delete[] wstr;
- return len;
- }
-
-
-
-
- //UTF-8到Unicode的转换
- static int Utf8ToUnicode(const char* utf8, char*unicode)
- {
- int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
- wchar_t* wstr = new wchar_t[len+1];
- memset(wstr, 0, len+1);
- MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
- memcpy(unicode, wstr, len);
- if(wstr) delete[] wstr;
- return len;
- }
-
-
- //Unicode到UTF-8的转换
- static int UnicodeToUtf8(const char* unicode, int size, char* utf8)
- {
- wchar_t* wstr = new wchar_t[size/2+1];
- memcpy(wstr, unicode, size);
- int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
- WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);
- if(wstr) delete[] wstr;
- return len;
- }

转自http://blog.csdn.net/seven407/article/details/7712823
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。