微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

将UTF-16转换为UTF-8

我目前正在使用VC++ 2008 MFC。由于PostgreSQL不支持UTF-16(Windows用于Unicode的编码),因此在存储之前,我需要将字符串从UTF-16转换为UTF-8。

这是我的代码片段。

// demo.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include "demo.h" #include "Utils.h" #include <iostream> #ifdef _DEBUG #define new DEBUG_NEW #endif // The one and only application object CWinApp theApp; using namespace std; int _tmain(int argc,TCHAR* argv[],TCHAR* envp[]) { int nRetCode = 0; // initialize MFC and print and error on failure if (!AfxWinInit(::GetModuleHandle(NULL),NULL,::GetCommandLine(),0)) { // Todo: change error code to suit your needs _tprintf(_T("Fatal error: MFC initialization Failedn")); nRetCode = 1; } else { // Todo: code your application's behavior here. } CString utf16 = _T("Hello"); std::cout << utf16.GetLength() << std::endl; CStringA utf8 = UTF8Util::ConvertUTF16ToUTF8(utf16); std::cout << utf8.GetLength() << std::endl; getchar(); return nRetCode; }

以及转换函数。

namespace UTF8Util { //---------------------------------------------------------------------------- // FUNCTION: ConvertUTF8ToUTF16 // DESC: Converts Unicode UTF-8 text to Unicode UTF-16 (Windows default). //---------------------------------------------------------------------------- CStringW ConvertUTF8ToUTF16( __in const CHAR * pszTextUTF8 ) { // // Special case of NULL or empty input string // if ( (pszTextUTF8 == NULL) || (*pszTextUTF8 == '') ) { // Return empty string return L""; } // // Consider CHAR's count corresponding to total input string length,// including end-of-string () character // const size_t cchUTF8Max = INT_MAX - 1; size_t cchUTF8; HRESULT hr = ::StringCchLengthA( pszTextUTF8,cchUTF8Max,&cchUTF8 ); if ( Failed( hr ) ) { AtlThrow( hr ); } // Consider also terminating ++cchUTF8; // Convert to 'int' for use with MultiBytetoWideChar API int cbUTF8 = static_cast<int>( cchUTF8 ); // // Get size of destination UTF-16 buffer,in WCHAR's // int cchUTF16 = ::MultiBytetoWideChar( CP_UTF8,// convert from UTF-8 MB_ERR_INVALID_CHARS,// error on invalid chars pszTextUTF8,// source UTF-8 string cbUTF8,// total length of source UTF-8 string,// in CHAR's (= bytes),including end-of-string NULL,// unused - no conversion done in this step 0 // request size of destination buffer,in WCHAR's ); ATLASSERT( cchUTF16 != 0 ); if ( cchUTF16 == 0 ) { AtlThrowLastWin32(); } // // Allocate destination buffer to store UTF-16 string // CStringW strUTF16; WCHAR * pszUTF16 = strUTF16.GetBuffer( cchUTF16 ); // // Do the conversion from UTF-8 to UTF-16 // int result = ::MultiBytetoWideChar( CP_UTF8,including end-of-string pszUTF16,// destination buffer cchUTF16 // size of destination buffer,in WCHAR's ); ATLASSERT( result != 0 ); if ( result == 0 ) { AtlThrowLastWin32(); } // Release internal CString buffer strUTF16.ReleaseBuffer(); // Return resulting UTF16 string return strUTF16; } //---------------------------------------------------------------------------- // FUNCTION: 
ConvertUTF16ToUTF8 // DESC: Converts Unicode UTF-16 (Windows default) text to Unicode UTF-8. //---------------------------------------------------------------------------- CStringA ConvertUTF16ToUTF8( __in const WCHAR * pszTextUTF16 ) { // // Special case of NULL or empty input string // if ( (pszTextUTF16 == NULL) || (*pszTextUTF16 == L'') ) { // Return empty string return ""; } // // Consider WCHAR's count corresponding to total input string length,// including end-of-string (L'') character. // const size_t cchUTF16Max = INT_MAX - 1; size_t cchUTF16; HRESULT hr = ::StringCchLengthW( pszTextUTF16,cchUTF16Max,&cchUTF16 ); if ( Failed( hr ) ) { AtlThrow( hr ); } // Consider also terminating ++cchUTF16; // // WC_ERR_INVALID_CHARS flag is set to fail if invalid input character // is encountered. // This flag is supported on Windows Vista and later. // Don't use it on Windows XP and prevIoUs. // #if (WINVER >= 0x0600) DWORD dwConversionFlags = WC_ERR_INVALID_CHARS; #else DWORD dwConversionFlags = 0; #endif // // Get size of destination UTF-8 buffer,in CHAR's (= bytes) // int cbUTF8 = ::WideCharToMultiByte( CP_UTF8,// convert to UTF-8 dwConversionFlags,// specify conversion behavior pszTextUTF16,// source UTF-16 string static_cast<int>( cchUTF16 ),// total source string length,in WCHAR's,// including end-of-string NULL,// unused - no conversion required in this step 0,// request buffer size NULL,NULL // unused ); ATLASSERT( cbUTF8 != 0 ); if ( cbUTF8 == 0 ) { AtlThrowLastWin32(); } // // Allocate destination buffer for UTF-8 string // CStringA strUTF8; int cchUTF8 = cbUTF8; // sizeof(CHAR) = 1 byte CHAR * pszUTF8 = strUTF8.GetBuffer( cchUTF8 ); // // Do the conversion from UTF-16 to UTF-8 // int result = ::WideCharToMultiByte( CP_UTF8,// including end-of-string pszUTF8,// destination buffer cbUTF8,// destination buffer size,in bytes NULL,NULL // unused ); ATLASSERT( result != 0 ); if ( result == 0 ) { AtlThrowLastWin32(); } // Release internal CString buffer 
strUTF8.ReleaseBuffer(); // Return resulting UTF-8 string return strUTF8; } } // namespace UTF8Util

但是,在运行时,我遇到了异常,位置在:

Windows服务Process.Start不在networking服务帐户下工作

debugging堆栈值损坏的好方法

将程序的输出复制到屏幕上,也复制到文件

有什么办法让SHFileOperation模态?

在使用gettimeofday时,初始计时结果较慢 – 在RHEL6服务器下更糟糕

ATLASSERT( cbUTF8 != 0 ); ——该断言在尝试获取目标UTF-8缓冲区的大小时失败。我遗漏了什么?如果我使用中文字符进行测试,该如何验证由此产生的UTF-8字符串是否正确?

在Windows上与-R和-rpath开关链接
获取与Winsock套接字列表
SystemEvents.SessionSwitch用户被锁定/解锁
为什么这四个并行线程不能在cpu上平均加载
在Windows上编译Objective-C程序

问题是你指定了WC_ERR_INVALID_CHARS标志:

Windows Vista及更高版本:遇到无效输入字符时失败。如果这个标志没有被设置,那么这个函数会静默地丢弃非法的代码点。对GetLastError的调用返回ERROR_NO_UNICODE_TRANSLATION。请注意,此标志仅适用于将CodePage指定为CP_UTF8或54936(适用于Windows Vista和更高版本)的情况,它不能与其他代码页值一起使用。

您的转换函数似乎相当长。这个对你来说效果如何?

//----------------------------------------------------------------------------
// FUNCTION: ConvertUTF16ToUTF8
// DESC: Converts Unicode UTF-16 (Windows default) text to Unicode UTF-8.
//----------------------------------------------------------------------------
CStringA ConvertUTF16ToUTF8( __in LPCWSTR pszTextUTF16 )
{
    if (pszTextUTF16 == NULL) return "";

    int utf16len = wcslen(pszTextUTF16);
    int utf8len = WideCharToMultiByte(CP_UTF8, 0, pszTextUTF16, utf16len, NULL, 0, NULL, NULL );

    CArray<CHAR> buffer;
    buffer.SetSize(utf8len+1);
    buffer.SetAt(utf8len, '\0');

    WideCharToMultiByte(CP_UTF8, 0, pszTextUTF16, utf16len, buffer.GetData(), utf8len, 0, 0 );

    return buffer.GetData();
}

我看到你使用了一个名为StringCchLengthW的函数来获得输出缓冲区所需的长度。我看过的大部分地方都推荐使用WideCharToMultiByte函数本身来告诉你它需要多少个CHAR。

编辑:正如Rob指出的,您可以配合CP_UTF8代码页使用CW2A:

CStringA str = CW2A(wStr, CP_UTF8);

趁着编辑,我可以回答你的第二个问题:我如何验证由此产生的UTF-8字符串是否正确?将其写入文本文件,然后在Mozilla Firefox或同类(equivalent)程序中打开它。在“视图”菜单中,您可以转到字符编码并手动切换到UTF-8(假设Firefox没有正确猜测)。将它与具有相同文本的UTF-16文档进行比较,看看是否有任何差异。

您也可以使用ATL字符串转换宏——从UTF-16转换为UTF-8时使用CW2A,并传入CP_UTF8作为代码页,例如:

CW2A utf8(buffer, CP_UTF8);
const char* data = utf8.m_psz;

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。

相关推荐