5858#include < memory>
5959#include < trantor/utils/Logger.h>
6060
61+ #if __cplusplus < 201103L || __cplusplus >= 201703L
/**
 * @brief Decode a UTF-8 encoded byte string into a wide string.
 *
 * Handles one- to four-byte UTF-8 sequences. Code points above U+FFFF are
 * stored as a single element when wchar_t is at least 32 bits wide and as a
 * UTF-16 surrogate pair when wchar_t is 16 bits wide.
 *
 * @param utf8Str The UTF-8 encoded input.
 * @return The decoded wide string, or an empty string if the input contains
 * an invalid lead byte, a truncated sequence, a malformed continuation byte
 * or a value above U+10FFFF.
 *
 * @note Overlong encodings and UTF-8 encoded surrogates are not rejected.
 */
static std::wstring utf8Toutf16(const std::string &utf8Str)
{
    const size_t length = utf8Str.length();

    std::wstring wideStr;
    // Every code point consumes at least one input byte, so this is an
    // upper bound that avoids reallocations.
    wideStr.reserve(length);

    size_t pos = 0;
    while (pos < length)
    {
        // Work on unsigned bytes so that the bit tests do not depend on the
        // signedness of plain char.
        const unsigned char lead = static_cast<unsigned char>(utf8Str[pos]);
        char32_t codePoint = 0;
        size_t trailing = 0;  // number of continuation bytes expected

        if ((lead & 0x80) == 0x00)
        {
            // 0xxxxxxx: single-byte character (ASCII)
            codePoint = lead;
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            // 110xxxxx: two-byte character
            codePoint = lead & 0x1F;
            trailing = 1;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            // 1110xxxx: three-byte character
            codePoint = lead & 0x0F;
            trailing = 2;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            // 11110xxx: four-byte character (supplementary planes)
            codePoint = lead & 0x07;
            trailing = 3;
        }
        else
        {
            // Stray continuation byte or unsupported lead byte
            return std::wstring();
        }

        if (pos + trailing >= length)
        {
            // The sequence is cut off by the end of the input
            return std::wstring();
        }

        for (size_t k = 1; k <= trailing; ++k)
        {
            const unsigned char cont =
                static_cast<unsigned char>(utf8Str[pos + k]);
            if ((cont & 0xC0) != 0x80)
            {
                // Continuation bytes must look like 10xxxxxx
                return std::wstring();
            }
            codePoint = (codePoint << 6) | (cont & 0x3F);
        }
        pos += trailing + 1;

        if (codePoint > 0x10FFFF)
        {
            // Beyond the Unicode code space
            return std::wstring();
        }

        if (codePoint > 0xFFFF && sizeof(wchar_t) < 4)
        {
            // 16-bit wchar_t: split into a high/low surrogate pair
            const char32_t offset = codePoint - 0x10000;
            wideStr.push_back(static_cast<wchar_t>(0xD800 + (offset >> 10)));
            wideStr.push_back(
                static_cast<wchar_t>(0xDC00 + (offset & 0x3FF)));
        }
        else
        {
            wideStr.push_back(static_cast<wchar_t>(codePoint));
        }
    }

    return wideStr;
}
116+
/**
 * @brief Encode a wide string as UTF-8.
 *
 * Each element of the input is treated as a Unicode code point. A high
 * surrogate immediately followed by a low surrogate is combined into one
 * supplementary code point, so UTF-16 input is emitted as a single four-byte
 * sequence rather than two three-byte ones. Unpaired surrogates are encoded
 * as three-byte sequences. Values outside the Unicode code space (above
 * U+10FFFF, which includes negative values of a signed wchar_t) are replaced
 * by U+FFFD.
 *
 * @param utf16Str The wide string to encode.
 * @return The UTF-8 encoded string.
 */
static std::string utf16Toutf8(const std::wstring &utf16Str)
{
    const size_t length = utf16Str.length();

    std::string utf8Str;
    // Capacity hint only: three bytes per element covers the whole BMP, and
    // the string grows on demand for four-byte sequences.
    utf8Str.reserve(length * 3);

    for (size_t pos = 0; pos < length; ++pos)
    {
        char32_t codePoint = static_cast<char32_t>(utf16Str[pos]);

        // Merge a surrogate pair into the code point it represents.
        if (codePoint >= 0xD800 && codePoint <= 0xDBFF && pos + 1 < length)
        {
            const char32_t low = static_cast<char32_t>(utf16Str[pos + 1]);
            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                codePoint =
                    0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
                ++pos;  // the low surrogate has been consumed
            }
        }

        if (codePoint > 0x10FFFF)
        {
            // Not a Unicode code point: emit the replacement character
            codePoint = 0xFFFD;
        }

        if (codePoint <= 0x7F)
        {
            // Single-byte character (ASCII)
            utf8Str.push_back(static_cast<char>(codePoint));
        }
        else if (codePoint <= 0x7FF)
        {
            // Two-byte character
            utf8Str.push_back(
                static_cast<char>(0xC0 | ((codePoint >> 6) & 0x1F)));
            utf8Str.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
        }
        else if (codePoint <= 0xFFFF)
        {
            // Three-byte character
            utf8Str.push_back(
                static_cast<char>(0xE0 | ((codePoint >> 12) & 0x0F)));
            utf8Str.push_back(
                static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
            utf8Str.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
        }
        else
        {
            // Four-byte character (supplementary planes)
            utf8Str.push_back(
                static_cast<char>(0xF0 | ((codePoint >> 18) & 0x07)));
            utf8Str.push_back(
                static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)));
            utf8Str.push_back(
                static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
            utf8Str.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
        }
    }

    return utf8Str;
}
151+ #endif // __cplusplus
152+
61153namespace trantor
62154{
63155namespace utils
@@ -81,11 +173,7 @@ std::string toUtf8(const std::wstring &wstr)
81173 NULL,
82174 NULL);
83175#elif __cplusplus < 201103L || __cplusplus >= 201703L
84- // Note: Introduced in c++11 and deprecated with c++17.
85- // Revert to C99 code since there no replacement yet
86- strTo.resize (3 * wstr.length (), 0 );
87- auto nLen = wcstombs (&strTo[0 ], wstr.c_str (), strTo.length ());
88- strTo.resize (nLen);
176+ strTo = utf16Toutf8 (wstr);
89177#else // c++11 to c++14
90178 std::wstring_convert<std::codecvt_utf8<wchar_t >, wchar_t > utf8conv;
91179 strTo = utf8conv.to_bytes (wstr);
@@ -104,11 +192,7 @@ std::wstring fromUtf8(const std::string &str)
104192 ::MultiByteToWideChar (
105193 CP_UTF8, 0 , &str[0 ], (int )str.size(), &wstrTo[0], nSizeNeeded);
106194#elif __cplusplus < 201103L || __cplusplus >= 201703L
107- // Note: Introduced in c++11 and deprecated with c++17.
108- // Revert to C99 code since there no replacement yet
109- wstrTo.resize (str.length (), 0 );
110- auto nLen = mbstowcs (&wstrTo[0 ], str.c_str (), wstrTo.length ());
111- wstrTo.resize (nLen);
195+ wstrTo = utf8Toutf16 (str);
112196#else // c++11 to c++14
113197 std::wstring_convert<std::codecvt_utf8<wchar_t >, wchar_t > utf8conv;
114198 try
0 commit comments