@@ -266,39 +266,43 @@ gb18030_2005_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
266266 }
267267}
268268
269- static const unsigned short gb18030_2005_pua2charset [31 * 3 ] = {
270- /* Unicode range GB18030 range */
271- 0xe766 , 0xe76b , 0xa2ab , /*.. 0xa2b0, */
272- 0xe76d , 0xe76d , 0xa2e4 ,
273- 0xe76e , 0xe76f , 0xa2ef , /*.. 0xa2f0, */
274- 0xe770 , 0xe771 , 0xa2fd , /*.. 0xa2fe, */
275- 0xe772 , 0xe77c , 0xa4f4 , /*.. 0xa4fe, */
276- 0xe77d , 0xe784 , 0xa5f7 , /*.. 0xa5fe, */
277- 0xe785 , 0xe78c , 0xa6b9 , /*.. 0xa6c0, */
278- 0xe78d , 0xe793 , 0xa6d9 , /*.. 0xa6df, */
279- 0xe794 , 0xe795 , 0xa6ec , /*.. 0xa6ed, */
280- 0xe796 , 0xe796 , 0xa6f3 ,
281- 0xe797 , 0xe79f , 0xa6f6 , /*.. 0xa6fe, */
282- 0xe7a0 , 0xe7ae , 0xa7c2 , /*.. 0xa7d0, */
283- 0xe7af , 0xe7bb , 0xa7f2 , /*.. 0xa7fe, */
284- 0xe7bc , 0xe7c6 , 0xa896 , /*.. 0xa8a0, */
285- 0xe7c9 , 0xe7cc , 0xa8c1 , /*.. 0xa8c4, */
286- 0xe7cd , 0xe7e1 , 0xa8ea , /*.. 0xa8fe, */
287- 0xe7e2 , 0xe7e2 , 0xa958 ,
288- 0xe7e3 , 0xe7e3 , 0xa95b ,
289- 0xe7e4 , 0xe7e6 , 0xa95d , /*.. 0xa95f, */
290- 0xe7f4 , 0xe800 , 0xa997 , /*.. 0xa9a3, */
291- 0xe801 , 0xe80f , 0xa9f0 , /*.. 0xa9fe, */
292- 0xe810 , 0xe814 , 0xd7fa , /*.. 0xd7fe, */
293- 0xe816 , 0xe818 , 0xfe51 , /*.. 0xfe53, */
294- 0xe81e , 0xe81e , 0xfe59 ,
295- 0xe826 , 0xe826 , 0xfe61 ,
296- 0xe82b , 0xe82c , 0xfe66 , /*.. 0xfe67, */
297- 0xe831 , 0xe832 , 0xfe6c , /*.. 0xfe6d, */
298- 0xe83b , 0xe83b , 0xfe76 ,
299- 0xe843 , 0xe843 , 0xfe7e ,
300- 0xe854 , 0xe855 , 0xfe90 , /*.. 0xfe91, */
301- 0xe864 , 0xe864 , 0xfea0 ,
/* Mapping of the user-defined (private use) code points in the range
   U+E766..U+E864 to GB18030 codes.

   Each entry covers the inclusive Unicode interval uni[0]..uni[1].
   'charset' is the GB18030 code assigned to uni[0]; the remaining code
   points of the interval map to the consecutive codes that follow it.
   A 'charset' value below 0x10000 denotes a two-byte code, any other
   value denotes a four-byte code; in both cases the most significant byte
   is the first byte of the encoded sequence.

   The entries are sorted by ascending Unicode value and do not overlap,
   because gb18030_2005_wctomb locates an entry by binary search.  The
   number of entries (35) must be kept in sync with the upper search bound
   used there.  */
static const struct { unsigned short uni[2]; unsigned int charset; } gb18030_2005_pua2charset[35] = {
  /* Unicode range      GB18030 range */
  { { 0xe766, 0xe76b }, 0xa2ab /*.. 0xa2b0, */ },
  { { 0xe76d, 0xe76d }, 0xa2e4 },
  { { 0xe76e, 0xe76f }, 0xa2ef /*.. 0xa2f0, */ },
  { { 0xe770, 0xe771 }, 0xa2fd /*.. 0xa2fe, */ },
  { { 0xe772, 0xe77c }, 0xa4f4 /*.. 0xa4fe, */ },
  { { 0xe77d, 0xe784 }, 0xa5f7 /*.. 0xa5fe, */ },
  { { 0xe785, 0xe78c }, 0xa6b9 /*.. 0xa6c0, */ },
  { { 0xe78d, 0xe793 }, 0xa6d9 /*.. 0xa6df, */ },
  { { 0xe794, 0xe795 }, 0xa6ec /*.. 0xa6ed, */ },
  { { 0xe796, 0xe796 }, 0xa6f3 },
  { { 0xe797, 0xe79f }, 0xa6f6 /*.. 0xa6fe, */ },
  { { 0xe7a0, 0xe7ae }, 0xa7c2 /*.. 0xa7d0, */ },
  { { 0xe7af, 0xe7bb }, 0xa7f2 /*.. 0xa7fe, */ },
  { { 0xe7bc, 0xe7c6 }, 0xa896 /*.. 0xa8a0, */ },
  { { 0xe7c9, 0xe7cc }, 0xa8c1 /*.. 0xa8c4, */ },
  { { 0xe7cd, 0xe7e1 }, 0xa8ea /*.. 0xa8fe, */ },
  { { 0xe7e2, 0xe7e2 }, 0xa958 },
  { { 0xe7e3, 0xe7e3 }, 0xa95b },
  { { 0xe7e4, 0xe7e6 }, 0xa95d /*.. 0xa95f, */ },
  { { 0xe7f4, 0xe800 }, 0xa997 /*.. 0xa9a3, */ },
  { { 0xe801, 0xe80f }, 0xa9f0 /*.. 0xa9fe, */ },
  { { 0xe810, 0xe814 }, 0xd7fa /*.. 0xd7fe, */ },
  /* Four-byte codes cover a single code point each.  */
  { { 0xe816, 0xe816 }, 0x95329031 },
  { { 0xe817, 0xe817 }, 0x95329033 },
  { { 0xe818, 0xe818 }, 0x95329730 },
  { { 0xe81e, 0xe81e }, 0xfe59 },
  { { 0xe826, 0xe826 }, 0xfe61 },
  { { 0xe82b, 0xe82c }, 0xfe66 /*.. 0xfe67, */ },
  { { 0xe831, 0xe831 }, 0x9536b937 },
  { { 0xe832, 0xe832 }, 0xfe6d },
  { { 0xe83b, 0xe83b }, 0x9630ba35 },
  { { 0xe843, 0xe843 }, 0xfe7e },
  { { 0xe854, 0xe854 }, 0xfe90 },
  { { 0xe855, 0xe855 }, 0x9635b630 },
  { { 0xe864, 0xe864 }, 0xfea0 },
};
303307
304308static int
@@ -316,7 +320,7 @@ gb18030_2005_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
316320 if (ret != RET_ILUNI )
317321 return ret ;
318322
319- ret = gb18030_2005_ext_wctomb (conv ,r ,wc ,n );
323+ ret = gb18030ext_wctomb (conv ,r ,wc ,n );
320324 if (ret != RET_ILUNI )
321325 return ret ;
322326
@@ -337,23 +341,32 @@ gb18030_2005_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
337341 return 2 ;
338342 }
339343 } else {
340- /* User-defined characters, two-byte part of range U+E766..U+E864 */
344+ /* User-defined characters, two-byte part and 6 four-byte mappings in
345+ range U+E766..U+E864 */
341346 unsigned int k1 = 0 ;
342- unsigned int k2 = 31 ;
347+ unsigned int k2 = 35 ;
343348 /* Invariant: We know that if wc occurs in Unicode interval in
344349 gb18030_2005_pua2charset, it does so at a k with k1 <= k < k2. */
345350 while (k1 < k2 ) {
346351 unsigned int k = (k1 + k2 ) / 2 ;
347- if (wc < gb18030_2005_pua2charset [k * 3 + 0 ])
352+ if (wc < gb18030_2005_pua2charset [k ]. uni [ 0 ])
348353 k2 = k ;
349- else if (wc > gb18030_2005_pua2charset [k * 3 + 1 ])
354+ else if (wc > gb18030_2005_pua2charset [k ]. uni [ 1 ])
350355 k1 = k + 1 ;
351356 else {
352- unsigned short c =
353- gb18030_2005_pua2charset [k * 3 + 2 ] + (wc - gb18030_2005_pua2charset [k * 3 + 0 ]);
354- r [0 ] = (c >> 8 );
355- r [1 ] = (c & 0xff );
356- return 2 ;
357+ unsigned int c =
358+ gb18030_2005_pua2charset [k ].charset + (wc - gb18030_2005_pua2charset [k ].uni [0 ]);
359+ if (c < 0x10000 ) {
360+ r [0 ] = (c >> 8 );
361+ r [1 ] = c & 0xff ;
362+ return 2 ;
363+ } else {
364+ r [0 ] = (c >> 24 );
365+ r [1 ] = (c >> 16 ) & 0xff ;
366+ r [2 ] = (c >> 8 ) & 0xff ;
367+ r [3 ] = c & 0xff ;
368+ return 4 ;
369+ }
357370 }
358371 }
359372 }
0 commit comments