11/*
2- * Copyright (C) 1999-2000 Free Software Foundation, Inc.
2+ * Copyright (C) 1999-2002 Free Software Foundation, Inc.
33 * This file is part of the GNU LIBICONV Library.
44 *
55 * The GNU LIBICONV Library is free software; you can redistribute it
2020
2121/*
2222 * JAVA
23- * This is ISO 8859-1 with \uXXXX escape sequences, denoting Unicode characters.
23+ * This is ISO 8859-1 with \uXXXX escape sequences, denoting Unicode BMP
24+ * characters. Consecutive pairs of \uXXXX escape sequences in the surrogate
25+ * range, as in UTF-16, denote Unicode characters outside the BMP.
2426 */
2527
2628static int
2729java_mbtowc (conv_t conv , ucs4_t * pwc , const unsigned char * s , int n )
2830{
2931 unsigned char c ;
30- ucs4_t wc ;
32+ ucs4_t wc , wc2 ;
3133 int i ;
3234
3335 c = s [0 ];
@@ -54,8 +56,39 @@ java_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
5456 goto simply_backslash ;
5557 wc |= (ucs4_t ) c << (4 * (5 - i ));
5658 }
57- * pwc = wc ;
58- return 6 ;
59+ if (!(wc >= 0xd800 && wc < 0xe000 )) {
60+ * pwc = wc ;
61+ return 6 ;
62+ }
63+ if (wc >= 0xdc00 )
64+ goto simply_backslash ;
65+ if (n < 7 )
66+ return RET_TOOFEW (0 );
67+ if (s [6 ] != '\\' )
68+ goto simply_backslash ;
69+ if (n < 8 )
70+ return RET_TOOFEW (0 );
71+ if (s [7 ] != 'u' )
72+ goto simply_backslash ;
73+ wc2 = 0 ;
74+ for (i = 8 ; i < 12 ; i ++ ) {
75+ if (n <= i )
76+ return RET_TOOFEW (0 );
77+ c = s [i ];
78+ if (c >= '0' && c <= '9' )
79+ c -= '0' ;
80+ else if (c >= 'A' && c <= 'Z' )
81+ c -= 'A' - 10 ;
82+ else if (c >= 'a' && c <= 'z' )
83+ c -= 'a' - 10 ;
84+ else
85+ goto simply_backslash ;
86+ wc2 |= (ucs4_t ) c << (4 * (11 - i ));
87+ }
88+ if (!(wc2 >= 0xdc00 && wc2 < 0xe000 ))
89+ goto simply_backslash ;
90+ * pwc = 0x10000 + ((wc - 0xd800 ) << 10 ) + (wc2 - 0xdc00 );
91+ return 12 ;
5992simply_backslash :
6093 * pwc = '\\' ;
6194 return 1 ;
@@ -67,7 +100,7 @@ java_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
67100 if (wc < 0x80 ) {
68101 * r = wc ;
69102 return 1 ;
70- } else {
103+ } else if ( wc < 0x10000 ) {
71104 if (n >= 6 ) {
72105 unsigned int i ;
73106 r [0 ] = '\\' ;
@@ -79,5 +112,26 @@ java_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
79112 return 6 ;
80113 } else
81114 return RET_TOOSMALL ;
115+ } else if (wc < 0x110000 ) {
116+ if (n >= 12 ) {
117+ ucs4_t wc1 = 0xd800 + ((wc - 0x10000 ) >> 10 );
118+ ucs4_t wc2 = 0xdc00 + ((wc - 0x10000 ) & 0x3ff );
119+ unsigned int i ;
120+ r [0 ] = '\\' ;
121+ r [1 ] = 'u' ;
122+ i = (wc1 >> 12 ) & 0x0f ; r [2 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
123+ i = (wc1 >> 8 ) & 0x0f ; r [3 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
124+ i = (wc1 >> 4 ) & 0x0f ; r [4 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
125+ i = wc1 & 0x0f ; r [5 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
126+ r [6 ] = '\\' ;
127+ r [7 ] = 'u' ;
128+ i = (wc2 >> 12 ) & 0x0f ; r [8 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
129+ i = (wc2 >> 8 ) & 0x0f ; r [9 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
130+ i = (wc2 >> 4 ) & 0x0f ; r [10 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
131+ i = wc2 & 0x0f ; r [11 ] = (i < 10 ? '0' + i : 'a' - 10 + i );
132+ return 12 ;
133+ } else
134+ return RET_TOOSMALL ;
82135 }
136+ return RET_ILUNI ;
83137}
0 commit comments