Skip to content

Commit ac379c1

Browse files
committed
Upgrade the GB18030 converter to the version from 2005.
1 parent 33b05e1 commit ac379c1

File tree

5 files changed

+142
-44
lines changed

5 files changed

+142
-44
lines changed

ChangeLog

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
2011-08-06 Bruno Haible <bruno@clisp.org>
2+
3+
Upgrade the GB18030 converter to the version from 2005.
4+
* lib/gb18030ext.h (gb18030ext_2uni_pagefe): Change element type to
5+
'unsigned int'. Change values for 0xFE51..0xFE53, 0xFE59, 0xFE61,
6+
0xFE66, 0xFE67, 0xFE6C, 0xFE6D, 0xFE76, 0xFE7E, 0xFE90, 0xFE91, 0xFEA0.
7+
(gb18030ext_mbtowc): Change type of wc to 'unsigned int'. Change values
8+
for 0xA6D9..0xA6DF, 0xA6EC..0xA6ED, 0xA6F3, 0xA8BC.
9+
(gb18030ext_page9f, gb18030ext_pagefe): New constant arrays.
10+
(gb18030ext_wctomb): Change values for U+1E3F, U+9FB4..U+9FBB,
11+
U+FE10..U+FE19, U+20087, U+20089, U+200CC, U+215D7, U+2298F, U+241FE.
12+
* tests/GB18030-BMP.TXT: Change values for 0xA6D9..0xA6DF,
13+
0xA6EC..0xA6ED, 0xA6F3, 0xA8BC, 0xFE51..0xFE53, 0xFE59, 0xFE61, 0xFE66,
14+
0xFE67, 0xFE6C, 0xFE6D, 0xFE76, 0xFE7E, 0xFE90, 0xFE91, 0xFEA0, to map
15+
to now-assigned Unicode codepoints.
16+
* tests/GB18030.IRREVERSIBLE.TXT: New file.
17+
118
2011-08-06 Bruno Haible <bruno@clisp.org>
219

320
Fix conversion bug in CP1258 converter.

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
* The 'iconv' program now produces its output as soon as it can. It no longer
22
unnecessarily waits for more input.
3+
* Updated the GB18030 converter to map 25 characters to code points that have
4+
been added to Unicode since 2000, rather than to code points in the Private Use
5+
Area.
36
* Updated the BIG5-HKSCS converter. The old BIG5-HKSCS converter is renamed to
47
BIG5-HKSCS:2004. A new converter BIG5-HKSCS:2008 is added. BIG5-HKSCS is now
58
an alias for BIG5-HKSCS:2008.

lib/gb18030ext.h

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
2+
* Copyright (C) 1999-2001, 2005, 2011 Free Software Foundation, Inc.
33
* This file is part of the GNU LIBICONV Library.
44
*
55
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -27,20 +27,20 @@ static const unsigned short gb18030ext_2uni_pagea9[13] = {
2727
0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6,
2828
0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
2929
};
30-
static const unsigned short gb18030ext_2uni_pagefe[96] = {
30+
static const unsigned int gb18030ext_2uni_pagefe[96] = {
3131
/* 0xfe */
32-
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
33-
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
34-
0x2e81, 0xe816, 0xe817, 0xe818, 0x2e84, 0x3473, 0x3447, 0x2e88,
35-
0x2e8b, 0xe81e, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e,
36-
0x3918, 0xe826, 0x39cf, 0x39df, 0x3a73, 0x39d0, 0xe82b, 0xe82c,
37-
0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0xe831, 0xe832, 0x2eaa, 0x4056,
38-
0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0xe83b, 0x43b1,
39-
0x43ac, 0x2ebb, 0x43dd, 0x44d6, 0x4661, 0x464c, 0xe843, 0x4723,
40-
0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982,
41-
0x4983, 0x4985, 0x4986, 0x499f, 0x499b, 0x49b7, 0x49b6, 0xe854,
42-
0xe855, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13,
43-
0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18, 0x4d19, 0x4dae, 0xe864,
32+
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
33+
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
34+
0x2e81, 0x20087, 0x20089, 0x200cc, 0x2e84, 0x3473, 0x3447, 0x2e88,
35+
0x2e8b, 0x9fb4, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e,
36+
0x3918, 0x9fb5, 0x39cf, 0x39df, 0x3a73, 0x39d0, 0x9fb6, 0x9fb7,
37+
0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0x215d7, 0x9fb8, 0x2eaa, 0x4056,
38+
0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0x2298f, 0x43b1,
39+
0x43ac, 0x2ebb, 0x43dd, 0x44d6, 0x4661, 0x464c, 0x9fb9, 0x4723,
40+
0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982,
41+
0x4983, 0x4985, 0x4986, 0x499f, 0x499b, 0x49b7, 0x49b6, 0x9fba,
42+
0x241fe, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13,
43+
0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18, 0x4d19, 0x4dae, 0x9fbb,
4444
};
4545

4646
static int
@@ -52,7 +52,7 @@ gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
5252
unsigned char c2 = s[1];
5353
if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0x80 && c2 < 0xff)) {
5454
unsigned int i = 190 * (c1 - 0x81) + (c2 - (c2 >= 0x80 ? 0x41 : 0x40));
55-
unsigned short wc = 0xfffd;
55+
unsigned int wc = 0xfffd;
5656
switch (c1) {
5757
case 0xa2:
5858
if (i >= 6376 && i <= 6381) /* 0xA2AB..0xA2B0 */
@@ -77,12 +77,14 @@ gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
7777
case 0xa6:
7878
if (i >= 7150 && i <= 7157) /* 0xA6B9..0xA6C0 */
7979
wc = 0xe785 + (i - 7150);
80+
else if (i >= 7183 && i <= 7184) /* 0xA6DA..0xA6DB */
81+
wc = 0xfe12 - (i - 7183);
8082
else if (i >= 7182 && i <= 7190) /* 0xA6D9..0xA6DF */
81-
wc = 0xe78d + (i - 7182);
83+
wc = 0xfe10 + (i - 7182);
8284
else if (i >= 7201 && i <= 7202) /* 0xA6EC..0xA6ED */
83-
wc = 0xe794 + (i - 7201);
85+
wc = 0xfe17 + (i - 7201);
8486
else if (i == 7208) /* 0xA6F3 */
85-
wc = 0xe796;
87+
wc = 0xfe19;
8688
else if (i >= 7211 && i <= 7219) /* 0xA6F6..0xA6FE */
8789
wc = 0xe797 + (i - 7211);
8890
break;
@@ -96,7 +98,7 @@ gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
9698
if (i >= 7495 && i <= 7505) /* 0xA896..0xA8A0 */
9799
wc = 0xe7bc + (i - 7495);
98100
else if (i == 7533) /* 0xA8BC */
99-
wc = 0xe7c7;
101+
wc = 0x1e3f;
100102
else if (i == 7536) /* 0xA8BF */
101103
wc = 0x01f9;
102104
else if (i >= 7538 && i <= 7541) /* 0xA8C1..0xA8C4 */
@@ -230,6 +232,14 @@ static const unsigned short gb18030ext_page4d[16] = {
230232
0x0000, 0x0000, 0x0000, 0xfe98, 0xfe99, 0xfe9a, 0xfe9b, 0xfe9c, /*0x10-0x17*/
231233
0xfe9d, 0xfe9e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
232234
};
235+
static const unsigned short gb18030ext_page9f[16] = {
236+
0x0000, 0x0000, 0x0000, 0x0000, 0xfe59, 0xfe61, 0xfe66, 0xfe67, /*0xb0-0xb7*/
237+
0xfe6d, 0xfe7e, 0xfe90, 0xfea0, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/
238+
};
239+
static const unsigned short gb18030ext_pagefe[16] = {
240+
0xa6d9, 0xa6db, 0xa6da, 0xa6dc, 0xa6dd, 0xa6de, 0xa6df, 0xa6ec, /*0x10-0x17*/
241+
0xa6ed, 0xa6f3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
242+
};
233243

234244
static int
235245
gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
@@ -238,6 +248,8 @@ gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
238248
unsigned short c = 0;
239249
if (wc == 0x01f9)
240250
c = 0xa8bf;
251+
else if (wc == 0x1e3f)
252+
c = 0xa8bc;
241253
else if (wc == 0x20ac)
242254
c = 0xa2e3;
243255
else if (wc >= 0x2e80 && wc < 0x2ed0)
@@ -290,6 +302,22 @@ gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
290302
c = gb18030ext_page4d[wc-0x4d10];
291303
else if (wc == 0x4dae)
292304
c = 0xfe9f;
305+
else if (wc >= 0x9fb4 && wc < 0x9fbc)
306+
c = gb18030ext_page9f[wc-0x9fb0];
307+
else if (wc >= 0xfe10 && wc < 0xfe1a)
308+
c = gb18030ext_pagefe[wc-0xfe10];
309+
else if (wc == 0x20087)
310+
c = 0xfe51;
311+
else if (wc == 0x20089)
312+
c = 0xfe52;
313+
else if (wc == 0x200cc)
314+
c = 0xfe53;
315+
else if (wc == 0x215d7)
316+
c = 0xfe6c;
317+
else if (wc == 0x2298f)
318+
c = 0xfe76;
319+
else if (wc == 0x241fe)
320+
c = 0xfe91;
293321
if (c != 0) {
294322
r[0] = (c >> 8); r[1] = (c & 0xff);
295323
return 2;

tests/GB18030-BMP.TXT

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -46728,13 +46728,13 @@
4672846728
0xA6D6 0x03C7
4672946729
0xA6D7 0x03C8
4673046730
0xA6D8 0x03C9
46731-
0xA6D9 0xE78D
46732-
0xA6DA 0xE78E
46733-
0xA6DB 0xE78F
46734-
0xA6DC 0xE790
46735-
0xA6DD 0xE791
46736-
0xA6DE 0xE792
46737-
0xA6DF 0xE793
46731+
0xA6D9 0xFE10
46732+
0xA6DA 0xFE12
46733+
0xA6DB 0xFE11
46734+
0xA6DC 0xFE13
46735+
0xA6DD 0xFE14
46736+
0xA6DE 0xFE15
46737+
0xA6DF 0xFE16
4673846738
0xA6E0 0xFE35
4673946739
0xA6E1 0xFE36
4674046740
0xA6E2 0xFE39
@@ -46747,14 +46747,14 @@
4674746747
0xA6E9 0xFE42
4674846748
0xA6EA 0xFE43
4674946749
0xA6EB 0xFE44
46750-
0xA6EC 0xE794
46751-
0xA6ED 0xE795
46750+
0xA6EC 0xFE17
46751+
0xA6ED 0xFE18
4675246752
0xA6EE 0xFE3B
4675346753
0xA6EF 0xFE3C
4675446754
0xA6F0 0xFE37
4675546755
0xA6F1 0xFE38
4675646756
0xA6F2 0xFE31
46757-
0xA6F3 0xE796
46757+
0xA6F3 0xFE19
4675846758
0xA6F4 0xFE33
4675946759
0xA6F5 0xFE34
4676046760
0xA6F6 0xE797
@@ -47079,7 +47079,7 @@
4707947079
0xA8B9 0x00FC
4708047080
0xA8BA 0x00EA
4708147081
0xA8BB 0x0251
47082-
0xA8BC 0xE7C7
47082+
0xA8BC 0x1E3F
4708347083
0xA8BD 0x0144
4708447084
0xA8BE 0x0148
4708547085
0xA8BF 0x01F9
@@ -63313,35 +63313,35 @@
6331363313
0xFE4E 0xFA28
6331463314
0xFE4F 0xFA29
6331563315
0xFE50 0x2E81
63316-
0xFE51 0xE816
63317-
0xFE52 0xE817
63318-
0xFE53 0xE818
63316+
0xFE51 0x20087
63317+
0xFE52 0x20089
63318+
0xFE53 0x200CC
6331963319
0xFE54 0x2E84
6332063320
0xFE55 0x3473
6332163321
0xFE56 0x3447
6332263322
0xFE57 0x2E88
6332363323
0xFE58 0x2E8B
63324-
0xFE59 0xE81E
63324+
0xFE59 0x9FB4
6332563325
0xFE5A 0x359E
6332663326
0xFE5B 0x361A
6332763327
0xFE5C 0x360E
6332863328
0xFE5D 0x2E8C
6332963329
0xFE5E 0x2E97
6333063330
0xFE5F 0x396E
6333163331
0xFE60 0x3918
63332-
0xFE61 0xE826
63332+
0xFE61 0x9FB5
6333363333
0xFE62 0x39CF
6333463334
0xFE63 0x39DF
6333563335
0xFE64 0x3A73
6333663336
0xFE65 0x39D0
63337-
0xFE66 0xE82B
63338-
0xFE67 0xE82C
63337+
0xFE66 0x9FB6
63338+
0xFE67 0x9FB7
6333963339
0xFE68 0x3B4E
6334063340
0xFE69 0x3C6E
6334163341
0xFE6A 0x3CE0
6334263342
0xFE6B 0x2EA7
63343-
0xFE6C 0xE831
63344-
0xFE6D 0xE832
63343+
0xFE6C 0x215D7
63344+
0xFE6D 0x9FB8
6334563345
0xFE6E 0x2EAA
6334663346
0xFE6F 0x4056
6334763347
0xFE70 0x415F
@@ -63350,15 +63350,15 @@
6335063350
0xFE73 0x2EB3
6335163351
0xFE74 0x2EB6
6335263352
0xFE75 0x2EB7
63353-
0xFE76 0xE83B
63353+
0xFE76 0x2298F
6335463354
0xFE77 0x43B1
6335563355
0xFE78 0x43AC
6335663356
0xFE79 0x2EBB
6335763357
0xFE7A 0x43DD
6335863358
0xFE7B 0x44D6
6335963359
0xFE7C 0x4661
6336063360
0xFE7D 0x464C
63361-
0xFE7E 0xE843
63361+
0xFE7E 0x9FB9
6336263362
0xFE80 0x4723
6336363363
0xFE81 0x4729
6336463364
0xFE82 0x477C
@@ -63375,8 +63375,8 @@
6337563375
0xFE8D 0x499B
6337663376
0xFE8E 0x49B7
6337763377
0xFE8F 0x49B6
63378-
0xFE90 0xE854
63379-
0xFE91 0xE855
63378+
0xFE90 0x9FBA
63379+
0xFE91 0x241FE
6338063380
0xFE92 0x4CA3
6338163381
0xFE93 0x4C9F
6338263382
0xFE94 0x4CA0
@@ -63391,7 +63391,7 @@
6339163391
0xFE9D 0x4D18
6339263392
0xFE9E 0x4D19
6339363393
0xFE9F 0x4DAE
63394-
0xFEA0 0xE864
63394+
0xFEA0 0x9FBB
6339563395
0xFEA1 0xE468
6339663396
0xFEA2 0xE469
6339763397
0xFEA3 0xE46A

tests/GB18030.IRREVERSIBLE.TXT

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
0x8135F437 0x1E3F
2+
0x82359037 0x9FB4
3+
0x82359038 0x9FB5
4+
0x82359039 0x9FB6
5+
0x82359130 0x9FB7
6+
0x82359131 0x9FB8
7+
0x82359132 0x9FB9
8+
0x82359133 0x9FBA
9+
0x82359134 0x9FBB
10+
0x84318236 0xFE10
11+
0x84318237 0xFE11
12+
0x84318238 0xFE12
13+
0x84318239 0xFE13
14+
0x84318330 0xFE14
15+
0x84318331 0xFE15
16+
0x84318332 0xFE16
17+
0x84318333 0xFE17
18+
0x84318334 0xFE18
19+
0x84318335 0xFE19
20+
0x95329031 0x20087
21+
0x95329033 0x20089
22+
0x95329730 0x200CC
23+
0x9536B937 0x215D7
24+
0x9630BA35 0x2298F
25+
0x9635B630 0x241FE
26+
0xA6D9 0xE78D
27+
0xA6DA 0xE78E
28+
0xA6DB 0xE78F
29+
0xA6DC 0xE790
30+
0xA6DD 0xE791
31+
0xA6DE 0xE792
32+
0xA6DF 0xE793
33+
0xA6EC 0xE794
34+
0xA6ED 0xE795
35+
0xA6F3 0xE796
36+
0xA8BC 0xE7C7
37+
0xFE51 0xE816
38+
0xFE52 0xE817
39+
0xFE53 0xE818
40+
0xFE59 0xE81E
41+
0xFE61 0xE826
42+
0xFE66 0xE82B
43+
0xFE67 0xE82C
44+
0xFE6C 0xE831
45+
0xFE6D 0xE832
46+
0xFE76 0xE83B
47+
0xFE7E 0xE843
48+
0xFE90 0xE854
49+
0xFE91 0xE855
50+
0xFEA0 0xE864

0 commit comments

Comments
 (0)