Skip to content

Commit 2d38ac3

Browse files
committed
Sync from libiconv: a4c1470b9f603056cb74de97c199802f9dfefb36.
Message:
```text
Reported by Tomas Kalibera <tomas.kalibera@gmail.com> in
<https://lists.gnu.org/archive/html/bug-gnu-libiconv/2024-12/msg00000.html>.
* lib/converters.h (struct conv_struct): Add field 'ibyteorder'.
* lib/iconv_open2.h: Initialize the ibyteorder field.
* lib/ucs2.h (ucs2_mbtowc): Use the ibyteorder field instead of the istate field.
* lib/ucs4.h (ucs4_mbtowc): Likewise.
* lib/utf16.h (utf16_mbtowc): Likewise.
* lib/utf32.h (utf32_mbtowc): Likewise.
* tests/test-bom-state.c: New file.
* tests/Makefile.in (check): Run test-bom-state.
(test-bom-state, test-bom-state.@OBJEXT@): New targets.
(clean): Remove test-bom-state.
(SOURCE_FILES): Add test-bom-state.c.
* NEWS: Mention the change.
```
1 parent 3aa7c01 commit 2d38ac3

File tree

8 files changed

+227
-86
lines changed

8 files changed

+227
-86
lines changed

lib/converters/ucs2.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* @file ucs2.h
33
* @brief UCS-2
4-
* @copyright Copyright (C) 1999-2001, 2008, 2011, 2016 Free Software Foundation, Inc.
4+
* @copyright Copyright (C) 1999-2024 Free Software Foundation, Inc.
55
* @copyright Copyright (C) 2024 The C++ Plus Project.
66
*/
77
/*
@@ -34,7 +34,7 @@
3434
/* The state is 0 if big-endian, 1 if little-endian. */
3535
static int ucs2_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
3636
{
37-
state_t state = conv->istate;
37+
state_t state = conv->ibyteorder;
3838
int count = 0;
3939
for (; n >= 2 && count <= RET_COUNT_MAX && count <= INT_MAX - 2;)
4040
{
@@ -48,20 +48,20 @@ static int ucs2_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t
4848
}
4949
else if (wc >= 0xd800 && wc < 0xe000)
5050
{
51-
conv->istate = state;
51+
conv->ibyteorder = state;
5252
return RET_SHIFT_ILSEQ(count);
5353
}
5454
else
5555
{
5656
*pwc = wc;
57-
conv->istate = state;
57+
conv->ibyteorder = state;
5858
return count + 2;
5959
}
6060
s += 2;
6161
n -= 2;
6262
count += 2;
6363
}
64-
conv->istate = state;
64+
conv->ibyteorder = state;
6565
return RET_TOOFEW(count);
6666
}
6767

lib/converters/ucs4.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* @file ucs4.h
33
* @brief UCS-4
4-
* @copyright Copyright (C) 1999-2001, 2008, 2011, 2016, 2024 Free Software Foundation, Inc.
4+
* @copyright Copyright (C) 1999-2024 Free Software Foundation, Inc.
55
* @copyright Copyright (C) 2024 The C++ Plus Project.
66
*/
77
/*
@@ -34,7 +34,7 @@
3434
/* The state is 0 if big-endian, 1 if little-endian. */
3535
static int ucs4_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
3636
{
37-
state_t state = conv->istate;
37+
state_t state = conv->ibyteorder;
3838
int count = 0;
3939
for (; n >= 4 && count <= RET_COUNT_MAX && count <= INT_MAX - 4;)
4040
{
@@ -50,19 +50,19 @@ static int ucs4_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t
5050
else if (wc <= 0x7fffffff)
5151
{
5252
*pwc = wc;
53-
conv->istate = state;
53+
conv->ibyteorder = state;
5454
return count + 4;
5555
}
5656
else
5757
{
58-
conv->istate = state;
58+
conv->ibyteorder = state;
5959
return RET_SHIFT_ILSEQ(count);
6060
}
6161
s += 4;
6262
n -= 4;
6363
count += 4;
6464
}
65-
conv->istate = state;
65+
conv->ibyteorder = state;
6666
return RET_TOOFEW(count);
6767
}
6868

lib/converters/utf16.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* @file utf16.h
33
* @brief UTF-16
4-
* @copyright Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
4+
* @copyright Copyright (C) 1999-2024 Free Software Foundation, Inc.
55
* @copyright Copyright (C) 2024 The C++ Plus Project.
66
*/
77
/*
@@ -40,7 +40,7 @@
4040
/* The state is 0 if big-endian, 1 if little-endian. */
4141
static int utf16_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
4242
{
43-
state_t state = conv->istate;
43+
state_t state = conv->ibyteorder;
4444
int count = 0;
4545
for (; n >= 2 && count <= RET_COUNT_MAX && count <= INT_MAX - 2;)
4646
{
@@ -60,7 +60,7 @@ static int utf16_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t
6060
if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
6161
goto ilseq;
6262
*pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
63-
conv->istate = state;
63+
conv->ibyteorder = state;
6464
return count + 4;
6565
}
6666
else
@@ -73,18 +73,18 @@ static int utf16_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t
7373
else
7474
{
7575
*pwc = wc;
76-
conv->istate = state;
76+
conv->ibyteorder = state;
7777
return count + 2;
7878
}
7979
s += 2;
8080
n -= 2;
8181
count += 2;
8282
}
83-
conv->istate = state;
83+
conv->ibyteorder = state;
8484
return RET_TOOFEW(count);
8585

8686
ilseq:
87-
conv->istate = state;
87+
conv->ibyteorder = state;
8888
return RET_SHIFT_ILSEQ(count);
8989
}
9090

lib/converters/utf32.h

Lines changed: 68 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* @file utf32.h
33
* @brief UTF-32
4-
* @copyright Copyright (C) 1999-2001, 2008, 2011, 2016, 2024 Free Software Foundation, Inc.
4+
* @copyright Copyright (C) 1999-2024 Free Software Foundation, Inc.
55
* @copyright Copyright (C) 2024 The C++ Plus Project.
66
*/
77
/*
@@ -38,70 +38,81 @@
3838
only for strings containing U+FEFF characters, which is quite rare.)
3939
The default is big-endian. */
4040
/* The state is 0 if big-endian, 1 if little-endian. */
41-
static int
42-
utf32_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
41+
static int utf32_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
4342
{
44-
state_t state = conv->istate;
45-
int count = 0;
46-
for (; n >= 4 && count <= RET_COUNT_MAX && count <= INT_MAX-4;) {
47-
ucs4_t wc = (state
48-
? (ucs4_t) s[0]
49-
+ ((ucs4_t) s[1] << 8)
50-
+ ((ucs4_t) s[2] << 16)
51-
+ ((ucs4_t) s[3] << 24)
52-
: ((ucs4_t) s[0] << 24)
53-
+ ((ucs4_t) s[1] << 16)
54-
+ ((ucs4_t) s[2] << 8)
55-
+ (ucs4_t) s[3]);
56-
if (wc == 0x0000feff) {
57-
} else if (wc == 0xfffe0000u) {
58-
state ^= 1;
59-
} else {
60-
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) {
61-
*pwc = wc;
62-
conv->istate = state;
63-
return count+4;
64-
} else {
65-
conv->istate = state;
66-
return RET_SHIFT_ILSEQ(count);
67-
}
43+
state_t state = conv->ibyteorder;
44+
int count = 0;
45+
for (; n >= 4 && count <= RET_COUNT_MAX && count <= INT_MAX - 4;)
46+
{
47+
ucs4_t wc = (state ? (ucs4_t)s[0] + ((ucs4_t)s[1] << 8) + ((ucs4_t)s[2] << 16) + ((ucs4_t)s[3] << 24)
48+
: ((ucs4_t)s[0] << 24) + ((ucs4_t)s[1] << 16) + ((ucs4_t)s[2] << 8) + (ucs4_t)s[3]);
49+
if (wc == 0x0000feff)
50+
{
51+
}
52+
else if (wc == 0xfffe0000u)
53+
{
54+
state ^= 1;
55+
}
56+
else
57+
{
58+
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
59+
{
60+
*pwc = wc;
61+
conv->ibyteorder = state;
62+
return count + 4;
63+
}
64+
else
65+
{
66+
conv->ibyteorder = state;
67+
return RET_SHIFT_ILSEQ(count);
68+
}
69+
}
70+
s += 4;
71+
n -= 4;
72+
count += 4;
6873
}
69-
s += 4; n -= 4; count += 4;
70-
}
71-
conv->istate = state;
72-
return RET_TOOFEW(count);
74+
conv->ibyteorder = state;
75+
return RET_TOOFEW(count);
7376
}
7477

7578
/* We output UTF-32 in big-endian order, with byte-order mark. */
7679
/* The state is 0 at the beginning, 1 after the BOM has been written. */
77-
static int
78-
utf32_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
80+
static int utf32_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
7981
{
80-
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) {
81-
int count = 0;
82-
if (!conv->ostate) {
83-
if (n >= 4) {
84-
r[0] = 0x00;
85-
r[1] = 0x00;
86-
r[2] = 0xFE;
87-
r[3] = 0xFF;
88-
r += 4; n -= 4; count += 4;
89-
} else
90-
return RET_TOOSMALL;
91-
}
92-
if (wc < 0x110000) {
93-
if (n >= 4) {
94-
r[0] = 0;
95-
r[1] = (unsigned char) (wc >> 16);
96-
r[2] = (unsigned char) (wc >> 8);
97-
r[3] = (unsigned char) wc;
98-
conv->ostate = 1;
99-
return count+4;
100-
} else
101-
return RET_TOOSMALL;
82+
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
83+
{
84+
int count = 0;
85+
if (!conv->ostate)
86+
{
87+
if (n >= 4)
88+
{
89+
r[0] = 0x00;
90+
r[1] = 0x00;
91+
r[2] = 0xFE;
92+
r[3] = 0xFF;
93+
r += 4;
94+
n -= 4;
95+
count += 4;
96+
}
97+
else
98+
return RET_TOOSMALL;
99+
}
100+
if (wc < 0x110000)
101+
{
102+
if (n >= 4)
103+
{
104+
r[0] = 0;
105+
r[1] = (unsigned char)(wc >> 16);
106+
r[2] = (unsigned char)(wc >> 8);
107+
r[3] = (unsigned char)wc;
108+
conv->ostate = 1;
109+
return count + 4;
110+
}
111+
else
112+
return RET_TOOSMALL;
113+
}
102114
}
103-
}
104-
return RET_ILUNI;
115+
return RET_ILUNI;
105116
}
106117

107118
#endif /* _UTF32_H_ */

lib/iconv.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ _CPPP_API reiconv_t reiconv_open_from_index(int fromcode, int tocode, bool disca
108108
// Initialize the states.
109109
cd->istate = 0;
110110
cd->ostate = 0;
111+
cd->ibyteorder = 0;
111112

112113
// Initialize the operation flags.
113114
cd->discard_ilseq = discard_ilseq;

lib/reiconv_defines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ struct conv_struct
130130
/* Input (conversion multibyte -> unicode) */
131131
int iindex;
132132
struct mbtowc_funcs ifuncs;
133+
state_t ibyteorder;
133134
state_t istate;
134135
/* Output (conversion unicode -> multibyte) */
135136
int oindex;

0 commit comments

Comments
 (0)