Skip to content

Commit e4d4f72

Browse files
committed
Sync from libiconv: ef2f457f0b409da5a682fc3387d7dbeb55147483.
Message: Support multiple suffixes in iconv_open, like glibc does. * lib/iconv_open1.h: Use a loop when looking for the common suffixes. * tests/test-discard.c: New file. * tests/Makefile.in (check): Run test-discard. (test-discard, test-discard.@OBJEXT@): New targets. (clean): Remove test-discard. (SOURCE_FILES): Add test-discard.c. * NEWS: Mention the change.
1 parent e1f8540 commit e4d4f72

File tree

4 files changed

+277
-11
lines changed

4 files changed

+277
-11
lines changed

lib/iconv.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ _CPPP_API reiconv_t reiconv_open(const char *tocode, const char *fromcode)
154154

155155
for (size_t i = 0; i < fromcode_len; i++)
156156
{
157-
if (i < fromcode_len && fromcode_buf[i] == '/')
157+
if (fromcode_buf[i] == '/')
158158
{
159159
fromcode_buf[i] = '\0';
160160
if (i + 7 < fromcode_len && memcmp(fromcode_buf + i + 1, "/IGNORE", 8) == 0)
@@ -169,7 +169,7 @@ _CPPP_API reiconv_t reiconv_open(const char *tocode, const char *fromcode)
169169
if (tocode_buf[i] == '/')
170170
{
171171
tocode_buf[i] = '\0';
172-
if (i + 7 < fromcode_len && memcmp(tocode_buf + i + 1, "/IGNORE", 8) == 0)
172+
if (i + 7 < tocode_len && memcmp(tocode_buf + i + 1, "/IGNORE", 7) == 0)
173173
{
174174
discard_ilseq = true;
175175
}

tests/test-bom-state.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
2-
* @file sort.cpp
2+
* @file test-bom-state.cpp
33
* @brief Checks that iconv does not forget about the byte-order state.
4-
* @author Bruno Haible
4+
* @author Bruno Haible, ChenPi11
55
* @copyright Copyright (C) 2024 Free Software Foundation, Inc.
66
*/
77
/*
@@ -66,7 +66,7 @@ static void test_one_input(const char *fromcode, const char *input, std::size_t
6666
char *outbuf = outbuf1;
6767
std::size_t outbytesleft = sizeof(outbuf1);
6868
std::size_t ret = ::reiconv_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
69-
if (ret != (size_t)(-1) || errno != E2BIG || outbytesleft != 0)
69+
if (ret != (std::size_t)(-1) || errno != E2BIG || outbytesleft != 0)
7070
{
7171
std::abort();
7272
}

tests/test-discard.cpp

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
/**
2+
* @file test-discard.cpp
3+
* @brief Checks the behaviour of iconv() with suffix //IGNORE.
4+
* @author Bruno Haible, ChenPi11
5+
* @copyright Copyright (C) 2024 Free Software Foundation, Inc.
6+
*/
7+
/*
8+
* This file is part of the cppp-reiconv Library.
9+
*
10+
* The cppp-reiconv Library is free software; you can redistribute it
11+
* and/or modify it under the terms of the GNU Lesser General Public
12+
* License as published by the Free Software Foundation; either version 3
13+
* of the License, or (at your option) any later version.
14+
*
15+
* The cppp-reiconv Library is distributed in the hope that it will be
16+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18+
* Lesser General Public License for more details.
19+
*
20+
* You should have received a copy of the GNU Lesser General Public
21+
* License along with the cppp-reiconv Library; see the file LICENSE.
22+
* If not, see <https://www.gnu.org/licenses/>.
23+
*/
24+
25+
#include "iconv.h"
26+
27+
#include <cerrno>
28+
#include <cstdio>
29+
#include <errno.h>
30+
#include <stdlib.h>
31+
32+
static const char input1[8] = "3\xd4\xe2\x84\x83\xc3\x9f";
33+
static const char input2[8] = "3\xe2\x84\x83\xd4\xc3\x9f";
34+
35+
static void test_default(::reiconv_t cd)
36+
{
37+
char output[10];
38+
char *inbuf;
39+
std::size_t inbytesleft;
40+
char *outbuf;
41+
std::size_t outbytesleft;
42+
std::size_t ret;
43+
44+
inbuf = (char *)input1;
45+
inbytesleft = sizeof(input1) - 1;
46+
outbuf = output;
47+
outbytesleft = sizeof(output);
48+
ret = ::reiconv_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
49+
if (ret != (size_t)(-1) || errno != EILSEQ || sizeof(input1) - 1 - inbytesleft != 1)
50+
{
51+
std::abort();
52+
}
53+
if (sizeof(output) - outbytesleft != 1 || output[0] != '3')
54+
{
55+
std::abort();
56+
}
57+
58+
inbuf = (char *)input2;
59+
inbytesleft = sizeof(input2) - 1;
60+
outbuf = output;
61+
outbytesleft = sizeof(output);
62+
ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
63+
if (ret != (std::size_t)(-1) || errno != EILSEQ || sizeof(input2) - 1 - inbytesleft != 1)
64+
{
65+
std::abort();
66+
}
67+
if (sizeof(output) - outbytesleft != 1 || output[0] != '3')
68+
{
69+
std::abort();
70+
}
71+
}
72+
73+
static void test_translit(::reiconv_t cd)
74+
{
75+
char output[10];
76+
char *inbuf;
77+
std::size_t inbytesleft;
78+
char *outbuf;
79+
std::size_t outbytesleft;
80+
std::size_t ret;
81+
82+
inbuf = (char *)input1;
83+
inbytesleft = sizeof(input1) - 1;
84+
outbuf = output;
85+
outbytesleft = sizeof(output);
86+
ret = ::reiconv_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
87+
if (ret != (std::size_t)(-1) || errno != EILSEQ || sizeof(input1) - 1 - inbytesleft != 1)
88+
{
89+
std::abort();
90+
}
91+
if (sizeof(output) - outbytesleft != 1 || output[0] != '3')
92+
{
93+
std::abort();
94+
}
95+
96+
inbuf = (char *)input2;
97+
inbytesleft = sizeof(input2) - 1;
98+
outbuf = output;
99+
outbytesleft = sizeof(output);
100+
ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
101+
if (ret != (std::size_t)(-1) || errno != EILSEQ || sizeof(input2) - 1 - inbytesleft != 4)
102+
{
103+
std::abort();
104+
}
105+
if (sizeof(output) - outbytesleft != 3 || output[0] != '3' || output[1] != '\xb0' || output[2] != 'C')
106+
{
107+
std::abort();
108+
}
109+
}
110+
111+
static void test_ignore(::reiconv_t cd)
112+
{
113+
char output[10];
114+
char *inbuf;
115+
std::size_t inbytesleft;
116+
char *outbuf;
117+
std::size_t outbytesleft;
118+
std::size_t ret;
119+
120+
inbuf = (char *)input1;
121+
inbytesleft = sizeof(input1) - 1;
122+
outbuf = output;
123+
outbytesleft = sizeof(output) - 1;
124+
ret = ::reiconv_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
125+
if (ret != 1 || inbytesleft != 0)
126+
{
127+
std::abort();
128+
}
129+
if (sizeof(output) - outbytesleft != 3 || output[0] != '3' || output[1] != '\xdf')
130+
{
131+
std::abort();
132+
}
133+
134+
inbuf = (char *)input2;
135+
inbytesleft = sizeof(input2) - 1;
136+
outbuf = output;
137+
outbytesleft = sizeof(output) - 1;
138+
ret = ::reiconv_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
139+
if (ret != 1 || inbytesleft != 0)
140+
{
141+
std::abort();
142+
}
143+
if (sizeof(output) - outbytesleft != 3 || output[0] != '3' || output[1] != '\xdf')
144+
{
145+
std::abort();
146+
}
147+
}
148+
149+
static void test_ignore_translit(::reiconv_t cd)
150+
{
151+
char output[10];
152+
char *inbuf;
153+
std::size_t inbytesleft;
154+
char *outbuf;
155+
std::size_t outbytesleft;
156+
std::size_t ret;
157+
158+
inbuf = (char *)input1;
159+
inbytesleft = sizeof(input1) - 1;
160+
outbuf = output;
161+
outbytesleft = sizeof(output);
162+
ret = ::reiconv_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
163+
if (ret != 1 || inbytesleft != 0)
164+
{
165+
std::abort();
166+
}
167+
if (sizeof(output) - outbytesleft != 4 || output[0] != '3' || output[1] != '\xb0' || output[2] != 'C' ||
168+
output[3] != '\xdf')
169+
{
170+
std::abort();
171+
}
172+
173+
inbuf = (char *)input2;
174+
inbytesleft = sizeof(input2) - 1;
175+
outbuf = output;
176+
outbytesleft = sizeof(output);
177+
ret = ::reiconv_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
178+
if (ret != 1 || inbytesleft != 0)
179+
{
180+
std::abort();
181+
}
182+
if (sizeof(output) - outbytesleft != 4 || output[0] != '3' || output[1] != '\xb0' || output[2] != 'C' ||
183+
output[3] != '\xdf')
184+
{
185+
std::abort();
186+
}
187+
}
188+
189+
int main()
190+
{
191+
::reiconv_t cd;
192+
193+
#pragma region Default conversion
194+
195+
cd = ::reiconv_open("ISO-8859-1", "UTF-8");
196+
test_default(cd);
197+
::reiconv_handle_close(cd);
198+
199+
cd = ::reiconv_open_from_index(ENCODING_UTF8, ENCODING_ISO8859_1, 0);
200+
test_default(cd);
201+
::reiconv_handle_close(cd);
202+
203+
cd = ::reiconv_open_from_codepage(65001, 28591, 0);
204+
test_default(cd);
205+
::reiconv_handle_close(cd);
206+
207+
#pragma endregion
208+
209+
#pragma region Ignore conversion
210+
211+
cd = ::reiconv_open("ISO-8859-1//IGNORE", "UTF-8");
212+
test_ignore(cd);
213+
::reiconv_handle_close(cd);
214+
215+
cd = ::reiconv_open("ISO-8859-1", "UTF-8//IGNORE");
216+
test_ignore(cd);
217+
::reiconv_handle_close(cd);
218+
219+
cd = ::reiconv_open_from_index(ENCODING_UTF8, ENCODING_ISO8859_1, 1);
220+
test_ignore(cd);
221+
::reiconv_handle_close(cd);
222+
223+
cd = ::reiconv_open_from_codepage(65001, 28591, 1);
224+
test_ignore(cd);
225+
::reiconv_handle_close(cd);
226+
227+
#pragma endregion
228+
229+
#if TEST_TRANSLIT
230+
#pragma region Translit conversion
231+
cd = ::reiconv_open("ISO-8859-1//TRANSLIT", "UTF-8");
232+
test_translit(cd);
233+
::reiconv_handle_close(cd);
234+
235+
cd = ::reiconv_open("ISO-8859-1", "UTF-8//TRANSLIT");
236+
test_translit(cd);
237+
::reiconv_handle_close(cd);
238+
239+
// TODO: Translit support.
240+
241+
#pragma endregion
242+
#endif
243+
244+
#if TEST_TRANSLIT
245+
{
246+
iconv_t cd = iconv_open("ISO-8859-1//IGNORE//TRANSLIT", "UTF-8");
247+
test_ignore_translit(cd);
248+
iconv_close(cd);
249+
}
250+
{
251+
iconv_t cd = iconv_open("ISO-8859-1//TRANSLIT//IGNORE", "UTF-8");
252+
test_ignore_translit(cd);
253+
iconv_close(cd);
254+
}
255+
#endif
256+
257+
return EXIT_SUCCESS;
258+
}

tests/tests.cmake

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,25 @@ include_directories("${output_includedir}")
1212
# Test executables
1313
add_executable(data-generator "${CMAKE_CURRENT_SOURCE_DIR}/tests/data-generator.cpp")
1414
add_executable(test-bom-state "${CMAKE_CURRENT_SOURCE_DIR}/tests/test-bom-state.cpp")
15+
add_executable(test-discard "${CMAKE_CURRENT_SOURCE_DIR}/tests/test-discard.cpp")
1516
add_executable(check-encoding "${CMAKE_CURRENT_SOURCE_DIR}/tests/check-encoding.cpp")
1617
add_executable(check-stateful "${CMAKE_CURRENT_SOURCE_DIR}/tests/check-stateful.cpp")
1718
add_executable(check-stateless "${CMAKE_CURRENT_SOURCE_DIR}/tests/check-stateless.cpp")
1819
add_executable(sort "${CMAKE_CURRENT_SOURCE_DIR}/tests/sort.cpp")
1920

2021
target_link_libraries(test-bom-state libcppp-reiconv.static)
22+
target_link_libraries(test-discard libcppp-reiconv.static)
2123
target_link_libraries(check-encoding libcppp-reiconv.static)
2224
target_link_libraries(check-stateful libcppp-reiconv.static)
2325
target_link_libraries(check-stateless libcppp-reiconv.static)
2426

25-
set_target_properties(data-generator PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}" )
26-
set_target_properties(test-bom-state PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}" )
27-
set_target_properties(check-encoding PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}" )
28-
set_target_properties(check-stateful PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}" )
29-
set_target_properties(check-stateless PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}" )
30-
set_target_properties(sort PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}" )
27+
set_target_properties(data-generator PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}")
28+
set_target_properties(test-bom-state PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}")
29+
set_target_properties(test-discard PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}")
30+
set_target_properties(check-encoding PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}")
31+
set_target_properties(check-stateful PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}")
32+
set_target_properties(check-stateless PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}")
33+
set_target_properties(sort PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_testsdir}")
3134

3235
macro(convert_to_crlf input_file output_file)
3336
file(READ ${input_file} file_content)
@@ -111,6 +114,11 @@ add_test(NAME test-bom-state
111114
COMMAND "$<TARGET_FILE:test-bom-state>"
112115
)
113116

117+
add_test(NAME test-discard
118+
WORKING_DIRECTORY "${output_testsdir}"
119+
COMMAND "$<TARGET_FILE:test-discard>"
120+
)
121+
114122
# General multi-byte encodings.
115123
test("stateless" "UTF-8")
116124
test("stateful" "UTF-16")

0 commit comments

Comments
 (0)