Skip to content

Commit 68ac8a9

Browse files
committed
New EBCDIC encodings.
Reported by Ulrich Schwab and Calvin Buckley via Jack Woehr. * NOTES: Mention how to enable EBCDIC encodings. * tests/IBM-*.TXT: New files. * tools/8bit_tab_to_h.c (main): Emit copyright header with year 2022. * tools/Makefile: Add rules for generating ebcdic*.h. * lib/ebcdic*.h: New files, automatically generated by tools/Makefile. * lib/ebcdic838.h: Tweak reverse mapping manually. * lib/ebcdic1160.h: Likewise. * lib/converters.h: Include all ebcdic*.h. * lib/encodings_zos.def: New file. * lib/genaliases2.c: Include encodings_zos.def. * lib/genflags.c: Likewise. * Makefile.devel (lib/aliases_zos.h lib/canonical_zos.h): New rule. (lib/flags.h, totally-clean): Update. * lib/aliases2.h: Include aliases_zos.h. * lib/iconv.c (USE_ZOS): New macro. Include encodings_zos.def, canonical_zos.h. * README, man/iconv_open.3: Document the IBM-* encodings. * tests/Makefile.in (check-extra-yes): Also test the EBCDIC encodings.
1 parent 5897684 commit 68ac8a9

File tree

123 files changed

+21889
-11
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+21889
-11
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@
288288
/lib/aliases_osf1.h
289289
/lib/aliases_osf1_sysosf1.h
290290
/lib/aliases_dos.h
291+
/lib/aliases_zos.h
291292
/lib/aliases_extra.h
292293
/lib/canonical.h
293294
/lib/canonical_sysaix.h
@@ -299,6 +300,7 @@
299300
/lib/canonical_osf1.h
300301
/lib/canonical_osf1_sysosf1.h
301302
/lib/canonical_dos.h
303+
/lib/canonical_zos.h
302304
/lib/canonical_extra.h
303305
/lib/canonical_local.h
304306
/lib/canonical_local_sysaix.h

ChangeLog

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,26 @@
1+
2022-01-23 Bruno Haible <bruno@clisp.org>
2+
3+
New EBCDIC encodings.
4+
Reported by Ulrich Schwab and Calvin Buckley via Jack Woehr.
5+
* NOTES: Mention how to enable EBCDIC encodings.
6+
* tests/IBM-*.TXT: New files.
7+
* tools/8bit_tab_to_h.c (main): Emit copyright header with year 2022.
8+
* tools/Makefile: Add rules for generating ebcdic*.h.
9+
* lib/ebcdic*.h: New files, automatically generated by tools/Makefile.
10+
* lib/ebcdic838.h: Tweak reverse mapping manually.
11+
* lib/ebcdic1160.h: Likewise.
12+
* lib/converters.h: Include all ebcdic*.h.
13+
* lib/encodings_zos.def: New file.
14+
* lib/genaliases2.c: Include encodings_zos.def.
15+
* lib/genflags.c: Likewise.
16+
* Makefile.devel (lib/aliases_zos.h lib/canonical_zos.h): New rule.
17+
(lib/flags.h, totally-clean): Update.
18+
* lib/aliases2.h: Include aliases_zos.h.
19+
* lib/iconv.c (USE_ZOS): New macro.
20+
Include encodings_zos.def, canonical_zos.h.
21+
* README, man/iconv_open.3: Document the IBM-* encodings.
22+
* tests/Makefile.in (check-extra-yes): Also test the EBCDIC encodings.
23+
124
2022-01-04 Bruno Haible <bruno@clisp.org>
225

326
Update after gnulib changed.

Makefile.devel

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ all : srclib/Makefile.gnulib srclib/Makefile.in \
2323
lib/aliases_aix.h lib/aliases_aix_sysaix.h \
2424
lib/aliases_osf1.h lib/aliases_osf1_sysosf1.h \
2525
lib/aliases_dos.h \
26+
lib/aliases_zos.h \
2627
lib/aliases_extra.h \
2728
lib/flags.h lib/translit.h \
2829
man/iconv.1.html man/iconv.3.html man/iconv_close.3.html man/iconv_open.3.html man/iconvctl.3.html man/iconv_open_into.3.html \
@@ -190,13 +191,18 @@ lib/aliases_dos.h lib/canonical_dos.h : lib/encodings_dos.def lib/genaliases2.c
190191
./genaliases dos lib/aliases_dos.h lib/canonical_dos.h
191192
$(RM) genaliases
192193

194+
lib/aliases_zos.h lib/canonical_zos.h : lib/encodings_zos.def lib/genaliases2.c
195+
$(CC) $(CFLAGS) -DUSE_ZOS lib/genaliases2.c -o genaliases
196+
./genaliases zos lib/aliases_zos.h lib/canonical_zos.h
197+
$(RM) genaliases
198+
193199
lib/aliases_extra.h lib/canonical_extra.h : lib/encodings_extra.def lib/genaliases2.c
194200
$(CC) $(CFLAGS) -DUSE_EXTRA lib/genaliases2.c -o genaliases
195201
./genaliases extra lib/aliases_extra.h lib/canonical_extra.h
196202
$(RM) genaliases
197203

198204

199-
lib/flags.h : lib/encodings.def lib/encodings_aix.def lib/encodings_osf1.def lib/encodings_dos.def lib/encodings_extra.def lib/converters.h lib/genflags.c
205+
lib/flags.h : lib/encodings.def lib/encodings_aix.def lib/encodings_osf1.def lib/encodings_dos.def lib/encodings_zos.def lib/encodings_extra.def lib/converters.h lib/genflags.c
200206
$(CC) $(CFLAGS) lib/genflags.c -o genflags
201207
./genflags > lib/flags.h
202208
$(RM) genflags
@@ -231,11 +237,13 @@ totally-clean : force
231237
rm -f lib/aliases_aix.h lib/aliases_aix_sysaix.h
232238
rm -f lib/aliases_osf1.h lib/aliases_osf1_sysosf1.h
233239
rm -f lib/aliases_dos.h
240+
rm -f lib/aliases_zos.h
234241
rm -f lib/aliases_extra.h
235242
rm -f lib/canonical.h lib/canonical_sysaix.h lib/canonical_syshpux.h lib/canonical_sysosf1.h lib/canonical_syssolaris.h
236243
rm -f lib/canonical_aix.h lib/canonical_aix_sysaix.h
237244
rm -f lib/canonical_osf1.h lib/canonical_osf1_sysosf1.h
238245
rm -f lib/canonical_dos.h
246+
rm -f lib/canonical_zos.h
239247
rm -f lib/canonical_extra.h
240248
rm -f lib/canonical_local.h lib/canonical_local_sysaix.h lib/canonical_local_syshpux.h lib/canonical_local_sysosf1.h lib/canonical_local_syssolaris.h
241249
rm -f lib/flags.h

NOTES

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,9 @@ A: No, they are not in use any more. Supporting ISO-646 variants is pointless
328328
since ISO-8859-* have been adopted.
329329

330330
Q: Support EBCDIC ?
331-
A: No!
331+
A: Available through --enable-extra-encodings.
332+
Why? Because several people (Ulrich Schwab, Calvin Buckley) have shown
333+
interest in these encodings, by preparing forks of GNU libiconv.
332334

333335
Q: How do I add a new character set?
334336
A: 1. Explain the "why" in this file, above.

README

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,24 @@ support for a few extra encodings:
7272
TDS565
7373
Platform specifics
7474
ATARIST, RISCOS-LATIN1
75+
EBCDIC compatible (not ASCII compatible, very rarely used)
76+
European languages
77+
IBM-{037,273,277,278,280,282,284,285,297,423,500,870,871,875,880},
78+
IBM-{905,924,1025,1026,1047,1112,1122,1123,1140,1141,1142,1143},
79+
IBM-{1144,1145,1146,1147,1148,1149,1153,1154,1155,1156,1157,1158},
80+
IBM-{1165,1166,4971}
81+
Semitic languages
82+
IBM-{424,425,12712,16804}
83+
Persian
84+
IBM-1097
85+
Thai
86+
IBM-{838,1160}
87+
Laotian
88+
IBM-1132
89+
Vietnamese
90+
IBM-{1130,1164}
91+
Indic languages
92+
IBM-1137
7593

7694
It can convert from any of these encodings to any other, through Unicode
7795
conversion.

lib/aliases2.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 1999-2003, 2008 Free Software Foundation, Inc.
2+
* Copyright (C) 1999-2003, 2008, 2022 Free Software Foundation, Inc.
33
* This file is part of the GNU LIBICONV Library.
44
*
55
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -34,6 +34,9 @@
3434
#ifdef USE_DOS
3535
# include "aliases_dos.h"
3636
#endif
37+
#ifdef USE_ZOS
38+
# include "aliases_zos.h"
39+
#endif
3740
#ifdef USE_EXTRA
3841
# include "aliases_extra.h"
3942
#endif

lib/converters.h

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 1999-2002, 2004-2011, 2016 Free Software Foundation, Inc.
2+
* Copyright (C) 1999-2002, 2004-2011, 2016, 2022 Free Software Foundation, Inc.
33
* This file is part of the GNU LIBICONV Library.
44
*
55
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -290,6 +290,62 @@ typedef struct {
290290
#include "cp1125.h"
291291
#endif
292292

293+
#ifdef USE_ZOS
294+
#include "ebcdic037.h"
295+
#include "ebcdic273.h"
296+
#include "ebcdic277.h"
297+
#include "ebcdic278.h"
298+
#include "ebcdic280.h"
299+
#include "ebcdic282.h"
300+
#include "ebcdic284.h"
301+
#include "ebcdic285.h"
302+
#include "ebcdic297.h"
303+
#include "ebcdic423.h"
304+
#include "ebcdic424.h"
305+
#include "ebcdic425.h"
306+
#include "ebcdic500.h"
307+
#include "ebcdic838.h"
308+
#include "ebcdic870.h"
309+
#include "ebcdic871.h"
310+
#include "ebcdic875.h"
311+
#include "ebcdic880.h"
312+
#include "ebcdic905.h"
313+
#include "ebcdic924.h"
314+
#include "ebcdic1025.h"
315+
#include "ebcdic1026.h"
316+
#include "ebcdic1047.h"
317+
#include "ebcdic1097.h"
318+
#include "ebcdic1112.h"
319+
#include "ebcdic1122.h"
320+
#include "ebcdic1123.h"
321+
#include "ebcdic1130.h"
322+
#include "ebcdic1132.h"
323+
#include "ebcdic1137.h"
324+
#include "ebcdic1140.h"
325+
#include "ebcdic1141.h"
326+
#include "ebcdic1142.h"
327+
#include "ebcdic1143.h"
328+
#include "ebcdic1144.h"
329+
#include "ebcdic1145.h"
330+
#include "ebcdic1146.h"
331+
#include "ebcdic1147.h"
332+
#include "ebcdic1148.h"
333+
#include "ebcdic1149.h"
334+
#include "ebcdic1153.h"
335+
#include "ebcdic1154.h"
336+
#include "ebcdic1155.h"
337+
#include "ebcdic1156.h"
338+
#include "ebcdic1157.h"
339+
#include "ebcdic1158.h"
340+
#include "ebcdic1160.h"
341+
#include "ebcdic1164.h"
342+
#include "ebcdic1165.h"
343+
#include "ebcdic1166.h"
344+
#include "ebcdic4971.h"
345+
#include "ebcdic12712.h"
346+
#include "ebcdic16804.h"
347+
#endif
348+
293349
#ifdef USE_EXTRA
294350
#include "euc_jisx0213.h"
295351
#include "shift_jisx0213.h"

lib/ebcdic037.h

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright (C) 1999-2022 Free Software Foundation, Inc.
3+
* This file is part of the GNU LIBICONV Library.
4+
*
5+
* The GNU LIBICONV Library is free software; you can redistribute it
6+
* and/or modify it under the terms of the GNU Lesser General Public
7+
* License as published by the Free Software Foundation; either version 2
8+
* of the License, or (at your option) any later version.
9+
*
10+
* The GNU LIBICONV Library is distributed in the hope that it will be
11+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13+
* Lesser General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Lesser General Public
16+
* License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17+
* If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
/*
21+
* IBM-037
22+
*/
23+
24+
/* Decoding table for IBM-037: entry i is the 16-bit value that
   ebcdic037_mbtowc stores in *pwc for input byte i (a Unicode code point,
   judging by the "_2uni" name).  All 256 byte values have an entry. */
static const unsigned short ebcdic037_2uni[256] = {
25+
/* 0x00 */
26+
0x0000, 0x0001, 0x0002, 0x0003, 0x009c, 0x0009, 0x0086, 0x007f,
27+
0x0097, 0x008d, 0x008e, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
28+
/* 0x10 */
29+
0x0010, 0x0011, 0x0012, 0x0013, 0x009d, 0x0085, 0x0008, 0x0087,
30+
0x0018, 0x0019, 0x0092, 0x008f, 0x001c, 0x001d, 0x001e, 0x001f,
31+
/* 0x20 */
32+
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000a, 0x0017, 0x001b,
33+
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x0005, 0x0006, 0x0007,
34+
/* 0x30 */
35+
0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004,
36+
0x0098, 0x0099, 0x009a, 0x009b, 0x0014, 0x0015, 0x009e, 0x001a,
37+
/* 0x40 */
38+
0x0020, 0x00a0, 0x00e2, 0x00e4, 0x00e0, 0x00e1, 0x00e3, 0x00e5,
39+
0x00e7, 0x00f1, 0x00a2, 0x002e, 0x003c, 0x0028, 0x002b, 0x007c,
40+
/* 0x50 */
41+
0x0026, 0x00e9, 0x00ea, 0x00eb, 0x00e8, 0x00ed, 0x00ee, 0x00ef,
42+
0x00ec, 0x00df, 0x0021, 0x0024, 0x002a, 0x0029, 0x003b, 0x00ac,
43+
/* 0x60 */
44+
0x002d, 0x002f, 0x00c2, 0x00c4, 0x00c0, 0x00c1, 0x00c3, 0x00c5,
45+
0x00c7, 0x00d1, 0x00a6, 0x002c, 0x0025, 0x005f, 0x003e, 0x003f,
46+
/* 0x70 */
47+
0x00f8, 0x00c9, 0x00ca, 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf,
48+
0x00cc, 0x0060, 0x003a, 0x0023, 0x0040, 0x0027, 0x003d, 0x0022,
49+
/* 0x80 */
50+
0x00d8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
51+
0x0068, 0x0069, 0x00ab, 0x00bb, 0x00f0, 0x00fd, 0x00fe, 0x00b1,
52+
/* 0x90 */
53+
0x00b0, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,
54+
0x0071, 0x0072, 0x00aa, 0x00ba, 0x00e6, 0x00b8, 0x00c6, 0x00a4,
55+
/* 0xa0 */
56+
0x00b5, 0x007e, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078,
57+
0x0079, 0x007a, 0x00a1, 0x00bf, 0x00d0, 0x00dd, 0x00de, 0x00ae,
58+
/* 0xb0 */
59+
0x005e, 0x00a3, 0x00a5, 0x00b7, 0x00a9, 0x00a7, 0x00b6, 0x00bc,
60+
0x00bd, 0x00be, 0x005b, 0x005d, 0x00af, 0x00a8, 0x00b4, 0x00d7,
61+
/* 0xc0 */
62+
0x007b, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
63+
0x0048, 0x0049, 0x00ad, 0x00f4, 0x00f6, 0x00f2, 0x00f3, 0x00f5,
64+
/* 0xd0 */
65+
0x007d, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050,
66+
0x0051, 0x0052, 0x00b9, 0x00fb, 0x00fc, 0x00f9, 0x00fa, 0x00ff,
67+
/* 0xe0 */
68+
0x005c, 0x00f7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058,
69+
0x0059, 0x005a, 0x00b2, 0x00d4, 0x00d6, 0x00d2, 0x00d3, 0x00d5,
70+
/* 0xf0 */
71+
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
72+
0x0038, 0x0039, 0x00b3, 0x00db, 0x00dc, 0x00d9, 0x00da, 0x009f,
73+
};
74+
75+
static int
76+
ebcdic037_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
77+
{
78+
unsigned char c = *s;
79+
*pwc = (ucs4_t) ebcdic037_2uni[c];
80+
return 1;
81+
}
82+
83+
/* Encoding table for wide characters 0x00..0xff: entry wc is the IBM-037
   byte that ebcdic037_wctomb writes for that character.  That function
   treats an entry of 0 as "no mapping" unless wc itself is 0. */
static const unsigned char ebcdic037_page00[256] = {
84+
0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, /* 0x00-0x07 */
85+
0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
86+
0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, /* 0x10-0x17 */
87+
0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
88+
0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, /* 0x20-0x27 */
89+
0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, /* 0x28-0x2f */
90+
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0x30-0x37 */
91+
0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, /* 0x38-0x3f */
92+
0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0x40-0x47 */
93+
0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, /* 0x48-0x4f */
94+
0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, /* 0x50-0x57 */
95+
0xe7, 0xe8, 0xe9, 0xba, 0xe0, 0xbb, 0xb0, 0x6d, /* 0x58-0x5f */
96+
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x60-0x67 */
97+
0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 0x68-0x6f */
98+
0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, /* 0x70-0x77 */
99+
0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07, /* 0x78-0x7f */
100+
0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, /* 0x80-0x87 */
101+
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, /* 0x88-0x8f */
102+
0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, /* 0x90-0x97 */
103+
0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xff, /* 0x98-0x9f */
104+
0x41, 0xaa, 0x4a, 0xb1, 0x9f, 0xb2, 0x6a, 0xb5, /* 0xa0-0xa7 */
105+
0xbd, 0xb4, 0x9a, 0x8a, 0x5f, 0xca, 0xaf, 0xbc, /* 0xa8-0xaf */
106+
0x90, 0x8f, 0xea, 0xfa, 0xbe, 0xa0, 0xb6, 0xb3, /* 0xb0-0xb7 */
107+
0x9d, 0xda, 0x9b, 0x8b, 0xb7, 0xb8, 0xb9, 0xab, /* 0xb8-0xbf */
108+
0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9e, 0x68, /* 0xc0-0xc7 */
109+
0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77, /* 0xc8-0xcf */
110+
0xac, 0x69, 0xed, 0xee, 0xeb, 0xef, 0xec, 0xbf, /* 0xd0-0xd7 */
111+
0x80, 0xfd, 0xfe, 0xfb, 0xfc, 0xad, 0xae, 0x59, /* 0xd8-0xdf */
112+
0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9c, 0x48, /* 0xe0-0xe7 */
113+
0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, /* 0xe8-0xef */
114+
0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 0xe1, /* 0xf0-0xf7 */
115+
0x70, 0xdd, 0xde, 0xdb, 0xdc, 0x8d, 0x8e, 0xdf, /* 0xf8-0xff */
116+
};
117+
118+
static int
119+
ebcdic037_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
120+
{
121+
unsigned char c = 0;
122+
if (wc < 0x0100)
123+
c = ebcdic037_page00[wc];
124+
if (c != 0 || wc == 0) {
125+
*r = c;
126+
return 1;
127+
}
128+
return RET_ILUNI;
129+
}

0 commit comments

Comments
 (0)