Skip to content

Commit ed9ef09

Browse files
committed
New encoding C99.
1 parent 5babd6b commit ed9ef09

File tree

7 files changed

+144
-3
lines changed

7 files changed

+144
-3
lines changed

ChangeLog

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
2002-05-26 Bruno Haible <bruno@clisp.org>
2+
3+
* lib/c99.h: New file.
4+
* lib/converters.h: Include it.
5+
* lib/encodings.def (C99): New encoding.
6+
* README, man/iconv_open.3: Document C99 encoding.
7+
18
2002-05-26 Bruno Haible <bruno@clisp.org>
29

310
* lib/java.h (java_mbtowc): Accept 12-byte sequences for non-BMP

NOTES

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,9 @@ A: libiconv, as an internationalization library, supports those character
298298
* UTF-7
299299
We implement this because it is essential functionality for mail
300300
applications.
301+
* C99
302+
We implement it because it's used for C and C++ programs and because
303+
it's a nice encoding for debugging.
301304
* JAVA
302305
We implement it because it's used for Java programs and because it's
303306
a nice encoding for debugging.

README

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ It provides support for the encodings:
4242
UTF-16, UTF-16BE, UTF-16LE
4343
UTF-32, UTF-32BE, UTF-32LE
4444
UTF-7
45-
JAVA
45+
C99, JAVA
4646
Full Unicode, in terms of `uint16_t' or `uint32_t'
4747
(with machine dependent endianness and alignment)
4848
UCS-2-INTERNAL, UCS-4-INTERNAL

lib/c99.h

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* Copyright (C) 1999-2002 Free Software Foundation, Inc.
3+
* This file is part of the GNU LIBICONV Library.
4+
*
5+
* The GNU LIBICONV Library is free software; you can redistribute it
6+
* and/or modify it under the terms of the GNU Library General Public
7+
* License as published by the Free Software Foundation; either version 2
8+
* of the License, or (at your option) any later version.
9+
*
10+
* The GNU LIBICONV Library is distributed in the hope that it will be
11+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13+
* Library General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Library General Public
16+
* License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17+
* If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18+
* Suite 330, Boston, MA 02111-1307, USA.
19+
*/
20+
21+
/*
22+
* C99
23+
* This is ASCII with \uXXXX and \UXXXXXXXX escape sequences, denoting Unicode
24+
* characters. See ISO/IEC 9899:1999, section 6.4.3.
25+
* The treatment of control characters in the range U+0080..U+009F is not
26+
* specified; we pass them through unmodified.
27+
*/
28+
29+
static int
30+
c99_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
31+
{
32+
unsigned char c;
33+
ucs4_t wc;
34+
int i;
35+
36+
c = s[0];
37+
if (c < 0xa0) {
38+
if (c != '\\') {
39+
*pwc = c;
40+
return 1;
41+
}
42+
if (n < 2)
43+
return RET_TOOFEW(0);
44+
c = s[1];
45+
if (c == 'u') {
46+
wc = 0;
47+
for (i = 2; i < 6; i++) {
48+
if (n <= i)
49+
return RET_TOOFEW(0);
50+
c = s[i];
51+
if (c >= '0' && c <= '9')
52+
c -= '0';
53+
else if (c >= 'A' && c <= 'Z')
54+
c -= 'A'-10;
55+
else if (c >= 'a' && c <= 'z')
56+
c -= 'a'-10;
57+
else
58+
goto simply_backslash;
59+
wc |= (ucs4_t) c << (4 * (5-i));
60+
}
61+
if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000))
62+
|| wc == 0x0024 || wc == 0x0040 || wc == 0x0060) {
63+
*pwc = wc;
64+
return 6;
65+
}
66+
} else if (c == 'U') {
67+
wc = 0;
68+
for (i = 2; i < 10; i++) {
69+
if (n <= i)
70+
return RET_TOOFEW(0);
71+
c = s[i];
72+
if (c >= '0' && c <= '9')
73+
c -= '0';
74+
else if (c >= 'A' && c <= 'Z')
75+
c -= 'A'-10;
76+
else if (c >= 'a' && c <= 'z')
77+
c -= 'a'-10;
78+
else
79+
goto simply_backslash;
80+
wc |= (ucs4_t) c << (4 * (9-i));
81+
}
82+
if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000))
83+
|| wc == 0x0024 || wc == 0x0040 || wc == 0x0060) {
84+
*pwc = wc;
85+
return 10;
86+
}
87+
} else
88+
goto simply_backslash;
89+
}
90+
return RET_ILSEQ;
91+
simply_backslash:
92+
*pwc = '\\';
93+
return 1;
94+
}
95+
96+
static int
97+
c99_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
98+
{
99+
if (wc < 0xa0) {
100+
*r = wc;
101+
return 1;
102+
} else {
103+
int result;
104+
unsigned char u;
105+
if (wc < 0x10000) {
106+
result = 6;
107+
u = 'u';
108+
} else {
109+
result = 10;
110+
u = 'U';
111+
}
112+
if (n >= result) {
113+
int count;
114+
r[0] = '\\';
115+
r[1] = u;
116+
r += 2;
117+
for (count = result-3; count >= 0; count--) {
118+
unsigned int i = (wc >> (4*count)) & 0x0f;
119+
*r++ = (i < 10 ? '0'+i : 'a'-10+i);
120+
}
121+
return result;
122+
} else
123+
return RET_TOOSMALL;
124+
}
125+
}

lib/converters.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ struct conv_struct {
125125
#include "ucs2swapped.h"
126126
#include "ucs4internal.h"
127127
#include "ucs4swapped.h"
128+
#include "c99.h"
128129
#include "java.h"
129130

130131
/* 8-bit encodings */

lib/encodings.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ DEFENCODING(( "UCS-4-SWAPPED", /* libiconv */
155155
ucs4swapped,
156156
{ ucs4swapped_mbtowc, NULL }, { ucs4swapped_wctomb, NULL })
157157

158+
DEFENCODING(( "C99",
159+
),
160+
c99,
161+
{ c99_mbtowc, NULL }, { c99_wctomb, NULL })
162+
158163
DEFENCODING(( "JAVA",
159164
),
160165
java,

man/iconv_open.3

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
.\" GNU glibc-2 source code and manual
1010
.\" OpenGroup's Single Unix specification http://www.UNIX-systems.org/online.html
1111
.\"
12-
.TH ICONV_OPEN 3 "May 12, 2002" "GNU" "Linux Programmer's Manual"
12+
.TH ICONV_OPEN 3 "May 26, 2002" "GNU" "Linux Programmer's Manual"
1313
.SH NAME
1414
iconv_open \- allocate descriptor for character set conversion
1515
.SH SYNOPSIS
@@ -114,7 +114,7 @@ UTF-32, UTF-32BE, UTF-32LE
114114
UTF-7
115115
.nf
116116
.fi
117-
JAVA
117+
C99, JAVA
118118
.TP
119119
Full Unicode, in terms of \fBuint16_t\fP or \fBuint32_t\fP
120120
(with machine dependent endianness and alignment)

0 commit comments

Comments
 (0)