Skip to content

Commit abe7a84

Browse files
committed
Add unittest
1 parent d7908aa commit abe7a84

File tree

1 file changed

+71
-2
lines changed

1 file changed

+71
-2
lines changed

std/encoding.d

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ for arbitrary _encoding and decoding of characters, arbitrary transcoding
88
between strings of different type, as well as validation and sanitization.
99
1010
Encodings currently supported are UTF-8, UTF-16, UTF-32, ASCII, ISO-8859-1
11-
(also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250, WINDOWS-1251
11+
(also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250, WINDOWS-1251
1212
and WINDOWS-1252.
1313
1414
$(SCRIPT inhibitQuickIndex = 1;)
@@ -88,7 +88,7 @@ auto e = EncodingScheme.create("utf-8");
8888
8989
This library supplies $(LREF EncodingScheme) subclasses for ASCII,
9090
ISO-8859-1 (also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250,
91-
WINDOWS-1251, WINDOWS-1252, UTF-8, and (on little-endian architectures)
91+
WINDOWS-1251, WINDOWS-1252, UTF-8, and (on little-endian architectures)
9292
UTF-16LE and UTF-32LE; or (on big-endian architectures) UTF-16BE and UTF-32BE.
9393
9494
This library provides a mechanism whereby other modules may add $(LREF
@@ -3301,6 +3301,75 @@ class EncodingSchemeWindows1252 : EncodingScheme
33013301
}
33023302
}
33033303

3304+
// Round-trip check for the single-byte EncodingScheme subclasses.
// For every scheme created by name it verifies the reported name, that
// U+FFFD is not encodable, that each sample code point encodes to exactly
// one byte and decodes back to itself, how safeDecode treats a sample
// byte, and that the replacement sequence is "?".
@system unittest
{
    // Names handed to EncodingScheme.create; toString() is expected to
    // return exactly the same spelling.
    static string[] schemeNames =
    [
        "ASCII",
        "ISO-8859-1",
        "ISO-8859-2",
        "windows-1250",
        "windows-1251",
        "windows-1252"
    ];

    EncodingScheme[] schemes;

    foreach (name; schemeNames)
    {
        schemes ~= EncodingScheme.create(name);
    }

    // Every scheme under test is expected to encode to a single byte.
    ubyte[1] buffer;

    // valid[i] lists code points that schemeNames[i] must be able to encode.
    static dchar[][] valid =
    [
        // Valid ASCII
        ['\u0001','\u0020','\u0040','\u0060','\u007F'],
        // Valid 8859-1
        ['\u0001','\u0020','\u0070','\u00DA','\u00FF'],
        // Valid 8859-2
        ['\u0020','\u00D7','\u00DF','\u010F','\u02D9'],
        // Valid 1250
        ['\u0020','\u20AC','\u201E','\u2021','\u2039'],
        // Valid 1251
        ['\u0402','\u00A4','\u0415','\u0439','\u044F'],
        // Valid 1252
        ['\u20AC','\u0160','\u2019','\u2122','\u0178'],
    ];

    // One probe byte per scheme, in schemeNames order. The slice is passed
    // to safeDecode once per scheme; the final assert checks that those six
    // calls consumed all six bytes.
    static const(ubyte)[] invalid = [0xA0,0xFF,0xFF,0x81,0x98,0x81];

    foreach (i, scheme; schemes)
    {
        // Fixed: the message previously ran straight into the scheme name.
        assert(scheme.toString() == schemeNames[i],
            "Error in the name of encoding scheme " ~ schemeNames[i]);
        assert(!scheme.canEncode('\uFFFD'));
        assert(scheme.encodedLength('A') == 1);

        const(ubyte)[] encodeStr;
        dchar[] decStr;
        foreach (chr; valid[i])
        {
            assert(scheme.encode(chr, buffer) == 1);
            encodeStr ~= buffer;
            // decode takes a slice, so hand it a fresh view of the buffer
            // rather than the fixed-size array itself.
            const(ubyte)[] buf = buffer;
            decStr ~= scheme.decode(buf);
        }

        assert(scheme.isValid(encodeStr), "Not correctly encoded UTF => " ~ schemeNames[i]);
        assert(valid[i] == decStr, "Error encode/decode UTF8 <=> " ~ schemeNames[i]);

        // The probe byte for the two ISO-8859 schemes (0xFF) is expected to
        // decode successfully; the probe bytes for every other scheme are
        // expected to be rejected.
        if (schemeNames[i] == "ISO-8859-1" || schemeNames[i] == "ISO-8859-2")
        {
            assert(scheme.safeDecode(invalid) != INVALID_SEQUENCE);
        }
        else
        {
            assert(scheme.safeDecode(invalid) == INVALID_SEQUENCE);
        }
        assert(scheme.replacementSequence() == cast(immutable(ubyte)[])"?");
    }
    // All probe bytes must have been consumed, one per scheme.
    assert(invalid.length == 0);
}
3372+
33043373
/**
33053374
EncodingScheme to handle UTF-8
33063375

0 commit comments

Comments
 (0)