@@ -8,7 +8,7 @@ for arbitrary _encoding and decoding of characters, arbitrary transcoding
88between strings of different type, as well as validation and sanitization.
99
1010Encodings currently supported are UTF-8, UTF-16, UTF-32, ASCII, ISO-8859-1
11- (also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250, WINDOWS-1251
11+ (also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250, WINDOWS-1251
1212and WINDOWS-1252.
1313
1414$(SCRIPT inhibitQuickIndex = 1;)
@@ -88,7 +88,7 @@ auto e = EncodingScheme.create("utf-8");
8888
8989This library supplies $(LREF EncodingScheme) subclasses for ASCII,
9090ISO-8859-1 (also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250,
91- WINDOWS-1251, WINDOWS-1252, UTF-8, and (on little-endian architectures)
91+ WINDOWS-1251, WINDOWS-1252, UTF-8, and (on little-endian architectures)
9292UTF-16LE and UTF-32LE; or (on big-endian architectures) UTF-16BE and UTF-32BE.
9393
9494This library provides a mechanism whereby other modules may add $(LREF
@@ -3301,6 +3301,75 @@ class EncodingSchemeWindows1252 : EncodingScheme
33013301 }
33023302}
33033303
3304+ @system unittest // Checks each single-byte scheme obtained via EncodingScheme.create: name, encode/decode round trip, validity, invalid-byte handling and replacement sequence.
3305+ {
3306+ static string [] schemeNames = // Scheme names; index i here pairs with schemes[i] and valid[i] below.
3307+ [
3308+ " ASCII" ,
3309+ " ISO-8859-1" ,
3310+ " ISO-8859-2" ,
3311+ " windows-1250" ,
3312+ " windows-1251" ,
3313+ " windows-1252"
3314+ ];
3315+
3316+ EncodingScheme[] schemes; // One scheme instance per name, appended in schemeNames order.
3317+
3318+ foreach (name;schemeNames)
3319+ {
3320+ schemes ~= EncodingScheme.create(name);
3321+ }
3322+
3323+ ubyte [1 ] buffer; // One-byte scratch buffer reused for every encode call.
3324+ static dchar [][] valid = // valid[i]: code points that scheme i must encode to exactly one byte and decode back unchanged.
3325+ [
3326+ // Valid ASCII
3327+ [' \u0001 ' ,' \u0020 ' ,' \u0040 ' ,' \u0060 ' ,' \u007F ' ],
3328+ // Valid 8859-1
3329+ [' \u0001 ' ,' \u0020 ' ,' \u0070 ' ,' \u00DA ' ,' \u00FF ' ],
3330+ // Valid 8859-2
3331+ [' \u0020 ' ,' \u00D7 ' ,' \u00DF ' ,' \u010F ' ,' \u02D9 ' ],
3332+ // Valid 1250
3333+ [' \u0020 ' ,' \u20AC ' ,' \u201E ' ,' \u2021 ' ,' \u2039 ' ],
3334+ // Valid 1251
3335+ [' \u0402 ' ,' \u00A4 ' ,' \u0415 ' ,' \u0439 ' ,' \u044F ' ],
3336+ // Valid 1252
3337+ [' \u20AC ' ,' \u0160 ' ,' \u2019 ' ,' \u2122 ' ,' \u0178 ' ],
3338+ ];
3339+
3340+ static const (ubyte )[] invalid = [0xA0 ,0xFF ,0xFF ,0x81 ,0x98 ,0x81 ]; // Probe bytes for safeDecode. NOTE(review): presumably each safeDecode call consumes one byte, so byte i is the one seen by scheme i - confirm.
3341+
3342+ foreach (i,scheme;schemes)
3343+ {
3344+ assert (scheme.toString() == schemeNames[i]," Error in the name of encoding scheme" ~ schemeNames[i]); // The scheme must report the same name it was created with.
3345+ assert (! scheme.canEncode(' \uFFFD ' )); // U+FFFD is expected to be unencodable in every scheme under test.
3346+ assert (scheme.encodedLength(' A' ) == 1 );
3347+ const (ubyte )[] encodeStr; // Accumulates the encoded bytes of valid[i].
3348+ dchar [] decStr; // Accumulates the code points decoded back from those bytes.
3349+ foreach (chr;valid[i])
3350+ {
3351+ assert (scheme.encode(chr,buffer) == 1 ); // Each listed code point must occupy exactly one byte.
3352+ encodeStr ~= buffer;
3353+ const (ubyte )[] buf = buffer; // Fresh slice over buffer handed to decode (presumably decode advances the slice it is given - confirm).
3354+ decStr ~= scheme.decode(buf);
3355+ }
3356+
3357+ assert (scheme.isValid(encodeStr)," Not correctly encoded UTF => " ~ schemeNames[i]); // The concatenated encoded bytes must validate under the same scheme.
3358+ assert (valid[i] == decStr," Error encode/decode UTF8 <=> " ~ schemeNames[i]); // Round trip: the decoded code points must equal the input list.
3359+
3360+ if (schemeNames[i] == " ISO-8859-1" || schemeNames[i] == " ISO-8859-2" ) // These two schemes are expected to decode their probe byte; all others must report INVALID_SEQUENCE.
3361+ {
3362+ assert (scheme.safeDecode(invalid) != INVALID_SEQUENCE );
3363+ }
3364+ else
3365+ {
3366+ assert (scheme.safeDecode(invalid) == INVALID_SEQUENCE );
3367+ }
3368+ assert (scheme.replacementSequence() == cast (immutable (ubyte )[])" ?" );
3369+ }
3370+ assert (invalid.length == 0 ); // Every probe byte must have been consumed by the safeDecode calls in the loop.
3371+ }
3372+
33043373/**
33053374 EncodingScheme to handle UTF-8
33063375
0 commit comments