@@ -749,4 +749,79 @@ defmodule StringTest do
749749 assert String . myers_difference ( "abc" , "aйbc" ) == [ eq: "a" , ins: "й" , eq: "bc" ]
750750 assert String . myers_difference ( "aйbc" , "abc" ) == [ eq: "a" , del: "й" , eq: "bc" ]
751751 end
752+
test "normalize/2" do
  # Exercises String.normalize/2 for both supported forms: :nfd (canonical
  # decomposition) and :nfc (canonical composition).
  #
  # NOTE(review): the literals in the next two groups rely on the source file
  # keeping the precomposed and decomposed spellings distinct on each side of
  # `==` - confirm that no editor or tool has normalized them.

  # Canonical decomposition
  assert String.normalize("ŝ", :nfd) == "ŝ"
  assert String.normalize("ḇravô", :nfd) == "ḇravô"
  assert String.normalize("ṩierra", :nfd) == "ṩierra"
  assert String.normalize("뢴", :nfd) == "뢴"

  # Canonical composition
  assert String.normalize("êchǭ", :nfc) == "êchǭ"
  assert String.normalize("거̄", :nfc) == "거̄"
  assert String.normalize("뢴", :nfc) == "뢴"

  ## Error cases

  # These binaries are asserted to come back unchanged from both forms.
  assert String.normalize(<<15, 216>>, :nfc) == <<15, 216>>
  assert String.normalize(<<15, 216>>, :nfd) == <<15, 216>>
  assert String.normalize(<<216, 15>>, :nfc) == <<216, 15>>
  assert String.normalize(<<216, 15>>, :nfd) == <<216, 15>>

  ## Cases from NormalizationTest.txt
  #
  # Each case lists the source code points, then the expected NFC code points,
  # then the character names. The strings are written with \u escapes so that
  # the exact code point order is visible and survives editor normalization.

  # 05B8 05B9 05B1 0591 05C3 05B0 05AC 059F
  # 05B1 05B8 05B9 0591 05C3 05B0 05AC 059F
  # HEBREW POINT QAMATS, HEBREW POINT HOLAM, HEBREW POINT HATAF SEGOL,
  # HEBREW ACCENT ETNAHTA, HEBREW PUNCTUATION SOF PASUQ, HEBREW POINT SHEVA,
  # HEBREW ACCENT ILUY, HEBREW ACCENT QARNEY PARA
  assert String.normalize("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F", :nfc) ==
           "\u05B1\u05B8\u05B9\u0591\u05C3\u05B0\u05AC\u059F"

  # 095D (exclusion list)
  # 0922 093C
  # DEVANAGARI LETTER RHA
  assert String.normalize("\u095D", :nfc) == "\u0922\u093C"

  # 0061 0315 0300 05AE 0340 0062
  # 00E0 05AE 0300 0315 0062
  # LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT,
  # HEBREW ACCENT ZINOR, COMBINING GRAVE TONE MARK, LATIN SMALL LETTER B
  assert String.normalize("\u0061\u0315\u0300\u05AE\u0340\u0062", :nfc) ==
           "\u00E0\u05AE\u0300\u0315\u0062"

  # 0344
  # 0308 0301
  # COMBINING GREEK DIALYTIKA TONOS
  assert String.normalize("\u0344", :nfc) == "\u0308\u0301"

  # 115B9 0334 115AF
  # 115B9 0334 115AF
  # SIDDHAM VOWEL SIGN AI, COMBINING TILDE OVERLAY, SIDDHAM VOWEL SIGN AA
  assert String.normalize("\u{115B9}\u0334\u{115AF}", :nfc) == "\u{115B9}\u0334\u{115AF}"
end
752827end
0 commit comments