@@ -1049,3 +1049,53 @@ fn roundtrip_percentage_token() {
10491049 }
10501050 }
10511051}
1052+
/// Tokenizes each sample input to the end and checks the column reported by
/// `Parser::current_source_location` against the expected value.
#[test]
fn utf16_columns() {
    // This particular test serves two purposes.  First, it checks
    // that the column number computations are correct.  Second, it
    // checks that tokenizer code paths correctly differentiate
    // between the different UTF-8 encoding bytes.  In particular
    // different leader bytes and continuation bytes are treated
    // differently, so we make sure to include all lengths in the
    // tests, using the string "QΡ✈🆒".  Also, remember that because
    // the column is in units of UTF-16, the 4-byte sequence results
    // in two columns.
    //
    // Each entry is `(input, expected column once the input is exhausted)`.
    // For the inputs containing "\r\n", the expected value only counts what
    // follows the line break.
    let tests = [
        ("", 0),
        ("ascii", 5),
        ("/*QΡ✈🆒*/", 9),
        ("'QΡ✈🆒*'", 8),
        ("\"\\\"'QΡ✈🆒*'", 11),
        ("\\Q\\Ρ\\✈\\🆒", 9),
        ("QΡ✈🆒", 5),
        ("QΡ✈🆒\\Q\\Ρ\\✈\\🆒", 14),
        ("newline\r\nQΡ✈🆒", 5),
        ("url(QΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 19),
        ("url(QΡ✈🆒)", 10),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 15),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒", 14),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒 x", 16),
        ("QΡ✈🆒()", 7),
        // Test that under/over-flow of current_line_start_position is
        // handled properly; see the special case in consume_4byte_intro.
        ("🆒", 2),
    ];

    for &(css, expected_column) in tests.iter() {
        let mut input = ParserInput::new(css);
        let mut parser = Parser::new(&mut input);

        // Read all tokens.  Running out of input is the only acceptable way
        // for the loop to stop; any other error means the sample is broken.
        loop {
            match parser.next() {
                Err(BasicParseError::EndOfInput) => break,
                Err(_) => panic!("unexpected parse error for input {:?}", css),
                Ok(_) => {}
            }
        }

        // Check the resulting column, naming the input so that a failure
        // inside this table-driven loop can be attributed to one case.
        assert_eq!(
            parser.current_source_location().column,
            expected_column,
            "wrong column for input {:?}",
            css
        );
    }
}
0 commit comments