1010
1111use ast;
1212use parse:: { ParseSess , PResult , filemap_to_tts} ;
13- use parse:: new_parser_from_source_str;
13+ use parse:: { lexer , new_parser_from_source_str} ;
1414use parse:: parser:: Parser ;
1515use parse:: token;
1616use ptr:: P ;
17- use str :: char_at ;
17+ use std :: iter :: Peekable ;
1818
1919/// Map a string to tts, using a made-up filename:
2020pub fn string_to_tts ( source_str : String ) -> Vec < ast:: TokenTree > {
@@ -87,69 +87,62 @@ pub fn strs_to_idents(ids: Vec<&str> ) -> Vec<ast::Ident> {
8787
8888/// Does the given string match the pattern? whitespace in the first string
8989/// may be deleted or replaced with other whitespace to match the pattern.
90- /// this function is Unicode-ignorant; fortunately, the careful design of
91- /// UTF-8 mitigates this ignorance. In particular, this function only collapses
92- /// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate Unicode
93- /// chars. Unsurprisingly, it doesn't do NKF-normalization(?).
90+ /// This function is relatively Unicode-ignorant; fortunately, the careful design
91+ /// of UTF-8 mitigates this ignorance. It doesn't do NKF-normalization(?).
9492pub fn matches_codepattern ( a : & str , b : & str ) -> bool {
95- let mut idx_a = 0 ;
96- let mut idx_b = 0 ;
93+ let mut a_iter = a. chars ( ) . peekable ( ) ;
94+ let mut b_iter = b. chars ( ) . peekable ( ) ;
95+
9796 loop {
98- if idx_a == a. len ( ) && idx_b == b. len ( ) {
99- return true ;
100- }
101- else if idx_a == a. len ( ) { return false ; }
102- else if idx_b == b. len ( ) {
103- // maybe the stuff left in a is all ws?
104- if is_whitespace ( char_at ( a, idx_a) ) {
105- return scan_for_non_ws_or_end ( a, idx_a) == a. len ( ) ;
106- } else {
107- return false ;
97+ let ( a, b) = match ( a_iter. peek ( ) , b_iter. peek ( ) ) {
98+ ( None , None ) => return true ,
99+ ( None , _) => return false ,
100+ ( Some ( & a) , None ) => {
101+ if is_pattern_whitespace ( a) {
102+ break // trailing whitespace check is out of loop for borrowck
103+ } else {
104+ return false
105+ }
108106 }
109- }
110- // ws in both given and pattern:
111- else if is_whitespace ( char_at ( a, idx_a) )
112- && is_whitespace ( char_at ( b, idx_b) ) {
113- idx_a = scan_for_non_ws_or_end ( a, idx_a) ;
114- idx_b = scan_for_non_ws_or_end ( b, idx_b) ;
115- }
116- // ws in given only:
117- else if is_whitespace ( char_at ( a, idx_a) ) {
118- idx_a = scan_for_non_ws_or_end ( a, idx_a) ;
119- }
120- // *don't* silently eat ws in expected only.
121- else if char_at ( a, idx_a) == char_at ( b, idx_b) {
122- idx_a += 1 ;
123- idx_b += 1 ;
124- }
125- else {
126- return false ;
107+ ( Some ( & a) , Some ( & b) ) => ( a, b)
108+ } ;
109+
110+ if is_pattern_whitespace ( a) && is_pattern_whitespace ( b) {
111+ // skip whitespace for a and b
112+ scan_for_non_ws_or_end ( & mut a_iter) ;
113+ scan_for_non_ws_or_end ( & mut b_iter) ;
114+ } else if is_pattern_whitespace ( a) {
115+ // skip whitespace for a
116+ scan_for_non_ws_or_end ( & mut a_iter) ;
117+ } else if a == b {
118+ a_iter. next ( ) ;
119+ b_iter. next ( ) ;
120+ } else {
121+ return false
127122 }
128123 }
124+
125+ // check if a has *only* trailing whitespace
126+ a_iter. all ( is_pattern_whitespace)
129127}
130128
131- /// Given a string and an index, return the first usize >= idx
132- /// that is a non-ws-char or is outside of the legal range of
133- /// the string.
134- fn scan_for_non_ws_or_end ( a : & str , idx : usize ) -> usize {
135- let mut i = idx;
136- let len = a. len ( ) ;
137- while ( i < len) && ( is_whitespace ( char_at ( a, i) ) ) {
138- i += 1 ;
129+ /// Advances the given peekable `Iterator` until it reaches a non-whitespace character
130+ fn scan_for_non_ws_or_end < I : Iterator < Item = char > > ( iter : & mut Peekable < I > ) {
131+ while lexer:: is_pattern_whitespace ( iter. peek ( ) . cloned ( ) ) {
132+ iter. next ( ) ;
139133 }
140- i
141134}
142135
143- /// Copied from lexer.
144- pub fn is_whitespace ( c : char ) -> bool {
145- return c == ' ' || c == '\t' || c == '\r' || c == '\n' ;
136+ pub fn is_pattern_whitespace ( c : char ) -> bool {
137+ lexer:: is_pattern_whitespace ( Some ( c) )
146138}
147139
148140#[ cfg( test) ]
149141mod tests {
150142 use super :: * ;
151143
152- #[ test] fn eqmodws ( ) {
144+ #[ test]
145+ fn eqmodws ( ) {
153146 assert_eq ! ( matches_codepattern( "" , "" ) , true ) ;
154147 assert_eq ! ( matches_codepattern( "" , "a" ) , false ) ;
155148 assert_eq ! ( matches_codepattern( "a" , "" ) , false ) ;
@@ -160,5 +153,22 @@ mod tests {
160153 assert_eq ! ( matches_codepattern( "a b" , "a b" ) , true ) ;
161154 assert_eq ! ( matches_codepattern( "ab" , "a b" ) , false ) ;
162155 assert_eq ! ( matches_codepattern( "a b" , "ab" ) , true ) ;
156+ assert_eq ! ( matches_codepattern( " a b" , "ab" ) , true ) ;
157+ }
158+
#[test]
fn pattern_whitespace() {
    // Whitespace that exists only in the pattern is never skipped.
    assert!(!matches_codepattern("", "\x0C "));
    // Mixed whitespace runs match each other, and trailing whitespace in the
    // given string is tolerated...
    assert!(matches_codepattern("a b ", "a \u{0085} \n \t \r b"));
    // ...but trailing whitespace in the pattern is not.
    assert!(!matches_codepattern("a b", "a \u{0085} \n \t \r b "));
}
165+
#[test]
fn non_pattern_whitespace() {
    // Each string below contains a character that has the 'White_Space'
    // property but not 'Pattern_White_Space', so it must be compared as an
    // ordinary character rather than skipped.
    assert!(!matches_codepattern("a b", "a\u{2002} b"));
    // NOTE(review): this case repeats the previous one verbatim - confirm
    // whether a different spacing was intended.
    assert!(!matches_codepattern("a b", "a\u{2002} b"));
    assert!(!matches_codepattern("\u{205F} a b", "ab"));
    assert!(!matches_codepattern("a \u{3000} b", "ab"));
}
164174}
0 commit comments