@@ -147,8 +147,14 @@ enum PatternToken {
147147 Char(char),
148148 AnyChar,
149149 AnySequence,
150- AnyWithin(~[char]),
151- AnyExcept(~[char])
150+ AnyWithin(~[CharSpecifier]),
151+ AnyExcept(~[CharSpecifier])
152+ }
153+
/// One element of a bracket expression (`[...]` or `[!...]`) in a compiled pattern.
154+ #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, IterBytes)]
155+ enum CharSpecifier {
/// A single literal character.
156+ SingleChar(char),
/// A range given as `start-end`; matching treats both endpoints as inclusive.
157+ CharRange(char, char)
152158}
153159
154160#[deriving(Eq)]
@@ -164,12 +170,15 @@ impl Pattern {
164170 * This function compiles Unix shell style patterns: `?` matches any single character,
165171 * `*` matches any (possibly empty) sequence of characters and `[...]` matches any character
166172 * inside the brackets, unless the first character is `!` in which case it matches any
167- * character except those between the `!` and the `]`.
173+ * character except those between the `!` and the `]`. Character sequences can also specify
174+ * ranges of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any character
175+ * between 0 and 9 inclusive.
168176 *
169177 * The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets (e.g. `[?]`).
170178 * When a `]` occurs immediately following `[` or `[!` then it is interpreted as
171179 * being part of, rather than ending, the character set, so `]` and NOT `]` can be
172- * matched by `[]]` and `[!]]` respectively.
180+ * matched by `[]]` and `[!]]` respectively. The `-` character can be specified inside a
181+ * character sequence pattern by placing it at the start or the end, e.g. `[abc-]`.
173182 *
174183 * When a `[` does not have a closing `]` before the end of the string then the `[` will
175184 * be treated literally.
@@ -199,7 +208,8 @@ impl Pattern {
199208 match chars.slice_from(i + 3).position_elem(&']') {
200209 None => (),
201210 Some(j) => {
202- tokens.push(AnyExcept(chars.slice(i + 2, i + 3 + j).to_owned()));
211+ let cs = parse_char_specifiers(chars.slice(i + 2, i + 3 + j));
212+ tokens.push(AnyExcept(cs));
203213 i += j + 4;
204214 loop;
205215 }
@@ -209,7 +219,8 @@ impl Pattern {
209219 match chars.slice_from(i + 2).position_elem(&']') {
210220 None => (),
211221 Some(j) => {
212- tokens.push(AnyWithin(chars.slice(i + 1, i + 2 + j).to_owned()));
222+ let cs = parse_char_specifiers(chars.slice(i + 1, i + 2 + j));
223+ tokens.push(AnyWithin(cs));
213224 i += j + 3;
214225 loop;
215226 }
@@ -335,15 +346,11 @@ impl Pattern {
335346 AnyChar => {
336347 !require_literal(c)
337348 }
338- AnyWithin(ref chars) => {
339- !require_literal(c) &&
340- chars.iter()
341- .rposition(|&e| chars_eq(e, c, options.case_sensitive)).is_some()
349+ AnyWithin(ref specifiers) => {
350+ !require_literal(c) && in_char_specifiers(*specifiers, c, options)
342351 }
343- AnyExcept(ref chars) => {
344- !require_literal(c) &&
345- chars.iter()
346- .rposition(|&e| chars_eq(e, c, options.case_sensitive)).is_none()
352+ AnyExcept(ref specifiers) => {
353+ !require_literal(c) && !in_char_specifiers(*specifiers, c, options)
347354 }
348355 Char(c2) => {
349356 chars_eq(c, c2, options.case_sensitive)
@@ -370,6 +377,63 @@ impl Pattern {
370377
371378}
372379
380+ fn parse_char_specifiers(s: &[char]) -> ~[CharSpecifier] {
381+ let mut cs = ~[];
382+ let mut i = 0;
383+ while i < s.len() {
384+ if i + 3 <= s.len() && s[i + 1] == '-' {
385+ cs.push(CharRange(s[i], s[i + 2]));
386+ i += 3;
387+ } else {
388+ cs.push(SingleChar(s[i]));
389+ i += 1;
390+ }
391+ }
392+ cs
393+ }
394+
395+ fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool {
396+
397+ for &specifier in specifiers.iter() {
398+ match specifier {
399+ SingleChar(sc) => {
400+ if chars_eq(c, sc, options.case_sensitive) {
401+ return true;
402+ }
403+ }
404+ CharRange(start, end) => {
405+
406+ // FIXME: work with non-ascii chars properly (issue #1347)
407+ if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
408+
409+ let start = start.to_ascii().to_lower();
410+ let end = end.to_ascii().to_lower();
411+
412+ let start_up = start.to_upper();
413+ let end_up = end.to_upper();
414+
415+ // only allow case insensitive matching when
416+ // both start and end are within a-z or A-Z
417+ if start != start_up && end != end_up {
418+ let start = start.to_char();
419+ let end = end.to_char();
420+ let c = c.to_ascii().to_lower().to_char();
421+ if c >= start && c <= end {
422+ return true;
423+ }
424+ }
425+ }
426+
427+ if c >= start && c <= end {
428+ return true;
429+ }
430+ }
431+ }
432+ }
433+
434+ false
435+ }
436+
373437/// A helper function to determine if two chars are (possibly case-insensitively) equal.
374438fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
375439 if cfg!(windows) && path::windows::is_sep(a) && path::windows::is_sep(b) {
@@ -672,6 +736,54 @@ mod test {
672736 glob ( "/*/*/*/*" ) . skip ( 10000 ) . next ( ) ;
673737 }
674738
#[test]
fn test_range_pattern() {
    // A digit range accepts every decimal digit and rejects other characters.
    let digits = Pattern::new("a[0-9]b");
    for i in range(0, 10) {
        assert!(digits.matches(fmt!("a%db", i)));
    }
    assert!(!digits.matches("a_b"));

    // The negated form inverts both outcomes.
    let non_digits = Pattern::new("a[!0-9]b");
    for i in range(0, 10) {
        assert!(!non_digits.matches(fmt!("a%db", i)));
    }
    assert!(non_digits.matches("a_b"));

    // A range may sit at the start, middle or end of the set, mixed with
    // single characters.
    let mixed = ["[a-z123]", "[1a-z23]", "[123a-z]"];
    for &p in mixed.iter() {
        let pat = Pattern::new(p);
        for c in "abcdefghijklmnopqrstuvwxyz".iter() {
            assert!(pat.matches(c.to_str()));
        }
        // Upper-case letters match the lower-case range once case
        // sensitivity is switched off.
        for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".iter() {
            let options = MatchOptions { case_sensitive: false, .. MatchOptions::new() };
            assert!(pat.matches_with(c.to_str(), options));
        }
        assert!(pat.matches("1"));
        assert!(pat.matches("2"));
        assert!(pat.matches("3"));
    }

    // A `-` at the very start or end of the set is a literal dash.
    let with_dash = ["[abc-]", "[-abc]", "[a-c-]"];
    for &p in with_dash.iter() {
        let pat = Pattern::new(p);
        assert!(pat.matches("a"));
        assert!(pat.matches("b"));
        assert!(pat.matches("c"));
        assert!(pat.matches("-"));
        assert!(!pat.matches("d"));
    }

    // A reversed range is empty: neither endpoint matches.
    let reversed = Pattern::new("[2-1]");
    assert!(!reversed.matches("1"));
    assert!(!reversed.matches("2"));

    // A lone dash inside brackets is a literal, in both plain and negated sets.
    assert!(Pattern::new("[-]").matches("-"));
    assert!(!Pattern::new("[!-]").matches("-"));
}
786+
675787 #[ test]
676788 fn test_unclosed_bracket ( ) {
677789 // unclosed `[` should be treated literally
0 commit comments