@@ -463,7 +463,7 @@ pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) {
463463}
464464
465465fn each_split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
466- allow_trailing_empty : bool ) , it: & fn ( & str ) -> bool ) {
466+ allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
467467 if sep < 128 u as char {
468468 let b = sep as u8 , l = len ( s) ;
469469 let mut done = 0 u;
@@ -513,8 +513,8 @@ pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> b
513513 each_split_inner ( s, sepfn, len ( s) , false , false , it)
514514}
515515
516- pure fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517- allow_empty : bool , allow_trailing_empty : bool ) , it: & fn ( & str ) -> bool ) {
516+ fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517+ allow_empty : bool , allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
518518 let l = len ( s) ;
519519 let mut i = 0 u, start = 0 u, done = 0 u;
520520 while i < l && done < count {
@@ -534,7 +534,7 @@ pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
534534}
535535
536536// See Issue #1932 for why this is a naive search
537- fn iter_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) ) {
537+ fn iter_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) -> bool ) {
538538 let sep_len = len ( sep) , l = len ( s) ;
539539 fail_unless ! ( sep_len > 0 u) ;
540540 let mut i = 0 u, match_start = 0 u, match_i = 0 u;
@@ -545,7 +545,7 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
545545 match_i += 1 u;
546546 // Found a match
547547 if match_i == sep_len {
548- f ( match_start, i + 1 u) ;
548+ if ! f ( match_start, i + 1 u) { return ; }
549549 match_i = 0 u;
550550 }
551551 i += 1 u;
@@ -561,10 +561,10 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
561561 }
562562}
563563
564- fn iter_between_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) ) {
564+ fn iter_between_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) -> bool ) {
565565 let mut last_end = 0 u;
566- do iter_matches ( s, sep) |from, to| {
567- f ( last_end, from) ;
566+ for iter_matches( s, sep) |from, to| {
567+ if ! f ( last_end, from) { return ; }
568568 last_end = to;
569569 }
570570 f ( last_end, len ( s) ) ;
@@ -580,13 +580,13 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
580580 * ~~~
581581 */
582582pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
583- do iter_between_matches ( s, sep) |from, to| {
583+ for iter_between_matches( s, sep) |from, to| {
584584 if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
585585 }
586586}
587587
588588pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
589- do iter_between_matches ( s, sep) |from, to| {
589+ for iter_between_matches( s, sep) |from, to| {
590590 if to > from {
591591 if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
592592 }
@@ -630,7 +630,7 @@ pub fn levdistance(s: &str, t: &str) -> uint {
630630/**
631631 * Splits a string into a vector of the substrings separated by LF ('\n').
632632 */
633- pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char ( s, '\n' , it) }
633+ pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char_no_trailing ( s, '\n' , it) }
634634
635635/**
636636 * Splits a string into a vector of the substrings separated by LF ('\n')
@@ -656,52 +656,56 @@ pub fn each_word(s: &str, it: &fn(&str) -> bool) {
656656 * each of which is less bytes long than a limit
657657 */
658658pub fn each_split_within( ss : & str , lim : uint , it : & fn ( & str ) -> bool ) {
659- let words = str:: words ( ss) ;
660-
661- // empty?
662- if words == ~[ ] { return ~[ ] ; }
663-
664- let mut rows : ~[ ~str ] = ~[ ] ;
665- let mut row : ~str = ~"";
666-
667- for words. each |wptr| {
668- let word = copy * wptr;
669-
670- // if adding this word to the row would go over the limit,
671- // then start a new row
672- if row. len ( ) + word. len ( ) + 1 > lim {
673- rows. push ( copy row) ; // save previous row
674- row = word; // start a new one
675- } else {
676- if row. len ( ) > 0 { row += ~" " } // separate words
677- row += word; // append to this row
678- }
659+ // Just for fun, let's write this as an automaton
660+ enum SplitWithinState {
661+ A , // Leading whitespace, initial state
662+ B , // Words
663+ C , // Internal and trailing whitespace
679664 }
665+ enum Whitespace { Ws , Cr }
666+ enum LengthLimit { UnderLim , OverLim }
680667
681- // save the last row
682- if row != ~" " { rows. push ( row) ; }
668+ let mut slice_start = 0 ;
669+ let mut last_start = 0 ;
670+ let mut last_end = 0 ;
671+ let mut state = A ;
683672
684- rows
685- // NOTE: Finish change here
673+ let mut cont = true ;
674+ let slice = || { cont = it ( ss . slice ( slice_start , last_end ) ) } ;
686675
687- let mut last_slice_i = 0 , last_word_i = 0 , word_start = true ;
688- for each_chari( s) |i, c| {
689- if ( i - last_slice_i) <= lim {
690- if char:: is_whitespace ( c) {
676+ let machine = |i : uint , c : char | {
677+ let whitespace = if char:: is_whitespace ( c) { Ws } else { Cr } ;
678+ let limit = if ( i - slice_start + 1 ) <= lim { UnderLim } else { OverLim } ;
691679
692- } else {
680+ state = match ( state, whitespace, limit) {
681+ ( A , Ws , _) => { A }
682+ ( A , Cr , _) => { slice_start = i; last_start = i; B }
693683
694- }
695- } else {
684+ ( B , Cr , UnderLim ) => { B }
685+ ( B , Cr , OverLim ) if ( i - last_start + 1 ) > lim
686+ => { fail ! ( ~"word longer than limit!") }
687+ (B, Cr, OverLim) => { slice(); slice_start = last_start; B }
688+ (B, Ws, UnderLim) => { last_end = i; C }
689+ (B, Ws, OverLim) => { last_end = i; slice(); A }
696690
697- }
691+ (C, Cr, UnderLim) => { last_start = i; B }
692+ (C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
693+ (C, Ws, OverLim) => { slice(); A }
694+ (C, Ws, UnderLim) => { C }
695+ };
696+ cont
697+ };
698698
699+ str::each_chari(ss, machine);
699700
701+ // Let the automaton 'run out'
702+ let mut fake_i = ss.len();
703+ while cont && match state { B | C => true, A => false } {
704+ machine(fake_i, ' ');
705+ fake_i += 1;
700706 }
701707}
702708
703-
704-
705709/// Convert a string to lowercase. ASCII only
706710pub fn to_lower(s: &str) -> ~str {
707711 map(s,
@@ -731,7 +735,7 @@ pub fn to_upper(s: &str) -> ~str {
731735 */
732736pub fn replace(s: &str, from: &str, to: &str) -> ~str {
733737 let mut result = ~" ", first = true;
734- do iter_between_matches ( s, from) |start, end| {
738+ for iter_between_matches(s, from) |start, end| {
735739 if first {
736740 first = false;
737741 } else {
@@ -2286,9 +2290,9 @@ pub trait StrSlice {
22862290 fn len(&self) -> uint;
22872291 fn char_len(&self) -> uint;
22882292 fn slice(&self, begin: uint, end: uint) -> &'self str;
2289- fn split (&self, sepfn: &fn(char) -> bool) -> ~[~str] ;
2290- fn split_char (&self, sep: char) -> ~[~str] ;
2291- fn split_str (&self, sep: &'a str) -> ~[~str] ;
2293+ fn each_split (&self, sepfn: &fn(char) -> bool, it: &fn(&str ) -> bool) ;
2294+ fn each_split_char (&self, sep: char, it: &fn(&str ) -> bool) ;
2295+ fn each_split_str (&self, sep: &'a str, it: &fn(&str ) -> bool) ;
22922296 fn starts_with(&self, needle: &'a str) -> bool;
22932297 fn substr(&self, begin: uint, n: uint) -> &'self str;
22942298 fn to_lower(&self) -> ~str;
@@ -2408,20 +2412,24 @@ impl StrSlice for &'self str {
24082412 }
24092413 /// Splits a string into substrings using a character function
24102414 #[inline]
2411- fn split (&self, sepfn: &fn(char) -> bool) -> ~[~str] {
2412- split (*self, sepfn)
2415+ fn each_split (&self, sepfn: &fn(char) -> bool, it: &fn(&str ) -> bool) {
2416+ each_split (*self, sepfn, it )
24132417 }
24142418 /**
24152419 * Splits a string into substrings at each occurrence of a given character
24162420 */
24172421 #[inline]
2418- fn split_char(&self, sep: char) -> ~[~str] { split_char(*self, sep) }
2422+ fn each_split_char(&self, sep: char, it: &fn(&str) -> bool) {
2423+ each_split_char(*self, sep, it)
2424+ }
24192425 /**
24202426 * Splits a string into a vector of the substrings separated by a given
24212427 * string
24222428 */
24232429 #[inline]
2424- fn split_str(&self, sep: &'a str) -> ~[~str] { split_str(*self, sep) }
2430+ fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool) {
2431+ each_split_str(*self, sep, it)
2432+ }
24252433 /// Returns true if one string starts with another
24262434 #[inline]
24272435 fn starts_with(&self, needle: &'a str) -> bool {
0 commit comments