@@ -56,15 +56,15 @@ pub fn from_slice(s: &str) -> ~str {
5656
5757impl ToStr for ~str {
5858 #[ inline( always) ]
59- fn to_str ( & self ) -> ~str { copy * self }
59+ fn to_str ( & self ) -> ~str { from_slice ( * self ) }
6060}
6161impl ToStr for & ' self str {
6262 #[ inline( always) ]
63- fn to_str ( & self ) -> ~str { :: str :: from_slice ( * self ) }
63+ fn to_str ( & self ) -> ~str { from_slice ( * self ) }
6464}
6565impl ToStr for @str {
6666 #[ inline( always) ]
67- fn to_str ( & self ) -> ~str { :: str :: from_slice ( * self ) }
67+ fn to_str ( & self ) -> ~str { from_slice ( * self ) }
6868}
6969
7070/**
@@ -383,7 +383,7 @@ Section: Transforming strings
383383*/
384384
385385/**
386- * Converts a string to a vector of bytes
386+ * Converts a string to a unique vector of bytes
387387 *
388388 * The result vector is not null-terminated.
389389 */
@@ -403,22 +403,19 @@ pub fn byte_slice<T>(s: &str, f: &fn(v: &[u8]) -> T) -> T {
403403 }
404404}
405405
406- /// Convert a string to a vector of characters
407- pub fn chars ( s : & str ) -> ~[ char ] {
408- let mut buf = ~[ ] , i = 0 ;
409- let len = len ( s) ;
410- while i < len {
411- let CharRange { ch, next} = char_range_at ( s, i) ;
412- unsafe { buf. push ( ch) ; }
413- i = next;
406+ /// Convert a string to a unique vector of characters
407+ pub fn to_chars ( s : & str ) -> ~[ char ] {
408+ let mut buf = ~[ ] ;
409+ for each_char( s) |c| {
410+ buf. push ( c) ;
414411 }
415412 buf
416413}
417414
418415/**
419416 * Take a substring of another.
420417 *
421- * Returns a string containing `n` characters starting at byte offset
418+ * Returns a slice pointing at `n` characters starting from byte offset
422419 * `begin`.
423420 */
424421pub fn substr ( s : & ' a str , begin : uint , n : uint ) -> & ' a str {
@@ -437,10 +434,17 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str {
437434 unsafe { raw:: slice_bytes ( s, begin, end) }
438435}
439436
440- /// Splits a string into substrings at each occurrence of a given
441- /// character.
442- pub fn split_char ( s : & str , sep : char ) -> ~[ ~str ] {
443- split_char_inner ( s, sep, len ( s) , true , true )
437+ /// Splits a string into substrings at each occurrence of a given character
438+ pub fn each_split_char ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
439+ each_split_char_inner ( s, sep, len ( s) , true , true , it)
440+ }
441+
442+ /**
443+ * Like `each_split_char`, but a trailing empty string is omitted
444+ * (e.g. `each_split_char_no_trailing("A B ", ' ', it)` passes "A" and "B" to `it`)
445+ */
446+ pub fn each_split_char_no_trailing ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
447+ each_split_char_inner ( s, sep, len ( s) , true , false , it)
444448}
445449
446450/**
@@ -449,35 +453,25 @@ pub fn split_char(s: &str, sep: char) -> ~[~str] {
449453 *
450454 * The byte must be a valid UTF-8/ASCII byte
451455 */
452- pub fn splitn_char ( s : & str , sep : char , count : uint ) -> ~ [ ~ str ] {
453- split_char_inner ( s, sep, count, true , true )
456+ pub fn each_splitn_char ( s : & str , sep : char , count : uint , it : & fn ( & str ) -> bool ) {
457+ each_split_char_inner ( s, sep, count, true , true , it )
454458}
455459
456460/// Like `each_split_char`, but empty substrings are not passed to `it`
457- pub fn split_char_nonempty ( s : & str , sep : char ) -> ~[ ~str ] {
458- split_char_inner ( s, sep, len ( s) , false , false )
459- }
460-
461- /**
462- * Like `split_char`, but a trailing empty string is omitted
463- * (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
464- */
465- pub fn split_char_no_trailing ( s : & str , sep : char ) -> ~[ ~str ] {
466- split_char_inner ( s, sep, len ( s) , true , false )
461+ pub fn each_split_char_nonempty ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
462+ each_split_char_inner ( s, sep, len ( s) , false , false , it)
467463}
468464
469- fn split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
470- allow_trailing_empty : bool ) -> ~ [ ~ str ] {
465+ fn each_split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
466+ allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) { // `it` receives each piece as a slice of `s`; returning false stops the iteration
471467 if sep < 128 u as char { // ASCII separator: compare raw bytes instead of decoding chars
472468 let b = sep as u8 , l = len ( s) ;
473- let mut result = ~ [ ] , done = 0 u;
469+ let mut done = 0 u;
474470 let mut i = 0 u, start = 0 u;
475471 while i < l && done < count {
476472 if s[ i] == b {
477473 if allow_empty || start < i {
478- unsafe {
479- result. push ( raw:: slice_bytes_unique ( s, start, i) ) ;
480- }
474+ if !it ( unsafe { raw:: slice_bytes ( s, start, i) } ) { return ; }
481475 }
482476 start = i + 1 u;
483477 done += 1 u;
@@ -486,68 +480,57 @@ fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
486480 }
487481 // only yield the trailing substring if it is non-empty or empty trailing pieces are allowed
488482 if allow_trailing_empty || start < l {
489- unsafe { result . push ( raw:: slice_bytes_unique ( s, start, l) ) } ;
483+ if ! it ( unsafe { raw:: slice_bytes ( s, start, l) } ) { return ; }
490484 }
491- result
492485 } else {
493- split_inner ( s, |cur| cur == sep, count, allow_empty, allow_trailing_empty)
486+ each_split_inner ( s, |cur| cur == sep, count, allow_empty, allow_trailing_empty, it ) // non-ASCII separator: defer to the char-decoding splitter
494487 }
495488}
496489
497-
498490/// Splits a string into substrings using a character function
499- pub fn split ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
500- split_inner ( s, sepfn, len ( s) , true , true )
491+ pub fn each_split ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
492+ each_split_inner ( s, sepfn, len ( s) , true , true , it)
493+ }
494+
495+ /**
496+ * Like `each_split`, but a trailing empty string is omitted
497+ * (e.g. `each_split_no_trailing("A B ", |c| c == ' ', it)` passes "A" and "B" to `it`)
498+ */
499+ pub fn each_split_no_trailing ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
500+ each_split_inner ( s, sepfn, len ( s) , true , false , it)
501501}
502502
503503/**
504504 * Splits a string into substrings using a character function, cutting at
505505 * most `count` times.
506506 */
507- pub fn splitn ( s : & str ,
508- sepfn : & fn ( char ) -> bool ,
509- count : uint )
510- -> ~[ ~str ] {
511- split_inner ( s, sepfn, count, true , true )
507+ pub fn each_splitn ( s : & str , sepfn : & fn ( char ) -> bool , count : uint , it : & fn ( & str ) -> bool ) {
508+ each_split_inner ( s, sepfn, count, true , true , it)
512509}
513510
514511/// Like `each_split`, but empty substrings are not passed to `it`
515- pub fn split_nonempty ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
516- split_inner ( s, sepfn, len ( s) , false , false )
517- }
518-
519-
520- /**
521- * Like `split`, but a trailing empty string is omitted
522- * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
523- */
524- pub fn split_no_trailing ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
525- split_inner ( s, sepfn, len ( s) , true , false )
512+ pub fn each_split_nonempty ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
513+ each_split_inner ( s, sepfn, len ( s) , false , false , it)
526514}
527515
528- fn split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
529- allow_empty : bool , allow_trailing_empty : bool ) -> ~ [ ~ str ] {
516+ pure fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517+ allow_empty : bool , allow_trailing_empty : bool ) , it : & fn ( & str ) -> bool ) {
530518 let l = len ( s) ;
531- let mut result = ~ [ ] , i = 0 u, start = 0 u, done = 0 u;
519+ let mut i = 0 u, start = 0 u, done = 0 u;
532520 while i < l && done < count {
533521 let CharRange { ch, next} = char_range_at ( s, i) ;
534522 if sepfn ( ch) {
535523 if allow_empty || start < i {
536- unsafe {
537- result. push ( raw:: slice_bytes_unique ( s, start, i) ) ;
538- }
524+ if !it ( unsafe { raw:: slice_bytes ( s, start, i) } ) { return ; }
539525 }
540526 start = next;
541527 done += 1 u;
542528 }
543529 i = next;
544530 }
545531 if allow_trailing_empty || start < l {
546- unsafe {
547- result. push ( raw:: slice_bytes_unique ( s, start, l) ) ;
548- }
532+ if !it ( unsafe { raw:: slice_bytes ( s, start, l) } ) { return ; }
549533 }
550- result
551534}
552535
553536// See Issue #1932 for why this is a naive search
@@ -596,22 +579,18 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
596579 * fail_unless!(["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", "."))
597580 * ~~~
598581 */
599- pub fn split_str ( s : & ' a str , sep : & ' b str ) -> ~[ ~str ] {
600- let mut result = ~[ ] ;
582+ pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
601583 do iter_between_matches ( s, sep) |from, to| {
602- unsafe { result . push ( raw:: slice_bytes_unique ( s, from, to) ) ; }
584+ if ! it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
603585 }
604- result
605586}
606587
607- pub fn split_str_nonempty ( s : & ' a str , sep : & ' b str ) -> ~[ ~str ] {
608- let mut result = ~[ ] ;
588+ pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
609589 do iter_between_matches ( s, sep) |from, to| {
610590 if to > from {
611- unsafe { result . push ( raw:: slice_bytes_unique ( s, from, to) ) ; }
591+ if ! it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
612592 }
613593 }
614- result
615594}
616595
617596/// Levenshtein Distance between two strings
@@ -651,34 +630,32 @@ pub fn levdistance(s: &str, t: &str) -> uint {
651630/**
652631 * Iterates over the substrings of a string separated by LF ('\n'), passing each one to `it`; a trailing empty substring is omitted.
653632 */
654- pub fn lines ( s : & str ) -> ~[ ~str ] {
655- split_char_no_trailing ( s, '\n' )
656- }
633+ pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char_no_trailing ( s, '\n' , it) }
657634
658635/**
659636 * Iterates over the substrings of a string separated by LF ('\n')
660637 * and/or CR LF ("\r\n"), passing each one to `it`
661638 */
662- pub fn lines_any ( s : & str ) -> ~[ ~str ] {
663- vec:: map ( lines ( s) , |s| {
664- let l = len ( * s) ;
665- let mut cp = copy * s;
639+ pub fn each_line_any ( s : & str , it : & fn ( & str ) -> bool ) {
640+ for each_line( s) |s| {
641+ let l = s. len ( ) ;
666642 if l > 0 u && s[ l - 1 u] == '\r' as u8 {
667- unsafe { raw:: set_len ( & mut cp, l - 1 u) ; }
643+ if !it ( unsafe { raw:: slice_bytes ( s, 0 , l - 1 ) } ) { return ; }
644+ } else {
645+ if !it ( s ) { return ; }
668646 }
669- cp
670- } )
647+ }
671648}
672649
673650/// Iterates over the whitespace-separated words of a string, passing each one to `it`
674- pub fn words ( s : & str ) -> ~ [ ~ str ] {
675- split_nonempty ( s, char:: is_whitespace)
651+ pub fn each_word ( s : & str , it : & fn ( & str ) -> bool ) {
652+ each_split_nonempty ( s, |c| char:: is_whitespace ( c ) , it )
676653}
677654
678655/** Split a string into a vector of substrings,
679- * each of which is less than a limit
656+ * each of which is shorter, in bytes, than a limit
680657 */
681- pub fn split_within ( ss : & str , lim : uint ) -> ~ [ ~ str ] {
658+ pub fn each_split_within ( ss : & str , lim : uint , it : & fn ( & str ) -> bool ) {
682659 let words = str:: words ( ss) ;
683660
684661 // empty?
@@ -705,6 +682,22 @@ pub fn split_within(ss: &str, lim: uint) -> ~[~str] {
705682 if row != ~" " { rows. push ( row) ; }
706683
707684 rows
685+ // NOTE: Finish change here
686+
687+ let mut last_slice_i = 0 , last_word_i = 0 , word_start = true ;
688+ for each_chari( s) |i, c| {
689+ if ( i - last_slice_i) <= lim {
690+ if char:: is_whitespace ( c) {
691+
692+ } else {
693+
694+ }
695+ } else {
696+
697+ }
698+
699+
700+ }
708701}
709702
710703
@@ -997,10 +990,17 @@ pub fn eachi_reverse(s: &str, it: &fn(uint, u8) -> bool) {
997990 }
998991}
999992
1000- /// Iterates over the chars in a string
993+
994+ /// Iterate over each char of a string, without allocating
1001995#[ inline( always) ]
1002996pub fn each_char ( s : & str , it : & fn ( char ) -> bool ) {
1003- each_chari ( s, |_i, c| it ( c) )
997+ let mut i = 0 ;
998+ let len = len ( s) ;
999+ while i < len {
1000+ let CharRange { ch, next} = char_range_at ( s, i) ;
1001+ if !it ( ch) { return ; }
1002+ i = next;
1003+ }
10041004}
10051005
10061006/// Iterates over the chars in a string, with indices
@@ -1038,31 +1038,34 @@ pub fn each_chari_reverse(s: &str, it: &fn(uint, char) -> bool) {
10381038 }
10391039}
10401040
1041- /// Apply a function to each substring after splitting by character
1041+ /////////////////////////////////////////////////////////////////////////////////////////////////
1042+ // NOTE: Remove afterwards
1043+ /* /// Apply a function to each substring after splitting by character
10421044pub fn split_char_each(ss: &str, cc: char, ff: &fn(v: &str) -> bool) {
10431045 vec::each(split_char(ss, cc), |s| ff(*s))
10441046}
10451047
1046- / **
1048+ **
10471049 * Apply a function to each substring after splitting by character, up to
10481050 * `count` times
1049- */
1051+ *
10501052pub fn splitn_char_each(ss: &str, sep: char, count: uint,
10511053 ff: &fn(v: &str) -> bool) {
10521054 vec::each(splitn_char(ss, sep, count), |s| ff(*s))
10531055}
10541056
1055- /// Apply a function to each word
1057+ / Apply a function to each word
10561058pub fn words_each(ss: &str, ff: &fn(v: &str) -> bool) {
10571059 vec::each(words(ss), |s| ff(*s))
10581060}
10591061
1060- / **
1062+ **
10611063 * Apply a function to each line (by '\n')
1062- */
1064+ *
10631065pub fn lines_each(ss: &str, ff: &fn(v: &str) -> bool) {
10641066 vec::each(lines(ss), |s| ff(*s))
1065- }
1067+ } */
1068+ /////////////////////////////////////////////////////////////////////////////////////////////////
10661069
10671070/*
10681071Section: Searching
@@ -2511,7 +2514,7 @@ impl OwnedStr for ~str {
25112514impl Clone for ~str {
25122515 #[inline(always)]
25132516 fn clone(&self) -> ~str {
2514- self.to_str() // hilarious
2517+ from_slice(*self)
25152518 }
25162519}
25172520
0 commit comments