@@ -186,6 +186,7 @@ pub fn push_str_no_overallocate(lhs: &mut ~str, rhs: &str) {
186186 raw:: set_len( lhs, llen + rlen) ;
187187 }
188188}
189+
189190/// Appends a string slice to the back of a string
190191#[ inline( always) ]
191192pub fn push_str ( lhs : & mut ~str , rhs : & str ) {
@@ -214,7 +215,6 @@ pub fn append(lhs: ~str, rhs: &str) -> ~str {
214215 v
215216}
216217
217-
218218/// Concatenate a vector of strings
219219pub fn concat ( v : & [ ~str ] ) -> ~str {
220220 let mut s: ~str = ~"";
@@ -435,35 +435,32 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str {
435435}
436436
437437/// Splits a string into substrings at each occurrence of a given character
438- pub fn each_split_char ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
438+ pub fn each_split_char ( s : & ' a str , sep : char , it : & fn ( & ' a str ) -> bool ) {
439439 each_split_char_inner ( s, sep, len ( s) , true , true , it)
440440}
441441
442- /**
443- * Like `split_char`, but a trailing empty string is omitted
444- * (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
445- */
446- pub fn each_split_char_no_trailing ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
442+ /// Like `each_split_char`, but a trailing empty string is omitted
443+ pub fn each_split_char_no_trailing ( s : & ' a str , sep : char , it : & fn ( & ' a str ) -> bool ) {
447444 each_split_char_inner ( s, sep, len ( s) , true , false , it)
448445}
449446
450447/**
451448 * Splits a string into substrings at each occurrence of a given
452449 * character up to 'count' times.
453450 *
454- * The byte must be a valid UTF-8/ASCII byte
451+ * The character must be a valid UTF-8/ASCII character
455452 */
456- pub fn each_splitn_char ( s : & str , sep : char , count : uint , it : & fn ( & str ) -> bool ) {
453+ pub fn each_splitn_char ( s : & ' a str , sep : char , count : uint , it : & fn ( & ' a str ) -> bool ) {
457454 each_split_char_inner ( s, sep, count, true , true , it)
458455}
459456
460- /// Like `split_char `, but omits empty strings from the returned vector
461- pub fn each_split_char_nonempty ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
457+ /// Like `each_split_char`, but omits empty strings
458+ pub fn each_split_char_nonempty ( s : & ' a str , sep : char , it : & fn ( & ' a str ) -> bool ) {
462459 each_split_char_inner ( s, sep, len ( s) , false , false , it)
463460}
464461
465- fn each_split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
466- allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
462+ fn each_split_char_inner ( s : & ' a str , sep : char , count : uint , allow_empty : bool ,
463+ allow_trailing_empty : bool , it : & fn ( & ' a str ) -> bool ) {
467464 if sep < 128 u as char {
468465 let b = sep as u8 , l = len ( s) ;
469466 let mut done = 0 u;
@@ -478,7 +475,7 @@ fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
478475 }
479476 i += 1 u;
480477 }
481- // only push a non-empty trailing substring
478+ // only slice a non-empty trailing substring
482479 if allow_trailing_empty || start < l {
483480 if !it ( unsafe { raw:: slice_bytes ( s, start, l) } ) { return ; }
484481 }
@@ -488,33 +485,30 @@ fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
488485}
489486
490487/// Splits a string into substrings using a character function
491- pub fn each_split ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
488+ pub fn each_split ( s : & ' a str , sepfn : & fn ( char ) -> bool , it : & fn ( & ' a str ) -> bool ) {
492489 each_split_inner ( s, sepfn, len ( s) , true , true , it)
493490}
494491
495- /**
496- * Like `split`, but a trailing empty string is omitted
497- * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
498- */
499- pub fn each_split_no_trailing ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
492+ /// Like `each_split`, but a trailing empty string is omitted
493+ pub fn each_split_no_trailing ( s : & ' a str , sepfn : & fn ( char ) -> bool , it : & fn ( & ' a str ) -> bool ) {
500494 each_split_inner ( s, sepfn, len ( s) , true , false , it)
501495}
502496
503497/**
504498 * Splits a string into substrings using a character function, cutting at
505499 * most `count` times.
506500 */
507- pub fn each_splitn ( s : & str , sepfn : & fn ( char ) -> bool , count : uint , it : & fn ( & str ) -> bool ) {
501+ pub fn each_splitn ( s : & ' a str , sepfn : & fn ( char ) -> bool , count : uint , it : & fn ( & ' a str ) -> bool ) {
508502 each_split_inner ( s, sepfn, count, true , true , it)
509503}
510504
511- /// Like `split `, but omits empty strings from the returned vector
512- pub fn each_split_nonempty ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
505+ /// Like `each_split`, but omits empty strings
506+ pub fn each_split_nonempty ( s : & ' a str , sepfn : & fn ( char ) -> bool , it : & fn ( & ' a str ) -> bool ) {
513507 each_split_inner ( s, sepfn, len ( s) , false , false , it)
514508}
515509
516- fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517- allow_empty : bool , allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
510+ fn each_split_inner ( s : & ' a str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
511+ allow_empty : bool , allow_trailing_empty : bool , it : & fn ( & ' a str ) -> bool ) {
518512 let l = len ( s) ;
519513 let mut i = 0 u, start = 0 u, done = 0 u;
520514 while i < l && done < count {
@@ -576,16 +570,18 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) {
576570 * # Example
577571 *
578572 * ~~~
579- * fail_unless!(["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", "."))
573+ * let mut v = ~[];
574+ * for each_split_str(".XXX.YYY.", ".") |subs| { v.push(subs); }
575+ * fail_unless!(v == ["", "XXX", "YYY", ""]);
580576 * ~~~
581577 */
582- pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
578+ pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & ' a str ) -> bool ) {
583579 for iter_between_matches( s, sep) |from, to| {
584580 if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
585581 }
586582}
587583
588- pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
584+ pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & ' a str ) -> bool ) {
589585 for iter_between_matches( s, sep) |from, to| {
590586 if to > from {
591587 if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
@@ -628,15 +624,17 @@ pub fn levdistance(s: &str, t: &str) -> uint {
628624}
629625
630626/**
631- * Splits a string into a vector of the substrings separated by LF ('\n').
627+ * Splits a string into substrings separated by LF ('\n').
632628 */
633- pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char_no_trailing ( s, '\n' , it) }
629+ pub fn each_line ( s : & ' a str , it : & fn ( & ' a str ) -> bool ) {
630+ each_split_char_no_trailing ( s, '\n' , it)
631+ }
634632
635633/**
636- * Splits a string into a vector of the substrings separated by LF ('\n')
634+ * Splits a string into substrings separated by LF ('\n')
637635 * and/or CR LF ("\r\n")
638636 */
639- pub fn each_line_any ( s : & str , it : & fn ( & str ) -> bool ) {
637+ pub fn each_line_any ( s : & ' a str , it : & fn ( & ' a str ) -> bool ) {
640638 for each_line( s) |s| {
641639 let l = s. len ( ) ;
642640 if l > 0 u && s[ l - 1 u] == '\r' as u8 {
@@ -647,33 +645,46 @@ pub fn each_line_any(s: &str, it: &fn(&str) -> bool) {
647645 }
648646}
649647
650- /// Splits a string into a vector of the substrings separated by whitespace
651- pub fn each_word ( s : & str , it : & fn ( & str ) -> bool ) {
652- each_split_nonempty ( s, |c| char:: is_whitespace ( c ) , it)
648+ /// Splits a string into substrings separated by whitespace
649+ pub fn each_word ( s : & ' a str , it : & fn ( & ' a str ) -> bool ) {
650+ each_split_nonempty ( s, char:: is_whitespace, it)
653651}
654652
655- /** Split a string into a vector of substrings,
656- * each of which is less bytes long than a limit
653+ /** Splits a string into substrings with possibly internal whitespace,
654+ * each of them at most `lim` bytes long. The substrings have leading and trailing
655+ * whitespace removed, and are only cut at whitespace boundaries.
656+ *
657+ * # Failure
658+ *
659+ * Fails during iteration if the string contains a non-whitespace
660+ * sequence longer than the limit.
657661 */
658- pub fn each_split_within( ss : & str , lim : uint , it : & fn ( & str ) -> bool ) {
659- // Just for fun, let's write this as an automaton
662+ pub fn each_split_within( ss : & ' a str , lim : uint , it : & fn ( & ' a str ) -> bool ) {
663+ // Just for fun, let's write this as a state machine:
664+
660665 enum SplitWithinState {
661- A , // Leading whitespace, initial state
662- B , // Words
663- C , // Internal and trailing whitespace
666+ A , // leading whitespace, initial state
667+ B , // words
668+ C , // internal and trailing whitespace
669+ }
670+ enum Whitespace {
671+ Ws , // current char is whitespace
672+ Cr // current char is not whitespace
673+ }
674+ enum LengthLimit {
675+ UnderLim , // current char makes current substring still fit in limit
676+ OverLim // current char makes current substring no longer fit in limit
664677 }
665- enum Whitespace { Ws , Cr }
666- enum LengthLimit { UnderLim , OverLim }
667678
668679 let mut slice_start = 0 ;
669680 let mut last_start = 0 ;
670681 let mut last_end = 0 ;
671682 let mut state = A ;
672683
673684 let mut cont = true ;
674- let slice = || { cont = it ( ss . slice ( slice_start, last_end) ) } ;
685+ let slice: & fn ( ) = || { cont = it ( slice ( ss , slice_start, last_end) ) } ;
675686
676- let machine = |i : uint , c : char | {
687+ let machine: & fn ( uint , char ) -> bool = |i, c| {
677688 let whitespace = if char:: is_whitespace ( c) { Ws } else { Cr } ;
678689 let limit = if ( i - slice_start + 1 ) <= lim { UnderLim } else { OverLim } ;
679690
@@ -693,12 +704,13 @@ pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) {
693704 (C, Ws, OverLim) => { slice(); A }
694705 (C, Ws, UnderLim) => { C }
695706 };
707+
696708 cont
697709 };
698710
699711 str::each_chari(ss, machine);
700712
701- // Let the automaton 'run out'
713+ // Let the automaton 'run out' by supplying trailing whitespace
702714 let mut fake_i = ss.len();
703715 while cont && match state { B | C => true, A => false } {
704716 machine(fake_i, ' ');
@@ -1186,8 +1198,7 @@ pub fn rfind_char_from(s: &str, c: char, start: uint) -> Option<uint> {
11861198 * or equal to `len(s)`. `start` must be the index of a character boundary,
11871199 * as defined by `is_char_boundary`.
11881200 */
1189- pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint)
1190- -> Option<uint> {
1201+ pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint) -> Option<uint> {
11911202 if c < 128u as char {
11921203 fail_unless!(start >= end);
11931204 fail_unless!(start <= len(s));
@@ -1268,11 +1279,7 @@ pub fn find_from(s: &str, start: uint, f: &fn(char)
12681279 * or equal to `len(s)`. `start` must be the index of a character
12691280 * boundary, as defined by `is_char_boundary`.
12701281 */
1271- pub fn find_between(s: &str,
1272- start: uint,
1273- end: uint,
1274- f: &fn(char) -> bool)
1275- -> Option<uint> {
1282+ pub fn find_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option<uint> {
12761283 fail_unless!(start <= end);
12771284 fail_unless!(end <= len(s));
12781285 fail_unless!(is_char_boundary(s, start));
@@ -1323,8 +1330,7 @@ pub fn rfind(s: &str, f: &fn(char) -> bool) -> Option<uint> {
13231330 * `start` must be less than or equal to `len(s)`, `start` must be the
13241331 * index of a character boundary, as defined by `is_char_boundary`
13251332 */
1326- pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool)
1327- -> Option<uint> {
1333+ pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) -> Option<uint> {
13281334 rfind_between(s, start, 0u, f)
13291335}
13301336
@@ -1350,9 +1356,7 @@ pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool)
13501356 * than or equal to `len(s)`. `start` must be the index of a character
13511357 * boundary, as defined by `is_char_boundary`
13521358 */
1353- pub fn rfind_between(s: &str, start: uint, end: uint,
1354- f: &fn(char) -> bool)
1355- -> Option<uint> {
1359+ pub fn rfind_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option<uint> {
13561360 fail_unless!(start >= end);
13571361 fail_unless!(start <= len(s));
13581362 fail_unless!(is_char_boundary(s, start));
@@ -1408,8 +1412,7 @@ pub fn find_str(haystack: &'a str, needle: &'b str) -> Option<uint> {
14081412 *
14091413 * `start` must be less than or equal to `len(s)`
14101414 */
1411- pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint)
1412- -> Option<uint> {
1415+ pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) -> Option<uint> {
14131416 find_str_between(haystack, needle, start, len(haystack))
14141417}
14151418
@@ -1433,9 +1436,8 @@ pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint)
14331436 * `start` must be less than or equal to `end` and `end` must be less than
14341437 * or equal to `len(s)`.
14351438 */
1436- pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint,
1437- end:uint)
1438- -> Option<uint> {
1439+ pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint, end:uint)
1440+ -> Option<uint> {
14391441 // See Issue #1932 for why this is a naive search
14401442 fail_unless!(end <= len(haystack));
14411443 let needle_len = len(needle);
@@ -1638,7 +1640,6 @@ pub fn utf16_chars(v: &[u16], f: &fn(char)) {
16381640 }
16391641}
16401642
1641-
16421643pub fn from_utf16(v: &[u16]) -> ~str {
16431644 let mut buf = ~" ";
16441645 unsafe {
@@ -1955,7 +1956,6 @@ pub fn as_c_str<T>(s: &str, f: &fn(*libc::c_char) -> T) -> T {
19551956 }
19561957}
19571958
1958-
19591959/**
19601960 * Work with the byte buffer and length of a slice.
19611961 *
0 commit comments