@@ -21,7 +21,7 @@ use char;
2121use fmt;
2222use iter:: { Map , Cloned , FusedIterator , TrustedLen , Filter } ;
2323use iter_private:: TrustedRandomAccess ;
24- use slice:: { self , SliceIndex } ;
24+ use slice:: { self , SliceIndex , Split as SliceSplit } ;
2525use mem;
2626
2727pub mod pattern;
@@ -2722,7 +2722,10 @@ impl str {
27222722 /// the original string slice, separated by any amount of whitespace.
27232723 ///
27242724 /// 'Whitespace' is defined according to the terms of the Unicode Derived
2725- /// Core Property `White_Space`.
2725+ /// Core Property `White_Space`. If you only want to split on ASCII whitespace
2726+ /// instead, use [`split_ascii_whitespace`].
2727+ ///
2728+ /// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
27262729 ///
27272730 /// # Examples
27282731 ///
@@ -2756,6 +2759,53 @@ impl str {
27562759 SplitWhitespace { inner : self . split ( IsWhitespace ) . filter ( IsNotEmpty ) }
27572760 }
27582761
2762+ /// Split a string slice by ASCII whitespace.
2763+ ///
2764+ /// The iterator returned will return string slices that are sub-slices of
2765+ /// the original string slice, separated by any amount of ASCII whitespace.
2766+ ///
2767+ /// To split by Unicode `Whitespace` instead, use [`split_whitespace`].
2768+ ///
2769+ /// [`split_whitespace`]: #method.split_whitespace
2770+ ///
2771+ /// # Examples
2772+ ///
2773+ /// Basic usage:
2774+ ///
2775+ /// ```
2776+ /// #![feature(split_ascii_whitespace)]
2777+ /// let mut iter = "A few words".split_ascii_whitespace();
2778+ ///
2779+ /// assert_eq!(Some("A"), iter.next());
2780+ /// assert_eq!(Some("few"), iter.next());
2781+ /// assert_eq!(Some("words"), iter.next());
2782+ ///
2783+ /// assert_eq!(None, iter.next());
2784+ /// ```
2785+ ///
2786+ /// All kinds of ASCII whitespace are considered:
2787+ ///
2788+ /// ```
2789+ /// let mut iter = " Mary had\ta little \n\t lamb".split_whitespace();
2790+ /// assert_eq!(Some("Mary"), iter.next());
2791+ /// assert_eq!(Some("had"), iter.next());
2792+ /// assert_eq!(Some("a"), iter.next());
2793+ /// assert_eq!(Some("little"), iter.next());
2794+ /// assert_eq!(Some("lamb"), iter.next());
2795+ ///
2796+ /// assert_eq!(None, iter.next());
2797+ /// ```
2798+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
2799+ #[ inline]
2800+ pub fn split_ascii_whitespace ( & self ) -> SplitAsciiWhitespace {
2801+ let inner = self
2802+ . as_bytes ( )
2803+ . split ( IsAsciiWhitespace )
2804+ . filter ( IsNotEmpty )
2805+ . map ( UnsafeBytesToStr ) ;
2806+ SplitAsciiWhitespace { inner }
2807+ }
2808+
27592809 /// An iterator over the lines of a string, as string slices.
27602810 ///
27612811 /// Lines are ended with either a newline (`\n`) or a carriage return with
@@ -3895,6 +3945,20 @@ pub struct SplitWhitespace<'a> {
38953945 inner : Filter < Split < ' a , IsWhitespace > , IsNotEmpty > ,
38963946}
38973947
3948+ /// An iterator over the non-ASCII-whitespace substrings of a string,
3949+ /// separated by any amount of ASCII whitespace.
3950+ ///
3951+ /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
3952+ /// See its documentation for more.
3953+ ///
3954+ /// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
3955+ /// [`str`]: ../../std/primitive.str.html
3956+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
3957+ #[ derive( Clone , Debug ) ]
3958+ pub struct SplitAsciiWhitespace < ' a > {
3959+ inner : Map < Filter < SliceSplit < ' a , u8 , IsAsciiWhitespace > , IsNotEmpty > , UnsafeBytesToStr > ,
3960+ }
3961+
/// Unit-struct predicate that `split_whitespace` passes to `split`. Being a
/// named type (with manual `FnMut<(char,)>` impls) lets it appear in the
/// type of `SplitWhitespace`'s `inner` field.
38983962#[ derive( Clone ) ]
38993963struct IsWhitespace ;
39003964
@@ -3914,37 +3978,98 @@ impl FnMut<(char, )> for IsWhitespace {
39143978 }
39153979}
39163980
3981+ #[ derive( Clone ) ]
3982+ struct IsAsciiWhitespace ;
3983+
3984+ impl < ' a > FnOnce < ( & ' a u8 , ) > for IsAsciiWhitespace {
3985+ type Output = bool ;
3986+
3987+ #[ inline]
3988+ extern "rust-call" fn call_once ( mut self , arg : ( & u8 , ) ) -> bool {
3989+ self . call_mut ( arg)
3990+ }
3991+ }
3992+
3993+ impl < ' a > FnMut < ( & ' a u8 , ) > for IsAsciiWhitespace {
3994+ #[ inline]
3995+ extern "rust-call" fn call_mut ( & mut self , arg : ( & u8 , ) ) -> bool {
3996+ arg. 0 . is_ascii_whitespace ( )
3997+ }
3998+ }
3999+
39174000#[ derive( Clone ) ]
39184001struct IsNotEmpty ;
39194002
39204003impl < ' a , ' b > FnOnce < ( & ' a & ' b str , ) > for IsNotEmpty {
39214004 type Output = bool ;
39224005
39234006 #[ inline]
3924- extern "rust-call" fn call_once ( mut self , arg : ( & & str , ) ) -> bool {
4007+ extern "rust-call" fn call_once ( mut self , arg : ( & ' a & ' b str , ) ) -> bool {
39254008 self . call_mut ( arg)
39264009 }
39274010}
39284011
39294012impl < ' a , ' b > FnMut < ( & ' a & ' b str , ) > for IsNotEmpty {
39304013 #[ inline]
3931- extern "rust-call" fn call_mut ( & mut self , arg : ( & & str , ) ) -> bool {
4014+ extern "rust-call" fn call_mut ( & mut self , arg : ( & ' a & ' b str , ) ) -> bool {
4015+ !arg. 0 . is_empty ( )
4016+ }
4017+ }
4018+
4019+ impl < ' a , ' b > FnOnce < ( & ' a & ' b [ u8 ] , ) > for IsNotEmpty {
4020+ type Output = bool ;
4021+
4022+ #[ inline]
4023+ extern "rust-call" fn call_once ( mut self , arg : ( & ' a & ' b [ u8 ] , ) ) -> bool {
4024+ self . call_mut ( arg)
4025+ }
4026+ }
4027+
4028+ impl < ' a , ' b > FnMut < ( & ' a & ' b [ u8 ] , ) > for IsNotEmpty {
4029+ #[ inline]
4030+ extern "rust-call" fn call_mut ( & mut self , arg : ( & ' a & ' b [ u8 ] , ) ) -> bool {
39324031 !arg. 0 . is_empty ( )
39334032 }
39344033}
39354034
4035+ #[ derive( Clone ) ]
4036+ struct UnsafeBytesToStr ;
4037+
4038+ impl < ' a > FnOnce < ( & ' a [ u8 ] , ) > for UnsafeBytesToStr {
4039+ type Output = & ' a str ;
4040+
4041+ #[ inline]
4042+ extern "rust-call" fn call_once ( mut self , arg : ( & ' a [ u8 ] , ) ) -> & ' a str {
4043+ self . call_mut ( arg)
4044+ }
4045+ }
4046+
4047+ impl < ' a > FnMut < ( & ' a [ u8 ] , ) > for UnsafeBytesToStr {
4048+ #[ inline]
4049+ extern "rust-call" fn call_mut ( & mut self , arg : ( & ' a [ u8 ] , ) ) -> & ' a str {
4050+ unsafe { from_utf8_unchecked ( arg. 0 ) }
4051+ }
4052+ }
4053+
39364054
39374055#[ stable( feature = "split_whitespace" , since = "1.1.0" ) ]
39384056impl < ' a > Iterator for SplitWhitespace < ' a > {
39394057 type Item = & ' a str ;
39404058
4059+ #[ inline]
39414060 fn next ( & mut self ) -> Option < & ' a str > {
39424061 self . inner . next ( )
39434062 }
4063+
4064+ #[ inline]
4065+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
4066+ self . inner . size_hint ( )
4067+ }
39444068}
39454069
39464070#[ stable( feature = "split_whitespace" , since = "1.1.0" ) ]
39474071impl < ' a > DoubleEndedIterator for SplitWhitespace < ' a > {
4072+ #[ inline]
39484073 fn next_back ( & mut self ) -> Option < & ' a str > {
39494074 self . inner . next_back ( )
39504075 }
@@ -3953,6 +4078,32 @@ impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
// Marker impl only: opts `SplitWhitespace` into the `FusedIterator` trait;
// there are no methods to provide.
39534078#[ stable( feature = "fused" , since = "1.26.0" ) ]
39544079impl < ' a > FusedIterator for SplitWhitespace < ' a > { }
39554080
4081+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
4082+ impl < ' a > Iterator for SplitAsciiWhitespace < ' a > {
4083+ type Item = & ' a str ;
4084+
4085+ #[ inline]
4086+ fn next ( & mut self ) -> Option < & ' a str > {
4087+ self . inner . next ( )
4088+ }
4089+
4090+ #[ inline]
4091+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
4092+ self . inner . size_hint ( )
4093+ }
4094+ }
4095+
4096+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
4097+ impl < ' a > DoubleEndedIterator for SplitAsciiWhitespace < ' a > {
4098+ #[ inline]
4099+ fn next_back ( & mut self ) -> Option < & ' a str > {
4100+ self . inner . next_back ( )
4101+ }
4102+ }
4103+
4104+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
4105+ impl < ' a > FusedIterator for SplitAsciiWhitespace < ' a > { }
4106+
39564107/// An iterator of [`u16`] over the string encoded as UTF-16.
39574108///
39584109/// [`u16`]: ../../std/primitive.u16.html
0 commit comments