@@ -6,9 +6,10 @@ use crate::cmp;
66use crate :: collections:: TryReserveError ;
77use crate :: fmt;
88use crate :: hash:: { Hash , Hasher } ;
9- use crate :: ops;
9+ use crate :: ops:: { self , Range } ;
1010use crate :: rc:: Rc ;
11- use crate :: str:: FromStr ;
11+ use crate :: slice;
12+ use crate :: str:: { from_utf8 as str_from_utf8, FromStr } ;
1213use crate :: sync:: Arc ;
1314
1415use crate :: sys:: os_str:: { Buf , Slice } ;
@@ -963,6 +964,83 @@ impl OsStr {
963964 self . inner . as_encoded_bytes ( )
964965 }
965966
967+ /// Takes a substring based on a range that corresponds to the return value of
968+ /// [`OsStr::as_encoded_bytes`].
969+ ///
970+ /// The range's start and end must lie on valid `OsStr` boundaries.
971+ /// A valid `OsStr` boundary is one of:
972+ /// - The start of the string
973+ /// - The end of the string
974+ /// - Immediately before a valid non-empty UTF-8 substring
975+ /// - Immediately after a valid non-empty UTF-8 substring
976+ ///
977+ /// # Panics
978+ ///
979+ /// Panics if `range` does not lie on valid `OsStr` boundaries or if it
980+ /// exceeds the end of the string.
981+ ///
982+ /// # Example
983+ ///
984+ /// ```
985+ /// #![feature(os_str_slice)]
986+ ///
987+ /// use std::ffi::OsStr;
988+ ///
989+ /// let os_str = OsStr::new("foo=bar");
990+ /// let bytes = os_str.as_encoded_bytes();
991+ /// if let Some(index) = bytes.iter().position(|b| *b == b'=') {
992+ /// let key = os_str.slice_encoded_bytes(..index);
993+ /// let value = os_str.slice_encoded_bytes(index + 1..);
994+ /// assert_eq!(key, "foo");
995+ /// assert_eq!(value, "bar");
996+ /// }
997+ /// ```
998+ #[ unstable( feature = "os_str_slice" , issue = "118485" ) ]
999+ pub fn slice_encoded_bytes < R : ops:: RangeBounds < usize > > ( & self , range : R ) -> & Self {
1000+ #[ track_caller]
1001+ fn check_valid_boundary ( bytes : & [ u8 ] , index : usize ) {
1002+ if index == 0 || index == bytes. len ( ) {
1003+ return ;
1004+ }
1005+
1006+ // Fast path
1007+ if bytes[ index - 1 ] . is_ascii ( ) || bytes[ index] . is_ascii ( ) {
1008+ return ;
1009+ }
1010+
1011+ let ( before, after) = bytes. split_at ( index) ;
1012+
1013+ // UTF-8 takes at most 4 bytes per codepoint, so we don't
1014+ // need to check more than that.
1015+ let after = after. get ( ..4 ) . unwrap_or ( after) ;
1016+ match str_from_utf8 ( after) {
1017+ Ok ( _) => return ,
1018+ Err ( err) if err. valid_up_to ( ) != 0 => return ,
1019+ Err ( _) => ( ) ,
1020+ }
1021+
1022+ for len in 2 ..=4 . min ( index) {
1023+ let before = & before[ index - len..] ;
1024+ if str_from_utf8 ( before) . is_ok ( ) {
1025+ return ;
1026+ }
1027+ }
1028+
1029+ panic ! ( "byte index {index} is not an OsStr boundary" ) ;
1030+ }
1031+
1032+ let encoded_bytes = self . as_encoded_bytes ( ) ;
1033+ let Range { start, end } = slice:: range ( range, ..encoded_bytes. len ( ) ) ;
1034+ check_valid_boundary ( encoded_bytes, start) ;
1035+ check_valid_boundary ( encoded_bytes, end) ;
1036+
1037+ // SAFETY: `slice::range` ensures that `start` and `end` are valid
1038+ let slice = unsafe { encoded_bytes. get_unchecked ( start..end) } ;
1039+
1040+ // SAFETY: `slice` comes from `self` and we validated the boundaries
1041+ unsafe { Self :: from_encoded_bytes_unchecked ( slice) }
1042+ }
1043+
9661044 /// Converts this string to its ASCII lower case equivalent in-place.
9671045 ///
9681046 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
0 commit comments