@@ -8,15 +8,12 @@ mod tests;
88pub const MAIN_SEP_STR : & str = "\\ " ;
99pub const MAIN_SEP : char = '\\' ;
1010
11- // The unsafety here stems from converting between `&OsStr` and `&[u8]`
12- // and back. This is safe to do because (1) we only look at ASCII
13- // contents of the encoding and (2) new &OsStr values are produced
14- // only from ASCII-bounded slices of existing &OsStr values.
15- fn os_str_as_u8_slice ( s : & OsStr ) -> & [ u8 ] {
16- unsafe { mem:: transmute ( s) }
17- }
18- unsafe fn u8_slice_as_os_str ( s : & [ u8 ] ) -> & OsStr {
19- mem:: transmute ( s)
11+ // Safety: `bytes` must be a valid wtf8 encoded slice
12+ #[ inline]
13+ unsafe fn bytes_as_os_str ( bytes : & [ u8 ] ) -> & OsStr {
14+ // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8,
15+ // which is compatible with &[u8].
16+ mem:: transmute ( bytes)
2017}
2118
2219#[ inline]
@@ -29,79 +26,116 @@ pub fn is_verbatim_sep(b: u8) -> bool {
2926 b == b'\\'
3027}
3128
32- // In most DOS systems, it is not possible to have more than 26 drive letters.
33- // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
34- pub fn is_valid_drive_letter ( disk : u8 ) -> bool {
35- disk. is_ascii_alphabetic ( )
36- }
37-
3829pub fn parse_prefix ( path : & OsStr ) -> Option < Prefix < ' _ > > {
3930 use Prefix :: { DeviceNS , Disk , Verbatim , VerbatimDisk , VerbatimUNC , UNC } ;
4031
41- let path = os_str_as_u8_slice ( path) ;
42-
43- // \\
44- if let Some ( path) = path. strip_prefix ( br"\\" ) {
45- // \\?\
46- if let Some ( path) = path. strip_prefix ( br"?\" ) {
47- // \\?\UNC\server\share
48- if let Some ( path) = path. strip_prefix ( br"UNC\" ) {
49- let ( server, share) = match get_first_two_components ( path, is_verbatim_sep) {
50- Some ( ( server, share) ) => unsafe {
51- ( u8_slice_as_os_str ( server) , u8_slice_as_os_str ( share) )
52- } ,
53- None => ( unsafe { u8_slice_as_os_str ( path) } , OsStr :: new ( "" ) ) ,
54- } ;
55- return Some ( VerbatimUNC ( server, share) ) ;
32+ if let Some ( path) = strip_prefix ( path, r"\\" ) {
33+ // \\
34+ if let Some ( path) = strip_prefix ( path, r"?\" ) {
35+ // \\?\
36+ if let Some ( path) = strip_prefix ( path, r"UNC\" ) {
37+ // \\?\UNC\server\share
38+
39+ let ( server, path) = parse_next_component ( path, true ) ;
40+ let ( share, _) = parse_next_component ( path, true ) ;
41+
42+ Some ( VerbatimUNC ( server, share) )
5643 } else {
57- // \\?\path
58- match path {
59- // \\?\C:\path
60- [ c, b':' , b'\\' , ..] if is_valid_drive_letter ( * c) => {
61- return Some ( VerbatimDisk ( c. to_ascii_uppercase ( ) ) ) ;
62- }
63- // \\?\cat_pics
64- _ => {
65- let idx = path. iter ( ) . position ( |& b| b == b'\\' ) . unwrap_or ( path. len ( ) ) ;
66- let slice = & path[ ..idx] ;
67- return Some ( Verbatim ( unsafe { u8_slice_as_os_str ( slice) } ) ) ;
68- }
44+ let ( prefix, _) = parse_next_component ( path, true ) ;
45+
46+ // in verbatim paths only recognize an exact drive prefix
47+ if let Some ( drive) = parse_drive_exact ( prefix) {
48+ // \\?\C:
49+ Some ( VerbatimDisk ( drive) )
50+ } else {
51+ // \\?\prefix
52+ Some ( Verbatim ( prefix) )
6953 }
7054 }
71- } else if let Some ( path) = path . strip_prefix ( b". \ \ ") {
55+ } else if let Some ( path) = strip_prefix ( path , r". \") {
7256 // \\.\COM42
73- let idx = path. iter ( ) . position ( |& b| b == b'\\' ) . unwrap_or ( path. len ( ) ) ;
74- let slice = & path[ ..idx] ;
75- return Some ( DeviceNS ( unsafe { u8_slice_as_os_str ( slice) } ) ) ;
76- }
77- match get_first_two_components ( path, is_sep_byte) {
78- Some ( ( server, share) ) if !server. is_empty ( ) && !share. is_empty ( ) => {
57+ let ( prefix, _) = parse_next_component ( path, false ) ;
58+ Some ( DeviceNS ( prefix) )
59+ } else {
60+ let ( server, path) = parse_next_component ( path, false ) ;
61+ let ( share, _) = parse_next_component ( path, false ) ;
62+
63+ if !server. is_empty ( ) && !share. is_empty ( ) {
7964 // \\server\share
80- return Some ( unsafe { UNC ( u8_slice_as_os_str ( server) , u8_slice_as_os_str ( share) ) } ) ;
65+ Some ( UNC ( server, share) )
66+ } else {
67+ // no valid prefix beginning with "\\" recognized
68+ None
8169 }
82- _ => { }
8370 }
84- } else if let [ c , b':' , .. ] = path {
71+ } else if let Some ( drive ) = parse_drive ( path) {
8572 // C:
86- if is_valid_drive_letter ( * c) {
87- return Some ( Disk ( c. to_ascii_uppercase ( ) ) ) ;
88- }
73+ Some ( Disk ( drive) )
74+ } else {
75+ // no prefix
76+ None
8977 }
90- None
9178}
9279
93- /// Returns the first two path components with predicate `f`.
94- ///
95- /// The two components returned will be use by caller
96- /// to construct `VerbatimUNC` or `UNC` Windows path prefix.
97- ///
98- /// Returns [`None`] if there are no separators in path.
99- fn get_first_two_components ( path : & [ u8 ] , f : fn ( u8 ) -> bool ) -> Option < ( & [ u8 ] , & [ u8 ] ) > {
100- let idx = path. iter ( ) . position ( |& x| f ( x) ) ?;
101- // Panic safe
102- // The max `idx+1` is `path.len()` and `path[path.len()..]` is a valid index.
103- let ( first, path) = ( & path[ ..idx] , & path[ idx + 1 ..] ) ;
104- let idx = path. iter ( ) . position ( |& x| f ( x) ) . unwrap_or ( path. len ( ) ) ;
105- let second = & path[ ..idx] ;
106- Some ( ( first, second) )
80+ // Parses a drive prefix, e.g. "C:" and "C:\whatever"
81+ fn parse_drive ( prefix : & OsStr ) -> Option < u8 > {
82+ // In most DOS systems, it is not possible to have more than 26 drive letters.
83+ // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
84+ fn is_valid_drive_letter ( drive : & u8 ) -> bool {
85+ drive. is_ascii_alphabetic ( )
86+ }
87+
88+ match prefix. bytes ( ) {
89+ [ drive, b':' , ..] if is_valid_drive_letter ( drive) => Some ( drive. to_ascii_uppercase ( ) ) ,
90+ _ => None ,
91+ }
92+ }
93+
94+ // Parses a drive prefix exactly, e.g. "C:"
95+ fn parse_drive_exact ( prefix : & OsStr ) -> Option < u8 > {
96+ // only parse two bytes: the drive letter and the drive separator
97+ if prefix. len ( ) == 2 { parse_drive ( prefix) } else { None }
98+ }
99+
100+ fn strip_prefix < ' a > ( path : & ' a OsStr , prefix : & str ) -> Option < & ' a OsStr > {
101+ // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
102+ // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
103+ match path. bytes ( ) . strip_prefix ( prefix. as_bytes ( ) ) {
104+ Some ( path) => unsafe { Some ( bytes_as_os_str ( path) ) } ,
105+ None => None ,
106+ }
107+ }
108+
109+ // Parse the next path component.
110+ //
111+ // Returns the next component and the rest of the path excluding the component and separator.
112+ // Does not recognize `/` as a separator character if `verbatim` is true.
113+ fn parse_next_component ( path : & OsStr , verbatim : bool ) -> ( & OsStr , & OsStr ) {
114+ let separator = if verbatim { is_verbatim_sep } else { is_sep_byte } ;
115+
116+ match path. bytes ( ) . iter ( ) . position ( |& x| separator ( x) ) {
117+ Some ( separator_start) => {
118+ let mut separator_end = separator_start + 1 ;
119+
120+ // a series of multiple separator characters is treated as a single separator,
121+ // except in verbatim paths
122+ while !verbatim && separator_end < path. len ( ) && separator ( path. bytes ( ) [ separator_end] )
123+ {
124+ separator_end += 1 ;
125+ }
126+
127+ let component = & path. bytes ( ) [ ..separator_start] ;
128+
129+ // Panic safe
130+ // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
131+ let path = & path. bytes ( ) [ separator_end..] ;
132+
133+ // Safety: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
134+ // is encoded in a single byte, therefore `bytes[separator_start]` and
135+ // `bytes[separator_end]` must be code point boundaries and thus
136+ // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
137+ unsafe { ( bytes_as_os_str ( component) , bytes_as_os_str ( path) ) }
138+ }
139+ None => ( path, OsStr :: new ( "" ) ) ,
140+ }
107141}
0 commit comments