1111#![ allow( dead_code) ] // runtime init functions not used during testing
1212
1313use os:: windows:: prelude:: * ;
14+ use sys:: windows:: os:: current_exe;
1415use sys:: c;
15- use slice;
16- use ops:: Range ;
1716use ffi:: OsString ;
18- use libc:: { c_int, c_void} ;
1917use fmt;
18+ use collections:: VecDeque ;
19+ use core:: iter;
20+ use slice;
21+ use path:: PathBuf ;
2022
/// Runtime argument-initialization hook.
///
/// This implementation does nothing: both parameters are ignored and the
/// body is empty.
pub unsafe fn init(_argc: isize, _argv: *const *const u8) {}
2224
/// Runtime argument-cleanup hook.
///
/// This implementation does nothing: the body is empty.
pub unsafe fn cleanup() {}
2426
2527pub fn args ( ) -> Args {
2628 unsafe {
27- let mut nArgs: c_int = 0 ;
28- let lpCmdLine = c:: GetCommandLineW ( ) ;
29- let szArgList = c:: CommandLineToArgvW ( lpCmdLine, & mut nArgs) ;
30-
31- // szArcList can be NULL if CommandLinToArgvW failed,
32- // but in that case nArgs is 0 so we won't actually
33- // try to read a null pointer
34- Args { cur : szArgList, range : 0 ..( nArgs as isize ) }
29+ let lp_cmd_line = c:: GetCommandLineW ( ) ;
30+ let parsed_args_list = parse_lp_cmd_line (
31+ lp_cmd_line as * const u16 ,
32+ || current_exe ( ) . map ( PathBuf :: into_os_string) . unwrap_or_else ( |_| OsString :: new ( ) ) ) ;
33+
34+ Args { parsed_args_list : parsed_args_list }
3535 }
3636}
3737
38+ /// Implements the Windows command-line argument parsing algorithm, described at
39+ /// <https://docs.microsoft.com/en-us/previous-versions//17w5ykft(v=vs.85)>.
40+ ///
41+ /// Windows includes a function to do this in shell32.dll,
42+ /// but linking with that DLL causes the process to be registered as a GUI application.
43+ /// GUI applications add a bunch of overhead, even if no windows are drawn. See
44+ /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
45+ unsafe fn parse_lp_cmd_line < F : Fn ( ) -> OsString > ( lp_cmd_line : * const u16 , exe_name : F )
46+ -> VecDeque < OsString > {
47+ const BACKSLASH : u16 = '\\' as u16 ;
48+ const QUOTE : u16 = '"' as u16 ;
49+ const TAB : u16 = '\t' as u16 ;
50+ const SPACE : u16 = ' ' as u16 ;
51+ let mut in_quotes = false ;
52+ let mut was_in_quotes = false ;
53+ let mut backslash_count: usize = 0 ;
54+ let mut ret_val = VecDeque :: new ( ) ;
55+ let mut cur = Vec :: new ( ) ;
56+ if lp_cmd_line. is_null ( ) || * lp_cmd_line == 0 {
57+ ret_val. push_back ( exe_name ( ) ) ;
58+ return ret_val;
59+ }
60+ let mut i = 0 ;
61+ // The executable name at the beginning is special.
62+ match * lp_cmd_line {
63+ // The executable name ends at the next quote mark,
64+ // no matter what.
65+ QUOTE => {
66+ loop {
67+ i += 1 ;
68+ if * lp_cmd_line. offset ( i) == 0 {
69+ ret_val. push_back ( OsString :: from_wide (
70+ slice:: from_raw_parts ( lp_cmd_line. offset ( 1 ) , i as usize - 1 )
71+ ) ) ;
72+ return ret_val;
73+ }
74+ if * lp_cmd_line. offset ( i) == QUOTE {
75+ break ;
76+ }
77+ }
78+ ret_val. push_back ( OsString :: from_wide (
79+ slice:: from_raw_parts ( lp_cmd_line. offset ( 1 ) , i as usize - 1 )
80+ ) ) ;
81+ i += 1 ;
82+ }
83+ // Implement quirk: when they say whitespace here,
84+ // they include the entire ASCII control plane:
85+ // "However, if lpCmdLine starts with any amount of whitespace, CommandLineToArgvW
86+ // will consider the first argument to be an empty string. Excess whitespace at the
87+ // end of lpCmdLine is ignored."
88+ 0 ...SPACE => {
89+ ret_val. push_back ( OsString :: new ( ) ) ;
90+ i += 1 ;
91+ } ,
92+ // The executable name ends at the next quote mark,
93+ // no matter what.
94+ _ => {
95+ loop {
96+ i += 1 ;
97+ if * lp_cmd_line. offset ( i) == 0 {
98+ ret_val. push_back ( OsString :: from_wide (
99+ slice:: from_raw_parts ( lp_cmd_line, i as usize )
100+ ) ) ;
101+ return ret_val;
102+ }
103+ if let 0 ...SPACE = * lp_cmd_line. offset ( i) {
104+ break ;
105+ }
106+ }
107+ ret_val. push_back ( OsString :: from_wide (
108+ slice:: from_raw_parts ( lp_cmd_line, i as usize )
109+ ) ) ;
110+ i += 1 ;
111+ }
112+ }
113+ loop {
114+ let c = * lp_cmd_line. offset ( i) ;
115+ match c {
116+ // backslash
117+ BACKSLASH => {
118+ backslash_count += 1 ;
119+ was_in_quotes = false ;
120+ } ,
121+ QUOTE if backslash_count % 2 == 0 => {
122+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
123+ backslash_count = 0 ;
124+ if was_in_quotes {
125+ cur. push ( '"' as u16 ) ;
126+ was_in_quotes = false ;
127+ } else {
128+ was_in_quotes = in_quotes;
129+ in_quotes = !in_quotes;
130+ }
131+ }
132+ QUOTE if backslash_count % 2 != 0 => {
133+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
134+ backslash_count = 0 ;
135+ was_in_quotes = false ;
136+ cur. push ( b'"' as u16 ) ;
137+ }
138+ SPACE | TAB if !in_quotes => {
139+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
140+ if !cur. is_empty ( ) || was_in_quotes {
141+ ret_val. push_back ( OsString :: from_wide ( & cur[ ..] ) ) ;
142+ cur. truncate ( 0 ) ;
143+ }
144+ backslash_count = 0 ;
145+ was_in_quotes = false ;
146+ }
147+ 0x00 => {
148+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
149+ // include empty quoted strings at the end of the arguments list
150+ if !cur. is_empty ( ) || was_in_quotes || in_quotes {
151+ ret_val. push_back ( OsString :: from_wide ( & cur[ ..] ) ) ;
152+ }
153+ break ;
154+ }
155+ _ => {
156+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
157+ backslash_count = 0 ;
158+ was_in_quotes = false ;
159+ cur. push ( c) ;
160+ }
161+ }
162+ i += 1 ;
163+ }
164+ ret_val
165+ }
166+
/// Iterator over the parsed command-line arguments, as returned by `args()`.
pub struct Args {
    // Arguments that have not been yielded yet; the iterator impls pop from
    // the front (`next`) and from the back (`next_back`).
    parsed_args_list: VecDeque<OsString>,
}
42170
43171pub struct ArgsInnerDebug < ' a > {
@@ -48,14 +176,13 @@ impl<'a> fmt::Debug for ArgsInnerDebug<'a> {
48176 fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
49177 f. write_str ( "[" ) ?;
50178 let mut first = true ;
51- for i in self . args . range . clone ( ) {
179+ for i in & self . args . parsed_args_list {
52180 if !first {
53181 f. write_str ( ", " ) ?;
54182 }
55183 first = false ;
56184
57- // Here we do allocation which could be avoided.
58- fmt:: Debug :: fmt ( & unsafe { os_string_from_ptr ( * self . args . cur . offset ( i) ) } , f) ?;
185+ fmt:: Debug :: fmt ( i, f) ?;
59186 }
60187 f. write_str ( "]" ) ?;
61188 Ok ( ( ) )
@@ -70,38 +197,79 @@ impl Args {
70197 }
71198}
72199
73- unsafe fn os_string_from_ptr ( ptr : * mut u16 ) -> OsString {
74- let mut len = 0 ;
75- while * ptr. offset ( len) != 0 { len += 1 ; }
76-
77- // Push it onto the list.
78- let ptr = ptr as * const u16 ;
79- let buf = slice:: from_raw_parts ( ptr, len as usize ) ;
80- OsStringExt :: from_wide ( buf)
81- }
82-
83200impl Iterator for Args {
84201 type Item = OsString ;
85- fn next ( & mut self ) -> Option < OsString > {
86- self . range . next ( ) . map ( |i| unsafe { os_string_from_ptr ( * self . cur . offset ( i) ) } )
202+ fn next ( & mut self ) -> Option < OsString > { self . parsed_args_list . pop_front ( ) }
203+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
204+ ( self . parsed_args_list . len ( ) , Some ( self . parsed_args_list . len ( ) ) )
87205 }
88- fn size_hint ( & self ) -> ( usize , Option < usize > ) { self . range . size_hint ( ) }
89206}
90207
impl DoubleEndedIterator for Args {
    /// Removes and returns the last remaining argument, if any.
    fn next_back(&mut self) -> Option<OsString> { self.parsed_args_list.pop_back() }
}
96211
impl ExactSizeIterator for Args {
    /// Number of arguments that have not been yielded yet.
    fn len(&self) -> usize { self.parsed_args_list.len() }
}
100215
101- impl Drop for Args {
102- fn drop ( & mut self ) {
103- // self.cur can be null if CommandLineToArgvW previously failed,
104- // but LocalFree ignores NULL pointers
105- unsafe { c:: LocalFree ( self . cur as * mut c_void ) ; }
#[cfg(test)]
mod tests {
    use sys::windows::args::*;
    use ffi::OsString;

    /// Encodes `string` as NUL-terminated UTF-16, runs it through
    /// `parse_lp_cmd_line` (with `TEST.EXE` as the fallback executable name)
    /// and asserts that the result equals `parts`.
    fn chk(string: &str, parts: &[&str]) {
        let mut wide: Vec<u16> = OsString::from(string).encode_wide().collect();
        wide.push(0);
        let parsed = unsafe {
            parse_lp_cmd_line(wide.as_ptr() as *const u16, || OsString::from("TEST.EXE"))
        };
        let expected: Vec<OsString> = parts.iter().map(|k| OsString::from(k)).collect();
        assert_eq!(parsed, expected);
    }

    #[test]
    fn empty() {
        chk("", &["TEST.EXE"]);
        chk("\0", &["TEST.EXE"]);
    }

    #[test]
    fn single_words() {
        chk("EXE one_word", &["EXE", "one_word"]);
        chk("EXE a", &["EXE", "a"]);
        chk("EXE 😅", &["EXE", "😅"]);
        chk("EXE 😅🤦", &["EXE", "😅🤦"]);
    }

    #[test]
    fn official_examples() {
        chk(r#"EXE "abc" d e"#, &["EXE", "abc", "d", "e"]);
        chk(r#"EXE a\\\b d"e f"g h"#, &["EXE", r#"a\\\b"#, "de fg", "h"]);
        chk(r#"EXE a\\\"b c d"#, &["EXE", r#"a\"b"#, "c", "d"]);
        chk(r#"EXE a\\\\"b c" d e"#, &["EXE", r#"a\\b c"#, "d", "e"]);
    }

    #[test]
    fn whitespace_behavior() {
        // Each single-space case is paired with a variant that uses a run of
        // spaces, so that collapsing of consecutive whitespace is exercised.
        chk(r#" test"#, &["", "test"]);
        chk(r#"  test"#, &["", "test"]);
        chk(r#" test test2"#, &["", "test", "test2"]);
        chk(r#" test  test2"#, &["", "test", "test2"]);
        chk(r#"test test2 "#, &["test", "test2"]);
        chk(r#"test  test2 "#, &["test", "test2"]);
        chk(r#"test "#, &["test"]);
    }

    #[test]
    fn genius_quotes() {
        chk(r#"EXE "" """#, &["EXE", "", ""]);
        chk(r#"EXE "" """"#, &["EXE", "", "\""]);
        chk(
            r#"EXE "this is """all""" in the same argument""#,
            &["EXE", "this is \"all\" in the same argument"]
        );
        // Deliberately not a raw string: the input has to contain the actual
        // U+0001 control character. Inside `r#"..."#` the escape would not be
        // processed and the parser would see a literal backslash instead.
        chk("EXE \"\u{1}\"\"\"", &["EXE", "\u{1}\""]);
        chk(r#"EXE "a"" a"#, &["EXE", "a\"", "a"]);
    }
}
0 commit comments