@@ -44,6 +44,128 @@ pub fn args() -> Args {
4444 }
4545}
4646
47+ /// Implements the Windows command-line argument parsing algorithm.
48+ ///
49+ /// Microsoft's documentation for the Windows CLI argument format can be found at
50+ /// <https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments>
51+ ///
52+ /// A more in-depth explanation is here:
53+ /// <https://daviddeley.com/autohotkey/parameters/parameters.htm#WIN>
54+ ///
55+ /// Windows includes a function to do command line parsing in shell32.dll.
56+ /// However, this is not used for two reasons:
57+ ///
58+ /// 1. Linking with that DLL causes the process to be registered as a GUI application.
59+ /// GUI applications add a bunch of overhead, even if no windows are drawn. See
60+ /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
61+ ///
62+ /// 2. It does not follow the modern C/C++ argv rules outlined in the first two links above.
63+ ///
64+ /// This function was tested for equivalence to the C/C++ parsing rules using an
65+ /// extensive test suite available at
66+ /// <https://github.com/ChrisDenton/winarg/tree/std>.
67+ fn parse_lp_cmd_line < ' a , F : Fn ( ) -> OsString > (
68+ lp_cmd_line : Option < WStrUnits < ' a > > ,
69+ exe_name : F ,
70+ ) -> Vec < OsString > {
71+ const BACKSLASH : NonZeroU16 = non_zero_u16 ( b'\\' as u16 ) ;
72+ const QUOTE : NonZeroU16 = non_zero_u16 ( b'"' as u16 ) ;
73+ const TAB : NonZeroU16 = non_zero_u16 ( b'\t' as u16 ) ;
74+ const SPACE : NonZeroU16 = non_zero_u16 ( b' ' as u16 ) ;
75+
76+ let mut ret_val = Vec :: new ( ) ;
77+ // If the cmd line pointer is null or it points to an empty string then
78+ // return the name of the executable as argv[0].
79+ if lp_cmd_line. as_ref ( ) . and_then ( |cmd| cmd. peek ( ) ) . is_none ( ) {
80+ ret_val. push ( exe_name ( ) ) ;
81+ return ret_val;
82+ }
83+ let mut code_units = lp_cmd_line. unwrap ( ) ;
84+
85+ // The executable name at the beginning is special.
86+ let mut in_quotes = false ;
87+ let mut cur = Vec :: new ( ) ;
88+ for w in & mut code_units {
89+ match w {
90+ // A quote mark always toggles `in_quotes` no matter what because
91+ // there are no escape characters when parsing the executable name.
92+ QUOTE => in_quotes = !in_quotes,
93+ // If not `in_quotes` then whitespace ends argv[0].
94+ SPACE | TAB if !in_quotes => break ,
95+ // In all other cases the code unit is taken literally.
96+ _ => cur. push ( w. get ( ) ) ,
97+ }
98+ }
99+ // Skip whitespace.
100+ code_units. advance_while ( |w| w == SPACE || w == TAB ) ;
101+ ret_val. push ( OsString :: from_wide ( & cur) ) ;
102+
103+ // Parse the arguments according to these rules:
104+ // * All code units are taken literally except space, tab, quote and backslash.
105+ // * When not `in_quotes`, space and tab separate arguments. Consecutive spaces and tabs are
106+ // treated as a single separator.
107+ // * A space or tab `in_quotes` is taken literally.
108+ // * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally.
109+ // * A quote can be escaped if preceded by an odd number of backslashes.
110+ // * If any number of backslashes is immediately followed by a quote then the number of
111+ // backslashes is halved (rounding down).
112+ // * Backslashes not followed by a quote are all taken literally.
113+ // * If `in_quotes` then a quote can also be escaped using another quote
114+ // (i.e. two consecutive quotes become one literal quote).
115+ let mut cur = Vec :: new ( ) ;
116+ let mut in_quotes = false ;
117+ while let Some ( w) = code_units. next ( ) {
118+ match w {
119+ // If not `in_quotes`, a space or tab ends the argument.
120+ SPACE | TAB if !in_quotes => {
121+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
122+ cur. truncate ( 0 ) ;
123+
124+ // Skip whitespace.
125+ code_units. advance_while ( |w| w == SPACE || w == TAB ) ;
126+ }
127+ // Backslashes can escape quotes or backslashes but only if consecutive backslashes are followed by a quote.
128+ BACKSLASH => {
129+ let backslash_count = code_units. advance_while ( |w| w == BACKSLASH ) + 1 ;
130+ if code_units. peek ( ) == Some ( QUOTE ) {
131+ cur. extend ( iter:: repeat ( BACKSLASH . get ( ) ) . take ( backslash_count / 2 ) ) ;
132+ // The quote is escaped if there are an odd number of backslashes.
133+ if backslash_count % 2 == 1 {
134+ code_units. next ( ) ;
135+ cur. push ( QUOTE . get ( ) ) ;
136+ }
137+ } else {
138+ // If there is no quote on the end then there is no escaping.
139+ cur. extend ( iter:: repeat ( BACKSLASH . get ( ) ) . take ( backslash_count) ) ;
140+ }
141+ }
142+ // If `in_quotes` and not backslash escaped (see above) then a quote either
143+ // unsets `in_quote` or is escaped by another quote.
144+ QUOTE if in_quotes => match code_units. peek ( ) {
145+ // Two consecutive quotes when `in_quotes` produces one literal quote.
146+ Some ( QUOTE ) => {
147+ cur. push ( QUOTE . get ( ) ) ;
148+ code_units. next ( ) ;
149+ }
150+ // Otherwise set `in_quotes`.
151+ Some ( _) => in_quotes = false ,
152+ // The end of the command line.
153+ // Push `cur` even if empty, which we do by breaking while `in_quotes` is still set.
154+ None => break ,
155+ } ,
156+ // If not `in_quotes` and not BACKSLASH escaped (see above) then a quote sets `in_quote`.
157+ QUOTE => in_quotes = true ,
158+ // Everything else is always taken literally.
159+ _ => cur. push ( w. get ( ) ) ,
160+ }
161+ }
162+ // Push the final argument, if any.
163+ if !cur. is_empty ( ) || in_quotes {
164+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
165+ }
166+ ret_val
167+ }
168+
/// The value returned by `args()`.
///
/// Wraps an owning iterator over `OsString` values in `parsed_args_list`
/// (presumably the arguments produced by parsing the process command line —
/// confirm against the constructor in `args()`).
pub struct Args {
    parsed_args_list: vec::IntoIter<OsString>,
}
0 commit comments