@@ -44,6 +44,128 @@ pub fn args() -> Args {
44
44
}
45
45
}
46
46
47
+ /// Implements the Windows command-line argument parsing algorithm.
48
+ ///
49
+ /// Microsoft's documentation for the Windows CLI argument format can be found at
50
+ /// <https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments>
51
+ ///
52
+ /// A more in-depth explanation is here:
53
+ /// <https://daviddeley.com/autohotkey/parameters/parameters.htm#WIN>
54
+ ///
55
+ /// Windows includes a function to do command line parsing in shell32.dll.
56
+ /// However, this is not used for two reasons:
57
+ ///
58
+ /// 1. Linking with that DLL causes the process to be registered as a GUI application.
59
+ /// GUI applications add a bunch of overhead, even if no windows are drawn. See
60
+ /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
61
+ ///
62
+ /// 2. It does not follow the modern C/C++ argv rules outlined in the first two links above.
63
+ ///
64
+ /// This function was tested for equivalence to the C/C++ parsing rules using an
65
+ /// extensive test suite available at
66
+ /// <https://github.com/ChrisDenton/winarg/tree/std>.
67
+ fn parse_lp_cmd_line < ' a , F : Fn ( ) -> OsString > (
68
+ lp_cmd_line : Option < WStrUnits < ' a > > ,
69
+ exe_name : F ,
70
+ ) -> Vec < OsString > {
71
+ const BACKSLASH : NonZeroU16 = non_zero_u16 ( b'\\' as u16 ) ;
72
+ const QUOTE : NonZeroU16 = non_zero_u16 ( b'"' as u16 ) ;
73
+ const TAB : NonZeroU16 = non_zero_u16 ( b'\t' as u16 ) ;
74
+ const SPACE : NonZeroU16 = non_zero_u16 ( b' ' as u16 ) ;
75
+
76
+ let mut ret_val = Vec :: new ( ) ;
77
+ // If the cmd line pointer is null or it points to an empty string then
78
+ // return the name of the executable as argv[0].
79
+ if lp_cmd_line. as_ref ( ) . and_then ( |cmd| cmd. peek ( ) ) . is_none ( ) {
80
+ ret_val. push ( exe_name ( ) ) ;
81
+ return ret_val;
82
+ }
83
+ let mut code_units = lp_cmd_line. unwrap ( ) ;
84
+
85
+ // The executable name at the beginning is special.
86
+ let mut in_quotes = false ;
87
+ let mut cur = Vec :: new ( ) ;
88
+ for w in & mut code_units {
89
+ match w {
90
+ // A quote mark always toggles `in_quotes` no matter what because
91
+ // there are no escape characters when parsing the executable name.
92
+ QUOTE => in_quotes = !in_quotes,
93
+ // If not `in_quotes` then whitespace ends argv[0].
94
+ SPACE | TAB if !in_quotes => break ,
95
+ // In all other cases the code unit is taken literally.
96
+ _ => cur. push ( w. get ( ) ) ,
97
+ }
98
+ }
99
+ // Skip whitespace.
100
+ code_units. advance_while ( |w| w == SPACE || w == TAB ) ;
101
+ ret_val. push ( OsString :: from_wide ( & cur) ) ;
102
+
103
+ // Parse the arguments according to these rules:
104
+ // * All code units are taken literally except space, tab, quote and backslash.
105
+ // * When not `in_quotes`, space and tab separate arguments. Consecutive spaces and tabs are
106
+ // treated as a single separator.
107
+ // * A space or tab `in_quotes` is taken literally.
108
+ // * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally.
109
+ // * A quote can be escaped if preceded by an odd number of backslashes.
110
+ // * If any number of backslashes is immediately followed by a quote then the number of
111
+ // backslashes is halved (rounding down).
112
+ // * Backslashes not followed by a quote are all taken literally.
113
+ // * If `in_quotes` then a quote can also be escaped using another quote
114
+ // (i.e. two consecutive quotes become one literal quote).
115
+ let mut cur = Vec :: new ( ) ;
116
+ let mut in_quotes = false ;
117
+ while let Some ( w) = code_units. next ( ) {
118
+ match w {
119
+ // If not `in_quotes`, a space or tab ends the argument.
120
+ SPACE | TAB if !in_quotes => {
121
+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
122
+ cur. truncate ( 0 ) ;
123
+
124
+ // Skip whitespace.
125
+ code_units. advance_while ( |w| w == SPACE || w == TAB ) ;
126
+ }
127
+ // Backslashes can escape quotes or backslashes but only if consecutive backslashes are followed by a quote.
128
+ BACKSLASH => {
129
+ let backslash_count = code_units. advance_while ( |w| w == BACKSLASH ) + 1 ;
130
+ if code_units. peek ( ) == Some ( QUOTE ) {
131
+ cur. extend ( iter:: repeat ( BACKSLASH . get ( ) ) . take ( backslash_count / 2 ) ) ;
132
+ // The quote is escaped if there are an odd number of backslashes.
133
+ if backslash_count % 2 == 1 {
134
+ code_units. next ( ) ;
135
+ cur. push ( QUOTE . get ( ) ) ;
136
+ }
137
+ } else {
138
+ // If there is no quote on the end then there is no escaping.
139
+ cur. extend ( iter:: repeat ( BACKSLASH . get ( ) ) . take ( backslash_count) ) ;
140
+ }
141
+ }
142
+ // If `in_quotes` and not backslash escaped (see above) then a quote either
143
+ // unsets `in_quote` or is escaped by another quote.
144
+ QUOTE if in_quotes => match code_units. peek ( ) {
145
+ // Two consecutive quotes when `in_quotes` produces one literal quote.
146
+ Some ( QUOTE ) => {
147
+ cur. push ( QUOTE . get ( ) ) ;
148
+ code_units. next ( ) ;
149
+ }
150
+ // Otherwise set `in_quotes`.
151
+ Some ( _) => in_quotes = false ,
152
+ // The end of the command line.
153
+ // Push `cur` even if empty, which we do by breaking while `in_quotes` is still set.
154
+ None => break ,
155
+ } ,
156
+ // If not `in_quotes` and not BACKSLASH escaped (see above) then a quote sets `in_quote`.
157
+ QUOTE => in_quotes = true ,
158
+ // Everything else is always taken literally.
159
+ _ => cur. push ( w. get ( ) ) ,
160
+ }
161
+ }
162
+ // Push the final argument, if any.
163
+ if !cur. is_empty ( ) || in_quotes {
164
+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
165
+ }
166
+ ret_val
167
+ }
168
+
47
169
/// Iterator state for a list of command-line arguments.
pub struct Args {
    /// Owning iterator over the argument strings.
    // NOTE(review): presumably filled from the parsed command line — the
    // construction site is not visible in this chunk; confirm.
    parsed_args_list: vec::IntoIter<OsString>,
}
0 commit comments