/*
 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
9*4724848cSchristos
10*4724848cSchristos #include <windows.h>
11*4724848cSchristos #include <stdlib.h>
12*4724848cSchristos #include <string.h>
13*4724848cSchristos #include <malloc.h>
14*4724848cSchristos
15*4724848cSchristos #if defined(CP_UTF8)
16*4724848cSchristos
/* Number of entries currently stored in newargv. */
static int newargc;
/* UTF-8 argument vector built by win32_utf8argv(), kept NULL-terminated. */
static char **newargv;
/* Console output code page in effect before it was switched to CP_UTF8. */
static UINT saved_cp;
20*4724848cSchristos
/*
 * Exit handler registered by win32_utf8argv(): puts the console output
 * code page back to the value remembered in saved_cp, then releases every
 * argument string and the argument vector itself.
 */
static void cleanup(void)
{
    int idx = 0;

    SetConsoleOutputCP(saved_cp);

    while (idx < newargc) {
        free(newargv[idx]);
        idx++;
    }

    free(newargv);
}
32*4724848cSchristos
33*4724848cSchristos /*
34*4724848cSchristos * Incrementally [re]allocate newargv and keep it NULL-terminated.
35*4724848cSchristos */
validate_argv(int argc)36*4724848cSchristos static int validate_argv(int argc)
37*4724848cSchristos {
38*4724848cSchristos static int size = 0;
39*4724848cSchristos
40*4724848cSchristos if (argc >= size) {
41*4724848cSchristos char **ptr;
42*4724848cSchristos
43*4724848cSchristos while (argc >= size)
44*4724848cSchristos size += 64;
45*4724848cSchristos
46*4724848cSchristos ptr = realloc(newargv, size * sizeof(newargv[0]));
47*4724848cSchristos if (ptr == NULL)
48*4724848cSchristos return 0;
49*4724848cSchristos
50*4724848cSchristos (newargv = ptr)[argc] = NULL;
51*4724848cSchristos } else {
52*4724848cSchristos newargv[argc] = NULL;
53*4724848cSchristos }
54*4724848cSchristos
55*4724848cSchristos return 1;
56*4724848cSchristos }
57*4724848cSchristos
/*
 * Expand a wildcard argument into the file names it matches.
 *
 * |wstr| points at the |wlen| wide characters of one parsed argument.  It
 * does not have to be NUL-terminated: the element at index |wlen| is
 * temporarily overwritten with L'\0' for the FindFirstFileW() call and
 * restored afterwards, so it must be writable.
 *
 * Every match other than "." and ".." is converted to UTF-8, prefixed
 * with the directory part of |wstr| (everything up to and including the
 * last slash or backslash) and appended to newargv.
 *
 * Returns 0 if the file name part contains neither '*' nor '?', or if
 * FindFirstFileW() fails; nothing is appended in that case and the caller
 * is expected to pass the argument through as is.  Returns 1 otherwise,
 * even if the expansion stopped early because memory ran out.
 */
static int process_glob(WCHAR *wstr, int wlen)
{
    int i, slash, udlen;
    WCHAR saved_char;
    WIN32_FIND_DATAW data;
    HANDLE h;

    /*
     * Note that we support wildcard characters only in filename part
     * of the path, and not in directories. Windows users are used to
     * this, that's why recursive glob processing is not implemented.
     */
    /*
     * Start by looking for last slash or backslash, ...
     */
    for (slash = 0, i = 0; i < wlen; i++)
        if (wstr[i] == L'/' || wstr[i] == L'\\')
            slash = i + 1;
    /*
     * ... then look for asterisk or question mark in the file name.
     */
    for (i = slash; i < wlen; i++)
        if (wstr[i] == L'*' || wstr[i] == L'?')
            break;

    if (i == wlen)
        return 0;       /* definitely not a glob */

    saved_char = wstr[wlen];
    wstr[wlen] = L'\0';
    h = FindFirstFileW(wstr, &data);
    wstr[wlen] = saved_char;
    if (h == INVALID_HANDLE_VALUE)
        return 0;       /* not a valid glob, just pass... */

    /* size of the UTF-8 encoded directory prefix, without terminator */
    if (slash)
        udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                    NULL, 0, NULL, NULL);
    else
        udlen = 0;

    do {
        int uflen;
        char *arg;

        /*
         * skip over . and ..
         */
        if (data.cFileName[0] == L'.') {
            if ((data.cFileName[1] == L'\0') ||
                (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
                continue;
        }

        /*
         * -1 below means "scan for trailing '\0' *and* count it",
         * so that |uflen| covers even trailing '\0'.
         */
        uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                                    NULL, 0, NULL, NULL);
        /*
         * A failed size query would leave the buffer without a
         * terminating '\0', so skip names that cannot be converted.
         */
        if (uflen <= 0)
            continue;

        if (!validate_argv(newargc + 1))
            break;

        arg = malloc(udlen + uflen);
        if (arg == NULL)
            break;

        if (udlen)
            WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                arg, udlen, NULL, NULL);

        WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                            arg + udlen, uflen, NULL, NULL);

        newargv[newargc++] = arg;
    } while (FindNextFileW(h, &data));

    /* search handles must be released with FindClose(), not CloseHandle() */
    FindClose(h);

    return 1;
}
140*4724848cSchristos
/*
 * Replace the caller's argument vector with a UTF-8 encoded one that is
 * rebuilt from the wide-character command line.
 *
 * Nothing is done unless the environment variable OPENSSL_WIN32_UTF8 is
 * set.  Otherwise the string returned by GetCommandLineW() is split into
 * arguments (honouring double quotes and backslash escapes), each
 * non-empty argument is offered to process_glob() for wildcard expansion,
 * and whatever is not expanded is converted to UTF-8 as is.
 *
 * On success |*argc| and |*argv| are set to the new vector, the console
 * output code page is switched to CP_UTF8 and cleanup() is registered
 * with atexit() to undo both.  If memory runs out, everything allocated
 * so far is released and |*argc| / |*argv| are left untouched.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    const WCHAR *wcmdline;
    WCHAR *warg, *wend, *p;
    int wlen, ulen, valid = 1;
    char *arg;

    if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0)
        return;

    /*
     * Fetch the command line before allocating anything, so that bailing
     * out here leaves nothing behind that would have to be released.
     */
    wcmdline = GetCommandLineW();
    if (wcmdline == NULL)
        return;

    newargc = 0;
    newargv = NULL;
    if (!validate_argv(newargc))
        return;

    /*
     * make a copy of the command line, since we might have to modify it...
     */
    wlen = (int)wcslen(wcmdline);
    p = _alloca((wlen + 1) * sizeof(WCHAR));
    wcscpy(p, wcmdline);

    while (*p != L'\0') {
        int in_quote = 0;

        if (*p == L' ' || *p == L'\t') {
            p++; /* skip over white spaces */
            continue;
        }

        /*
         * Note: because we may need to fiddle with the number of backslashes,
         * the argument string is copied into itself.  This is safe because
         * the number of characters will never expand.
         */
        warg = wend = p;
        while (*p != L'\0'
               && (in_quote || (*p != L' ' && *p != L'\t'))) {
            switch (*p) {
            case L'\\':
                /*
                 * Microsoft documentation on how backslashes are treated
                 * is:
                 *
                 * + Backslashes are interpreted literally, unless they
                 *   immediately precede a double quotation mark.
                 * + If an even number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is interpreted as a string delimiter.
                 * + If an odd number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is "escaped" by the remaining backslash, causing a
                 *   literal double quotation mark (") to be placed in argv.
                 *
                 * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
                 *
                 * Though referred page doesn't mention it, multiple double
                 * quotes are also special. Pair of double quotes in quoted
                 * string is counted as single double quote.
                 */
                {
                    const WCHAR *q = p;
                    int i;

                    while (*p == L'\\')
                        p++;

                    if (*p == L'"') {
                        for (i = (int)(p - q) / 2; i > 0; i--)
                            *wend++ = L'\\';

                        /*
                         * if odd amount of backslashes before the quote,
                         * said quote is part of the argument, not a delimiter
                         */
                        if ((p - q) % 2 == 1)
                            *wend++ = *p++;
                    } else {
                        for (i = (int)(p - q); i > 0; i--)
                            *wend++ = L'\\';
                    }
                }
                break;
            case L'"':
                /*
                 * Without the preceding backslash (or when preceded with an
                 * even number of backslashes), the double quote is a simple
                 * string delimiter and just slightly change the parsing state
                 */
                if (in_quote && p[1] == L'"')
                    *wend++ = *p++;
                else
                    in_quote = !in_quote;
                p++;
                break;
            default:
                /*
                 * Any other non-delimiter character is just taken verbatim
                 */
                *wend++ = *p++;
                break;
            }
        }

        wlen = (int)(wend - warg);

        /*
         * Empty arguments and arguments that process_glob() did not expand
         * are converted to UTF-8 and appended here.
         */
        if (wlen == 0 || !process_glob(warg, wlen)) {
            if (!validate_argv(newargc + 1)) {
                valid = 0;
                break;
            }

            ulen = 0;
            if (wlen > 0) {
                ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                           NULL, 0, NULL, NULL);
                if (ulen <= 0)
                    continue;   /* cannot be converted, drop it */
            }

            arg = malloc(ulen + 1);
            if (arg == NULL) {
                valid = 0;
                break;
            }

            if (wlen > 0)
                WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                    arg, ulen, NULL, NULL);
            arg[ulen] = '\0';

            newargv[newargc++] = arg;
        }
    }

    if (valid) {
        saved_cp = GetConsoleOutputCP();
        SetConsoleOutputCP(CP_UTF8);

        *argc = newargc;
        *argv = newargv;

        atexit(cleanup);
    } else if (newargv != NULL) {
        int i;

        /* out of memory: throw away the partially built vector */
        for (i = 0; i < newargc; i++)
            free(newargv[i]);

        free(newargv);

        newargc = 0;
        newargv = NULL;
    }

    return;
}
304*4724848cSchristos #else
/*
 * Fallback for builds where CP_UTF8 is not defined: the caller's argument
 * count and vector are left exactly as they are.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
307*4724848cSchristos #endif
308