1 /* $NetBSD: quotearg.c,v 1.1.1.1 2016/01/10 21:36:18 christos Exp $ */
2
3 /* quotearg.c - quote arguments for output
4 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19
20 /* Written by Paul Eggert <eggert@twinsun.com> */
21
22 #if HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25
26 #if HAVE_STDDEF_H
27 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
28 #endif
29 #include <sys/types.h>
30 #include <quotearg.h>
31 #include <xalloc.h>
32
33 #include <ctype.h>
34
35 #if ENABLE_NLS
36 # include <libintl.h>
37 # define _(text) gettext (text)
38 #else
39 # define _(text) text
40 #endif
41 #define N_(text) text
42
43 #if HAVE_LIMITS_H
44 # include <limits.h>
45 #endif
46 #ifndef CHAR_BIT
47 # define CHAR_BIT 8
48 #endif
49 #ifndef UCHAR_MAX
50 # define UCHAR_MAX ((unsigned char) -1)
51 #endif
52
53 #if HAVE_C_BACKSLASH_A
54 # define ALERT_CHAR '\a'
55 #else
56 # define ALERT_CHAR '\7'
57 #endif
58
59 #if HAVE_STDLIB_H
60 # include <stdlib.h>
61 #endif
62
63 #if HAVE_STRING_H
64 # include <string.h>
65 #endif
66
67 #if HAVE_WCHAR_H
68 # include <wchar.h>
69 #endif
70
71 #if !HAVE_MBRTOWC
72 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
73 other macros are defined only for documentation and to satisfy C
74 syntax. */
75 # undef MB_CUR_MAX
76 # define MB_CUR_MAX 1
77 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
78 # define mbsinit(ps) 1
79 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
80 #endif
81
82 #ifndef iswprint
83 # if HAVE_WCTYPE_H
84 # include <wctype.h>
85 # endif
86 # if !defined iswprint && !HAVE_ISWPRINT
87 # define iswprint(wc) 1
88 # endif
89 #endif
90
91 #define INT_BITS (sizeof (int) * CHAR_BIT)
92
93 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
94 # define IN_CTYPE_DOMAIN(c) 1
95 #else
96 # define IN_CTYPE_DOMAIN(c) isascii(c)
97 #endif
98
99 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
100 #undef ISPRINT
101 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
102
103 struct quoting_options
104 {
105 /* Basic quoting style. */
106 enum quoting_style style;
107
108 /* Quote the characters indicated by this bit vector even if the
109 quoting style would not normally require them to be quoted. */
110 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
111 };
112
/* User-visible names of the quoting styles, terminated by a null
   pointer.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
125
126 /* Correspondences to quoting style names. */
127 enum quoting_style const quoting_style_vals[] =
128 {
129 literal_quoting_style,
130 shell_quoting_style,
131 shell_always_quoting_style,
132 c_quoting_style,
133 escape_quoting_style,
134 locale_quoting_style,
135 clocale_quoting_style
136 };
137
138 /* The default quoting options. */
139 static struct quoting_options default_quoting_options;
140
141 /* Allocate a new set of quoting options, with contents initially identical
142 to O if O is not null, or to the default if O is null.
143 It is the caller's responsibility to free the result. */
144 struct quoting_options *
clone_quoting_options(struct quoting_options * o)145 clone_quoting_options (struct quoting_options *o)
146 {
147 struct quoting_options *p
148 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
149 *p = *(o ? o : &default_quoting_options);
150 return p;
151 }
152
153 /* Get the value of O's quoting style. If O is null, use the default. */
154 enum quoting_style
get_quoting_style(struct quoting_options * o)155 get_quoting_style (struct quoting_options *o)
156 {
157 return (o ? o : &default_quoting_options)->style;
158 }
159
160 /* In O (or in the default if O is null),
161 set the value of the quoting style to S. */
162 void
set_quoting_style(struct quoting_options * o,enum quoting_style s)163 set_quoting_style (struct quoting_options *o, enum quoting_style s)
164 {
165 (o ? o : &default_quoting_options)->style = s;
166 }
167
168 /* In O (or in the default if O is null),
169 set the value of the quoting options for character C to I.
170 Return the old value. Currently, the only values defined for I are
171 0 (the default) and 1 (which means to quote the character even if
172 it would not otherwise be quoted). */
173 int
set_char_quoting(struct quoting_options * o,char c,int i)174 set_char_quoting (struct quoting_options *o, char c, int i)
175 {
176 unsigned char uc = c;
177 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
178 int shift = uc % INT_BITS;
179 int r = (*p >> shift) & 1;
180 *p ^= ((i & 1) ^ r) << shift;
181 return r;
182 }
183
184 /* MSGID approximates a quotation mark. Return its translation if it
185 has one; otherwise, return either it or "\"", depending on S. */
186 static char const *
gettext_quote(char const * msgid,enum quoting_style s)187 gettext_quote (char const *msgid, enum quoting_style s)
188 {
189 char const *translation = _(msgid);
190 if (translation == msgid && s == clocale_quoting_style)
191 translation = "\"";
192 return translation;
193 }
194
195 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
196 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
197 non-quoting-style part of O to control quoting.
198 Terminate the output with a null character, and return the written
199 size of the output, not counting the terminating null.
200 If BUFFERSIZE is too small to store the output string, return the
201 value that would have been returned had BUFFERSIZE been large enough.
202 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
203
204 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
205 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
206 style specified by O, and O may not be null. */
207
208 static size_t
quotearg_buffer_restyled(char * buffer,size_t buffersize,char const * arg,size_t argsize,enum quoting_style quoting_style,struct quoting_options const * o)209 quotearg_buffer_restyled (char *buffer, size_t buffersize,
210 char const *arg, size_t argsize,
211 enum quoting_style quoting_style,
212 struct quoting_options const *o)
213 {
214 size_t i;
215 size_t len = 0;
216 char const *quote_string = 0;
217 size_t quote_string_len = 0;
218 int backslash_escapes = 0;
219 int unibyte_locale = MB_CUR_MAX == 1;
220
221 #define STORE(c) \
222 do \
223 { \
224 if (len < buffersize) \
225 buffer[len] = (c); \
226 len++; \
227 } \
228 while (0)
229
230 switch (quoting_style)
231 {
232 case c_quoting_style:
233 STORE ('"');
234 backslash_escapes = 1;
235 quote_string = "\"";
236 quote_string_len = 1;
237 break;
238
239 case escape_quoting_style:
240 backslash_escapes = 1;
241 break;
242
243 case locale_quoting_style:
244 case clocale_quoting_style:
245 {
246 /* Get translations for open and closing quotation marks.
247
248 The message catalog should translate "`" to a left
249 quotation mark suitable for the locale, and similarly for
250 "'". If the catalog has no translation,
251 locale_quoting_style quotes `like this', and
252 clocale_quoting_style quotes "like this".
253
254 For example, an American English Unicode locale should
255 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
256 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
257 MARK). A British English Unicode locale should instead
258 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
259 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
260
261 char const *left = gettext_quote (N_("`"), quoting_style);
262 char const *right = gettext_quote (N_("'"), quoting_style);
263 for (quote_string = left; *quote_string; quote_string++)
264 STORE (*quote_string);
265 backslash_escapes = 1;
266 quote_string = right;
267 quote_string_len = strlen (quote_string);
268 }
269 break;
270
271 case shell_always_quoting_style:
272 STORE ('\'');
273 quote_string = "'";
274 quote_string_len = 1;
275 break;
276
277 default:
278 break;
279 }
280
281 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
282 {
283 unsigned char c;
284 unsigned char esc;
285
286 if (backslash_escapes
287 && quote_string_len
288 && i + quote_string_len <= argsize
289 && memcmp (arg + i, quote_string, quote_string_len) == 0)
290 STORE ('\\');
291
292 c = arg[i];
293 switch (c)
294 {
295 case '?':
296 switch (quoting_style)
297 {
298 case shell_quoting_style:
299 goto use_shell_always_quoting_style;
300
301 case c_quoting_style:
302 if (i + 2 < argsize && arg[i + 1] == '?')
303 switch (arg[i + 2])
304 {
305 case '!': case '\'':
306 case '(': case ')': case '-': case '/':
307 case '<': case '=': case '>':
308 /* Escape the second '?' in what would otherwise be
309 a trigraph. */
310 i += 2;
311 c = arg[i + 2];
312 STORE ('?');
313 STORE ('\\');
314 STORE ('?');
315 break;
316 }
317 break;
318
319 default:
320 break;
321 }
322 break;
323
324 case ALERT_CHAR: esc = 'a'; goto c_escape;
325 case '\b': esc = 'b'; goto c_escape;
326 case '\f': esc = 'f'; goto c_escape;
327 case '\n': esc = 'n'; goto c_and_shell_escape;
328 case '\r': esc = 'r'; goto c_and_shell_escape;
329 case '\t': esc = 't'; goto c_and_shell_escape;
330 case '\v': esc = 'v'; goto c_escape;
331 case '\\': esc = c; goto c_and_shell_escape;
332
333 c_and_shell_escape:
334 if (quoting_style == shell_quoting_style)
335 goto use_shell_always_quoting_style;
336 c_escape:
337 if (backslash_escapes)
338 {
339 c = esc;
340 goto store_escape;
341 }
342 break;
343
344 case '#': case '~':
345 if (i != 0)
346 break;
347 /* Fall through. */
348 case ' ':
349 case '!': /* special in bash */
350 case '"': case '$': case '&':
351 case '(': case ')': case '*': case ';':
352 case '<': case '>': case '[':
353 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
354 case '`': case '|':
355 /* A shell special character. In theory, '$' and '`' could
356 be the first bytes of multibyte characters, which means
357 we should check them with mbrtowc, but in practice this
358 doesn't happen so it's not worth worrying about. */
359 if (quoting_style == shell_quoting_style)
360 goto use_shell_always_quoting_style;
361 break;
362
363 case '\'':
364 switch (quoting_style)
365 {
366 case shell_quoting_style:
367 goto use_shell_always_quoting_style;
368
369 case shell_always_quoting_style:
370 STORE ('\'');
371 STORE ('\\');
372 STORE ('\'');
373 break;
374
375 default:
376 break;
377 }
378 break;
379
380 case '%': case '+': case ',': case '-': case '.': case '/':
381 case '0': case '1': case '2': case '3': case '4': case '5':
382 case '6': case '7': case '8': case '9': case ':': case '=':
383 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
384 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
385 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
386 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
387 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
388 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
389 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
390 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
391 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
392 case '{': case '}':
393 /* These characters don't cause problems, no matter what the
394 quoting style is. They cannot start multibyte sequences. */
395 break;
396
397 default:
398 /* If we have a multibyte sequence, copy it until we reach
399 its end, find an error, or come back to the initial shift
400 state. For C-like styles, if the sequence has
401 unprintable characters, escape the whole sequence, since
402 we can't easily escape single characters within it. */
403 {
404 /* Length of multibyte sequence found so far. */
405 size_t m;
406
407 int printable;
408
409 if (unibyte_locale)
410 {
411 m = 1;
412 printable = ISPRINT (c);
413 }
414 else
415 {
416 mbstate_t mbstate;
417 memset (&mbstate, 0, sizeof mbstate);
418
419 m = 0;
420 printable = 1;
421 if (argsize == (size_t) -1)
422 argsize = strlen (arg);
423
424 do
425 {
426 wchar_t w;
427 size_t bytes = mbrtowc (&w, &arg[i + m],
428 argsize - (i + m), &mbstate);
429 if (bytes == 0)
430 break;
431 else if (bytes == (size_t) -1)
432 {
433 printable = 0;
434 break;
435 }
436 else if (bytes == (size_t) -2)
437 {
438 printable = 0;
439 while (i + m < argsize && arg[i + m])
440 m++;
441 break;
442 }
443 else
444 {
445 if (! iswprint (w))
446 printable = 0;
447 m += bytes;
448 }
449 }
450 while (! mbsinit (&mbstate));
451 }
452
453 if (1 < m || (backslash_escapes && ! printable))
454 {
455 /* Output a multibyte sequence, or an escaped
456 unprintable unibyte character. */
457 size_t ilim = i + m;
458
459 for (;;)
460 {
461 if (backslash_escapes && ! printable)
462 {
463 STORE ('\\');
464 STORE ('0' + (c >> 6));
465 STORE ('0' + ((c >> 3) & 7));
466 c = '0' + (c & 7);
467 }
468 if (ilim <= i + 1)
469 break;
470 STORE (c);
471 c = arg[++i];
472 }
473
474 goto store_c;
475 }
476 }
477 }
478
479 if (! (backslash_escapes
480 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
481 goto store_c;
482
483 store_escape:
484 STORE ('\\');
485
486 store_c:
487 STORE (c);
488 }
489
490 if (quote_string)
491 for (; *quote_string; quote_string++)
492 STORE (*quote_string);
493
494 if (len < buffersize)
495 buffer[len] = '\0';
496 return len;
497
498 use_shell_always_quoting_style:
499 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
500 shell_always_quoting_style, o);
501 }
502
503 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
504 argument ARG (of size ARGSIZE), using O to control quoting.
505 If O is null, use the default.
506 Terminate the output with a null character, and return the written
507 size of the output, not counting the terminating null.
508 If BUFFERSIZE is too small to store the output string, return the
509 value that would have been returned had BUFFERSIZE been large enough.
510 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
511 size_t
quotearg_buffer(char * buffer,size_t buffersize,char const * arg,size_t argsize,struct quoting_options const * o)512 quotearg_buffer (char *buffer, size_t buffersize,
513 char const *arg, size_t argsize,
514 struct quoting_options const *o)
515 {
516 struct quoting_options const *p = o ? o : &default_quoting_options;
517 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
518 p->style, p);
519 }
520
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; the function aborts otherwise.  N is
   deliberately declared with type "int" to allow for future
   extensions (using negative values).  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  if (n < 0)
    abort ();

  if (nslots <= (unsigned int) n)
    {
      /* Grow the slot vector to N + 1 entries.  The arithmetic is done
         in unsigned types so that N == INT_MAX cannot overflow, and the
         division check catches wraparound of the byte count.  */
      unsigned int n1 = (unsigned int) n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      if (n1 != s / sizeof (struct slotvec))
        abort ();
      if (slotvec == &slotvec0)
        {
          /* The initial one-element vector is static and cannot be
             passed to xrealloc; move it to the heap first.  */
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
      /* Record the new slot count (N + 1, not N); otherwise the next
         call with the same N would take this path again and wipe the
         slot, leaking its buffer.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    /* First attempt: quote into the slot's current buffer.  A new slot
       has size 0, so this only measures the required length.  */
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* Too small: enlarge to fit exactly and quote again.  slot0 is
           static storage, so it is replaced rather than reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
574
575 char *
quotearg_n(unsigned int n,char const * arg)576 quotearg_n (unsigned int n, char const *arg)
577 {
578 return quotearg_n_options (n, arg, &default_quoting_options);
579 }
580
/* Quote ARG into storage slot 0 using the default quoting options.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);
  return quoted;
}
586
587 char *
quotearg_n_style(unsigned int n,enum quoting_style s,char const * arg)588 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
589 {
590 struct quoting_options o;
591 o.style = s;
592 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
593 return quotearg_n_options (n, arg, &o);
594 }
595
596 char *
quotearg_style(enum quoting_style s,char const * arg)597 quotearg_style (enum quoting_style s, char const *arg)
598 {
599 return quotearg_n_style (0, s, arg);
600 }
601
602 char *
quotearg_char(char const * arg,char ch)603 quotearg_char (char const *arg, char ch)
604 {
605 struct quoting_options options;
606 options = default_quoting_options;
607 set_char_quoting (&options, ch, 1);
608 return quotearg_n_options (0, arg, &options);
609 }
610
/* Quote ARG into storage slot 0 with the default options, additionally
   marking ':' for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';
  return quotearg_char (arg, colon);
}
616