1 /* quotearg.c - quote arguments for output
2
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005 Free Software
4 Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 #include <sys/cdefs.h>
20 __RCSID("$NetBSD: quotearg.c,v 1.3 2016/05/17 14:00:09 christos Exp $");
21
22
23 /* Written by Paul Eggert <eggert@twinsun.com> */
24
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28
29 #include "quotearg.h"
30
31 #include "xalloc.h"
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdbool.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #include "gettext.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
43
44 #if HAVE_WCHAR_H
45
46 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
47 # include <stdio.h>
48 # include <time.h>
49
50 # include <wchar.h>
51 #endif
52
53 #if !HAVE_MBRTOWC
54 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
55 other macros are defined only for documentation and to satisfy C
56 syntax. */
57 # undef MB_CUR_MAX
58 # define MB_CUR_MAX 1
59 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
60 # define iswprint(wc) isprint ((unsigned char) (wc))
61 # undef HAVE_MBSINIT
62 #endif
63
64 #if !defined mbsinit && !HAVE_MBSINIT
65 # define mbsinit(ps) 1
66 #endif
67
68 #ifndef iswprint
69 # if HAVE_WCTYPE_H
70 # include <wctype.h>
71 # endif
72 # if !defined iswprint && !HAVE_ISWPRINT
73 # define iswprint(wc) 1
74 # endif
75 #endif
76
77 #ifndef SIZE_MAX
78 # define SIZE_MAX ((size_t) -1)
79 #endif
80
81 #define INT_BITS (sizeof (int) * CHAR_BIT)
82
83 struct quoting_options
84 {
85 /* Basic quoting style. */
86 enum quoting_style style;
87
88 /* Quote the characters indicated by this bit vector even if the
89 quoting style would not normally require them to be quoted. */
90 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
91 };
92
/* User-visible names of the quoting styles, terminated by a null
   pointer.  Entry K names the style stored at index K of
   quoting_style_vals, so the two tables must be kept in step.  */
char const *const quoting_style_args[] =
{
  "literal",		/* literal_quoting_style */
  "shell",		/* shell_quoting_style */
  "shell-always",	/* shell_always_quoting_style */
  "c",			/* c_quoting_style */
  "escape",		/* escape_quoting_style */
  "locale",		/* locale_quoting_style */
  "clocale",		/* clocale_quoting_style */
  NULL			/* end-of-table sentinel */
};
105
106 /* Correspondences to quoting style names. */
107 enum quoting_style const quoting_style_vals[] =
108 {
109 literal_quoting_style,
110 shell_quoting_style,
111 shell_always_quoting_style,
112 c_quoting_style,
113 escape_quoting_style,
114 locale_quoting_style,
115 clocale_quoting_style
116 };
117
118 /* The default quoting options. */
119 static struct quoting_options default_quoting_options;
120
121 /* Allocate a new set of quoting options, with contents initially identical
122 to O if O is not null, or to the default if O is null.
123 It is the caller's responsibility to free the result. */
124 struct quoting_options *
clone_quoting_options(struct quoting_options * o)125 clone_quoting_options (struct quoting_options *o)
126 {
127 int e = errno;
128 struct quoting_options *p = xmalloc (sizeof *p);
129 *p = *(o ? o : &default_quoting_options);
130 errno = e;
131 return p;
132 }
133
134 /* Get the value of O's quoting style. If O is null, use the default. */
135 enum quoting_style
get_quoting_style(struct quoting_options * o)136 get_quoting_style (struct quoting_options *o)
137 {
138 return (o ? o : &default_quoting_options)->style;
139 }
140
141 /* In O (or in the default if O is null),
142 set the value of the quoting style to S. */
143 void
set_quoting_style(struct quoting_options * o,enum quoting_style s)144 set_quoting_style (struct quoting_options *o, enum quoting_style s)
145 {
146 (o ? o : &default_quoting_options)->style = s;
147 }
148
149 /* In O (or in the default if O is null),
150 set the value of the quoting options for character C to I.
151 Return the old value. Currently, the only values defined for I are
152 0 (the default) and 1 (which means to quote the character even if
153 it would not otherwise be quoted). */
154 int
set_char_quoting(struct quoting_options * o,char c,int i)155 set_char_quoting (struct quoting_options *o, char c, int i)
156 {
157 unsigned char uc = c;
158 unsigned int *p =
159 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
160 int shift = uc % INT_BITS;
161 int r = (*p >> shift) & 1;
162 *p ^= ((i & 1) ^ r) << shift;
163 return r;
164 }
165
166 /* MSGID approximates a quotation mark. Return its translation if it
167 has one; otherwise, return either it or "\"", depending on S. */
168 static char const *
gettext_quote(char const * msgid,enum quoting_style s)169 gettext_quote (char const *msgid, enum quoting_style s)
170 {
171 char const *translation = _(msgid);
172 if (translation == msgid && s == clocale_quoting_style)
173 translation = "\"";
174 return translation;
175 }
176
177 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
178 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
179 non-quoting-style part of O to control quoting.
180 Terminate the output with a null character, and return the written
181 size of the output, not counting the terminating null.
182 If BUFFERSIZE is too small to store the output string, return the
183 value that would have been returned had BUFFERSIZE been large enough.
184 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
185
186 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
187 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
188 style specified by O, and O may not be null. */
189
190 static size_t
quotearg_buffer_restyled(char * buffer,size_t buffersize,char const * arg,size_t argsize,enum quoting_style quoting_style,struct quoting_options const * o)191 quotearg_buffer_restyled (char *buffer, size_t buffersize,
192 char const *arg, size_t argsize,
193 enum quoting_style quoting_style,
194 struct quoting_options const *o)
195 {
196 size_t i;
197 size_t len = 0;
198 char const *quote_string = 0;
199 size_t quote_string_len = 0;
200 bool backslash_escapes = false;
201 bool unibyte_locale = MB_CUR_MAX == 1;
202
203 #define STORE(c) \
204 do \
205 { \
206 if (len < buffersize) \
207 buffer[len] = (c); \
208 len++; \
209 } \
210 while (0)
211
212 switch (quoting_style)
213 {
214 case c_quoting_style:
215 STORE ('"');
216 backslash_escapes = true;
217 quote_string = "\"";
218 quote_string_len = 1;
219 break;
220
221 case escape_quoting_style:
222 backslash_escapes = true;
223 break;
224
225 case locale_quoting_style:
226 case clocale_quoting_style:
227 {
228 /* TRANSLATORS:
229 Get translations for open and closing quotation marks.
230
231 The message catalog should translate "`" to a left
232 quotation mark suitable for the locale, and similarly for
233 "'". If the catalog has no translation,
234 locale_quoting_style quotes `like this', and
235 clocale_quoting_style quotes "like this".
236
237 For example, an American English Unicode locale should
238 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
239 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
240 MARK). A British English Unicode locale should instead
241 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
242 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
243
244 If you don't know what to put here, please see
245 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
246 and use glyphs suitable for your language. */
247
248 char const *left = gettext_quote (N_("`"), quoting_style);
249 char const *right = gettext_quote (N_("'"), quoting_style);
250 for (quote_string = left; *quote_string; quote_string++)
251 STORE (*quote_string);
252 backslash_escapes = true;
253 quote_string = right;
254 quote_string_len = strlen (quote_string);
255 }
256 break;
257
258 case shell_always_quoting_style:
259 STORE ('\'');
260 quote_string = "'";
261 quote_string_len = 1;
262 break;
263
264 default:
265 break;
266 }
267
268 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
269 {
270 unsigned char c;
271 unsigned char esc;
272
273 if (backslash_escapes
274 && quote_string_len
275 && i + quote_string_len <= argsize
276 && memcmp (arg + i, quote_string, quote_string_len) == 0)
277 STORE ('\\');
278
279 c = arg[i];
280 switch (c)
281 {
282 case '\0':
283 if (backslash_escapes)
284 {
285 STORE ('\\');
286 STORE ('0');
287 STORE ('0');
288 c = '0';
289 }
290 break;
291
292 case '?':
293 switch (quoting_style)
294 {
295 case shell_quoting_style:
296 goto use_shell_always_quoting_style;
297
298 case c_quoting_style:
299 if (i + 2 < argsize && arg[i + 1] == '?')
300 switch (arg[i + 2])
301 {
302 case '!': case '\'':
303 case '(': case ')': case '-': case '/':
304 case '<': case '=': case '>':
305 /* Escape the second '?' in what would otherwise be
306 a trigraph. */
307 c = arg[i + 2];
308 i += 2;
309 STORE ('?');
310 STORE ('\\');
311 STORE ('?');
312 break;
313 }
314 break;
315
316 default:
317 break;
318 }
319 break;
320
321 case '\a': esc = 'a'; goto c_escape;
322 case '\b': esc = 'b'; goto c_escape;
323 case '\f': esc = 'f'; goto c_escape;
324 case '\n': esc = 'n'; goto c_and_shell_escape;
325 case '\r': esc = 'r'; goto c_and_shell_escape;
326 case '\t': esc = 't'; goto c_and_shell_escape;
327 case '\v': esc = 'v'; goto c_escape;
328 case '\\': esc = c; goto c_and_shell_escape;
329
330 c_and_shell_escape:
331 if (quoting_style == shell_quoting_style)
332 goto use_shell_always_quoting_style;
333 c_escape:
334 if (backslash_escapes)
335 {
336 c = esc;
337 goto store_escape;
338 }
339 break;
340
341 case '{': case '}': /* sometimes special if isolated */
342 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
343 break;
344 /* Fall through. */
345 case '#': case '~':
346 if (i != 0)
347 break;
348 /* Fall through. */
349 case ' ':
350 case '!': /* special in bash */
351 case '"': case '$': case '&':
352 case '(': case ')': case '*': case ';':
353 case '<':
354 case '=': /* sometimes special in 0th or (with "set -k") later args */
355 case '>': case '[':
356 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
357 case '`': case '|':
358 /* A shell special character. In theory, '$' and '`' could
359 be the first bytes of multibyte characters, which means
360 we should check them with mbrtowc, but in practice this
361 doesn't happen so it's not worth worrying about. */
362 if (quoting_style == shell_quoting_style)
363 goto use_shell_always_quoting_style;
364 break;
365
366 case '\'':
367 switch (quoting_style)
368 {
369 case shell_quoting_style:
370 goto use_shell_always_quoting_style;
371
372 case shell_always_quoting_style:
373 STORE ('\'');
374 STORE ('\\');
375 STORE ('\'');
376 break;
377
378 default:
379 break;
380 }
381 break;
382
383 case '%': case '+': case ',': case '-': case '.': case '/':
384 case '0': case '1': case '2': case '3': case '4': case '5':
385 case '6': case '7': case '8': case '9': case ':':
386 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
387 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
388 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
389 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
390 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
391 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
392 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
393 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
394 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
395 /* These characters don't cause problems, no matter what the
396 quoting style is. They cannot start multibyte sequences. */
397 break;
398
399 default:
400 /* If we have a multibyte sequence, copy it until we reach
401 its end, find an error, or come back to the initial shift
402 state. For C-like styles, if the sequence has
403 unprintable characters, escape the whole sequence, since
404 we can't easily escape single characters within it. */
405 {
406 /* Length of multibyte sequence found so far. */
407 size_t m;
408
409 bool printable;
410
411 if (unibyte_locale)
412 {
413 m = 1;
414 printable = isprint (c) != 0;
415 }
416 else
417 {
418 mbstate_t mbstate;
419 memset (&mbstate, 0, sizeof mbstate);
420
421 m = 0;
422 printable = true;
423 if (argsize == SIZE_MAX)
424 argsize = strlen (arg);
425
426 do
427 {
428 wchar_t w;
429 size_t bytes = mbrtowc (&w, &arg[i + m],
430 argsize - (i + m), &mbstate);
431 if (bytes == 0)
432 break;
433 else if (bytes == (size_t) -1)
434 {
435 printable = false;
436 break;
437 }
438 else if (bytes == (size_t) -2)
439 {
440 printable = false;
441 while (i + m < argsize && arg[i + m])
442 m++;
443 break;
444 }
445 else
446 {
447 /* Work around a bug with older shells that "see" a '\'
448 that is really the 2nd byte of a multibyte character.
449 In practice the problem is limited to ASCII
450 chars >= '@' that are shell special chars. */
451 if ('[' == 0x5b && quoting_style == shell_quoting_style)
452 {
453 size_t j;
454 for (j = 1; j < bytes; j++)
455 switch (arg[i + m + j])
456 {
457 case '[': case '\\': case '^':
458 case '`': case '|':
459 goto use_shell_always_quoting_style;
460 }
461 }
462
463 if (! iswprint (w))
464 printable = false;
465 m += bytes;
466 }
467 }
468 while (! mbsinit (&mbstate));
469 }
470
471 if (1 < m || (backslash_escapes && ! printable))
472 {
473 /* Output a multibyte sequence, or an escaped
474 unprintable unibyte character. */
475 size_t ilim = i + m;
476
477 for (;;)
478 {
479 if (backslash_escapes && ! printable)
480 {
481 STORE ('\\');
482 STORE ('0' + (c >> 6));
483 STORE ('0' + ((c >> 3) & 7));
484 c = '0' + (c & 7);
485 }
486 if (ilim <= i + 1)
487 break;
488 STORE (c);
489 c = arg[++i];
490 }
491
492 goto store_c;
493 }
494 }
495 }
496
497 if (! (backslash_escapes
498 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
499 goto store_c;
500
501 store_escape:
502 STORE ('\\');
503
504 store_c:
505 STORE (c);
506 }
507
508 if (i == 0 && quoting_style == shell_quoting_style)
509 goto use_shell_always_quoting_style;
510
511 if (quote_string)
512 for (; *quote_string; quote_string++)
513 STORE (*quote_string);
514
515 if (len < buffersize)
516 buffer[len] = '\0';
517 return len;
518
519 use_shell_always_quoting_style:
520 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
521 shell_always_quoting_style, o);
522 }
523
524 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
525 argument ARG (of size ARGSIZE), using O to control quoting.
526 If O is null, use the default.
527 Terminate the output with a null character, and return the written
528 size of the output, not counting the terminating null.
529 If BUFFERSIZE is too small to store the output string, return the
530 value that would have been returned had BUFFERSIZE been large enough.
531 If ARGSIZE is SIZE_MAX, use the string length of the argument for
532 ARGSIZE. */
533 size_t
quotearg_buffer(char * buffer,size_t buffersize,char const * arg,size_t argsize,struct quoting_options const * o)534 quotearg_buffer (char *buffer, size_t buffersize,
535 char const *arg, size_t argsize,
536 struct quoting_options const *o)
537 {
538 struct quoting_options const *p = o ? o : &default_quoting_options;
539 int e = errno;
540 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
541 p->style, p);
542 errno = e;
543 return r;
544 }
545
/* Quote ARG exactly as quotearg_buffer (..., ARG, ARGSIZE, O) would,
   but return the result in newly allocated storage sized to fit.
   errno has the same value on return as it had on entry.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;

  /* A first pass with an empty buffer only measures the output.  */
  size_t quoted_len = quotearg_buffer (NULL, 0, arg, argsize, o);
  size_t bufsize = quoted_len + 1;
  char *result = xmalloc (bufsize);

  /* The second pass does the actual quoting.  */
  quotearg_buffer (result, bufsize, arg, argsize, o);

  errno = saved_errno;
  return result;
}
559
/* Return a quoted version of ARG, held in storage slot N.
   ARG is ARGSIZE bytes long, or null-terminated if ARGSIZE is
   SIZE_MAX.  OPTIONS specifies the quoting options.
   The result lives in static storage that the next call with the same
   N may overwrite or replace.
   N must be nonnegative; a negative N aborts the program.  N is
   deliberately an "int" so that negative values remain available for
   future extensions.
   errno has the same value on return as it had on entry.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  /* One reusable output buffer and its capacity.  */
  struct slotvec
  {
    size_t size;
    char *val;
  };

  /* Slot 0 is backed by a preallocated buffer, so that the caller can
     always quote one small component of a "memory exhausted" message
     in slot 0.  */
  static char slot0[256];
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  static unsigned int nslots = 1;

  int saved_errno = errno;
  unsigned int wanted = n;
  struct slotvec *slot;
  size_t qsize;

  if (n < 0)
    abort ();

  /* Grow the slot table when N lies beyond its current end.  */
  if (wanted >= nslots)
    {
      unsigned int new_count = wanted + 1;

      /* XXX: wrong int cast to avoid gcc warning */
      if (xalloc_oversized ((int) new_count, sizeof *slotvec))
        xalloc_die ();

      /* The initial one-entry table is static, so it has to be moved
         to the heap before it can be resized.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, new_count * sizeof *slotvec);

      /* New slots start out empty: no buffer and zero capacity.  */
      memset (slotvec + nslots, 0, (new_count - nslots) * sizeof *slotvec);
      nslots = new_count;
    }

  /* Try the buffer the slot already has.  If the output does not fit,
     replace the buffer with one of exactly the right size and quote
     again.  */
  slot = &slotvec[n];
  qsize = quotearg_buffer (slot->val, slot->size, arg, argsize, options);

  if (slot->size <= qsize)
    {
      slot->size = qsize + 1;
      if (slot->val != slot0)
        free (slot->val);
      slot->val = xmalloc (slot->size);
      quotearg_buffer (slot->val, slot->size, arg, argsize, options);
    }

  errno = saved_errno;
  return slot->val;
}
626
627 char *
quotearg_n(int n,char const * arg)628 quotearg_n (int n, char const *arg)
629 {
630 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
631 }
632
/* Quote the null-terminated string ARG with the default quoting
   options, returning the result in storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
638
639 /* Return quoting options for STYLE, with no extra quoting. */
640 static struct quoting_options
quoting_options_from_style(enum quoting_style style)641 quoting_options_from_style (enum quoting_style style)
642 {
643 struct quoting_options o;
644 o.style = style;
645 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
646 return o;
647 }
648
649 char *
quotearg_n_style(int n,enum quoting_style s,char const * arg)650 quotearg_n_style (int n, enum quoting_style s, char const *arg)
651 {
652 struct quoting_options const o = quoting_options_from_style (s);
653 return quotearg_n_options (n, arg, SIZE_MAX, &o);
654 }
655
656 char *
quotearg_n_style_mem(int n,enum quoting_style s,char const * arg,size_t argsize)657 quotearg_n_style_mem (int n, enum quoting_style s,
658 char const *arg, size_t argsize)
659 {
660 struct quoting_options const o = quoting_options_from_style (s);
661 return quotearg_n_options (n, arg, argsize, &o);
662 }
663
664 char *
quotearg_style(enum quoting_style s,char const * arg)665 quotearg_style (enum quoting_style s, char const *arg)
666 {
667 return quotearg_n_style (0, s, arg);
668 }
669
670 char *
quotearg_char(char const * arg,char ch)671 quotearg_char (char const *arg, char ch)
672 {
673 struct quoting_options options;
674 options = default_quoting_options;
675 set_char_quoting (&options, ch, 1);
676 return quotearg_n_options (0, arg, SIZE_MAX, &options);
677 }
678
/* Quote the null-terminated string ARG with the default quoting
   options, additionally marking ':' for quoting.  The result is
   returned in storage slot 0.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}
684