xref: /netbsd-src/external/gpl2/xcvs/dist/lib/quotearg.c (revision 5a6c14c844c4c665da5632061aebde7bb2cb5766)
1 /* quotearg.c - quote arguments for output
2 
3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005 Free Software
4    Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 #include <sys/cdefs.h>
20 __RCSID("$NetBSD: quotearg.c,v 1.3 2016/05/17 14:00:09 christos Exp $");
21 
22 
23 /* Written by Paul Eggert <eggert@twinsun.com> */
24 
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 
29 #include "quotearg.h"
30 
31 #include "xalloc.h"
32 
33 #include <ctype.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdbool.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "gettext.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
43 
44 #if HAVE_WCHAR_H
45 
46 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared.  */
47 # include <stdio.h>
48 # include <time.h>
49 
50 # include <wchar.h>
51 #endif
52 
#if !HAVE_MBRTOWC
/* No usable mbrtowc: switch multibyte processing off altogether.
   Because MB_CUR_MAX becomes 1, the other macros below serve only as
   documentation and to keep the code syntactically valid.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
# define iswprint(wc) isprint ((unsigned char) (wc))
# undef HAVE_MBSINIT
#endif

/* Without mbsinit, treat every conversion state as the initial one.  */
#if !(defined mbsinit || HAVE_MBSINIT)
# define mbsinit(ps) 1
#endif

#if !defined iswprint
# if HAVE_WCTYPE_H
#  include <wctype.h>
# endif
/* Without iswprint, treat every wide character as printable.  */
# if !(defined iswprint || HAVE_ISWPRINT)
#  define iswprint(wc) 1
# endif
#endif

#if !defined SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* Number of bits in an int.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)
82 
83 struct quoting_options
84 {
85   /* Basic quoting style.  */
86   enum quoting_style style;
87 
88   /* Quote the characters indicated by this bit vector even if the
89      quoting style would not normally require them to be quoted.  */
90   unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
91 };
92 
/* Quoting style names, terminated by a null pointer.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  NULL
};
105 
106 /* Correspondences to quoting style names.  */
107 enum quoting_style const quoting_style_vals[] =
108 {
109   literal_quoting_style,
110   shell_quoting_style,
111   shell_always_quoting_style,
112   c_quoting_style,
113   escape_quoting_style,
114   locale_quoting_style,
115   clocale_quoting_style
116 };
117 
118 /* The default quoting options.  */
119 static struct quoting_options default_quoting_options;
120 
121 /* Allocate a new set of quoting options, with contents initially identical
122    to O if O is not null, or to the default if O is null.
123    It is the caller's responsibility to free the result.  */
124 struct quoting_options *
clone_quoting_options(struct quoting_options * o)125 clone_quoting_options (struct quoting_options *o)
126 {
127   int e = errno;
128   struct quoting_options *p = xmalloc (sizeof *p);
129   *p = *(o ? o : &default_quoting_options);
130   errno = e;
131   return p;
132 }
133 
134 /* Get the value of O's quoting style.  If O is null, use the default.  */
135 enum quoting_style
get_quoting_style(struct quoting_options * o)136 get_quoting_style (struct quoting_options *o)
137 {
138   return (o ? o : &default_quoting_options)->style;
139 }
140 
141 /* In O (or in the default if O is null),
142    set the value of the quoting style to S.  */
143 void
set_quoting_style(struct quoting_options * o,enum quoting_style s)144 set_quoting_style (struct quoting_options *o, enum quoting_style s)
145 {
146   (o ? o : &default_quoting_options)->style = s;
147 }
148 
149 /* In O (or in the default if O is null),
150    set the value of the quoting options for character C to I.
151    Return the old value.  Currently, the only values defined for I are
152    0 (the default) and 1 (which means to quote the character even if
153    it would not otherwise be quoted).  */
154 int
set_char_quoting(struct quoting_options * o,char c,int i)155 set_char_quoting (struct quoting_options *o, char c, int i)
156 {
157   unsigned char uc = c;
158   unsigned int *p =
159     (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
160   int shift = uc % INT_BITS;
161   int r = (*p >> shift) & 1;
162   *p ^= ((i & 1) ^ r) << shift;
163   return r;
164 }
165 
166 /* MSGID approximates a quotation mark.  Return its translation if it
167    has one; otherwise, return either it or "\"", depending on S.  */
168 static char const *
gettext_quote(char const * msgid,enum quoting_style s)169 gettext_quote (char const *msgid, enum quoting_style s)
170 {
171   char const *translation = _(msgid);
172   if (translation == msgid && s == clocale_quoting_style)
173     translation = "\"";
174   return translation;
175 }
176 
177 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
178    argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
179    non-quoting-style part of O to control quoting.
180    Terminate the output with a null character, and return the written
181    size of the output, not counting the terminating null.
182    If BUFFERSIZE is too small to store the output string, return the
183    value that would have been returned had BUFFERSIZE been large enough.
184    If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
185 
186    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
187    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
188    style specified by O, and O may not be null.  */
189 
190 static size_t
quotearg_buffer_restyled(char * buffer,size_t buffersize,char const * arg,size_t argsize,enum quoting_style quoting_style,struct quoting_options const * o)191 quotearg_buffer_restyled (char *buffer, size_t buffersize,
192 			  char const *arg, size_t argsize,
193 			  enum quoting_style quoting_style,
194 			  struct quoting_options const *o)
195 {
196   size_t i;
197   size_t len = 0;
198   char const *quote_string = 0;
199   size_t quote_string_len = 0;
200   bool backslash_escapes = false;
201   bool unibyte_locale = MB_CUR_MAX == 1;
202 
203 #define STORE(c) \
204     do \
205       { \
206 	if (len < buffersize) \
207 	  buffer[len] = (c); \
208 	len++; \
209       } \
210     while (0)
211 
212   switch (quoting_style)
213     {
214     case c_quoting_style:
215       STORE ('"');
216       backslash_escapes = true;
217       quote_string = "\"";
218       quote_string_len = 1;
219       break;
220 
221     case escape_quoting_style:
222       backslash_escapes = true;
223       break;
224 
225     case locale_quoting_style:
226     case clocale_quoting_style:
227       {
228 	/* TRANSLATORS:
229 	   Get translations for open and closing quotation marks.
230 
231 	   The message catalog should translate "`" to a left
232 	   quotation mark suitable for the locale, and similarly for
233 	   "'".  If the catalog has no translation,
234 	   locale_quoting_style quotes `like this', and
235 	   clocale_quoting_style quotes "like this".
236 
237 	   For example, an American English Unicode locale should
238 	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
239 	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
240 	   MARK).  A British English Unicode locale should instead
241 	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
242 	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
243 
244 	   If you don't know what to put here, please see
245 	   <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
246 	   and use glyphs suitable for your language.  */
247 
248 	char const *left = gettext_quote (N_("`"), quoting_style);
249 	char const *right = gettext_quote (N_("'"), quoting_style);
250 	for (quote_string = left; *quote_string; quote_string++)
251 	  STORE (*quote_string);
252 	backslash_escapes = true;
253 	quote_string = right;
254 	quote_string_len = strlen (quote_string);
255       }
256       break;
257 
258     case shell_always_quoting_style:
259       STORE ('\'');
260       quote_string = "'";
261       quote_string_len = 1;
262       break;
263 
264     default:
265       break;
266     }
267 
268   for (i = 0;  ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize);  i++)
269     {
270       unsigned char c;
271       unsigned char esc;
272 
273       if (backslash_escapes
274 	  && quote_string_len
275 	  && i + quote_string_len <= argsize
276 	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
277 	STORE ('\\');
278 
279       c = arg[i];
280       switch (c)
281 	{
282 	case '\0':
283 	  if (backslash_escapes)
284 	    {
285 	      STORE ('\\');
286 	      STORE ('0');
287 	      STORE ('0');
288 	      c = '0';
289 	    }
290 	  break;
291 
292 	case '?':
293 	  switch (quoting_style)
294 	    {
295 	    case shell_quoting_style:
296 	      goto use_shell_always_quoting_style;
297 
298 	    case c_quoting_style:
299 	      if (i + 2 < argsize && arg[i + 1] == '?')
300 		switch (arg[i + 2])
301 		  {
302 		  case '!': case '\'':
303 		  case '(': case ')': case '-': case '/':
304 		  case '<': case '=': case '>':
305 		    /* Escape the second '?' in what would otherwise be
306 		       a trigraph.  */
307 		    c = arg[i + 2];
308 		    i += 2;
309 		    STORE ('?');
310 		    STORE ('\\');
311 		    STORE ('?');
312 		    break;
313 		  }
314 	      break;
315 
316 	    default:
317 	      break;
318 	    }
319 	  break;
320 
321 	case '\a': esc = 'a'; goto c_escape;
322 	case '\b': esc = 'b'; goto c_escape;
323 	case '\f': esc = 'f'; goto c_escape;
324 	case '\n': esc = 'n'; goto c_and_shell_escape;
325 	case '\r': esc = 'r'; goto c_and_shell_escape;
326 	case '\t': esc = 't'; goto c_and_shell_escape;
327 	case '\v': esc = 'v'; goto c_escape;
328 	case '\\': esc = c; goto c_and_shell_escape;
329 
330 	c_and_shell_escape:
331 	  if (quoting_style == shell_quoting_style)
332 	    goto use_shell_always_quoting_style;
333 	c_escape:
334 	  if (backslash_escapes)
335 	    {
336 	      c = esc;
337 	      goto store_escape;
338 	    }
339 	  break;
340 
341 	case '{': case '}': /* sometimes special if isolated */
342 	  if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
343 	    break;
344 	  /* Fall through.  */
345 	case '#': case '~':
346 	  if (i != 0)
347 	    break;
348 	  /* Fall through.  */
349 	case ' ':
350 	case '!': /* special in bash */
351 	case '"': case '$': case '&':
352 	case '(': case ')': case '*': case ';':
353 	case '<':
354 	case '=': /* sometimes special in 0th or (with "set -k") later args */
355 	case '>': case '[':
356 	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
357 	case '`': case '|':
358 	  /* A shell special character.  In theory, '$' and '`' could
359 	     be the first bytes of multibyte characters, which means
360 	     we should check them with mbrtowc, but in practice this
361 	     doesn't happen so it's not worth worrying about.  */
362 	  if (quoting_style == shell_quoting_style)
363 	    goto use_shell_always_quoting_style;
364 	  break;
365 
366 	case '\'':
367 	  switch (quoting_style)
368 	    {
369 	    case shell_quoting_style:
370 	      goto use_shell_always_quoting_style;
371 
372 	    case shell_always_quoting_style:
373 	      STORE ('\'');
374 	      STORE ('\\');
375 	      STORE ('\'');
376 	      break;
377 
378 	    default:
379 	      break;
380 	    }
381 	  break;
382 
383 	case '%': case '+': case ',': case '-': case '.': case '/':
384 	case '0': case '1': case '2': case '3': case '4': case '5':
385 	case '6': case '7': case '8': case '9': case ':':
386 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
387 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
388 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
389 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
390 	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
391 	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
392 	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
393 	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
394 	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
395 	  /* These characters don't cause problems, no matter what the
396 	     quoting style is.  They cannot start multibyte sequences.  */
397 	  break;
398 
399 	default:
400 	  /* If we have a multibyte sequence, copy it until we reach
401 	     its end, find an error, or come back to the initial shift
402 	     state.  For C-like styles, if the sequence has
403 	     unprintable characters, escape the whole sequence, since
404 	     we can't easily escape single characters within it.  */
405 	  {
406 	    /* Length of multibyte sequence found so far.  */
407 	    size_t m;
408 
409 	    bool printable;
410 
411 	    if (unibyte_locale)
412 	      {
413 		m = 1;
414 		printable = isprint (c) != 0;
415 	      }
416 	    else
417 	      {
418 		mbstate_t mbstate;
419 		memset (&mbstate, 0, sizeof mbstate);
420 
421 		m = 0;
422 		printable = true;
423 		if (argsize == SIZE_MAX)
424 		  argsize = strlen (arg);
425 
426 		do
427 		  {
428 		    wchar_t w;
429 		    size_t bytes = mbrtowc (&w, &arg[i + m],
430 					    argsize - (i + m), &mbstate);
431 		    if (bytes == 0)
432 		      break;
433 		    else if (bytes == (size_t) -1)
434 		      {
435 			printable = false;
436 			break;
437 		      }
438 		    else if (bytes == (size_t) -2)
439 		      {
440 			printable = false;
441 			while (i + m < argsize && arg[i + m])
442 			  m++;
443 			break;
444 		      }
445 		    else
446 		      {
447 			/* Work around a bug with older shells that "see" a '\'
448 			   that is really the 2nd byte of a multibyte character.
449 			   In practice the problem is limited to ASCII
450 			   chars >= '@' that are shell special chars.  */
451 			if ('[' == 0x5b && quoting_style == shell_quoting_style)
452 			  {
453 			    size_t j;
454 			    for (j = 1; j < bytes; j++)
455 			      switch (arg[i + m + j])
456 				{
457 				case '[': case '\\': case '^':
458 				case '`': case '|':
459 				  goto use_shell_always_quoting_style;
460 				}
461 			  }
462 
463 			if (! iswprint (w))
464 			  printable = false;
465 			m += bytes;
466 		      }
467 		  }
468 		while (! mbsinit (&mbstate));
469 	      }
470 
471 	    if (1 < m || (backslash_escapes && ! printable))
472 	      {
473 		/* Output a multibyte sequence, or an escaped
474 		   unprintable unibyte character.  */
475 		size_t ilim = i + m;
476 
477 		for (;;)
478 		  {
479 		    if (backslash_escapes && ! printable)
480 		      {
481 			STORE ('\\');
482 			STORE ('0' + (c >> 6));
483 			STORE ('0' + ((c >> 3) & 7));
484 			c = '0' + (c & 7);
485 		      }
486 		    if (ilim <= i + 1)
487 		      break;
488 		    STORE (c);
489 		    c = arg[++i];
490 		  }
491 
492 		goto store_c;
493 	      }
494 	  }
495 	}
496 
497       if (! (backslash_escapes
498 	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
499 	goto store_c;
500 
501     store_escape:
502       STORE ('\\');
503 
504     store_c:
505       STORE (c);
506     }
507 
508   if (i == 0 && quoting_style == shell_quoting_style)
509     goto use_shell_always_quoting_style;
510 
511   if (quote_string)
512     for (; *quote_string; quote_string++)
513       STORE (*quote_string);
514 
515   if (len < buffersize)
516     buffer[len] = '\0';
517   return len;
518 
519  use_shell_always_quoting_style:
520   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
521 				   shell_always_quoting_style, o);
522 }
523 
524 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
525    argument ARG (of size ARGSIZE), using O to control quoting.
526    If O is null, use the default.
527    Terminate the output with a null character, and return the written
528    size of the output, not counting the terminating null.
529    If BUFFERSIZE is too small to store the output string, return the
530    value that would have been returned had BUFFERSIZE been large enough.
531    If ARGSIZE is SIZE_MAX, use the string length of the argument for
532    ARGSIZE.  */
533 size_t
quotearg_buffer(char * buffer,size_t buffersize,char const * arg,size_t argsize,struct quoting_options const * o)534 quotearg_buffer (char *buffer, size_t buffersize,
535 		 char const *arg, size_t argsize,
536 		 struct quoting_options const *o)
537 {
538   struct quoting_options const *p = o ? o : &default_quoting_options;
539   int e = errno;
540   size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
541 				       p->style, p);
542   errno = e;
543   return r;
544 }
545 
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), but return the quoted
   string in newly allocated storage.  errno is left as it was on
   entry.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;
  /* A first pass with an empty buffer only measures the output.  */
  size_t needed = quotearg_buffer (NULL, 0, arg, argsize, o);
  size_t bufsize = needed + 1;
  char *result = xmalloc (bufsize);

  quotearg_buffer (result, bufsize, arg, argsize, o);
  errno = saved_errno;
  return result;
}
559 
/* Return a quoted version of ARG, kept in storage slot N.
   ARG is ARGSIZE bytes long, or null-terminated if ARGSIZE is SIZE_MAX.
   OPTIONS specifies the quoting options.
   The result lives in static storage that the next call with the same
   N may reuse.  N must be nonnegative (the function aborts otherwise);
   it is declared "int" on purpose so that negative values can be given
   a meaning in the future.  errno is left as it was on entry.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slotvec
    {
      size_t size;
      char *val;
    };

  /* Slot 0 gets a preallocated buffer, so that the caller can always
     quote one small component of a "memory exhausted" message there.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  int saved_errno = errno;
  unsigned int slot = n;
  struct slotvec *entry;
  size_t size;
  char *val;
  size_t qsize;

  if (n < 0)
    abort ();

  /* Grow the slot vector so that it includes slot N.  */
  if (slot >= nslots)
    {
      unsigned int wanted = slot + 1;

      /* XXX: wrong int cast to avoid gcc warning */
      if (xalloc_oversized ((int) wanted, sizeof *slotvec))
        xalloc_die ();

      /* The initial one-element vector is static; move it to the heap
         before it is resized.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, wanted * sizeof *slotvec);
      memset (slotvec + nslots, 0, (wanted - nslots) * sizeof *slotvec);
      nslots = wanted;
    }

  entry = &slotvec[n];
  size = entry->size;
  val = entry->val;
  qsize = quotearg_buffer (val, size, arg, argsize, options);

  /* The slot's buffer was too small: replace it and quote again.  */
  if (qsize >= size)
    {
      size = qsize + 1;
      entry->size = size;
      if (val != slot0)
        free (val);
      val = xmalloc (size);
      entry->val = val;
      quotearg_buffer (val, size, arg, argsize, options);
    }

  errno = saved_errno;
  return val;
}
626 
627 char *
quotearg_n(int n,char const * arg)628 quotearg_n (int n, char const *arg)
629 {
630   return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
631 }
632 
/* Quote ARG via quotearg_n, using storage slot 0.  */
char *
quotearg (char const *arg)
{
  int const slot = 0;

  return quotearg_n (slot, arg);
}
638 
639 /* Return quoting options for STYLE, with no extra quoting.  */
640 static struct quoting_options
quoting_options_from_style(enum quoting_style style)641 quoting_options_from_style (enum quoting_style style)
642 {
643   struct quoting_options o;
644   o.style = style;
645   memset (o.quote_these_too, 0, sizeof o.quote_these_too);
646   return o;
647 }
648 
649 char *
quotearg_n_style(int n,enum quoting_style s,char const * arg)650 quotearg_n_style (int n, enum quoting_style s, char const *arg)
651 {
652   struct quoting_options const o = quoting_options_from_style (s);
653   return quotearg_n_options (n, arg, SIZE_MAX, &o);
654 }
655 
656 char *
quotearg_n_style_mem(int n,enum quoting_style s,char const * arg,size_t argsize)657 quotearg_n_style_mem (int n, enum quoting_style s,
658 		      char const *arg, size_t argsize)
659 {
660   struct quoting_options const o = quoting_options_from_style (s);
661   return quotearg_n_options (n, arg, argsize, &o);
662 }
663 
664 char *
quotearg_style(enum quoting_style s,char const * arg)665 quotearg_style (enum quoting_style s, char const *arg)
666 {
667   return quotearg_n_style (0, s, arg);
668 }
669 
670 char *
quotearg_char(char const * arg,char ch)671 quotearg_char (char const *arg, char ch)
672 {
673   struct quoting_options options;
674   options = default_quoting_options;
675   set_char_quoting (&options, ch, 1);
676   return quotearg_n_options (0, arg, SIZE_MAX, &options);
677 }
678 
/* Quote ARG via quotearg_char, additionally marking ':' for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const extra = ':';

  return quotearg_char (arg, extra);
}
684