xref: /netbsd-src/external/gpl2/grep/dist/lib/quotearg.c (revision a8fa202a6440953be7b92a8960a811bff58203f4)
1 /*	$NetBSD: quotearg.c,v 1.1.1.1 2016/01/10 21:36:18 christos Exp $	*/
2 
3 /* quotearg.c - quote arguments for output
4    Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
19 
20 /* Written by Paul Eggert <eggert@twinsun.com> */
21 
22 #if HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25 
26 #if HAVE_STDDEF_H
27 # include <stddef.h>  /* For the definition of size_t on windows w/MSVC.  */
28 #endif
29 #include <sys/types.h>
30 #include <quotearg.h>
31 #include <xalloc.h>
32 
33 #include <ctype.h>
34 
35 #if ENABLE_NLS
36 # include <libintl.h>
37 # define _(text) gettext (text)
38 #else
39 # define _(text) text
40 #endif
41 #define N_(text) text
42 
43 #if HAVE_LIMITS_H
44 # include <limits.h>
45 #endif
46 #ifndef CHAR_BIT
47 # define CHAR_BIT 8
48 #endif
49 #ifndef UCHAR_MAX
50 # define UCHAR_MAX ((unsigned char) -1)
51 #endif
52 
53 #if HAVE_C_BACKSLASH_A
54 # define ALERT_CHAR '\a'
55 #else
56 # define ALERT_CHAR '\7'
57 #endif
58 
59 #if HAVE_STDLIB_H
60 # include <stdlib.h>
61 #endif
62 
63 #if HAVE_STRING_H
64 # include <string.h>
65 #endif
66 
67 #if HAVE_WCHAR_H
68 # include <wchar.h>
69 #endif
70 
71 #if !HAVE_MBRTOWC
72 /* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
73    other macros are defined only for documentation and to satisfy C
74    syntax.  */
75 # undef MB_CUR_MAX
76 # define MB_CUR_MAX 1
77 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
78 # define mbsinit(ps) 1
79 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
80 #endif
81 
82 #ifndef iswprint
83 # if HAVE_WCTYPE_H
84 #  include <wctype.h>
85 # endif
86 # if !defined iswprint && !HAVE_ISWPRINT
87 #  define iswprint(wc) 1
88 # endif
89 #endif
90 
91 #define INT_BITS (sizeof (int) * CHAR_BIT)
92 
93 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
94 # define IN_CTYPE_DOMAIN(c) 1
95 #else
96 # define IN_CTYPE_DOMAIN(c) isascii(c)
97 #endif
98 
99 /* Undefine to protect against the definition in wctype.h of solaris2.6.   */
100 #undef ISPRINT
101 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
102 
103 struct quoting_options
104 {
105   /* Basic quoting style.  */
106   enum quoting_style style;
107 
108   /* Quote the characters indicated by this bit vector even if the
109      quoting style would not normally require them to be quoted.  */
110   int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
111 };
112 
/* Null-terminated table of the user-visible quoting style names.
   Entry K here names the style stored in quoting_style_vals[K].  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
125 
126 /* Correspondences to quoting style names.  */
127 enum quoting_style const quoting_style_vals[] =
128 {
129   literal_quoting_style,
130   shell_quoting_style,
131   shell_always_quoting_style,
132   c_quoting_style,
133   escape_quoting_style,
134   locale_quoting_style,
135   clocale_quoting_style
136 };
137 
/* The default quoting options, used by the functions in this file
   whenever they are handed a null options pointer.  The object has
   static storage duration and no initializer, so it starts out
   zero-filled: no extra characters are marked for quoting and the
   style is whichever enumerator has the value 0 (presumably
   literal_quoting_style; the enum is declared in quotearg.h --
   confirm there).  */
static struct quoting_options default_quoting_options;
140 
141 /* Allocate a new set of quoting options, with contents initially identical
142    to O if O is not null, or to the default if O is null.
143    It is the caller's responsibility to free the result.  */
144 struct quoting_options *
clone_quoting_options(struct quoting_options * o)145 clone_quoting_options (struct quoting_options *o)
146 {
147   struct quoting_options *p
148     = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
149   *p = *(o ? o : &default_quoting_options);
150   return p;
151 }
152 
153 /* Get the value of O's quoting style.  If O is null, use the default.  */
154 enum quoting_style
get_quoting_style(struct quoting_options * o)155 get_quoting_style (struct quoting_options *o)
156 {
157   return (o ? o : &default_quoting_options)->style;
158 }
159 
160 /* In O (or in the default if O is null),
161    set the value of the quoting style to S.  */
162 void
set_quoting_style(struct quoting_options * o,enum quoting_style s)163 set_quoting_style (struct quoting_options *o, enum quoting_style s)
164 {
165   (o ? o : &default_quoting_options)->style = s;
166 }
167 
168 /* In O (or in the default if O is null),
169    set the value of the quoting options for character C to I.
170    Return the old value.  Currently, the only values defined for I are
171    0 (the default) and 1 (which means to quote the character even if
172    it would not otherwise be quoted).  */
173 int
set_char_quoting(struct quoting_options * o,char c,int i)174 set_char_quoting (struct quoting_options *o, char c, int i)
175 {
176   unsigned char uc = c;
177   int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
178   int shift = uc % INT_BITS;
179   int r = (*p >> shift) & 1;
180   *p ^= ((i & 1) ^ r) << shift;
181   return r;
182 }
183 
184 /* MSGID approximates a quotation mark.  Return its translation if it
185    has one; otherwise, return either it or "\"", depending on S.  */
186 static char const *
gettext_quote(char const * msgid,enum quoting_style s)187 gettext_quote (char const *msgid, enum quoting_style s)
188 {
189   char const *translation = _(msgid);
190   if (translation == msgid && s == clocale_quoting_style)
191     translation = "\"";
192   return translation;
193 }
194 
195 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
196    argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
197    non-quoting-style part of O to control quoting.
198    Terminate the output with a null character, and return the written
199    size of the output, not counting the terminating null.
200    If BUFFERSIZE is too small to store the output string, return the
201    value that would have been returned had BUFFERSIZE been large enough.
202    If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
203 
204    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
205    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
206    style specified by O, and O may not be null.  */
207 
208 static size_t
quotearg_buffer_restyled(char * buffer,size_t buffersize,char const * arg,size_t argsize,enum quoting_style quoting_style,struct quoting_options const * o)209 quotearg_buffer_restyled (char *buffer, size_t buffersize,
210 			  char const *arg, size_t argsize,
211 			  enum quoting_style quoting_style,
212 			  struct quoting_options const *o)
213 {
214   size_t i;
215   size_t len = 0;
216   char const *quote_string = 0;
217   size_t quote_string_len = 0;
218   int backslash_escapes = 0;
219   int unibyte_locale = MB_CUR_MAX == 1;
220 
221 #define STORE(c) \
222     do \
223       { \
224 	if (len < buffersize) \
225 	  buffer[len] = (c); \
226 	len++; \
227       } \
228     while (0)
229 
230   switch (quoting_style)
231     {
232     case c_quoting_style:
233       STORE ('"');
234       backslash_escapes = 1;
235       quote_string = "\"";
236       quote_string_len = 1;
237       break;
238 
239     case escape_quoting_style:
240       backslash_escapes = 1;
241       break;
242 
243     case locale_quoting_style:
244     case clocale_quoting_style:
245       {
246 	/* Get translations for open and closing quotation marks.
247 
248 	   The message catalog should translate "`" to a left
249 	   quotation mark suitable for the locale, and similarly for
250 	   "'".  If the catalog has no translation,
251 	   locale_quoting_style quotes `like this', and
252 	   clocale_quoting_style quotes "like this".
253 
254 	   For example, an American English Unicode locale should
255 	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
256 	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
257 	   MARK).  A British English Unicode locale should instead
258 	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
259 	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */
260 
261 	char const *left = gettext_quote (N_("`"), quoting_style);
262 	char const *right = gettext_quote (N_("'"), quoting_style);
263 	for (quote_string = left; *quote_string; quote_string++)
264 	  STORE (*quote_string);
265 	backslash_escapes = 1;
266 	quote_string = right;
267 	quote_string_len = strlen (quote_string);
268       }
269       break;
270 
271     case shell_always_quoting_style:
272       STORE ('\'');
273       quote_string = "'";
274       quote_string_len = 1;
275       break;
276 
277     default:
278       break;
279     }
280 
281   for (i = 0;  ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize);  i++)
282     {
283       unsigned char c;
284       unsigned char esc;
285 
286       if (backslash_escapes
287 	  && quote_string_len
288 	  && i + quote_string_len <= argsize
289 	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
290 	STORE ('\\');
291 
292       c = arg[i];
293       switch (c)
294 	{
295 	case '?':
296 	  switch (quoting_style)
297 	    {
298 	    case shell_quoting_style:
299 	      goto use_shell_always_quoting_style;
300 
301 	    case c_quoting_style:
302 	      if (i + 2 < argsize && arg[i + 1] == '?')
303 		switch (arg[i + 2])
304 		  {
305 		  case '!': case '\'':
306 		  case '(': case ')': case '-': case '/':
307 		  case '<': case '=': case '>':
308 		    /* Escape the second '?' in what would otherwise be
309 		       a trigraph.  */
310 		    i += 2;
311 		    c = arg[i + 2];
312 		    STORE ('?');
313 		    STORE ('\\');
314 		    STORE ('?');
315 		    break;
316 		  }
317 	      break;
318 
319 	    default:
320 	      break;
321 	    }
322 	  break;
323 
324 	case ALERT_CHAR: esc = 'a'; goto c_escape;
325 	case '\b': esc = 'b'; goto c_escape;
326 	case '\f': esc = 'f'; goto c_escape;
327 	case '\n': esc = 'n'; goto c_and_shell_escape;
328 	case '\r': esc = 'r'; goto c_and_shell_escape;
329 	case '\t': esc = 't'; goto c_and_shell_escape;
330 	case '\v': esc = 'v'; goto c_escape;
331 	case '\\': esc = c; goto c_and_shell_escape;
332 
333 	c_and_shell_escape:
334 	  if (quoting_style == shell_quoting_style)
335 	    goto use_shell_always_quoting_style;
336 	c_escape:
337 	  if (backslash_escapes)
338 	    {
339 	      c = esc;
340 	      goto store_escape;
341 	    }
342 	  break;
343 
344 	case '#': case '~':
345 	  if (i != 0)
346 	    break;
347 	  /* Fall through.  */
348 	case ' ':
349 	case '!': /* special in bash */
350 	case '"': case '$': case '&':
351 	case '(': case ')': case '*': case ';':
352 	case '<': case '>': case '[':
353 	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
354 	case '`': case '|':
355 	  /* A shell special character.  In theory, '$' and '`' could
356 	     be the first bytes of multibyte characters, which means
357 	     we should check them with mbrtowc, but in practice this
358 	     doesn't happen so it's not worth worrying about.  */
359 	  if (quoting_style == shell_quoting_style)
360 	    goto use_shell_always_quoting_style;
361 	  break;
362 
363 	case '\'':
364 	  switch (quoting_style)
365 	    {
366 	    case shell_quoting_style:
367 	      goto use_shell_always_quoting_style;
368 
369 	    case shell_always_quoting_style:
370 	      STORE ('\'');
371 	      STORE ('\\');
372 	      STORE ('\'');
373 	      break;
374 
375 	    default:
376 	      break;
377 	    }
378 	  break;
379 
380 	case '%': case '+': case ',': case '-': case '.': case '/':
381 	case '0': case '1': case '2': case '3': case '4': case '5':
382 	case '6': case '7': case '8': case '9': case ':': case '=':
383 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
384 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
385 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
386 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
387 	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
388 	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
389 	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
390 	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
391 	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
392 	case '{': case '}':
393 	  /* These characters don't cause problems, no matter what the
394 	     quoting style is.  They cannot start multibyte sequences.  */
395 	  break;
396 
397 	default:
398 	  /* If we have a multibyte sequence, copy it until we reach
399 	     its end, find an error, or come back to the initial shift
400 	     state.  For C-like styles, if the sequence has
401 	     unprintable characters, escape the whole sequence, since
402 	     we can't easily escape single characters within it.  */
403 	  {
404 	    /* Length of multibyte sequence found so far.  */
405 	    size_t m;
406 
407 	    int printable;
408 
409 	    if (unibyte_locale)
410 	      {
411 		m = 1;
412 		printable = ISPRINT (c);
413 	      }
414 	    else
415 	      {
416 		mbstate_t mbstate;
417 		memset (&mbstate, 0, sizeof mbstate);
418 
419 		m = 0;
420 		printable = 1;
421 		if (argsize == (size_t) -1)
422 		  argsize = strlen (arg);
423 
424 		do
425 		  {
426 		    wchar_t w;
427 		    size_t bytes = mbrtowc (&w, &arg[i + m],
428 					    argsize - (i + m), &mbstate);
429 		    if (bytes == 0)
430 		      break;
431 		    else if (bytes == (size_t) -1)
432 		      {
433 			printable = 0;
434 			break;
435 		      }
436 		    else if (bytes == (size_t) -2)
437 		      {
438 			printable = 0;
439 			while (i + m < argsize && arg[i + m])
440 			  m++;
441 			break;
442 		      }
443 		    else
444 		      {
445 			if (! iswprint (w))
446 			  printable = 0;
447 			m += bytes;
448 		      }
449 		  }
450 		while (! mbsinit (&mbstate));
451 	      }
452 
453 	    if (1 < m || (backslash_escapes && ! printable))
454 	      {
455 		/* Output a multibyte sequence, or an escaped
456 		   unprintable unibyte character.  */
457 		size_t ilim = i + m;
458 
459 		for (;;)
460 		  {
461 		    if (backslash_escapes && ! printable)
462 		      {
463 			STORE ('\\');
464 			STORE ('0' + (c >> 6));
465 			STORE ('0' + ((c >> 3) & 7));
466 			c = '0' + (c & 7);
467 		      }
468 		    if (ilim <= i + 1)
469 		      break;
470 		    STORE (c);
471 		    c = arg[++i];
472 		  }
473 
474 		goto store_c;
475 	      }
476 	  }
477 	}
478 
479       if (! (backslash_escapes
480 	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
481 	goto store_c;
482 
483     store_escape:
484       STORE ('\\');
485 
486     store_c:
487       STORE (c);
488     }
489 
490   if (quote_string)
491     for (; *quote_string; quote_string++)
492       STORE (*quote_string);
493 
494   if (len < buffersize)
495     buffer[len] = '\0';
496   return len;
497 
498  use_shell_always_quoting_style:
499   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
500 				   shell_always_quoting_style, o);
501 }
502 
503 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
504    argument ARG (of size ARGSIZE), using O to control quoting.
505    If O is null, use the default.
506    Terminate the output with a null character, and return the written
507    size of the output, not counting the terminating null.
508    If BUFFERSIZE is too small to store the output string, return the
509    value that would have been returned had BUFFERSIZE been large enough.
510    If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */
511 size_t
quotearg_buffer(char * buffer,size_t buffersize,char const * arg,size_t argsize,struct quoting_options const * o)512 quotearg_buffer (char *buffer, size_t buffersize,
513 		 char const *arg, size_t argsize,
514 		 struct quoting_options const *o)
515 {
516   struct quoting_options const *p = o ? o : &default_quoting_options;
517   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
518 				   p->style, p);
519 }
520 
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; the function aborts otherwise.  N is
   deliberately declared with type "int" to allow for future
   extensions (using negative values).  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  unsigned int n0 = n;

  if (n < 0)
    abort ();

  if (nslots <= n0)
    {
      /* Grow the slot vector so that slot N exists.  The arithmetic
         is unsigned so that N + 1 cannot overflow.  */
      unsigned int n1 = n0 + 1;
      size_t s = n1 * sizeof (struct slotvec);
      if (n1 != s / sizeof (struct slotvec))
        abort ();
      if (slotvec == &slotvec0)
        {
          /* The initial vector is static and cannot be passed to
             xrealloc, so move it to the heap first.  */
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
      /* Record the new slot count, which is N + 1.  Recording N
         would make the next call with the same N come back here and
         zero slot N, leaking the buffer it already owns.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The slot is too small: enlarge it and quote again.  The
           static slot 0 buffer must never be handed to xrealloc.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
574 
575 char *
quotearg_n(unsigned int n,char const * arg)576 quotearg_n (unsigned int n, char const *arg)
577 {
578   return quotearg_n_options (n, arg, &default_quoting_options);
579 }
580 
/* Return ARG quoted according to the default quoting options.  The
   result lives in storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
586 
587 char *
quotearg_n_style(unsigned int n,enum quoting_style s,char const * arg)588 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
589 {
590   struct quoting_options o;
591   o.style = s;
592   memset (o.quote_these_too, 0, sizeof o.quote_these_too);
593   return quotearg_n_options (n, arg, &o);
594 }
595 
596 char *
quotearg_style(enum quoting_style s,char const * arg)597 quotearg_style (enum quoting_style s, char const *arg)
598 {
599   return quotearg_n_style (0, s, arg);
600 }
601 
602 char *
quotearg_char(char const * arg,char ch)603 quotearg_char (char const *arg, char ch)
604 {
605   struct quoting_options options;
606   options = default_quoting_options;
607   set_char_quoting (&options, ch, 1);
608   return quotearg_n_options (0, arg, &options);
609 }
610 
/* Return ARG quoted according to the default quoting options, with
   ':' additionally marked for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}
616