1 /* quotearg.c - quote arguments for output 2 3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005 Free Software 4 Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 #include <sys/cdefs.h> 20 __RCSID("$NetBSD: quotearg.c,v 1.3 2016/05/17 14:00:09 christos Exp $"); 21 22 23 /* Written by Paul Eggert <eggert@twinsun.com> */ 24 25 #ifdef HAVE_CONFIG_H 26 # include <config.h> 27 #endif 28 29 #include "quotearg.h" 30 31 #include "xalloc.h" 32 33 #include <ctype.h> 34 #include <errno.h> 35 #include <limits.h> 36 #include <stdbool.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 #include "gettext.h" 41 #define _(msgid) gettext (msgid) 42 #define N_(msgid) msgid 43 44 #if HAVE_WCHAR_H 45 46 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */ 47 # include <stdio.h> 48 # include <time.h> 49 50 # include <wchar.h> 51 #endif 52 53 #if !HAVE_MBRTOWC 54 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the 55 other macros are defined only for documentation and to satisfy C 56 syntax. */ 57 # undef MB_CUR_MAX 58 # define MB_CUR_MAX 1 59 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) 60 # define iswprint(wc) isprint ((unsigned char) (wc)) 61 # undef HAVE_MBSINIT 62 #endif 63 64 #if !defined mbsinit && !HAVE_MBSINIT 65 # define mbsinit(ps) 1 66 #endif 67 68 #ifndef iswprint 69 # if HAVE_WCTYPE_H 70 # include <wctype.h> 71 # endif 72 # if !defined iswprint && !HAVE_ISWPRINT 73 # define iswprint(wc) 1 74 # endif 75 #endif 76 77 #ifndef SIZE_MAX 78 # define SIZE_MAX ((size_t) -1) 79 #endif 80 81 #define INT_BITS (sizeof (int) * CHAR_BIT) 82 83 struct quoting_options 84 { 85 /* Basic quoting style. */ 86 enum quoting_style style; 87 88 /* Quote the characters indicated by this bit vector even if the 89 quoting style would not normally require them to be quoted. */ 90 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; 91 }; 92 93 /* Names of quoting styles. */ 94 char const *const quoting_style_args[] = 95 { 96 "literal", 97 "shell", 98 "shell-always", 99 "c", 100 "escape", 101 "locale", 102 "clocale", 103 0 104 }; 105 106 /* Correspondences to quoting style names. */ 107 enum quoting_style const quoting_style_vals[] = 108 { 109 literal_quoting_style, 110 shell_quoting_style, 111 shell_always_quoting_style, 112 c_quoting_style, 113 escape_quoting_style, 114 locale_quoting_style, 115 clocale_quoting_style 116 }; 117 118 /* The default quoting options. */ 119 static struct quoting_options default_quoting_options; 120 121 /* Allocate a new set of quoting options, with contents initially identical 122 to O if O is not null, or to the default if O is null. 123 It is the caller's responsibility to free the result. */ 124 struct quoting_options * 125 clone_quoting_options (struct quoting_options *o) 126 { 127 int e = errno; 128 struct quoting_options *p = xmalloc (sizeof *p); 129 *p = *(o ? o : &default_quoting_options); 130 errno = e; 131 return p; 132 } 133 134 /* Get the value of O's quoting style. If O is null, use the default. */ 135 enum quoting_style 136 get_quoting_style (struct quoting_options *o) 137 { 138 return (o ? o : &default_quoting_options)->style; 139 } 140 141 /* In O (or in the default if O is null), 142 set the value of the quoting style to S. */ 143 void 144 set_quoting_style (struct quoting_options *o, enum quoting_style s) 145 { 146 (o ? o : &default_quoting_options)->style = s; 147 } 148 149 /* In O (or in the default if O is null), 150 set the value of the quoting options for character C to I. 151 Return the old value. Currently, the only values defined for I are 152 0 (the default) and 1 (which means to quote the character even if 153 it would not otherwise be quoted). */ 154 int 155 set_char_quoting (struct quoting_options *o, char c, int i) 156 { 157 unsigned char uc = c; 158 unsigned int *p = 159 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; 160 int shift = uc % INT_BITS; 161 int r = (*p >> shift) & 1; 162 *p ^= ((i & 1) ^ r) << shift; 163 return r; 164 } 165 166 /* MSGID approximates a quotation mark. Return its translation if it 167 has one; otherwise, return either it or "\"", depending on S. */ 168 static char const * 169 gettext_quote (char const *msgid, enum quoting_style s) 170 { 171 char const *translation = _(msgid); 172 if (translation == msgid && s == clocale_quoting_style) 173 translation = "\""; 174 return translation; 175 } 176 177 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 178 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the 179 non-quoting-style part of O to control quoting. 180 Terminate the output with a null character, and return the written 181 size of the output, not counting the terminating null. 182 If BUFFERSIZE is too small to store the output string, return the 183 value that would have been returned had BUFFERSIZE been large enough. 184 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. 185 186 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, 187 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting 188 style specified by O, and O may not be null. */ 189 190 static size_t 191 quotearg_buffer_restyled (char *buffer, size_t buffersize, 192 char const *arg, size_t argsize, 193 enum quoting_style quoting_style, 194 struct quoting_options const *o) 195 { 196 size_t i; 197 size_t len = 0; 198 char const *quote_string = 0; 199 size_t quote_string_len = 0; 200 bool backslash_escapes = false; 201 bool unibyte_locale = MB_CUR_MAX == 1; 202 203 #define STORE(c) \ 204 do \ 205 { \ 206 if (len < buffersize) \ 207 buffer[len] = (c); \ 208 len++; \ 209 } \ 210 while (0) 211 212 switch (quoting_style) 213 { 214 case c_quoting_style: 215 STORE ('"'); 216 backslash_escapes = true; 217 quote_string = "\""; 218 quote_string_len = 1; 219 break; 220 221 case escape_quoting_style: 222 backslash_escapes = true; 223 break; 224 225 case locale_quoting_style: 226 case clocale_quoting_style: 227 { 228 /* TRANSLATORS: 229 Get translations for open and closing quotation marks. 230 231 The message catalog should translate "`" to a left 232 quotation mark suitable for the locale, and similarly for 233 "'". If the catalog has no translation, 234 locale_quoting_style quotes `like this', and 235 clocale_quoting_style quotes "like this". 236 237 For example, an American English Unicode locale should 238 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and 239 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION 240 MARK). A British English Unicode locale should instead 241 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and 242 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. 243 244 If you don't know what to put here, please see 245 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> 246 and use glyphs suitable for your language. */ 247 248 char const *left = gettext_quote (N_("`"), quoting_style); 249 char const *right = gettext_quote (N_("'"), quoting_style); 250 for (quote_string = left; *quote_string; quote_string++) 251 STORE (*quote_string); 252 backslash_escapes = true; 253 quote_string = right; 254 quote_string_len = strlen (quote_string); 255 } 256 break; 257 258 case shell_always_quoting_style: 259 STORE ('\''); 260 quote_string = "'"; 261 quote_string_len = 1; 262 break; 263 264 default: 265 break; 266 } 267 268 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) 269 { 270 unsigned char c; 271 unsigned char esc; 272 273 if (backslash_escapes 274 && quote_string_len 275 && i + quote_string_len <= argsize 276 && memcmp (arg + i, quote_string, quote_string_len) == 0) 277 STORE ('\\'); 278 279 c = arg[i]; 280 switch (c) 281 { 282 case '\0': 283 if (backslash_escapes) 284 { 285 STORE ('\\'); 286 STORE ('0'); 287 STORE ('0'); 288 c = '0'; 289 } 290 break; 291 292 case '?': 293 switch (quoting_style) 294 { 295 case shell_quoting_style: 296 goto use_shell_always_quoting_style; 297 298 case c_quoting_style: 299 if (i + 2 < argsize && arg[i + 1] == '?') 300 switch (arg[i + 2]) 301 { 302 case '!': case '\'': 303 case '(': case ')': case '-': case '/': 304 case '<': case '=': case '>': 305 /* Escape the second '?' in what would otherwise be 306 a trigraph. */ 307 c = arg[i + 2]; 308 i += 2; 309 STORE ('?'); 310 STORE ('\\'); 311 STORE ('?'); 312 break; 313 } 314 break; 315 316 default: 317 break; 318 } 319 break; 320 321 case '\a': esc = 'a'; goto c_escape; 322 case '\b': esc = 'b'; goto c_escape; 323 case '\f': esc = 'f'; goto c_escape; 324 case '\n': esc = 'n'; goto c_and_shell_escape; 325 case '\r': esc = 'r'; goto c_and_shell_escape; 326 case '\t': esc = 't'; goto c_and_shell_escape; 327 case '\v': esc = 'v'; goto c_escape; 328 case '\\': esc = c; goto c_and_shell_escape; 329 330 c_and_shell_escape: 331 if (quoting_style == shell_quoting_style) 332 goto use_shell_always_quoting_style; 333 c_escape: 334 if (backslash_escapes) 335 { 336 c = esc; 337 goto store_escape; 338 } 339 break; 340 341 case '{': case '}': /* sometimes special if isolated */ 342 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) 343 break; 344 /* Fall through. */ 345 case '#': case '~': 346 if (i != 0) 347 break; 348 /* Fall through. */ 349 case ' ': 350 case '!': /* special in bash */ 351 case '"': case '$': case '&': 352 case '(': case ')': case '*': case ';': 353 case '<': 354 case '=': /* sometimes special in 0th or (with "set -k") later args */ 355 case '>': case '[': 356 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ 357 case '`': case '|': 358 /* A shell special character. In theory, '$' and '`' could 359 be the first bytes of multibyte characters, which means 360 we should check them with mbrtowc, but in practice this 361 doesn't happen so it's not worth worrying about. */ 362 if (quoting_style == shell_quoting_style) 363 goto use_shell_always_quoting_style; 364 break; 365 366 case '\'': 367 switch (quoting_style) 368 { 369 case shell_quoting_style: 370 goto use_shell_always_quoting_style; 371 372 case shell_always_quoting_style: 373 STORE ('\''); 374 STORE ('\\'); 375 STORE ('\''); 376 break; 377 378 default: 379 break; 380 } 381 break; 382 383 case '%': case '+': case ',': case '-': case '.': case '/': 384 case '0': case '1': case '2': case '3': case '4': case '5': 385 case '6': case '7': case '8': case '9': case ':': 386 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 387 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 388 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 389 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 390 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': 391 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': 392 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 393 case 'o': case 'p': case 'q': case 'r': case 's': case 't': 394 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 395 /* These characters don't cause problems, no matter what the 396 quoting style is. They cannot start multibyte sequences. */ 397 break; 398 399 default: 400 /* If we have a multibyte sequence, copy it until we reach 401 its end, find an error, or come back to the initial shift 402 state. For C-like styles, if the sequence has 403 unprintable characters, escape the whole sequence, since 404 we can't easily escape single characters within it. */ 405 { 406 /* Length of multibyte sequence found so far. */ 407 size_t m; 408 409 bool printable; 410 411 if (unibyte_locale) 412 { 413 m = 1; 414 printable = isprint (c) != 0; 415 } 416 else 417 { 418 mbstate_t mbstate; 419 memset (&mbstate, 0, sizeof mbstate); 420 421 m = 0; 422 printable = true; 423 if (argsize == SIZE_MAX) 424 argsize = strlen (arg); 425 426 do 427 { 428 wchar_t w; 429 size_t bytes = mbrtowc (&w, &arg[i + m], 430 argsize - (i + m), &mbstate); 431 if (bytes == 0) 432 break; 433 else if (bytes == (size_t) -1) 434 { 435 printable = false; 436 break; 437 } 438 else if (bytes == (size_t) -2) 439 { 440 printable = false; 441 while (i + m < argsize && arg[i + m]) 442 m++; 443 break; 444 } 445 else 446 { 447 /* Work around a bug with older shells that "see" a '\' 448 that is really the 2nd byte of a multibyte character. 449 In practice the problem is limited to ASCII 450 chars >= '@' that are shell special chars. */ 451 if ('[' == 0x5b && quoting_style == shell_quoting_style) 452 { 453 size_t j; 454 for (j = 1; j < bytes; j++) 455 switch (arg[i + m + j]) 456 { 457 case '[': case '\\': case '^': 458 case '`': case '|': 459 goto use_shell_always_quoting_style; 460 } 461 } 462 463 if (! iswprint (w)) 464 printable = false; 465 m += bytes; 466 } 467 } 468 while (! mbsinit (&mbstate)); 469 } 470 471 if (1 < m || (backslash_escapes && ! printable)) 472 { 473 /* Output a multibyte sequence, or an escaped 474 unprintable unibyte character. */ 475 size_t ilim = i + m; 476 477 for (;;) 478 { 479 if (backslash_escapes && ! printable) 480 { 481 STORE ('\\'); 482 STORE ('0' + (c >> 6)); 483 STORE ('0' + ((c >> 3) & 7)); 484 c = '0' + (c & 7); 485 } 486 if (ilim <= i + 1) 487 break; 488 STORE (c); 489 c = arg[++i]; 490 } 491 492 goto store_c; 493 } 494 } 495 } 496 497 if (! (backslash_escapes 498 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) 499 goto store_c; 500 501 store_escape: 502 STORE ('\\'); 503 504 store_c: 505 STORE (c); 506 } 507 508 if (i == 0 && quoting_style == shell_quoting_style) 509 goto use_shell_always_quoting_style; 510 511 if (quote_string) 512 for (; *quote_string; quote_string++) 513 STORE (*quote_string); 514 515 if (len < buffersize) 516 buffer[len] = '\0'; 517 return len; 518 519 use_shell_always_quoting_style: 520 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 521 shell_always_quoting_style, o); 522 } 523 524 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 525 argument ARG (of size ARGSIZE), using O to control quoting. 526 If O is null, use the default. 527 Terminate the output with a null character, and return the written 528 size of the output, not counting the terminating null. 529 If BUFFERSIZE is too small to store the output string, return the 530 value that would have been returned had BUFFERSIZE been large enough. 531 If ARGSIZE is SIZE_MAX, use the string length of the argument for 532 ARGSIZE. */ 533 size_t 534 quotearg_buffer (char *buffer, size_t buffersize, 535 char const *arg, size_t argsize, 536 struct quoting_options const *o) 537 { 538 struct quoting_options const *p = o ? o : &default_quoting_options; 539 int e = errno; 540 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 541 p->style, p); 542 errno = e; 543 return r; 544 } 545 546 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly 547 allocated storage containing the quoted string. */ 548 char * 549 quotearg_alloc (char const *arg, size_t argsize, 550 struct quoting_options const *o) 551 { 552 int e = errno; 553 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1; 554 char *buf = xmalloc (bufsize); 555 quotearg_buffer (buf, bufsize, arg, argsize, o); 556 errno = e; 557 return buf; 558 } 559 560 /* Use storage slot N to return a quoted version of argument ARG. 561 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a 562 null-terminated string. 563 OPTIONS specifies the quoting options. 564 The returned value points to static storage that can be 565 reused by the next call to this function with the same value of N. 566 N must be nonnegative. N is deliberately declared with type "int" 567 to allow for future extensions (using negative values). */ 568 static char * 569 quotearg_n_options (int n, char const *arg, size_t argsize, 570 struct quoting_options const *options) 571 { 572 int e = errno; 573 574 /* Preallocate a slot 0 buffer, so that the caller can always quote 575 one small component of a "memory exhausted" message in slot 0. */ 576 static char slot0[256]; 577 static unsigned int nslots = 1; 578 unsigned int n0 = n; 579 struct slotvec 580 { 581 size_t size; 582 char *val; 583 }; 584 static struct slotvec slotvec0 = {sizeof slot0, slot0}; 585 static struct slotvec *slotvec = &slotvec0; 586 587 if (n < 0) 588 abort (); 589 590 if (nslots <= n0) 591 { 592 unsigned int n1 = n0 + 1; 593 594 /* XXX: wrong int cast to avoid gcc warning */ 595 if (xalloc_oversized ((int)n1, sizeof *slotvec)) 596 xalloc_die (); 597 598 if (slotvec == &slotvec0) 599 { 600 slotvec = xmalloc (sizeof *slotvec); 601 *slotvec = slotvec0; 602 } 603 slotvec = xrealloc (slotvec, n1 * sizeof *slotvec); 604 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec); 605 nslots = n1; 606 } 607 608 { 609 size_t size = slotvec[n].size; 610 char *val = slotvec[n].val; 611 size_t qsize = quotearg_buffer (val, size, arg, argsize, options); 612 613 if (size <= qsize) 614 { 615 slotvec[n].size = size = qsize + 1; 616 if (val != slot0) 617 free (val); 618 slotvec[n].val = val = xmalloc (size); 619 quotearg_buffer (val, size, arg, argsize, options); 620 } 621 622 errno = e; 623 return val; 624 } 625 } 626 627 char * 628 quotearg_n (int n, char const *arg) 629 { 630 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); 631 } 632 633 char * 634 quotearg (char const *arg) 635 { 636 return quotearg_n (0, arg); 637 } 638 639 /* Return quoting options for STYLE, with no extra quoting. */ 640 static struct quoting_options 641 quoting_options_from_style (enum quoting_style style) 642 { 643 struct quoting_options o; 644 o.style = style; 645 memset (o.quote_these_too, 0, sizeof o.quote_these_too); 646 return o; 647 } 648 649 char * 650 quotearg_n_style (int n, enum quoting_style s, char const *arg) 651 { 652 struct quoting_options const o = quoting_options_from_style (s); 653 return quotearg_n_options (n, arg, SIZE_MAX, &o); 654 } 655 656 char * 657 quotearg_n_style_mem (int n, enum quoting_style s, 658 char const *arg, size_t argsize) 659 { 660 struct quoting_options const o = quoting_options_from_style (s); 661 return quotearg_n_options (n, arg, argsize, &o); 662 } 663 664 char * 665 quotearg_style (enum quoting_style s, char const *arg) 666 { 667 return quotearg_n_style (0, s, arg); 668 } 669 670 char * 671 quotearg_char (char const *arg, char ch) 672 { 673 struct quoting_options options; 674 options = default_quoting_options; 675 set_char_quoting (&options, ch, 1); 676 return quotearg_n_options (0, arg, SIZE_MAX, &options); 677 } 678 679 char * 680 quotearg_colon (char const *arg) 681 { 682 return quotearg_char (arg, ':'); 683 } 684