1 /* Perl format strings. 2 Copyright (C) 2004, 2006 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2003. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 #ifdef HAVE_CONFIG_H 20 # include <config.h> 21 #endif 22 23 #include <stdbool.h> 24 #include <stdlib.h> 25 26 #include "format.h" 27 #include "c-ctype.h" 28 #include "xalloc.h" 29 #include "xvasprintf.h" 30 #include "format-invalid.h" 31 #include "gettext.h" 32 33 #define _(str) gettext (str) 34 35 /* Perl format strings are implemented in function Perl_sv_vcatpvfn in 36 perl-5.8.0/sv.c. 37 A directive 38 - starts with '%' or '%m$' where m is a positive integer starting with a 39 nonzero digit, 40 - is optionally followed by any of the characters '#', '0', '-', ' ', '+', 41 each of which acts as a flag, 42 - is optionally followed by a vector specification: 'v' or '*v' (reads an 43 argument) or '*m$v' where m is a positive integer starting with a nonzero 44 digit, 45 - is optionally followed by a width specification: '*' (reads an argument) 46 or '*m$' where m is a positive integer starting with a nonzero digit or 47 a nonempty digit sequence starting with a nonzero digit, 48 - is optionally followed by '.' and a precision specification: '*' (reads 49 an argument) or '*m$' where m is a positive integer starting with a 50 nonzero digit or a digit sequence, 51 - is optionally followed by a size specifier, one of 'h' 'l' 'll' 'L' 'q' 52 'V' 'I32' 'I64' 'I', 53 - is finished by a specifier 54 - '%', that needs no argument, 55 - 'c', that needs a small integer argument, 56 - 's', that needs a string argument, 57 - '_', that needs a scalar vector argument, 58 - 'p', that needs a pointer argument, 59 - 'i', 'd', 'D', that need an integer argument, 60 - 'u', 'U', 'b', 'o', 'O', 'x', 'X', that need an unsigned integer 61 argument, 62 - 'e', 'E', 'f', 'F', 'g', 'G', that need a floating-point argument, 63 - 'n', that needs a pointer to integer. 64 So there can be numbered argument specifications: 65 - '%m$' for the format string, 66 - '*m$v' for the vector, 67 - '*m$' for the width, 68 - '.*m$' for the precision. 69 Numbered and unnumbered argument specifications can be used in the same 70 string. The effect of '%m$' is to take argument number m, without affecting 71 the current argument number. The current argument number is incremented 72 after processing a directive with an unnumbered argument specification. 73 */ 74 75 enum format_arg_type 76 { 77 FAT_NONE = 0, 78 /* Basic types */ 79 FAT_INTEGER = 1, 80 FAT_DOUBLE = 2, 81 FAT_CHAR = 3, 82 FAT_STRING = 4, 83 FAT_SCALAR_VECTOR = 5, 84 FAT_POINTER = 6, 85 FAT_COUNT_POINTER = 7, 86 /* Flags */ 87 FAT_UNSIGNED = 1 << 3, 88 FAT_SIZE_SHORT = 1 << 4, 89 FAT_SIZE_V = 2 << 4, 90 FAT_SIZE_PTR = 3 << 4, 91 FAT_SIZE_LONG = 4 << 4, 92 FAT_SIZE_LONGLONG = 5 << 4, 93 /* Bitmasks */ 94 FAT_SIZE_MASK = (FAT_SIZE_SHORT | FAT_SIZE_V | FAT_SIZE_PTR 95 | FAT_SIZE_LONG | FAT_SIZE_LONGLONG) 96 }; 97 98 struct numbered_arg 99 { 100 unsigned int number; 101 enum format_arg_type type; 102 }; 103 104 struct spec 105 { 106 unsigned int directives; 107 unsigned int numbered_arg_count; 108 unsigned int allocated; 109 struct numbered_arg *numbered; 110 }; 111 112 /* Locale independent test for a decimal digit. 113 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 114 <ctype.h> isdigit must be an 'unsigned char'.) */ 115 #undef isdigit 116 #define isdigit(c) ((unsigned int) ((c) - '0') < 10) 117 118 /* Locale independent test for a nonzero decimal digit. */ 119 #define isnonzerodigit(c) ((unsigned int) ((c) - '1') < 9) 120 121 122 static int 123 numbered_arg_compare (const void *p1, const void *p2) 124 { 125 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 126 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 127 128 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 129 } 130 131 static void * 132 format_parse (const char *format, bool translated, char **invalid_reason) 133 { 134 unsigned int directives; 135 unsigned int numbered_arg_count; 136 unsigned int allocated; 137 struct numbered_arg *numbered; 138 unsigned int unnumbered_arg_count; 139 struct spec *result; 140 141 directives = 0; 142 numbered_arg_count = 0; 143 unnumbered_arg_count = 0; 144 allocated = 0; 145 numbered = NULL; 146 147 for (; *format != '\0';) 148 if (*format++ == '%') 149 { 150 /* A directive. */ 151 unsigned int number = 0; 152 bool vectorize = false; 153 enum format_arg_type type; 154 enum format_arg_type size; 155 156 directives++; 157 158 if (isnonzerodigit (*format)) 159 { 160 const char *f = format; 161 unsigned int m = 0; 162 163 do 164 { 165 m = 10 * m + (*f - '0'); 166 f++; 167 } 168 while (isdigit (*f)); 169 170 if (*f == '$') 171 { 172 number = m; 173 format = ++f; 174 } 175 } 176 177 /* Parse flags. */ 178 while (*format == ' ' || *format == '+' || *format == '-' 179 || *format == '#' || *format == '0') 180 format++; 181 182 /* Parse vector. */ 183 if (*format == 'v') 184 { 185 format++; 186 vectorize = true; 187 } 188 else if (*format == '*') 189 { 190 const char *f = format; 191 192 f++; 193 if (*f == 'v') 194 { 195 format = ++f; 196 vectorize = true; 197 198 /* Unnumbered argument. */ 199 if (allocated == numbered_arg_count) 200 { 201 allocated = 2 * allocated + 1; 202 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 203 } 204 numbered[numbered_arg_count].number = ++unnumbered_arg_count; 205 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */ 206 numbered_arg_count++; 207 } 208 else if (isnonzerodigit (*f)) 209 { 210 unsigned int m = 0; 211 212 do 213 { 214 m = 10 * m + (*f - '0'); 215 f++; 216 } 217 while (isdigit (*f)); 218 219 if (*f == '$') 220 { 221 f++; 222 if (*f == 'v') 223 { 224 unsigned int vector_number = m; 225 226 format = ++f; 227 vectorize = true; 228 229 /* Numbered argument. */ 230 /* Note: As of perl-5.8.0, this is not correctly 231 implemented in perl's sv.c. */ 232 if (allocated == numbered_arg_count) 233 { 234 allocated = 2 * allocated + 1; 235 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 236 } 237 numbered[numbered_arg_count].number = vector_number; 238 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */ 239 numbered_arg_count++; 240 } 241 } 242 } 243 } 244 245 if (vectorize) 246 { 247 /* Numbered or unnumbered argument. */ 248 if (allocated == numbered_arg_count) 249 { 250 allocated = 2 * allocated + 1; 251 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 252 } 253 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count); 254 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; 255 numbered_arg_count++; 256 } 257 258 /* Parse width. */ 259 if (*format == '*') 260 { 261 unsigned int width_number = 0; 262 263 format++; 264 265 if (isnonzerodigit (*format)) 266 { 267 const char *f = format; 268 unsigned int m = 0; 269 270 do 271 { 272 m = 10 * m + (*f - '0'); 273 f++; 274 } 275 while (isdigit (*f)); 276 277 if (*f == '$') 278 { 279 width_number = m; 280 format = ++f; 281 } 282 } 283 284 /* Numbered or unnumbered argument. */ 285 /* Note: As of perl-5.8.0, this is not correctly 286 implemented in perl's sv.c. */ 287 if (allocated == numbered_arg_count) 288 { 289 allocated = 2 * allocated + 1; 290 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 291 } 292 numbered[numbered_arg_count].number = (width_number ? width_number : ++unnumbered_arg_count); 293 numbered[numbered_arg_count].type = FAT_INTEGER; 294 numbered_arg_count++; 295 } 296 else if (isnonzerodigit (*format)) 297 { 298 do format++; while (isdigit (*format)); 299 } 300 301 /* Parse precision. */ 302 if (*format == '.') 303 { 304 format++; 305 306 if (*format == '*') 307 { 308 unsigned int precision_number = 0; 309 310 format++; 311 312 if (isnonzerodigit (*format)) 313 { 314 const char *f = format; 315 unsigned int m = 0; 316 317 do 318 { 319 m = 10 * m + (*f - '0'); 320 f++; 321 } 322 while (isdigit (*f)); 323 324 if (*f == '$') 325 { 326 precision_number = m; 327 format = ++f; 328 } 329 } 330 331 /* Numbered or unnumbered argument. */ 332 if (allocated == numbered_arg_count) 333 { 334 allocated = 2 * allocated + 1; 335 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 336 } 337 numbered[numbered_arg_count].number = (precision_number ? precision_number : ++unnumbered_arg_count); 338 numbered[numbered_arg_count].type = FAT_INTEGER; 339 numbered_arg_count++; 340 } 341 else 342 { 343 while (isdigit (*format)) format++; 344 } 345 } 346 347 /* Parse size. */ 348 size = 0; 349 if (*format == 'h') 350 { 351 size = FAT_SIZE_SHORT; 352 format++; 353 } 354 else if (*format == 'l') 355 { 356 if (format[1] == 'l') 357 { 358 size = FAT_SIZE_LONGLONG; 359 format += 2; 360 } 361 else 362 { 363 size = FAT_SIZE_LONG; 364 format++; 365 } 366 } 367 else if (*format == 'L' || *format == 'q') 368 { 369 size = FAT_SIZE_LONGLONG; 370 format++; 371 } 372 else if (*format == 'V') 373 { 374 size = FAT_SIZE_V; 375 format++; 376 } 377 else if (*format == 'I') 378 { 379 if (format[1] == '6' && format[2] == '4') 380 { 381 size = FAT_SIZE_LONGLONG; 382 format += 3; 383 } 384 else if (format[1] == '3' && format[2] == '2') 385 { 386 size = 0; /* FAT_SIZE_INT */ 387 format += 3; 388 } 389 else 390 { 391 size = FAT_SIZE_PTR; 392 format++; 393 } 394 } 395 396 switch (*format) 397 { 398 case '%': 399 type = FAT_NONE; 400 break; 401 case 'c': 402 type = FAT_CHAR; 403 break; 404 case 's': 405 type = FAT_STRING; 406 break; 407 case '_': 408 type = FAT_SCALAR_VECTOR; 409 break; 410 case 'D': 411 type = FAT_INTEGER | FAT_SIZE_V; 412 break; 413 case 'i': case 'd': 414 type = FAT_INTEGER | size; 415 break; 416 case 'U': case 'O': 417 type = FAT_INTEGER | FAT_UNSIGNED | FAT_SIZE_V; 418 break; 419 case 'u': case 'b': case 'o': case 'x': case 'X': 420 type = FAT_INTEGER | FAT_UNSIGNED | size; 421 break; 422 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': 423 if (size == FAT_SIZE_SHORT || size == FAT_SIZE_LONG) 424 { 425 *invalid_reason = 426 xasprintf (_("In the directive number %u, the size specifier is incompatible with the conversion specifier '%c'."), directives, *format); 427 goto bad_format; 428 } 429 type = FAT_DOUBLE | size; 430 break; 431 case 'p': 432 type = FAT_POINTER; 433 break; 434 case 'n': 435 type = FAT_COUNT_POINTER | size; 436 break; 437 default: 438 *invalid_reason = 439 (*format == '\0' 440 ? INVALID_UNTERMINATED_DIRECTIVE () 441 : INVALID_CONVERSION_SPECIFIER (directives, *format)); 442 goto bad_format; 443 } 444 445 if (type != FAT_NONE && !vectorize) 446 { 447 /* Numbered or unnumbered argument. */ 448 if (allocated == numbered_arg_count) 449 { 450 allocated = 2 * allocated + 1; 451 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 452 } 453 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count); 454 numbered[numbered_arg_count].type = type; 455 numbered_arg_count++; 456 } 457 458 format++; 459 } 460 461 /* Sort the numbered argument array, and eliminate duplicates. */ 462 if (numbered_arg_count > 1) 463 { 464 unsigned int i, j; 465 bool err; 466 467 qsort (numbered, numbered_arg_count, 468 sizeof (struct numbered_arg), numbered_arg_compare); 469 470 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 471 err = false; 472 for (i = j = 0; i < numbered_arg_count; i++) 473 if (j > 0 && numbered[i].number == numbered[j-1].number) 474 { 475 enum format_arg_type type1 = numbered[i].type; 476 enum format_arg_type type2 = numbered[j-1].type; 477 enum format_arg_type type_both; 478 479 if (type1 == type2) 480 type_both = type1; 481 else 482 { 483 /* Incompatible types. */ 484 type_both = FAT_NONE; 485 if (!err) 486 *invalid_reason = 487 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number); 488 err = true; 489 } 490 491 numbered[j-1].type = type_both; 492 } 493 else 494 { 495 if (j < i) 496 { 497 numbered[j].number = numbered[i].number; 498 numbered[j].type = numbered[i].type; 499 } 500 j++; 501 } 502 numbered_arg_count = j; 503 if (err) 504 /* *invalid_reason has already been set above. */ 505 goto bad_format; 506 } 507 508 result = (struct spec *) xmalloc (sizeof (struct spec)); 509 result->directives = directives; 510 result->numbered_arg_count = numbered_arg_count; 511 result->allocated = allocated; 512 result->numbered = numbered; 513 return result; 514 515 bad_format: 516 if (numbered != NULL) 517 free (numbered); 518 return NULL; 519 } 520 521 static void 522 format_free (void *descr) 523 { 524 struct spec *spec = (struct spec *) descr; 525 526 if (spec->numbered != NULL) 527 free (spec->numbered); 528 free (spec); 529 } 530 531 static int 532 format_get_number_of_directives (void *descr) 533 { 534 struct spec *spec = (struct spec *) descr; 535 536 return spec->directives; 537 } 538 539 static bool 540 format_check (void *msgid_descr, void *msgstr_descr, bool equality, 541 formatstring_error_logger_t error_logger, 542 const char *pretty_msgstr) 543 { 544 struct spec *spec1 = (struct spec *) msgid_descr; 545 struct spec *spec2 = (struct spec *) msgstr_descr; 546 bool err = false; 547 548 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 549 { 550 unsigned int i, j; 551 unsigned int n1 = spec1->numbered_arg_count; 552 unsigned int n2 = spec2->numbered_arg_count; 553 554 /* Check the argument names are the same. 555 Both arrays are sorted. We search for the first difference. */ 556 for (i = 0, j = 0; i < n1 || j < n2; ) 557 { 558 int cmp = (i >= n1 ? 1 : 559 j >= n2 ? -1 : 560 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 561 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 562 0); 563 564 if (cmp > 0) 565 { 566 if (error_logger) 567 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 568 spec2->numbered[j].number, pretty_msgstr); 569 err = true; 570 break; 571 } 572 else if (cmp < 0) 573 { 574 if (equality) 575 { 576 if (error_logger) 577 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 578 spec1->numbered[i].number, pretty_msgstr); 579 err = true; 580 break; 581 } 582 else 583 i++; 584 } 585 else 586 j++, i++; 587 } 588 /* Check the argument types are the same. */ 589 if (!err) 590 for (i = 0, j = 0; j < n2; ) 591 { 592 if (spec1->numbered[i].number == spec2->numbered[j].number) 593 { 594 if (spec1->numbered[i].type != spec2->numbered[j].type) 595 { 596 if (error_logger) 597 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 598 pretty_msgstr, spec2->numbered[j].number); 599 err = true; 600 break; 601 } 602 j++, i++; 603 } 604 else 605 i++; 606 } 607 } 608 609 return err; 610 } 611 612 613 struct formatstring_parser formatstring_perl = 614 { 615 format_parse, 616 format_free, 617 format_get_number_of_directives, 618 NULL, 619 format_check 620 }; 621 622 623 #ifdef TEST 624 625 /* Test program: Print the argument list specification returned by 626 format_parse for strings read from standard input. */ 627 628 #include <stdio.h> 629 #include "getline.h" 630 631 static void 632 format_print (void *descr) 633 { 634 struct spec *spec = (struct spec *) descr; 635 unsigned int last; 636 unsigned int i; 637 638 if (spec == NULL) 639 { 640 printf ("INVALID"); 641 return; 642 } 643 644 printf ("("); 645 last = 1; 646 for (i = 0; i < spec->numbered_arg_count; i++) 647 { 648 unsigned int number = spec->numbered[i].number; 649 650 if (i > 0) 651 printf (" "); 652 if (number < last) 653 abort (); 654 for (; last < number; last++) 655 printf ("_ "); 656 if (spec->numbered[i].type & FAT_UNSIGNED) 657 printf ("[unsigned]"); 658 switch (spec->numbered[i].type & FAT_SIZE_MASK) 659 { 660 case 0: 661 break; 662 case FAT_SIZE_SHORT: 663 printf ("[short]"); 664 break; 665 case FAT_SIZE_V: 666 printf ("[IV]"); 667 break; 668 case FAT_SIZE_PTR: 669 printf ("[PTR]"); 670 break; 671 case FAT_SIZE_LONG: 672 printf ("[long]"); 673 break; 674 case FAT_SIZE_LONGLONG: 675 printf ("[long long]"); 676 break; 677 default: 678 abort (); 679 } 680 switch (spec->numbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK)) 681 { 682 case FAT_INTEGER: 683 printf ("i"); 684 break; 685 case FAT_DOUBLE: 686 printf ("f"); 687 break; 688 case FAT_CHAR: 689 printf ("c"); 690 break; 691 case FAT_STRING: 692 printf ("s"); 693 break; 694 case FAT_SCALAR_VECTOR: 695 printf ("sv"); 696 break; 697 case FAT_POINTER: 698 printf ("p"); 699 break; 700 case FAT_COUNT_POINTER: 701 printf ("n"); 702 break; 703 default: 704 abort (); 705 } 706 last = number + 1; 707 } 708 printf (")"); 709 } 710 711 int 712 main () 713 { 714 for (;;) 715 { 716 char *line = NULL; 717 size_t line_size = 0; 718 int line_len; 719 char *invalid_reason; 720 void *descr; 721 722 line_len = getline (&line, &line_size, stdin); 723 if (line_len < 0) 724 break; 725 if (line_len > 0 && line[line_len - 1] == '\n') 726 line[--line_len] = '\0'; 727 728 invalid_reason = NULL; 729 descr = format_parse (line, false, &invalid_reason); 730 731 format_print (descr); 732 printf ("\n"); 733 if (descr == NULL) 734 printf ("%s\n", invalid_reason); 735 736 free (invalid_reason); 737 free (line); 738 } 739 740 return 0; 741 } 742 743 /* 744 * For Emacs M-x compile 745 * Local Variables: 746 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-perl.c ../lib/libgettextlib.la" 747 * End: 748 */ 749 750 #endif /* TEST */ 751