1 /* PHP format strings. 2 Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2002. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 #ifdef HAVE_CONFIG_H 20 # include <config.h> 21 #endif 22 23 #include <stdbool.h> 24 #include <stdlib.h> 25 26 #include "format.h" 27 #include "c-ctype.h" 28 #include "xalloc.h" 29 #include "xvasprintf.h" 30 #include "format-invalid.h" 31 #include "gettext.h" 32 33 #define _(str) gettext (str) 34 35 /* PHP format strings are described in phpdoc-4.0.6, file 36 phpdoc/manual/function.sprintf.html, and are implemented in 37 php-4.1.0/ext/standard/formatted_print.c. 38 A directive 39 - starts with '%' or '%m$' where m is a positive integer, 40 - is optionally followed by any of the characters '0', '-', ' ', or 41 "'<anychar>", each of which acts as a flag, 42 - is optionally followed by a width specification: a nonempty digit 43 sequence, 44 - is optionally followed by '.' and a precision specification: a nonempty 45 digit sequence, 46 - is optionally followed by a size specifier 'l', which is ignored, 47 - is finished by a specifier 48 - 's', that needs a string argument, 49 - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument, 50 - 'e', 'f', that need a floating-point argument, 51 - 'c', that needs a character argument. 52 Additionally there is the directive '%%', which takes no argument. 53 Numbered and unnumbered argument specifications can be used in the same 54 string. Numbered argument specifications have no influence on the 55 "current argument index", that is incremented each time an argument is read. 56 */ 57 58 enum format_arg_type 59 { 60 FAT_INTEGER, 61 FAT_FLOAT, 62 FAT_CHARACTER, 63 FAT_STRING 64 }; 65 66 struct numbered_arg 67 { 68 unsigned int number; 69 enum format_arg_type type; 70 }; 71 72 struct spec 73 { 74 unsigned int directives; 75 unsigned int numbered_arg_count; 76 unsigned int allocated; 77 struct numbered_arg *numbered; 78 }; 79 80 /* Locale independent test for a decimal digit. 81 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 82 <ctype.h> isdigit must be an 'unsigned char'.) */ 83 #undef isdigit 84 #define isdigit(c) ((unsigned int) ((c) - '0') < 10) 85 86 87 static int 88 numbered_arg_compare (const void *p1, const void *p2) 89 { 90 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 91 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 92 93 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 94 } 95 96 static void * 97 format_parse (const char *format, bool translated, char **invalid_reason) 98 { 99 unsigned int directives; 100 unsigned int numbered_arg_count; 101 unsigned int allocated; 102 struct numbered_arg *numbered; 103 unsigned int unnumbered_arg_count; 104 struct spec *result; 105 106 directives = 0; 107 numbered_arg_count = 0; 108 allocated = 0; 109 numbered = NULL; 110 unnumbered_arg_count = 0; 111 112 for (; *format != '\0';) 113 if (*format++ == '%') 114 { 115 /* A directive. */ 116 directives++; 117 118 if (*format != '%') 119 { 120 /* A complex directive. */ 121 unsigned int number; 122 enum format_arg_type type; 123 124 number = ++unnumbered_arg_count; 125 if (isdigit (*format)) 126 { 127 const char *f = format; 128 unsigned int m = 0; 129 130 do 131 { 132 m = 10 * m + (*f - '0'); 133 f++; 134 } 135 while (isdigit (*f)); 136 137 if (*f == '$') 138 { 139 if (m == 0) 140 { 141 *invalid_reason = INVALID_ARGNO_0 (directives); 142 goto bad_format; 143 } 144 number = m; 145 format = ++f; 146 --unnumbered_arg_count; 147 } 148 } 149 150 /* Parse flags. */ 151 for (;;) 152 { 153 if (*format == '0' || *format == '-' || *format == ' ') 154 format++; 155 else if (*format == '\'') 156 { 157 format++; 158 if (*format == '\0') 159 { 160 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 161 goto bad_format; 162 } 163 format++; 164 } 165 else 166 break; 167 } 168 169 /* Parse width. */ 170 if (isdigit (*format)) 171 { 172 do 173 format++; 174 while (isdigit (*format)); 175 } 176 177 /* Parse precision. */ 178 if (*format == '.') 179 { 180 format++; 181 182 if (isdigit (*format)) 183 { 184 do 185 format++; 186 while (isdigit (*format)); 187 } 188 else 189 --format; /* will jump to bad_format */ 190 } 191 192 /* Parse size. */ 193 if (*format == 'l') 194 format++; 195 196 switch (*format) 197 { 198 case 'b': case 'd': case 'u': case 'o': case 'x': case 'X': 199 type = FAT_INTEGER; 200 break; 201 case 'e': case 'f': 202 type = FAT_FLOAT; 203 break; 204 case 'c': 205 type = FAT_CHARACTER; 206 break; 207 case 's': 208 type = FAT_STRING; 209 break; 210 default: 211 *invalid_reason = 212 (*format == '\0' 213 ? INVALID_UNTERMINATED_DIRECTIVE () 214 : INVALID_CONVERSION_SPECIFIER (directives, *format)); 215 goto bad_format; 216 } 217 218 if (allocated == numbered_arg_count) 219 { 220 allocated = 2 * allocated + 1; 221 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 222 } 223 numbered[numbered_arg_count].number = number; 224 numbered[numbered_arg_count].type = type; 225 numbered_arg_count++; 226 } 227 228 format++; 229 } 230 231 /* Sort the numbered argument array, and eliminate duplicates. */ 232 if (numbered_arg_count > 1) 233 { 234 unsigned int i, j; 235 bool err; 236 237 qsort (numbered, numbered_arg_count, 238 sizeof (struct numbered_arg), numbered_arg_compare); 239 240 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 241 err = false; 242 for (i = j = 0; i < numbered_arg_count; i++) 243 if (j > 0 && numbered[i].number == numbered[j-1].number) 244 { 245 enum format_arg_type type1 = numbered[i].type; 246 enum format_arg_type type2 = numbered[j-1].type; 247 enum format_arg_type type_both; 248 249 if (type1 == type2) 250 type_both = type1; 251 else 252 { 253 /* Incompatible types. */ 254 type_both = type1; 255 if (!err) 256 *invalid_reason = 257 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number); 258 err = true; 259 } 260 261 numbered[j-1].type = type_both; 262 } 263 else 264 { 265 if (j < i) 266 { 267 numbered[j].number = numbered[i].number; 268 numbered[j].type = numbered[i].type; 269 } 270 j++; 271 } 272 numbered_arg_count = j; 273 if (err) 274 /* *invalid_reason has already been set above. */ 275 goto bad_format; 276 } 277 278 result = (struct spec *) xmalloc (sizeof (struct spec)); 279 result->directives = directives; 280 result->numbered_arg_count = numbered_arg_count; 281 result->allocated = allocated; 282 result->numbered = numbered; 283 return result; 284 285 bad_format: 286 if (numbered != NULL) 287 free (numbered); 288 return NULL; 289 } 290 291 static void 292 format_free (void *descr) 293 { 294 struct spec *spec = (struct spec *) descr; 295 296 if (spec->numbered != NULL) 297 free (spec->numbered); 298 free (spec); 299 } 300 301 static int 302 format_get_number_of_directives (void *descr) 303 { 304 struct spec *spec = (struct spec *) descr; 305 306 return spec->directives; 307 } 308 309 static bool 310 format_check (void *msgid_descr, void *msgstr_descr, bool equality, 311 formatstring_error_logger_t error_logger, 312 const char *pretty_msgstr) 313 { 314 struct spec *spec1 = (struct spec *) msgid_descr; 315 struct spec *spec2 = (struct spec *) msgstr_descr; 316 bool err = false; 317 318 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 319 { 320 unsigned int i, j; 321 unsigned int n1 = spec1->numbered_arg_count; 322 unsigned int n2 = spec2->numbered_arg_count; 323 324 /* Check the argument names are the same. 325 Both arrays are sorted. We search for the first difference. */ 326 for (i = 0, j = 0; i < n1 || j < n2; ) 327 { 328 int cmp = (i >= n1 ? 1 : 329 j >= n2 ? -1 : 330 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 331 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 332 0); 333 334 if (cmp > 0) 335 { 336 if (error_logger) 337 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 338 spec2->numbered[j].number, pretty_msgstr); 339 err = true; 340 break; 341 } 342 else if (cmp < 0) 343 { 344 if (equality) 345 { 346 if (error_logger) 347 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 348 spec1->numbered[i].number, pretty_msgstr); 349 err = true; 350 break; 351 } 352 else 353 i++; 354 } 355 else 356 j++, i++; 357 } 358 /* Check the argument types are the same. */ 359 if (!err) 360 for (i = 0, j = 0; j < n2; ) 361 { 362 if (spec1->numbered[i].number == spec2->numbered[j].number) 363 { 364 if (spec1->numbered[i].type != spec2->numbered[j].type) 365 { 366 if (error_logger) 367 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 368 pretty_msgstr, spec2->numbered[j].number); 369 err = true; 370 break; 371 } 372 j++, i++; 373 } 374 else 375 i++; 376 } 377 } 378 379 return err; 380 } 381 382 383 struct formatstring_parser formatstring_php = 384 { 385 format_parse, 386 format_free, 387 format_get_number_of_directives, 388 NULL, 389 format_check 390 }; 391 392 393 #ifdef TEST 394 395 /* Test program: Print the argument list specification returned by 396 format_parse for strings read from standard input. */ 397 398 #include <stdio.h> 399 #include "getline.h" 400 401 static void 402 format_print (void *descr) 403 { 404 struct spec *spec = (struct spec *) descr; 405 unsigned int last; 406 unsigned int i; 407 408 if (spec == NULL) 409 { 410 printf ("INVALID"); 411 return; 412 } 413 414 printf ("("); 415 last = 1; 416 for (i = 0; i < spec->numbered_arg_count; i++) 417 { 418 unsigned int number = spec->numbered[i].number; 419 420 if (i > 0) 421 printf (" "); 422 if (number < last) 423 abort (); 424 for (; last < number; last++) 425 printf ("_ "); 426 switch (spec->numbered[i].type) 427 { 428 case FAT_INTEGER: 429 printf ("i"); 430 break; 431 case FAT_FLOAT: 432 printf ("f"); 433 break; 434 case FAT_CHARACTER: 435 printf ("c"); 436 break; 437 case FAT_STRING: 438 printf ("s"); 439 break; 440 default: 441 abort (); 442 } 443 last = number + 1; 444 } 445 printf (")"); 446 } 447 448 int 449 main () 450 { 451 for (;;) 452 { 453 char *line = NULL; 454 size_t line_size = 0; 455 int line_len; 456 char *invalid_reason; 457 void *descr; 458 459 line_len = getline (&line, &line_size, stdin); 460 if (line_len < 0) 461 break; 462 if (line_len > 0 && line[line_len - 1] == '\n') 463 line[--line_len] = '\0'; 464 465 invalid_reason = NULL; 466 descr = format_parse (line, false, &invalid_reason); 467 468 format_print (descr); 469 printf ("\n"); 470 if (descr == NULL) 471 printf ("%s\n", invalid_reason); 472 473 free (invalid_reason); 474 free (line); 475 } 476 477 return 0; 478 } 479 480 /* 481 * For Emacs M-x compile 482 * Local Variables: 483 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-php.c ../lib/libgettextlib.la" 484 * End: 485 */ 486 487 #endif /* TEST */ 488