xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/format-c.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* C format strings.
2    Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* C format strings are described in POSIX (IEEE P1003.1 2001), section
36    XSH 3 fprintf().  See also Linux fprintf(3) manual page.
37    A directive
38    - starts with '%' or '%m$' where m is a positive integer,
39    - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
40      "'", or - only in msgstr strings - the string "I", each of which acts as
41      a flag,
42    - is optionally followed by a width specification: '*' (reads an argument)
43      or '*m$' or a nonempty digit sequence,
44    - is optionally followed by '.' and a precision specification: '*' (reads
45      an argument) or '*m$' or a nonempty digit sequence,
46    - is either continued like this:
47        - is optionally followed by a size specifier, one of 'hh' 'h' 'l' 'll'
48          'L' 'q' 'j' 'z' 't',
49        - is finished by a specifier
50            - '%', that needs no argument,
51            - 'c', 'C', that need a character argument,
52            - 's', 'S', that need a string argument,
53            - 'i', 'd', that need a signed integer argument,
54            - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
55            - 'e', 'E', 'f', 'F', 'g', 'G', 'a', 'A', that need a floating-point
56              argument,
57            - 'p', that needs a 'void *' argument,
58            - 'n', that needs a pointer to integer.
59      or is finished by a specifier '<' inttypes-macro '>' where inttypes-macro
60      is an ISO C 99 section 7.8.1 format directive.
61    Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
62    be used in the same string.  When numbered argument specifications are
63    used, specifying the Nth argument requires that all the leading arguments,
64    from the first to the (N-1)th, are specified in the format string.
65  */
66 
/* Describes the C type of one argument consumed by a format directive.
   A value is one basic kind (low three bits) optionally OR-ed with the
   FAT_UNSIGNED flag and with one size flag.  */
enum format_arg_type
{
  FAT_NONE              = 0,

  /* Basic kinds.  */
  FAT_INTEGER           = 1,
  FAT_DOUBLE            = 2,
  FAT_CHAR              = 3,
  FAT_STRING            = 4,
  FAT_OBJC_OBJECT       = 5,
  FAT_POINTER           = 6,
  FAT_COUNT_POINTER     = 7,

  /* Signedness flag.  */
  FAT_UNSIGNED          = 0x000008,

  /* Size flags derived from the classic length modifiers.  */
  FAT_SIZE_SHORT        = 0x000010,
  FAT_SIZE_CHAR         = 0x000020,
  FAT_SIZE_LONG         = 0x000040,
  FAT_SIZE_LONGLONG     = 0x000080,

  /* Size flags for the <inttypes.h> / <stdint.h> integer types.  */
  FAT_SIZE_8_T          = 0x000100,
  FAT_SIZE_16_T         = 0x000200,
  FAT_SIZE_32_T         = 0x000400,
  FAT_SIZE_64_T         = 0x000800,
  FAT_SIZE_LEAST8_T     = 0x001000,
  FAT_SIZE_LEAST16_T    = 0x002000,
  FAT_SIZE_LEAST32_T    = 0x004000,
  FAT_SIZE_LEAST64_T    = 0x008000,
  FAT_SIZE_FAST8_T      = 0x010000,
  FAT_SIZE_FAST16_T     = 0x020000,
  FAT_SIZE_FAST32_T     = 0x040000,
  FAT_SIZE_FAST64_T     = 0x080000,
  FAT_SIZE_INTMAX_T     = 0x100000,
  FAT_SIZE_INTPTR_T     = 0x200000,
  FAT_SIZE_SIZE_T       = 0x400000,
  FAT_SIZE_PTRDIFF_T    = 0x800000,

  /* Wide characters and wide strings reuse the 'long' size flag.  */
  FAT_WIDE              = FAT_SIZE_LONG,

  /* Combinations of kinds and flags that correspond to real C types:
       signed char             FAT_INTEGER | FAT_SIZE_CHAR
       unsigned char           FAT_INTEGER | FAT_SIZE_CHAR | FAT_UNSIGNED
       short                   FAT_INTEGER | FAT_SIZE_SHORT
       unsigned short          FAT_INTEGER | FAT_SIZE_SHORT | FAT_UNSIGNED
       int                     FAT_INTEGER
       unsigned int            FAT_INTEGER | FAT_UNSIGNED
       long int                FAT_INTEGER | FAT_SIZE_LONG
       unsigned long int       FAT_INTEGER | FAT_SIZE_LONG | FAT_UNSIGNED
       long long int           FAT_INTEGER | FAT_SIZE_LONGLONG
       unsigned long long int  FAT_INTEGER | FAT_SIZE_LONGLONG | FAT_UNSIGNED
       double                  FAT_DOUBLE
       long double             FAT_DOUBLE | FAT_SIZE_LONGLONG
       char / int              FAT_CHAR
       wchar_t / wint_t        FAT_CHAR | FAT_SIZE_LONG
       const char *            FAT_STRING
       const wchar_t *         FAT_STRING | FAT_SIZE_LONG
       void *                  FAT_POINTER
       signed char *  (%hhn)   FAT_COUNT_POINTER | FAT_SIZE_CHAR
       short *        (%hn)    FAT_COUNT_POINTER | FAT_SIZE_SHORT
       int *          (%n)     FAT_COUNT_POINTER
       long int *     (%ln)    FAT_COUNT_POINTER | FAT_SIZE_LONG
       long long int *(%lln)   FAT_COUNT_POINTER | FAT_SIZE_LONGLONG  */

  /* Mask selecting every size flag.  */
  FAT_SIZE_MASK         = (FAT_SIZE_SHORT | FAT_SIZE_CHAR
                           | FAT_SIZE_LONG | FAT_SIZE_LONGLONG
                           | FAT_SIZE_8_T | FAT_SIZE_LEAST8_T | FAT_SIZE_FAST8_T
                           | FAT_SIZE_16_T | FAT_SIZE_LEAST16_T
                           | FAT_SIZE_FAST16_T
                           | FAT_SIZE_32_T | FAT_SIZE_LEAST32_T
                           | FAT_SIZE_FAST32_T
                           | FAT_SIZE_64_T | FAT_SIZE_LEAST64_T
                           | FAT_SIZE_FAST64_T
                           | FAT_SIZE_INTMAX_T | FAT_SIZE_INTPTR_T
                           | FAT_SIZE_SIZE_T | FAT_SIZE_PTRDIFF_T)
};
137 
/* One argument referenced through the numbered syntax ('%m$' or '*m$').  */
struct numbered_arg
{
  /* The nonzero argument number m taken from the format string.  */
  unsigned int number;
  /* The type the directive expects for that argument.  */
  enum format_arg_type type;
};
143 
/* One argument of a format string, identified only by its position in the
   enclosing array.  */
struct unnumbered_arg
{
  /* The type the directive expects for this argument.  */
  enum format_arg_type type;
};
148 
/* Result of parsing one format string.  */
struct spec
{
  /* Number of '%' directives encountered, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in 'unnumbered'.  */
  unsigned int unnumbered_arg_count;
  /* Capacity of the argument array that is currently being filled.  */
  unsigned int allocated;
  /* Argument types in argument order (heap allocated, may be NULL).  */
  struct unnumbered_arg *unnumbered;
  /* Set when a '%' conversion is preceded by something other than '%',
     as in "%2%".  */
  bool unlikely_intentional;
  /* Number of (start, end) pairs stored in 'sysdep_directives'.  */
  unsigned int sysdep_directives_count;
  /* 2 * sysdep_directives_count pointers into the parsed format string;
     entry 2*i is the start and entry 2*i+1 the end of the i-th
     system dependent piece (an 'I' flag or a '<...>' macro).  */
  const char **sysdep_directives;
};
159 
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The conversion to 'unsigned int' turns every value below '0' into a
   large number, so one comparison checks both ends of the range.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
165 
166 
167 static int
numbered_arg_compare(const void * p1,const void * p2)168 numbered_arg_compare (const void *p1, const void *p2)
169 {
170   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
171   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
172 
173   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
174 }
175 
/* Expands to an xasprintf call that builds the translated error message for
   a '<...>' directive whose content is not an ISO C 99 <inttypes.h> format
   macro name.  DIRECTIVE_NUMBER is the 1-based index of the directive.  */
#define INVALID_C99_MACRO(directive_number) \
  xasprintf (_("In the directive number %u, the token after '<' is not the name of a format specifier macro. The valid macro names are listed in ISO C 99 section 7.8.1."), directive_number)
178 
179 static void *
format_parse(const char * format,bool translated,bool objc_extensions,char ** invalid_reason)180 format_parse (const char *format, bool translated, bool objc_extensions,
181 	      char **invalid_reason)
182 {
183   struct spec spec;
184   unsigned int numbered_arg_count;
185   struct numbered_arg *numbered;
186   struct spec *result;
187 
188   spec.directives = 0;
189   numbered_arg_count = 0;
190   spec.unnumbered_arg_count = 0;
191   spec.allocated = 0;
192   numbered = NULL;
193   spec.unnumbered = NULL;
194   spec.unlikely_intentional = false;
195   spec.sysdep_directives_count = 0;
196   spec.sysdep_directives = NULL;
197 
198   for (; *format != '\0';)
199     if (*format++ == '%')
200       {
201 	/* A directive.  */
202 	unsigned int number = 0;
203 	enum format_arg_type type;
204 	enum format_arg_type size;
205 
206 	spec.directives++;
207 
208 	if (isdigit (*format))
209 	  {
210 	    const char *f = format;
211 	    unsigned int m = 0;
212 
213 	    do
214 	      {
215 		m = 10 * m + (*f - '0');
216 		f++;
217 	      }
218 	    while (isdigit (*f));
219 
220 	    if (*f == '$')
221 	      {
222 		if (m == 0)
223 		  {
224 		    *invalid_reason = INVALID_ARGNO_0 (spec.directives);
225 		    goto bad_format;
226 		  }
227 		number = m;
228 		format = ++f;
229 	      }
230 	  }
231 
232 	/* Parse flags.  */
233 	for (;;)
234 	  {
235 	    if (*format == ' ' || *format == '+' || *format == '-'
236 		|| *format == '#' || *format == '0' || *format == '\'')
237 	      format++;
238 	    else if (translated && *format == 'I')
239 	      {
240 		spec.sysdep_directives =
241 		  (const char **)
242 		  xrealloc (spec.sysdep_directives,
243 			    2 * (spec.sysdep_directives_count + 1)
244 			    * sizeof (const char *));
245 		spec.sysdep_directives[2 * spec.sysdep_directives_count] = format;
246 		spec.sysdep_directives[2 * spec.sysdep_directives_count + 1] = format + 1;
247 		spec.sysdep_directives_count++;
248 		format++;
249 	      }
250 	    else
251 	      break;
252 	  }
253 
254 	/* Parse width.  */
255 	if (*format == '*')
256 	  {
257 	    unsigned int width_number = 0;
258 
259 	    format++;
260 
261 	    if (isdigit (*format))
262 	      {
263 		const char *f = format;
264 		unsigned int m = 0;
265 
266 		do
267 		  {
268 		    m = 10 * m + (*f - '0');
269 		    f++;
270 		  }
271 		while (isdigit (*f));
272 
273 		if (*f == '$')
274 		  {
275 		    if (m == 0)
276 		      {
277 			*invalid_reason =
278 			  INVALID_WIDTH_ARGNO_0 (spec.directives);
279 			goto bad_format;
280 		      }
281 		    width_number = m;
282 		    format = ++f;
283 		  }
284 	      }
285 
286 	    if (width_number)
287 	      {
288 		/* Numbered argument.  */
289 
290 		/* Numbered and unnumbered specifications are exclusive.  */
291 		if (spec.unnumbered_arg_count > 0)
292 		  {
293 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
294 		    goto bad_format;
295 		  }
296 
297 		if (spec.allocated == numbered_arg_count)
298 		  {
299 		    spec.allocated = 2 * spec.allocated + 1;
300 		    numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
301 		  }
302 		numbered[numbered_arg_count].number = width_number;
303 		numbered[numbered_arg_count].type = FAT_INTEGER;
304 		numbered_arg_count++;
305 	      }
306 	    else
307 	      {
308 		/* Unnumbered argument.  */
309 
310 		/* Numbered and unnumbered specifications are exclusive.  */
311 		if (numbered_arg_count > 0)
312 		  {
313 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
314 		    goto bad_format;
315 		  }
316 
317 		if (spec.allocated == spec.unnumbered_arg_count)
318 		  {
319 		    spec.allocated = 2 * spec.allocated + 1;
320 		    spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
321 		  }
322 		spec.unnumbered[spec.unnumbered_arg_count].type = FAT_INTEGER;
323 		spec.unnumbered_arg_count++;
324 	      }
325 	  }
326 	else if (isdigit (*format))
327 	  {
328 	    do format++; while (isdigit (*format));
329 	  }
330 
331 	/* Parse precision.  */
332 	if (*format == '.')
333 	  {
334 	    format++;
335 
336 	    if (*format == '*')
337 	      {
338 		unsigned int precision_number = 0;
339 
340 		format++;
341 
342 		if (isdigit (*format))
343 		  {
344 		    const char *f = format;
345 		    unsigned int m = 0;
346 
347 		    do
348 		      {
349 			m = 10 * m + (*f - '0');
350 			f++;
351 		      }
352 		    while (isdigit (*f));
353 
354 		    if (*f == '$')
355 		      {
356 			if (m == 0)
357 			  {
358 			    *invalid_reason =
359 			      INVALID_PRECISION_ARGNO_0 (spec.directives);
360 			    goto bad_format;
361 			  }
362 			precision_number = m;
363 			format = ++f;
364 		      }
365 		  }
366 
367 		if (precision_number)
368 		  {
369 		    /* Numbered argument.  */
370 
371 		    /* Numbered and unnumbered specifications are exclusive.  */
372 		    if (spec.unnumbered_arg_count > 0)
373 		      {
374 			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
375 			goto bad_format;
376 		      }
377 
378 		    if (spec.allocated == numbered_arg_count)
379 		      {
380 			spec.allocated = 2 * spec.allocated + 1;
381 			numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
382 		      }
383 		    numbered[numbered_arg_count].number = precision_number;
384 		    numbered[numbered_arg_count].type = FAT_INTEGER;
385 		    numbered_arg_count++;
386 		  }
387 		else
388 		  {
389 		    /* Unnumbered argument.  */
390 
391 		    /* Numbered and unnumbered specifications are exclusive.  */
392 		    if (numbered_arg_count > 0)
393 		      {
394 			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
395 			goto bad_format;
396 		      }
397 
398 		    if (spec.allocated == spec.unnumbered_arg_count)
399 		      {
400 			spec.allocated = 2 * spec.allocated + 1;
401 			spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
402 		      }
403 		    spec.unnumbered[spec.unnumbered_arg_count].type = FAT_INTEGER;
404 		    spec.unnumbered_arg_count++;
405 		  }
406 	      }
407 	    else if (isdigit (*format))
408 	      {
409 		do format++; while (isdigit (*format));
410 	      }
411 	  }
412 
413 	if (*format == '<')
414 	  {
415 	    spec.sysdep_directives =
416 	      (const char **)
417 	      xrealloc (spec.sysdep_directives,
418 			2 * (spec.sysdep_directives_count + 1)
419 			* sizeof (const char *));
420 	    spec.sysdep_directives[2 * spec.sysdep_directives_count] = format;
421 
422 	    format++;
423 	    /* Parse ISO C 99 section 7.8.1 format string directive.
424 	       Syntax:
425 	       P R I { d | i | o | u | x | X }
426 	       { { | LEAST | FAST } { 8 | 16 | 32 | 64 } | MAX | PTR }  */
427 	    if (*format != 'P')
428 	      {
429 		*invalid_reason = INVALID_C99_MACRO (spec.directives);
430 		goto bad_format;
431 	      }
432 	    format++;
433 	    if (*format != 'R')
434 	      {
435 		*invalid_reason = INVALID_C99_MACRO (spec.directives);
436 		goto bad_format;
437 	      }
438 	    format++;
439 	    if (*format != 'I')
440 	      {
441 		*invalid_reason = INVALID_C99_MACRO (spec.directives);
442 		goto bad_format;
443 	      }
444 	    format++;
445 
446 	    switch (*format)
447 	      {
448 	      case 'i': case 'd':
449 		type = FAT_INTEGER;
450 		break;
451 	      case 'u': case 'o': case 'x': case 'X':
452 		type = FAT_INTEGER | FAT_UNSIGNED;
453 		break;
454 	      default:
455 		*invalid_reason = INVALID_C99_MACRO (spec.directives);
456 		goto bad_format;
457 	      }
458 	    format++;
459 
460 	    if (format[0] == 'M' && format[1] == 'A' && format[2] == 'X')
461 	      {
462 		type |= FAT_SIZE_INTMAX_T;
463 		format += 3;
464 	      }
465 	    else if (format[0] == 'P' && format[1] == 'T' && format[2] == 'R')
466 	      {
467 		type |= FAT_SIZE_INTPTR_T;
468 		format += 3;
469 	      }
470 	    else
471 	      {
472 		if (format[0] == 'L' && format[1] == 'E' && format[2] == 'A'
473 		    && format[3] == 'S' && format[4] == 'T')
474 		  {
475 		    format += 5;
476 		    if (format[0] == '8')
477 		      {
478 			type |= FAT_SIZE_LEAST8_T;
479 			format++;
480 		      }
481 		    else if (format[0] == '1' && format[1] == '6')
482 		      {
483 			type |= FAT_SIZE_LEAST16_T;
484 			format += 2;
485 		      }
486 		    else if (format[0] == '3' && format[1] == '2')
487 		      {
488 			type |= FAT_SIZE_LEAST32_T;
489 			format += 2;
490 		      }
491 		    else if (format[0] == '6' && format[1] == '4')
492 		      {
493 			type |= FAT_SIZE_LEAST64_T;
494 			format += 2;
495 		      }
496 		    else
497 		      {
498 			*invalid_reason = INVALID_C99_MACRO (spec.directives);
499 			goto bad_format;
500 		      }
501 		  }
502 		else if (format[0] == 'F' && format[1] == 'A'
503 			 && format[2] == 'S' && format[3] == 'T')
504 		  {
505 		    format += 4;
506 		    if (format[0] == '8')
507 		      {
508 			type |= FAT_SIZE_FAST8_T;
509 			format++;
510 		      }
511 		    else if (format[0] == '1' && format[1] == '6')
512 		      {
513 			type |= FAT_SIZE_FAST16_T;
514 			format += 2;
515 		      }
516 		    else if (format[0] == '3' && format[1] == '2')
517 		      {
518 			type |= FAT_SIZE_FAST32_T;
519 			format += 2;
520 		      }
521 		    else if (format[0] == '6' && format[1] == '4')
522 		      {
523 			type |= FAT_SIZE_FAST64_T;
524 			format += 2;
525 		      }
526 		    else
527 		      {
528 			*invalid_reason = INVALID_C99_MACRO (spec.directives);
529 			goto bad_format;
530 		      }
531 		  }
532 		else
533 		  {
534 		    if (format[0] == '8')
535 		      {
536 			type |= FAT_SIZE_8_T;
537 			format++;
538 		      }
539 		    else if (format[0] == '1' && format[1] == '6')
540 		      {
541 			type |= FAT_SIZE_16_T;
542 			format += 2;
543 		      }
544 		    else if (format[0] == '3' && format[1] == '2')
545 		      {
546 			type |= FAT_SIZE_32_T;
547 			format += 2;
548 		      }
549 		    else if (format[0] == '6' && format[1] == '4')
550 		      {
551 			type |= FAT_SIZE_64_T;
552 			format += 2;
553 		      }
554 		    else
555 		      {
556 			*invalid_reason = INVALID_C99_MACRO (spec.directives);
557 			goto bad_format;
558 		      }
559 		  }
560 	      }
561 
562 	    if (*format != '>')
563 	      {
564 		*invalid_reason =
565 		  xasprintf (_("In the directive number %u, the token after '<' is not followed by '>'."), spec.directives);
566 		goto bad_format;
567 	      }
568 
569 	    spec.sysdep_directives[2 * spec.sysdep_directives_count + 1] = format + 1;
570 	    spec.sysdep_directives_count++;
571 	  }
572 	else
573 	  {
574 	    /* Parse size.  */
575 	    size = 0;
576 	    for (;; format++)
577 	      {
578 		if (*format == 'h')
579 		  {
580 		    if (size & (FAT_SIZE_SHORT | FAT_SIZE_CHAR))
581 		      size = FAT_SIZE_CHAR;
582 		    else
583 		      size = FAT_SIZE_SHORT;
584 		  }
585 		else if (*format == 'l')
586 		  {
587 		    if (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG))
588 		      size = FAT_SIZE_LONGLONG;
589 		    else
590 		      size = FAT_SIZE_LONG;
591 		  }
592 		else if (*format == 'L')
593 		  size = FAT_SIZE_LONGLONG;
594 		else if (*format == 'q')
595 		  /* Old BSD 4.4 convention.  */
596 		  size = FAT_SIZE_LONGLONG;
597 		else if (*format == 'j')
598 		  size = FAT_SIZE_INTMAX_T;
599 		else if (*format == 'z' || *format == 'Z')
600 		  /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
601 		     because the warning facility in gcc-2.95.2 understands
602 		     only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784).  */
603 		  size = FAT_SIZE_SIZE_T;
604 		else if (*format == 't')
605 		  size = FAT_SIZE_PTRDIFF_T;
606 		else
607 		  break;
608 	      }
609 
610 	    switch (*format)
611 	      {
612 	      case '%':
613 		/* Programmers writing _("%2%") most often will not want to
614 		   use this string as a c-format string, but rather as a
615 		   literal or as a different kind of format string.  */
616 		if (format[-1] != '%')
617 		  spec.unlikely_intentional = true;
618 		type = FAT_NONE;
619 		break;
620 	      case 'm': /* glibc extension */
621 		type = FAT_NONE;
622 		break;
623 	      case 'c':
624 		type = FAT_CHAR;
625 		type |= (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
626 			 ? FAT_WIDE : 0);
627 		break;
628 	      case 'C': /* obsolete */
629 		type = FAT_CHAR | FAT_WIDE;
630 		break;
631 	      case 's':
632 		type = FAT_STRING;
633 		type |= (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
634 			 ? FAT_WIDE : 0);
635 		break;
636 	      case 'S': /* obsolete */
637 		type = FAT_STRING | FAT_WIDE;
638 		break;
639 	      case 'i': case 'd':
640 		type = FAT_INTEGER;
641 		type |= (size & FAT_SIZE_MASK);
642 		break;
643 	      case 'u': case 'o': case 'x': case 'X':
644 		type = FAT_INTEGER | FAT_UNSIGNED;
645 		type |= (size & FAT_SIZE_MASK);
646 		break;
647 	      case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
648 	      case 'a': case 'A':
649 		type = FAT_DOUBLE;
650 		type |= (size & FAT_SIZE_LONGLONG);
651 		break;
652 	      case '@':
653 		if (objc_extensions)
654 		  {
655 		    type = FAT_OBJC_OBJECT;
656 		    break;
657 		  }
658 		goto other;
659 	      case 'p':
660 		type = FAT_POINTER;
661 		break;
662 	      case 'n':
663 		type = FAT_COUNT_POINTER;
664 		type |= (size & FAT_SIZE_MASK);
665 		break;
666 	      other:
667 	      default:
668 		*invalid_reason =
669 		  (*format == '\0'
670 		   ? INVALID_UNTERMINATED_DIRECTIVE ()
671 		   : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
672 		goto bad_format;
673 	      }
674 	  }
675 
676 	if (type != FAT_NONE)
677 	  {
678 	    if (number)
679 	      {
680 		/* Numbered argument.  */
681 
682 		/* Numbered and unnumbered specifications are exclusive.  */
683 		if (spec.unnumbered_arg_count > 0)
684 		  {
685 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
686 		    goto bad_format;
687 		  }
688 
689 		if (spec.allocated == numbered_arg_count)
690 		  {
691 		    spec.allocated = 2 * spec.allocated + 1;
692 		    numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
693 		  }
694 		numbered[numbered_arg_count].number = number;
695 		numbered[numbered_arg_count].type = type;
696 		numbered_arg_count++;
697 	      }
698 	    else
699 	      {
700 		/* Unnumbered argument.  */
701 
702 		/* Numbered and unnumbered specifications are exclusive.  */
703 		if (numbered_arg_count > 0)
704 		  {
705 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
706 		    goto bad_format;
707 		  }
708 
709 		if (spec.allocated == spec.unnumbered_arg_count)
710 		  {
711 		    spec.allocated = 2 * spec.allocated + 1;
712 		    spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
713 		  }
714 		spec.unnumbered[spec.unnumbered_arg_count].type = type;
715 		spec.unnumbered_arg_count++;
716 	      }
717 	  }
718 
719 	format++;
720       }
721 
722   /* Sort the numbered argument array, and eliminate duplicates.  */
723   if (numbered_arg_count > 1)
724     {
725       unsigned int i, j;
726       bool err;
727 
728       qsort (numbered, numbered_arg_count,
729 	     sizeof (struct numbered_arg), numbered_arg_compare);
730 
731       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
732       err = false;
733       for (i = j = 0; i < numbered_arg_count; i++)
734 	if (j > 0 && numbered[i].number == numbered[j-1].number)
735 	  {
736 	    enum format_arg_type type1 = numbered[i].type;
737 	    enum format_arg_type type2 = numbered[j-1].type;
738 	    enum format_arg_type type_both;
739 
740 	    if (type1 == type2)
741 	      type_both = type1;
742 	    else
743 	      {
744 		/* Incompatible types.  */
745 		type_both = FAT_NONE;
746 		if (!err)
747 		  *invalid_reason =
748 		    INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
749 		err = true;
750 	      }
751 
752 	    numbered[j-1].type = type_both;
753 	  }
754 	else
755 	  {
756 	    if (j < i)
757 	      {
758 		numbered[j].number = numbered[i].number;
759 		numbered[j].type = numbered[i].type;
760 	      }
761 	    j++;
762 	  }
763       numbered_arg_count = j;
764       if (err)
765 	/* *invalid_reason has already been set above.  */
766 	goto bad_format;
767     }
768 
769   /* Verify that the format strings uses all arguments up to the highest
770      numbered one.  */
771   if (numbered_arg_count > 0)
772     {
773       unsigned int i;
774 
775       for (i = 0; i < numbered_arg_count; i++)
776 	if (numbered[i].number != i + 1)
777 	  {
778 	    *invalid_reason =
779 	      xasprintf (_("The string refers to argument number %u but ignores argument number %u."), numbered[i].number, i + 1);
780 	    goto bad_format;
781 	  }
782 
783       /* So now the numbered arguments array is equivalent to a sequence
784 	 of unnumbered arguments.  */
785       spec.unnumbered_arg_count = numbered_arg_count;
786       spec.allocated = spec.unnumbered_arg_count;
787       spec.unnumbered = (struct unnumbered_arg *) xmalloc (spec.allocated * sizeof (struct unnumbered_arg));
788       for (i = 0; i < spec.unnumbered_arg_count; i++)
789 	spec.unnumbered[i].type = numbered[i].type;
790       free (numbered);
791       numbered_arg_count = 0;
792     }
793 
794   result = (struct spec *) xmalloc (sizeof (struct spec));
795   *result = spec;
796   return result;
797 
798  bad_format:
799   if (numbered != NULL)
800     free (numbered);
801   if (spec.unnumbered != NULL)
802     free (spec.unnumbered);
803   if (spec.sysdep_directives != NULL)
804     free (spec.sysdep_directives);
805   return NULL;
806 }
807 
/* Parses FORMAT as a plain C format string: forwards to format_parse with
   the Objective C extensions switched off and returns its result.  */
static void *
format_c_parse (const char *format, bool translated, char **invalid_reason)
{
  const bool objc_extensions = false;

  return format_parse (format, translated, objc_extensions, invalid_reason);
}
813 
/* Parses FORMAT as an Objective C format string: forwards to format_parse
   with the Objective C extensions switched on and returns its result.  */
static void *
format_objc_parse (const char *format, bool translated, char **invalid_reason)
{
  const bool objc_extensions = true;

  return format_parse (format, translated, objc_extensions, invalid_reason);
}
819 
820 static void
format_free(void * descr)821 format_free (void *descr)
822 {
823   struct spec *spec = (struct spec *) descr;
824 
825   if (spec->unnumbered != NULL)
826     free (spec->unnumbered);
827   if (spec->sysdep_directives != NULL)
828     free (spec->sysdep_directives);
829   free (spec);
830 }
831 
832 static bool
format_is_unlikely_intentional(void * descr)833 format_is_unlikely_intentional (void *descr)
834 {
835   struct spec *spec = (struct spec *) descr;
836 
837   return spec->unlikely_intentional;
838 }
839 
840 static int
format_get_number_of_directives(void * descr)841 format_get_number_of_directives (void *descr)
842 {
843   struct spec *spec = (struct spec *) descr;
844 
845   return spec->directives;
846 }
847 
848 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)849 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
850 	      formatstring_error_logger_t error_logger,
851 	      const char *pretty_msgstr)
852 {
853   struct spec *spec1 = (struct spec *) msgid_descr;
854   struct spec *spec2 = (struct spec *) msgstr_descr;
855   bool err = false;
856   unsigned int i;
857 
858   /* Check the argument types are the same.  */
859   if (equality
860       ? spec1->unnumbered_arg_count != spec2->unnumbered_arg_count
861       : spec1->unnumbered_arg_count < spec2->unnumbered_arg_count)
862     {
863       if (error_logger)
864 	error_logger (_("number of format specifications in 'msgid' and '%s' does not match"),
865 		      pretty_msgstr);
866       err = true;
867     }
868   else
869     for (i = 0; i < spec2->unnumbered_arg_count; i++)
870       if (spec1->unnumbered[i].type != spec2->unnumbered[i].type)
871 	{
872 	  if (error_logger)
873 	    error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
874 			  pretty_msgstr, i + 1);
875 	  err = true;
876 	}
877 
878   return err;
879 }
880 
881 
/* Parser descriptor for C format strings.  The entries are, in order: the
   parse function, the descriptor destructor, the directive counter, the
   "unlikely intentional" predicate and the msgid/msgstr checker.
   NOTE(review): the member names are declared in "format.h", which is not
   visible here -- confirm the order against that header.  */
struct formatstring_parser formatstring_c =
{
  format_c_parse,
  format_free,
  format_get_number_of_directives,
  format_is_unlikely_intentional,
  format_check
};
890 
891 
/* Parser descriptor for Objective C format strings.  Identical to the C
   descriptor except for the parse function, which also accepts '%@'.
   NOTE(review): the member names are declared in "format.h", which is not
   visible here -- confirm the order against that header.  */
struct formatstring_parser formatstring_objc =
{
  format_objc_parse,
  format_free,
  format_get_number_of_directives,
  format_is_unlikely_intentional,
  format_check
};
900 
901 
902 void
get_sysdep_c_format_directives(const char * string,bool translated,struct interval ** intervalsp,size_t * lengthp)903 get_sysdep_c_format_directives (const char *string, bool translated,
904 				struct interval **intervalsp, size_t *lengthp)
905 {
906   /* Parse the format string with all possible extensions turned on.  (The
907      caller has already verified that the format string is valid for the
908      particular language.)  */
909   char *invalid_reason = NULL;
910   struct spec *descr =
911     (struct spec *) format_parse (string, translated, true, &invalid_reason);
912 
913   if (descr != NULL && descr->sysdep_directives_count > 0)
914     {
915       unsigned int n = descr->sysdep_directives_count;
916       struct interval *intervals =
917 	(struct interval *) xmalloc (n * sizeof (struct interval));
918       unsigned int i;
919 
920       for (i = 0; i < n; i++)
921 	{
922 	  intervals[i].startpos = descr->sysdep_directives[2 * i] - string;
923 	  intervals[i].endpos = descr->sysdep_directives[2 * i + 1] - string;
924 	}
925       *intervalsp = intervals;
926       *lengthp = n;
927     }
928   else
929     {
930       *intervalsp = NULL;
931       *lengthp = 0;
932     }
933 
934   if (descr != NULL)
935     format_free (descr);
936   else
937     free (invalid_reason);
938 }
939 
940 
941 #ifdef TEST
942 
943 /* Test program: Print the argument list specification returned by
944    format_parse for strings read from standard input.  */
945 
946 #include <stdio.h>
947 #include "getline.h"
948 
949 static void
format_print(void * descr)950 format_print (void *descr)
951 {
952   struct spec *spec = (struct spec *) descr;
953   unsigned int i;
954 
955   if (spec == NULL)
956     {
957       printf ("INVALID");
958       return;
959     }
960 
961   printf ("(");
962   for (i = 0; i < spec->unnumbered_arg_count; i++)
963     {
964       if (i > 0)
965 	printf (" ");
966       if (spec->unnumbered[i].type & FAT_UNSIGNED)
967 	printf ("[unsigned]");
968       switch (spec->unnumbered[i].type & FAT_SIZE_MASK)
969 	{
970 	case 0:
971 	  break;
972 	case FAT_SIZE_SHORT:
973 	  printf ("[short]");
974 	  break;
975 	case FAT_SIZE_CHAR:
976 	  printf ("[char]");
977 	  break;
978 	case FAT_SIZE_LONG:
979 	  printf ("[long]");
980 	  break;
981 	case FAT_SIZE_LONGLONG:
982 	  printf ("[long long]");
983 	  break;
984 	case FAT_SIZE_8_T:
985 	  printf ("[int8_t]");
986 	  break;
987 	case FAT_SIZE_16_T:
988 	  printf ("[int16_t]");
989 	  break;
990 	case FAT_SIZE_32_T:
991 	  printf ("[int32_t]");
992 	  break;
993 	case FAT_SIZE_64_T:
994 	  printf ("[int64_t]");
995 	  break;
996 	case FAT_SIZE_LEAST8_T:
997 	  printf ("[int_least8_t]");
998 	  break;
999 	case FAT_SIZE_LEAST16_T:
1000 	  printf ("[int_least16_t]");
1001 	  break;
1002 	case FAT_SIZE_LEAST32_T:
1003 	  printf ("[int_least32_t]");
1004 	  break;
1005 	case FAT_SIZE_LEAST64_T:
1006 	  printf ("[int_least64_t]");
1007 	  break;
1008 	case FAT_SIZE_FAST8_T:
1009 	  printf ("[int_fast8_t]");
1010 	  break;
1011 	case FAT_SIZE_FAST16_T:
1012 	  printf ("[int_fast16_t]");
1013 	  break;
1014 	case FAT_SIZE_FAST32_T:
1015 	  printf ("[int_fast32_t]");
1016 	  break;
1017 	case FAT_SIZE_FAST64_T:
1018 	  printf ("[int_fast64_t]");
1019 	  break;
1020 	case FAT_SIZE_INTMAX_T:
1021 	  printf ("[intmax_t]");
1022 	  break;
1023 	case FAT_SIZE_INTPTR_T:
1024 	  printf ("[intptr_t]");
1025 	  break;
1026 	case FAT_SIZE_SIZE_T:
1027 	  printf ("[size_t]");
1028 	  break;
1029 	case FAT_SIZE_PTRDIFF_T:
1030 	  printf ("[ptrdiff_t]");
1031 	  break;
1032 	default:
1033 	  abort ();
1034 	}
1035       switch (spec->unnumbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK))
1036 	{
1037 	case FAT_INTEGER:
1038 	  printf ("i");
1039 	  break;
1040 	case FAT_DOUBLE:
1041 	  printf ("f");
1042 	  break;
1043 	case FAT_CHAR:
1044 	  printf ("c");
1045 	  break;
1046 	case FAT_STRING:
1047 	  printf ("s");
1048 	  break;
1049 	case FAT_OBJC_OBJECT:
1050 	  printf ("@");
1051 	  break;
1052 	case FAT_POINTER:
1053 	  printf ("p");
1054 	  break;
1055 	case FAT_COUNT_POINTER:
1056 	  printf ("n");
1057 	  break;
1058 	default:
1059 	  abort ();
1060 	}
1061     }
1062   printf (")");
1063 }
1064 
/* Test driver: reads format strings from standard input, one per line, and
   prints for each the parsed argument list, or "INVALID" followed by the
   reason when the string is rejected.  Stops at end of input.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_c_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; format_free does not accept NULL.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
1096 
1097 /*
1098  * For Emacs M-x compile
1099  * Local Variables:
1100  * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-c.c ../lib/libgettextlib.la"
1101  * End:
1102  */
1103 
1104 #endif /* TEST */
1105