xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/format-boost.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Boost format strings.
2    Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2006.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* Boost format strings are described in
36      boost_1_33_1/libs/format/doc/format.html
37    and implemented in
38      boost_1_33_1/boost/format/parsing.hpp.
39    A directive (other than '%%')
40    - starts with '%' or '%|'; in the latter case it must end in '|',
41    - is continued either by
42        - 'm%' where m is a positive integer, starting with a nonzero digit;
43          in this case the directive must not have started with '%|'; or
44        - the following:
45            - optional: 'm$' where m is a positive integer, starting with a
46              nonzero digit,
47            - optional: any of the characters '#', '0', '-', ' ', '+', "'",
48              '_', '=', 'h', 'l',
49            - optional: a width specification: '*' (reads an argument) or '*m$'
50              or a nonempty digit sequence,
51            - optional: a '.' and a precision specification: '*' (reads an
52              argument) or '*m$' or a nonempty digit sequence,
53            - optional: any of the characters 'h', 'l', 'L',
54            - if the directive started with '%|':
55                an optional specifier and a final '|',
56              otherwise
57                a mandatory specifier.
58              If no specifier is given, it needs an argument of any type.
59              The possible specifiers are:
60                - 'c', 'C', that need a character argument,
61                - 's', 'S', that need an argument of any type,
62                - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument,
63                - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument,
64                - 'p', that needs a 'void *' argument,
65                - 't', that doesn't need an argument,
66                - 'TX', where X is any character, that doesn't need an argument,
67                - 'n', that doesn't need an argument (the parser below records none).
68              The Boost format string interpreter doesn't actually care about
69              the argument types, but we do, because it increases the likelihood
70              of detecting translator mistakes.
71    Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications
72    cannot be used in the same string.
73  */
74 
/* Classification of the argument consumed by a directive, as assigned by
   format_parse.  The numeric values run from 0 to 5 in declaration order.  */
enum format_arg_type
{
  FAT_NONE,             /* 0: the directive consumes no argument */
  /* Basic types */
  FAT_INTEGER,          /* 1: integer argument */
  FAT_DOUBLE,           /* 2: floating-point argument */
  FAT_CHAR,             /* 3: character argument */
  FAT_POINTER,          /* 4: pointer argument */
  FAT_ANY               /* 5: argument of any type */
};
85 
86 struct numbered_arg
87 {
88   unsigned int number;
89   enum format_arg_type type;
90 };
91 
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives encountered; '%%' is counted as well.  */
  unsigned int directives;
  /* Number of entries in use in the 'numbered' array.  */
  unsigned int numbered_arg_count;
  /* Number of entries the 'numbered' array has room for.  */
  unsigned int allocated;
  /* Heap-allocated array of arguments, or NULL if none was recorded.  */
  struct numbered_arg *numbered;
};
99 
/* Decimal digit test that does not depend on the locale.
   Unlike the <ctype.h> isdigit, whose argument must be an 'unsigned char',
   this one accepts both 'char' and 'unsigned char': characters below '0'
   become huge values after the conversion to 'unsigned int' and are
   therefore rejected by the single comparison.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
105 
106 
107 static int
numbered_arg_compare(const void * p1,const void * p2)108 numbered_arg_compare (const void *p1, const void *p2)
109 {
110   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
111   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
112 
113   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
114 }
115 
116 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)117 format_parse (const char *format, bool translated, char **invalid_reason)
118 {
119   struct spec spec;
120   unsigned int unnumbered_arg_count;
121   struct spec *result;
122 
123   spec.directives = 0;
124   spec.numbered_arg_count = 0;
125   spec.allocated = 0;
126   spec.numbered = NULL;
127   unnumbered_arg_count = 0;
128 
129   for (; *format != '\0';)
130     if (*format++ == '%')
131       {
132 	/* A directive.  */
133 	spec.directives++;
134 
135 	if (*format == '%')
136 	  format++;
137 	else
138 	  {
139 	    bool brackets = false;
140 	    bool done = false;
141 	    unsigned int number = 0;
142 	    enum format_arg_type type = FAT_NONE;
143 
144 	    if (*format == '|')
145 	      {
146 		format++;
147 		brackets = true;
148 	      }
149 
150 	    if (isdigit (*format) && *format != '0')
151 	      {
152 		const char *f = format;
153 		unsigned int m = 0;
154 
155 		do
156 		  {
157 		    m = 10 * m + (*f - '0');
158 		    f++;
159 		  }
160 		while (isdigit (*f));
161 
162 		if ((!brackets && *f == '%') || *f == '$')
163 		  {
164 		    if (m == 0) /* can happen if m overflows */
165 		      {
166 			*invalid_reason = INVALID_ARGNO_0 (spec.directives);
167 			goto bad_format;
168 		      }
169 		    number = m;
170 		    if (*f == '%')
171 		      {
172 			type = FAT_ANY;
173 			done = true;
174 		      }
175 		    format = ++f;
176 		  }
177 	      }
178 
179 	    if (!done)
180 	      {
181 		/* Parse flags.  */
182 		for (;;)
183 		  {
184 		    if (*format == ' ' || *format == '+' || *format == '-'
185 			|| *format == '#' || *format == '0' || *format == '\''
186 			|| *format == '_' || *format == '=' || *format == 'h'
187 			|| *format == 'l')
188 		      format++;
189 		    else
190 		      break;
191 		  }
192 
193 		/* Parse width.  */
194 		if (*format == '*')
195 		  {
196 		    unsigned int width_number = 0;
197 
198 		    format++;
199 
200 		    if (isdigit (*format))
201 		      {
202 			const char *f = format;
203 			unsigned int m = 0;
204 
205 			do
206 			  {
207 			    m = 10 * m + (*f - '0');
208 			    f++;
209 			  }
210 			while (isdigit (*f));
211 
212 			if (*f == '$')
213 			  {
214 			    if (m == 0)
215 			      {
216 				*invalid_reason =
217 				  INVALID_WIDTH_ARGNO_0 (spec.directives);
218 				goto bad_format;
219 			      }
220 			    width_number = m;
221 			    format = ++f;
222 			  }
223 		      }
224 
225 		    if (width_number)
226 		      {
227 			/* Numbered argument.  */
228 
229 			/* Numbered and unnumbered specifications are
230 			   exclusive.  */
231 			if (unnumbered_arg_count > 0)
232 			  {
233 			    *invalid_reason =
234 			      INVALID_MIXES_NUMBERED_UNNUMBERED ();
235 			    goto bad_format;
236 			  }
237 
238 			if (spec.allocated == spec.numbered_arg_count)
239 			  {
240 			    spec.allocated = 2 * spec.allocated + 1;
241 			    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
242 			  }
243 			spec.numbered[spec.numbered_arg_count].number = width_number;
244 			spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
245 			spec.numbered_arg_count++;
246 		      }
247 		    else
248 		      {
249 			/* Unnumbered argument.  */
250 
251 			/* Numbered and unnumbered specifications are
252 			   exclusive.  */
253 			if (spec.numbered_arg_count > 0)
254 			  {
255 			    *invalid_reason =
256 			      INVALID_MIXES_NUMBERED_UNNUMBERED ();
257 			    goto bad_format;
258 			  }
259 
260 			if (spec.allocated == unnumbered_arg_count)
261 			  {
262 			    spec.allocated = 2 * spec.allocated + 1;
263 			    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
264 			  }
265 			spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
266 			spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
267 			unnumbered_arg_count++;
268 		      }
269 		  }
270 		else if (isdigit (*format))
271 		  {
272 		    do format++; while (isdigit (*format));
273 		  }
274 
275 		/* Parse precision.  */
276 		if (*format == '.')
277 		  {
278 		    format++;
279 
280 		    if (*format == '*')
281 		      {
282 			unsigned int precision_number = 0;
283 
284 			format++;
285 
286 			if (isdigit (*format))
287 			  {
288 			    const char *f = format;
289 			    unsigned int m = 0;
290 
291 			    do
292 			      {
293 				m = 10 * m + (*f - '0');
294 				f++;
295 			      }
296 			    while (isdigit (*f));
297 
298 			    if (*f == '$')
299 			      {
300 				if (m == 0)
301 				  {
302 				    *invalid_reason =
303 				      INVALID_PRECISION_ARGNO_0 (spec.directives);
304 				    goto bad_format;
305 				  }
306 				precision_number = m;
307 				format = ++f;
308 			      }
309 			  }
310 
311 			if (precision_number)
312 			  {
313 			    /* Numbered argument.  */
314 
315 			    /* Numbered and unnumbered specifications are
316 			       exclusive.  */
317 			    if (unnumbered_arg_count > 0)
318 			      {
319 				*invalid_reason =
320 				  INVALID_MIXES_NUMBERED_UNNUMBERED ();
321 				goto bad_format;
322 			      }
323 
324 			    if (spec.allocated == spec.numbered_arg_count)
325 			      {
326 				spec.allocated = 2 * spec.allocated + 1;
327 				spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
328 			      }
329 			    spec.numbered[spec.numbered_arg_count].number = precision_number;
330 			    spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
331 			    spec.numbered_arg_count++;
332 			  }
333 			else
334 			  {
335 			    /* Unnumbered argument.  */
336 
337 			    /* Numbered and unnumbered specifications are
338 			       exclusive.  */
339 			    if (spec.numbered_arg_count > 0)
340 			      {
341 				*invalid_reason =
342 				  INVALID_MIXES_NUMBERED_UNNUMBERED ();
343 				goto bad_format;
344 			      }
345 
346 			    if (spec.allocated == unnumbered_arg_count)
347 			      {
348 				spec.allocated = 2 * spec.allocated + 1;
349 				spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated  * sizeof (struct numbered_arg));
350 			      }
351 			    spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
352 			    spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
353 			    unnumbered_arg_count++;
354 			  }
355 		      }
356 		    else if (isdigit (*format))
357 		      {
358 			do format++; while (isdigit (*format));
359 		      }
360 		  }
361 
362 		/* Parse size.  */
363 		for (;;)
364 		  {
365 		    if (*format == 'h' || *format == 'l' || *format == 'L')
366 		      format++;
367 		    else
368 		      break;
369 		  }
370 
371 		switch (*format++)
372 		  {
373 		  case 'c': case 'C':
374 		    type = FAT_CHAR;
375 		    break;
376 		  case 's': case 'S':
377 		    type = FAT_ANY;
378 		    break;
379 		  case 'i': case 'd': case 'o': case 'u': case 'x': case 'X':
380 		    type = FAT_INTEGER;
381 		    break;
382 		  case 'e': case 'E': case 'f': case 'g': case 'G':
383 		    type = FAT_DOUBLE;
384 		    break;
385 		  case 'p':
386 		    type = FAT_POINTER;
387 		    break;
388 		  case 't':
389 		    type = FAT_NONE;
390 		    break;
391 		  case 'T':
392 		    if (*format == '\0')
393 		      {
394 			*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
395 			goto bad_format;
396 		      }
397 		    format++;
398 		    type = FAT_NONE;
399 		    break;
400 		  case 'n':
401 		    type = FAT_NONE;
402 		    break;
403 		  case '|':
404 		    if (brackets)
405 		      {
406 			--format;
407 			type = FAT_ANY;
408 			break;
409 		      }
410 		    /*FALLTHROUGH*/
411 		  default:
412 		    --format;
413 		    *invalid_reason =
414 		      (*format == '\0'
415 		       ? INVALID_UNTERMINATED_DIRECTIVE ()
416 		       : INVALID_CONVERSION_SPECIFIER (spec.directives,
417 						       *format));
418 		    goto bad_format;
419 		  }
420 		if (brackets)
421 		  {
422 		    if (*format != '|')
423 		      {
424 			*invalid_reason =
425 			  (*format == '\0'
426 			   ? INVALID_UNTERMINATED_DIRECTIVE ()
427 			   : xasprintf (_("The directive number %u starts with | but does not end with |."),
428 					spec.directives));
429 			goto bad_format;
430 		      }
431 		    format++;
432 		  }
433 	      }
434 
435 	    if (type != FAT_NONE)
436 	      {
437 		if (number)
438 		  {
439 		    /* Numbered argument.  */
440 
441 		    /* Numbered and unnumbered specifications are exclusive.  */
442 		    if (unnumbered_arg_count > 0)
443 		      {
444 			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
445 			goto bad_format;
446 		      }
447 
448 		    if (spec.allocated == spec.numbered_arg_count)
449 		      {
450 			spec.allocated = 2 * spec.allocated + 1;
451 			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
452 		      }
453 		    spec.numbered[spec.numbered_arg_count].number = number;
454 		    spec.numbered[spec.numbered_arg_count].type = type;
455 		    spec.numbered_arg_count++;
456 		  }
457 		else
458 		  {
459 		    /* Unnumbered argument.  */
460 
461 		    /* Numbered and unnumbered specifications are exclusive.  */
462 		    if (spec.numbered_arg_count > 0)
463 		      {
464 			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
465 			goto bad_format;
466 		      }
467 
468 		    if (spec.allocated == unnumbered_arg_count)
469 		      {
470 			spec.allocated = 2 * spec.allocated + 1;
471 			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
472 		      }
473 		    spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
474 		    spec.numbered[unnumbered_arg_count].type = type;
475 		    unnumbered_arg_count++;
476 		  }
477 	      }
478 	  }
479       }
480 
481   /* Convert the unnumbered argument array to numbered arguments.  */
482   if (unnumbered_arg_count > 0)
483     spec.numbered_arg_count = unnumbered_arg_count;
484   /* Sort the numbered argument array, and eliminate duplicates.  */
485   else if (spec.numbered_arg_count > 1)
486     {
487       unsigned int i, j;
488       bool err;
489 
490       qsort (spec.numbered, spec.numbered_arg_count,
491 	     sizeof (struct numbered_arg), numbered_arg_compare);
492 
493       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
494       err = false;
495       for (i = j = 0; i < spec.numbered_arg_count; i++)
496 	if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
497 	  {
498 	    enum format_arg_type type1 = spec.numbered[i].type;
499 	    enum format_arg_type type2 = spec.numbered[j-1].type;
500 	    enum format_arg_type type_both;
501 
502 	    if (type1 == type2 || type2 == FAT_ANY)
503 	      type_both = type1;
504 	    else if (type1 == FAT_ANY)
505 	      type_both = type2;
506 	    else
507 	      {
508 		/* Incompatible types.  */
509 		type_both = FAT_NONE;
510 		if (!err)
511 		  *invalid_reason =
512 		    INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
513 		err = true;
514 	      }
515 
516 	    spec.numbered[j-1].type = type_both;
517 	  }
518 	else
519 	  {
520 	    if (j < i)
521 	      {
522 		spec.numbered[j].number = spec.numbered[i].number;
523 		spec.numbered[j].type = spec.numbered[i].type;
524 	      }
525 	    j++;
526 	  }
527       spec.numbered_arg_count = j;
528       if (err)
529 	/* *invalid_reason has already been set above.  */
530 	goto bad_format;
531     }
532 
533   result = (struct spec *) xmalloc (sizeof (struct spec));
534   *result = spec;
535   return result;
536 
537  bad_format:
538   if (spec.numbered != NULL)
539     free (spec.numbered);
540   return NULL;
541 }
542 
543 static void
format_free(void * descr)544 format_free (void *descr)
545 {
546   struct spec *spec = (struct spec *) descr;
547 
548   if (spec->numbered != NULL)
549     free (spec->numbered);
550   free (spec);
551 }
552 
553 static int
format_get_number_of_directives(void * descr)554 format_get_number_of_directives (void *descr)
555 {
556   struct spec *spec = (struct spec *) descr;
557 
558   return spec->directives;
559 }
560 
561 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)562 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
563 	      formatstring_error_logger_t error_logger,
564 	      const char *pretty_msgstr)
565 {
566   struct spec *spec1 = (struct spec *) msgid_descr;
567   struct spec *spec2 = (struct spec *) msgstr_descr;
568   bool err = false;
569 
570   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
571     {
572       unsigned int i, j;
573       unsigned int n1 = spec1->numbered_arg_count;
574       unsigned int n2 = spec2->numbered_arg_count;
575 
576       /* Check the argument names are the same.
577 	 Both arrays are sorted.  We search for the first difference.  */
578       for (i = 0, j = 0; i < n1 || j < n2; )
579 	{
580 	  int cmp = (i >= n1 ? 1 :
581 		     j >= n2 ? -1 :
582 		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
583 		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
584 		     0);
585 
586 	  if (cmp > 0)
587 	    {
588 	      if (error_logger)
589 		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
590 			      spec2->numbered[j].number, pretty_msgstr);
591 	      err = true;
592 	      break;
593 	    }
594 	  else if (cmp < 0)
595 	    {
596 	      if (equality)
597 		{
598 		  if (error_logger)
599 		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
600 				  spec1->numbered[i].number, pretty_msgstr);
601 		  err = true;
602 		  break;
603 		}
604 	      else
605 		i++;
606 	    }
607 	  else
608 	    j++, i++;
609 	}
610       /* Check the argument types are the same.  */
611       if (!err)
612 	for (i = 0, j = 0; j < n2; )
613 	  {
614 	    if (spec1->numbered[i].number == spec2->numbered[j].number)
615 	      {
616 		if (spec1->numbered[i].type != spec2->numbered[j].type)
617 		  {
618 		    if (error_logger)
619 		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
620 				    pretty_msgstr, spec2->numbered[j].number);
621 		    err = true;
622 		    break;
623 		  }
624 		j++, i++;
625 	      }
626 	    else
627 	      i++;
628 	  }
629     }
630 
631   return err;
632 }
633 
634 
635 struct formatstring_parser formatstring_boost =
636 {
637   format_parse,
638   format_free,
639   format_get_number_of_directives,
640   NULL,
641   format_check
642 };
643 
644 
645 #ifdef TEST
646 
647 /* Test program: Print the argument list specification returned by
648    format_parse for strings read from standard input.  */
649 
650 #include <stdio.h>
651 #include "getline.h"
652 
653 static void
format_print(void * descr)654 format_print (void *descr)
655 {
656   struct spec *spec = (struct spec *) descr;
657   unsigned int last;
658   unsigned int i;
659 
660   if (spec == NULL)
661     {
662       printf ("INVALID");
663       return;
664     }
665 
666   printf ("(");
667   last = 1;
668   for (i = 0; i < spec->numbered_arg_count; i++)
669     {
670       unsigned int number = spec->numbered[i].number;
671 
672       if (i > 0)
673 	printf (" ");
674       if (number < last)
675 	abort ();
676       for (; last < number; last++)
677 	printf ("_ ");
678       switch (spec->numbered[i].type)
679 	{
680 	case FAT_INTEGER:
681 	  printf ("i");
682 	  break;
683 	case FAT_DOUBLE:
684 	  printf ("f");
685 	  break;
686 	case FAT_CHAR:
687 	  printf ("c");
688 	  break;
689 	case FAT_POINTER:
690 	  printf ("p");
691 	  break;
692 	case FAT_ANY:
693 	  printf ("*");
694 	  break;
695 	default:
696 	  abort ();
697 	}
698       last = number + 1;
699     }
700   printf (")");
701 }
702 
/* Test driver: reads format strings from standard input, one per line,
   and prints for each the argument list found by format_parse, followed
   by the reason on a separate line when the string is invalid.  */
int
main ()
{
  while (true)
    {
      char *input = NULL;
      size_t capacity = 0;
      char *reason = NULL;
      void *parsed;
      int length;

      length = getline (&input, &capacity, stdin);
      if (length < 0)
        break;

      /* Drop the trailing newline, if there is one.  */
      if (length > 0 && input[length - 1] == '\n')
        input[--length] = '\0';

      parsed = format_parse (input, false, &reason);

      format_print (parsed);
      printf ("\n");
      if (parsed == NULL)
        printf ("%s\n", reason);

      free (reason);
      free (input);
    }

  return 0;
}
734 
735 /*
736  * For Emacs M-x compile
737  * Local Variables:
738  * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-boost.c ../lib/libgettextlib.la"
739  * End:
740  */
741 
742 #endif /* TEST */
743 
744