1*946379e7Schristos /* Java format strings.
2*946379e7Schristos Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3*946379e7Schristos Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4*946379e7Schristos
5*946379e7Schristos This program is free software; you can redistribute it and/or modify
6*946379e7Schristos it under the terms of the GNU General Public License as published by
7*946379e7Schristos the Free Software Foundation; either version 2, or (at your option)
8*946379e7Schristos any later version.
9*946379e7Schristos
10*946379e7Schristos This program is distributed in the hope that it will be useful,
11*946379e7Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of
12*946379e7Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13*946379e7Schristos GNU General Public License for more details.
14*946379e7Schristos
15*946379e7Schristos You should have received a copy of the GNU General Public License
16*946379e7Schristos along with this program; if not, write to the Free Software Foundation,
17*946379e7Schristos Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18*946379e7Schristos
19*946379e7Schristos #ifdef HAVE_CONFIG_H
20*946379e7Schristos # include <config.h>
21*946379e7Schristos #endif
22*946379e7Schristos #include <alloca.h>
23*946379e7Schristos
24*946379e7Schristos #include <stdbool.h>
25*946379e7Schristos #include <stdlib.h>
26*946379e7Schristos #include <string.h>
27*946379e7Schristos
28*946379e7Schristos #include "format.h"
29*946379e7Schristos #include "c-ctype.h"
30*946379e7Schristos #include "xalloc.h"
31*946379e7Schristos #include "xallocsa.h"
32*946379e7Schristos #include "xvasprintf.h"
33*946379e7Schristos #include "format-invalid.h"
34*946379e7Schristos #include "gettext.h"
35*946379e7Schristos
36*946379e7Schristos #define _(str) gettext (str)
37*946379e7Schristos
38*946379e7Schristos /* Java format strings are described in java/text/MessageFormat.html.
39*946379e7Schristos See also the ICU documentation class_MessageFormat.html.
40*946379e7Schristos
41*946379e7Schristos messageFormatPattern := string ( "{" messageFormatElement "}" string )*
42*946379e7Schristos
43*946379e7Schristos messageFormatElement := argument { "," elementFormat }
44*946379e7Schristos
45*946379e7Schristos elementFormat := "time" { "," datetimeStyle }
46*946379e7Schristos | "date" { "," datetimeStyle }
47*946379e7Schristos | "number" { "," numberStyle }
48*946379e7Schristos | "choice" { "," choiceStyle }
49*946379e7Schristos
50*946379e7Schristos datetimeStyle := "short"
51*946379e7Schristos | "medium"
52*946379e7Schristos | "long"
53*946379e7Schristos | "full"
54*946379e7Schristos | dateFormatPattern
55*946379e7Schristos
56*946379e7Schristos numberStyle := "currency"
57*946379e7Schristos | "percent"
58*946379e7Schristos | "integer"
59*946379e7Schristos | numberFormatPattern
60*946379e7Schristos
61*946379e7Schristos choiceStyle := choiceFormatPattern
62*946379e7Schristos
63*946379e7Schristos dateFormatPattern see SimpleDateFormat.applyPattern
64*946379e7Schristos
65*946379e7Schristos numberFormatPattern see DecimalFormat.applyPattern
66*946379e7Schristos
67*946379e7Schristos choiceFormatPattern see ChoiceFormat constructor
68*946379e7Schristos
69*946379e7Schristos In strings, literal curly braces can be used if quoted between single
70*946379e7Schristos quotes. A real single quote is represented by ''.
71*946379e7Schristos
72*946379e7Schristos If a pattern is used, then unquoted braces in the pattern, if any, must
73*946379e7Schristos match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab {0'}' de" and
74*946379e7Schristos "ab } de" are not.
75*946379e7Schristos
   The argument is a non-negative decimal number (classically 0 to 9, but
   the parser in this file accepts any run of decimal digits), which
   corresponds to the arguments presented in an array to be formatted.
78*946379e7Schristos
79*946379e7Schristos It is ok to have unused arguments in the array.
80*946379e7Schristos
81*946379e7Schristos Adding a dateFormatPattern / numberFormatPattern / choiceFormatPattern
82*946379e7Schristos to an elementFormat is equivalent to creating a SimpleDateFormat /
83*946379e7Schristos DecimalFormat / ChoiceFormat and use of setFormat. For example,
84*946379e7Schristos
85*946379e7Schristos MessageFormat form =
86*946379e7Schristos new MessageFormat("The disk \"{1}\" contains {0,choice,0#no files|1#one file|2#{0,number} files}.");
87*946379e7Schristos
88*946379e7Schristos is equivalent to
89*946379e7Schristos
90*946379e7Schristos MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
91*946379e7Schristos form.setFormat(1, // Number of {} occurrence in the string!
92*946379e7Schristos new ChoiceFormat(new double[] { 0, 1, 2 },
93*946379e7Schristos new String[] { "no files", "one file",
94*946379e7Schristos "{0,number} files" }));
95*946379e7Schristos
96*946379e7Schristos Note: The behaviour of quotes inside a choiceFormatPattern is not clear.
97*946379e7Schristos Example 1:
98*946379e7Schristos "abc{1,choice,0#{1,number,00';'000}}def"
99*946379e7Schristos JDK 1.1.x: exception
100*946379e7Schristos JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;000}}def"
101*946379e7Schristos Example 2:
102*946379e7Schristos "abc{1,choice,0#{1,number,00';'}}def"
103*946379e7Schristos JDK 1.1.x: interprets the semicolon as number suffix
104*946379e7Schristos JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;}}def"
105*946379e7Schristos */
106*946379e7Schristos
/* Kind of Java object a directive expects for its argument.
   FAT_NONE is the marker stored when two directives request incompatible
   types for the same argument number.  */
enum format_arg_type
{
  FAT_NONE   = 0,
  FAT_OBJECT = 1,       /* java.lang.Object */
  FAT_NUMBER = 2,       /* java.lang.Number */
  FAT_DATE   = 3        /* java.util.Date */
};
114*946379e7Schristos
115*946379e7Schristos struct numbered_arg
116*946379e7Schristos {
117*946379e7Schristos unsigned int number;
118*946379e7Schristos enum format_arg_type type;
119*946379e7Schristos };
120*946379e7Schristos
/* Result of parsing one format string.  */
struct spec
{
  /* Number of '{...}' directives seen, nested ones included.  */
  unsigned int directives;
  /* Number of entries of NUMBERED that are in use.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which NUMBERED currently has room.  */
  unsigned int allocated;
  /* Growable array of argument references; NULL while empty.  */
  struct numbered_arg *numbered;
};
128*946379e7Schristos
129*946379e7Schristos
/* Forward declaration of local functions.  They are needed because the
   message format parser calls them before their definitions appear.  */

/* Validates a dateFormatPattern.  */
static bool date_format_parse (const char *format);

/* Validates a numberFormatPattern.  */
static bool number_format_parse (const char *format);

/* Validates a choiceFormatPattern and records its arguments in SPEC.  */
static bool choice_format_parse (const char *format, struct spec *spec,
				 char **invalid_reason);
135*946379e7Schristos
136*946379e7Schristos
/* Quote handling:
   - When we see a single-quote, ignore it, but toggle the quoting flag.
   - When we see a double single-quote, ignore the first of the two.
   Assumes local variables format, quoting.
   The body is wrapped in do { } while (0) so that "HANDLE_QUOTE;" is exactly
   one statement and can safely be used as the body of an if/else.  */
#define HANDLE_QUOTE \
  do								\
    {								\
      if (*format == '\'' && *++format != '\'')			\
	quoting = !quoting;					\
    }								\
  while (0)
144*946379e7Schristos
145*946379e7Schristos /* Note that message_format_parse and choice_format_parse are mutually
146*946379e7Schristos recursive. This is because MessageFormat can use some ChoiceFormats,
147*946379e7Schristos and a ChoiceFormat is made up from several MessageFormats. */
148*946379e7Schristos
149*946379e7Schristos /* Return true if a format is a valid messageFormatPattern.
150*946379e7Schristos Extracts argument type information into spec. */
151*946379e7Schristos static bool
message_format_parse(const char * format,struct spec * spec,char ** invalid_reason)152*946379e7Schristos message_format_parse (const char *format, struct spec *spec,
153*946379e7Schristos char **invalid_reason)
154*946379e7Schristos {
155*946379e7Schristos bool quoting = false;
156*946379e7Schristos
157*946379e7Schristos for (;;)
158*946379e7Schristos {
159*946379e7Schristos HANDLE_QUOTE;
160*946379e7Schristos if (!quoting && *format == '{')
161*946379e7Schristos {
162*946379e7Schristos unsigned int depth;
163*946379e7Schristos const char *element_start;
164*946379e7Schristos const char *element_end;
165*946379e7Schristos size_t n;
166*946379e7Schristos char *element_alloced;
167*946379e7Schristos char *element;
168*946379e7Schristos unsigned int number;
169*946379e7Schristos enum format_arg_type type;
170*946379e7Schristos
171*946379e7Schristos spec->directives++;
172*946379e7Schristos
173*946379e7Schristos element_start = ++format;
174*946379e7Schristos depth = 0;
175*946379e7Schristos for (; *format != '\0'; format++)
176*946379e7Schristos {
177*946379e7Schristos if (*format == '{')
178*946379e7Schristos depth++;
179*946379e7Schristos else if (*format == '}')
180*946379e7Schristos {
181*946379e7Schristos if (depth == 0)
182*946379e7Schristos break;
183*946379e7Schristos else
184*946379e7Schristos depth--;
185*946379e7Schristos }
186*946379e7Schristos }
187*946379e7Schristos if (*format == '\0')
188*946379e7Schristos {
189*946379e7Schristos *invalid_reason =
190*946379e7Schristos xstrdup (_("The string ends in the middle of a directive: found '{' without matching '}'."));
191*946379e7Schristos return false;
192*946379e7Schristos }
193*946379e7Schristos element_end = format++;
194*946379e7Schristos
195*946379e7Schristos n = element_end - element_start;
196*946379e7Schristos element = element_alloced = (char *) xallocsa (n + 1);
197*946379e7Schristos memcpy (element, element_start, n);
198*946379e7Schristos element[n] = '\0';
199*946379e7Schristos
200*946379e7Schristos if (!c_isdigit (*element))
201*946379e7Schristos {
202*946379e7Schristos *invalid_reason =
203*946379e7Schristos xasprintf (_("In the directive number %u, '{' is not followed by an argument number."), spec->directives);
204*946379e7Schristos freesa (element_alloced);
205*946379e7Schristos return false;
206*946379e7Schristos }
207*946379e7Schristos number = 0;
208*946379e7Schristos do
209*946379e7Schristos {
210*946379e7Schristos number = 10 * number + (*element - '0');
211*946379e7Schristos element++;
212*946379e7Schristos }
213*946379e7Schristos while (c_isdigit (*element));
214*946379e7Schristos
215*946379e7Schristos type = FAT_OBJECT;
216*946379e7Schristos if (*element == '\0')
217*946379e7Schristos ;
218*946379e7Schristos else if (strncmp (element, ",time", 5) == 0
219*946379e7Schristos || strncmp (element, ",date", 5) == 0)
220*946379e7Schristos {
221*946379e7Schristos type = FAT_DATE;
222*946379e7Schristos element += 5;
223*946379e7Schristos if (*element == '\0')
224*946379e7Schristos ;
225*946379e7Schristos else if (*element == ',')
226*946379e7Schristos {
227*946379e7Schristos element++;
228*946379e7Schristos if (strcmp (element, "short") == 0
229*946379e7Schristos || strcmp (element, "medium") == 0
230*946379e7Schristos || strcmp (element, "long") == 0
231*946379e7Schristos || strcmp (element, "full") == 0
232*946379e7Schristos || date_format_parse (element))
233*946379e7Schristos ;
234*946379e7Schristos else
235*946379e7Schristos {
236*946379e7Schristos *invalid_reason =
237*946379e7Schristos xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid date/time style."), spec->directives, element);
238*946379e7Schristos freesa (element_alloced);
239*946379e7Schristos return false;
240*946379e7Schristos }
241*946379e7Schristos }
242*946379e7Schristos else
243*946379e7Schristos {
244*946379e7Schristos *element = '\0';
245*946379e7Schristos element -= 4;
246*946379e7Schristos *invalid_reason =
247*946379e7Schristos xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
248*946379e7Schristos freesa (element_alloced);
249*946379e7Schristos return false;
250*946379e7Schristos }
251*946379e7Schristos }
252*946379e7Schristos else if (strncmp (element, ",number", 7) == 0)
253*946379e7Schristos {
254*946379e7Schristos type = FAT_NUMBER;
255*946379e7Schristos element += 7;
256*946379e7Schristos if (*element == '\0')
257*946379e7Schristos ;
258*946379e7Schristos else if (*element == ',')
259*946379e7Schristos {
260*946379e7Schristos element++;
261*946379e7Schristos if (strcmp (element, "currency") == 0
262*946379e7Schristos || strcmp (element, "percent") == 0
263*946379e7Schristos || strcmp (element, "integer") == 0
264*946379e7Schristos || number_format_parse (element))
265*946379e7Schristos ;
266*946379e7Schristos else
267*946379e7Schristos {
268*946379e7Schristos *invalid_reason =
269*946379e7Schristos xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid number style."), spec->directives, element);
270*946379e7Schristos freesa (element_alloced);
271*946379e7Schristos return false;
272*946379e7Schristos }
273*946379e7Schristos }
274*946379e7Schristos else
275*946379e7Schristos {
276*946379e7Schristos *element = '\0';
277*946379e7Schristos element -= 6;
278*946379e7Schristos *invalid_reason =
279*946379e7Schristos xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
280*946379e7Schristos freesa (element_alloced);
281*946379e7Schristos return false;
282*946379e7Schristos }
283*946379e7Schristos }
284*946379e7Schristos else if (strncmp (element, ",choice", 7) == 0)
285*946379e7Schristos {
286*946379e7Schristos type = FAT_NUMBER; /* because ChoiceFormat extends NumberFormat */
287*946379e7Schristos element += 7;
288*946379e7Schristos if (*element == '\0')
289*946379e7Schristos ;
290*946379e7Schristos else if (*element == ',')
291*946379e7Schristos {
292*946379e7Schristos element++;
293*946379e7Schristos if (choice_format_parse (element, spec, invalid_reason))
294*946379e7Schristos ;
295*946379e7Schristos else
296*946379e7Schristos {
297*946379e7Schristos freesa (element_alloced);
298*946379e7Schristos return false;
299*946379e7Schristos }
300*946379e7Schristos }
301*946379e7Schristos else
302*946379e7Schristos {
303*946379e7Schristos *element = '\0';
304*946379e7Schristos element -= 6;
305*946379e7Schristos *invalid_reason =
306*946379e7Schristos xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
307*946379e7Schristos freesa (element_alloced);
308*946379e7Schristos return false;
309*946379e7Schristos }
310*946379e7Schristos }
311*946379e7Schristos else
312*946379e7Schristos {
313*946379e7Schristos *invalid_reason =
314*946379e7Schristos xasprintf (_("In the directive number %u, the argument number is not followed by a comma and one of \"%s\", \"%s\", \"%s\", \"%s\"."), spec->directives, "time", "date", "number", "choice");
315*946379e7Schristos freesa (element_alloced);
316*946379e7Schristos return false;
317*946379e7Schristos }
318*946379e7Schristos freesa (element_alloced);
319*946379e7Schristos
320*946379e7Schristos if (spec->allocated == spec->numbered_arg_count)
321*946379e7Schristos {
322*946379e7Schristos spec->allocated = 2 * spec->allocated + 1;
323*946379e7Schristos spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, spec->allocated * sizeof (struct numbered_arg));
324*946379e7Schristos }
325*946379e7Schristos spec->numbered[spec->numbered_arg_count].number = number;
326*946379e7Schristos spec->numbered[spec->numbered_arg_count].type = type;
327*946379e7Schristos spec->numbered_arg_count++;
328*946379e7Schristos }
329*946379e7Schristos /* The doc says "ab}de" is invalid. Even though JDK accepts it. */
330*946379e7Schristos else if (!quoting && *format == '}')
331*946379e7Schristos {
332*946379e7Schristos *invalid_reason =
333*946379e7Schristos xstrdup (_("The string starts in the middle of a directive: found '}' without matching '{'."));
334*946379e7Schristos return false;
335*946379e7Schristos }
336*946379e7Schristos else if (*format != '\0')
337*946379e7Schristos format++;
338*946379e7Schristos else
339*946379e7Schristos break;
340*946379e7Schristos }
341*946379e7Schristos
342*946379e7Schristos return true;
343*946379e7Schristos }
344*946379e7Schristos
/* Return true if a format is a valid dateFormatPattern.
   Every string qualifies: a single-quote opens a quoted section that runs
   to the next single-quote or to the end of the string, a doubled
   single-quote stands for one single-quote, and unquoted ASCII letters are
   grouped into runs of equal letters (e.g. 'yyyy') which are then
   interpreted according to some rules.  Hence nothing is rejected here.  */
static bool
date_format_parse (const char *format)
{
  (void) format;	/* intentionally not inspected */
  return true;
}
356*946379e7Schristos
357*946379e7Schristos /* Return true if a format is a valid numberFormatPattern. */
358*946379e7Schristos static bool
number_format_parse(const char * format)359*946379e7Schristos number_format_parse (const char *format)
360*946379e7Schristos {
361*946379e7Schristos /* Pattern Syntax:
362*946379e7Schristos pattern := pos_pattern{';' neg_pattern}
363*946379e7Schristos pos_pattern := {prefix}number{suffix}
364*946379e7Schristos neg_pattern := {prefix}number{suffix}
365*946379e7Schristos number := integer{'.' fraction}{exponent}
366*946379e7Schristos prefix := '\u0000'..'\uFFFD' - special_characters
367*946379e7Schristos suffix := '\u0000'..'\uFFFD' - special_characters
368*946379e7Schristos integer := min_int | '#' | '#' integer | '#' ',' integer
369*946379e7Schristos min_int := '0' | '0' min_int | '0' ',' min_int
370*946379e7Schristos fraction := '0'* '#'*
371*946379e7Schristos exponent := 'E' '0' '0'*
372*946379e7Schristos Notation:
373*946379e7Schristos X* 0 or more instances of X
374*946379e7Schristos { X } 0 or 1 instances of X
375*946379e7Schristos X | Y either X or Y
376*946379e7Schristos X..Y any character from X up to Y, inclusive
377*946379e7Schristos S - T characters in S, except those in T
378*946379e7Schristos Single-quote starts a quoted section, to be terminated at the next
379*946379e7Schristos single-quote or string end. Double single-quote gives a single
380*946379e7Schristos single-quote.
381*946379e7Schristos */
382*946379e7Schristos bool quoting = false;
383*946379e7Schristos bool seen_semicolon = false;
384*946379e7Schristos
385*946379e7Schristos HANDLE_QUOTE;
386*946379e7Schristos for (;;)
387*946379e7Schristos {
388*946379e7Schristos /* Parse prefix. */
389*946379e7Schristos while (*format != '\0'
390*946379e7Schristos && !(!quoting && (*format == '0' || *format == '#')))
391*946379e7Schristos {
392*946379e7Schristos if (format[0] == '\\')
393*946379e7Schristos {
394*946379e7Schristos if (format[1] == 'u'
395*946379e7Schristos && c_isxdigit (format[2])
396*946379e7Schristos && c_isxdigit (format[3])
397*946379e7Schristos && c_isxdigit (format[4])
398*946379e7Schristos && c_isxdigit (format[5]))
399*946379e7Schristos format += 6;
400*946379e7Schristos else
401*946379e7Schristos format += 2;
402*946379e7Schristos }
403*946379e7Schristos else
404*946379e7Schristos format += 1;
405*946379e7Schristos HANDLE_QUOTE;
406*946379e7Schristos }
407*946379e7Schristos
408*946379e7Schristos /* Parse integer. */
409*946379e7Schristos if (!(!quoting && (*format == '0' || *format == '#')))
410*946379e7Schristos return false;
411*946379e7Schristos while (!quoting && *format == '#')
412*946379e7Schristos {
413*946379e7Schristos format++;
414*946379e7Schristos HANDLE_QUOTE;
415*946379e7Schristos if (!quoting && *format == ',')
416*946379e7Schristos {
417*946379e7Schristos format++;
418*946379e7Schristos HANDLE_QUOTE;
419*946379e7Schristos }
420*946379e7Schristos }
421*946379e7Schristos while (!quoting && *format == '0')
422*946379e7Schristos {
423*946379e7Schristos format++;
424*946379e7Schristos HANDLE_QUOTE;
425*946379e7Schristos if (!quoting && *format == ',')
426*946379e7Schristos {
427*946379e7Schristos format++;
428*946379e7Schristos HANDLE_QUOTE;
429*946379e7Schristos }
430*946379e7Schristos }
431*946379e7Schristos
432*946379e7Schristos /* Parse fraction. */
433*946379e7Schristos if (!quoting && *format == '.')
434*946379e7Schristos {
435*946379e7Schristos format++;
436*946379e7Schristos HANDLE_QUOTE;
437*946379e7Schristos while (!quoting && *format == '0')
438*946379e7Schristos {
439*946379e7Schristos format++;
440*946379e7Schristos HANDLE_QUOTE;
441*946379e7Schristos }
442*946379e7Schristos while (!quoting && *format == '#')
443*946379e7Schristos {
444*946379e7Schristos format++;
445*946379e7Schristos HANDLE_QUOTE;
446*946379e7Schristos }
447*946379e7Schristos }
448*946379e7Schristos
449*946379e7Schristos /* Parse exponent. */
450*946379e7Schristos if (!quoting && *format == 'E')
451*946379e7Schristos {
452*946379e7Schristos const char *format_save = format;
453*946379e7Schristos format++;
454*946379e7Schristos HANDLE_QUOTE;
455*946379e7Schristos if (!quoting && *format == '0')
456*946379e7Schristos {
457*946379e7Schristos do
458*946379e7Schristos {
459*946379e7Schristos format++;
460*946379e7Schristos HANDLE_QUOTE;
461*946379e7Schristos }
462*946379e7Schristos while (!quoting && *format == '0');
463*946379e7Schristos }
464*946379e7Schristos else
465*946379e7Schristos {
466*946379e7Schristos /* Back up. */
467*946379e7Schristos format = format_save;
468*946379e7Schristos quoting = false;
469*946379e7Schristos }
470*946379e7Schristos }
471*946379e7Schristos
472*946379e7Schristos /* Parse suffix. */
473*946379e7Schristos while (*format != '\0'
474*946379e7Schristos && (seen_semicolon || !(!quoting && *format == ';')))
475*946379e7Schristos {
476*946379e7Schristos if (format[0] == '\\')
477*946379e7Schristos {
478*946379e7Schristos if (format[1] == 'u'
479*946379e7Schristos && c_isxdigit (format[2])
480*946379e7Schristos && c_isxdigit (format[3])
481*946379e7Schristos && c_isxdigit (format[4])
482*946379e7Schristos && c_isxdigit (format[5]))
483*946379e7Schristos format += 6;
484*946379e7Schristos else
485*946379e7Schristos format += 2;
486*946379e7Schristos }
487*946379e7Schristos else
488*946379e7Schristos format += 1;
489*946379e7Schristos HANDLE_QUOTE;
490*946379e7Schristos }
491*946379e7Schristos
492*946379e7Schristos if (seen_semicolon || !(!quoting && *format == ';'))
493*946379e7Schristos break;
494*946379e7Schristos }
495*946379e7Schristos
496*946379e7Schristos return (*format == '\0');
497*946379e7Schristos }
498*946379e7Schristos
499*946379e7Schristos /* Return true if a format is a valid choiceFormatPattern.
500*946379e7Schristos Extracts argument type information into spec. */
501*946379e7Schristos static bool
choice_format_parse(const char * format,struct spec * spec,char ** invalid_reason)502*946379e7Schristos choice_format_parse (const char *format, struct spec *spec,
503*946379e7Schristos char **invalid_reason)
504*946379e7Schristos {
505*946379e7Schristos /* Pattern syntax:
506*946379e7Schristos pattern := | choice | choice '|' pattern
507*946379e7Schristos choice := number separator messageformat
508*946379e7Schristos separator := '<' | '#' | '\u2264'
509*946379e7Schristos Single-quote starts a quoted section, to be terminated at the next
510*946379e7Schristos single-quote or string end. Double single-quote gives a single
511*946379e7Schristos single-quote.
512*946379e7Schristos */
513*946379e7Schristos bool quoting = false;
514*946379e7Schristos
515*946379e7Schristos HANDLE_QUOTE;
516*946379e7Schristos if (*format == '\0')
517*946379e7Schristos return true;
518*946379e7Schristos for (;;)
519*946379e7Schristos {
520*946379e7Schristos /* Don't bother looking too precisely into the syntax of the number.
521*946379e7Schristos It can contain various Unicode characters. */
522*946379e7Schristos bool number_nonempty;
523*946379e7Schristos char *msgformat;
524*946379e7Schristos char *mp;
525*946379e7Schristos bool msgformat_valid;
526*946379e7Schristos
527*946379e7Schristos /* Parse number. */
528*946379e7Schristos number_nonempty = false;
529*946379e7Schristos while (*format != '\0'
530*946379e7Schristos && !(!quoting && (*format == '<' || *format == '#'
531*946379e7Schristos || strncmp (format, "\\u2264", 6) == 0
532*946379e7Schristos || *format == '|')))
533*946379e7Schristos {
534*946379e7Schristos if (format[0] == '\\')
535*946379e7Schristos {
536*946379e7Schristos if (format[1] == 'u'
537*946379e7Schristos && c_isxdigit (format[2])
538*946379e7Schristos && c_isxdigit (format[3])
539*946379e7Schristos && c_isxdigit (format[4])
540*946379e7Schristos && c_isxdigit (format[5]))
541*946379e7Schristos format += 6;
542*946379e7Schristos else
543*946379e7Schristos format += 2;
544*946379e7Schristos }
545*946379e7Schristos else
546*946379e7Schristos format += 1;
547*946379e7Schristos number_nonempty = true;
548*946379e7Schristos HANDLE_QUOTE;
549*946379e7Schristos }
550*946379e7Schristos
551*946379e7Schristos /* Short clause at end of pattern is valid and is ignored! */
552*946379e7Schristos if (*format == '\0')
553*946379e7Schristos break;
554*946379e7Schristos
555*946379e7Schristos if (!number_nonempty)
556*946379e7Schristos {
557*946379e7Schristos *invalid_reason =
558*946379e7Schristos xasprintf (_("In the directive number %u, a choice contains no number."), spec->directives);
559*946379e7Schristos return false;
560*946379e7Schristos }
561*946379e7Schristos
562*946379e7Schristos if (*format == '<' || *format == '#')
563*946379e7Schristos format += 1;
564*946379e7Schristos else if (strncmp (format, "\\u2264", 6) == 0)
565*946379e7Schristos format += 6;
566*946379e7Schristos else
567*946379e7Schristos {
568*946379e7Schristos *invalid_reason =
569*946379e7Schristos xasprintf (_("In the directive number %u, a choice contains a number that is not followed by '<', '#' or '%s'."), spec->directives, "\\u2264");
570*946379e7Schristos return false;
571*946379e7Schristos }
572*946379e7Schristos HANDLE_QUOTE;
573*946379e7Schristos
574*946379e7Schristos msgformat = (char *) xallocsa (strlen (format) + 1);
575*946379e7Schristos mp = msgformat;
576*946379e7Schristos
577*946379e7Schristos while (*format != '\0' && !(!quoting && *format == '|'))
578*946379e7Schristos {
579*946379e7Schristos *mp++ = *format++;
580*946379e7Schristos HANDLE_QUOTE;
581*946379e7Schristos }
582*946379e7Schristos *mp = '\0';
583*946379e7Schristos
584*946379e7Schristos msgformat_valid = message_format_parse (msgformat, spec, invalid_reason);
585*946379e7Schristos
586*946379e7Schristos freesa (msgformat);
587*946379e7Schristos
588*946379e7Schristos if (!msgformat_valid)
589*946379e7Schristos return false;
590*946379e7Schristos
591*946379e7Schristos if (*format == '\0')
592*946379e7Schristos break;
593*946379e7Schristos
594*946379e7Schristos format++;
595*946379e7Schristos HANDLE_QUOTE;
596*946379e7Schristos }
597*946379e7Schristos
598*946379e7Schristos return true;
599*946379e7Schristos }
600*946379e7Schristos
601*946379e7Schristos static int
numbered_arg_compare(const void * p1,const void * p2)602*946379e7Schristos numbered_arg_compare (const void *p1, const void *p2)
603*946379e7Schristos {
604*946379e7Schristos unsigned int n1 = ((const struct numbered_arg *) p1)->number;
605*946379e7Schristos unsigned int n2 = ((const struct numbered_arg *) p2)->number;
606*946379e7Schristos
607*946379e7Schristos return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
608*946379e7Schristos }
609*946379e7Schristos
610*946379e7Schristos static void *
format_parse(const char * format,bool translated,char ** invalid_reason)611*946379e7Schristos format_parse (const char *format, bool translated, char **invalid_reason)
612*946379e7Schristos {
613*946379e7Schristos struct spec spec;
614*946379e7Schristos struct spec *result;
615*946379e7Schristos
616*946379e7Schristos spec.directives = 0;
617*946379e7Schristos spec.numbered_arg_count = 0;
618*946379e7Schristos spec.allocated = 0;
619*946379e7Schristos spec.numbered = NULL;
620*946379e7Schristos
621*946379e7Schristos if (!message_format_parse (format, &spec, invalid_reason))
622*946379e7Schristos goto bad_format;
623*946379e7Schristos
624*946379e7Schristos /* Sort the numbered argument array, and eliminate duplicates. */
625*946379e7Schristos if (spec.numbered_arg_count > 1)
626*946379e7Schristos {
627*946379e7Schristos unsigned int i, j;
628*946379e7Schristos bool err;
629*946379e7Schristos
630*946379e7Schristos qsort (spec.numbered, spec.numbered_arg_count,
631*946379e7Schristos sizeof (struct numbered_arg), numbered_arg_compare);
632*946379e7Schristos
633*946379e7Schristos /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
634*946379e7Schristos err = false;
635*946379e7Schristos for (i = j = 0; i < spec.numbered_arg_count; i++)
636*946379e7Schristos if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
637*946379e7Schristos {
638*946379e7Schristos enum format_arg_type type1 = spec.numbered[i].type;
639*946379e7Schristos enum format_arg_type type2 = spec.numbered[j-1].type;
640*946379e7Schristos enum format_arg_type type_both;
641*946379e7Schristos
642*946379e7Schristos if (type1 == type2 || type2 == FAT_OBJECT)
643*946379e7Schristos type_both = type1;
644*946379e7Schristos else if (type1 == FAT_OBJECT)
645*946379e7Schristos type_both = type2;
646*946379e7Schristos else
647*946379e7Schristos {
648*946379e7Schristos /* Incompatible types. */
649*946379e7Schristos type_both = FAT_NONE;
650*946379e7Schristos if (!err)
651*946379e7Schristos *invalid_reason =
652*946379e7Schristos INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
653*946379e7Schristos err = true;
654*946379e7Schristos }
655*946379e7Schristos
656*946379e7Schristos spec.numbered[j-1].type = type_both;
657*946379e7Schristos }
658*946379e7Schristos else
659*946379e7Schristos {
660*946379e7Schristos if (j < i)
661*946379e7Schristos {
662*946379e7Schristos spec.numbered[j].number = spec.numbered[i].number;
663*946379e7Schristos spec.numbered[j].type = spec.numbered[i].type;
664*946379e7Schristos }
665*946379e7Schristos j++;
666*946379e7Schristos }
667*946379e7Schristos spec.numbered_arg_count = j;
668*946379e7Schristos if (err)
669*946379e7Schristos /* *invalid_reason has already been set above. */
670*946379e7Schristos goto bad_format;
671*946379e7Schristos }
672*946379e7Schristos
673*946379e7Schristos result = (struct spec *) xmalloc (sizeof (struct spec));
674*946379e7Schristos *result = spec;
675*946379e7Schristos return result;
676*946379e7Schristos
677*946379e7Schristos bad_format:
678*946379e7Schristos if (spec.numbered != NULL)
679*946379e7Schristos free (spec.numbered);
680*946379e7Schristos return NULL;
681*946379e7Schristos }
682*946379e7Schristos
683*946379e7Schristos static void
format_free(void * descr)684*946379e7Schristos format_free (void *descr)
685*946379e7Schristos {
686*946379e7Schristos struct spec *spec = (struct spec *) descr;
687*946379e7Schristos
688*946379e7Schristos if (spec->numbered != NULL)
689*946379e7Schristos free (spec->numbered);
690*946379e7Schristos free (spec);
691*946379e7Schristos }
692*946379e7Schristos
693*946379e7Schristos static int
format_get_number_of_directives(void * descr)694*946379e7Schristos format_get_number_of_directives (void *descr)
695*946379e7Schristos {
696*946379e7Schristos struct spec *spec = (struct spec *) descr;
697*946379e7Schristos
698*946379e7Schristos return spec->directives;
699*946379e7Schristos }
700*946379e7Schristos
701*946379e7Schristos static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)702*946379e7Schristos format_check (void *msgid_descr, void *msgstr_descr, bool equality,
703*946379e7Schristos formatstring_error_logger_t error_logger,
704*946379e7Schristos const char *pretty_msgstr)
705*946379e7Schristos {
706*946379e7Schristos struct spec *spec1 = (struct spec *) msgid_descr;
707*946379e7Schristos struct spec *spec2 = (struct spec *) msgstr_descr;
708*946379e7Schristos bool err = false;
709*946379e7Schristos
710*946379e7Schristos if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
711*946379e7Schristos {
712*946379e7Schristos unsigned int i, j;
713*946379e7Schristos unsigned int n1 = spec1->numbered_arg_count;
714*946379e7Schristos unsigned int n2 = spec2->numbered_arg_count;
715*946379e7Schristos
716*946379e7Schristos /* Check the argument names are the same.
717*946379e7Schristos Both arrays are sorted. We search for the first difference. */
718*946379e7Schristos for (i = 0, j = 0; i < n1 || j < n2; )
719*946379e7Schristos {
720*946379e7Schristos int cmp = (i >= n1 ? 1 :
721*946379e7Schristos j >= n2 ? -1 :
722*946379e7Schristos spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
723*946379e7Schristos spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
724*946379e7Schristos 0);
725*946379e7Schristos
726*946379e7Schristos if (cmp > 0)
727*946379e7Schristos {
728*946379e7Schristos if (error_logger)
729*946379e7Schristos error_logger (_("a format specification for argument {%u}, as in '%s', doesn't exist in 'msgid'"),
730*946379e7Schristos spec2->numbered[j].number, pretty_msgstr);
731*946379e7Schristos err = true;
732*946379e7Schristos break;
733*946379e7Schristos }
734*946379e7Schristos else if (cmp < 0)
735*946379e7Schristos {
736*946379e7Schristos if (equality)
737*946379e7Schristos {
738*946379e7Schristos if (error_logger)
739*946379e7Schristos error_logger (_("a format specification for argument {%u} doesn't exist in '%s'"),
740*946379e7Schristos spec1->numbered[i].number, pretty_msgstr);
741*946379e7Schristos err = true;
742*946379e7Schristos break;
743*946379e7Schristos }
744*946379e7Schristos else
745*946379e7Schristos i++;
746*946379e7Schristos }
747*946379e7Schristos else
748*946379e7Schristos j++, i++;
749*946379e7Schristos }
750*946379e7Schristos /* Check the argument types are the same. */
751*946379e7Schristos if (!err)
752*946379e7Schristos for (i = 0, j = 0; j < n2; )
753*946379e7Schristos {
754*946379e7Schristos if (spec1->numbered[i].number == spec2->numbered[j].number)
755*946379e7Schristos {
756*946379e7Schristos if (spec1->numbered[i].type != spec2->numbered[j].type)
757*946379e7Schristos {
758*946379e7Schristos if (error_logger)
759*946379e7Schristos error_logger (_("format specifications in 'msgid' and '%s' for argument {%u} are not the same"),
760*946379e7Schristos pretty_msgstr, spec2->numbered[j].number);
761*946379e7Schristos err = true;
762*946379e7Schristos break;
763*946379e7Schristos }
764*946379e7Schristos j++, i++;
765*946379e7Schristos }
766*946379e7Schristos else
767*946379e7Schristos i++;
768*946379e7Schristos }
769*946379e7Schristos }
770*946379e7Schristos
771*946379e7Schristos return err;
772*946379e7Schristos }
773*946379e7Schristos
774*946379e7Schristos
775*946379e7Schristos struct formatstring_parser formatstring_java =
776*946379e7Schristos {
777*946379e7Schristos format_parse,
778*946379e7Schristos format_free,
779*946379e7Schristos format_get_number_of_directives,
780*946379e7Schristos NULL,
781*946379e7Schristos format_check
782*946379e7Schristos };
783*946379e7Schristos
784*946379e7Schristos
785*946379e7Schristos #ifdef TEST
786*946379e7Schristos
787*946379e7Schristos /* Test program: Print the argument list specification returned by
788*946379e7Schristos format_parse for strings read from standard input. */
789*946379e7Schristos
790*946379e7Schristos #include <stdio.h>
791*946379e7Schristos #include "getline.h"
792*946379e7Schristos
793*946379e7Schristos static void
format_print(void * descr)794*946379e7Schristos format_print (void *descr)
795*946379e7Schristos {
796*946379e7Schristos struct spec *spec = (struct spec *) descr;
797*946379e7Schristos unsigned int last;
798*946379e7Schristos unsigned int i;
799*946379e7Schristos
800*946379e7Schristos if (spec == NULL)
801*946379e7Schristos {
802*946379e7Schristos printf ("INVALID");
803*946379e7Schristos return;
804*946379e7Schristos }
805*946379e7Schristos
806*946379e7Schristos printf ("(");
807*946379e7Schristos last = 0;
808*946379e7Schristos for (i = 0; i < spec->numbered_arg_count; i++)
809*946379e7Schristos {
810*946379e7Schristos unsigned int number = spec->numbered[i].number;
811*946379e7Schristos
812*946379e7Schristos if (i > 0)
813*946379e7Schristos printf (" ");
814*946379e7Schristos if (number < last)
815*946379e7Schristos abort ();
816*946379e7Schristos for (; last < number; last++)
817*946379e7Schristos printf ("_ ");
818*946379e7Schristos switch (spec->numbered[i].type)
819*946379e7Schristos {
820*946379e7Schristos case FAT_OBJECT:
821*946379e7Schristos printf ("*");
822*946379e7Schristos break;
823*946379e7Schristos case FAT_NUMBER:
824*946379e7Schristos printf ("Number");
825*946379e7Schristos break;
826*946379e7Schristos case FAT_DATE:
827*946379e7Schristos printf ("Date");
828*946379e7Schristos break;
829*946379e7Schristos default:
830*946379e7Schristos abort ();
831*946379e7Schristos }
832*946379e7Schristos last = number + 1;
833*946379e7Schristos }
834*946379e7Schristos printf (")");
835*946379e7Schristos }
836*946379e7Schristos
/* Test driver: read format strings from standard input, one per line,
   parse each with format_parse, and print the resulting argument list
   (or "INVALID" followed by the reason) to standard output.
   Returns 0 once standard input is exhausted.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; otherwise it leaks once per line.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
868*946379e7Schristos
869*946379e7Schristos /*
870*946379e7Schristos * For Emacs M-x compile
871*946379e7Schristos * Local Variables:
872*946379e7Schristos * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-java.c ../lib/libgettextlib.la"
873*946379e7Schristos * End:
874*946379e7Schristos */
875*946379e7Schristos
876*946379e7Schristos #endif /* TEST */
877