xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/format-sh.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
/* Shell format strings.
   Copyright (C) 2003-2004, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2003.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18*946379e7Schristos 
19*946379e7Schristos #ifdef HAVE_CONFIG_H
20*946379e7Schristos # include <config.h>
21*946379e7Schristos #endif
22*946379e7Schristos 
23*946379e7Schristos #include <stdbool.h>
24*946379e7Schristos #include <stdlib.h>
25*946379e7Schristos #include <string.h>
26*946379e7Schristos 
27*946379e7Schristos #include "format.h"
28*946379e7Schristos #include "c-ctype.h"
29*946379e7Schristos #include "xalloc.h"
30*946379e7Schristos #include "format-invalid.h"
31*946379e7Schristos #include "gettext.h"
32*946379e7Schristos 
33*946379e7Schristos #define _(str) gettext (str)
34*946379e7Schristos 
/* Shell format strings are simply strings subject to variable substitution.
   A variable substitution starts with '$' and is finished by either
   - a nonempty sequence of alphanumeric ASCII characters or underscores,
     the first being not a digit, or
   - an opening brace '{', a nonempty sequence of alphanumeric ASCII
     characters or underscores, the first being not a digit, and a closing
     brace '}'.
   We don't support variable references like $1, $$ or $? since they make
   no sense when 'envsubst' is invoked.
   We don't support non-ASCII variable names, to avoid dependencies w.r.t. the
   current encoding: While "${\xe0}" looks like a variable access in ISO-8859-1
   encoding, it doesn't look like one in the BIG5, BIG5-HKSCS, GBK, GB18030,
   SHIFT_JIS, JOHAB encodings, because \xe0\x7d is a single character in these
   encodings.
   We don't support the POSIX syntax for default or alternate values:
     ${variable-default}        ${variable:-default}
     ${variable=default}        ${variable:=default}
     ${variable+replacement}    ${variable:+replacement}
     ${variable?ignored}        ${variable:?ignored}
   because the translator might be tempted to change the default value; if
   we allow it we have a security problem; if we don't allow it the translator
   will be surprised.
 */
57*946379e7Schristos 
/* One variable reference ($name or ${name}) found in a format string.  */
struct named_arg
{
  /* NUL-terminated variable name.  format_parse allocates it; it is
     released either by format_parse itself (on error, or when it is a
     duplicate) or by format_free.  */
  char *name;
};
62*946379e7Schristos 
/* Description of one parsed shell format string, as built by
   format_parse.  */
struct spec
{
  /* Number of '$' substitutions encountered, duplicates included.  */
  unsigned int directives;
  /* Number of entries of NAMED that are in use.  */
  unsigned int named_arg_count;
  /* Number of entries NAMED has room for.  */
  unsigned int allocated;
  /* The referenced variable names, or NULL if there are none.  Once
     format_parse has returned successfully, the array is sorted by name
     and contains no duplicates.  */
  struct named_arg *named;
};
70*946379e7Schristos 
71*946379e7Schristos 
72*946379e7Schristos static int
named_arg_compare(const void * p1,const void * p2)73*946379e7Schristos named_arg_compare (const void *p1, const void *p2)
74*946379e7Schristos {
75*946379e7Schristos   return strcmp (((const struct named_arg *) p1)->name,
76*946379e7Schristos 		 ((const struct named_arg *) p2)->name);
77*946379e7Schristos }
78*946379e7Schristos 
/* Diagnostics for variable references that format_parse rejects.  Each
   macro expands to an xstrdup'ed copy of the translated message.  */
#define INVALID_NON_ASCII_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with a non-ASCII name."))
#define INVALID_SHELL_SYNTAX() \
  xstrdup (_("The string refers to a shell variable with complex shell brace syntax. This syntax is unsupported here due to security reasons."))
#define INVALID_CONTEXT_DEPENDENT_VARIABLE() \
  xstrdup (_("The string refers to a shell variable whose value may be different inside shell functions."))
#define INVALID_EMPTY_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with an empty name."))
87*946379e7Schristos 
88*946379e7Schristos static void *
format_parse(const char * format,bool translated,char ** invalid_reason)89*946379e7Schristos format_parse (const char *format, bool translated, char **invalid_reason)
90*946379e7Schristos {
91*946379e7Schristos   struct spec spec;
92*946379e7Schristos   struct spec *result;
93*946379e7Schristos 
94*946379e7Schristos   spec.directives = 0;
95*946379e7Schristos   spec.named_arg_count = 0;
96*946379e7Schristos   spec.allocated = 0;
97*946379e7Schristos   spec.named = NULL;
98*946379e7Schristos 
99*946379e7Schristos   for (; *format != '\0';)
100*946379e7Schristos     if (*format++ == '$')
101*946379e7Schristos       {
102*946379e7Schristos 	/* A variable substitution.  */
103*946379e7Schristos 	char *name;
104*946379e7Schristos 
105*946379e7Schristos 	spec.directives++;
106*946379e7Schristos 
107*946379e7Schristos 	if (*format == '{')
108*946379e7Schristos 	  {
109*946379e7Schristos 	    const char *name_start;
110*946379e7Schristos 	    const char *name_end;
111*946379e7Schristos 	    size_t n;
112*946379e7Schristos 
113*946379e7Schristos 	    name_start = ++format;
114*946379e7Schristos 	    for (; *format != '\0'; format++)
115*946379e7Schristos 	      {
116*946379e7Schristos 		if (*format == '}')
117*946379e7Schristos 		  break;
118*946379e7Schristos 		if (!c_isascii (*format))
119*946379e7Schristos 		  {
120*946379e7Schristos 		    *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
121*946379e7Schristos 		    goto bad_format;
122*946379e7Schristos 		  }
123*946379e7Schristos 		if (format > name_start
124*946379e7Schristos 		    && (*format == '-' || *format == '=' || *format == '+'
125*946379e7Schristos 			|| *format == '?' || *format == ':'))
126*946379e7Schristos 		  {
127*946379e7Schristos 		    *invalid_reason = INVALID_SHELL_SYNTAX ();
128*946379e7Schristos 		    goto bad_format;
129*946379e7Schristos 		  }
130*946379e7Schristos 		if (!(c_isalnum (*format) || *format == '_')
131*946379e7Schristos 		    || (format == name_start && c_isdigit (*format)))
132*946379e7Schristos 		  {
133*946379e7Schristos 		    *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
134*946379e7Schristos 		    goto bad_format;
135*946379e7Schristos 		  }
136*946379e7Schristos 	      }
137*946379e7Schristos 	    if (*format == '\0')
138*946379e7Schristos 	      {
139*946379e7Schristos 		*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
140*946379e7Schristos 		goto bad_format;
141*946379e7Schristos 	      }
142*946379e7Schristos 	    name_end = format++;
143*946379e7Schristos 
144*946379e7Schristos 	    n = name_end - name_start;
145*946379e7Schristos 	    if (n == 0)
146*946379e7Schristos 	      {
147*946379e7Schristos 		*invalid_reason = INVALID_EMPTY_VARIABLE ();
148*946379e7Schristos 		goto bad_format;
149*946379e7Schristos 	      }
150*946379e7Schristos 	    name = (char *) xmalloc (n + 1);
151*946379e7Schristos 	    memcpy (name, name_start, n);
152*946379e7Schristos 	    name[n] = '\0';
153*946379e7Schristos 	  }
154*946379e7Schristos 	else if (c_isalpha (*format) || *format == '_')
155*946379e7Schristos 	  {
156*946379e7Schristos 	    const char *name_start;
157*946379e7Schristos 	    const char *name_end;
158*946379e7Schristos 	    size_t n;
159*946379e7Schristos 
160*946379e7Schristos 	    name_start = format;
161*946379e7Schristos 	    do
162*946379e7Schristos 	      format++;
163*946379e7Schristos 	    while (*format != '\0' && (c_isalnum (*format) || *format == '_'));
164*946379e7Schristos 	    name_end = format;
165*946379e7Schristos 
166*946379e7Schristos 	    n = name_end - name_start;
167*946379e7Schristos 	    name = (char *) xmalloc (n + 1);
168*946379e7Schristos 	    memcpy (name, name_start, n);
169*946379e7Schristos 	    name[n] = '\0';
170*946379e7Schristos 	  }
171*946379e7Schristos 	else if (*format != '\0')
172*946379e7Schristos 	  {
173*946379e7Schristos 	    if (!c_isascii (*format))
174*946379e7Schristos 	      {
175*946379e7Schristos 		*invalid_reason = INVALID_NON_ASCII_VARIABLE ();
176*946379e7Schristos 		goto bad_format;
177*946379e7Schristos 	      }
178*946379e7Schristos 	    else
179*946379e7Schristos 	      {
180*946379e7Schristos 		*invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
181*946379e7Schristos 		goto bad_format;
182*946379e7Schristos 	      }
183*946379e7Schristos 	  }
184*946379e7Schristos 	else
185*946379e7Schristos 	  {
186*946379e7Schristos 	    *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
187*946379e7Schristos 	    goto bad_format;
188*946379e7Schristos 	  }
189*946379e7Schristos 
190*946379e7Schristos 	/* Named argument.  */
191*946379e7Schristos 	if (spec.allocated == spec.named_arg_count)
192*946379e7Schristos 	  {
193*946379e7Schristos 	    spec.allocated = 2 * spec.allocated + 1;
194*946379e7Schristos 	    spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
195*946379e7Schristos 	  }
196*946379e7Schristos 	spec.named[spec.named_arg_count].name = name;
197*946379e7Schristos 	spec.named_arg_count++;
198*946379e7Schristos       }
199*946379e7Schristos 
200*946379e7Schristos   /* Sort the named argument array, and eliminate duplicates.  */
201*946379e7Schristos   if (spec.named_arg_count > 1)
202*946379e7Schristos     {
203*946379e7Schristos       unsigned int i, j;
204*946379e7Schristos 
205*946379e7Schristos       qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
206*946379e7Schristos 	     named_arg_compare);
207*946379e7Schristos 
208*946379e7Schristos       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
209*946379e7Schristos       for (i = j = 0; i < spec.named_arg_count; i++)
210*946379e7Schristos 	if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
211*946379e7Schristos 	  free (spec.named[i].name);
212*946379e7Schristos 	else
213*946379e7Schristos 	  {
214*946379e7Schristos 	    if (j < i)
215*946379e7Schristos 	      spec.named[j].name = spec.named[i].name;
216*946379e7Schristos 	    j++;
217*946379e7Schristos 	  }
218*946379e7Schristos       spec.named_arg_count = j;
219*946379e7Schristos     }
220*946379e7Schristos 
221*946379e7Schristos   result = (struct spec *) xmalloc (sizeof (struct spec));
222*946379e7Schristos   *result = spec;
223*946379e7Schristos   return result;
224*946379e7Schristos 
225*946379e7Schristos  bad_format:
226*946379e7Schristos   if (spec.named != NULL)
227*946379e7Schristos     {
228*946379e7Schristos       unsigned int i;
229*946379e7Schristos       for (i = 0; i < spec.named_arg_count; i++)
230*946379e7Schristos 	free (spec.named[i].name);
231*946379e7Schristos       free (spec.named);
232*946379e7Schristos     }
233*946379e7Schristos   return NULL;
234*946379e7Schristos }
235*946379e7Schristos 
236*946379e7Schristos static void
format_free(void * descr)237*946379e7Schristos format_free (void *descr)
238*946379e7Schristos {
239*946379e7Schristos   struct spec *spec = (struct spec *) descr;
240*946379e7Schristos 
241*946379e7Schristos   if (spec->named != NULL)
242*946379e7Schristos     {
243*946379e7Schristos       unsigned int i;
244*946379e7Schristos       for (i = 0; i < spec->named_arg_count; i++)
245*946379e7Schristos 	free (spec->named[i].name);
246*946379e7Schristos       free (spec->named);
247*946379e7Schristos     }
248*946379e7Schristos   free (spec);
249*946379e7Schristos }
250*946379e7Schristos 
251*946379e7Schristos static int
format_get_number_of_directives(void * descr)252*946379e7Schristos format_get_number_of_directives (void *descr)
253*946379e7Schristos {
254*946379e7Schristos   struct spec *spec = (struct spec *) descr;
255*946379e7Schristos 
256*946379e7Schristos   return spec->directives;
257*946379e7Schristos }
258*946379e7Schristos 
259*946379e7Schristos static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)260*946379e7Schristos format_check (void *msgid_descr, void *msgstr_descr, bool equality,
261*946379e7Schristos 	      formatstring_error_logger_t error_logger,
262*946379e7Schristos 	      const char *pretty_msgstr)
263*946379e7Schristos {
264*946379e7Schristos   struct spec *spec1 = (struct spec *) msgid_descr;
265*946379e7Schristos   struct spec *spec2 = (struct spec *) msgstr_descr;
266*946379e7Schristos   bool err = false;
267*946379e7Schristos 
268*946379e7Schristos   if (spec1->named_arg_count + spec2->named_arg_count > 0)
269*946379e7Schristos     {
270*946379e7Schristos       unsigned int i, j;
271*946379e7Schristos       unsigned int n1 = spec1->named_arg_count;
272*946379e7Schristos       unsigned int n2 = spec2->named_arg_count;
273*946379e7Schristos 
274*946379e7Schristos       /* Check the argument names are the same.
275*946379e7Schristos 	 Both arrays are sorted.  We search for the first difference.  */
276*946379e7Schristos       for (i = 0, j = 0; i < n1 || j < n2; )
277*946379e7Schristos 	{
278*946379e7Schristos 	  int cmp = (i >= n1 ? 1 :
279*946379e7Schristos 		     j >= n2 ? -1 :
280*946379e7Schristos 		     strcmp (spec1->named[i].name, spec2->named[j].name));
281*946379e7Schristos 
282*946379e7Schristos 	  if (cmp > 0)
283*946379e7Schristos 	    {
284*946379e7Schristos 	      if (error_logger)
285*946379e7Schristos 		error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"),
286*946379e7Schristos 			      spec2->named[j].name, pretty_msgstr);
287*946379e7Schristos 	      err = true;
288*946379e7Schristos 	      break;
289*946379e7Schristos 	    }
290*946379e7Schristos 	  else if (cmp < 0)
291*946379e7Schristos 	    {
292*946379e7Schristos 	      if (equality)
293*946379e7Schristos 		{
294*946379e7Schristos 		  if (error_logger)
295*946379e7Schristos 		    error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
296*946379e7Schristos 				  spec1->named[i].name, pretty_msgstr);
297*946379e7Schristos 		  err = true;
298*946379e7Schristos 		  break;
299*946379e7Schristos 		}
300*946379e7Schristos 	      else
301*946379e7Schristos 		i++;
302*946379e7Schristos 	    }
303*946379e7Schristos 	  else
304*946379e7Schristos 	    j++, i++;
305*946379e7Schristos 	}
306*946379e7Schristos     }
307*946379e7Schristos 
308*946379e7Schristos   return err;
309*946379e7Schristos }
310*946379e7Schristos 
311*946379e7Schristos 
312*946379e7Schristos struct formatstring_parser formatstring_sh =
313*946379e7Schristos {
314*946379e7Schristos   format_parse,
315*946379e7Schristos   format_free,
316*946379e7Schristos   format_get_number_of_directives,
317*946379e7Schristos   NULL,
318*946379e7Schristos   format_check
319*946379e7Schristos };
320*946379e7Schristos 
321*946379e7Schristos 
322*946379e7Schristos #ifdef TEST
323*946379e7Schristos 
/* Test program: Print the argument list specification returned by
   format_parse for strings read from standard input.  */
326*946379e7Schristos 
327*946379e7Schristos #include <stdio.h>
328*946379e7Schristos #include "getline.h"
329*946379e7Schristos 
330*946379e7Schristos static void
format_print(void * descr)331*946379e7Schristos format_print (void *descr)
332*946379e7Schristos {
333*946379e7Schristos   struct spec *spec = (struct spec *) descr;
334*946379e7Schristos   unsigned int i;
335*946379e7Schristos 
336*946379e7Schristos   if (spec == NULL)
337*946379e7Schristos     {
338*946379e7Schristos       printf ("INVALID");
339*946379e7Schristos       return;
340*946379e7Schristos     }
341*946379e7Schristos 
342*946379e7Schristos   printf ("{");
343*946379e7Schristos   for (i = 0; i < spec->named_arg_count; i++)
344*946379e7Schristos     {
345*946379e7Schristos       if (i > 0)
346*946379e7Schristos 	printf (", ");
347*946379e7Schristos       printf ("'%s'", spec->named[i].name);
348*946379e7Schristos     }
349*946379e7Schristos   printf ("}");
350*946379e7Schristos }
351*946379e7Schristos 
/* Test driver: read lines from standard input until getline fails, parse
   each one with format_parse, and print the resulting argument list, or
   "INVALID" followed by the reason when parsing fails.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; format_free does not accept NULL.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
383*946379e7Schristos 
/*
 * For Emacs M-x compile
 * Local Variables:
 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-sh.c ../lib/libgettextlib.la"
 * End:
 */
390*946379e7Schristos 
391*946379e7Schristos #endif /* TEST */
392