1 /* Shell format strings.
2    Copyright (C) 2003-2004, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2003.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "format.h"
28 #include "c-ctype.h"
29 #include "xalloc.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
/* Shell format strings are simply strings subject to variable substitution.
   A variable substitution starts with '$' and is finished by either
   - a nonempty sequence of alphanumeric ASCII characters or underscores,
     the first being not a digit, or
   - an opening brace '{', a nonempty sequence of alphanumeric ASCII
     characters or underscores, the first being not a digit, and a closing
     brace '}'.
   We don't support variable references like $1, $$ or $? since they make
   no sense when 'envsubst' is invoked.
   We don't support non-ASCII variable names, to avoid dependencies w.r.t. the
   current encoding: While "${\xe0}" looks like a variable access in ISO-8859-1
   encoding, it doesn't look like one in the BIG5, BIG5-HKSCS, GBK, GB18030,
   SHIFT_JIS, JOHAB encodings, because \xe0\x7d is a single character in these
   encodings.
   We don't support the POSIX syntax for default or alternate values:
     ${variable-default}        ${variable:-default}
     ${variable=default}        ${variable:=default}
     ${variable+replacement}    ${variable:+replacement}
     ${variable?ignored}        ${variable:?ignored}
   because the translator might be tempted to change the default value; if
   we allow it we have a security problem; if we don't allow it the translator
   will be surprised.
 */
57 
/* One shell variable referenced by a format string.  */
struct named_arg
{
  /* NUL-terminated variable name, without the leading '$' or any braces.
     format_parse allocates it with xmalloc; format_free releases it.  */
  char *name;
};
62 
/* Description of a parsed shell format string.  */
struct spec
{
  /* Number of '$' substitutions encountered; repeated references to the
     same variable are each counted.  */
  unsigned int directives;
  /* Number of valid entries in 'named'.  */
  unsigned int named_arg_count;
  /* Number of entries for which 'named' has room.  */
  unsigned int allocated;
  /* Referenced variable names.  format_parse leaves this array sorted by
     name with duplicates removed; it is NULL when nothing was stored.  */
  struct named_arg *named;
};
70 
71 
72 static int
named_arg_compare(const void * p1,const void * p2)73 named_arg_compare (const void *p1, const void *p2)
74 {
75   return strcmp (((const struct named_arg *) p1)->name,
76 		 ((const struct named_arg *) p2)->name);
77 }
78 
/* Diagnostics stored in *invalid_reason by format_parse.  Each expands to a
   fresh xstrdup() copy of the translated message, so the receiver owns the
   string.
   NOTE(review): INVALID_UNTERMINATED_DIRECTIVE, also used by format_parse,
   is not defined here; presumably it comes from "format-invalid.h".  */

/* The variable name contains a byte outside the ASCII range.  */
#define INVALID_NON_ASCII_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with a non-ASCII name."))
/* A ${...} reference uses one of the '-', '=', '+', '?', ':' operators.  */
#define INVALID_SHELL_SYNTAX() \
  xstrdup (_("The string refers to a shell variable with complex shell brace syntax. This syntax is unsupported here due to security reasons."))
/* The reference is not a plain identifier (for instance $1, $$ or $?).  */
#define INVALID_CONTEXT_DEPENDENT_VARIABLE() \
  xstrdup (_("The string refers to a shell variable whose value may be different inside shell functions."))
/* The reference is "${}".  */
#define INVALID_EMPTY_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with an empty name."))
87 
88 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)89 format_parse (const char *format, bool translated, char **invalid_reason)
90 {
91   struct spec spec;
92   struct spec *result;
93 
94   spec.directives = 0;
95   spec.named_arg_count = 0;
96   spec.allocated = 0;
97   spec.named = NULL;
98 
99   for (; *format != '\0';)
100     if (*format++ == '$')
101       {
102 	/* A variable substitution.  */
103 	char *name;
104 
105 	spec.directives++;
106 
107 	if (*format == '{')
108 	  {
109 	    const char *name_start;
110 	    const char *name_end;
111 	    size_t n;
112 
113 	    name_start = ++format;
114 	    for (; *format != '\0'; format++)
115 	      {
116 		if (*format == '}')
117 		  break;
118 		if (!c_isascii (*format))
119 		  {
120 		    *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
121 		    goto bad_format;
122 		  }
123 		if (format > name_start
124 		    && (*format == '-' || *format == '=' || *format == '+'
125 			|| *format == '?' || *format == ':'))
126 		  {
127 		    *invalid_reason = INVALID_SHELL_SYNTAX ();
128 		    goto bad_format;
129 		  }
130 		if (!(c_isalnum (*format) || *format == '_')
131 		    || (format == name_start && c_isdigit (*format)))
132 		  {
133 		    *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
134 		    goto bad_format;
135 		  }
136 	      }
137 	    if (*format == '\0')
138 	      {
139 		*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
140 		goto bad_format;
141 	      }
142 	    name_end = format++;
143 
144 	    n = name_end - name_start;
145 	    if (n == 0)
146 	      {
147 		*invalid_reason = INVALID_EMPTY_VARIABLE ();
148 		goto bad_format;
149 	      }
150 	    name = (char *) xmalloc (n + 1);
151 	    memcpy (name, name_start, n);
152 	    name[n] = '\0';
153 	  }
154 	else if (c_isalpha (*format) || *format == '_')
155 	  {
156 	    const char *name_start;
157 	    const char *name_end;
158 	    size_t n;
159 
160 	    name_start = format;
161 	    do
162 	      format++;
163 	    while (*format != '\0' && (c_isalnum (*format) || *format == '_'));
164 	    name_end = format;
165 
166 	    n = name_end - name_start;
167 	    name = (char *) xmalloc (n + 1);
168 	    memcpy (name, name_start, n);
169 	    name[n] = '\0';
170 	  }
171 	else if (*format != '\0')
172 	  {
173 	    if (!c_isascii (*format))
174 	      {
175 		*invalid_reason = INVALID_NON_ASCII_VARIABLE ();
176 		goto bad_format;
177 	      }
178 	    else
179 	      {
180 		*invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
181 		goto bad_format;
182 	      }
183 	  }
184 	else
185 	  {
186 	    *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
187 	    goto bad_format;
188 	  }
189 
190 	/* Named argument.  */
191 	if (spec.allocated == spec.named_arg_count)
192 	  {
193 	    spec.allocated = 2 * spec.allocated + 1;
194 	    spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
195 	  }
196 	spec.named[spec.named_arg_count].name = name;
197 	spec.named_arg_count++;
198       }
199 
200   /* Sort the named argument array, and eliminate duplicates.  */
201   if (spec.named_arg_count > 1)
202     {
203       unsigned int i, j;
204 
205       qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
206 	     named_arg_compare);
207 
208       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
209       for (i = j = 0; i < spec.named_arg_count; i++)
210 	if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
211 	  free (spec.named[i].name);
212 	else
213 	  {
214 	    if (j < i)
215 	      spec.named[j].name = spec.named[i].name;
216 	    j++;
217 	  }
218       spec.named_arg_count = j;
219     }
220 
221   result = (struct spec *) xmalloc (sizeof (struct spec));
222   *result = spec;
223   return result;
224 
225  bad_format:
226   if (spec.named != NULL)
227     {
228       unsigned int i;
229       for (i = 0; i < spec.named_arg_count; i++)
230 	free (spec.named[i].name);
231       free (spec.named);
232     }
233   return NULL;
234 }
235 
236 static void
format_free(void * descr)237 format_free (void *descr)
238 {
239   struct spec *spec = (struct spec *) descr;
240 
241   if (spec->named != NULL)
242     {
243       unsigned int i;
244       for (i = 0; i < spec->named_arg_count; i++)
245 	free (spec->named[i].name);
246       free (spec->named);
247     }
248   free (spec);
249 }
250 
251 static int
format_get_number_of_directives(void * descr)252 format_get_number_of_directives (void *descr)
253 {
254   struct spec *spec = (struct spec *) descr;
255 
256   return spec->directives;
257 }
258 
259 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)260 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
261 	      formatstring_error_logger_t error_logger,
262 	      const char *pretty_msgstr)
263 {
264   struct spec *spec1 = (struct spec *) msgid_descr;
265   struct spec *spec2 = (struct spec *) msgstr_descr;
266   bool err = false;
267 
268   if (spec1->named_arg_count + spec2->named_arg_count > 0)
269     {
270       unsigned int i, j;
271       unsigned int n1 = spec1->named_arg_count;
272       unsigned int n2 = spec2->named_arg_count;
273 
274       /* Check the argument names are the same.
275 	 Both arrays are sorted.  We search for the first difference.  */
276       for (i = 0, j = 0; i < n1 || j < n2; )
277 	{
278 	  int cmp = (i >= n1 ? 1 :
279 		     j >= n2 ? -1 :
280 		     strcmp (spec1->named[i].name, spec2->named[j].name));
281 
282 	  if (cmp > 0)
283 	    {
284 	      if (error_logger)
285 		error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"),
286 			      spec2->named[j].name, pretty_msgstr);
287 	      err = true;
288 	      break;
289 	    }
290 	  else if (cmp < 0)
291 	    {
292 	      if (equality)
293 		{
294 		  if (error_logger)
295 		    error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
296 				  spec1->named[i].name, pretty_msgstr);
297 		  err = true;
298 		  break;
299 		}
300 	      else
301 		i++;
302 	    }
303 	  else
304 	    j++, i++;
305 	}
306     }
307 
308   return err;
309 }
310 
311 
312 struct formatstring_parser formatstring_sh =
313 {
314   format_parse,
315   format_free,
316   format_get_number_of_directives,
317   NULL,
318   format_check
319 };
320 
321 
322 #ifdef TEST
323 
324 /* Test program: Print the argument list specification returned by
325    format_parse for strings read from standard input.  */
326 
327 #include <stdio.h>
328 #include "getline.h"
329 
330 static void
format_print(void * descr)331 format_print (void *descr)
332 {
333   struct spec *spec = (struct spec *) descr;
334   unsigned int i;
335 
336   if (spec == NULL)
337     {
338       printf ("INVALID");
339       return;
340     }
341 
342   printf ("{");
343   for (i = 0; i < spec->named_arg_count; i++)
344     {
345       if (i > 0)
346 	printf (", ");
347       printf ("'%s'", spec->named[i].name);
348     }
349   printf ("}");
350 }
351 
/* Test driver: read lines from standard input until getline() fails, parse
   each one with format_parse, and print the resulting description followed,
   for invalid strings, by the reason on a line of its own.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline() may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Drop the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);

      /* NOTE(review): a non-NULL descr is not passed to format_free here,
         as in the original test program.  */
      free (invalid_reason);
      free (line);
    }

  return 0;
}
383 
384 /*
385  * For Emacs M-x compile
386  * Local Variables:
387  * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-sh.c ../lib/libgettextlib.la"
388  * End:
389  */
390 
391 #endif /* TEST */
392