1 /* Shell format strings.
2 Copyright (C) 2003-2004, 2006 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include "format.h"
28 #include "c-ctype.h"
29 #include "xalloc.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32
33 #define _(str) gettext (str)
34
/* Shell format strings are simply strings subject to variable substitution.
   A variable substitution starts with '$' and is finished by either
   - a nonempty sequence of alphanumeric ASCII characters, the first being
     not a digit, or
   - an opening brace '{', a nonempty sequence of alphanumeric ASCII
     characters, the first being not a digit, and a closing brace '}'.
   We don't support variable references like $1, $$ or $? since they make
   no sense when 'envsubst' is invoked.
   We don't support non-ASCII variable names, to avoid dependencies w.r.t. the
   current encoding: While "${\xe0}" looks like a variable access in ISO-8859-1
   encoding, it doesn't look like one in the BIG5, BIG5-HKSCS, GBK, GB18030,
   SHIFT_JIS, JOHAB encodings, because \xe0\x7d is a single character in these
   encodings.
   We don't support the POSIX syntax for default or alternate values:
     ${variable-default}        ${variable:-default}
     ${variable=default}        ${variable:=default}
     ${variable+replacement}    ${variable:+replacement}
     ${variable?ignored}        ${variable:?ignored}
   because the translator might be tempted to change the default value; if
   we allow it we have a security problem; if we don't allow it the translator
   will be surprised.
 */
57
/* One variable referenced by a format string.  */
struct named_arg
{
  /* NUL-terminated variable name, without the leading '$' or any braces.
     Allocated by format_parse and released by format_free.  */
  char *name;
};
62
/* Parsed description of a shell format string.  */
struct spec
{
  /* Number of '$' substitutions seen while parsing; a repeated variable
     is counted each time it occurs.  */
  unsigned int directives;
  /* Number of entries of 'named' that are in use.  */
  unsigned int named_arg_count;
  /* Number of entries for which 'named' has room.  */
  unsigned int allocated;
  /* The referenced variables; format_parse leaves this array sorted by
     name and free of duplicates.  NULL when nothing was allocated.  */
  struct named_arg *named;
};
70
71
72 static int
named_arg_compare(const void * p1,const void * p2)73 named_arg_compare (const void *p1, const void *p2)
74 {
75 return strcmp (((const struct named_arg *) p1)->name,
76 ((const struct named_arg *) p2)->name);
77 }
78
/* Reasons for rejecting a '$' substitution.  Each macro expands to an
   xstrdup'ed copy of the translated message, suitable for storing in
   *invalid_reason.  */

/* The variable name contains a non-ASCII character.  */
#define INVALID_NON_ASCII_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with a non-ASCII name."))

/* A "${...}" reference uses one of the '-', '=', '+', '?', ':' operators.  */
#define INVALID_SHELL_SYNTAX() \
  xstrdup (_("The string refers to a shell variable with complex shell brace syntax. This syntax is unsupported here due to security reasons."))

/* The '$' is followed by something that is not a plain variable name.  */
#define INVALID_CONTEXT_DEPENDENT_VARIABLE() \
  xstrdup (_("The string refers to a shell variable whose value may be different inside shell functions."))

/* The reference is "${}".  */
#define INVALID_EMPTY_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with an empty name."))
87
88 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)89 format_parse (const char *format, bool translated, char **invalid_reason)
90 {
91 struct spec spec;
92 struct spec *result;
93
94 spec.directives = 0;
95 spec.named_arg_count = 0;
96 spec.allocated = 0;
97 spec.named = NULL;
98
99 for (; *format != '\0';)
100 if (*format++ == '$')
101 {
102 /* A variable substitution. */
103 char *name;
104
105 spec.directives++;
106
107 if (*format == '{')
108 {
109 const char *name_start;
110 const char *name_end;
111 size_t n;
112
113 name_start = ++format;
114 for (; *format != '\0'; format++)
115 {
116 if (*format == '}')
117 break;
118 if (!c_isascii (*format))
119 {
120 *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
121 goto bad_format;
122 }
123 if (format > name_start
124 && (*format == '-' || *format == '=' || *format == '+'
125 || *format == '?' || *format == ':'))
126 {
127 *invalid_reason = INVALID_SHELL_SYNTAX ();
128 goto bad_format;
129 }
130 if (!(c_isalnum (*format) || *format == '_')
131 || (format == name_start && c_isdigit (*format)))
132 {
133 *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
134 goto bad_format;
135 }
136 }
137 if (*format == '\0')
138 {
139 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
140 goto bad_format;
141 }
142 name_end = format++;
143
144 n = name_end - name_start;
145 if (n == 0)
146 {
147 *invalid_reason = INVALID_EMPTY_VARIABLE ();
148 goto bad_format;
149 }
150 name = (char *) xmalloc (n + 1);
151 memcpy (name, name_start, n);
152 name[n] = '\0';
153 }
154 else if (c_isalpha (*format) || *format == '_')
155 {
156 const char *name_start;
157 const char *name_end;
158 size_t n;
159
160 name_start = format;
161 do
162 format++;
163 while (*format != '\0' && (c_isalnum (*format) || *format == '_'));
164 name_end = format;
165
166 n = name_end - name_start;
167 name = (char *) xmalloc (n + 1);
168 memcpy (name, name_start, n);
169 name[n] = '\0';
170 }
171 else if (*format != '\0')
172 {
173 if (!c_isascii (*format))
174 {
175 *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
176 goto bad_format;
177 }
178 else
179 {
180 *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
181 goto bad_format;
182 }
183 }
184 else
185 {
186 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
187 goto bad_format;
188 }
189
190 /* Named argument. */
191 if (spec.allocated == spec.named_arg_count)
192 {
193 spec.allocated = 2 * spec.allocated + 1;
194 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
195 }
196 spec.named[spec.named_arg_count].name = name;
197 spec.named_arg_count++;
198 }
199
200 /* Sort the named argument array, and eliminate duplicates. */
201 if (spec.named_arg_count > 1)
202 {
203 unsigned int i, j;
204
205 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
206 named_arg_compare);
207
208 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
209 for (i = j = 0; i < spec.named_arg_count; i++)
210 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
211 free (spec.named[i].name);
212 else
213 {
214 if (j < i)
215 spec.named[j].name = spec.named[i].name;
216 j++;
217 }
218 spec.named_arg_count = j;
219 }
220
221 result = (struct spec *) xmalloc (sizeof (struct spec));
222 *result = spec;
223 return result;
224
225 bad_format:
226 if (spec.named != NULL)
227 {
228 unsigned int i;
229 for (i = 0; i < spec.named_arg_count; i++)
230 free (spec.named[i].name);
231 free (spec.named);
232 }
233 return NULL;
234 }
235
236 static void
format_free(void * descr)237 format_free (void *descr)
238 {
239 struct spec *spec = (struct spec *) descr;
240
241 if (spec->named != NULL)
242 {
243 unsigned int i;
244 for (i = 0; i < spec->named_arg_count; i++)
245 free (spec->named[i].name);
246 free (spec->named);
247 }
248 free (spec);
249 }
250
251 static int
format_get_number_of_directives(void * descr)252 format_get_number_of_directives (void *descr)
253 {
254 struct spec *spec = (struct spec *) descr;
255
256 return spec->directives;
257 }
258
259 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)260 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
261 formatstring_error_logger_t error_logger,
262 const char *pretty_msgstr)
263 {
264 struct spec *spec1 = (struct spec *) msgid_descr;
265 struct spec *spec2 = (struct spec *) msgstr_descr;
266 bool err = false;
267
268 if (spec1->named_arg_count + spec2->named_arg_count > 0)
269 {
270 unsigned int i, j;
271 unsigned int n1 = spec1->named_arg_count;
272 unsigned int n2 = spec2->named_arg_count;
273
274 /* Check the argument names are the same.
275 Both arrays are sorted. We search for the first difference. */
276 for (i = 0, j = 0; i < n1 || j < n2; )
277 {
278 int cmp = (i >= n1 ? 1 :
279 j >= n2 ? -1 :
280 strcmp (spec1->named[i].name, spec2->named[j].name));
281
282 if (cmp > 0)
283 {
284 if (error_logger)
285 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"),
286 spec2->named[j].name, pretty_msgstr);
287 err = true;
288 break;
289 }
290 else if (cmp < 0)
291 {
292 if (equality)
293 {
294 if (error_logger)
295 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
296 spec1->named[i].name, pretty_msgstr);
297 err = true;
298 break;
299 }
300 else
301 i++;
302 }
303 else
304 j++, i++;
305 }
306 }
307
308 return err;
309 }
310
311
312 struct formatstring_parser formatstring_sh =
313 {
314 format_parse,
315 format_free,
316 format_get_number_of_directives,
317 NULL,
318 format_check
319 };
320
321
322 #ifdef TEST
323
324 /* Test program: Print the argument list specification returned by
325 format_parse for strings read from standard input. */
326
327 #include <stdio.h>
328 #include "getline.h"
329
330 static void
format_print(void * descr)331 format_print (void *descr)
332 {
333 struct spec *spec = (struct spec *) descr;
334 unsigned int i;
335
336 if (spec == NULL)
337 {
338 printf ("INVALID");
339 return;
340 }
341
342 printf ("{");
343 for (i = 0; i < spec->named_arg_count; i++)
344 {
345 if (i > 0)
346 printf (", ");
347 printf ("'%s'", spec->named[i].name);
348 }
349 printf ("}");
350 }
351
/* Test driver: read lines from standard input until getline fails.  Each
   line, with its trailing newline removed, is parsed as a shell format
   string; the parse result is printed, followed by the reason for
   rejection when the string is invalid.  Always returns 0.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* The buffer may have been allocated even though the read
             failed; free (NULL) is harmless if it was not.  */
          free (line);
          break;
        }
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor so it does not leak on every line.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
383
384 /*
385 * For Emacs M-x compile
386 * Local Variables:
387 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-sh.c ../lib/libgettextlib.la"
388 * End:
389 */
390
391 #endif /* TEST */
392