xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/xgettext.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Extracts strings from C source file to Uniforum style .po file.
2    Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc.
3    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <alloca.h>
23 
24 #include <ctype.h>
25 #include <errno.h>
26 #include <getopt.h>
27 #include <stdio.h>
28 #include <time.h>
29 #include <stdlib.h>
30 #include <stdbool.h>
31 #include <string.h>
32 #include <locale.h>
33 #include <limits.h>
34 
35 #include "xgettext.h"
36 #include "closeout.h"
37 #include "dir-list.h"
38 #include "file-list.h"
39 #include "str-list.h"
40 #include "error.h"
41 #include "error-progname.h"
42 #include "progname.h"
43 #include "relocatable.h"
44 #include "basename.h"
45 #include "xerror.h"
46 #include "xvasprintf.h"
47 #include "xalloc.h"
48 #include "xallocsa.h"
49 #include "c-strstr.h"
50 #include "xerror.h"
51 #include "exit.h"
52 #include "pathname.h"
53 #include "c-strcase.h"
54 #include "open-catalog.h"
55 #include "read-catalog-abstract.h"
56 #include "read-po.h"
57 #include "message.h"
58 #include "po-charset.h"
59 #include "msgl-iconv.h"
60 #include "msgl-ascii.h"
61 #include "po-time.h"
62 #include "write-catalog.h"
63 #include "write-po.h"
64 #include "write-properties.h"
65 #include "write-stringtable.h"
66 #include "format.h"
67 #include "propername.h"
68 #include "gettext.h"
69 
70 /* A convenience macro.  I don't like writing gettext() every time.  */
71 #define _(str) gettext (str)
72 
73 
74 #ifdef __cplusplus
75 extern "C" {
76 #endif
77 
78 #include "x-c.h"
79 #include "x-po.h"
80 #include "x-sh.h"
81 #include "x-python.h"
82 #include "x-lisp.h"
83 #include "x-elisp.h"
84 #include "x-librep.h"
85 #include "x-scheme.h"
86 #include "x-smalltalk.h"
87 #include "x-java.h"
88 #include "x-properties.h"
89 #include "x-csharp.h"
90 #include "x-awk.h"
91 #include "x-ycp.h"
92 #include "x-tcl.h"
93 #include "x-perl.h"
94 #include "x-php.h"
95 #include "x-stringtable.h"
96 #include "x-rst.h"
97 #include "x-glade.h"
98 
99 #ifdef __cplusplus
100 }
101 #endif
102 
103 
104 /* If nonzero add all comments immediately preceding one of the keywords. */
105 static bool add_all_comments = false;
106 
107 /* Tag used in comment of prevailing domain.  */
108 static char *comment_tag;
109 
110 /* Name of default domain file.  If not set defaults to messages.po.  */
111 static const char *default_domain;
112 
113 /* If called with --debug option the output reflects whether format
114    string recognition is done automatically or forced by the user.  */
115 static int do_debug;
116 
117 /* Content of .po files with symbols to be excluded.  */
118 message_list_ty *exclude;
119 
120 /* Force output of PO file even if empty.  */
121 static int force_po;
122 
123 /* Copyright holder of the output file and the translations.  */
124 static const char *copyright_holder = "THE PACKAGE'S COPYRIGHT HOLDER";
125 
126 /* Email address or URL for reports of bugs in msgids.  */
127 static const char *msgid_bugs_address = NULL;
128 
129 /* String used as prefix for msgstr.  */
130 static const char *msgstr_prefix;
131 
132 /* String used as suffix for msgstr.  */
133 static const char *msgstr_suffix;
134 
135 /* Directory in which output files are created.  */
136 static char *output_dir;
137 
138 /* The output syntax: .pot or .properties or .strings.  */
139 static catalog_output_format_ty output_syntax = &output_format_po;
140 
141 /* If nonzero omit header with information about this run.  */
142 int xgettext_omit_header;
143 
144 /* Table of flag_context_list_ty tables.  */
145 static flag_context_list_table_ty flag_table_c;
146 static flag_context_list_table_ty flag_table_cxx_qt;
147 static flag_context_list_table_ty flag_table_cxx_boost;
148 static flag_context_list_table_ty flag_table_objc;
149 static flag_context_list_table_ty flag_table_gcc_internal;
150 static flag_context_list_table_ty flag_table_sh;
151 static flag_context_list_table_ty flag_table_python;
152 static flag_context_list_table_ty flag_table_lisp;
153 static flag_context_list_table_ty flag_table_elisp;
154 static flag_context_list_table_ty flag_table_librep;
155 static flag_context_list_table_ty flag_table_scheme;
156 static flag_context_list_table_ty flag_table_java;
157 static flag_context_list_table_ty flag_table_csharp;
158 static flag_context_list_table_ty flag_table_awk;
159 static flag_context_list_table_ty flag_table_ycp;
160 static flag_context_list_table_ty flag_table_tcl;
161 static flag_context_list_table_ty flag_table_perl;
162 static flag_context_list_table_ty flag_table_php;
163 
164 /* If true, recognize Qt format strings.  */
165 static bool recognize_format_qt;
166 
167 /* If true, recognize Boost format strings.  */
168 static bool recognize_format_boost;
169 
170 /* Canonicalized encoding name for all input files.  */
171 const char *xgettext_global_source_encoding;
172 
173 #if HAVE_ICONV
174 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
175    ASCII or UTF-8, when this conversion is a no-op).  */
176 iconv_t xgettext_global_source_iconv;
177 #endif
178 
179 /* Canonicalized encoding name for the current input file.  */
180 const char *xgettext_current_source_encoding;
181 
182 #if HAVE_ICONV
183 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
184    ASCII or UTF-8, when this conversion is a no-op).  */
185 iconv_t xgettext_current_source_iconv;
186 #endif
187 
188 /* Long options.  */
189 static const struct option long_options[] =
190 {
191   { "add-comments", optional_argument, NULL, 'c' },
192   { "add-location", no_argument, &line_comment, 1 },
193   { "boost", no_argument, NULL, CHAR_MAX + 10 },
194   { "c++", no_argument, NULL, 'C' },
195   { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
196   { "debug", no_argument, &do_debug, 1 },
197   { "default-domain", required_argument, NULL, 'd' },
198   { "directory", required_argument, NULL, 'D' },
199   { "escape", no_argument, NULL, 'E' },
200   { "exclude-file", required_argument, NULL, 'x' },
201   { "extract-all", no_argument, NULL, 'a' },
202   { "files-from", required_argument, NULL, 'f' },
203   { "flag", required_argument, NULL, CHAR_MAX + 8 },
204   { "force-po", no_argument, &force_po, 1 },
205   { "foreign-user", no_argument, NULL, CHAR_MAX + 2 },
206   { "from-code", required_argument, NULL, CHAR_MAX + 3 },
207   { "help", no_argument, NULL, 'h' },
208   { "indent", no_argument, NULL, 'i' },
209   { "join-existing", no_argument, NULL, 'j' },
210   { "keyword", optional_argument, NULL, 'k' },
211   { "language", required_argument, NULL, 'L' },
212   { "msgid-bugs-address", required_argument, NULL, CHAR_MAX + 5 },
213   { "msgstr-prefix", optional_argument, NULL, 'm' },
214   { "msgstr-suffix", optional_argument, NULL, 'M' },
215   { "no-escape", no_argument, NULL, 'e' },
216   { "no-location", no_argument, &line_comment, 0 },
217   { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
218   { "omit-header", no_argument, &xgettext_omit_header, 1 },
219   { "output", required_argument, NULL, 'o' },
220   { "output-dir", required_argument, NULL, 'p' },
221   { "properties-output", no_argument, NULL, CHAR_MAX + 6 },
222   { "qt", no_argument, NULL, CHAR_MAX + 9 },
223   { "sort-by-file", no_argument, NULL, 'F' },
224   { "sort-output", no_argument, NULL, 's' },
225   { "strict", no_argument, NULL, 'S' },
226   { "string-limit", required_argument, NULL, 'l' },
227   { "stringtable-output", no_argument, NULL, CHAR_MAX + 7 },
228   { "trigraphs", no_argument, NULL, 'T' },
229   { "version", no_argument, NULL, 'V' },
230   { "width", required_argument, NULL, 'w', },
231   { NULL, 0, NULL, 0 }
232 };
233 
234 
235 /* The extractors must all be functions returning void and taking three
236    arguments designating the input stream and one message domain list argument
237    in which to add the messages.  */
238 typedef void (*extractor_func) (FILE *fp, const char *real_filename,
239 				const char *logical_filename,
240 				flag_context_list_table_ty *flag_table,
241 				msgdomain_list_ty *mdlp);
242 
243 typedef struct extractor_ty extractor_ty;
244 struct extractor_ty
245 {
246   extractor_func func;
247   flag_context_list_table_ty *flag_table;
248   struct formatstring_parser *formatstring_parser1;
249   struct formatstring_parser *formatstring_parser2;
250 };
251 
252 
253 /* Forward declaration of local functions.  */
254 static void usage (int status)
255 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ > 4) || __GNUC__ > 2)
256 	__attribute__ ((noreturn))
257 #endif
258 ;
259 static void read_exclusion_file (char *file_name);
260 static void extract_from_file (const char *file_name, extractor_ty extractor,
261 			       msgdomain_list_ty *mdlp);
262 static message_ty *construct_header (void);
263 static void finalize_header (msgdomain_list_ty *mdlp);
264 static extractor_ty language_to_extractor (const char *name);
265 static const char *extension_to_language (const char *extension);
266 
267 
268 int
main(int argc,char * argv[])269 main (int argc, char *argv[])
270 {
271   int optchar;
272   bool do_help = false;
273   bool do_version = false;
274   msgdomain_list_ty *mdlp;
275   bool join_existing = false;
276   bool no_default_keywords = false;
277   bool some_additional_keywords = false;
278   bool sort_by_msgid = false;
279   bool sort_by_filepos = false;
280   const char *file_name;
281   const char *files_from = NULL;
282   string_list_ty *file_list;
283   char *output_file = NULL;
284   const char *language = NULL;
285   extractor_ty extractor = { NULL, NULL, NULL, NULL };
286   int cnt;
287   size_t i;
288 
289   /* Set program name for messages.  */
290   set_program_name (argv[0]);
291   error_print_progname = maybe_print_progname;
292 
293 #ifdef HAVE_SETLOCALE
294   /* Set locale via LC_ALL.  */
295   setlocale (LC_ALL, "");
296 #endif
297 
298   /* Set the text message domain.  */
299   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
300   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
301   textdomain (PACKAGE);
302 
303   /* Ensure that write errors on stdout are detected.  */
304   atexit (close_stdout);
305 
306   /* Set initial value of variables.  */
307   default_domain = MESSAGE_DOMAIN_DEFAULT;
308   xgettext_global_source_encoding = po_charset_ascii;
309   init_flag_table_c ();
310   init_flag_table_objc ();
311   init_flag_table_gcc_internal ();
312   init_flag_table_sh ();
313   init_flag_table_python ();
314   init_flag_table_lisp ();
315   init_flag_table_elisp ();
316   init_flag_table_librep ();
317   init_flag_table_scheme ();
318   init_flag_table_java ();
319   init_flag_table_csharp ();
320   init_flag_table_awk ();
321   init_flag_table_ycp ();
322   init_flag_table_tcl ();
323   init_flag_table_perl ();
324   init_flag_table_php ();
325 
326   while ((optchar = getopt_long (argc, argv,
327 				 "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
328 				 long_options, NULL)) != EOF)
329     switch (optchar)
330       {
331       case '\0':		/* Long option.  */
332 	break;
333       case 'a':
334 	x_c_extract_all ();
335 	x_sh_extract_all ();
336 	x_python_extract_all ();
337 	x_lisp_extract_all ();
338 	x_elisp_extract_all ();
339 	x_librep_extract_all ();
340 	x_scheme_extract_all ();
341 	x_java_extract_all ();
342 	x_csharp_extract_all ();
343 	x_awk_extract_all ();
344 	x_tcl_extract_all ();
345 	x_perl_extract_all ();
346 	x_php_extract_all ();
347 	x_glade_extract_all ();
348 	break;
349       case 'c':
350 	if (optarg == NULL)
351 	  {
352 	    add_all_comments = true;
353 	    comment_tag = NULL;
354 	  }
355 	else
356 	  {
357 	    add_all_comments = false;
358 	    comment_tag = optarg;
359 	    /* We ignore leading white space.  */
360 	    while (isspace ((unsigned char) *comment_tag))
361 	      ++comment_tag;
362 	  }
363 	break;
364       case 'C':
365 	language = "C++";
366 	break;
367       case 'd':
368 	default_domain = optarg;
369 	break;
370       case 'D':
371 	dir_list_append (optarg);
372 	break;
373       case 'e':
374 	message_print_style_escape (false);
375 	break;
376       case 'E':
377 	message_print_style_escape (true);
378 	break;
379       case 'f':
380 	files_from = optarg;
381 	break;
382       case 'F':
383 	sort_by_filepos = true;
384 	break;
385       case 'h':
386 	do_help = true;
387 	break;
388       case 'i':
389 	message_print_style_indent ();
390 	break;
391       case 'j':
392 	join_existing = true;
393 	break;
394       case 'k':
395 	if (optarg != NULL && *optarg == '\0')
396 	  /* Make "--keyword=" work like "--keyword" and "-k".  */
397 	  optarg = NULL;
398 	x_c_keyword (optarg);
399 	x_objc_keyword (optarg);
400 	x_sh_keyword (optarg);
401 	x_python_keyword (optarg);
402 	x_lisp_keyword (optarg);
403 	x_elisp_keyword (optarg);
404 	x_librep_keyword (optarg);
405 	x_scheme_keyword (optarg);
406 	x_java_keyword (optarg);
407 	x_csharp_keyword (optarg);
408 	x_awk_keyword (optarg);
409 	x_tcl_keyword (optarg);
410 	x_perl_keyword (optarg);
411 	x_php_keyword (optarg);
412 	x_glade_keyword (optarg);
413 	if (optarg == NULL)
414 	  no_default_keywords = true;
415 	else
416 	  some_additional_keywords = true;
417 	break;
418       case 'l':
419 	/* Accepted for backward compatibility with 0.10.35.  */
420 	break;
421       case 'L':
422 	language = optarg;
423 	break;
424       case 'm':
425 	/* -m takes an optional argument.  If none is given "" is assumed. */
426 	msgstr_prefix = optarg == NULL ? "" : optarg;
427 	break;
428       case 'M':
429 	/* -M takes an optional argument.  If none is given "" is assumed. */
430 	msgstr_suffix = optarg == NULL ? "" : optarg;
431 	break;
432       case 'n':
433 	line_comment = 1;
434 	break;
435       case 'o':
436 	output_file = optarg;
437 	break;
438       case 'p':
439 	{
440 	  size_t len = strlen (optarg);
441 
442 	  if (output_dir != NULL)
443 	    free (output_dir);
444 
445 	  if (optarg[len - 1] == '/')
446 	    output_dir = xstrdup (optarg);
447 	  else
448 	    output_dir = xasprintf ("%s/", optarg);
449 	}
450 	break;
451       case 's':
452 	sort_by_msgid = true;
453 	break;
454       case 'S':
455 	message_print_style_uniforum ();
456 	break;
457       case 'T':
458 	x_c_trigraphs ();
459 	break;
460       case 'V':
461 	do_version = true;
462 	break;
463       case 'w':
464 	{
465 	  int value;
466 	  char *endp;
467 	  value = strtol (optarg, &endp, 10);
468 	  if (endp != optarg)
469 	    message_page_width_set (value);
470 	}
471 	break;
472       case 'x':
473 	read_exclusion_file (optarg);
474 	break;
475       case CHAR_MAX + 1:	/* --copyright-holder */
476 	copyright_holder = optarg;
477 	break;
478       case CHAR_MAX + 2:	/* --foreign-user */
479 	copyright_holder = "";
480 	break;
481       case CHAR_MAX + 3:	/* --from-code */
482 	xgettext_global_source_encoding = po_charset_canonicalize (optarg);
483 	if (xgettext_global_source_encoding == NULL)
484 	  xgettext_global_source_encoding = po_charset_ascii;
485 	break;
486       case CHAR_MAX + 4:	/* --no-wrap */
487 	message_page_width_ignore ();
488 	break;
489       case CHAR_MAX + 5:	/* --msgid-bugs-address */
490 	msgid_bugs_address = optarg;
491 	break;
492       case CHAR_MAX + 6:	/* --properties-output */
493 	output_syntax = &output_format_properties;
494 	break;
495       case CHAR_MAX + 7:	/* --stringtable-output */
496 	output_syntax = &output_format_stringtable;
497 	break;
498       case CHAR_MAX + 8:	/* --flag */
499 	xgettext_record_flag (optarg);
500 	break;
501       case CHAR_MAX + 9:	/* --qt */
502 	recognize_format_qt = true;
503 	break;
504       case CHAR_MAX + 10:	/* --boost */
505 	recognize_format_boost = true;
506 	break;
507       default:
508 	usage (EXIT_FAILURE);
509 	/* NOTREACHED */
510       }
511 
512   /* Version information requested.  */
513   if (do_version)
514     {
515       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
516       /* xgettext: no-wrap */
517       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
518 This is free software; see the source for copying conditions.  There is NO\n\
519 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
520 "),
521 	      "1995-1998, 2000-2006");
522       printf (_("Written by %s.\n"), proper_name ("Ulrich Drepper"));
523       exit (EXIT_SUCCESS);
524     }
525 
526   /* Help is requested.  */
527   if (do_help)
528     usage (EXIT_SUCCESS);
529 
530   /* Verify selected options.  */
531   if (!line_comment && sort_by_filepos)
532     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
533 	   "--no-location", "--sort-by-file");
534 
535   if (sort_by_msgid && sort_by_filepos)
536     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
537 	   "--sort-output", "--sort-by-file");
538 
539   if (recognize_format_qt && recognize_format_boost)
540     /* We cannot support both Qt and Boost format strings, because there are
541        only two formatstring parsers per language, and formatstring_c is the
542        first one for C++.  */
543     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
544 	   "--qt", "--boost");
545 
546   if (join_existing && strcmp (default_domain, "-") == 0)
547     error (EXIT_FAILURE, 0, _("\
548 --join-existing cannot be used when output is written to stdout"));
549 
550   if (no_default_keywords && !some_additional_keywords)
551     {
552       error (0, 0, _("\
553 xgettext cannot work without keywords to look for"));
554       usage (EXIT_FAILURE);
555     }
556 
557   /* Test whether we have some input files given.  */
558   if (files_from == NULL && optind >= argc)
559     {
560       error (EXIT_SUCCESS, 0, _("no input file given"));
561       usage (EXIT_FAILURE);
562     }
563 
564   /* Determine extractor from language.  */
565   if (language != NULL)
566     extractor = language_to_extractor (language);
567 
568   /* Canonize msgstr prefix/suffix.  */
569   if (msgstr_prefix != NULL && msgstr_suffix == NULL)
570     msgstr_suffix = "";
571   else if (msgstr_prefix == NULL && msgstr_suffix != NULL)
572     msgstr_prefix = "";
573 
574   /* Default output directory is the current directory.  */
575   if (output_dir == NULL)
576     output_dir = ".";
577 
578   /* Construct the name of the output file.  If the default domain has
579      the special name "-" we write to stdout.  */
580   if (output_file)
581     {
582       if (IS_ABSOLUTE_PATH (output_file) || strcmp (output_file, "-") == 0)
583 	file_name = xstrdup (output_file);
584       else
585 	/* Please do NOT add a .po suffix! */
586 	file_name = concatenated_pathname (output_dir, output_file, NULL);
587     }
588   else if (strcmp (default_domain, "-") == 0)
589     file_name = "-";
590   else
591     file_name = concatenated_pathname (output_dir, default_domain, ".po");
592 
593   /* Determine list of files we have to process.  */
594   if (files_from != NULL)
595     file_list = read_names_from_file (files_from);
596   else
597     file_list = string_list_alloc ();
598   /* Append names from command line.  */
599   for (cnt = optind; cnt < argc; ++cnt)
600     string_list_append_unique (file_list, argv[cnt]);
601 
602   /* Allocate converter from xgettext_global_source_encoding to UTF-8 (except
603      from ASCII or UTF-8, when this conversion is a no-op).  */
604   if (xgettext_global_source_encoding != po_charset_ascii
605       && xgettext_global_source_encoding != po_charset_utf8)
606     {
607 #if HAVE_ICONV
608       iconv_t cd;
609 
610       /* Avoid glibc-2.1 bug with EUC-KR.  */
611 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
612       if (strcmp (xgettext_global_source_encoding, "EUC-KR") == 0)
613 	cd = (iconv_t)(-1);
614       else
615 # endif
616       cd = iconv_open (po_charset_utf8, xgettext_global_source_encoding);
617       if (cd == (iconv_t)(-1))
618 	error (EXIT_FAILURE, 0, _("\
619 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
620 and iconv() does not support this conversion."),
621 	       xgettext_global_source_encoding, po_charset_utf8,
622 	       basename (program_name));
623       xgettext_global_source_iconv = cd;
624 #else
625       error (EXIT_FAILURE, 0, _("\
626 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
627 This version was built without iconv()."),
628 	     xgettext_global_source_encoding, po_charset_utf8,
629 	     basename (program_name));
630 #endif
631     }
632 
633   /* Allocate a message list to remember all the messages.  */
634   mdlp = msgdomain_list_alloc (true);
635 
636   /* Generate a header, so that we know how and when this PO file was
637      created.  */
638   if (!xgettext_omit_header)
639     message_list_append (mdlp->item[0]->messages, construct_header ());
640 
641   /* Read in the old messages, so that we can add to them.  */
642   if (join_existing)
643     {
644       /* Temporarily reset the directory list to empty, because file_name
645 	 is an output file and therefore should not be searched for.  */
646       void *saved_directory_list = dir_list_save_reset ();
647       extractor_ty po_extractor = { extract_po, NULL, NULL, NULL };
648 
649       extract_from_file (file_name, po_extractor, mdlp);
650       if (!is_ascii_msgdomain_list (mdlp))
651 	mdlp = iconv_msgdomain_list (mdlp, "UTF-8", file_name);
652 
653       dir_list_restore (saved_directory_list);
654     }
655 
656   /* Process all input files.  */
657   for (i = 0; i < file_list->nitems; i++)
658     {
659       const char *filename;
660       extractor_ty this_file_extractor;
661 
662       filename = file_list->item[i];
663 
664       if (extractor.func)
665 	this_file_extractor = extractor;
666       else
667 	{
668 	  const char *base;
669 	  char *reduced;
670 	  const char *extension;
671 	  const char *language;
672 
673 	  base = strrchr (filename, '/');
674 	  if (!base)
675 	    base = filename;
676 
677 	  reduced = xstrdup (base);
678 	  /* Remove a trailing ".in" - it's a generic suffix.  */
679 	  if (strlen (reduced) >= 3
680 	      && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0)
681 	    reduced[strlen (reduced) - 3] = '\0';
682 
683 	  /* Work out what the file extension is.  */
684 	  extension = strrchr (reduced, '.');
685 	  if (extension)
686 	    ++extension;
687 	  else
688 	    extension = "";
689 
690 	  /* Derive the language from the extension, and the extractor
691 	     function from the language.  */
692 	  language = extension_to_language (extension);
693 	  if (language == NULL)
694 	    {
695 	      error (0, 0, _("\
696 warning: file `%s' extension `%s' is unknown; will try C"), filename, extension);
697 	      language = "C";
698 	    }
699 	  this_file_extractor = language_to_extractor (language);
700 
701 	  free (reduced);
702 	}
703 
704       /* Extract the strings from the file.  */
705       extract_from_file (filename, this_file_extractor, mdlp);
706     }
707   string_list_free (file_list);
708 
709   /* Finalize the constructed header.  */
710   if (!xgettext_omit_header)
711     finalize_header (mdlp);
712 
713   /* Free the allocated converter.  */
714 #if HAVE_ICONV
715   if (xgettext_global_source_encoding != po_charset_ascii
716       && xgettext_global_source_encoding != po_charset_utf8)
717     iconv_close (xgettext_global_source_iconv);
718 #endif
719 
720   /* Sorting the list of messages.  */
721   if (sort_by_filepos)
722     msgdomain_list_sort_by_filepos (mdlp);
723   else if (sort_by_msgid)
724     msgdomain_list_sort_by_msgid (mdlp);
725 
726   /* Write the PO file.  */
727   msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug);
728 
729   exit (EXIT_SUCCESS);
730 }
731 
732 
733 /* Display usage information and exit.  */
734 static void
usage(int status)735 usage (int status)
736 {
737   if (status != EXIT_SUCCESS)
738     fprintf (stderr, _("Try `%s --help' for more information.\n"),
739 	     program_name);
740   else
741     {
742       printf (_("\
743 Usage: %s [OPTION] [INPUTFILE]...\n\
744 "), program_name);
745       printf ("\n");
746       printf (_("\
747 Extract translatable strings from given input files.\n\
748 "));
749       printf ("\n");
750       /* xgettext: no-wrap */
751       printf (_("\
752 Mandatory arguments to long options are mandatory for short options too.\n\
753 Similarly for optional arguments.\n\
754 "));
755       printf ("\n");
756       printf (_("\
757 Input file location:\n"));
758       printf (_("\
759   INPUTFILE ...               input files\n"));
760       printf (_("\
761   -f, --files-from=FILE       get list of input files from FILE\n"));
762       printf (_("\
763   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
764       printf (_("\
765 If input file is -, standard input is read.\n"));
766       printf ("\n");
767       printf (_("\
768 Output file location:\n"));
769       printf (_("\
770   -d, --default-domain=NAME   use NAME.po for output (instead of messages.po)\n"));
771       printf (_("\
772   -o, --output=FILE           write output to specified file\n"));
773       printf (_("\
774   -p, --output-dir=DIR        output files will be placed in directory DIR\n"));
775       printf (_("\
776 If output file is -, output is written to standard output.\n"));
777       printf ("\n");
778       printf (_("\
779 Choice of input file language:\n"));
780       printf (_("\
781   -L, --language=NAME         recognise the specified language\n\
782                                 (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
783                                 EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
784                                 JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
785                                 GCC-source, NXStringTable, RST, Glade)\n"));
786       printf (_("\
787   -C, --c++                   shorthand for --language=C++\n"));
788       printf (_("\
789 By default the language is guessed depending on the input file name extension.\n"));
790       printf ("\n");
791       printf (_("\
792 Input file interpretation:\n"));
793       printf (_("\
794       --from-code=NAME        encoding of input files\n\
795                                 (except for Python, Tcl, Glade)\n"));
796       printf (_("\
797 By default the input files are assumed to be in ASCII.\n"));
798       printf ("\n");
799       printf (_("\
800 Operation mode:\n"));
801       printf (_("\
802   -j, --join-existing         join messages with existing file\n"));
803       printf (_("\
804   -x, --exclude-file=FILE.po  entries from FILE.po are not extracted\n"));
805       printf (_("\
806   -c, --add-comments[=TAG]    place comment block with TAG (or those\n\
807                               preceding keyword lines) in output file\n"));
808       printf ("\n");
809       printf (_("\
810 Language specific options:\n"));
811       printf (_("\
812   -a, --extract-all           extract all strings\n"));
813       printf (_("\
814                                 (only languages C, C++, ObjectiveC, Shell,\n\
815                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
816                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
817       printf (_("\
818   -k, --keyword[=WORD]        additional keyword to be looked for (without\n\
819                               WORD means not to use default keywords)\n"));
820       printf (_("\
821                                 (only languages C, C++, ObjectiveC, Shell,\n\
822                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
823                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
824       printf (_("\
825       --flag=WORD:ARG:FLAG    additional flag for strings inside the argument\n\
826                               number ARG of keyword WORD\n"));
827       printf (_("\
828                                 (only languages C, C++, ObjectiveC, Shell,\n\
829                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
830                                 C#, awk, YCP, Tcl, Perl, PHP, GCC-source)\n"));
831       printf (_("\
832   -T, --trigraphs             understand ANSI C trigraphs for input\n"));
833       printf (_("\
834                                 (only languages C, C++, ObjectiveC)\n"));
835       printf (_("\
836       --qt                    recognize Qt format strings\n"));
837       printf (_("\
838                                 (only language C++)\n"));
839       printf (_("\
840       --boost                 recognize Boost format strings\n"));
841       printf (_("\
842                                 (only language C++)\n"));
843       printf (_("\
844       --debug                 more detailed formatstring recognition result\n"));
845       printf ("\n");
846       printf (_("\
847 Output details:\n"));
848       printf (_("\
849   -e, --no-escape             do not use C escapes in output (default)\n"));
850       printf (_("\
851   -E, --escape                use C escapes in output, no extended chars\n"));
852       printf (_("\
853       --force-po              write PO file even if empty\n"));
854       printf (_("\
855   -i, --indent                write the .po file using indented style\n"));
856       printf (_("\
857       --no-location           do not write '#: filename:line' lines\n"));
858       printf (_("\
859   -n, --add-location          generate '#: filename:line' lines (default)\n"));
860       printf (_("\
861       --strict                write out strict Uniforum conforming .po file\n"));
862       printf (_("\
863       --properties-output     write out a Java .properties file\n"));
864       printf (_("\
865       --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
866       printf (_("\
867   -w, --width=NUMBER          set output page width\n"));
868       printf (_("\
869       --no-wrap               do not break long message lines, longer than\n\
870                               the output page width, into several lines\n"));
871       printf (_("\
872   -s, --sort-output           generate sorted output\n"));
873       printf (_("\
874   -F, --sort-by-file          sort output by file location\n"));
875       printf (_("\
876       --omit-header           don't write header with `msgid \"\"' entry\n"));
877       printf (_("\
878       --copyright-holder=STRING  set copyright holder in output\n"));
879       printf (_("\
880       --foreign-user          omit FSF copyright in output for foreign user\n"));
881       printf (_("\
882       --msgid-bugs-address=EMAIL@ADDRESS  set report address for msgid bugs\n"));
883       printf (_("\
884   -m, --msgstr-prefix[=STRING]  use STRING or \"\" as prefix for msgstr entries\n"));
885       printf (_("\
886   -M, --msgstr-suffix[=STRING]  use STRING or \"\" as suffix for msgstr entries\n"));
887       printf ("\n");
888       printf (_("\
889 Informative output:\n"));
890       printf (_("\
891   -h, --help                  display this help and exit\n"));
892       printf (_("\
893   -V, --version               output version information and exit\n"));
894       printf ("\n");
895       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
896 	     stdout);
897     }
898 
899   exit (status);
900 }
901 
902 
903 static void
exclude_directive_domain(abstract_catalog_reader_ty * pop,char * name)904 exclude_directive_domain (abstract_catalog_reader_ty *pop, char *name)
905 {
906   po_gram_error_at_line (&gram_pos,
907 			 _("this file may not contain domain directives"));
908 }
909 
910 
911 static void
exclude_directive_message(abstract_catalog_reader_ty * pop,char * msgctxt,char * msgid,lex_pos_ty * msgid_pos,char * msgid_plural,char * msgstr,size_t msgstr_len,lex_pos_ty * msgstr_pos,char * prev_msgctxt,char * prev_msgid,char * prev_msgid_plural,bool force_fuzzy,bool obsolete)912 exclude_directive_message (abstract_catalog_reader_ty *pop,
913 			   char *msgctxt,
914 			   char *msgid,
915 			   lex_pos_ty *msgid_pos,
916 			   char *msgid_plural,
917 			   char *msgstr, size_t msgstr_len,
918 			   lex_pos_ty *msgstr_pos,
919 			   char *prev_msgctxt,
920 			   char *prev_msgid,
921 			   char *prev_msgid_plural,
922 			   bool force_fuzzy, bool obsolete)
923 {
924   message_ty *mp;
925 
926   /* See if this message ID has been seen before.  */
927   if (exclude == NULL)
928     exclude = message_list_alloc (true);
929   mp = message_list_search (exclude, msgctxt, msgid);
930   if (mp != NULL)
931     free (msgid);
932   else
933     {
934       mp = message_alloc (msgctxt, msgid, msgid_plural, "", 1, msgstr_pos);
935       /* Do not free msgid.  */
936       message_list_append (exclude, mp);
937     }
938 
939   /* All we care about is the msgid.  Throw the msgstr away.
940      Don't even check for duplicate msgids.  */
941   free (msgstr);
942 }
943 
944 
945 /* So that the one parser can be used for multiple programs, and also
946    use good data hiding and encapsulation practices, an object
947    oriented approach has been taken.  An object instance is allocated,
948    and all actions resulting from the parse will be through
949    invocations of method functions of that object.  */
950 
951 static abstract_catalog_reader_class_ty exclude_methods =
952 {
953   sizeof (abstract_catalog_reader_ty),
954   NULL, /* constructor */
955   NULL, /* destructor */
956   NULL, /* parse_brief */
957   NULL, /* parse_debrief */
958   exclude_directive_domain,
959   exclude_directive_message,
960   NULL, /* comment */
961   NULL, /* comment_dot */
962   NULL, /* comment_filepos */
963   NULL, /* comment_special */
964 };
965 
966 
967 static void
read_exclusion_file(char * filename)968 read_exclusion_file (char *filename)
969 {
970   char *real_filename;
971   FILE *fp = open_catalog_file (filename, &real_filename, true);
972   abstract_catalog_reader_ty *pop;
973 
974   pop = catalog_reader_alloc (&exclude_methods);
975   catalog_reader_parse (pop, fp, real_filename, filename, &input_format_po);
976   catalog_reader_free (pop);
977 
978   if (fp != stdin)
979     fclose (fp);
980 }
981 
982 
983 void
split_keywordspec(const char * spec,const char ** endp,struct callshape * shapep)984 split_keywordspec (const char *spec,
985 		   const char **endp, struct callshape *shapep)
986 {
987   const char *p;
988   int argnum1 = 0;
989   int argnum2 = 0;
990   int argnumc = 0;
991   bool argnum1_glib_context = false;
992   bool argnum2_glib_context = false;
993   int argtotal = 0;
994   string_list_ty xcomments;
995 
996   string_list_init (&xcomments);
997 
998   /* Start parsing from the end.  */
999   p = spec + strlen (spec);
1000   while (p > spec)
1001     {
1002       if (isdigit ((unsigned char) p[-1])
1003 	  || ((p[-1] == 'c' || p[-1] == 'g' || p[-1] == 't')
1004 	      && p - 1 > spec && isdigit ((unsigned char) p[-2])))
1005 	{
1006 	  bool contextp = (p[-1] == 'c');
1007 	  bool glibp = (p[-1] == 'g');
1008 	  bool totalp = (p[-1] == 't');
1009 
1010 	  do
1011 	    p--;
1012 	  while (p > spec && isdigit ((unsigned char) p[-1]));
1013 
1014 	  if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1015 	    {
1016 	      char *dummy;
1017 	      int arg = strtol (p, &dummy, 10);
1018 
1019 	      if (contextp)
1020 		{
1021 		  if (argnumc != 0)
1022 		    /* Only one context argument can be given.  */
1023 		    break;
1024 		  argnumc = arg;
1025 		}
1026 	      else if (totalp)
1027 		{
1028 		  if (argtotal != 0)
1029 		    /* Only one total number of arguments can be given.  */
1030 		    break;
1031 		  argtotal = arg;
1032 		}
1033 	      else
1034 		{
1035 		  if (argnum2 != 0)
1036 		    /* At most two normal arguments can be given.  */
1037 		    break;
1038 		  argnum2 = argnum1;
1039 		  argnum2_glib_context = argnum1_glib_context;
1040 		  argnum1 = arg;
1041 		  argnum1_glib_context = glibp;
1042 		}
1043 	    }
1044 	  else
1045 	    break;
1046 	}
1047       else if (p[-1] == '"')
1048 	{
1049 	  const char *xcomment_end;
1050 
1051 	  p--;
1052 	  xcomment_end = p;
1053 
1054 	  while (p > spec && p[-1] != '"')
1055 	    p--;
1056 
1057 	  if (p > spec /* && p[-1] == '"' */)
1058 	    {
1059 	      const char *xcomment_start;
1060 
1061 	      xcomment_start = p;
1062 	      p--;
1063 	      if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1064 		{
1065 		  size_t xcomment_len = xcomment_end - xcomment_start;
1066 		  char *xcomment = (char *) xmalloc (xcomment_len + 1);
1067 
1068 		  memcpy (xcomment, xcomment_start, xcomment_len);
1069 		  xcomment[xcomment_len] = '\0';
1070 		  string_list_append (&xcomments, xcomment);
1071 		}
1072 	      else
1073 		break;
1074 	    }
1075 	  else
1076 	    break;
1077 	}
1078       else
1079 	break;
1080 
1081       /* Here an element of the comma-separated list has been parsed.  */
1082       if (!(p > spec && (p[-1] == ',' || p[-1] == ':')))
1083 	abort ();
1084       p--;
1085       if (*p == ':')
1086 	{
1087 	  size_t i;
1088 
1089 	  if (argnum1 == 0 && argnum2 == 0)
1090 	    /* At least one non-context argument must be given.  */
1091 	    break;
1092 	  if (argnumc != 0
1093 	      && (argnum1_glib_context || argnum2_glib_context))
1094 	    /* Incompatible ways to specify the context.  */
1095 	    break;
1096 	  *endp = p;
1097 	  shapep->argnum1 = (argnum1 > 0 ? argnum1 : 1);
1098 	  shapep->argnum2 = argnum2;
1099 	  shapep->argnumc = argnumc;
1100 	  shapep->argnum1_glib_context = argnum1_glib_context;
1101 	  shapep->argnum2_glib_context = argnum2_glib_context;
1102 	  shapep->argtotal = argtotal;
1103 	  /* Reverse the order of the xcomments.  */
1104 	  string_list_init (&shapep->xcomments);
1105 	  for (i = xcomments.nitems; i > 0; )
1106 	    string_list_append (&shapep->xcomments, xcomments.item[--i]);
1107 	  string_list_destroy (&xcomments);
1108 	  return;
1109 	}
1110     }
1111 
1112   /* Couldn't parse the desired syntax.  */
1113   *endp = spec + strlen (spec);
1114   shapep->argnum1 = 1;
1115   shapep->argnum2 = 0;
1116   shapep->argnumc = 0;
1117   shapep->argnum1_glib_context = false;
1118   shapep->argnum2_glib_context = false;
1119   shapep->argtotal = 0;
1120   string_list_init (&shapep->xcomments);
1121   string_list_destroy (&xcomments);
1122 }
1123 
1124 
1125 void
insert_keyword_callshape(hash_table * table,const char * keyword,size_t keyword_len,const struct callshape * shape)1126 insert_keyword_callshape (hash_table *table,
1127 			  const char *keyword, size_t keyword_len,
1128 			  const struct callshape *shape)
1129 {
1130   void *old_value;
1131 
1132   if (hash_find_entry (table, keyword, keyword_len, &old_value))
1133     {
1134       /* Create a one-element 'struct callshapes'.  */
1135       struct callshapes *shapes =
1136 	(struct callshapes *) xmalloc (sizeof (struct callshapes));
1137       shapes->nshapes = 1;
1138       shapes->shapes[0] = *shape;
1139       keyword =
1140 	(const char *) hash_insert_entry (table, keyword, keyword_len, shapes);
1141       if (keyword == NULL)
1142 	abort ();
1143       shapes->keyword = keyword;
1144       shapes->keyword_len = keyword_len;
1145     }
1146   else
1147     {
1148       /* Found a 'struct callshapes'.  See whether it already contains the
1149 	 desired shape.  */
1150       struct callshapes *old_shapes = (struct callshapes *) old_value;
1151       bool found;
1152       size_t i;
1153 
1154       found = false;
1155       for (i = 0; i < old_shapes->nshapes; i++)
1156 	if (old_shapes->shapes[i].argnum1 == shape->argnum1
1157 	    && old_shapes->shapes[i].argnum2 == shape->argnum2
1158 	    && old_shapes->shapes[i].argnumc == shape->argnumc
1159 	    && old_shapes->shapes[i].argnum1_glib_context
1160 	       == shape->argnum1_glib_context
1161 	    && old_shapes->shapes[i].argnum2_glib_context
1162 	       == shape->argnum2_glib_context
1163 	    && old_shapes->shapes[i].argtotal == shape->argtotal)
1164 	  {
1165 	    old_shapes->shapes[i].xcomments = shape->xcomments;
1166 	    found = true;
1167 	    break;
1168 	  }
1169 
1170       if (!found)
1171 	{
1172 	  /* Replace the existing 'struct callshapes' with a new one.  */
1173 	  struct callshapes *shapes =
1174 	    (struct callshapes *)
1175 	    xmalloc (sizeof (struct callshapes)
1176 		     + old_shapes->nshapes * sizeof (struct callshape));
1177 
1178 	  shapes->keyword = old_shapes->keyword;
1179 	  shapes->keyword_len = old_shapes->keyword_len;
1180 	  shapes->nshapes = old_shapes->nshapes + 1;
1181 	  for (i = 0; i < old_shapes->nshapes; i++)
1182 	    shapes->shapes[i] = old_shapes->shapes[i];
1183 	  shapes->shapes[i] = *shape;
1184 	  if (hash_set_value (table, keyword, keyword_len, shapes))
1185 	    abort ();
1186 	  free (old_shapes);
1187 	}
1188     }
1189 }
1190 
1191 
1192 /* Null context.  */
1193 flag_context_ty null_context = { undecided, false, undecided, false };
1194 
1195 /* Transparent context.  */
1196 flag_context_ty passthrough_context = { undecided, true, undecided, true };
1197 
1198 
1199 flag_context_ty
inherited_context(flag_context_ty outer_context,flag_context_ty modifier_context)1200 inherited_context (flag_context_ty outer_context,
1201 		   flag_context_ty modifier_context)
1202 {
1203   flag_context_ty result = modifier_context;
1204 
1205   if (result.pass_format1)
1206     {
1207       result.is_format1 = outer_context.is_format1;
1208       result.pass_format1 = false;
1209     }
1210   if (result.pass_format2)
1211     {
1212       result.is_format2 = outer_context.is_format2;
1213       result.pass_format2 = false;
1214     }
1215   return result;
1216 }
1217 
1218 
1219 /* Null context list iterator.  */
1220 flag_context_list_iterator_ty null_context_list_iterator = { 1, NULL };
1221 
1222 /* Transparent context list iterator.  */
1223 static flag_context_list_ty passthrough_context_circular_list =
1224   {
1225     1,
1226     { undecided, true, undecided, true },
1227     &passthrough_context_circular_list
1228   };
1229 flag_context_list_iterator_ty passthrough_context_list_iterator =
1230   {
1231     1,
1232     &passthrough_context_circular_list
1233   };
1234 
1235 
1236 flag_context_list_iterator_ty
flag_context_list_iterator(flag_context_list_ty * list)1237 flag_context_list_iterator (flag_context_list_ty *list)
1238 {
1239   flag_context_list_iterator_ty result;
1240 
1241   result.argnum = 1;
1242   result.head = list;
1243   return result;
1244 }
1245 
1246 
1247 flag_context_ty
flag_context_list_iterator_advance(flag_context_list_iterator_ty * iter)1248 flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter)
1249 {
1250   if (iter->head == NULL)
1251     return null_context;
1252   if (iter->argnum == iter->head->argnum)
1253     {
1254       flag_context_ty result = iter->head->flags;
1255 
1256       /* Special casing of circular list.  */
1257       if (iter->head != iter->head->next)
1258 	{
1259 	  iter->head = iter->head->next;
1260 	  iter->argnum++;
1261 	}
1262 
1263       return result;
1264     }
1265   else
1266     {
1267       iter->argnum++;
1268       return null_context;
1269     }
1270 }
1271 
1272 
1273 flag_context_list_ty *
flag_context_list_table_lookup(flag_context_list_table_ty * flag_table,const void * key,size_t keylen)1274 flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
1275 				const void *key, size_t keylen)
1276 {
1277   void *entry;
1278 
1279   if (flag_table->table != NULL
1280       && hash_find_entry (flag_table, key, keylen, &entry) == 0)
1281     return (flag_context_list_ty *) entry;
1282   else
1283     return NULL;
1284 }
1285 
1286 
1287 static void
flag_context_list_table_insert(flag_context_list_table_ty * table,unsigned int index,const char * name_start,const char * name_end,int argnum,enum is_format value,bool pass)1288 flag_context_list_table_insert (flag_context_list_table_ty *table,
1289 				unsigned int index,
1290 				const char *name_start, const char *name_end,
1291 				int argnum, enum is_format value, bool pass)
1292 {
1293   char *allocated_name = NULL;
1294 
1295   if (table == &flag_table_lisp)
1296     {
1297       /* Convert NAME to upper case.  */
1298       size_t name_len = name_end - name_start;
1299       char *name = allocated_name = (char *) xallocsa (name_len);
1300       size_t i;
1301 
1302       for (i = 0; i < name_len; i++)
1303 	name[i] = (name_start[i] >= 'a' && name_start[i] <= 'z'
1304 		   ? name_start[i] - 'a' + 'A'
1305 		   : name_start[i]);
1306       name_start = name;
1307       name_end = name + name_len;
1308     }
1309   else if (table == &flag_table_tcl)
1310     {
1311       /* Remove redundant "::" prefix.  */
1312       if (name_end - name_start > 2
1313 	  && name_start[0] == ':' && name_start[1] == ':')
1314 	name_start += 2;
1315     }
1316 
1317   /* Insert the pair (VALUE, PASS) at INDEX in the element numbered ARGNUM
1318      of the list corresponding to NAME in the TABLE.  */
1319   if (table->table == NULL)
1320     hash_init (table, 100);
1321   {
1322     void *entry;
1323 
1324     if (hash_find_entry (table, name_start, name_end - name_start, &entry) != 0)
1325       {
1326 	/* Create new hash table entry.  */
1327 	flag_context_list_ty *list =
1328 	  (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1329 	list->argnum = argnum;
1330 	memset (&list->flags, '\0', sizeof (list->flags));
1331 	switch (index)
1332 	  {
1333 	  case 0:
1334 	    list->flags.is_format1 = value;
1335 	    list->flags.pass_format1 = pass;
1336 	    break;
1337 	  case 1:
1338 	    list->flags.is_format2 = value;
1339 	    list->flags.pass_format2 = pass;
1340 	    break;
1341 	  default:
1342 	    abort ();
1343 	  }
1344 	list->next = NULL;
1345 	hash_insert_entry (table, name_start, name_end - name_start, list);
1346       }
1347     else
1348       {
1349 	flag_context_list_ty *list = (flag_context_list_ty *)entry;
1350 	flag_context_list_ty **lastp = NULL;
1351 
1352 	while (list != NULL && list->argnum < argnum)
1353 	  {
1354 	    lastp = &list->next;
1355 	    list = *lastp;
1356 	  }
1357 	if (list != NULL && list->argnum == argnum)
1358 	  {
1359 	    /* Add this flag to the current argument number.  */
1360 	    switch (index)
1361 	      {
1362 	      case 0:
1363 		list->flags.is_format1 = value;
1364 		list->flags.pass_format1 = pass;
1365 		break;
1366 	      case 1:
1367 		list->flags.is_format2 = value;
1368 		list->flags.pass_format2 = pass;
1369 		break;
1370 	      default:
1371 		abort ();
1372 	      }
1373 	  }
1374 	else if (lastp != NULL)
1375 	  {
1376 	    /* Add a new list entry for this argument number.  */
1377 	    list =
1378 	      (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1379 	    list->argnum = argnum;
1380 	    memset (&list->flags, '\0', sizeof (list->flags));
1381 	    switch (index)
1382 	      {
1383 	      case 0:
1384 		list->flags.is_format1 = value;
1385 		list->flags.pass_format1 = pass;
1386 		break;
1387 	      case 1:
1388 		list->flags.is_format2 = value;
1389 		list->flags.pass_format2 = pass;
1390 		break;
1391 	      default:
1392 		abort ();
1393 	      }
1394 	    list->next = *lastp;
1395 	    *lastp = list;
1396 	  }
1397 	else
1398 	  {
1399 	    /* Add a new list entry for this argument number, at the beginning
1400 	       of the list.  Since we don't have an API for replacing the
1401 	       value of a key in the hash table, we have to copy the first
1402 	       list element.  */
1403 	    flag_context_list_ty *copy =
1404 	      (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1405 	    *copy = *list;
1406 
1407 	    list->argnum = argnum;
1408 	    memset (&list->flags, '\0', sizeof (list->flags));
1409 	    switch (index)
1410 	      {
1411 	      case 0:
1412 		list->flags.is_format1 = value;
1413 		list->flags.pass_format1 = pass;
1414 		break;
1415 	      case 1:
1416 		list->flags.is_format2 = value;
1417 		list->flags.pass_format2 = pass;
1418 		break;
1419 	      default:
1420 		abort ();
1421 	      }
1422 	    list->next = copy;
1423 	  }
1424       }
1425   }
1426 
1427   if (allocated_name != NULL)
1428     freesa (allocated_name);
1429 }
1430 
1431 
1432 void
xgettext_record_flag(const char * optionstring)1433 xgettext_record_flag (const char *optionstring)
1434 {
1435   /* Check the string has at least two colons.  (Colons in the name are
1436      allowed, needed for the Lisp and the Tcl backends.)  */
1437   const char *colon1;
1438   const char *colon2;
1439 
1440   for (colon2 = optionstring + strlen (optionstring); ; )
1441     {
1442       if (colon2 == optionstring)
1443 	goto err;
1444       colon2--;
1445       if (*colon2 == ':')
1446 	break;
1447     }
1448   for (colon1 = colon2; ; )
1449     {
1450       if (colon1 == optionstring)
1451 	goto err;
1452       colon1--;
1453       if (*colon1 == ':')
1454 	break;
1455     }
1456   {
1457     const char *name_start = optionstring;
1458     const char *name_end = colon1;
1459     const char *argnum_start = colon1 + 1;
1460     const char *argnum_end = colon2;
1461     const char *flag = colon2 + 1;
1462     int argnum;
1463 
1464     /* Check the parts' syntax.  */
1465     if (name_end == name_start)
1466       goto err;
1467     if (argnum_end == argnum_start)
1468       goto err;
1469     {
1470       char *endp;
1471       argnum = strtol (argnum_start, &endp, 10);
1472       if (endp != argnum_end)
1473 	goto err;
1474     }
1475     if (argnum <= 0)
1476       goto err;
1477 
1478     /* Analyze the flag part.  */
1479     {
1480       bool pass;
1481 
1482       pass = false;
1483       if (strlen (flag) >= 5 && memcmp (flag, "pass-", 5) == 0)
1484 	{
1485 	  pass = true;
1486 	  flag += 5;
1487 	}
1488 
1489       /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap"
1490 	 here - it has no sense.  */
1491       if (strlen (flag) >= 7
1492 	  && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
1493 	{
1494 	  const char *p;
1495 	  size_t n;
1496 	  enum is_format value;
1497 	  size_t type;
1498 
1499 	  p = flag;
1500 	  n = strlen (flag) - 7;
1501 
1502 	  if (n >= 3 && memcmp (p, "no-", 3) == 0)
1503 	    {
1504 	      p += 3;
1505 	      n -= 3;
1506 	      value = no;
1507 	    }
1508 	  else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
1509 	    {
1510 	      p += 9;
1511 	      n -= 9;
1512 	      value = possible;
1513 	    }
1514 	  else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
1515 	    {
1516 	      p += 11;
1517 	      n -= 11;
1518 	      value = impossible;
1519 	    }
1520 	  else
1521 	    value = yes_according_to_context;
1522 
1523 	  for (type = 0; type < NFORMATS; type++)
1524 	    if (strlen (format_language[type]) == n
1525 		&& memcmp (format_language[type], p, n) == 0)
1526 	      {
1527 		switch (type)
1528 		  {
1529 		  case format_c:
1530 		    flag_context_list_table_insert (&flag_table_c, 0,
1531 						    name_start, name_end,
1532 						    argnum, value, pass);
1533 		    flag_context_list_table_insert (&flag_table_cxx_qt, 0,
1534 						    name_start, name_end,
1535 						    argnum, value, pass);
1536 		    flag_context_list_table_insert (&flag_table_cxx_boost, 0,
1537 						    name_start, name_end,
1538 						    argnum, value, pass);
1539 		    flag_context_list_table_insert (&flag_table_objc, 0,
1540 						    name_start, name_end,
1541 						    argnum, value, pass);
1542 		    break;
1543 		  case format_objc:
1544 		    flag_context_list_table_insert (&flag_table_objc, 1,
1545 						    name_start, name_end,
1546 						    argnum, value, pass);
1547 		    break;
1548 		  case format_sh:
1549 		    flag_context_list_table_insert (&flag_table_sh, 0,
1550 						    name_start, name_end,
1551 						    argnum, value, pass);
1552 		    break;
1553 		  case format_python:
1554 		    flag_context_list_table_insert (&flag_table_python, 0,
1555 						    name_start, name_end,
1556 						    argnum, value, pass);
1557 		    break;
1558 		  case format_lisp:
1559 		    flag_context_list_table_insert (&flag_table_lisp, 0,
1560 						    name_start, name_end,
1561 						    argnum, value, pass);
1562 		    break;
1563 		  case format_elisp:
1564 		    flag_context_list_table_insert (&flag_table_elisp, 0,
1565 						    name_start, name_end,
1566 						    argnum, value, pass);
1567 		    break;
1568 		  case format_librep:
1569 		    flag_context_list_table_insert (&flag_table_librep, 0,
1570 						    name_start, name_end,
1571 						    argnum, value, pass);
1572 		    break;
1573 		  case format_scheme:
1574 		    flag_context_list_table_insert (&flag_table_scheme, 0,
1575 						    name_start, name_end,
1576 						    argnum, value, pass);
1577 		    break;
1578 		  case format_smalltalk:
1579 		    break;
1580 		  case format_java:
1581 		    flag_context_list_table_insert (&flag_table_java, 0,
1582 						    name_start, name_end,
1583 						    argnum, value, pass);
1584 		    break;
1585 		  case format_csharp:
1586 		    flag_context_list_table_insert (&flag_table_csharp, 0,
1587 						    name_start, name_end,
1588 						    argnum, value, pass);
1589 		    break;
1590 		  case format_awk:
1591 		    flag_context_list_table_insert (&flag_table_awk, 0,
1592 						    name_start, name_end,
1593 						    argnum, value, pass);
1594 		    break;
1595 		  case format_pascal:
1596 		    break;
1597 		  case format_ycp:
1598 		    flag_context_list_table_insert (&flag_table_ycp, 0,
1599 						    name_start, name_end,
1600 						    argnum, value, pass);
1601 		    break;
1602 		  case format_tcl:
1603 		    flag_context_list_table_insert (&flag_table_tcl, 0,
1604 						    name_start, name_end,
1605 						    argnum, value, pass);
1606 		    break;
1607 		  case format_perl:
1608 		    flag_context_list_table_insert (&flag_table_perl, 0,
1609 						    name_start, name_end,
1610 						    argnum, value, pass);
1611 		    break;
1612 		  case format_perl_brace:
1613 		    flag_context_list_table_insert (&flag_table_perl, 1,
1614 						    name_start, name_end,
1615 						    argnum, value, pass);
1616 		    break;
1617 		  case format_php:
1618 		    flag_context_list_table_insert (&flag_table_php, 0,
1619 						    name_start, name_end,
1620 						    argnum, value, pass);
1621 		    break;
1622 		  case format_gcc_internal:
1623 		    flag_context_list_table_insert (&flag_table_gcc_internal, 0,
1624 						    name_start, name_end,
1625 						    argnum, value, pass);
1626 		    break;
1627 		  case format_qt:
1628 		    flag_context_list_table_insert (&flag_table_cxx_qt, 1,
1629 						    name_start, name_end,
1630 						    argnum, value, pass);
1631 		    break;
1632 		  case format_boost:
1633 		    flag_context_list_table_insert (&flag_table_cxx_boost, 1,
1634 						    name_start, name_end,
1635 						    argnum, value, pass);
1636 		    break;
1637 		  default:
1638 		    abort ();
1639 		  }
1640 		return;
1641 	      }
1642 	  /* If the flag is not among the valid values, the optionstring is
1643 	     invalid.  */
1644 	}
1645     }
1646   }
1647 
1648 err:
1649   error (EXIT_FAILURE, 0, _("\
1650 A --flag argument doesn't have the <keyword>:<argnum>:[pass-]<flag> syntax: %s"),
1651 	 optionstring);
1652 }
1653 
1654 
1655 /* Comment handling: There is a list of automatic comments that may be appended
1656    to the next message.  Used by remember_a_message().  */
1657 
1658 static string_list_ty *comment;
1659 
1660 static void
xgettext_comment_add(const char * str)1661 xgettext_comment_add (const char *str)
1662 {
1663   if (comment == NULL)
1664     comment = string_list_alloc ();
1665   string_list_append (comment, str);
1666 }
1667 
1668 static const char *
xgettext_comment(size_t n)1669 xgettext_comment (size_t n)
1670 {
1671   if (comment == NULL || n >= comment->nitems)
1672     return NULL;
1673   return comment->item[n];
1674 }
1675 
1676 static void
xgettext_comment_reset()1677 xgettext_comment_reset ()
1678 {
1679   if (comment != NULL)
1680     {
1681       string_list_free (comment);
1682       comment = NULL;
1683     }
1684 }
1685 
1686 
1687 refcounted_string_list_ty *savable_comment;
1688 
1689 void
savable_comment_add(const char * str)1690 savable_comment_add (const char *str)
1691 {
1692   if (savable_comment == NULL)
1693     {
1694       savable_comment =
1695 	(refcounted_string_list_ty *) xmalloc (sizeof (*savable_comment));
1696       savable_comment->refcount = 1;
1697       string_list_init (&savable_comment->contents);
1698     }
1699   else if (savable_comment->refcount > 1)
1700     {
1701       /* Unshare the list by making copies.  */
1702       struct string_list_ty *oldcontents;
1703       size_t i;
1704 
1705       savable_comment->refcount--;
1706       oldcontents = &savable_comment->contents;
1707 
1708       savable_comment =
1709 	(refcounted_string_list_ty *) xmalloc (sizeof (*savable_comment));
1710       savable_comment->refcount = 1;
1711       string_list_init (&savable_comment->contents);
1712       for (i = 0; i < oldcontents->nitems; i++)
1713 	string_list_append (&savable_comment->contents, oldcontents->item[i]);
1714     }
1715   string_list_append (&savable_comment->contents, str);
1716 }
1717 
1718 void
savable_comment_reset()1719 savable_comment_reset ()
1720 {
1721   drop_reference (savable_comment);
1722   savable_comment = NULL;
1723 }
1724 
1725 static void
savable_comment_to_xgettext_comment(refcounted_string_list_ty * rslp)1726 savable_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
1727 {
1728   xgettext_comment_reset ();
1729   if (rslp != NULL)
1730     {
1731       size_t i;
1732 
1733       for (i = 0; i < rslp->contents.nitems; i++)
1734 	xgettext_comment_add (rslp->contents.item[i]);
1735     }
1736 }
1737 
1738 
1739 
/* Open the input file FN for reading.

   - "-" stands for standard input.
   - An absolute file name is opened as is.
   - Any other name is tried in each directory of the directory search
     list (dir_list_nth) in turn; a directory is skipped only when the
     file does not exist there (ENOENT).

   Failure to open the file is fatal.  On return, *LOGICAL_FILE_NAME_P is
   the name to report in messages and *REAL_FILE_NAME_P is the name of
   the file actually opened; both are freshly allocated.  */
static FILE *
xgettext_open (const char *fn,
               char **logical_file_name_p, char **real_file_name_p)
{
  FILE *stream;
  char *actual_name;
  char *reported_name;

  if (strcmp (fn, "-") == 0)
    {
      actual_name = xstrdup (_("standard input"));
      reported_name = xstrdup (actual_name);
      stream = stdin;
    }
  else if (IS_ABSOLUTE_PATH (fn))
    {
      actual_name = xstrdup (fn);
      stream = fopen (fn, "r");
      if (stream == NULL)
        error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), fn);
      reported_name = xstrdup (actual_name);
    }
  else
    {
      int j = 0;

      for (;;)
        {
          const char *dir = dir_list_nth (j);

          /* Search list exhausted without finding the file.  */
          if (dir == NULL)
            error (EXIT_FAILURE, ENOENT, _("\
error while opening \"%s\" for reading"), fn);

          actual_name = concatenated_pathname (dir, fn, NULL);

          stream = fopen (actual_name, "r");
          if (stream != NULL)
            break;

          /* Only "no such file" lets the search continue.  */
          if (errno != ENOENT)
            error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), actual_name);
          free (actual_name);

          ++j;
        }

      /* ACTUAL_NAME is the file that was found along the directory
         search path; the logical file name, which is what xgettext
         reports, is the name that was searched for.  */
      reported_name = xstrdup (fn);
    }

  *logical_file_name_p = reported_name;
  *real_file_name_p = actual_name;
  return stream;
}
1799 
1800 
1801 /* Language dependent format string parser.
1802    NULL if the language has no notion of format strings.  */
1803 static struct formatstring_parser *current_formatstring_parser1;
1804 static struct formatstring_parser *current_formatstring_parser2;
1805 
1806 
1807 static void
extract_from_file(const char * file_name,extractor_ty extractor,msgdomain_list_ty * mdlp)1808 extract_from_file (const char *file_name, extractor_ty extractor,
1809 		   msgdomain_list_ty *mdlp)
1810 {
1811   char *logical_file_name;
1812   char *real_file_name;
1813   FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
1814 
1815   /* Set the default for the source file encoding.  May be overridden by
1816      the extractor function.  */
1817   xgettext_current_source_encoding = xgettext_global_source_encoding;
1818 #if HAVE_ICONV
1819   xgettext_current_source_iconv = xgettext_global_source_iconv;
1820 #endif
1821 
1822   current_formatstring_parser1 = extractor.formatstring_parser1;
1823   current_formatstring_parser2 = extractor.formatstring_parser2;
1824   extractor.func (fp, real_file_name, logical_file_name, extractor.flag_table,
1825 		  mdlp);
1826 
1827   if (fp != stdin)
1828     fclose (fp);
1829   free (logical_file_name);
1830   free (real_file_name);
1831 }
1832 
1833 
1834 
1835 /* Convert the given string from xgettext_current_source_encoding to
1836    the output file encoding (i.e. ASCII or UTF-8).
1837    The resulting string is either the argument string, or freshly allocated.
1838    The file_name and line_number are only used for error message purposes.  */
1839 char *
from_current_source_encoding(const char * string,const char * file_name,size_t line_number)1840 from_current_source_encoding (const char *string,
1841 			      const char *file_name, size_t line_number)
1842 {
1843   if (xgettext_current_source_encoding == po_charset_ascii)
1844     {
1845       if (!is_ascii_string (string))
1846 	{
1847 	  char buffer[21];
1848 
1849 	  if (line_number == (size_t)(-1))
1850 	    buffer[0] = '\0';
1851 	  else
1852 	    sprintf (buffer, ":%ld", (long) line_number);
1853 	  multiline_error (xstrdup (""),
1854 			   xasprintf (_("\
1855 Non-ASCII string at %s%s.\n\
1856 Please specify the source encoding through --from-code.\n"),
1857 				      file_name, buffer));
1858 	  exit (EXIT_FAILURE);
1859 	}
1860     }
1861   else if (xgettext_current_source_encoding != po_charset_utf8)
1862     {
1863 #if HAVE_ICONV
1864       struct conversion_context context;
1865 
1866       context.from_code = xgettext_current_source_encoding;
1867       context.to_code = po_charset_utf8;
1868       context.from_filename = file_name;
1869       context.message = NULL;
1870 
1871       string = convert_string (xgettext_current_source_iconv, string, &context);
1872 #else
1873       /* If we don't have iconv(), the only supported values for
1874 	 xgettext_global_source_encoding and thus also for
1875 	 xgettext_current_source_encoding are ASCII and UTF-8.
1876 	 convert_string() should not be called in this case.  */
1877       abort ();
1878 #endif
1879     }
1880 
1881   return (char *) string;
1882 }
1883 
1884 #define CONVERT_STRING(string) \
1885   string = from_current_source_encoding (string, pos->file_name, \
1886 					 pos->line_number);
1887 
1888 
1889 /* Update the is_format[] flags depending on the information given in the
1890    context.  */
1891 static void
set_format_flags_from_context(enum is_format is_format[NFORMATS],flag_context_ty context,const char * string,lex_pos_ty * pos,const char * pretty_msgstr)1892 set_format_flags_from_context (enum is_format is_format[NFORMATS],
1893 			       flag_context_ty context, const char *string,
1894 			       lex_pos_ty *pos, const char *pretty_msgstr)
1895 {
1896   size_t i;
1897 
1898   if (context.is_format1 != undecided || context.is_format2 != undecided)
1899     for (i = 0; i < NFORMATS; i++)
1900       {
1901 	if (is_format[i] == undecided)
1902 	  {
1903 	    if (formatstring_parsers[i] == current_formatstring_parser1
1904 		&& context.is_format1 != undecided)
1905 	      is_format[i] = (enum is_format) context.is_format1;
1906 	    if (formatstring_parsers[i] == current_formatstring_parser2
1907 		&& context.is_format2 != undecided)
1908 	      is_format[i] = (enum is_format) context.is_format2;
1909 	  }
1910 	if (possible_format_p (is_format[i]))
1911 	  {
1912 	    struct formatstring_parser *parser = formatstring_parsers[i];
1913 	    char *invalid_reason = NULL;
1914 	    void *descr = parser->parse (string, false, &invalid_reason);
1915 
1916 	    if (descr != NULL)
1917 	      parser->free (descr);
1918 	    else
1919 	      {
1920 		/* The string is not a valid format string.  */
1921 		if (is_format[i] != possible)
1922 		  {
1923 		    char buffer[21];
1924 
1925 		    error_with_progname = false;
1926 		    if (pos->line_number == (size_t)(-1))
1927 		      buffer[0] = '\0';
1928 		    else
1929 		      sprintf (buffer, ":%ld", (long) pos->line_number);
1930 		    multiline_warning (xasprintf (_("%s%s: warning: "),
1931 						  pos->file_name, buffer),
1932 				       xasprintf (is_format[i] == yes_according_to_context
1933 						  ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n")
1934 						  : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
1935 						  pretty_msgstr,
1936 						  format_language_pretty[i],
1937 						  invalid_reason));
1938 		    error_with_progname = true;
1939 		  }
1940 
1941 		is_format[i] = impossible;
1942 		free (invalid_reason);
1943 	      }
1944 	  }
1945       }
1946 }
1947 
1948 
1949 static void
warn_format_string(enum is_format is_format[NFORMATS],const char * string,lex_pos_ty * pos,const char * pretty_msgstr)1950 warn_format_string (enum is_format is_format[NFORMATS], const char *string,
1951 		    lex_pos_ty *pos, const char *pretty_msgstr)
1952 {
1953   if (possible_format_p (is_format[format_python])
1954       && get_python_format_unnamed_arg_count (string) > 1)
1955     {
1956       char buffer[21];
1957 
1958       error_with_progname = false;
1959       if (pos->line_number == (size_t)(-1))
1960         buffer[0] = '\0';
1961       else
1962         sprintf (buffer, ":%ld", (long) pos->line_number);
1963       multiline_warning (xasprintf (_("%s%s: warning: "),
1964 				    pos->file_name, buffer),
1965 		         xasprintf (_("\
1966 '%s' format string with unnamed arguments cannot be properly localized:\n\
1967 The translator cannot reorder the arguments.\n\
1968 Please consider using a format string with named arguments,\n\
1969 and a mapping instead of a tuple for the arguments.\n"),
1970                                     pretty_msgstr));
1971       error_with_progname = true;
1972     }
1973 }
1974 
1975 
1976 message_ty *
remember_a_message(message_list_ty * mlp,char * msgctxt,char * msgid,flag_context_ty context,lex_pos_ty * pos,refcounted_string_list_ty * comment)1977 remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
1978 		    flag_context_ty context, lex_pos_ty *pos,
1979 		    refcounted_string_list_ty *comment)
1980 {
1981   enum is_format is_format[NFORMATS];
1982   enum is_wrap do_wrap;
1983   message_ty *mp;
1984   char *msgstr;
1985   size_t i;
1986 
1987   /* See whether we shall exclude this message.  */
1988   if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
1989     {
1990       /* Tell the lexer to reset its comment buffer, so that the next
1991 	 message gets the correct comments.  */
1992       xgettext_comment_reset ();
1993       savable_comment_reset ();
1994 
1995       if (msgctxt != NULL)
1996 	free (msgctxt);
1997       free (msgid);
1998 
1999       return NULL;
2000     }
2001 
2002   savable_comment_to_xgettext_comment (comment);
2003 
2004   for (i = 0; i < NFORMATS; i++)
2005     is_format[i] = undecided;
2006   do_wrap = undecided;
2007 
2008   if (msgctxt != NULL)
2009     CONVERT_STRING (msgctxt);
2010   CONVERT_STRING (msgid);
2011 
2012   if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
2013     {
2014       char buffer[21];
2015 
2016       error_with_progname = false;
2017       if (pos->line_number == (size_t)(-1))
2018 	buffer[0] = '\0';
2019       else
2020 	sprintf (buffer, ":%ld", (long) pos->line_number);
2021       multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
2022 				    buffer),
2023 			 xstrdup (_("\
2024 Empty msgid.  It is reserved by GNU gettext:\n\
2025 gettext(\"\") returns the header entry with\n\
2026 meta information, not the empty string.\n")));
2027       error_with_progname = true;
2028     }
2029 
2030   /* See if we have seen this message before.  */
2031   mp = message_list_search (mlp, msgctxt, msgid);
2032   if (mp != NULL)
2033     {
2034       if (msgctxt != NULL)
2035 	free (msgctxt);
2036       free (msgid);
2037       for (i = 0; i < NFORMATS; i++)
2038 	is_format[i] = mp->is_format[i];
2039       do_wrap = mp->do_wrap;
2040     }
2041   else
2042     {
2043       /* Construct the msgstr from the prefix and suffix, otherwise use the
2044 	 empty string.  */
2045       if (msgstr_prefix)
2046 	msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix);
2047       else
2048 	msgstr = "";
2049 
2050       /* Allocate a new message and append the message to the list.  */
2051       mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1,
2052 			  pos);
2053       /* Do not free msgctxt and msgid.  */
2054       message_list_append (mlp, mp);
2055     }
2056 
2057   /* Determine whether the context specifies that the msgid is a format
2058      string.  */
2059   set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
2060 
2061   /* Ask the lexer for the comments it has seen.  */
2062   {
2063     size_t nitems_before;
2064     size_t nitems_after;
2065     int j;
2066     bool add_all_remaining_comments;
2067 
2068     nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2069 
2070     add_all_remaining_comments = add_all_comments;
2071     for (j = 0; ; ++j)
2072       {
2073 	const char *s = xgettext_comment (j);
2074 	const char *t;
2075 	if (s == NULL)
2076 	  break;
2077 
2078 	CONVERT_STRING (s);
2079 
2080 	/* To reduce the possibility of unwanted matches we do a two
2081 	   step match: the line must contain `xgettext:' and one of
2082 	   the possible format description strings.  */
2083 	if ((t = c_strstr (s, "xgettext:")) != NULL)
2084 	  {
2085 	    bool tmp_fuzzy;
2086 	    enum is_format tmp_format[NFORMATS];
2087 	    enum is_wrap tmp_wrap;
2088 	    bool interesting;
2089 
2090 	    t += strlen ("xgettext:");
2091 
2092 	    po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_wrap);
2093 
2094 	    interesting = false;
2095 	    for (i = 0; i < NFORMATS; i++)
2096 	      if (tmp_format[i] != undecided)
2097 		{
2098 		  is_format[i] = tmp_format[i];
2099 		  interesting = true;
2100 		}
2101 	    if (tmp_wrap != undecided)
2102 	      {
2103 		do_wrap = tmp_wrap;
2104 		interesting = true;
2105 	      }
2106 
2107 	    /* If the "xgettext:" marker was followed by an interesting
2108 	       keyword, and we updated our is_format/do_wrap variables,
2109 	       we don't print the comment as a #. comment.  */
2110 	    if (interesting)
2111 	      continue;
2112 	  }
2113 	/* When the comment tag is seen, it drags in not only the line
2114 	   which it starts, but all remaining comment lines.  */
2115 	if (add_all_remaining_comments
2116 	    || (add_all_remaining_comments =
2117 		  (comment_tag != NULL
2118 		   && strncmp (s, comment_tag, strlen (comment_tag)) == 0)))
2119 	  message_comment_dot_append (mp, s);
2120       }
2121 
2122     nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2123 
2124     /* Don't add the comments if they are a repetition of the tail of the
2125        already present comments.  This avoids unneeded duplication if the
2126        same message appears several times, each time with the same comment.  */
2127     if (nitems_before < nitems_after)
2128       {
2129 	size_t added = nitems_after - nitems_before;
2130 
2131 	if (added <= nitems_before)
2132 	  {
2133 	    bool repeated = true;
2134 
2135 	    for (i = 0; i < added; i++)
2136 	      if (strcmp (mp->comment_dot->item[nitems_before - added + i],
2137 			  mp->comment_dot->item[nitems_before + i]) != 0)
2138 		{
2139 		  repeated = false;
2140 		  break;
2141 		}
2142 
2143 	    if (repeated)
2144 	      {
2145 		for (i = 0; i < added; i++)
2146 		  free ((char *) mp->comment_dot->item[nitems_before + i]);
2147 		mp->comment_dot->nitems = nitems_before;
2148 	      }
2149 	  }
2150       }
2151   }
2152 
2153   /* If it is not already decided, through programmer comments, whether the
2154      msgid is a format string, examine the msgid.  This is a heuristic.  */
2155   for (i = 0; i < NFORMATS; i++)
2156     {
2157       if (is_format[i] == undecided
2158 	  && (formatstring_parsers[i] == current_formatstring_parser1
2159 	      || formatstring_parsers[i] == current_formatstring_parser2)
2160 	  /* But avoid redundancy: objc-format is stronger than c-format.  */
2161 	  && !(i == format_c && possible_format_p (is_format[format_objc]))
2162 	  && !(i == format_objc && possible_format_p (is_format[format_c]))
2163 	  /* Avoid flagging a string as c-format when it's known to be a
2164 	     qt-format or boost-format string.  */
2165 	  && !(i == format_c
2166 	       && (possible_format_p (is_format[format_qt])
2167 		   || possible_format_p (is_format[format_boost]))))
2168 	{
2169 	  struct formatstring_parser *parser = formatstring_parsers[i];
2170 	  char *invalid_reason = NULL;
2171 	  void *descr = parser->parse (mp->msgid, false, &invalid_reason);
2172 
2173 	  if (descr != NULL)
2174 	    {
2175 	      /* msgid is a valid format string.  We mark only those msgids
2176 		 as format strings which contain at least one format directive
2177 		 and thus are format strings with a high probability.  We
2178 		 don't mark strings without directives as format strings,
2179 		 because that would force the programmer to add
2180 		 "xgettext: no-c-format" anywhere where a translator wishes
2181 		 to use a percent sign.  So, the msgfmt checking will not be
2182 		 perfect.  Oh well.  */
2183 	      if (parser->get_number_of_directives (descr) > 0
2184 		  && !(parser->is_unlikely_intentional != NULL
2185 		       && parser->is_unlikely_intentional (descr)))
2186 		is_format[i] = possible;
2187 
2188 	      parser->free (descr);
2189 	    }
2190 	  else
2191 	    {
2192 	      /* msgid is not a valid format string.  */
2193 	      is_format[i] = impossible;
2194 	      free (invalid_reason);
2195 	    }
2196 	}
2197       mp->is_format[i] = is_format[i];
2198     }
2199 
2200   mp->do_wrap = do_wrap == no ? no : yes;	/* By default we wrap.  */
2201 
2202   /* Warn about the use of non-reorderable format strings when the programming
2203      language also provides reorderable format strings.  */
2204   warn_format_string (is_format, mp->msgid, pos, "msgid");
2205 
2206   /* Remember where we saw this msgid.  */
2207   if (line_comment)
2208     message_comment_filepos (mp, pos->file_name, pos->line_number);
2209 
2210   /* Tell the lexer to reset its comment buffer, so that the next
2211      message gets the correct comments.  */
2212   xgettext_comment_reset ();
2213   savable_comment_reset ();
2214 
2215   return mp;
2216 }
2217 
2218 
2219 void
remember_a_message_plural(message_ty * mp,char * string,flag_context_ty context,lex_pos_ty * pos,refcounted_string_list_ty * comment)2220 remember_a_message_plural (message_ty *mp, char *string,
2221 			   flag_context_ty context, lex_pos_ty *pos,
2222 			   refcounted_string_list_ty *comment)
2223 {
2224   char *msgid_plural;
2225   char *msgstr1;
2226   size_t msgstr1_len;
2227   char *msgstr;
2228   size_t i;
2229 
2230   msgid_plural = string;
2231 
2232   savable_comment_to_xgettext_comment (comment);
2233 
2234   CONVERT_STRING (msgid_plural);
2235 
2236   /* See if the message is already a plural message.  */
2237   if (mp->msgid_plural == NULL)
2238     {
2239       mp->msgid_plural = msgid_plural;
2240 
2241       /* Construct the first plural form from the prefix and suffix,
2242 	 otherwise use the empty string.  The translator will have to
2243 	 provide additional plural forms.  */
2244       if (msgstr_prefix)
2245 	msgstr1 =
2246 	  xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix);
2247       else
2248 	msgstr1 = "";
2249       msgstr1_len = strlen (msgstr1) + 1;
2250       msgstr = (char *) xmalloc (mp->msgstr_len + msgstr1_len);
2251       memcpy (msgstr, mp->msgstr, mp->msgstr_len);
2252       memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
2253       mp->msgstr = msgstr;
2254       mp->msgstr_len = mp->msgstr_len + msgstr1_len;
2255       if (msgstr_prefix)
2256 	free (msgstr1);
2257 
2258       /* Determine whether the context specifies that the msgid_plural is a
2259 	 format string.  */
2260       set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
2261 				     pos, "msgid_plural");
2262 
2263       /* If it is not already decided, through programmer comments or
2264 	 the msgid, whether the msgid is a format string, examine the
2265 	 msgid_plural.  This is a heuristic.  */
2266       for (i = 0; i < NFORMATS; i++)
2267 	if ((formatstring_parsers[i] == current_formatstring_parser1
2268 	     || formatstring_parsers[i] == current_formatstring_parser2)
2269 	    && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
2270 	    /* But avoid redundancy: objc-format is stronger than c-format.  */
2271 	    && !(i == format_c
2272 		 && possible_format_p (mp->is_format[format_objc]))
2273 	    && !(i == format_objc
2274 		 && possible_format_p (mp->is_format[format_c]))
2275 	    /* Avoid flagging a string as c-format when it's known to be a
2276 	       qt-format or boost-format string.  */
2277 	    && !(i == format_c
2278 		 && (possible_format_p (mp->is_format[format_qt])
2279 		     || possible_format_p (mp->is_format[format_boost]))))
2280 	  {
2281 	    struct formatstring_parser *parser = formatstring_parsers[i];
2282 	    char *invalid_reason = NULL;
2283 	    void *descr =
2284 	      parser->parse (mp->msgid_plural, false, &invalid_reason);
2285 
2286 	    if (descr != NULL)
2287 	      {
2288 		/* Same heuristic as in remember_a_message.  */
2289 		if (parser->get_number_of_directives (descr) > 0
2290 		    && !(parser->is_unlikely_intentional != NULL
2291 			 && parser->is_unlikely_intentional (descr)))
2292 		  mp->is_format[i] = possible;
2293 
2294 		parser->free (descr);
2295 	      }
2296 	    else
2297 	      {
2298 		/* msgid_plural is not a valid format string.  */
2299 		mp->is_format[i] = impossible;
2300 		free (invalid_reason);
2301 	      }
2302 	  }
2303 
2304       /* Warn about the use of non-reorderable format strings when the programming
2305          language also provides reorderable format strings.  */
2306       warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural");
2307     }
2308   else
2309     free (msgid_plural);
2310 
2311   /* Tell the lexer to reset its comment buffer, so that the next
2312      message gets the correct comments.  */
2313   xgettext_comment_reset ();
2314   savable_comment_reset ();
2315 }
2316 
2317 
2318 struct arglist_parser *
arglist_parser_alloc(message_list_ty * mlp,const struct callshapes * shapes)2319 arglist_parser_alloc (message_list_ty *mlp, const struct callshapes *shapes)
2320 {
2321   if (shapes == NULL || shapes->nshapes == 0)
2322     {
2323       struct arglist_parser *ap =
2324 	(struct arglist_parser *)
2325 	xmalloc (offsetof (struct arglist_parser, alternative[0]));
2326 
2327       ap->mlp = mlp;
2328       ap->keyword = NULL;
2329       ap->keyword_len = 0;
2330       ap->nalternatives = 0;
2331 
2332       return ap;
2333     }
2334   else
2335     {
2336       struct arglist_parser *ap =
2337 	(struct arglist_parser *)
2338 	xmalloc (sizeof (struct arglist_parser)
2339 		 + (shapes->nshapes - 1) * sizeof (struct partial_call));
2340       size_t i;
2341 
2342       ap->mlp = mlp;
2343       ap->keyword = shapes->keyword;
2344       ap->keyword_len = shapes->keyword_len;
2345       ap->nalternatives = shapes->nshapes;
2346       for (i = 0; i < shapes->nshapes; i++)
2347 	{
2348 	  ap->alternative[i].argnumc = shapes->shapes[i].argnumc;
2349 	  ap->alternative[i].argnum1 = shapes->shapes[i].argnum1;
2350 	  ap->alternative[i].argnum2 = shapes->shapes[i].argnum2;
2351 	  ap->alternative[i].argnum1_glib_context =
2352 	    shapes->shapes[i].argnum1_glib_context;
2353 	  ap->alternative[i].argnum2_glib_context =
2354 	    shapes->shapes[i].argnum2_glib_context;
2355 	  ap->alternative[i].argtotal = shapes->shapes[i].argtotal;
2356 	  ap->alternative[i].xcomments = shapes->shapes[i].xcomments;
2357 	  ap->alternative[i].msgctxt = NULL;
2358 	  ap->alternative[i].msgctxt_pos.file_name = NULL;
2359 	  ap->alternative[i].msgctxt_pos.line_number = (size_t)(-1);
2360 	  ap->alternative[i].msgid = NULL;
2361 	  ap->alternative[i].msgid_context = null_context;
2362 	  ap->alternative[i].msgid_pos.file_name = NULL;
2363 	  ap->alternative[i].msgid_pos.line_number = (size_t)(-1);
2364 	  ap->alternative[i].msgid_comment = NULL;
2365 	  ap->alternative[i].msgid_plural = NULL;
2366 	  ap->alternative[i].msgid_plural_context = null_context;
2367 	  ap->alternative[i].msgid_plural_pos.file_name = NULL;
2368 	  ap->alternative[i].msgid_plural_pos.line_number = (size_t)(-1);
2369 	}
2370 
2371       return ap;
2372     }
2373 }
2374 
2375 
2376 struct arglist_parser *
arglist_parser_clone(struct arglist_parser * ap)2377 arglist_parser_clone (struct arglist_parser *ap)
2378 {
2379   struct arglist_parser *copy =
2380     (struct arglist_parser *)
2381     xmalloc (sizeof (struct arglist_parser) - sizeof (struct partial_call)
2382 	     + ap->nalternatives * sizeof (struct partial_call));
2383   size_t i;
2384 
2385   copy->mlp = ap->mlp;
2386   copy->keyword = ap->keyword;
2387   copy->keyword_len = ap->keyword_len;
2388   copy->nalternatives = ap->nalternatives;
2389   for (i = 0; i < ap->nalternatives; i++)
2390     {
2391       const struct partial_call *cp = &ap->alternative[i];
2392       struct partial_call *ccp = &copy->alternative[i];
2393 
2394       ccp->argnumc = cp->argnumc;
2395       ccp->argnum1 = cp->argnum1;
2396       ccp->argnum2 = cp->argnum2;
2397       ccp->argnum1_glib_context = cp->argnum1_glib_context;
2398       ccp->argnum2_glib_context = cp->argnum2_glib_context;
2399       ccp->argtotal = cp->argtotal;
2400       ccp->xcomments = cp->xcomments;
2401       ccp->msgctxt = (cp->msgctxt != NULL ? xstrdup (cp->msgctxt) : NULL);
2402       ccp->msgctxt_pos = cp->msgctxt_pos;
2403       ccp->msgid = (cp->msgid != NULL ? xstrdup (cp->msgid) : NULL);
2404       ccp->msgid_context = cp->msgid_context;
2405       ccp->msgid_pos = cp->msgctxt_pos;
2406       ccp->msgid_comment = add_reference (cp->msgid_comment);
2407       ccp->msgid_plural =
2408 	(cp->msgid_plural != NULL ? xstrdup (cp->msgid_plural) : NULL);
2409       ccp->msgid_plural_context = cp->msgid_plural_context;
2410       ccp->msgid_plural_pos = cp->msgid_plural_pos;
2411     }
2412 
2413   return copy;
2414 }
2415 
2416 
2417 void
arglist_parser_remember(struct arglist_parser * ap,int argnum,char * string,flag_context_ty context,char * file_name,size_t line_number,refcounted_string_list_ty * comment)2418 arglist_parser_remember (struct arglist_parser *ap,
2419 			 int argnum, char *string,
2420 			 flag_context_ty context,
2421 			 char *file_name, size_t line_number,
2422 			 refcounted_string_list_ty *comment)
2423 {
2424   bool stored_string = false;
2425   size_t nalternatives = ap->nalternatives;
2426   size_t i;
2427 
2428   if (!(argnum > 0))
2429     abort ();
2430   for (i = 0; i < nalternatives; i++)
2431     {
2432       struct partial_call *cp = &ap->alternative[i];
2433 
2434       if (argnum == cp->argnumc)
2435 	{
2436 	  cp->msgctxt = string;
2437 	  cp->msgctxt_pos.file_name = file_name;
2438 	  cp->msgctxt_pos.line_number = line_number;
2439 	  stored_string = true;
2440 	  /* Mark msgctxt as done.  */
2441 	  cp->argnumc = 0;
2442 	}
2443       else if (argnum == cp->argnum1)
2444 	{
2445 	  cp->msgid = string;
2446 	  cp->msgid_context = context;
2447 	  cp->msgid_pos.file_name = file_name;
2448 	  cp->msgid_pos.line_number = line_number;
2449 	  cp->msgid_comment = add_reference (comment);
2450 	  stored_string = true;
2451 	  /* Mark msgid as done.  */
2452 	  cp->argnum1 = 0;
2453 	}
2454       else if (argnum == cp->argnum2)
2455 	{
2456 	  cp->msgid_plural = string;
2457 	  cp->msgid_plural_context = context;
2458 	  cp->msgid_plural_pos.file_name = file_name;
2459 	  cp->msgid_plural_pos.line_number = line_number;
2460 	  stored_string = true;
2461 	  /* Mark msgid_plural as done.  */
2462 	  cp->argnum2 = 0;
2463 	}
2464     }
2465   /* Note: There is a memory leak here: When string was stored but is later
2466      not used by arglist_parser_done, we don't free it.  */
2467   if (!stored_string)
2468     free (string);
2469 }
2470 
2471 
2472 bool
arglist_parser_decidedp(struct arglist_parser * ap,int argnum)2473 arglist_parser_decidedp (struct arglist_parser *ap, int argnum)
2474 {
2475   size_t i;
2476 
2477   /* Test whether all alternatives are decided.
2478      Note: A decided alternative can be complete
2479        cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2480        && cp->argtotal == 0
2481      or it can be failed if no literal strings were found at the specified
2482      argument positions:
2483        cp->argnumc <= argnum && cp->argnum1 <= argnum && cp->argnum2 <= argnum
2484      or it can be failed if the number of arguments is exceeded:
2485        cp->argtotal > 0 && cp->argtotal < argnum
2486    */
2487   for (i = 0; i < ap->nalternatives; i++)
2488     {
2489       struct partial_call *cp = &ap->alternative[i];
2490 
2491       if (!((cp->argnumc <= argnum
2492 	     && cp->argnum1 <= argnum
2493 	     && cp->argnum2 <= argnum)
2494 	    || (cp->argtotal > 0 && cp->argtotal < argnum)))
2495 	/* cp is still undecided.  */
2496 	return false;
2497     }
2498   return true;
2499 }
2500 
2501 
2502 void
arglist_parser_done(struct arglist_parser * ap,int argnum)2503 arglist_parser_done (struct arglist_parser *ap, int argnum)
2504 {
2505   size_t ncomplete;
2506   size_t i;
2507 
2508   /* Determine the number of complete calls.  */
2509   ncomplete = 0;
2510   for (i = 0; i < ap->nalternatives; i++)
2511     {
2512       struct partial_call *cp = &ap->alternative[i];
2513 
2514       if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2515 	  && (cp->argtotal == 0 || cp->argtotal == argnum))
2516 	ncomplete++;
2517     }
2518 
2519   if (ncomplete > 0)
2520     {
2521       struct partial_call *best_cp = NULL;
2522       bool ambiguous = false;
2523 
2524       /* Find complete calls where msgctxt, msgid, msgid_plural are all
2525 	 provided.  */
2526       for (i = 0; i < ap->nalternatives; i++)
2527 	{
2528 	  struct partial_call *cp = &ap->alternative[i];
2529 
2530 	  if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2531 	      && (cp->argtotal == 0 || cp->argtotal == argnum)
2532 	      && cp->msgctxt != NULL
2533 	      && cp->msgid != NULL
2534 	      && cp->msgid_plural != NULL)
2535 	    {
2536 	      if (best_cp != NULL)
2537 		{
2538 		  ambiguous = true;
2539 		  break;
2540 		}
2541 	      best_cp = cp;
2542 	    }
2543 	}
2544 
2545       if (best_cp == NULL)
2546 	{
2547 	  struct partial_call *best_cp1 = NULL;
2548 	  struct partial_call *best_cp2 = NULL;
2549 
2550 	  /* Find complete calls where msgctxt, msgid are provided.  */
2551 	  for (i = 0; i < ap->nalternatives; i++)
2552 	    {
2553 	      struct partial_call *cp = &ap->alternative[i];
2554 
2555 	      if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2556 		  && (cp->argtotal == 0 || cp->argtotal == argnum)
2557 		  && cp->msgctxt != NULL
2558 		  && cp->msgid != NULL)
2559 		{
2560 		  if (best_cp1 != NULL)
2561 		    {
2562 		      ambiguous = true;
2563 		      break;
2564 		    }
2565 		  best_cp1 = cp;
2566 		}
2567 	    }
2568 
2569 	  /* Find complete calls where msgid, msgid_plural are provided.  */
2570 	  for (i = 0; i < ap->nalternatives; i++)
2571 	    {
2572 	      struct partial_call *cp = &ap->alternative[i];
2573 
2574 	      if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2575 		  && (cp->argtotal == 0 || cp->argtotal == argnum)
2576 		  && cp->msgid != NULL
2577 		  && cp->msgid_plural != NULL)
2578 		{
2579 		  if (best_cp2 != NULL)
2580 		    {
2581 		      ambiguous = true;
2582 		      break;
2583 		    }
2584 		  best_cp2 = cp;
2585 		}
2586 	    }
2587 
2588 	  if (best_cp1 != NULL)
2589 	    best_cp = best_cp1;
2590 	  if (best_cp2 != NULL)
2591 	    {
2592 	      if (best_cp != NULL)
2593 		ambiguous = true;
2594 	      else
2595 		best_cp = best_cp2;
2596 	    }
2597 	}
2598 
2599       if (best_cp == NULL)
2600 	{
2601 	  /* Find complete calls where msgid is provided.  */
2602 	  for (i = 0; i < ap->nalternatives; i++)
2603 	    {
2604 	      struct partial_call *cp = &ap->alternative[i];
2605 
2606 	      if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2607 		  && (cp->argtotal == 0 || cp->argtotal == argnum)
2608 		  && cp->msgid != NULL)
2609 		{
2610 		  if (best_cp != NULL)
2611 		    {
2612 		      ambiguous = true;
2613 		      break;
2614 		    }
2615 		  best_cp = cp;
2616 		}
2617 	    }
2618 	}
2619 
2620       if (ambiguous)
2621 	{
2622 	  error_with_progname = false;
2623 	  error_at_line (0, 0,
2624 			 best_cp->msgid_pos.file_name,
2625 			 best_cp->msgid_pos.line_number,
2626 			 _("ambiguous argument specification for keyword '%.*s'"),
2627 			 (int) ap->keyword_len, ap->keyword);
2628 	  error_with_progname = true;
2629 	}
2630 
2631       if (best_cp != NULL)
2632 	{
2633 	  /* best_cp indicates the best found complete call.
2634 	     Now call remember_a_message.  */
2635 	  message_ty *mp;
2636 
2637 	  /* Split strings in the GNOME glib syntax "msgctxt|msgid".  */
2638 	  if (best_cp->argnum1_glib_context || best_cp->argnum2_glib_context)
2639 	    /* split_keywordspec should not allow the context to be specified
2640 	       in two different ways.  */
2641 	    if (best_cp->msgctxt != NULL)
2642 	      abort ();
2643 	  if (best_cp->argnum1_glib_context)
2644 	    {
2645 	      const char *separator = strchr (best_cp->msgid, '|');
2646 
2647 	      if (separator == NULL)
2648 		{
2649 		  error_with_progname = false;
2650 		  error_at_line (0, 0,
2651 				 best_cp->msgid_pos.file_name,
2652 				 best_cp->msgid_pos.line_number,
2653 				 _("warning: missing context for keyword '%.*s'"),
2654 				 (int) ap->keyword_len, ap->keyword);
2655 		  error_with_progname = true;
2656 		}
2657 	      else
2658 		{
2659 		  size_t ctxt_len = separator - best_cp->msgid;
2660 		  char *ctxt = (char *) xmalloc (ctxt_len + 1);
2661 
2662 		  memcpy (ctxt, best_cp->msgid, ctxt_len);
2663 		  ctxt[ctxt_len] = '\0';
2664 		  best_cp->msgctxt = ctxt;
2665 		  best_cp->msgid = xstrdup (separator + 1);
2666 		}
2667 	    }
2668 	  if (best_cp->msgid_plural != NULL && best_cp->argnum2_glib_context)
2669 	    {
2670 	      const char *separator = strchr (best_cp->msgid_plural, '|');
2671 
2672 	      if (separator == NULL)
2673 		{
2674 		  error_with_progname = false;
2675 		  error_at_line (0, 0,
2676 				 best_cp->msgid_plural_pos.file_name,
2677 				 best_cp->msgid_plural_pos.line_number,
2678 				 _("warning: missing context for plural argument of keyword '%.*s'"),
2679 				 (int) ap->keyword_len, ap->keyword);
2680 		  error_with_progname = true;
2681 		}
2682 	      else
2683 		{
2684 		  size_t ctxt_len = separator - best_cp->msgid_plural;
2685 		  char *ctxt = (char *) xmalloc (ctxt_len + 1);
2686 
2687 		  memcpy (ctxt, best_cp->msgid_plural, ctxt_len);
2688 		  ctxt[ctxt_len] = '\0';
2689 		  if (best_cp->msgctxt == NULL)
2690 		    best_cp->msgctxt = ctxt;
2691 		  else
2692 		    {
2693 		      if (strcmp (ctxt, best_cp->msgctxt) != 0)
2694 			{
2695 			  error_with_progname = false;
2696 			  error_at_line (0, 0,
2697 					 best_cp->msgid_plural_pos.file_name,
2698 					 best_cp->msgid_plural_pos.line_number,
2699 					 _("context mismatch between singular and plural form"));
2700 			  error_with_progname = true;
2701 			}
2702 		      free (ctxt);
2703 		    }
2704 		  best_cp->msgid_plural = xstrdup (separator + 1);
2705 		}
2706 	    }
2707 
2708 	  mp = remember_a_message (ap->mlp, best_cp->msgctxt, best_cp->msgid,
2709 				   best_cp->msgid_context,
2710 				   &best_cp->msgid_pos,
2711 				   best_cp->msgid_comment);
2712 	  if (best_cp->msgid_plural != NULL)
2713 	    remember_a_message_plural (mp, best_cp->msgid_plural,
2714 				       best_cp->msgid_plural_context,
2715 				       &best_cp->msgid_plural_pos,
2716 				       NULL);
2717 	  if (best_cp->xcomments.nitems > 0)
2718 	    {
2719 	      /* Add best_cp->xcomments to mp->comment_dot, unless already
2720 		 present.  */
2721 	      size_t i;
2722 
2723 	      for (i = 0; i < best_cp->xcomments.nitems; i++)
2724 		{
2725 		  const char *xcomment = best_cp->xcomments.item[i];
2726 		  bool found = false;
2727 
2728 		  if (mp->comment_dot != NULL)
2729 		    {
2730 		      size_t j;
2731 
2732 		      for (j = 0; j < mp->comment_dot->nitems; j++)
2733 			if (strcmp (xcomment, mp->comment_dot->item[j]) == 0)
2734 			  {
2735 			    found = true;
2736 			    break;
2737 			  }
2738 		    }
2739 		  if (!found)
2740 		    message_comment_dot_append (mp, xcomment);
2741 		}
2742 	    }
2743 	}
2744     }
2745   else
2746     {
2747       /* No complete call was parsed.  */
2748       /* Note: There is a memory leak here: When there is more than one
2749 	 alternative, the same string can be stored in multiple alternatives,
2750 	 and it's not easy to free all strings reliably.  */
2751       if (ap->nalternatives == 1)
2752 	{
2753 	  if (ap->alternative[0].msgctxt != NULL)
2754 	    free (ap->alternative[0].msgctxt);
2755 	  if (ap->alternative[0].msgid != NULL)
2756 	    free (ap->alternative[0].msgid);
2757 	  if (ap->alternative[0].msgid_plural != NULL)
2758 	    free (ap->alternative[0].msgid_plural);
2759 	}
2760     }
2761 
2762   for (i = 0; i < ap->nalternatives; i++)
2763     drop_reference (ap->alternative[i].msgid_comment);
2764   free (ap);
2765 }
2766 
2767 
2768 static message_ty *
construct_header()2769 construct_header ()
2770 {
2771   time_t now;
2772   char *timestring;
2773   message_ty *mp;
2774   char *msgstr;
2775   static lex_pos_ty pos = { __FILE__, __LINE__ };
2776 
2777   if (msgid_bugs_address != NULL && msgid_bugs_address[0] == '\0')
2778     multiline_warning (xasprintf (_("warning: ")),
2779 		       xstrdup (_("\
2780 The option --msgid-bugs-address was not specified.\n\
2781 If you are using a `Makevars' file, please specify\n\
2782 the MSGID_BUGS_ADDRESS variable there; otherwise please\n\
2783 specify an --msgid-bugs-address command line option.\n\
2784 ")));
2785 
2786   time (&now);
2787   timestring = po_strftime (&now);
2788 
2789   msgstr = xasprintf ("\
2790 Project-Id-Version: PACKAGE VERSION\n\
2791 Report-Msgid-Bugs-To: %s\n\
2792 POT-Creation-Date: %s\n\
2793 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n\
2794 Last-Translator: FULL NAME <EMAIL@ADDRESS>\n\
2795 Language-Team: LANGUAGE <LL@li.org>\n\
2796 MIME-Version: 1.0\n\
2797 Content-Type: text/plain; charset=CHARSET\n\
2798 Content-Transfer-Encoding: 8bit\n",
2799 		      msgid_bugs_address != NULL ? msgid_bugs_address : "",
2800 		      timestring);
2801   free (timestring);
2802 
2803   mp = message_alloc (NULL, "", NULL, msgstr, strlen (msgstr) + 1, &pos);
2804 
2805   message_comment_append (mp,
2806 			  copyright_holder[0] != '\0'
2807 			  ? xasprintf ("\
2808 SOME DESCRIPTIVE TITLE.\n\
2809 Copyright (C) YEAR %s\n\
2810 This file is distributed under the same license as the PACKAGE package.\n\
2811 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
2812 				       copyright_holder)
2813 			  : "\
2814 SOME DESCRIPTIVE TITLE.\n\
2815 This file is put in the public domain.\n\
2816 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
2817 
2818   mp->is_fuzzy = true;
2819 
2820   return mp;
2821 }
2822 
2823 static void
finalize_header(msgdomain_list_ty * mdlp)2824 finalize_header (msgdomain_list_ty *mdlp)
2825 {
2826   /* If the generated PO file has plural forms, add a Plural-Forms template
2827      to the constructed header.  */
2828   {
2829     bool has_plural;
2830     size_t i, j;
2831 
2832     has_plural = false;
2833     for (i = 0; i < mdlp->nitems; i++)
2834       {
2835 	message_list_ty *mlp = mdlp->item[i]->messages;
2836 
2837 	for (j = 0; j < mlp->nitems; j++)
2838 	  {
2839 	    message_ty *mp = mlp->item[j];
2840 
2841 	    if (mp->msgid_plural != NULL)
2842 	      {
2843 		has_plural = true;
2844 		break;
2845 	      }
2846 	  }
2847 	if (has_plural)
2848 	  break;
2849       }
2850 
2851     if (has_plural)
2852       {
2853 	message_ty *header =
2854 	  message_list_search (mdlp->item[0]->messages, NULL, "");
2855 	if (header != NULL
2856 	    && c_strstr (header->msgstr, "Plural-Forms:") == NULL)
2857 	  {
2858 	    size_t insertpos = strlen (header->msgstr);
2859 	    const char *suffix;
2860 	    size_t suffix_len;
2861 	    char *new_msgstr;
2862 
2863 	    suffix = "\nPlural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n";
2864 	    if (insertpos == 0 || header->msgstr[insertpos-1] == '\n')
2865 	      suffix++;
2866 	    suffix_len = strlen (suffix);
2867 	    new_msgstr = (char *) xmalloc (header->msgstr_len + suffix_len);
2868 	    memcpy (new_msgstr, header->msgstr, insertpos);
2869 	    memcpy (new_msgstr + insertpos, suffix, suffix_len);
2870 	    memcpy (new_msgstr + insertpos + suffix_len,
2871 		    header->msgstr + insertpos,
2872 		    header->msgstr_len - insertpos);
2873 	    header->msgstr = new_msgstr;
2874 	    header->msgstr_len = header->msgstr_len + suffix_len;
2875 	  }
2876       }
2877   }
2878 
2879   /* If not all the strings were plain ASCII, or if the output syntax
2880      requires a charset conversion, set the charset in the header to UTF-8.
2881      All messages have already been converted to UTF-8 in remember_a_message
2882      and remember_a_message_plural.  */
2883   {
2884     bool has_nonascii = false;
2885     size_t i;
2886 
2887     for (i = 0; i < mdlp->nitems; i++)
2888       {
2889 	message_list_ty *mlp = mdlp->item[i]->messages;
2890 
2891 	if (!is_ascii_message_list (mlp))
2892 	  has_nonascii = true;
2893       }
2894 
2895     if (has_nonascii || output_syntax->requires_utf8)
2896       {
2897 	message_list_ty *mlp = mdlp->item[0]->messages;
2898 
2899 	iconv_message_list (mlp, po_charset_utf8, po_charset_utf8, NULL);
2900       }
2901   }
2902 }
2903 
2904 
/* Number of elements in the array A.  A must be a real array object,
   not a pointer.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
/* Pointer just past the last element of the array A.  */
#define ENDOF(a) (&(a)[SIZEOF (a)])
2907 
2908 
2909 static extractor_ty
language_to_extractor(const char * name)2910 language_to_extractor (const char *name)
2911 {
2912   struct table_ty
2913   {
2914     const char *name;
2915     extractor_func func;
2916     flag_context_list_table_ty *flag_table;
2917     struct formatstring_parser *formatstring_parser1;
2918     struct formatstring_parser *formatstring_parser2;
2919   };
2920   typedef struct table_ty table_ty;
2921 
2922   static table_ty table[] =
2923   {
2924     SCANNERS_C
2925     SCANNERS_PO
2926     SCANNERS_SH
2927     SCANNERS_PYTHON
2928     SCANNERS_LISP
2929     SCANNERS_ELISP
2930     SCANNERS_LIBREP
2931     SCANNERS_SCHEME
2932     SCANNERS_SMALLTALK
2933     SCANNERS_JAVA
2934     SCANNERS_PROPERTIES
2935     SCANNERS_CSHARP
2936     SCANNERS_AWK
2937     SCANNERS_YCP
2938     SCANNERS_TCL
2939     SCANNERS_PERL
2940     SCANNERS_PHP
2941     SCANNERS_STRINGTABLE
2942     SCANNERS_RST
2943     SCANNERS_GLADE
2944     /* Here may follow more languages and their scanners: pike, etc...
2945        Make sure new scanners honor the --exclude-file option.  */
2946   };
2947 
2948   table_ty *tp;
2949 
2950   for (tp = table; tp < ENDOF(table); ++tp)
2951     if (c_strcasecmp (name, tp->name) == 0)
2952       {
2953 	extractor_ty result;
2954 
2955 	result.func = tp->func;
2956 	result.flag_table = tp->flag_table;
2957 	result.formatstring_parser1 = tp->formatstring_parser1;
2958 	result.formatstring_parser2 = tp->formatstring_parser2;
2959 
2960 	/* Handle --qt.  It's preferrable to handle this facility here rather
2961 	   than through an option --language=C++/Qt because the latter would
2962 	   conflict with the language "C++" regarding the file extensions.  */
2963 	if (recognize_format_qt && strcmp (tp->name, "C++") == 0)
2964 	  {
2965 	    result.flag_table = &flag_table_cxx_qt;
2966 	    result.formatstring_parser2 = &formatstring_qt;
2967 	  }
2968 	/* Likewise for --boost.  */
2969 	if (recognize_format_boost && strcmp (tp->name, "C++") == 0)
2970 	  {
2971 	    result.flag_table = &flag_table_cxx_boost;
2972 	    result.formatstring_parser2 = &formatstring_boost;
2973 	  }
2974 
2975 	return result;
2976       }
2977 
2978   error (EXIT_FAILURE, 0, _("language `%s' unknown"), name);
2979   /* NOTREACHED */
2980   {
2981     extractor_ty result = { NULL, NULL, NULL, NULL };
2982     return result;
2983   }
2984 }
2985 
2986 
2987 static const char *
extension_to_language(const char * extension)2988 extension_to_language (const char *extension)
2989 {
2990   struct table_ty
2991   {
2992     const char *extension;
2993     const char *language;
2994   };
2995   typedef struct table_ty table_ty;
2996 
2997   static table_ty table[] =
2998   {
2999     EXTENSIONS_C
3000     EXTENSIONS_PO
3001     EXTENSIONS_SH
3002     EXTENSIONS_PYTHON
3003     EXTENSIONS_LISP
3004     EXTENSIONS_ELISP
3005     EXTENSIONS_LIBREP
3006     EXTENSIONS_SCHEME
3007     EXTENSIONS_SMALLTALK
3008     EXTENSIONS_JAVA
3009     EXTENSIONS_PROPERTIES
3010     EXTENSIONS_CSHARP
3011     EXTENSIONS_AWK
3012     EXTENSIONS_YCP
3013     EXTENSIONS_TCL
3014     EXTENSIONS_PERL
3015     EXTENSIONS_PHP
3016     EXTENSIONS_STRINGTABLE
3017     EXTENSIONS_RST
3018     EXTENSIONS_GLADE
3019     /* Here may follow more file extensions... */
3020   };
3021 
3022   table_ty *tp;
3023 
3024   for (tp = table; tp < ENDOF(table); ++tp)
3025     if (strcmp (extension, tp->extension) == 0)
3026       return tp->language;
3027   return NULL;
3028 }
3029