xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/msgmerge.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* GNU gettext - internationalization aids
2    Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc.
3    This file was written by Peter Miller <millerp@canb.auug.org.au>
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <alloca.h>
23 
24 #include <getopt.h>
25 #include <limits.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <locale.h>
31 
32 #include "closeout.h"
33 #include "dir-list.h"
34 #include "error.h"
35 #include "error-progname.h"
36 #include "progname.h"
37 #include "relocatable.h"
38 #include "basename.h"
39 #include "message.h"
40 #include "read-catalog.h"
41 #include "read-po.h"
42 #include "read-properties.h"
43 #include "read-stringtable.h"
44 #include "write-catalog.h"
45 #include "write-po.h"
46 #include "write-properties.h"
47 #include "write-stringtable.h"
48 #include "format.h"
49 #include "xalloc.h"
50 #include "xallocsa.h"
51 #include "obstack.h"
52 #include "c-strstr.h"
53 #include "exit.h"
54 #include "c-strcase.h"
55 #include "stpcpy.h"
56 #include "stpncpy.h"
57 #include "po-charset.h"
58 #include "msgl-iconv.h"
59 #include "msgl-equal.h"
60 #include "msgl-fsearch.h"
61 #include "lock.h"
62 #include "plural-count.h"
63 #include "backupfile.h"
64 #include "copy-file.h"
65 #include "propername.h"
66 #include "gettext.h"
67 
/* Shorthand that looks up the translation of a message.  */
#define _(str) gettext (str)

/* Allocation hooks for "obstack.h": chunks come from xmalloc and are
   released with free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
72 
73 
74 /* If true do not print unneeded messages.  */
75 static bool quiet;
76 
77 /* Verbosity level.  */
78 static int verbosity_level;
79 
80 /* Force output of PO file even if empty.  */
81 static int force_po;
82 
83 /* Apply the .pot file to each of the domains in the PO file.  */
84 static bool multi_domain_mode = false;
85 
86 /* Determines whether to use fuzzy matching.  */
87 static bool use_fuzzy_matching = true;
88 
89 /* Determines whether to keep old msgids as previous msgids.  */
90 static bool keep_previous = false;
91 
92 /* List of user-specified compendiums.  */
93 static message_list_list_ty *compendiums;
94 
95 /* List of corresponding filenames.  */
96 static string_list_ty *compendium_filenames;
97 
98 /* Update mode.  */
99 static bool update_mode = false;
100 static const char *version_control_string;
101 static const char *backup_suffix_string;
102 
103 /* Long options.  */
104 static const struct option long_options[] =
105 {
106   { "add-location", no_argument, &line_comment, 1 },
107   { "backup", required_argument, NULL, CHAR_MAX + 1 },
108   { "compendium", required_argument, NULL, 'C', },
109   { "directory", required_argument, NULL, 'D' },
110   { "escape", no_argument, NULL, 'E' },
111   { "force-po", no_argument, &force_po, 1 },
112   { "help", no_argument, NULL, 'h' },
113   { "indent", no_argument, NULL, 'i' },
114   { "multi-domain", no_argument, NULL, 'm' },
115   { "no-escape", no_argument, NULL, 'e' },
116   { "no-fuzzy-matching", no_argument, NULL, 'N' },
117   { "no-location", no_argument, &line_comment, 0 },
118   { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
119   { "output-file", required_argument, NULL, 'o' },
120   { "previous", no_argument, NULL, CHAR_MAX + 7 },
121   { "properties-input", no_argument, NULL, 'P' },
122   { "properties-output", no_argument, NULL, 'p' },
123   { "quiet", no_argument, NULL, 'q' },
124   { "sort-by-file", no_argument, NULL, 'F' },
125   { "sort-output", no_argument, NULL, 's' },
126   { "silent", no_argument, NULL, 'q' },
127   { "strict", no_argument, NULL, CHAR_MAX + 2 },
128   { "stringtable-input", no_argument, NULL, CHAR_MAX + 5 },
129   { "stringtable-output", no_argument, NULL, CHAR_MAX + 6 },
130   { "suffix", required_argument, NULL, CHAR_MAX + 3 },
131   { "update", no_argument, NULL, 'U' },
132   { "verbose", no_argument, NULL, 'v' },
133   { "version", no_argument, NULL, 'V' },
134   { "width", required_argument, NULL, 'w', },
135   { NULL, 0, NULL, 0 }
136 };
137 
138 
/* Four counters, presumably one per outcome of merging a message; the
   code that updates them is not part of this section of the file.  */
struct statistics
{
  size_t merged;
  size_t fuzzied;
  size_t missing;
  size_t obsolete;
};
146 
147 
148 /* Forward declaration of local functions.  */
149 static void usage (int status)
150 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
151 	__attribute__ ((noreturn))
152 #endif
153 ;
154 static void compendium (const char *filename);
155 static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
156 				 catalog_input_format_ty input_syntax,
157 				 msgdomain_list_ty **defp);
158 
159 
160 int
main(int argc,char ** argv)161 main (int argc, char **argv)
162 {
163   int opt;
164   bool do_help;
165   bool do_version;
166   char *output_file;
167   msgdomain_list_ty *def;
168   msgdomain_list_ty *result;
169   catalog_input_format_ty input_syntax = &input_format_po;
170   catalog_output_format_ty output_syntax = &output_format_po;
171   bool sort_by_filepos = false;
172   bool sort_by_msgid = false;
173 
174   /* Set program name for messages.  */
175   set_program_name (argv[0]);
176   error_print_progname = maybe_print_progname;
177   verbosity_level = 0;
178   quiet = false;
179   gram_max_allowed_errors = UINT_MAX;
180 
181 #ifdef HAVE_SETLOCALE
182   /* Set locale via LC_ALL.  */
183   setlocale (LC_ALL, "");
184 #endif
185 
186   /* Set the text message domain.  */
187   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
188   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
189   textdomain (PACKAGE);
190 
191   /* Ensure that write errors on stdout are detected.  */
192   atexit (close_stdout);
193 
194   /* Set default values for variables.  */
195   do_help = false;
196   do_version = false;
197   output_file = NULL;
198 
199   while ((opt = getopt_long (argc, argv, "C:D:eEFhimNo:pPqsUvVw:",
200 			     long_options, NULL))
201 	 != EOF)
202     switch (opt)
203       {
204       case '\0':		/* Long option.  */
205 	break;
206 
207       case 'C':
208 	compendium (optarg);
209 	break;
210 
211       case 'D':
212 	dir_list_append (optarg);
213 	break;
214 
215       case 'e':
216 	message_print_style_escape (false);
217 	break;
218 
219       case 'E':
220 	message_print_style_escape (true);
221 	break;
222 
223       case 'F':
224 	sort_by_filepos = true;
225 	break;
226 
227       case 'h':
228 	do_help = true;
229 	break;
230 
231       case 'i':
232 	message_print_style_indent ();
233 	break;
234 
235       case 'm':
236 	multi_domain_mode = true;
237 	break;
238 
239       case 'N':
240 	use_fuzzy_matching = false;
241 	break;
242 
243       case 'o':
244 	output_file = optarg;
245 	break;
246 
247       case 'p':
248 	output_syntax = &output_format_properties;
249 	break;
250 
251       case 'P':
252 	input_syntax = &input_format_properties;
253 	break;
254 
255       case 'q':
256 	quiet = true;
257 	break;
258 
259       case 's':
260 	sort_by_msgid = true;
261 	break;
262 
263       case 'U':
264 	update_mode = true;
265 	break;
266 
267       case 'v':
268 	++verbosity_level;
269 	break;
270 
271       case 'V':
272 	do_version = true;
273 	break;
274 
275       case 'w':
276 	{
277 	  int value;
278 	  char *endp;
279 	  value = strtol (optarg, &endp, 10);
280 	  if (endp != optarg)
281 	    message_page_width_set (value);
282 	}
283 	break;
284 
285       case CHAR_MAX + 1: /* --backup */
286 	version_control_string = optarg;
287 	break;
288 
289       case CHAR_MAX + 2: /* --strict */
290 	message_print_style_uniforum ();
291 	break;
292 
293       case CHAR_MAX + 3: /* --suffix */
294 	backup_suffix_string = optarg;
295 	break;
296 
297       case CHAR_MAX + 4: /* --no-wrap */
298 	message_page_width_ignore ();
299 	break;
300 
301       case CHAR_MAX + 5: /* --stringtable-input */
302 	input_syntax = &input_format_stringtable;
303 	break;
304 
305       case CHAR_MAX + 6: /* --stringtable-output */
306 	output_syntax = &output_format_stringtable;
307 	break;
308 
309       case CHAR_MAX + 7: /* --previous */
310 	keep_previous = true;
311 	break;
312 
313       default:
314 	usage (EXIT_FAILURE);
315 	break;
316       }
317 
318   /* Version information is requested.  */
319   if (do_version)
320     {
321       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
322       /* xgettext: no-wrap */
323       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
324 This is free software; see the source for copying conditions.  There is NO\n\
325 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
326 "),
327 	      "1995-1998, 2000-2006");
328       printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
329       exit (EXIT_SUCCESS);
330     }
331 
332   /* Help is requested.  */
333   if (do_help)
334     usage (EXIT_SUCCESS);
335 
336   /* Test whether we have an .po file name as argument.  */
337   if (optind >= argc)
338     {
339       error (EXIT_SUCCESS, 0, _("no input files given"));
340       usage (EXIT_FAILURE);
341     }
342   if (optind + 2 != argc)
343     {
344       error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
345       usage (EXIT_FAILURE);
346     }
347 
348   /* Verify selected options.  */
349   if (update_mode)
350     {
351       if (output_file != NULL)
352 	{
353 	  error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
354 		 "--update", "--output-file");
355 	}
356     }
357   else
358     {
359       if (version_control_string != NULL)
360 	{
361 	  error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
362 		 "--backup", "--update");
363 	  usage (EXIT_FAILURE);
364 	}
365       if (backup_suffix_string != NULL)
366 	{
367 	  error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
368 		 "--suffix", "--update");
369 	  usage (EXIT_FAILURE);
370 	}
371     }
372 
373   if (!line_comment && sort_by_filepos)
374     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
375 	   "--no-location", "--sort-by-file");
376 
377   if (sort_by_msgid && sort_by_filepos)
378     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
379 	   "--sort-output", "--sort-by-file");
380 
381   /* In update mode, --properties-input implies --properties-output.  */
382   if (update_mode && input_syntax == &input_format_properties)
383     output_syntax = &output_format_properties;
384   /* In update mode, --stringtable-input implies --stringtable-output.  */
385   if (update_mode && input_syntax == &input_format_stringtable)
386     output_syntax = &output_format_stringtable;
387 
388   /* Merge the two files.  */
389   result = merge (argv[optind], argv[optind + 1], input_syntax, &def);
390 
391   /* Sort the results.  */
392   if (sort_by_filepos)
393     msgdomain_list_sort_by_filepos (result);
394   else if (sort_by_msgid)
395     msgdomain_list_sort_by_msgid (result);
396 
397   if (update_mode)
398     {
399       /* Do nothing if the original file and the result are equal.  Also do
400 	 nothing if the original file and the result differ only by the
401 	 POT-Creation-Date in the header entry; this is needed for projects
402 	 which don't put the .pot file under CVS.  */
403       if (!msgdomain_list_equal (def, result, true))
404 	{
405 	  /* Back up def.po.  */
406 	  enum backup_type backup_type;
407 	  char *backup_file;
408 
409 	  output_file = argv[optind];
410 
411 	  if (backup_suffix_string == NULL)
412 	    {
413 	      backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
414 	      if (backup_suffix_string != NULL
415 		  && backup_suffix_string[0] == '\0')
416 		backup_suffix_string = NULL;
417 	    }
418 	  if (backup_suffix_string != NULL)
419 	    simple_backup_suffix = backup_suffix_string;
420 
421 	  backup_type = xget_version (_("backup type"), version_control_string);
422 	  if (backup_type != none)
423 	    {
424 	      backup_file = find_backup_file_name (output_file, backup_type);
425 	      copy_file_preserving (output_file, backup_file);
426 	    }
427 
428 	  /* Write the merged message list out.  */
429 	  msgdomain_list_print (result, output_file, output_syntax, true,
430 				false);
431 	}
432     }
433   else
434     {
435       /* Write the merged message list out.  */
436       msgdomain_list_print (result, output_file, output_syntax, force_po,
437 			    false);
438     }
439 
440   exit (EXIT_SUCCESS);
441 }
442 
443 
444 /* Display usage information and exit.  */
445 static void
usage(int status)446 usage (int status)
447 {
448   if (status != EXIT_SUCCESS)
449     fprintf (stderr, _("Try `%s --help' for more information.\n"),
450 	     program_name);
451   else
452     {
453       printf (_("\
454 Usage: %s [OPTION] def.po ref.pot\n\
455 "), program_name);
456       printf ("\n");
457       /* xgettext: no-wrap */
458       printf (_("\
459 Merges two Uniforum style .po files together.  The def.po file is an\n\
460 existing PO file with translations which will be taken over to the newly\n\
461 created file as long as they still match; comments will be preserved,\n\
462 but extracted comments and file positions will be discarded.  The ref.pot\n\
463 file is the last created PO file with up-to-date source references but\n\
464 old translations, or a PO Template file (generally created by xgettext);\n\
465 any translations or comments in the file will be discarded, however dot\n\
466 comments and file positions will be preserved.  Where an exact match\n\
467 cannot be found, fuzzy matching is used to produce better results.\n\
468 "));
469       printf ("\n");
470       printf (_("\
471 Mandatory arguments to long options are mandatory for short options too.\n"));
472       printf ("\n");
473       printf (_("\
474 Input file location:\n"));
475       printf (_("\
476   def.po                      translations referring to old sources\n"));
477       printf (_("\
478   ref.pot                     references to new sources\n"));
479       printf (_("\
480   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
481       printf (_("\
482   -C, --compendium=FILE       additional library of message translations,\n\
483                               may be specified more than once\n"));
484       printf ("\n");
485       printf (_("\
486 Operation mode:\n"));
487       printf (_("\
488   -U, --update                update def.po,\n\
489                               do nothing if def.po already up to date\n"));
490       printf ("\n");
491       printf (_("\
492 Output file location:\n"));
493       printf (_("\
494   -o, --output-file=FILE      write output to specified file\n"));
495       printf (_("\
496 The results are written to standard output if no output file is specified\n\
497 or if it is -.\n"));
498       printf ("\n");
499       printf (_("\
500 Output file location in update mode:\n"));
501       printf (_("\
502 The result is written back to def.po.\n"));
503       printf (_("\
504       --backup=CONTROL        make a backup of def.po\n"));
505       printf (_("\
506       --suffix=SUFFIX         override the usual backup suffix\n"));
507       printf (_("\
508 The version control method may be selected via the --backup option or through\n\
509 the VERSION_CONTROL environment variable.  Here are the values:\n\
510   none, off       never make backups (even if --backup is given)\n\
511   numbered, t     make numbered backups\n\
512   existing, nil   numbered if numbered backups exist, simple otherwise\n\
513   simple, never   always make simple backups\n"));
514       printf (_("\
515 The backup suffix is `~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\
516 environment variable.\n\
517 "));
518       printf ("\n");
519       printf (_("\
520 Operation modifiers:\n"));
521       printf (_("\
522   -m, --multi-domain          apply ref.pot to each of the domains in def.po\n"));
523       printf (_("\
524   -N, --no-fuzzy-matching     do not use fuzzy matching\n"));
525       printf (_("\
526       --previous              keep previous msgids of translated messages\n"));
527       printf ("\n");
528       printf (_("\
529 Input file syntax:\n"));
530       printf (_("\
531   -P, --properties-input      input files are in Java .properties syntax\n"));
532       printf (_("\
533       --stringtable-input     input files are in NeXTstep/GNUstep .strings\n\
534                               syntax\n"));
535       printf ("\n");
536       printf (_("\
537 Output details:\n"));
538       printf (_("\
539   -e, --no-escape             do not use C escapes in output (default)\n"));
540       printf (_("\
541   -E, --escape                use C escapes in output, no extended chars\n"));
542       printf (_("\
543       --force-po              write PO file even if empty\n"));
544       printf (_("\
545   -i, --indent                indented output style\n"));
546       printf (_("\
547       --no-location           suppress '#: filename:line' lines\n"));
548       printf (_("\
549       --add-location          preserve '#: filename:line' lines (default)\n"));
550       printf (_("\
551       --strict                strict Uniforum output style\n"));
552       printf (_("\
553   -p, --properties-output     write out a Java .properties file\n"));
554       printf (_("\
555       --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
556       printf (_("\
557   -w, --width=NUMBER          set output page width\n"));
558       printf (_("\
559       --no-wrap               do not break long message lines, longer than\n\
560                               the output page width, into several lines\n"));
561       printf (_("\
562   -s, --sort-output           generate sorted output\n"));
563       printf (_("\
564   -F, --sort-by-file          sort output by file location\n"));
565       printf ("\n");
566       printf (_("\
567 Informative output:\n"));
568       printf (_("\
569   -h, --help                  display this help and exit\n"));
570       printf (_("\
571   -V, --version               output version information and exit\n"));
572       printf (_("\
573   -v, --verbose               increase verbosity level\n"));
574       printf (_("\
575   -q, --quiet, --silent       suppress progress indicators\n"));
576       printf ("\n");
577       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
578 	     stdout);
579     }
580 
581   exit (status);
582 }
583 
584 
585 static void
compendium(const char * filename)586 compendium (const char *filename)
587 {
588   msgdomain_list_ty *mdlp;
589   size_t k;
590 
591   mdlp = read_catalog_file (filename, &input_format_po);
592   if (compendiums == NULL)
593     {
594       compendiums = message_list_list_alloc ();
595       compendium_filenames = string_list_alloc ();
596     }
597   for (k = 0; k < mdlp->nitems; k++)
598     {
599       message_list_list_append (compendiums, mdlp->item[k]->messages);
600       string_list_append (compendium_filenames, filename);
601     }
602 }
603 
604 
605 /* Data structure representing the messages with known translations.
606    They are composed of
607      - A message list from def.po,
608      - The compendiums.
609    The data structure is optimized for exact and fuzzy searches.  */
610 typedef struct definitions_ty definitions_ty;
611 struct definitions_ty
612 {
613   /* A list of message lists.  The first comes from def.po, the other ones
614      from the compendiums.  Each message list has a built-in hash table,
615      for speed when doing the exact searches.  */
616   message_list_list_ty *lists;
617   /* A fuzzy index of the compendiums, for speed when doing fuzzy searches.
618      Used only if use_fuzzy_matching is true and compendiums != NULL.  */
619   message_fuzzy_index_ty *findex;
620   /* A once-only execution guard for the initialization of the fuzzy index.
621      Needed for OpenMP.  */
622   gl_lock_define(, findex_init_lock)
623   /* The canonical encoding of the compendiums.  */
624   const char *canon_charset;
625 };
626 
627 static inline void
definitions_init(definitions_ty * definitions,const char * canon_charset)628 definitions_init (definitions_ty *definitions, const char *canon_charset)
629 {
630   definitions->lists = message_list_list_alloc ();
631   message_list_list_append (definitions->lists, NULL);
632   if (compendiums != NULL)
633     message_list_list_append_list (definitions->lists, compendiums);
634   definitions->findex = NULL;
635   gl_lock_init (definitions->findex_init_lock);
636   definitions->canon_charset = canon_charset;
637 }
638 
639 /* Create the fuzzy index.
640    Used only if use_fuzzy_matching is true and compendiums != NULL.  */
641 static inline void
definitions_init_findex(definitions_ty * definitions)642 definitions_init_findex (definitions_ty *definitions)
643 {
644   /* Protect against concurrent execution.  */
645   gl_lock_lock (definitions->findex_init_lock);
646   if (definitions->findex == NULL)
647     {
648       /* Combine all the compendium message lists into a single one.  Don't
649 	 bother checking for duplicates.  */
650       message_list_ty *all_compendium;
651       size_t i;
652 
653       all_compendium = message_list_alloc (false);
654       for (i = 0; i < compendiums->nitems; i++)
655 	{
656 	  message_list_ty *mlp = compendiums->item[i];
657 	  size_t j;
658 
659 	  for (j = 0; j < mlp->nitems; j++)
660 	    message_list_append (all_compendium, mlp->item[j]);
661 	}
662 
663       /* Create the fuzzy index from it.  */
664       definitions->findex =
665 	message_fuzzy_index_alloc (all_compendium, definitions->canon_charset);
666     }
667   gl_lock_unlock (definitions->findex_init_lock);
668 }
669 
670 /* Return the current list of non-compendium messages.  */
671 static inline message_list_ty *
definitions_current_list(const definitions_ty * definitions)672 definitions_current_list (const definitions_ty *definitions)
673 {
674   return definitions->lists->item[0];
675 }
676 
677 /* Set the current list of non-compendium messages.  */
678 static inline void
definitions_set_current_list(definitions_ty * definitions,message_list_ty * mlp)679 definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp)
680 {
681   definitions->lists->item[0] = mlp;
682 }
683 
684 /* Exact search.  */
685 static inline message_ty *
definitions_search(const definitions_ty * definitions,const char * msgctxt,const char * msgid)686 definitions_search (const definitions_ty *definitions,
687 		    const char *msgctxt, const char *msgid)
688 {
689   return message_list_list_search (definitions->lists, msgctxt, msgid);
690 }
691 
692 /* Fuzzy search.
693    Used only if use_fuzzy_matching is true.  */
694 static inline message_ty *
definitions_search_fuzzy(definitions_ty * definitions,const char * msgctxt,const char * msgid)695 definitions_search_fuzzy (definitions_ty *definitions,
696 			  const char *msgctxt, const char *msgid)
697 {
698   message_ty *mp1 =
699     message_list_search_fuzzy (definitions_current_list (definitions),
700 			       msgctxt, msgid);
701   if (compendiums != NULL)
702     {
703       message_ty *mp2;
704 
705       /* Create the fuzzy index lazily.  */
706       if (definitions->findex == NULL)
707 	definitions_init_findex (definitions);
708 
709       mp2 = message_fuzzy_index_search (definitions->findex, msgctxt, msgid);
710 
711       /* Choose the best among mp1, mp2.  */
712       if (mp1 == NULL
713 	  || (mp2 != NULL
714 	      && (fuzzy_search_goal_function (mp2, msgctxt, msgid)
715 		  > fuzzy_search_goal_function (mp1, msgctxt, msgid))))
716 	mp1 = mp2;
717     }
718 
719   return mp1;
720 }
721 
722 static inline void
definitions_destroy(definitions_ty * definitions)723 definitions_destroy (definitions_ty *definitions)
724 {
725   message_list_list_free (definitions->lists, 2);
726   if (definitions->findex != NULL)
727     message_fuzzy_index_free (definitions->findex);
728 }
729 
730 
731 static bool
msgfmt_check_pair_fails(const lex_pos_ty * pos,const char * msgid,const char * msgid_plural,const char * msgstr,size_t msgstr_len,size_t fmt)732 msgfmt_check_pair_fails (const lex_pos_ty *pos,
733 			 const char *msgid, const char *msgid_plural,
734 			 const char *msgstr, size_t msgstr_len,
735 			 size_t fmt)
736 {
737   bool failure;
738   struct formatstring_parser *parser = formatstring_parsers[fmt];
739   char *invalid_reason = NULL;
740   void *msgid_descr =
741     parser->parse (msgid_plural != NULL ? msgid_plural : msgid, false,
742 		   &invalid_reason);
743 
744   failure = false;
745   if (msgid_descr != NULL)
746     {
747       const char *p_end = msgstr + msgstr_len;
748       const char *p;
749 
750       for (p = msgstr; p < p_end; p += strlen (p) + 1)
751 	{
752 	  void *msgstr_descr = parser->parse (msgstr, true, &invalid_reason);
753 
754 	  if (msgstr_descr != NULL)
755 	    {
756 	      failure = parser->check (msgid_descr, msgstr_descr,
757 				       msgid_plural == NULL, NULL, NULL);
758 	      parser->free (msgstr_descr);
759 	    }
760 	  else
761 	    {
762 	      failure = true;
763 	      free (invalid_reason);
764 	    }
765 
766 	  if (failure)
767 	    break;
768 	}
769 
770       parser->free (msgid_descr);
771     }
772   else
773     free (invalid_reason);
774 
775   return failure;
776 }
777 
778 
779 static message_ty *
message_merge(message_ty * def,message_ty * ref,bool force_fuzzy)780 message_merge (message_ty *def, message_ty *ref, bool force_fuzzy)
781 {
782   const char *msgstr;
783   size_t msgstr_len;
784   const char *prev_msgctxt;
785   const char *prev_msgid;
786   const char *prev_msgid_plural;
787   message_ty *result;
788   size_t j, i;
789 
790   /* Take the msgid from the reference.  When fuzzy matches are made,
791      the definition will not be unique, but the reference will be -
792      usually because it has only been slightly changed.  */
793 
794   /* Take the msgstr from the definition.  The msgstr of the reference
795      is usually empty, as it was generated by xgettext.  If we currently
796      process the header entry we have to merge the msgstr by using the
797      Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference.  */
798   if (is_header (ref))
799     {
800       /* Oh, oh.  The header entry and we have something to fill in.  */
801       static const struct
802       {
803 	const char *name;
804 	size_t len;
805       } known_fields[] =
806       {
807 	{ "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
808 #define PROJECT_ID		0
809 	{ "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
810 #define REPORT_MSGID_BUGS_TO	1
811 	{ "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
812 #define POT_CREATION_DATE	2
813 	{ "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
814 #define PO_REVISION_DATE	3
815 	{ "Last-Translator:", sizeof ("Last-Translator:") - 1 },
816 #define LAST_TRANSLATOR		4
817 	{ "Language-Team:", sizeof ("Language-Team:") - 1 },
818 #define LANGUAGE_TEAM		5
819 	{ "MIME-Version:", sizeof ("MIME-Version:") - 1 },
820 #define MIME_VERSION		6
821 	{ "Content-Type:", sizeof ("Content-Type:") - 1 },
822 #define CONTENT_TYPE		7
823 	{ "Content-Transfer-Encoding:",
824 	  sizeof ("Content-Transfer-Encoding:") - 1 }
825 #define CONTENT_TRANSFER	8
826       };
827 #define UNKNOWN	9
828       struct
829       {
830 	const char *string;
831 	size_t len;
832       } header_fields[UNKNOWN + 1];
833       struct obstack pool;
834       const char *cp;
835       char *newp;
836       size_t len, cnt;
837 
838       /* Clear all fields.  */
839       memset (header_fields, '\0', sizeof (header_fields));
840 
841       /* Prepare a temporary memory pool.  */
842       obstack_init (&pool);
843 
844       cp = def->msgstr;
845       while (*cp != '\0')
846 	{
847 	  const char *endp = strchr (cp, '\n');
848 	  int terminated = endp != NULL;
849 
850 	  if (!terminated)
851 	    {
852 	      /* Add a trailing newline.  */
853 	      char *copy;
854 	      endp = strchr (cp, '\0');
855 
856 	      len = endp - cp + 1;
857 
858 	      copy = (char *) obstack_alloc (&pool, len + 1);
859 	      stpcpy (stpcpy (copy, cp), "\n");
860 	      cp = copy;
861 	    }
862 	  else
863 	    {
864 	      len = (endp - cp) + 1;
865 	      ++endp;
866 	    }
867 
868 	  /* Compare with any of the known fields.  */
869 	  for (cnt = 0;
870 	       cnt < sizeof (known_fields) / sizeof (known_fields[0]);
871 	       ++cnt)
872 	    if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
873 		== 0)
874 	      break;
875 
876 	  if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
877 	    {
878 	      header_fields[cnt].string = &cp[known_fields[cnt].len];
879 	      header_fields[cnt].len = len - known_fields[cnt].len;
880 	    }
881 	  else
882 	    {
883 	      /* It's an unknown field.  Append content to what is already
884 		 known.  */
885 	      char *extended =
886 		(char *) obstack_alloc (&pool,
887 					header_fields[UNKNOWN].len + len + 1);
888 	      memcpy (extended, header_fields[UNKNOWN].string,
889 		      header_fields[UNKNOWN].len);
890 	      memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
891 	      extended[header_fields[UNKNOWN].len + len] = '\0';
892 	      header_fields[UNKNOWN].string = extended;
893 	      header_fields[UNKNOWN].len += len;
894 	    }
895 
896 	  cp = endp;
897 	}
898 
899       {
900 	const char *msgid_bugs_ptr;
901 
902 	msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
903 	if (msgid_bugs_ptr != NULL)
904 	  {
905 	    size_t msgid_bugs_len;
906 	    const char *endp;
907 
908 	    msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;
909 
910 	    endp = strchr (msgid_bugs_ptr, '\n');
911 	    if (endp == NULL)
912 	      {
913 		/* Add a trailing newline.  */
914 		char *extended;
915 		endp = strchr (msgid_bugs_ptr, '\0');
916 		msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
917 		extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
918 		stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
919 		msgid_bugs_ptr = extended;
920 	      }
921 	    else
922 	      msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
923 
924 	    header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
925 	    header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
926 	  }
927       }
928 
929       {
930 	const char *pot_date_ptr;
931 
932 	pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
933 	if (pot_date_ptr != NULL)
934 	  {
935 	    size_t pot_date_len;
936 	    const char *endp;
937 
938 	    pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;
939 
940 	    endp = strchr (pot_date_ptr, '\n');
941 	    if (endp == NULL)
942 	      {
943 		/* Add a trailing newline.  */
944 		char *extended;
945 		endp = strchr (pot_date_ptr, '\0');
946 		pot_date_len = (endp - pot_date_ptr) + 1;
947 		extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
948 		stpcpy (stpcpy (extended, pot_date_ptr), "\n");
949 		pot_date_ptr = extended;
950 	      }
951 	    else
952 	      pot_date_len = (endp - pot_date_ptr) + 1;
953 
954 	    header_fields[POT_CREATION_DATE].string = pot_date_ptr;
955 	    header_fields[POT_CREATION_DATE].len = pot_date_len;
956 	  }
957       }
958 
959       /* Concatenate all the various fields.  */
960       len = 0;
961       for (cnt = 0; cnt < UNKNOWN; ++cnt)
962 	if (header_fields[cnt].string != NULL)
963 	  len += known_fields[cnt].len + header_fields[cnt].len;
964       len += header_fields[UNKNOWN].len;
965 
966       cp = newp = (char *) xmalloc (len + 1);
967       newp[len] = '\0';
968 
969 #define IF_FILLED(idx)							      \
970       if (header_fields[idx].string)					      \
971 	newp = stpncpy (stpcpy (newp, known_fields[idx].name),		      \
972 			header_fields[idx].string, header_fields[idx].len)
973 
974       IF_FILLED (PROJECT_ID);
975       IF_FILLED (REPORT_MSGID_BUGS_TO);
976       IF_FILLED (POT_CREATION_DATE);
977       IF_FILLED (PO_REVISION_DATE);
978       IF_FILLED (LAST_TRANSLATOR);
979       IF_FILLED (LANGUAGE_TEAM);
980       IF_FILLED (MIME_VERSION);
981       IF_FILLED (CONTENT_TYPE);
982       IF_FILLED (CONTENT_TRANSFER);
983       if (header_fields[UNKNOWN].string != NULL)
984 	stpcpy (newp, header_fields[UNKNOWN].string);
985 
986 #undef IF_FILLED
987 
988       /* Free the temporary memory pool.  */
989       obstack_free (&pool, NULL);
990 
991       msgstr = cp;
992       msgstr_len = strlen (cp) + 1;
993 
994       prev_msgctxt = NULL;
995       prev_msgid = NULL;
996       prev_msgid_plural = NULL;
997     }
998   else
999     {
1000       msgstr = def->msgstr;
1001       msgstr_len = def->msgstr_len;
1002 
1003       if (def->is_fuzzy)
1004 	{
1005 	  prev_msgctxt = def->prev_msgctxt;
1006 	  prev_msgid = def->prev_msgid;
1007 	  prev_msgid_plural = def->prev_msgid_plural;
1008 	}
1009       else
1010 	{
1011 	  prev_msgctxt = def->msgctxt;
1012 	  prev_msgid = def->msgid;
1013 	  prev_msgid_plural = def->msgid_plural;
1014 	}
1015     }
1016 
1017   result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
1018 			  xstrdup (ref->msgid), ref->msgid_plural,
1019 			  msgstr, msgstr_len, &def->pos);
1020 
1021   /* Take the comments from the definition file.  There will be none at
1022      all in the reference file, as it was generated by xgettext.  */
1023   if (def->comment)
1024     for (j = 0; j < def->comment->nitems; ++j)
1025       message_comment_append (result, def->comment->item[j]);
1026 
1027   /* Take the dot comments from the reference file, as they are
1028      generated by xgettext.  Any in the definition file are old ones
1029      collected by previous runs of xgettext and msgmerge.  */
1030   if (ref->comment_dot)
1031     for (j = 0; j < ref->comment_dot->nitems; ++j)
1032       message_comment_dot_append (result, ref->comment_dot->item[j]);
1033 
1034   /* The flags are mixed in a special way.  Some informations come
1035      from the reference message (such as format/no-format), others
1036      come from the definition file (fuzzy or not).  */
1037   result->is_fuzzy = def->is_fuzzy | force_fuzzy;
1038 
1039   for (i = 0; i < NFORMATS; i++)
1040     {
1041       result->is_format[i] = ref->is_format[i];
1042 
1043       /* If the reference message is marked as being a format specifier,
1044 	 but the definition message is not, we check if the resulting
1045 	 message would pass "msgfmt -c".  If yes, then all is fine.  If
1046 	 not, we add a fuzzy marker, because
1047 	 1. the message needs the translator's attention,
1048 	 2. msgmerge must not transform a PO file which passes "msgfmt -c"
1049 	    into a PO file which doesn't.  */
1050       if (!result->is_fuzzy
1051 	  && possible_format_p (ref->is_format[i])
1052 	  && !possible_format_p (def->is_format[i])
1053 	  && msgfmt_check_pair_fails (&def->pos, ref->msgid, ref->msgid_plural,
1054 				      msgstr, msgstr_len, i))
1055 	result->is_fuzzy = true;
1056     }
1057 
1058   result->do_wrap = ref->do_wrap;
1059 
1060   /* Insert previous msgid, commented out with "#|".
1061      Do so only when --previous is specified, for backward compatibility.
1062      Since the "previous msgid" represents the original msgid that led to
1063      the current msgstr,
1064        - we can omit it if the resulting message is not fuzzy,
1065        - otherwise, if the corresponding message from the definition file
1066          was translated (not fuzzy), we use that message's msgid,
1067        - otherwise, we use that message's prev_msgid.  */
1068   if (keep_previous && result->is_fuzzy)
1069     {
1070       result->prev_msgctxt = prev_msgctxt;
1071       result->prev_msgid = prev_msgid;
1072       result->prev_msgid_plural = prev_msgid_plural;
1073     }
1074 
1075   /* Take the file position comments from the reference file, as they
1076      are generated by xgettext.  Any in the definition file are old ones
1077      collected by previous runs of xgettext and msgmerge.  */
1078   for (j = 0; j < ref->filepos_count; ++j)
1079     {
1080       lex_pos_ty *pp = &ref->filepos[j];
1081       message_comment_filepos (result, pp->file_name, pp->line_number);
1082     }
1083 
1084   /* Special postprocessing is needed if the reference message is a
1085      plural form and the definition message isn't, or vice versa.  */
1086   if (ref->msgid_plural != NULL)
1087     {
1088       if (def->msgid_plural == NULL)
1089 	result->used = 1;
1090     }
1091   else
1092     {
1093       if (def->msgid_plural != NULL)
1094 	result->used = 2;
1095     }
1096 
1097   /* All done, return the merged message to the caller.  */
1098   return result;
1099 }
1100 
1101 
/* Progress indicator interval: during matching, a dot is written to stderr
   when the running count of processed messages is a multiple of this
   value (and only if !quiet and verbosity_level <= 1).  */
1102 #define DOT_FREQUENCY 10
1103 
/* Match every message of REFMLP against DEFINITIONS and append the
   outcome to RESULTMLP.
   For each reference message one of the following happens:
     - an exact match is found with definitions_search: the pair is merged
       with message_merge and STATS->merged is incremented;
     - otherwise, for a non-header message, a match found with
       definitions_search_fuzzy (only tried when use_fuzzy_matching is set)
       is merged with the fuzzy flag forced and STATS->fuzzied is
       incremented;
     - otherwise a non-header message is copied with message_copy and
       STATS->missing is incremented;
     - a header message without an exact match produces no result entry.
   *PROCESSED is incremented once per reference message.  FN1 is only used
   in a diagnostic; FN2 is not used in this function.
   Finally, if any result message has a nonzero used field, the plural
   form mismatches flagged there are repaired and the used field of every
   result message is reset to 0.  */
1104 static void
match_domain(const char * fn1,const char * fn2,definitions_ty * definitions,message_list_ty * refmlp,message_list_ty * resultmlp,struct statistics * stats,unsigned int * processed)1105 match_domain (const char *fn1, const char *fn2,
1106 	      definitions_ty *definitions, message_list_ty *refmlp,
1107 	      message_list_ty *resultmlp,
1108 	      struct statistics *stats, unsigned int *processed)
1109 {
1110   message_ty *header_entry;
1111   unsigned long int nplurals;
1112   char *untranslated_plural_msgstr;
1113   struct search_result { message_ty *found; bool fuzzy; } *search_results;
1114   size_t j;
1115 
  /* Take the plural count from the header entry of the current definitions
     list and prepare a buffer of that many NUL bytes, i.e. that many
     consecutive empty strings.
     NOTE(review): this buffer is never freed here, and it may be installed
     as the msgstr of several result messages further down, so those
     messages would share one allocation.  */
1116   header_entry =
1117     message_list_search (definitions_current_list (definitions), NULL, "");
1118   nplurals = get_plural_count (header_entry ? header_entry->msgstr : NULL);
1119   untranslated_plural_msgstr = (char *) xmalloc (nplurals);
1120   memset (untranslated_plural_msgstr, '\0', nplurals);
1121 
1122   /* Most of the time is spent in definitions_search_fuzzy.
1123      Perform it in a separate loop that can be parallelized by an OpenMP
1124      capable compiler.  */
1125   search_results =
1126     (struct search_result *)
1127     xmalloc (refmlp->nitems * sizeof (struct search_result));
1128   {
1129     long int nn = refmlp->nitems;
1130     long int jj;
1131 
1132     /* Tell the OpenMP capable compiler to distribute this loop across
1133        several threads.  The schedule is dynamic, because for some messages
1134        the loop body can be executed very quickly, whereas for others it takes
1135        a long time.  */
1136     #ifdef _OPENMP
1137     # pragma omp parallel for schedule(dynamic)
1138     #endif
1139     for (jj = 0; jj < nn; jj++)
1140       {
1141 	message_ty *refmsg = refmlp->item[jj];
1142 	message_ty *defmsg;
1143 
1144 	/* Because merging can take a while we print something to signal
1145 	   we are not dead.  */
	/* NOTE(review): *processed is read here outside of the atomic
	   update just below, so with OpenMP enabled the dot cadence is
	   presumably only approximate -- confirm this is acceptable.  */
1146 	if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
1147 	  fputc ('.', stderr);
1148 	#ifdef _OPENMP
1149 	# pragma omp atomic
1150 	#endif
1151 	(*processed)++;
1152 
1153 	/* See if it is in the other file.  */
1154 	defmsg =
1155 	  definitions_search (definitions, refmsg->msgctxt, refmsg->msgid);
1156 	if (defmsg != NULL)
1157 	  {
1158 	    search_results[jj].found = defmsg;
1159 	    search_results[jj].fuzzy = false;
1160 	  }
1161 	else if (!is_header (refmsg)
1162 		 /* If the message was not defined at all, try to find a very
1163 		    similar message, it could be a typo, or the suggestion may
1164 		    help.  */
1165 		 && use_fuzzy_matching
1166 		 && ((defmsg =
1167 		        definitions_search_fuzzy (definitions,
1168 						  refmsg->msgctxt,
1169 						  refmsg->msgid)) != NULL))
1170 	  {
1171 	    search_results[jj].found = defmsg;
1172 	    search_results[jj].fuzzy = true;
1173 	  }
1174 	else
	  /* No match; the fuzzy member is left unset and is only read
	     after a check that found is non-NULL.  */
1175 	  search_results[jj].found = NULL;
1176       }
1177   }
1178 
  /* Second pass, sequential and in reference order: consume the search
     results and build the result list.  */
1179   for (j = 0; j < refmlp->nitems; j++)
1180     {
1181       message_ty *refmsg = refmlp->item[j];
1182 
1183       /* See if it is in the other file.
1184 	 This used definitions_search.  */
1185       if (search_results[j].found != NULL && !search_results[j].fuzzy)
1186 	{
1187 	  message_ty *defmsg = search_results[j].found;
1188 	  /* Merge the reference with the definition: take the #. and
1189 	     #: comments from the reference, take the # comments from
1190 	     the definition, take the msgstr from the definition.  Add
1191 	     this merged entry to the output message list.  */
1192 	  message_ty *mp = message_merge (defmsg, refmsg, false);
1193 
1194 	  message_list_append (resultmlp, mp);
1195 
1196 	  /* Remember that this message has been used, when we scan
1197 	     later to see if anything was omitted.  */
1198 	  defmsg->used = 1;
1199 	  stats->merged++;
1200 	}
1201       else if (!is_header (refmsg))
1202 	{
1203 	  /* If the message was not defined at all, try to find a very
1204 	     similar message, it could be a typo, or the suggestion may
1205 	     help.  This search assumed use_fuzzy_matching and used
1206 	     definitions_search_fuzzy.  */
1207 	  if (search_results[j].found != NULL && search_results[j].fuzzy)
1208 	    {
1209 	      message_ty *defmsg = search_results[j].found;
1210 	      message_ty *mp;
1211 
1212 	      if (verbosity_level > 1)
1213 		{
		  /* NOTE(review): the decrement between the two calls
		     presumably makes the pair of diagnostics count as a
		     single message -- confirm against
		     po_gram_error_at_line.  */
1214 		  po_gram_error_at_line (&refmsg->pos, _("\
1215 this message is used but not defined..."));
1216 		  error_message_count--;
1217 		  po_gram_error_at_line (&defmsg->pos, _("\
1218 ...but this definition is similar"));
1219 		}
1220 
1221 	      /* Merge the reference with the definition: take the #. and
1222 		 #: comments from the reference, take the # comments from
1223 		 the definition, take the msgstr from the definition.  Add
1224 		 this merged entry to the output message list.  */
1225 	      mp = message_merge (defmsg, refmsg, true);
1226 
1227 	      message_list_append (resultmlp, mp);
1228 
1229 	      /* Remember that this message has been used, when we scan
1230 		 later to see if anything was omitted.  */
1231 	      defmsg->used = 1;
1232 	      stats->fuzzied++;
1233 	      if (!quiet && verbosity_level <= 1)
1234 		/* Always print a dot if we handled a fuzzy match.  */
1235 		fputc ('.', stderr);
1236 	    }
1237 	  else
1238 	    {
	      /* No definition at all: carry the reference message over
		 as a copy.  */
1239 	      message_ty *mp;
1240 	      bool is_untranslated;
1241 	      const char *p;
1242 	      const char *pend;
1243 
1244 	      if (verbosity_level > 1)
1245 		po_gram_error_at_line (&refmsg->pos, _("\
1246 this message is used but not defined in %s"), fn1);
1247 
1248 	      mp = message_copy (refmsg);
1249 
1250 	      if (mp->msgid_plural != NULL)
1251 		{
1252 		  /* Test if mp is untranslated.  (It most likely is.)  */
1253 		  is_untranslated = true;
1254 		  for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++)
1255 		    if (*p != '\0')
1256 		      {
1257 			is_untranslated = false;
1258 			break;
1259 		      }
1260 		  if (is_untranslated)
1261 		    {
1262 		      /* Change mp->msgstr_len consecutive empty strings into
1263 			 nplurals consecutive empty strings.  */
		      /* The existing all-NUL buffer is kept when it is
			 already long enough; only the length changes.  */
1264 		      if (nplurals > mp->msgstr_len)
1265 			mp->msgstr = untranslated_plural_msgstr;
1266 		      mp->msgstr_len = nplurals;
1267 		    }
1268 		}
1269 
1270 	      message_list_append (resultmlp, mp);
1271 	      stats->missing++;
1272 	    }
1273 	}
1274     }
1275 
1276   free (search_results);
1277 
1278   /* Now postprocess the problematic merges.  This is needed because we
1279      want the result to pass the "msgfmt -c -v" check.  */
1280   {
1281     /* message_merge sets mp->used to 1 or 2, depending on the problem.
1282        Compute the bitwise OR of all these.  */
1283     int problematic = 0;
1284 
1285     for (j = 0; j < resultmlp->nitems; j++)
1286       problematic |= resultmlp->item[j]->used;
1287 
1288     if (problematic)
1289       {
	/* NOTE: this nplurals, and header_entry below, shadow the
	   function-level variables of the same names.  They are computed
	   from the result list, not from the definitions list.  */
1290 	unsigned long int nplurals = 0;
1291 
1292 	if (problematic & 1)
1293 	  {
1294 	    /* Need to know nplurals of the result domain.  */
1295 	    message_ty *header_entry =
1296 	      message_list_search (resultmlp, NULL, "");
1297 
1298 	    nplurals = get_plural_count (header_entry
1299 					 ? header_entry->msgstr
1300 					 : NULL);
1301 	  }
1302 
1303 	for (j = 0; j < resultmlp->nitems; j++)
1304 	  {
1305 	    message_ty *mp = resultmlp->item[j];
1306 
1307 	    if ((mp->used & 1) && (nplurals > 0))
1308 	      {
1309 		/* ref->msgid_plural != NULL but def->msgid_plural == NULL.
1310 		   Use a copy of def->msgstr for each possible plural form.  */
1311 		size_t new_msgstr_len;
1312 		char *new_msgstr;
1313 		char *p;
1314 		unsigned long i;
1315 
1316 		if (verbosity_level > 1)
1317 		  {
1318 		    po_gram_error_at_line (&mp->pos, _("\
1319 this message should define plural forms"));
1320 		  }
1321 
		/* Build nplurals back-to-back copies of the current msgstr
		   (msgstr_len bytes each) in a fresh buffer.  The previous
		   msgstr buffer is not freed here.  */
1322 		new_msgstr_len = nplurals * mp->msgstr_len;
1323 		new_msgstr = (char *) xmalloc (new_msgstr_len);
1324 		for (i = 0, p = new_msgstr; i < nplurals; i++)
1325 		  {
1326 		    memcpy (p, mp->msgstr, mp->msgstr_len);
1327 		    p += mp->msgstr_len;
1328 		  }
1329 		mp->msgstr = new_msgstr;
1330 		mp->msgstr_len = new_msgstr_len;
1331 		mp->is_fuzzy = true;
1332 	      }
1333 
1334 	    if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1))
1335 	      {
1336 		/* ref->msgid_plural == NULL but def->msgid_plural != NULL.
1337 		   Use only the first among the plural forms.  */
1338 
1339 		if (verbosity_level > 1)
1340 		  {
1341 		    po_gram_error_at_line (&mp->pos, _("\
1342 this message should not define plural forms"));
1343 		  }
1344 
		/* Truncate to the first NUL-terminated string.  */
1345 		mp->msgstr_len = strlen (mp->msgstr) + 1;
1346 		mp->is_fuzzy = true;
1347 	      }
1348 
1349 	    /* Postprocessing of this message is done.  */
1350 	    mp->used = 0;
1351 	  }
1352       }
1353   }
1354 }
1355 
/* Merge the definitions file FN1 with the references file FN2, both read
   with INPUT_SYNTAX, and return the resulting domain list.
   Steps, in order:
     1. read both files with read_catalog_file;
     2. give every reference domain lacking a header entry a dummy one;
     3. bring the definitions and the global compendiums into a common
	encoding (UTF-8 if a reference header declares charset UTF-8);
     4. run match_domain for each domain;
     5. append definition messages whose used field is still zero to the
	result, marked obsolete and stripped of extracted and file position
	comments;
     6. print statistics or a completion notice to stderr.
   The definitions list, possibly replaced by the result of
   iconv_msgdomain_list, is stored into *DEFP.  */
1356 static msgdomain_list_ty *
merge(const char * fn1,const char * fn2,catalog_input_format_ty input_syntax,msgdomain_list_ty ** defp)1357 merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax,
1358        msgdomain_list_ty **defp)
1359 {
1360   msgdomain_list_ty *def;
1361   msgdomain_list_ty *ref;
1362   size_t j, k;
1363   unsigned int processed;
1364   struct statistics stats;
1365   msgdomain_list_ty *result;
1366   definitions_ty definitions;
1367   message_list_ty *empty_list;
1368 
1369   stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;
1370 
1371   /* This is the definitions file, created by a human.  */
1372   def = read_catalog_file (fn1, input_syntax);
1373 
1374   /* This is the references file, created by groping the sources with
1375      the xgettext program.  */
1376   ref = read_catalog_file (fn2, input_syntax);
1377   /* Add a dummy header entry, if the references file contains none.  */
1378   for (k = 0; k < ref->nitems; k++)
1379     if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
1380       {
	/* The dummy header does not come from any input file, so its
	   position refers to this very source line.  */
1381 	static lex_pos_ty pos = { __FILE__, __LINE__ };
1382 	message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos);
1383 
1384 	message_list_prepend (ref->item[k]->messages, refheader);
1385       }
1386 
1387   /* The references file can be either in ASCII or in UTF-8.  If it is
1388      in UTF-8, we have to convert the definitions and the compendiums to
1389      UTF-8 as well.  */
1390   {
1391     bool was_utf8 = false;
    /* Scan every non-obsolete header of the references for a charset
       declaration equal to UTF-8, compared case-insensitively.  */
1392     for (k = 0; k < ref->nitems; k++)
1393       {
1394 	message_list_ty *mlp = ref->item[k]->messages;
1395 
1396 	for (j = 0; j < mlp->nitems; j++)
1397 	  if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1398 	    {
1399 	      const char *header = mlp->item[j]->msgstr;
1400 
1401 	      if (header != NULL)
1402 		{
1403 		  const char *charsetstr = c_strstr (header, "charset=");
1404 
1405 		  if (charsetstr != NULL)
1406 		    {
1407 		      size_t len;
1408 
1409 		      charsetstr += strlen ("charset=");
1410 		      len = strcspn (charsetstr, " \t\n");
1411 		      if (len == strlen ("UTF-8")
1412 			  && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
1413 			was_utf8 = true;
1414 		    }
1415 		}
1416 	    }
1417 	}
1418     if (was_utf8)
1419       {
1420 	def = iconv_msgdomain_list (def, "UTF-8", fn1);
1421 	if (compendiums != NULL)
1422 	  for (k = 0; k < compendiums->nitems; k++)
1423 	    iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
1424 				compendium_filenames->item[k]);
1425       }
1426     else if (compendiums != NULL && compendiums->nitems > 0)
1427       {
1428 	/* Ensure that the definitions and the compendiums are in the same
1429 	   encoding.  Prefer the encoding of the definitions file, if
1430 	   possible; otherwise, if the definitions file is empty and the
1431 	   compendiums are all in the same encoding, use that encoding;
1432 	   otherwise, use UTF-8.  */
1433 	bool conversion_done = false;
1434 	{
1435 	  char *charset = NULL;
1436 
1437 	  /* Get the encoding of the definitions file.  */
1438 	  for (k = 0; k < def->nitems; k++)
1439 	    {
1440 	      message_list_ty *mlp = def->item[k]->messages;
1441 
1442 	      for (j = 0; j < mlp->nitems; j++)
1443 		if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1444 		  {
1445 		    const char *header = mlp->item[j]->msgstr;
1446 
1447 		    if (header != NULL)
1448 		      {
1449 			const char *charsetstr = c_strstr (header, "charset=");
1450 
1451 			if (charsetstr != NULL)
1452 			  {
1453 			    size_t len;
1454 
			    /* Copy the charset name into a NUL-terminated
			       scratch buffer; it is released with freesa
			       further down.  */
1455 			    charsetstr += strlen ("charset=");
1456 			    len = strcspn (charsetstr, " \t\n");
1457 			    charset = (char *) xallocsa (len + 1);
1458 			    memcpy (charset, charsetstr, len);
1459 			    charset[len] = '\0';
1460 			    break;
1461 			  }
1462 		      }
1463 		  }
	      /* The first declaration found wins; stop scanning domains.  */
1464 	      if (charset != NULL)
1465 		break;
1466 	    }
1467 	  if (charset != NULL)
1468 	    {
1469 	      const char *canon_charset = po_charset_canonicalize (charset);
1470 
1471 	      if (canon_charset != NULL)
1472 		{
1473 		  bool all_compendiums_iconvable = true;
1474 
1475 		  if (compendiums != NULL)
1476 		    for (k = 0; k < compendiums->nitems; k++)
1477 		      if (!is_message_list_iconvable (compendiums->item[k],
1478 						      NULL, canon_charset))
1479 			{
1480 			  all_compendiums_iconvable = false;
1481 			  break;
1482 			}
1483 
1484 		  if (all_compendiums_iconvable)
1485 		    {
1486 		      /* Convert the compendiums to def's encoding.  */
1487 		      if (compendiums != NULL)
1488 			for (k = 0; k < compendiums->nitems; k++)
1489 			  iconv_message_list (compendiums->item[k],
1490 					      NULL, canon_charset,
1491 					      compendium_filenames->item[k]);
1492 		      conversion_done = true;
1493 		    }
1494 		}
1495 	      freesa (charset);
1496 	    }
1497 	}
1498 	if (!conversion_done)
1499 	  {
1500 	    if (def->nitems == 0
1501 		|| (def->nitems == 1 && def->item[0]->messages->nitems == 0))
1502 	      {
1503 		/* The definitions file is empty.
1504 		   Compare the encodings of the compendiums.  */
1505 		const char *common_canon_charset = NULL;
1506 
1507 		for (k = 0; k < compendiums->nitems; k++)
1508 		  {
1509 		    message_list_ty *mlp = compendiums->item[k];
1510 		    char *charset = NULL;
1511 		    const char *canon_charset = NULL;
1512 
1513 		    for (j = 0; j < mlp->nitems; j++)
1514 		      if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1515 			{
1516 			  const char *header = mlp->item[j]->msgstr;
1517 
1518 			  if (header != NULL)
1519 			    {
1520 			      const char *charsetstr =
1521 				c_strstr (header, "charset=");
1522 
1523 			      if (charsetstr != NULL)
1524 				{
1525 				  size_t len;
1526 
1527 				  charsetstr += strlen ("charset=");
1528 				  len = strcspn (charsetstr, " \t\n");
1529 				  charset = (char *) xallocsa (len + 1);
1530 				  memcpy (charset, charsetstr, len);
1531 				  charset[len] = '\0';
1532 
1533 				  break;
1534 				}
1535 			    }
1536 			}
1537 		    if (charset != NULL)
1538 		      {
1539 			canon_charset = po_charset_canonicalize (charset);
1540 			freesa (charset);
1541 		      }
1542 		    /* If no charset declaration was found in this file,
1543 		       or if it is not a valid encoding name, or if it
1544 		       differs from the common charset found so far,
1545 		       we have no common charset.  */
		    /* NOTE(review): the charsets are compared by pointer,
		       which presumably relies on po_charset_canonicalize
		       returning one unique pointer per encoding --
		       confirm.  */
1546 		    if (canon_charset == NULL
1547 			|| (common_canon_charset != NULL
1548 			    && canon_charset != common_canon_charset))
1549 		      {
1550 			common_canon_charset = NULL;
1551 			break;
1552 		      }
1553 		    common_canon_charset = canon_charset;
1554 		  }
1555 
1556 		if (common_canon_charset != NULL)
1557 		  /* No conversion needed in this case.  */
1558 		  conversion_done = true;
1559 	      }
1560 	    if (!conversion_done)
1561 	      {
1562 		/* It's too hairy to find out what would be the optimal target
1563 		   encoding.  So, convert everything to UTF-8.  */
1564 		def = iconv_msgdomain_list (def, "UTF-8", fn1);
1565 		if (compendiums != NULL)
1566 		  for (k = 0; k < compendiums->nitems; k++)
1567 		    iconv_message_list (compendiums->item[k],
1568 					NULL, po_charset_utf8,
1569 					compendium_filenames->item[k]);
1570 	      }
1571 	  }
1572       }
1573   }
1574 
1575   /* Initialize and preprocess the total set of message definitions.  */
1576   definitions_init (&definitions, po_charset_utf8);
  /* empty_list stands in for the definitions of a domain that is present
     in the references but absent from the definitions file.  It is not
     released in this function.  */
1577   empty_list = message_list_alloc (false);
1578 
1579   result = msgdomain_list_alloc (false);
1580   processed = 0;
1581 
1582   /* Every reference must be matched with its definition. */
1583   if (!multi_domain_mode)
1584     for (k = 0; k < ref->nitems; k++)
1585       {
1586 	const char *domain = ref->item[k]->domain;
1587 	message_list_ty *refmlp = ref->item[k]->messages;
1588 	message_list_ty *resultmlp =
1589 	  msgdomain_list_sublist (result, domain, true);
1590 	message_list_ty *defmlp;
1591 
1592 	defmlp = msgdomain_list_sublist (def, domain, false);
1593 	if (defmlp == NULL)
1594 	  defmlp = empty_list;
1595 	definitions_set_current_list (&definitions, defmlp);
1596 
1597 	match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
1598 		      &stats, &processed);
1599       }
1600   else
1601     {
1602       /* Apply the references messages in the default domain to each of
1603 	 the definition domains.  */
      /* Only the first reference domain, ref->item[0], is consulted in
	 this mode.  */
1604       message_list_ty *refmlp = ref->item[0]->messages;
1605 
1606       for (k = 0; k < def->nitems; k++)
1607 	{
1608 	  const char *domain = def->item[k]->domain;
1609 	  message_list_ty *defmlp = def->item[k]->messages;
1610 
1611 	  /* Ignore the default message domain if it has no messages.  */
1612 	  if (k > 0 || defmlp->nitems > 0)
1613 	    {
1614 	      message_list_ty *resultmlp =
1615 		msgdomain_list_sublist (result, domain, true);
1616 
1617 	      definitions_set_current_list (&definitions, defmlp);
1618 
1619 	      match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
1620 			    &stats, &processed);
1621 	    }
1622 	}
1623     }
1624 
1625   definitions_destroy (&definitions);
1626 
1627   /* Look for messages in the definition file, which are not present
1628      in the reference file, indicating messages which are defined but not
1629      used in the program.  Don't scan the compendium(s).  */
1630   for (k = 0; k < def->nitems; ++k)
1631     {
1632       const char *domain = def->item[k]->domain;
1633       message_list_ty *defmlp = def->item[k]->messages;
1634 
1635       for (j = 0; j < defmlp->nitems; j++)
1636 	{
1637 	  message_ty *defmsg = defmlp->item[j];
1638 
	  /* match_domain sets used to 1 on every definition it merged.  */
1639 	  if (!defmsg->used)
1640 	    {
1641 	      /* Remember the old translation although it is not used anymore.
1642 		 But we mark it as obsolete.  */
1643 	      message_ty *mp;
1644 
1645 	      mp = message_copy (defmsg);
1646 	      /* Clear the extracted comments.  */
1647 	      if (mp->comment_dot != NULL)
1648 		{
1649 		  string_list_free (mp->comment_dot);
1650 		  mp->comment_dot = NULL;
1651 		}
1652 	      /* Clear the file position comments.  */
1653 	      if (mp->filepos != NULL)
1654 		{
1655 		  size_t i;
1656 
1657 		  for (i = 0; i < mp->filepos_count; i++)
1658 		    free ((char *) mp->filepos[i].file_name);
1659 		  mp->filepos_count = 0;
1660 		  free (mp->filepos);
1661 		  mp->filepos = NULL;
1662 		}
1663 	      /* Mark as obsolete.   */
1664 	      mp->obsolete = true;
1665 
1666 	      message_list_append (msgdomain_list_sublist (result, domain, true),
1667 				   mp);
1668 	      stats.obsolete++;
1669 	    }
1670 	}
1671     }
1672 
1673   /* Determine the known a-priori encoding, if any.  */
1674   if (def->encoding == ref->encoding)
1675     result->encoding = def->encoding;
1676 
1677   /* Report some statistics.  */
  /* NOTE(review): the first two numbers printed are def->nitems and
     ref->nitems, i.e. the lengths of the two domain lists rather than
     message totals -- confirm this is the intended figure.  The leading
     newline is emitted when progress dots may have been printed.  */
1678   if (verbosity_level > 0)
1679     fprintf (stderr, _("%s\
1680 Read %ld old + %ld reference, \
1681 merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
1682 	     !quiet && verbosity_level <= 1 ? "\n" : "",
1683 	     (long) def->nitems, (long) ref->nitems,
1684 	     (long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
1685 	     (long) stats.obsolete);
1686   else if (!quiet)
1687     fputs (_(" done.\n"), stderr);
1688 
1689   /* Return results.  */
1690   *defp = def;
1691   return result;
1692 }
1692 }
1693