1 /* GNU gettext - internationalization aids
2 Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc.
3 This file was written by Peter Miller <millerp@canb.auug.org.au>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <alloca.h>
23
24 #include <getopt.h>
25 #include <limits.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <locale.h>
31
32 #include "closeout.h"
33 #include "dir-list.h"
34 #include "error.h"
35 #include "error-progname.h"
36 #include "progname.h"
37 #include "relocatable.h"
38 #include "basename.h"
39 #include "message.h"
40 #include "read-catalog.h"
41 #include "read-po.h"
42 #include "read-properties.h"
43 #include "read-stringtable.h"
44 #include "write-catalog.h"
45 #include "write-po.h"
46 #include "write-properties.h"
47 #include "write-stringtable.h"
48 #include "format.h"
49 #include "xalloc.h"
50 #include "xallocsa.h"
51 #include "obstack.h"
52 #include "c-strstr.h"
53 #include "exit.h"
54 #include "c-strcase.h"
55 #include "stpcpy.h"
56 #include "stpncpy.h"
57 #include "po-charset.h"
58 #include "msgl-iconv.h"
59 #include "msgl-equal.h"
60 #include "msgl-fsearch.h"
61 #include "lock.h"
62 #include "plural-count.h"
63 #include "backupfile.h"
64 #include "copy-file.h"
65 #include "propername.h"
66 #include "gettext.h"
67
68 #define _(str) gettext (str)
69
70 #define obstack_chunk_alloc xmalloc
71 #define obstack_chunk_free free
72
73
/* If true do not print unneeded messages.  Set by -q / --quiet / --silent.  */
static bool quiet;

/* Verbosity level.  Incremented once for every -v / --verbose option.  */
static int verbosity_level;

/* Force output of PO file even if empty.  This is an int rather than a bool
   because getopt_long stores into it directly through the long_options
   table (--force-po).  */
static int force_po;

/* Apply the .pot file to each of the domains in the PO file.
   Set by -m / --multi-domain.  */
static bool multi_domain_mode = false;

/* Determines whether to use fuzzy matching.
   Cleared by -N / --no-fuzzy-matching.  */
static bool use_fuzzy_matching = true;

/* Determines whether to keep old msgids as previous msgids.
   Set by --previous.  */
static bool keep_previous = false;

/* List of user-specified compendiums.  Stays NULL until the first
   -C / --compendium option has been processed.  */
static message_list_list_ty *compendiums;

/* List of corresponding filenames.  One entry is appended for every message
   list appended to 'compendiums'.  */
static string_list_ty *compendium_filenames;

/* Update mode.  Set by -U / --update.  */
static bool update_mode = false;
/* Argument of --backup, or NULL if the option was not given.  */
static const char *version_control_string;
/* Argument of --suffix, or NULL if the option was not given; in update mode
   main() then falls back to the SIMPLE_BACKUP_SUFFIX environment variable.  */
static const char *backup_suffix_string;
102
/* Long options.
   Entries with a non-NULL flag pointer make getopt_long store the given
   value into that variable and return 0; all other entries return the
   value in the last column, which main() dispatches on.  Options without a
   single-letter equivalent use codes above CHAR_MAX so that they cannot
   collide with a short option character.
   NOTE(review): line_comment is not defined in this part of the file; it is
   presumably exported by one of the included headers - confirm.  */
static const struct option long_options[] =
{
  { "add-location", no_argument, &line_comment, 1 },
  { "backup", required_argument, NULL, CHAR_MAX + 1 },
  { "compendium", required_argument, NULL, 'C', },
  { "directory", required_argument, NULL, 'D' },
  { "escape", no_argument, NULL, 'E' },
  { "force-po", no_argument, &force_po, 1 },
  { "help", no_argument, NULL, 'h' },
  { "indent", no_argument, NULL, 'i' },
  { "multi-domain", no_argument, NULL, 'm' },
  { "no-escape", no_argument, NULL, 'e' },
  { "no-fuzzy-matching", no_argument, NULL, 'N' },
  { "no-location", no_argument, &line_comment, 0 },
  { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
  { "output-file", required_argument, NULL, 'o' },
  { "previous", no_argument, NULL, CHAR_MAX + 7 },
  { "properties-input", no_argument, NULL, 'P' },
  { "properties-output", no_argument, NULL, 'p' },
  { "quiet", no_argument, NULL, 'q' },
  { "sort-by-file", no_argument, NULL, 'F' },
  { "sort-output", no_argument, NULL, 's' },
  { "silent", no_argument, NULL, 'q' },
  { "strict", no_argument, NULL, CHAR_MAX + 2 },
  { "stringtable-input", no_argument, NULL, CHAR_MAX + 5 },
  { "stringtable-output", no_argument, NULL, CHAR_MAX + 6 },
  { "suffix", required_argument, NULL, CHAR_MAX + 3 },
  { "update", no_argument, NULL, 'U' },
  { "verbose", no_argument, NULL, 'v' },
  { "version", no_argument, NULL, 'V' },
  { "width", required_argument, NULL, 'w', },
  /* Terminating all-zero entry required by getopt_long.  */
  { NULL, 0, NULL, 0 }
};
137
138
/* Counters describing the outcome of a merge.
   NOTE(review): the code that updates these counters is outside this part
   of the file; the per-field descriptions below are inferred from the field
   names only - confirm against match_domain/merge.  */
struct statistics
{
  size_t merged;    /* presumably: messages matched exactly */
  size_t fuzzied;   /* presumably: messages matched through fuzzy search */
  size_t missing;   /* presumably: reference messages with no match */
  size_t obsolete;  /* presumably: definition messages no longer referenced */
};
146
147
/* Forward declaration of local functions.  */

/* usage() always terminates the process through exit(); with GCC 2.5 or
   newer this is made known to the compiler via the noreturn attribute.  */
static void usage (int status)
#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
        __attribute__ ((noreturn))
#endif
;
/* Reads one compendium file and registers its message lists.  */
static void compendium (const char *filename);
/* Merges the files fn1 and fn2; main() passes def.po as fn1 and ref.pot as
   fn2, and receives the parsed definitions through *defp.  */
static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
                                 catalog_input_format_ty input_syntax,
                                 msgdomain_list_ty **defp);
158
159
/* Program entry point.
   Parses the command line, validates the combination of options, merges
   def.po (argv[optind]) with ref.pot (argv[optind + 1]), optionally sorts
   the result, and prints it: back into def.po in update mode, otherwise to
   the file given with -o / --output-file.
   Every path terminates through exit(), directly or via usage()/error().  */
int
main (int argc, char **argv)
{
  int opt;
  bool do_help;
  bool do_version;
  char *output_file;
  msgdomain_list_ty *def;
  msgdomain_list_ty *result;
  catalog_input_format_ty input_syntax = &input_format_po;
  catalog_output_format_ty output_syntax = &output_format_po;
  bool sort_by_filepos = false;
  bool sort_by_msgid = false;

  /* Set program name for messages.  */
  set_program_name (argv[0]);
  error_print_progname = maybe_print_progname;
  verbosity_level = 0;
  quiet = false;
  gram_max_allowed_errors = UINT_MAX;

#ifdef HAVE_SETLOCALE
  /* Set locale via LC_ALL.  */
  setlocale (LC_ALL, "");
#endif

  /* Set the text message domain.  */
  bindtextdomain (PACKAGE, relocate (LOCALEDIR));
  bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
  textdomain (PACKAGE);

  /* Ensure that write errors on stdout are detected.  */
  atexit (close_stdout);

  /* Set default values for variables.  */
  do_help = false;
  do_version = false;
  output_file = NULL;

  /* Process all options.  --help and --version are only recorded here and
     acted upon after the loop, so that an invalid option anywhere on the
     command line still leads to usage (EXIT_FAILURE).  */
  while ((opt = getopt_long (argc, argv, "C:D:eEFhimNo:pPqsUvVw:",
                             long_options, NULL))
         != EOF)
    switch (opt)
      {
      case '\0':                /* Long option.  */
        /* getopt_long has already stored the value through the flag
           pointer of the long_options entry.  */
        break;

      case 'C':
        compendium (optarg);
        break;

      case 'D':
        dir_list_append (optarg);
        break;

      case 'e':
        message_print_style_escape (false);
        break;

      case 'E':
        message_print_style_escape (true);
        break;

      case 'F':
        sort_by_filepos = true;
        break;

      case 'h':
        do_help = true;
        break;

      case 'i':
        message_print_style_indent ();
        break;

      case 'm':
        multi_domain_mode = true;
        break;

      case 'N':
        use_fuzzy_matching = false;
        break;

      case 'o':
        output_file = optarg;
        break;

      case 'p':
        output_syntax = &output_format_properties;
        break;

      case 'P':
        input_syntax = &input_format_properties;
        break;

      case 'q':
        quiet = true;
        break;

      case 's':
        sort_by_msgid = true;
        break;

      case 'U':
        update_mode = true;
        break;

      case 'v':
        ++verbosity_level;
        break;

      case 'V':
        do_version = true;
        break;

      case 'w':
        {
          int value;
          char *endp;
          value = strtol (optarg, &endp, 10);
          /* The width is applied only if at least one digit was parsed;
             an argument without leading digits is silently ignored, and
             trailing characters after the number are not diagnosed.  */
          if (endp != optarg)
            message_page_width_set (value);
        }
        break;

      case CHAR_MAX + 1: /* --backup */
        version_control_string = optarg;
        break;

      case CHAR_MAX + 2: /* --strict */
        message_print_style_uniforum ();
        break;

      case CHAR_MAX + 3: /* --suffix */
        backup_suffix_string = optarg;
        break;

      case CHAR_MAX + 4: /* --no-wrap */
        message_page_width_ignore ();
        break;

      case CHAR_MAX + 5: /* --stringtable-input */
        input_syntax = &input_format_stringtable;
        break;

      case CHAR_MAX + 6: /* --stringtable-output */
        output_syntax = &output_format_stringtable;
        break;

      case CHAR_MAX + 7: /* --previous */
        keep_previous = true;
        break;

      default:
        usage (EXIT_FAILURE);
        break;
      }

  /* Version information is requested.  */
  if (do_version)
    {
      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
      /* xgettext: no-wrap */
      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
"),
              "1995-1998, 2000-2006");
      printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
      exit (EXIT_SUCCESS);
    }

  /* Help is requested.  */
  if (do_help)
    usage (EXIT_SUCCESS);

  /* Test whether we have a .po file name as argument.  */
  if (optind >= argc)
    {
      /* A status of EXIT_SUCCESS makes error() only print the message;
         the exit happens in usage().  */
      error (EXIT_SUCCESS, 0, _("no input files given"));
      usage (EXIT_FAILURE);
    }
  if (optind + 2 != argc)
    {
      error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
      usage (EXIT_FAILURE);
    }

  /* Verify selected options.  */
  if (update_mode)
    {
      /* In update mode the output always goes back to def.po.  */
      if (output_file != NULL)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--output-file");
        }
    }
  else
    {
      /* The backup options are meaningful only when def.po is rewritten.  */
      if (version_control_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--backup", "--update");
          usage (EXIT_FAILURE);
        }
      if (backup_suffix_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--suffix", "--update");
          usage (EXIT_FAILURE);
        }
    }

  if (!line_comment && sort_by_filepos)
    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
           "--no-location", "--sort-by-file");

  if (sort_by_msgid && sort_by_filepos)
    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
           "--sort-output", "--sort-by-file");

  /* In update mode, --properties-input implies --properties-output.  */
  if (update_mode && input_syntax == &input_format_properties)
    output_syntax = &output_format_properties;
  /* In update mode, --stringtable-input implies --stringtable-output.  */
  if (update_mode && input_syntax == &input_format_stringtable)
    output_syntax = &output_format_stringtable;

  /* Merge the two files.  */
  result = merge (argv[optind], argv[optind + 1], input_syntax, &def);

  /* Sort the results.  */
  if (sort_by_filepos)
    msgdomain_list_sort_by_filepos (result);
  else if (sort_by_msgid)
    msgdomain_list_sort_by_msgid (result);

  if (update_mode)
    {
      /* Do nothing if the original file and the result are equal.  Also do
         nothing if the original file and the result differ only by the
         POT-Creation-Date in the header entry; this is needed for projects
         which don't put the .pot file under CVS.  */
      if (!msgdomain_list_equal (def, result, true))
        {
          /* Back up def.po.  */
          enum backup_type backup_type;
          char *backup_file;

          output_file = argv[optind];

          /* Without --suffix, fall back to the environment; an empty
             SIMPLE_BACKUP_SUFFIX is treated like an unset one.  */
          if (backup_suffix_string == NULL)
            {
              backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
              if (backup_suffix_string != NULL
                  && backup_suffix_string[0] == '\0')
                backup_suffix_string = NULL;
            }
          if (backup_suffix_string != NULL)
            simple_backup_suffix = backup_suffix_string;

          backup_type = xget_version (_("backup type"), version_control_string);
          if (backup_type != none)
            {
              backup_file = find_backup_file_name (output_file, backup_type);
              copy_file_preserving (output_file, backup_file);
            }

          /* Write the merged message list out.  */
          msgdomain_list_print (result, output_file, output_syntax, true,
                                false);
        }
    }
  else
    {
      /* Write the merged message list out.  */
      msgdomain_list_print (result, output_file, output_syntax, force_po,
                            false);
    }

  exit (EXIT_SUCCESS);
}
442
443
444 /* Display usage information and exit. */
445 static void
usage(int status)446 usage (int status)
447 {
448 if (status != EXIT_SUCCESS)
449 fprintf (stderr, _("Try `%s --help' for more information.\n"),
450 program_name);
451 else
452 {
453 printf (_("\
454 Usage: %s [OPTION] def.po ref.pot\n\
455 "), program_name);
456 printf ("\n");
457 /* xgettext: no-wrap */
458 printf (_("\
459 Merges two Uniforum style .po files together. The def.po file is an\n\
460 existing PO file with translations which will be taken over to the newly\n\
461 created file as long as they still match; comments will be preserved,\n\
462 but extracted comments and file positions will be discarded. The ref.pot\n\
463 file is the last created PO file with up-to-date source references but\n\
464 old translations, or a PO Template file (generally created by xgettext);\n\
465 any translations or comments in the file will be discarded, however dot\n\
466 comments and file positions will be preserved. Where an exact match\n\
467 cannot be found, fuzzy matching is used to produce better results.\n\
468 "));
469 printf ("\n");
470 printf (_("\
471 Mandatory arguments to long options are mandatory for short options too.\n"));
472 printf ("\n");
473 printf (_("\
474 Input file location:\n"));
475 printf (_("\
476 def.po translations referring to old sources\n"));
477 printf (_("\
478 ref.pot references to new sources\n"));
479 printf (_("\
480 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
481 printf (_("\
482 -C, --compendium=FILE additional library of message translations,\n\
483 may be specified more than once\n"));
484 printf ("\n");
485 printf (_("\
486 Operation mode:\n"));
487 printf (_("\
488 -U, --update update def.po,\n\
489 do nothing if def.po already up to date\n"));
490 printf ("\n");
491 printf (_("\
492 Output file location:\n"));
493 printf (_("\
494 -o, --output-file=FILE write output to specified file\n"));
495 printf (_("\
496 The results are written to standard output if no output file is specified\n\
497 or if it is -.\n"));
498 printf ("\n");
499 printf (_("\
500 Output file location in update mode:\n"));
501 printf (_("\
502 The result is written back to def.po.\n"));
503 printf (_("\
504 --backup=CONTROL make a backup of def.po\n"));
505 printf (_("\
506 --suffix=SUFFIX override the usual backup suffix\n"));
507 printf (_("\
508 The version control method may be selected via the --backup option or through\n\
509 the VERSION_CONTROL environment variable. Here are the values:\n\
510 none, off never make backups (even if --backup is given)\n\
511 numbered, t make numbered backups\n\
512 existing, nil numbered if numbered backups exist, simple otherwise\n\
513 simple, never always make simple backups\n"));
514 printf (_("\
515 The backup suffix is `~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\
516 environment variable.\n\
517 "));
518 printf ("\n");
519 printf (_("\
520 Operation modifiers:\n"));
521 printf (_("\
522 -m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
523 printf (_("\
524 -N, --no-fuzzy-matching do not use fuzzy matching\n"));
525 printf (_("\
526 --previous keep previous msgids of translated messages\n"));
527 printf ("\n");
528 printf (_("\
529 Input file syntax:\n"));
530 printf (_("\
531 -P, --properties-input input files are in Java .properties syntax\n"));
532 printf (_("\
533 --stringtable-input input files are in NeXTstep/GNUstep .strings\n\
534 syntax\n"));
535 printf ("\n");
536 printf (_("\
537 Output details:\n"));
538 printf (_("\
539 -e, --no-escape do not use C escapes in output (default)\n"));
540 printf (_("\
541 -E, --escape use C escapes in output, no extended chars\n"));
542 printf (_("\
543 --force-po write PO file even if empty\n"));
544 printf (_("\
545 -i, --indent indented output style\n"));
546 printf (_("\
547 --no-location suppress '#: filename:line' lines\n"));
548 printf (_("\
549 --add-location preserve '#: filename:line' lines (default)\n"));
550 printf (_("\
551 --strict strict Uniforum output style\n"));
552 printf (_("\
553 -p, --properties-output write out a Java .properties file\n"));
554 printf (_("\
555 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
556 printf (_("\
557 -w, --width=NUMBER set output page width\n"));
558 printf (_("\
559 --no-wrap do not break long message lines, longer than\n\
560 the output page width, into several lines\n"));
561 printf (_("\
562 -s, --sort-output generate sorted output\n"));
563 printf (_("\
564 -F, --sort-by-file sort output by file location\n"));
565 printf ("\n");
566 printf (_("\
567 Informative output:\n"));
568 printf (_("\
569 -h, --help display this help and exit\n"));
570 printf (_("\
571 -V, --version output version information and exit\n"));
572 printf (_("\
573 -v, --verbose increase verbosity level\n"));
574 printf (_("\
575 -q, --quiet, --silent suppress progress indicators\n"));
576 printf ("\n");
577 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
578 stdout);
579 }
580
581 exit (status);
582 }
583
584
585 static void
compendium(const char * filename)586 compendium (const char *filename)
587 {
588 msgdomain_list_ty *mdlp;
589 size_t k;
590
591 mdlp = read_catalog_file (filename, &input_format_po);
592 if (compendiums == NULL)
593 {
594 compendiums = message_list_list_alloc ();
595 compendium_filenames = string_list_alloc ();
596 }
597 for (k = 0; k < mdlp->nitems; k++)
598 {
599 message_list_list_append (compendiums, mdlp->item[k]->messages);
600 string_list_append (compendium_filenames, filename);
601 }
602 }
603
604
/* Data structure representing the messages with known translations.
   They are composed of
     - A message list from def.po,
     - The compendiums.
   The data structure is optimized for exact and fuzzy searches.
   Instances are set up with definitions_init and released with
   definitions_destroy.  */
typedef struct definitions_ty definitions_ty;
struct definitions_ty
{
  /* A list of message lists.  The first comes from def.po, the other ones
     from the compendiums.  Each message list has a built-in hash table,
     for speed when doing the exact searches.
     Element 0 starts out as NULL and is replaced through
     definitions_set_current_list.  */
  message_list_list_ty *lists;
  /* A fuzzy index of the compendiums, for speed when doing fuzzy searches.
     Used only if use_fuzzy_matching is true and compendiums != NULL.
     NULL until definitions_init_findex has built it.  */
  message_fuzzy_index_ty *findex;
  /* A once-only execution guard for the initialization of the fuzzy index.
     Needed for OpenMP.  */
  gl_lock_define(, findex_init_lock)
  /* The canonical encoding of the compendiums.  Passed on to
     message_fuzzy_index_alloc when the fuzzy index is created.  */
  const char *canon_charset;
};
626
627 static inline void
definitions_init(definitions_ty * definitions,const char * canon_charset)628 definitions_init (definitions_ty *definitions, const char *canon_charset)
629 {
630 definitions->lists = message_list_list_alloc ();
631 message_list_list_append (definitions->lists, NULL);
632 if (compendiums != NULL)
633 message_list_list_append_list (definitions->lists, compendiums);
634 definitions->findex = NULL;
635 gl_lock_init (definitions->findex_init_lock);
636 definitions->canon_charset = canon_charset;
637 }
638
639 /* Create the fuzzy index.
640 Used only if use_fuzzy_matching is true and compendiums != NULL. */
641 static inline void
definitions_init_findex(definitions_ty * definitions)642 definitions_init_findex (definitions_ty *definitions)
643 {
644 /* Protect against concurrent execution. */
645 gl_lock_lock (definitions->findex_init_lock);
646 if (definitions->findex == NULL)
647 {
648 /* Combine all the compendium message lists into a single one. Don't
649 bother checking for duplicates. */
650 message_list_ty *all_compendium;
651 size_t i;
652
653 all_compendium = message_list_alloc (false);
654 for (i = 0; i < compendiums->nitems; i++)
655 {
656 message_list_ty *mlp = compendiums->item[i];
657 size_t j;
658
659 for (j = 0; j < mlp->nitems; j++)
660 message_list_append (all_compendium, mlp->item[j]);
661 }
662
663 /* Create the fuzzy index from it. */
664 definitions->findex =
665 message_fuzzy_index_alloc (all_compendium, definitions->canon_charset);
666 }
667 gl_lock_unlock (definitions->findex_init_lock);
668 }
669
670 /* Return the current list of non-compendium messages. */
671 static inline message_list_ty *
definitions_current_list(const definitions_ty * definitions)672 definitions_current_list (const definitions_ty *definitions)
673 {
674 return definitions->lists->item[0];
675 }
676
677 /* Set the current list of non-compendium messages. */
678 static inline void
definitions_set_current_list(definitions_ty * definitions,message_list_ty * mlp)679 definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp)
680 {
681 definitions->lists->item[0] = mlp;
682 }
683
684 /* Exact search. */
685 static inline message_ty *
definitions_search(const definitions_ty * definitions,const char * msgctxt,const char * msgid)686 definitions_search (const definitions_ty *definitions,
687 const char *msgctxt, const char *msgid)
688 {
689 return message_list_list_search (definitions->lists, msgctxt, msgid);
690 }
691
692 /* Fuzzy search.
693 Used only if use_fuzzy_matching is true. */
694 static inline message_ty *
definitions_search_fuzzy(definitions_ty * definitions,const char * msgctxt,const char * msgid)695 definitions_search_fuzzy (definitions_ty *definitions,
696 const char *msgctxt, const char *msgid)
697 {
698 message_ty *mp1 =
699 message_list_search_fuzzy (definitions_current_list (definitions),
700 msgctxt, msgid);
701 if (compendiums != NULL)
702 {
703 message_ty *mp2;
704
705 /* Create the fuzzy index lazily. */
706 if (definitions->findex == NULL)
707 definitions_init_findex (definitions);
708
709 mp2 = message_fuzzy_index_search (definitions->findex, msgctxt, msgid);
710
711 /* Choose the best among mp1, mp2. */
712 if (mp1 == NULL
713 || (mp2 != NULL
714 && (fuzzy_search_goal_function (mp2, msgctxt, msgid)
715 > fuzzy_search_goal_function (mp1, msgctxt, msgid))))
716 mp1 = mp2;
717 }
718
719 return mp1;
720 }
721
722 static inline void
definitions_destroy(definitions_ty * definitions)723 definitions_destroy (definitions_ty *definitions)
724 {
725 message_list_list_free (definitions->lists, 2);
726 if (definitions->findex != NULL)
727 message_fuzzy_index_free (definitions->findex);
728 }
729
730
731 static bool
msgfmt_check_pair_fails(const lex_pos_ty * pos,const char * msgid,const char * msgid_plural,const char * msgstr,size_t msgstr_len,size_t fmt)732 msgfmt_check_pair_fails (const lex_pos_ty *pos,
733 const char *msgid, const char *msgid_plural,
734 const char *msgstr, size_t msgstr_len,
735 size_t fmt)
736 {
737 bool failure;
738 struct formatstring_parser *parser = formatstring_parsers[fmt];
739 char *invalid_reason = NULL;
740 void *msgid_descr =
741 parser->parse (msgid_plural != NULL ? msgid_plural : msgid, false,
742 &invalid_reason);
743
744 failure = false;
745 if (msgid_descr != NULL)
746 {
747 const char *p_end = msgstr + msgstr_len;
748 const char *p;
749
750 for (p = msgstr; p < p_end; p += strlen (p) + 1)
751 {
752 void *msgstr_descr = parser->parse (msgstr, true, &invalid_reason);
753
754 if (msgstr_descr != NULL)
755 {
756 failure = parser->check (msgid_descr, msgstr_descr,
757 msgid_plural == NULL, NULL, NULL);
758 parser->free (msgstr_descr);
759 }
760 else
761 {
762 failure = true;
763 free (invalid_reason);
764 }
765
766 if (failure)
767 break;
768 }
769
770 parser->free (msgid_descr);
771 }
772 else
773 free (invalid_reason);
774
775 return failure;
776 }
777
778
/* Build the merged message for the pair DEF (from def.po or a compendium)
   and REF (from ref.pot).

   The result takes
     - msgctxt, msgid, msgid_plural, dot comments, file positions, format
       flags and do_wrap from REF,
     - msgstr, translator comments and the source position from DEF,
     - the fuzzy flag from DEF, or'ed with FORCE_FUZZY, and possibly set
       additionally when the translation would no longer pass the format
       string check.
   For the header entry the msgstr is rebuilt field by field, taking
   Report-Msgid-Bugs-To and POT-Creation-Date from REF and everything else
   from DEF.

   Returns a newly allocated message.  */
static message_ty *
message_merge (message_ty *def, message_ty *ref, bool force_fuzzy)
{
  const char *msgstr;
  size_t msgstr_len;
  const char *prev_msgctxt;
  const char *prev_msgid;
  const char *prev_msgid_plural;
  message_ty *result;
  size_t j, i;

  /* Take the msgid from the reference.  When fuzzy matches are made,
     the definition will not be unique, but the reference will be -
     usually because it has only been slightly changed.  */

  /* Take the msgstr from the definition.  The msgstr of the reference
     is usually empty, as it was generated by xgettext.  If we currently
     process the header entry we have to merge the msgstr by using the
     Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference.  */
  if (is_header (ref))
    {
      /* Oh, oh.  The header entry and we have something to fill in.  */
      /* Table of the header fields that are recognized, in the order in
         which they are written to the merged header.  The index macros
         defined alongside must match the position of each entry.  */
      static const struct
      {
        const char *name;
        size_t len;
      } known_fields[] =
      {
        { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
#define PROJECT_ID              0
        { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
#define REPORT_MSGID_BUGS_TO    1
        { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
#define POT_CREATION_DATE       2
        { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
#define PO_REVISION_DATE        3
        { "Last-Translator:", sizeof ("Last-Translator:") - 1 },
#define LAST_TRANSLATOR         4
        { "Language-Team:", sizeof ("Language-Team:") - 1 },
#define LANGUAGE_TEAM           5
        { "MIME-Version:", sizeof ("MIME-Version:") - 1 },
#define MIME_VERSION            6
        { "Content-Type:", sizeof ("Content-Type:") - 1 },
#define CONTENT_TYPE            7
        { "Content-Transfer-Encoding:",
          sizeof ("Content-Transfer-Encoding:") - 1 }
#define CONTENT_TRANSFER        8
      };
#define UNKNOWN 9
      /* Value of each known field (the text after the field name, up to
         and including the newline).  The extra slot UNKNOWN accumulates
         all lines that match none of the known fields.  */
      struct
      {
        const char *string;
        size_t len;
      } header_fields[UNKNOWN + 1];
      struct obstack pool;
      const char *cp;
      char *newp;
      size_t len, cnt;

      /* Clear all fields.  */
      memset (header_fields, '\0', sizeof (header_fields));

      /* Prepare a temporary memory pool.  */
      obstack_init (&pool);

      /* Split the header of the definition into lines and sort them into
         header_fields.  */
      cp = def->msgstr;
      while (*cp != '\0')
        {
          const char *endp = strchr (cp, '\n');
          int terminated = endp != NULL;

          if (!terminated)
            {
              /* Add a trailing newline.  */
              char *copy;
              endp = strchr (cp, '\0');

              /* Length of the line including the newline being added.  */
              len = endp - cp + 1;

              copy = (char *) obstack_alloc (&pool, len + 1);
              stpcpy (stpcpy (copy, cp), "\n");
              /* Continue with the copy; endp still points at the NUL of
                 the original string, which ends the loop afterwards.  */
              cp = copy;
            }
          else
            {
              /* Length of the line including its newline.  */
              len = (endp - cp) + 1;
              ++endp;
            }

          /* Compare with any of the known fields.  */
          for (cnt = 0;
               cnt < sizeof (known_fields) / sizeof (known_fields[0]);
               ++cnt)
            if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
                == 0)
              break;

          if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
            {
              /* Known field: remember its value without the field name.
                 A later line with the same name overrides an earlier one.  */
              header_fields[cnt].string = &cp[known_fields[cnt].len];
              header_fields[cnt].len = len - known_fields[cnt].len;
            }
          else
            {
              /* It's an unknown field.  Append content to what is already
                 known.  */
              char *extended =
                (char *) obstack_alloc (&pool,
                                        header_fields[UNKNOWN].len + len + 1);
              memcpy (extended, header_fields[UNKNOWN].string,
                      header_fields[UNKNOWN].len);
              memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
              extended[header_fields[UNKNOWN].len + len] = '\0';
              header_fields[UNKNOWN].string = extended;
              header_fields[UNKNOWN].len += len;
            }

          cp = endp;
        }

      /* Override Report-Msgid-Bugs-To with the value from the reference,
         if the reference header contains that field.  */
      {
        const char *msgid_bugs_ptr;

        msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
        if (msgid_bugs_ptr != NULL)
          {
            size_t msgid_bugs_len;
            const char *endp;

            msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;

            endp = strchr (msgid_bugs_ptr, '\n');
            if (endp == NULL)
              {
                /* Add a trailing newline.  */
                char *extended;
                endp = strchr (msgid_bugs_ptr, '\0');
                msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
                extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
                stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
                msgid_bugs_ptr = extended;
              }
            else
              msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;

            header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
            header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
          }
      }

      /* Likewise override POT-Creation-Date with the value from the
         reference.  */
      {
        const char *pot_date_ptr;

        pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
        if (pot_date_ptr != NULL)
          {
            size_t pot_date_len;
            const char *endp;

            pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;

            endp = strchr (pot_date_ptr, '\n');
            if (endp == NULL)
              {
                /* Add a trailing newline.  */
                char *extended;
                endp = strchr (pot_date_ptr, '\0');
                pot_date_len = (endp - pot_date_ptr) + 1;
                extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
                stpcpy (stpcpy (extended, pot_date_ptr), "\n");
                pot_date_ptr = extended;
              }
            else
              pot_date_len = (endp - pot_date_ptr) + 1;

            header_fields[POT_CREATION_DATE].string = pot_date_ptr;
            header_fields[POT_CREATION_DATE].len = pot_date_len;
          }
      }

      /* Concatenate all the various fields.  */
      /* First compute the total length: name plus value for every known
         field that is present, plus the collected unknown lines.  */
      len = 0;
      for (cnt = 0; cnt < UNKNOWN; ++cnt)
        if (header_fields[cnt].string != NULL)
          len += known_fields[cnt].len + header_fields[cnt].len;
      len += header_fields[UNKNOWN].len;

      cp = newp = (char *) xmalloc (len + 1);
      newp[len] = '\0';

      /* Append "<name><value>" of field idx at newp and advance newp past
         it, provided the field is present.  */
#define IF_FILLED(idx)                                                        \
      if (header_fields[idx].string)                                          \
        newp = stpncpy (stpcpy (newp, known_fields[idx].name),                \
                        header_fields[idx].string, header_fields[idx].len)

      IF_FILLED (PROJECT_ID);
      IF_FILLED (REPORT_MSGID_BUGS_TO);
      IF_FILLED (POT_CREATION_DATE);
      IF_FILLED (PO_REVISION_DATE);
      IF_FILLED (LAST_TRANSLATOR);
      IF_FILLED (LANGUAGE_TEAM);
      IF_FILLED (MIME_VERSION);
      IF_FILLED (CONTENT_TYPE);
      IF_FILLED (CONTENT_TRANSFER);
      /* The unknown lines go last.  */
      if (header_fields[UNKNOWN].string != NULL)
        stpcpy (newp, header_fields[UNKNOWN].string);

#undef IF_FILLED

      /* Free the temporary memory pool.  */
      /* All pool strings have been copied into the xmalloc'ed buffer by
         now, so nothing refers to the pool any more.  */
      obstack_free (&pool, NULL);

      msgstr = cp;
      msgstr_len = strlen (cp) + 1;

      /* The header entry never carries a previous msgid.  */
      prev_msgctxt = NULL;
      prev_msgid = NULL;
      prev_msgid_plural = NULL;
    }
  else
    {
      /* Ordinary message: the translation is taken over unchanged.  */
      msgstr = def->msgstr;
      msgstr_len = def->msgstr_len;

      if (def->is_fuzzy)
        {
          /* The definition was fuzzy already: keep the previous msgid it
             recorded.  */
          prev_msgctxt = def->prev_msgctxt;
          prev_msgid = def->prev_msgid;
          prev_msgid_plural = def->prev_msgid_plural;
        }
      else
        {
          /* The definition was translated: its own msgid is the one the
             translation was made for.  */
          prev_msgctxt = def->msgctxt;
          prev_msgid = def->msgid;
          prev_msgid_plural = def->msgid_plural;
        }
    }

  result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
                          xstrdup (ref->msgid), ref->msgid_plural,
                          msgstr, msgstr_len, &def->pos);

  /* Take the comments from the definition file.  There will be none at
     all in the reference file, as it was generated by xgettext.  */
  if (def->comment)
    for (j = 0; j < def->comment->nitems; ++j)
      message_comment_append (result, def->comment->item[j]);

  /* Take the dot comments from the reference file, as they are
     generated by xgettext.  Any in the definition file are old ones
     collected by previous runs of xgettext and msgmerge.  */
  if (ref->comment_dot)
    for (j = 0; j < ref->comment_dot->nitems; ++j)
      message_comment_dot_append (result, ref->comment_dot->item[j]);

  /* The flags are mixed in a special way.  Some information comes
     from the reference message (such as format/no-format), other
     information comes from the definition file (fuzzy or not).  */
  result->is_fuzzy = def->is_fuzzy | force_fuzzy;

  for (i = 0; i < NFORMATS; i++)
    {
      result->is_format[i] = ref->is_format[i];

      /* If the reference message is marked as being a format specifier,
         but the definition message is not, we check if the resulting
         message would pass "msgfmt -c".  If yes, then all is fine.  If
         not, we add a fuzzy marker, because
         1. the message needs the translator's attention,
         2. msgmerge must not transform a PO file which passes "msgfmt -c"
            into a PO file which doesn't.  */
      if (!result->is_fuzzy
          && possible_format_p (ref->is_format[i])
          && !possible_format_p (def->is_format[i])
          && msgfmt_check_pair_fails (&def->pos, ref->msgid, ref->msgid_plural,
                                      msgstr, msgstr_len, i))
        result->is_fuzzy = true;
    }

  result->do_wrap = ref->do_wrap;

  /* Insert previous msgid, commented out with "#|".
     Do so only when --previous is specified, for backward compatibility.
     Since the "previous msgid" represents the original msgid that led to
     the current msgstr,
       - we can omit it if the resulting message is not fuzzy,
       - otherwise, if the corresponding message from the definition file
         was translated (not fuzzy), we use that message's msgid,
       - otherwise, we use that message's prev_msgid.  */
  if (keep_previous && result->is_fuzzy)
    {
      result->prev_msgctxt = prev_msgctxt;
      result->prev_msgid = prev_msgid;
      result->prev_msgid_plural = prev_msgid_plural;
    }

  /* Take the file position comments from the reference file, as they
     are generated by xgettext.  Any in the definition file are old ones
     collected by previous runs of xgettext and msgmerge.  */
  for (j = 0; j < ref->filepos_count; ++j)
    {
      lex_pos_ty *pp = &ref->filepos[j];
      message_comment_filepos (result, pp->file_name, pp->line_number);
    }

  /* Special postprocessing is needed if the reference message is a
     plural form and the definition message isn't, or vice versa.
     The 'used' field only tags the case here (1: reference is plural but
     definition is not, 2: the reverse); the postprocessing itself is not
     done in this function.  */
  if (ref->msgid_plural != NULL)
    {
      if (def->msgid_plural == NULL)
        result->used = 1;
    }
  else
    {
      if (def->msgid_plural != NULL)
        result->used = 2;
    }

  /* All done, return the merged message to the caller.  */
  return result;
}
1100
1101
1102 #define DOT_FREQUENCY 10
1103
1104 static void
match_domain(const char * fn1,const char * fn2,definitions_ty * definitions,message_list_ty * refmlp,message_list_ty * resultmlp,struct statistics * stats,unsigned int * processed)1105 match_domain (const char *fn1, const char *fn2,
1106 definitions_ty *definitions, message_list_ty *refmlp,
1107 message_list_ty *resultmlp,
1108 struct statistics *stats, unsigned int *processed)
1109 {
1110 message_ty *header_entry;
1111 unsigned long int nplurals;
1112 char *untranslated_plural_msgstr;
1113 struct search_result { message_ty *found; bool fuzzy; } *search_results;
1114 size_t j;
1115
1116 header_entry =
1117 message_list_search (definitions_current_list (definitions), NULL, "");
1118 nplurals = get_plural_count (header_entry ? header_entry->msgstr : NULL);
1119 untranslated_plural_msgstr = (char *) xmalloc (nplurals);
1120 memset (untranslated_plural_msgstr, '\0', nplurals);
1121
1122 /* Most of the time is spent in definitions_search_fuzzy.
1123 Perform it in a separate loop that can be parallelized by an OpenMP
1124 capable compiler. */
1125 search_results =
1126 (struct search_result *)
1127 xmalloc (refmlp->nitems * sizeof (struct search_result));
1128 {
1129 long int nn = refmlp->nitems;
1130 long int jj;
1131
1132 /* Tell the OpenMP capable compiler to distribute this loop across
1133 several threads. The schedule is dynamic, because for some messages
1134 the loop body can be executed very quickly, whereas for others it takes
1135 a long time. */
1136 #ifdef _OPENMP
1137 # pragma omp parallel for schedule(dynamic)
1138 #endif
1139 for (jj = 0; jj < nn; jj++)
1140 {
1141 message_ty *refmsg = refmlp->item[jj];
1142 message_ty *defmsg;
1143
1144 /* Because merging can take a while we print something to signal
1145 we are not dead. */
1146 if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
1147 fputc ('.', stderr);
1148 #ifdef _OPENMP
1149 # pragma omp atomic
1150 #endif
1151 (*processed)++;
1152
1153 /* See if it is in the other file. */
1154 defmsg =
1155 definitions_search (definitions, refmsg->msgctxt, refmsg->msgid);
1156 if (defmsg != NULL)
1157 {
1158 search_results[jj].found = defmsg;
1159 search_results[jj].fuzzy = false;
1160 }
1161 else if (!is_header (refmsg)
1162 /* If the message was not defined at all, try to find a very
1163 similar message, it could be a typo, or the suggestion may
1164 help. */
1165 && use_fuzzy_matching
1166 && ((defmsg =
1167 definitions_search_fuzzy (definitions,
1168 refmsg->msgctxt,
1169 refmsg->msgid)) != NULL))
1170 {
1171 search_results[jj].found = defmsg;
1172 search_results[jj].fuzzy = true;
1173 }
1174 else
1175 search_results[jj].found = NULL;
1176 }
1177 }
1178
1179 for (j = 0; j < refmlp->nitems; j++)
1180 {
1181 message_ty *refmsg = refmlp->item[j];
1182
1183 /* See if it is in the other file.
1184 This used definitions_search. */
1185 if (search_results[j].found != NULL && !search_results[j].fuzzy)
1186 {
1187 message_ty *defmsg = search_results[j].found;
1188 /* Merge the reference with the definition: take the #. and
1189 #: comments from the reference, take the # comments from
1190 the definition, take the msgstr from the definition. Add
1191 this merged entry to the output message list. */
1192 message_ty *mp = message_merge (defmsg, refmsg, false);
1193
1194 message_list_append (resultmlp, mp);
1195
1196 /* Remember that this message has been used, when we scan
1197 later to see if anything was omitted. */
1198 defmsg->used = 1;
1199 stats->merged++;
1200 }
1201 else if (!is_header (refmsg))
1202 {
1203 /* If the message was not defined at all, try to find a very
1204 similar message, it could be a typo, or the suggestion may
1205 help. This search assumed use_fuzzy_matching and used
1206 definitions_search_fuzzy. */
1207 if (search_results[j].found != NULL && search_results[j].fuzzy)
1208 {
1209 message_ty *defmsg = search_results[j].found;
1210 message_ty *mp;
1211
1212 if (verbosity_level > 1)
1213 {
1214 po_gram_error_at_line (&refmsg->pos, _("\
1215 this message is used but not defined..."));
1216 error_message_count--;
1217 po_gram_error_at_line (&defmsg->pos, _("\
1218 ...but this definition is similar"));
1219 }
1220
1221 /* Merge the reference with the definition: take the #. and
1222 #: comments from the reference, take the # comments from
1223 the definition, take the msgstr from the definition. Add
1224 this merged entry to the output message list. */
1225 mp = message_merge (defmsg, refmsg, true);
1226
1227 message_list_append (resultmlp, mp);
1228
1229 /* Remember that this message has been used, when we scan
1230 later to see if anything was omitted. */
1231 defmsg->used = 1;
1232 stats->fuzzied++;
1233 if (!quiet && verbosity_level <= 1)
1234 /* Always print a dot if we handled a fuzzy match. */
1235 fputc ('.', stderr);
1236 }
1237 else
1238 {
1239 message_ty *mp;
1240 bool is_untranslated;
1241 const char *p;
1242 const char *pend;
1243
1244 if (verbosity_level > 1)
1245 po_gram_error_at_line (&refmsg->pos, _("\
1246 this message is used but not defined in %s"), fn1);
1247
1248 mp = message_copy (refmsg);
1249
1250 if (mp->msgid_plural != NULL)
1251 {
1252 /* Test if mp is untranslated. (It most likely is.) */
1253 is_untranslated = true;
1254 for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++)
1255 if (*p != '\0')
1256 {
1257 is_untranslated = false;
1258 break;
1259 }
1260 if (is_untranslated)
1261 {
1262 /* Change mp->msgstr_len consecutive empty strings into
1263 nplurals consecutive empty strings. */
1264 if (nplurals > mp->msgstr_len)
1265 mp->msgstr = untranslated_plural_msgstr;
1266 mp->msgstr_len = nplurals;
1267 }
1268 }
1269
1270 message_list_append (resultmlp, mp);
1271 stats->missing++;
1272 }
1273 }
1274 }
1275
1276 free (search_results);
1277
1278 /* Now postprocess the problematic merges. This is needed because we
1279 want the result to pass the "msgfmt -c -v" check. */
1280 {
1281 /* message_merge sets mp->used to 1 or 2, depending on the problem.
1282 Compute the bitwise OR of all these. */
1283 int problematic = 0;
1284
1285 for (j = 0; j < resultmlp->nitems; j++)
1286 problematic |= resultmlp->item[j]->used;
1287
1288 if (problematic)
1289 {
1290 unsigned long int nplurals = 0;
1291
1292 if (problematic & 1)
1293 {
1294 /* Need to know nplurals of the result domain. */
1295 message_ty *header_entry =
1296 message_list_search (resultmlp, NULL, "");
1297
1298 nplurals = get_plural_count (header_entry
1299 ? header_entry->msgstr
1300 : NULL);
1301 }
1302
1303 for (j = 0; j < resultmlp->nitems; j++)
1304 {
1305 message_ty *mp = resultmlp->item[j];
1306
1307 if ((mp->used & 1) && (nplurals > 0))
1308 {
1309 /* ref->msgid_plural != NULL but def->msgid_plural == NULL.
1310 Use a copy of def->msgstr for each possible plural form. */
1311 size_t new_msgstr_len;
1312 char *new_msgstr;
1313 char *p;
1314 unsigned long i;
1315
1316 if (verbosity_level > 1)
1317 {
1318 po_gram_error_at_line (&mp->pos, _("\
1319 this message should define plural forms"));
1320 }
1321
1322 new_msgstr_len = nplurals * mp->msgstr_len;
1323 new_msgstr = (char *) xmalloc (new_msgstr_len);
1324 for (i = 0, p = new_msgstr; i < nplurals; i++)
1325 {
1326 memcpy (p, mp->msgstr, mp->msgstr_len);
1327 p += mp->msgstr_len;
1328 }
1329 mp->msgstr = new_msgstr;
1330 mp->msgstr_len = new_msgstr_len;
1331 mp->is_fuzzy = true;
1332 }
1333
1334 if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1))
1335 {
1336 /* ref->msgid_plural == NULL but def->msgid_plural != NULL.
1337 Use only the first among the plural forms. */
1338
1339 if (verbosity_level > 1)
1340 {
1341 po_gram_error_at_line (&mp->pos, _("\
1342 this message should not define plural forms"));
1343 }
1344
1345 mp->msgstr_len = strlen (mp->msgstr) + 1;
1346 mp->is_fuzzy = true;
1347 }
1348
1349 /* Postprocessing of this message is done. */
1350 mp->used = 0;
1351 }
1352 }
1353 }
1354 }
1355
1356 static msgdomain_list_ty *
merge(const char * fn1,const char * fn2,catalog_input_format_ty input_syntax,msgdomain_list_ty ** defp)1357 merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax,
1358 msgdomain_list_ty **defp)
1359 {
1360 msgdomain_list_ty *def;
1361 msgdomain_list_ty *ref;
1362 size_t j, k;
1363 unsigned int processed;
1364 struct statistics stats;
1365 msgdomain_list_ty *result;
1366 definitions_ty definitions;
1367 message_list_ty *empty_list;
1368
1369 stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;
1370
1371 /* This is the definitions file, created by a human. */
1372 def = read_catalog_file (fn1, input_syntax);
1373
1374 /* This is the references file, created by groping the sources with
1375 the xgettext program. */
1376 ref = read_catalog_file (fn2, input_syntax);
1377 /* Add a dummy header entry, if the references file contains none. */
1378 for (k = 0; k < ref->nitems; k++)
1379 if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
1380 {
1381 static lex_pos_ty pos = { __FILE__, __LINE__ };
1382 message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos);
1383
1384 message_list_prepend (ref->item[k]->messages, refheader);
1385 }
1386
1387 /* The references file can be either in ASCII or in UTF-8. If it is
1388 in UTF-8, we have to convert the definitions and the compendiums to
1389 UTF-8 as well. */
1390 {
1391 bool was_utf8 = false;
1392 for (k = 0; k < ref->nitems; k++)
1393 {
1394 message_list_ty *mlp = ref->item[k]->messages;
1395
1396 for (j = 0; j < mlp->nitems; j++)
1397 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1398 {
1399 const char *header = mlp->item[j]->msgstr;
1400
1401 if (header != NULL)
1402 {
1403 const char *charsetstr = c_strstr (header, "charset=");
1404
1405 if (charsetstr != NULL)
1406 {
1407 size_t len;
1408
1409 charsetstr += strlen ("charset=");
1410 len = strcspn (charsetstr, " \t\n");
1411 if (len == strlen ("UTF-8")
1412 && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
1413 was_utf8 = true;
1414 }
1415 }
1416 }
1417 }
1418 if (was_utf8)
1419 {
1420 def = iconv_msgdomain_list (def, "UTF-8", fn1);
1421 if (compendiums != NULL)
1422 for (k = 0; k < compendiums->nitems; k++)
1423 iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
1424 compendium_filenames->item[k]);
1425 }
1426 else if (compendiums != NULL && compendiums->nitems > 0)
1427 {
1428 /* Ensure that the definitions and the compendiums are in the same
1429 encoding. Prefer the encoding of the definitions file, if
1430 possible; otherwise, if the definitions file is empty and the
1431 compendiums are all in the same encoding, use that encoding;
1432 otherwise, use UTF-8. */
1433 bool conversion_done = false;
1434 {
1435 char *charset = NULL;
1436
1437 /* Get the encoding of the definitions file. */
1438 for (k = 0; k < def->nitems; k++)
1439 {
1440 message_list_ty *mlp = def->item[k]->messages;
1441
1442 for (j = 0; j < mlp->nitems; j++)
1443 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1444 {
1445 const char *header = mlp->item[j]->msgstr;
1446
1447 if (header != NULL)
1448 {
1449 const char *charsetstr = c_strstr (header, "charset=");
1450
1451 if (charsetstr != NULL)
1452 {
1453 size_t len;
1454
1455 charsetstr += strlen ("charset=");
1456 len = strcspn (charsetstr, " \t\n");
1457 charset = (char *) xallocsa (len + 1);
1458 memcpy (charset, charsetstr, len);
1459 charset[len] = '\0';
1460 break;
1461 }
1462 }
1463 }
1464 if (charset != NULL)
1465 break;
1466 }
1467 if (charset != NULL)
1468 {
1469 const char *canon_charset = po_charset_canonicalize (charset);
1470
1471 if (canon_charset != NULL)
1472 {
1473 bool all_compendiums_iconvable = true;
1474
1475 if (compendiums != NULL)
1476 for (k = 0; k < compendiums->nitems; k++)
1477 if (!is_message_list_iconvable (compendiums->item[k],
1478 NULL, canon_charset))
1479 {
1480 all_compendiums_iconvable = false;
1481 break;
1482 }
1483
1484 if (all_compendiums_iconvable)
1485 {
1486 /* Convert the compendiums to def's encoding. */
1487 if (compendiums != NULL)
1488 for (k = 0; k < compendiums->nitems; k++)
1489 iconv_message_list (compendiums->item[k],
1490 NULL, canon_charset,
1491 compendium_filenames->item[k]);
1492 conversion_done = true;
1493 }
1494 }
1495 freesa (charset);
1496 }
1497 }
1498 if (!conversion_done)
1499 {
1500 if (def->nitems == 0
1501 || (def->nitems == 1 && def->item[0]->messages->nitems == 0))
1502 {
1503 /* The definitions file is empty.
1504 Compare the encodings of the compendiums. */
1505 const char *common_canon_charset = NULL;
1506
1507 for (k = 0; k < compendiums->nitems; k++)
1508 {
1509 message_list_ty *mlp = compendiums->item[k];
1510 char *charset = NULL;
1511 const char *canon_charset = NULL;
1512
1513 for (j = 0; j < mlp->nitems; j++)
1514 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1515 {
1516 const char *header = mlp->item[j]->msgstr;
1517
1518 if (header != NULL)
1519 {
1520 const char *charsetstr =
1521 c_strstr (header, "charset=");
1522
1523 if (charsetstr != NULL)
1524 {
1525 size_t len;
1526
1527 charsetstr += strlen ("charset=");
1528 len = strcspn (charsetstr, " \t\n");
1529 charset = (char *) xallocsa (len + 1);
1530 memcpy (charset, charsetstr, len);
1531 charset[len] = '\0';
1532
1533 break;
1534 }
1535 }
1536 }
1537 if (charset != NULL)
1538 {
1539 canon_charset = po_charset_canonicalize (charset);
1540 freesa (charset);
1541 }
1542 /* If no charset declaration was found in this file,
1543 or if it is not a valid encoding name, or if it
1544 differs from the common charset found so far,
1545 we have no common charset. */
1546 if (canon_charset == NULL
1547 || (common_canon_charset != NULL
1548 && canon_charset != common_canon_charset))
1549 {
1550 common_canon_charset = NULL;
1551 break;
1552 }
1553 common_canon_charset = canon_charset;
1554 }
1555
1556 if (common_canon_charset != NULL)
1557 /* No conversion needed in this case. */
1558 conversion_done = true;
1559 }
1560 if (!conversion_done)
1561 {
1562 /* It's too hairy to find out what would be the optimal target
1563 encoding. So, convert everything to UTF-8. */
1564 def = iconv_msgdomain_list (def, "UTF-8", fn1);
1565 if (compendiums != NULL)
1566 for (k = 0; k < compendiums->nitems; k++)
1567 iconv_message_list (compendiums->item[k],
1568 NULL, po_charset_utf8,
1569 compendium_filenames->item[k]);
1570 }
1571 }
1572 }
1573 }
1574
1575 /* Initialize and preprocess the total set of message definitions. */
1576 definitions_init (&definitions, po_charset_utf8);
1577 empty_list = message_list_alloc (false);
1578
1579 result = msgdomain_list_alloc (false);
1580 processed = 0;
1581
1582 /* Every reference must be matched with its definition. */
1583 if (!multi_domain_mode)
1584 for (k = 0; k < ref->nitems; k++)
1585 {
1586 const char *domain = ref->item[k]->domain;
1587 message_list_ty *refmlp = ref->item[k]->messages;
1588 message_list_ty *resultmlp =
1589 msgdomain_list_sublist (result, domain, true);
1590 message_list_ty *defmlp;
1591
1592 defmlp = msgdomain_list_sublist (def, domain, false);
1593 if (defmlp == NULL)
1594 defmlp = empty_list;
1595 definitions_set_current_list (&definitions, defmlp);
1596
1597 match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
1598 &stats, &processed);
1599 }
1600 else
1601 {
1602 /* Apply the references messages in the default domain to each of
1603 the definition domains. */
1604 message_list_ty *refmlp = ref->item[0]->messages;
1605
1606 for (k = 0; k < def->nitems; k++)
1607 {
1608 const char *domain = def->item[k]->domain;
1609 message_list_ty *defmlp = def->item[k]->messages;
1610
1611 /* Ignore the default message domain if it has no messages. */
1612 if (k > 0 || defmlp->nitems > 0)
1613 {
1614 message_list_ty *resultmlp =
1615 msgdomain_list_sublist (result, domain, true);
1616
1617 definitions_set_current_list (&definitions, defmlp);
1618
1619 match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
1620 &stats, &processed);
1621 }
1622 }
1623 }
1624
1625 definitions_destroy (&definitions);
1626
1627 /* Look for messages in the definition file, which are not present
1628 in the reference file, indicating messages which defined but not
1629 used in the program. Don't scan the compendium(s). */
1630 for (k = 0; k < def->nitems; ++k)
1631 {
1632 const char *domain = def->item[k]->domain;
1633 message_list_ty *defmlp = def->item[k]->messages;
1634
1635 for (j = 0; j < defmlp->nitems; j++)
1636 {
1637 message_ty *defmsg = defmlp->item[j];
1638
1639 if (!defmsg->used)
1640 {
1641 /* Remember the old translation although it is not used anymore.
1642 But we mark it as obsolete. */
1643 message_ty *mp;
1644
1645 mp = message_copy (defmsg);
1646 /* Clear the extracted comments. */
1647 if (mp->comment_dot != NULL)
1648 {
1649 string_list_free (mp->comment_dot);
1650 mp->comment_dot = NULL;
1651 }
1652 /* Clear the file position comments. */
1653 if (mp->filepos != NULL)
1654 {
1655 size_t i;
1656
1657 for (i = 0; i < mp->filepos_count; i++)
1658 free ((char *) mp->filepos[i].file_name);
1659 mp->filepos_count = 0;
1660 free (mp->filepos);
1661 mp->filepos = NULL;
1662 }
1663 /* Mark as obsolete. */
1664 mp->obsolete = true;
1665
1666 message_list_append (msgdomain_list_sublist (result, domain, true),
1667 mp);
1668 stats.obsolete++;
1669 }
1670 }
1671 }
1672
1673 /* Determine the known a-priori encoding, if any. */
1674 if (def->encoding == ref->encoding)
1675 result->encoding = def->encoding;
1676
1677 /* Report some statistics. */
1678 if (verbosity_level > 0)
1679 fprintf (stderr, _("%s\
1680 Read %ld old + %ld reference, \
1681 merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
1682 !quiet && verbosity_level <= 1 ? "\n" : "",
1683 (long) def->nitems, (long) ref->nitems,
1684 (long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
1685 (long) stats.obsolete);
1686 else if (!quiet)
1687 fputs (_(" done.\n"), stderr);
1688
1689 /* Return results. */
1690 *defp = def;
1691 return result;
1692 }
1693