1 /* GNU gettext - internationalization aids 2 Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc. 3 This file was written by Peter Miller <millerp@canb.auug.org.au> 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 #ifdef HAVE_CONFIG_H 20 # include <config.h> 21 #endif 22 #include <alloca.h> 23 24 #include <getopt.h> 25 #include <limits.h> 26 #include <stdbool.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <locale.h> 31 32 #include "closeout.h" 33 #include "dir-list.h" 34 #include "error.h" 35 #include "error-progname.h" 36 #include "progname.h" 37 #include "relocatable.h" 38 #include "basename.h" 39 #include "message.h" 40 #include "read-catalog.h" 41 #include "read-po.h" 42 #include "read-properties.h" 43 #include "read-stringtable.h" 44 #include "write-catalog.h" 45 #include "write-po.h" 46 #include "write-properties.h" 47 #include "write-stringtable.h" 48 #include "format.h" 49 #include "xalloc.h" 50 #include "xallocsa.h" 51 #include "obstack.h" 52 #include "c-strstr.h" 53 #include "exit.h" 54 #include "c-strcase.h" 55 #include "stpcpy.h" 56 #include "stpncpy.h" 57 #include "po-charset.h" 58 #include "msgl-iconv.h" 59 #include "msgl-equal.h" 60 #include "msgl-fsearch.h" 61 #include "lock.h" 62 #include "plural-count.h" 63 #include "backupfile.h" 64 #include 
"copy-file.h" 65 #include "propername.h" 66 #include "gettext.h" 67 68 #define _(str) gettext (str) 69 70 #define obstack_chunk_alloc xmalloc 71 #define obstack_chunk_free free 72 73 74 /* If true do not print unneeded messages. */ 75 static bool quiet; 76 77 /* Verbosity level. */ 78 static int verbosity_level; 79 80 /* Force output of PO file even if empty. */ 81 static int force_po; 82 83 /* Apply the .pot file to each of the domains in the PO file. */ 84 static bool multi_domain_mode = false; 85 86 /* Determines whether to use fuzzy matching. */ 87 static bool use_fuzzy_matching = true; 88 89 /* Determines whether to keep old msgids as previous msgids. */ 90 static bool keep_previous = false; 91 92 /* List of user-specified compendiums. */ 93 static message_list_list_ty *compendiums; 94 95 /* List of corresponding filenames. */ 96 static string_list_ty *compendium_filenames; 97 98 /* Update mode. */ 99 static bool update_mode = false; 100 static const char *version_control_string; 101 static const char *backup_suffix_string; 102 103 /* Long options. 
/* Long options.
   Entries whose third field is a non-NULL pointer (&line_comment,
   &force_po) are flag options: getopt_long stores the fourth field into
   that variable and returns 0, which main() handles in its '\0' case.
   All other entries make getopt_long return the fourth field.  Values of
   the form CHAR_MAX + N identify long options without a short equivalent;
   they are dispatched by the matching "case CHAR_MAX + N" labels in
   main().  */
static const struct option long_options[] =
{
  { "add-location", no_argument, &line_comment, 1 },
  { "backup", required_argument, NULL, CHAR_MAX + 1 },
  { "compendium", required_argument, NULL, 'C', },
  { "directory", required_argument, NULL, 'D' },
  { "escape", no_argument, NULL, 'E' },
  { "force-po", no_argument, &force_po, 1 },
  { "help", no_argument, NULL, 'h' },
  { "indent", no_argument, NULL, 'i' },
  { "multi-domain", no_argument, NULL, 'm' },
  { "no-escape", no_argument, NULL, 'e' },
  { "no-fuzzy-matching", no_argument, NULL, 'N' },
  { "no-location", no_argument, &line_comment, 0 },
  { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
  { "output-file", required_argument, NULL, 'o' },
  { "previous", no_argument, NULL, CHAR_MAX + 7 },
  { "properties-input", no_argument, NULL, 'P' },
  { "properties-output", no_argument, NULL, 'p' },
  { "quiet", no_argument, NULL, 'q' },
  { "sort-by-file", no_argument, NULL, 'F' },
  { "sort-output", no_argument, NULL, 's' },
  { "silent", no_argument, NULL, 'q' },
  { "strict", no_argument, NULL, CHAR_MAX + 2 },
  { "stringtable-input", no_argument, NULL, CHAR_MAX + 5 },
  { "stringtable-output", no_argument, NULL, CHAR_MAX + 6 },
  { "suffix", required_argument, NULL, CHAR_MAX + 3 },
  { "update", no_argument, NULL, 'U' },
  { "verbose", no_argument, NULL, 'v' },
  { "version", no_argument, NULL, 'V' },
  { "width", required_argument, NULL, 'w', },
  { NULL, 0, NULL, 0 }
};


/* Counters describing the outcome of a merge run.
   'merged' is incremented by match_domain() for every reference message
   that has an exact match among the definitions.
   NOTE(review): 'fuzzied', 'missing' and 'obsolete' are presumably the
   counts of fuzzy matches, unmatched references and unused definitions;
   confirm against the code that updates them.  */
struct statistics
{
  size_t merged;
  size_t fuzzied;
  size_t missing;
  size_t obsolete;
};
/* Forward declaration of local functions.  */
static void usage (int status)
#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
        __attribute__ ((noreturn))
#endif
;
static void compendium (const char *filename);
static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
                                 catalog_input_format_ty input_syntax,
                                 msgdomain_list_ty **defp);


/* Program entry point.
   Parses the command line, validates the option combination, merges the
   two input files (argv[optind] is def.po, argv[optind + 1] is ref.pot),
   optionally sorts the result, and writes it either back to def.po
   (update mode, with an optional backup) or to OUTPUT_FILE.
   Never returns normally: every path ends in exit(), usage() or a fatal
   error().  */
int
main (int argc, char **argv)
{
  int opt;
  bool do_help;
  bool do_version;
  char *output_file;
  msgdomain_list_ty *def;
  msgdomain_list_ty *result;
  catalog_input_format_ty input_syntax = &input_format_po;
  catalog_output_format_ty output_syntax = &output_format_po;
  bool sort_by_filepos = false;
  bool sort_by_msgid = false;

  /* Set program name for messages.  */
  set_program_name (argv[0]);
  error_print_progname = maybe_print_progname;
  verbosity_level = 0;
  quiet = false;
  gram_max_allowed_errors = UINT_MAX;

#ifdef HAVE_SETLOCALE
  /* Set locale via LC_ALL.  */
  setlocale (LC_ALL, "");
#endif

  /* Set the text message domain.  */
  bindtextdomain (PACKAGE, relocate (LOCALEDIR));
  bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
  textdomain (PACKAGE);

  /* Ensure that write errors on stdout are detected.  */
  atexit (close_stdout);

  /* Set default values for variables.  */
  do_help = false;
  do_version = false;
  output_file = NULL;

  /* The short option string must stay in sync with long_options[].  */
  while ((opt = getopt_long (argc, argv, "C:D:eEFhimNo:pPqsUvVw:",
                             long_options, NULL))
         != EOF)
    switch (opt)
      {
      case '\0':                /* Long option.  */
        /* Flag options: getopt_long already stored the value through the
           pointer given in long_options[].  */
        break;

      case 'C':
        compendium (optarg);
        break;

      case 'D':
        dir_list_append (optarg);
        break;

      case 'e':
        message_print_style_escape (false);
        break;

      case 'E':
        message_print_style_escape (true);
        break;

      case 'F':
        sort_by_filepos = true;
        break;

      case 'h':
        do_help = true;
        break;

      case 'i':
        message_print_style_indent ();
        break;

      case 'm':
        multi_domain_mode = true;
        break;

      case 'N':
        use_fuzzy_matching = false;
        break;

      case 'o':
        output_file = optarg;
        break;

      case 'p':
        output_syntax = &output_format_properties;
        break;

      case 'P':
        input_syntax = &input_format_properties;
        break;

      case 'q':
        quiet = true;
        break;

      case 's':
        sort_by_msgid = true;
        break;

      case 'U':
        update_mode = true;
        break;

      case 'v':
        ++verbosity_level;
        break;

      case 'V':
        do_version = true;
        break;

      case 'w':
        {
          int value;
          char *endp;
          value = strtol (optarg, &endp, 10);
          /* An argument without any leading digits is silently ignored.  */
          if (endp != optarg)
            message_page_width_set (value);
        }
        break;

      case CHAR_MAX + 1: /* --backup */
        version_control_string = optarg;
        break;

      case CHAR_MAX + 2: /* --strict */
        message_print_style_uniforum ();
        break;

      case CHAR_MAX + 3: /* --suffix */
        backup_suffix_string = optarg;
        break;

      case CHAR_MAX + 4: /* --no-wrap */
        message_page_width_ignore ();
        break;

      case CHAR_MAX + 5: /* --stringtable-input */
        input_syntax = &input_format_stringtable;
        break;

      case CHAR_MAX + 6: /* --stringtable-output */
        output_syntax = &output_format_stringtable;
        break;

      case CHAR_MAX + 7: /* --previous */
        keep_previous = true;
        break;

      default:
        usage (EXIT_FAILURE);
        break;
      }

  /* Version information is requested.  */
  if (do_version)
    {
      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
      /* xgettext: no-wrap */
      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
"),
              "1995-1998, 2000-2006");
      printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
      exit (EXIT_SUCCESS);
    }

  /* Help is requested.  */
  if (do_help)
    usage (EXIT_SUCCESS);

  /* Test whether we have an .po file name as argument.
     error() is called with status EXIT_SUCCESS (0) so that it only prints
     the message; the following usage() call terminates the program.  */
  if (optind >= argc)
    {
      error (EXIT_SUCCESS, 0, _("no input files given"));
      usage (EXIT_FAILURE);
    }
  if (optind + 2 != argc)
    {
      error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
      usage (EXIT_FAILURE);
    }

  /* Verify selected options.  */
  if (update_mode)
    {
      /* In update mode the output always goes back to def.po.  */
      if (output_file != NULL)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--output-file");
        }
    }
  else
    {
      /* The backup options only make sense when def.po is overwritten.  */
      if (version_control_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--backup", "--update");
          usage (EXIT_FAILURE);
        }
      if (backup_suffix_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--suffix", "--update");
          usage (EXIT_FAILURE);
        }
    }

  if (!line_comment && sort_by_filepos)
    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
           "--no-location", "--sort-by-file");

  if (sort_by_msgid && sort_by_filepos)
    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
           "--sort-output", "--sort-by-file");

  /* In update mode, --properties-input implies --properties-output.  */
  if (update_mode && input_syntax == &input_format_properties)
    output_syntax = &output_format_properties;
  /* In update mode, --stringtable-input implies --stringtable-output.  */
  if (update_mode && input_syntax == &input_format_stringtable)
    output_syntax = &output_format_stringtable;

  /* Merge the two files.  merge() also hands back the parsed def.po in
     DEF, which update mode compares against the result below.  */
  result = merge (argv[optind], argv[optind + 1], input_syntax, &def);

  /* Sort the results.  */
  if (sort_by_filepos)
    msgdomain_list_sort_by_filepos (result);
  else if (sort_by_msgid)
    msgdomain_list_sort_by_msgid (result);

  if (update_mode)
    {
      /* Do nothing if the original file and the result are equal.  Also do
         nothing if the original file and the result differ only by the
         POT-Creation-Date in the header entry; this is needed for projects
         which don't put the .pot file under CVS.  */
      if (!msgdomain_list_equal (def, result, true))
        {
          /* Back up def.po.  */
          enum backup_type backup_type;
          char *backup_file;

          output_file = argv[optind];

          /* Without --suffix, fall back to a non-empty
             SIMPLE_BACKUP_SUFFIX environment variable.  */
          if (backup_suffix_string == NULL)
            {
              backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
              if (backup_suffix_string != NULL
                  && backup_suffix_string[0] == '\0')
                backup_suffix_string = NULL;
            }
          if (backup_suffix_string != NULL)
            simple_backup_suffix = backup_suffix_string;

          backup_type = xget_version (_("backup type"), version_control_string);
          if (backup_type != none)
            {
              backup_file = find_backup_file_name (output_file, backup_type);
              copy_file_preserving (output_file, backup_file);
            }

          /* Write the merged message list out.  */
          msgdomain_list_print (result, output_file, output_syntax, true,
                                false);
        }
    }
  else
    {
      /* Write the merged message list out.  */
      msgdomain_list_print (result, output_file, output_syntax, force_po,
                            false);
    }

  exit (EXIT_SUCCESS);
}
/* Display usage information and exit.
   With STATUS != EXIT_SUCCESS only a short hint is written to stderr;
   otherwise the full help text is written to stdout.  In both cases the
   process terminates with exit (STATUS), so this function never returns.
   Each printf argument wrapped in _() is a separate translatable
   message.  */
static void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    fprintf (stderr, _("Try `%s --help' for more information.\n"),
             program_name);
  else
    {
      printf (_("\
Usage: %s [OPTION] def.po ref.pot\n\
"), program_name);
      printf ("\n");
      /* xgettext: no-wrap */
      printf (_("\
Merges two Uniforum style .po files together. The def.po file is an\n\
existing PO file with translations which will be taken over to the newly\n\
created file as long as they still match; comments will be preserved,\n\
but extracted comments and file positions will be discarded. The ref.pot\n\
file is the last created PO file with up-to-date source references but\n\
old translations, or a PO Template file (generally created by xgettext);\n\
any translations or comments in the file will be discarded, however dot\n\
comments and file positions will be preserved. Where an exact match\n\
cannot be found, fuzzy matching is used to produce better results.\n\
"));
      printf ("\n");
      printf (_("\
Mandatory arguments to long options are mandatory for short options too.\n"));
      printf ("\n");
      /* Input files.  */
      printf (_("\
Input file location:\n"));
      printf (_("\
def.po translations referring to old sources\n"));
      printf (_("\
ref.pot references to new sources\n"));
      printf (_("\
-D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
      printf (_("\
-C, --compendium=FILE additional library of message translations,\n\
may be specified more than once\n"));
      printf ("\n");
      /* Operation mode.  */
      printf (_("\
Operation mode:\n"));
      printf (_("\
-U, --update update def.po,\n\
do nothing if def.po already up to date\n"));
      printf ("\n");
      /* Output destination, normal mode.  */
      printf (_("\
Output file location:\n"));
      printf (_("\
-o, --output-file=FILE write output to specified file\n"));
      printf (_("\
The results are written to standard output if no output file is specified\n\
or if it is -.\n"));
      printf ("\n");
      /* Output destination and backups, update mode.  */
      printf (_("\
Output file location in update mode:\n"));
      printf (_("\
The result is written back to def.po.\n"));
      printf (_("\
--backup=CONTROL make a backup of def.po\n"));
      printf (_("\
--suffix=SUFFIX override the usual backup suffix\n"));
      printf (_("\
The version control method may be selected via the --backup option or through\n\
the VERSION_CONTROL environment variable. Here are the values:\n\
none, off never make backups (even if --backup is given)\n\
numbered, t make numbered backups\n\
existing, nil numbered if numbered backups exist, simple otherwise\n\
simple, never always make simple backups\n"));
      printf (_("\
The backup suffix is `~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\
environment variable.\n\
"));
      printf ("\n");
      /* Matching behaviour.  */
      printf (_("\
Operation modifiers:\n"));
      printf (_("\
-m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
      printf (_("\
-N, --no-fuzzy-matching do not use fuzzy matching\n"));
      printf (_("\
--previous keep previous msgids of translated messages\n"));
      printf ("\n");
      /* Input syntax.  */
      printf (_("\
Input file syntax:\n"));
      printf (_("\
-P, --properties-input input files are in Java .properties syntax\n"));
      printf (_("\
--stringtable-input input files are in NeXTstep/GNUstep .strings\n\
syntax\n"));
      printf ("\n");
      /* Output formatting.  */
      printf (_("\
Output details:\n"));
      printf (_("\
-e, --no-escape do not use C escapes in output (default)\n"));
      printf (_("\
-E, --escape use C escapes in output, no extended chars\n"));
      printf (_("\
--force-po write PO file even if empty\n"));
      printf (_("\
-i, --indent indented output style\n"));
      printf (_("\
--no-location suppress '#: filename:line' lines\n"));
      printf (_("\
--add-location preserve '#: filename:line' lines (default)\n"));
      printf (_("\
--strict strict Uniforum output style\n"));
      printf (_("\
-p, --properties-output write out a Java .properties file\n"));
      printf (_("\
--stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
      printf (_("\
-w, --width=NUMBER set output page width\n"));
      printf (_("\
--no-wrap do not break long message lines, longer than\n\
the output page width, into several lines\n"));
      printf (_("\
-s, --sort-output generate sorted output\n"));
      printf (_("\
-F, --sort-by-file sort output by file location\n"));
      printf ("\n");
      /* Informative options.  */
      printf (_("\
Informative output:\n"));
      printf (_("\
-h, --help display this help and exit\n"));
      printf (_("\
-V, --version output version information and exit\n"));
      printf (_("\
-v, --verbose increase verbosity level\n"));
      printf (_("\
-q, --quiet, --silent suppress progress indicators\n"));
      printf ("\n");
      fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
             stdout);
    }

  exit (status);
}
output style\n")); 552 printf (_("\ 553 -p, --properties-output write out a Java .properties file\n")); 554 printf (_("\ 555 --stringtable-output write out a NeXTstep/GNUstep .strings file\n")); 556 printf (_("\ 557 -w, --width=NUMBER set output page width\n")); 558 printf (_("\ 559 --no-wrap do not break long message lines, longer than\n\ 560 the output page width, into several lines\n")); 561 printf (_("\ 562 -s, --sort-output generate sorted output\n")); 563 printf (_("\ 564 -F, --sort-by-file sort output by file location\n")); 565 printf ("\n"); 566 printf (_("\ 567 Informative output:\n")); 568 printf (_("\ 569 -h, --help display this help and exit\n")); 570 printf (_("\ 571 -V, --version output version information and exit\n")); 572 printf (_("\ 573 -v, --verbose increase verbosity level\n")); 574 printf (_("\ 575 -q, --quiet, --silent suppress progress indicators\n")); 576 printf ("\n"); 577 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), 578 stdout); 579 } 580 581 exit (status); 582 } 583 584 585 static void 586 compendium (const char *filename) 587 { 588 msgdomain_list_ty *mdlp; 589 size_t k; 590 591 mdlp = read_catalog_file (filename, &input_format_po); 592 if (compendiums == NULL) 593 { 594 compendiums = message_list_list_alloc (); 595 compendium_filenames = string_list_alloc (); 596 } 597 for (k = 0; k < mdlp->nitems; k++) 598 { 599 message_list_list_append (compendiums, mdlp->item[k]->messages); 600 string_list_append (compendium_filenames, filename); 601 } 602 } 603 604 605 /* Data structure representing the messages with known translations. 606 They are composed of 607 - A message list from def.po, 608 - The compendiums. 609 The data structure is optimized for exact and fuzzy searches. */ 610 typedef struct definitions_ty definitions_ty; 611 struct definitions_ty 612 { 613 /* A list of message lists. The first comes from def.po, the other ones 614 from the compendiums. 
Each message list has a built-in hash table, 615 for speed when doing the exact searches. */ 616 message_list_list_ty *lists; 617 /* A fuzzy index of the compendiums, for speed when doing fuzzy searches. 618 Used only if use_fuzzy_matching is true and compendiums != NULL. */ 619 message_fuzzy_index_ty *findex; 620 /* A once-only execution guard for the initialization of the fuzzy index. 621 Needed for OpenMP. */ 622 gl_lock_define(, findex_init_lock) 623 /* The canonical encoding of the compendiums. */ 624 const char *canon_charset; 625 }; 626 627 static inline void 628 definitions_init (definitions_ty *definitions, const char *canon_charset) 629 { 630 definitions->lists = message_list_list_alloc (); 631 message_list_list_append (definitions->lists, NULL); 632 if (compendiums != NULL) 633 message_list_list_append_list (definitions->lists, compendiums); 634 definitions->findex = NULL; 635 gl_lock_init (definitions->findex_init_lock); 636 definitions->canon_charset = canon_charset; 637 } 638 639 /* Create the fuzzy index. 640 Used only if use_fuzzy_matching is true and compendiums != NULL. */ 641 static inline void 642 definitions_init_findex (definitions_ty *definitions) 643 { 644 /* Protect against concurrent execution. */ 645 gl_lock_lock (definitions->findex_init_lock); 646 if (definitions->findex == NULL) 647 { 648 /* Combine all the compendium message lists into a single one. Don't 649 bother checking for duplicates. */ 650 message_list_ty *all_compendium; 651 size_t i; 652 653 all_compendium = message_list_alloc (false); 654 for (i = 0; i < compendiums->nitems; i++) 655 { 656 message_list_ty *mlp = compendiums->item[i]; 657 size_t j; 658 659 for (j = 0; j < mlp->nitems; j++) 660 message_list_append (all_compendium, mlp->item[j]); 661 } 662 663 /* Create the fuzzy index from it. 
*/ 664 definitions->findex = 665 message_fuzzy_index_alloc (all_compendium, definitions->canon_charset); 666 } 667 gl_lock_unlock (definitions->findex_init_lock); 668 } 669 670 /* Return the current list of non-compendium messages. */ 671 static inline message_list_ty * 672 definitions_current_list (const definitions_ty *definitions) 673 { 674 return definitions->lists->item[0]; 675 } 676 677 /* Set the current list of non-compendium messages. */ 678 static inline void 679 definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp) 680 { 681 definitions->lists->item[0] = mlp; 682 } 683 684 /* Exact search. */ 685 static inline message_ty * 686 definitions_search (const definitions_ty *definitions, 687 const char *msgctxt, const char *msgid) 688 { 689 return message_list_list_search (definitions->lists, msgctxt, msgid); 690 } 691 692 /* Fuzzy search. 693 Used only if use_fuzzy_matching is true. */ 694 static inline message_ty * 695 definitions_search_fuzzy (definitions_ty *definitions, 696 const char *msgctxt, const char *msgid) 697 { 698 message_ty *mp1 = 699 message_list_search_fuzzy (definitions_current_list (definitions), 700 msgctxt, msgid); 701 if (compendiums != NULL) 702 { 703 message_ty *mp2; 704 705 /* Create the fuzzy index lazily. */ 706 if (definitions->findex == NULL) 707 definitions_init_findex (definitions); 708 709 mp2 = message_fuzzy_index_search (definitions->findex, msgctxt, msgid); 710 711 /* Choose the best among mp1, mp2. 
*/ 712 if (mp1 == NULL 713 || (mp2 != NULL 714 && (fuzzy_search_goal_function (mp2, msgctxt, msgid) 715 > fuzzy_search_goal_function (mp1, msgctxt, msgid)))) 716 mp1 = mp2; 717 } 718 719 return mp1; 720 } 721 722 static inline void 723 definitions_destroy (definitions_ty *definitions) 724 { 725 message_list_list_free (definitions->lists, 2); 726 if (definitions->findex != NULL) 727 message_fuzzy_index_free (definitions->findex); 728 } 729 730 731 static bool 732 msgfmt_check_pair_fails (const lex_pos_ty *pos, 733 const char *msgid, const char *msgid_plural, 734 const char *msgstr, size_t msgstr_len, 735 size_t fmt) 736 { 737 bool failure; 738 struct formatstring_parser *parser = formatstring_parsers[fmt]; 739 char *invalid_reason = NULL; 740 void *msgid_descr = 741 parser->parse (msgid_plural != NULL ? msgid_plural : msgid, false, 742 &invalid_reason); 743 744 failure = false; 745 if (msgid_descr != NULL) 746 { 747 const char *p_end = msgstr + msgstr_len; 748 const char *p; 749 750 for (p = msgstr; p < p_end; p += strlen (p) + 1) 751 { 752 void *msgstr_descr = parser->parse (msgstr, true, &invalid_reason); 753 754 if (msgstr_descr != NULL) 755 { 756 failure = parser->check (msgid_descr, msgstr_descr, 757 msgid_plural == NULL, NULL, NULL); 758 parser->free (msgstr_descr); 759 } 760 else 761 { 762 failure = true; 763 free (invalid_reason); 764 } 765 766 if (failure) 767 break; 768 } 769 770 parser->free (msgid_descr); 771 } 772 else 773 free (invalid_reason); 774 775 return failure; 776 } 777 778 779 static message_ty * 780 message_merge (message_ty *def, message_ty *ref, bool force_fuzzy) 781 { 782 const char *msgstr; 783 size_t msgstr_len; 784 const char *prev_msgctxt; 785 const char *prev_msgid; 786 const char *prev_msgid_plural; 787 message_ty *result; 788 size_t j, i; 789 790 /* Take the msgid from the reference. When fuzzy matches are made, 791 the definition will not be unique, but the reference will be - 792 usually because it has only been slightly changed. 
/* Build a new message from the definition DEF (the existing translation)
   and the reference REF (the up-to-date entry).

   Taken from REF: msgctxt, msgid, msgid_plural, dot comments, file
   positions, the is_format flags and do_wrap.
   Taken from DEF: msgstr, translator comments and the source position.
   For the header entry the msgstr is rebuilt field by field, with
   Report-Msgid-Bugs-To and POT-Creation-Date overridden from REF.

   The result is fuzzy if DEF is fuzzy, if FORCE_FUZZY is true, or if REF
   is a possible format string of some kind while DEF is not and the
   translation fails msgfmt_check_pair_fails for that kind.

   Returns the newly allocated merged message.  */
static message_ty *
message_merge (message_ty *def, message_ty *ref, bool force_fuzzy)
{
  const char *msgstr;
  size_t msgstr_len;
  const char *prev_msgctxt;
  const char *prev_msgid;
  const char *prev_msgid_plural;
  message_ty *result;
  size_t j, i;

  /* Take the msgid from the reference.  When fuzzy matches are made,
     the definition will not be unique, but the reference will be -
     usually because it has only been slightly changed.  */

  /* Take the msgstr from the definition.  The msgstr of the reference
     is usually empty, as it was generated by xgettext.  If we currently
     process the header entry we have to merge the msgstr by using the
     Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference.  */
  if (is_header (ref))
    {
      /* Oh, oh.  The header entry and we have something to fill in.  */
      /* Field names recognized in the header, with their lengths.  The
         #define after each entry is its index in this table and in
         header_fields[].  */
      static const struct
      {
        const char *name;
        size_t len;
      } known_fields[] =
      {
        { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
#define PROJECT_ID              0
        { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
#define REPORT_MSGID_BUGS_TO    1
        { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
#define POT_CREATION_DATE       2
        { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
#define PO_REVISION_DATE        3
        { "Last-Translator:", sizeof ("Last-Translator:") - 1 },
#define LAST_TRANSLATOR         4
        { "Language-Team:", sizeof ("Language-Team:") - 1 },
#define LANGUAGE_TEAM           5
        { "MIME-Version:", sizeof ("MIME-Version:") - 1 },
#define MIME_VERSION            6
        { "Content-Type:", sizeof ("Content-Type:") - 1 },
#define CONTENT_TYPE            7
        { "Content-Transfer-Encoding:",
          sizeof ("Content-Transfer-Encoding:") - 1 }
#define CONTENT_TRANSFER        8
      };
#define UNKNOWN 9
      /* Value (text after the field name, including the newline) of each
         known field; slot UNKNOWN accumulates all unrecognized lines
         verbatim.  */
      struct
      {
        const char *string;
        size_t len;
      } header_fields[UNKNOWN + 1];
      struct obstack pool;
      const char *cp;
      char *newp;
      size_t len, cnt;

      /* Clear all fields.  */
      memset (header_fields, '\0', sizeof (header_fields));

      /* Prepare a temporary memory pool.  */
      obstack_init (&pool);

      /* Split the definition's header msgstr into lines and classify
         each line.  */
      cp = def->msgstr;
      while (*cp != '\0')
        {
          const char *endp = strchr (cp, '\n');
          int terminated = endp != NULL;

          if (!terminated)
            {
              /* Add a trailing newline.  */
              char *copy;
              endp = strchr (cp, '\0');

              len = endp - cp + 1;

              copy = (char *) obstack_alloc (&pool, len + 1);
              stpcpy (stpcpy (copy, cp), "\n");
              /* CP now refers to the copy; ENDP still points at the
                 terminating NUL of the original, which ends the loop.  */
              cp = copy;
            }
          else
            {
              /* LEN includes the newline; ENDP moves to the next line.  */
              len = (endp - cp) + 1;
              ++endp;
            }

          /* Compare with any of the known fields.  */
          for (cnt = 0;
               cnt < sizeof (known_fields) / sizeof (known_fields[0]);
               ++cnt)
            if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
                == 0)
              break;

          if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
            {
              /* Known field: remember the part after the field name.  */
              header_fields[cnt].string = &cp[known_fields[cnt].len];
              header_fields[cnt].len = len - known_fields[cnt].len;
            }
          else
            {
              /* It's an unknown field.  Append content to what is already
                 known.  */
              char *extended =
                (char *) obstack_alloc (&pool,
                                        header_fields[UNKNOWN].len + len + 1);
              memcpy (extended, header_fields[UNKNOWN].string,
                      header_fields[UNKNOWN].len);
              memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
              extended[header_fields[UNKNOWN].len + len] = '\0';
              header_fields[UNKNOWN].string = extended;
              header_fields[UNKNOWN].len += len;
            }

          cp = endp;
        }

      /* Override Report-Msgid-Bugs-To with the value from the reference,
         if the reference has one.  */
      {
        const char *msgid_bugs_ptr;

        msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
        if (msgid_bugs_ptr != NULL)
          {
            size_t msgid_bugs_len;
            const char *endp;

            msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;

            endp = strchr (msgid_bugs_ptr, '\n');
            if (endp == NULL)
              {
                /* Add a trailing newline.  */
                char *extended;
                endp = strchr (msgid_bugs_ptr, '\0');
                msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
                extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
                stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
                msgid_bugs_ptr = extended;
              }
            else
              msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;

            header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
            header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
          }
      }

      /* Likewise override POT-Creation-Date from the reference.  */
      {
        const char *pot_date_ptr;

        pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
        if (pot_date_ptr != NULL)
          {
            size_t pot_date_len;
            const char *endp;

            pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;

            endp = strchr (pot_date_ptr, '\n');
            if (endp == NULL)
              {
                /* Add a trailing newline.  */
                char *extended;
                endp = strchr (pot_date_ptr, '\0');
                pot_date_len = (endp - pot_date_ptr) + 1;
                extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
                stpcpy (stpcpy (extended, pot_date_ptr), "\n");
                pot_date_ptr = extended;
              }
            else
              pot_date_len = (endp - pot_date_ptr) + 1;

            header_fields[POT_CREATION_DATE].string = pot_date_ptr;
            header_fields[POT_CREATION_DATE].len = pot_date_len;
          }
      }

      /* Concatenate all the various fields.  */
      len = 0;
      for (cnt = 0; cnt < UNKNOWN; ++cnt)
        if (header_fields[cnt].string != NULL)
          len += known_fields[cnt].len + header_fields[cnt].len;
      len += header_fields[UNKNOWN].len;

      /* CP keeps the start of the new msgstr; NEWP is the write cursor.  */
      cp = newp = (char *) xmalloc (len + 1);
      newp[len] = '\0';

      /* Emit "Name:" followed by the stored value if field IDX is set.  */
#define IF_FILLED(idx)                                                        \
      if (header_fields[idx].string)                                          \
        newp = stpncpy (stpcpy (newp, known_fields[idx].name),                \
                        header_fields[idx].string, header_fields[idx].len)

      IF_FILLED (PROJECT_ID);
      IF_FILLED (REPORT_MSGID_BUGS_TO);
      IF_FILLED (POT_CREATION_DATE);
      IF_FILLED (PO_REVISION_DATE);
      IF_FILLED (LAST_TRANSLATOR);
      IF_FILLED (LANGUAGE_TEAM);
      IF_FILLED (MIME_VERSION);
      IF_FILLED (CONTENT_TYPE);
      IF_FILLED (CONTENT_TRANSFER);
      /* Unrecognized lines go last, in their original order.  */
      if (header_fields[UNKNOWN].string != NULL)
        stpcpy (newp, header_fields[UNKNOWN].string);

#undef IF_FILLED

      /* Free the temporary memory pool.  All pool strings have been
         copied into the xmalloc'ed buffer by now.  */
      obstack_free (&pool, NULL);

      msgstr = cp;
      msgstr_len = strlen (cp) + 1;

      /* The header entry never gets a previous msgid.  */
      prev_msgctxt = NULL;
      prev_msgid = NULL;
      prev_msgid_plural = NULL;
    }
  else
    {
      msgstr = def->msgstr;
      msgstr_len = def->msgstr_len;

      if (def->is_fuzzy)
        {
          /* DEF was already fuzzy: carry its previous msgid along.  */
          prev_msgctxt = def->prev_msgctxt;
          prev_msgid = def->prev_msgid;
          prev_msgid_plural = def->prev_msgid_plural;
        }
      else
        {
          /* DEF was translated: its own msgid is the one the msgstr
             belongs to.  */
          prev_msgctxt = def->msgctxt;
          prev_msgid = def->msgid;
          prev_msgid_plural = def->msgid_plural;
        }
    }

  result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
                          xstrdup (ref->msgid), ref->msgid_plural,
                          msgstr, msgstr_len, &def->pos);

  /* Take the comments from the definition file.  There will be none at
     all in the reference file, as it was generated by xgettext.  */
  if (def->comment)
    for (j = 0; j < def->comment->nitems; ++j)
      message_comment_append (result, def->comment->item[j]);

  /* Take the dot comments from the reference file, as they are
     generated by xgettext.  Any in the definition file are old ones
     collected by previous runs of xgettext and msgmerge.  */
  if (ref->comment_dot)
    for (j = 0; j < ref->comment_dot->nitems; ++j)
      message_comment_dot_append (result, ref->comment_dot->item[j]);

  /* The flags are mixed in a special way.  Some informations come
     from the reference message (such as format/no-format), others
     come from the definition file (fuzzy or not).  */
  result->is_fuzzy = def->is_fuzzy | force_fuzzy;

  for (i = 0; i < NFORMATS; i++)
    {
      result->is_format[i] = ref->is_format[i];

      /* If the reference message is marked as being a format specifier,
         but the definition message is not, we check if the resulting
         message would pass "msgfmt -c".  If yes, then all is fine.  If
         not, we add a fuzzy marker, because
         1. the message needs the translator's attention,
         2. msgmerge must not transform a PO file which passes "msgfmt -c"
            into a PO file which doesn't.  */
      if (!result->is_fuzzy
          && possible_format_p (ref->is_format[i])
          && !possible_format_p (def->is_format[i])
          && msgfmt_check_pair_fails (&def->pos, ref->msgid, ref->msgid_plural,
                                      msgstr, msgstr_len, i))
        result->is_fuzzy = true;
    }

  result->do_wrap = ref->do_wrap;

  /* Insert previous msgid, commented out with "#|".
     Do so only when --previous is specified, for backward compatibility.
     Since the "previous msgid" represents the original msgid that led to
     the current msgstr,
       - we can omit it if the resulting message is not fuzzy,
       - otherwise, if the corresponding message from the definition file
         was translated (not fuzzy), we use that message's msgid,
       - otherwise, we use that message's prev_msgid.  */
  if (keep_previous && result->is_fuzzy)
    {
      result->prev_msgctxt = prev_msgctxt;
      result->prev_msgid = prev_msgid;
      result->prev_msgid_plural = prev_msgid_plural;
    }

  /* Take the file position comments from the reference file, as they
     are generated by xgettext.  Any in the definition file are old ones
     collected by previous runs of xgettext and msgmerge.  */
  for (j = 0; j < ref->filepos_count; ++j)
    {
      lex_pos_ty *pp = &ref->filepos[j];
      message_comment_filepos (result, pp->file_name, pp->line_number);
    }

  /* Special postprocessing is needed if the reference message is a
     plural form and the definition message isn't, or vice versa.
     The 'used' field of the result records which case applies:
     1 = REF has a plural but DEF has none, 2 = the reverse.
     NOTE(review): the postprocessing itself happens in code that reads
     these markers; confirm there.  */
  if (ref->msgid_plural != NULL)
    {
      if (def->msgid_plural == NULL)
        result->used = 1;
    }
  else
    {
      if (def->msgid_plural != NULL)
        result->used = 2;
    }

  /* All done, return the merged message to the caller.  */
  return result;
}
header_entry->msgstr : NULL); 1119 untranslated_plural_msgstr = (char *) xmalloc (nplurals); 1120 memset (untranslated_plural_msgstr, '\0', nplurals); 1121 1122 /* Most of the time is spent in definitions_search_fuzzy. 1123 Perform it in a separate loop that can be parallelized by an OpenMP 1124 capable compiler. */ 1125 search_results = 1126 (struct search_result *) 1127 xmalloc (refmlp->nitems * sizeof (struct search_result)); 1128 { 1129 long int nn = refmlp->nitems; 1130 long int jj; 1131 1132 /* Tell the OpenMP capable compiler to distribute this loop across 1133 several threads. The schedule is dynamic, because for some messages 1134 the loop body can be executed very quickly, whereas for others it takes 1135 a long time. */ 1136 #ifdef _OPENMP 1137 # pragma omp parallel for schedule(dynamic) 1138 #endif 1139 for (jj = 0; jj < nn; jj++) 1140 { 1141 message_ty *refmsg = refmlp->item[jj]; 1142 message_ty *defmsg; 1143 1144 /* Because merging can take a while we print something to signal 1145 we are not dead. */ 1146 if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0) 1147 fputc ('.', stderr); 1148 #ifdef _OPENMP 1149 # pragma omp atomic 1150 #endif 1151 (*processed)++; 1152 1153 /* See if it is in the other file. */ 1154 defmsg = 1155 definitions_search (definitions, refmsg->msgctxt, refmsg->msgid); 1156 if (defmsg != NULL) 1157 { 1158 search_results[jj].found = defmsg; 1159 search_results[jj].fuzzy = false; 1160 } 1161 else if (!is_header (refmsg) 1162 /* If the message was not defined at all, try to find a very 1163 similar message, it could be a typo, or the suggestion may 1164 help. 
*/ 1165 && use_fuzzy_matching 1166 && ((defmsg = 1167 definitions_search_fuzzy (definitions, 1168 refmsg->msgctxt, 1169 refmsg->msgid)) != NULL)) 1170 { 1171 search_results[jj].found = defmsg; 1172 search_results[jj].fuzzy = true; 1173 } 1174 else 1175 search_results[jj].found = NULL; 1176 } 1177 } 1178 1179 for (j = 0; j < refmlp->nitems; j++) 1180 { 1181 message_ty *refmsg = refmlp->item[j]; 1182 1183 /* See if it is in the other file. 1184 This used definitions_search. */ 1185 if (search_results[j].found != NULL && !search_results[j].fuzzy) 1186 { 1187 message_ty *defmsg = search_results[j].found; 1188 /* Merge the reference with the definition: take the #. and 1189 #: comments from the reference, take the # comments from 1190 the definition, take the msgstr from the definition. Add 1191 this merged entry to the output message list. */ 1192 message_ty *mp = message_merge (defmsg, refmsg, false); 1193 1194 message_list_append (resultmlp, mp); 1195 1196 /* Remember that this message has been used, when we scan 1197 later to see if anything was omitted. */ 1198 defmsg->used = 1; 1199 stats->merged++; 1200 } 1201 else if (!is_header (refmsg)) 1202 { 1203 /* If the message was not defined at all, try to find a very 1204 similar message, it could be a typo, or the suggestion may 1205 help. This search assumed use_fuzzy_matching and used 1206 definitions_search_fuzzy. */ 1207 if (search_results[j].found != NULL && search_results[j].fuzzy) 1208 { 1209 message_ty *defmsg = search_results[j].found; 1210 message_ty *mp; 1211 1212 if (verbosity_level > 1) 1213 { 1214 po_gram_error_at_line (&refmsg->pos, _("\ 1215 this message is used but not defined...")); 1216 error_message_count--; 1217 po_gram_error_at_line (&defmsg->pos, _("\ 1218 ...but this definition is similar")); 1219 } 1220 1221 /* Merge the reference with the definition: take the #. and 1222 #: comments from the reference, take the # comments from 1223 the definition, take the msgstr from the definition. 
Add 1224 this merged entry to the output message list. */ 1225 mp = message_merge (defmsg, refmsg, true); 1226 1227 message_list_append (resultmlp, mp); 1228 1229 /* Remember that this message has been used, when we scan 1230 later to see if anything was omitted. */ 1231 defmsg->used = 1; 1232 stats->fuzzied++; 1233 if (!quiet && verbosity_level <= 1) 1234 /* Always print a dot if we handled a fuzzy match. */ 1235 fputc ('.', stderr); 1236 } 1237 else 1238 { 1239 message_ty *mp; 1240 bool is_untranslated; 1241 const char *p; 1242 const char *pend; 1243 1244 if (verbosity_level > 1) 1245 po_gram_error_at_line (&refmsg->pos, _("\ 1246 this message is used but not defined in %s"), fn1); 1247 1248 mp = message_copy (refmsg); 1249 1250 if (mp->msgid_plural != NULL) 1251 { 1252 /* Test if mp is untranslated. (It most likely is.) */ 1253 is_untranslated = true; 1254 for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++) 1255 if (*p != '\0') 1256 { 1257 is_untranslated = false; 1258 break; 1259 } 1260 if (is_untranslated) 1261 { 1262 /* Change mp->msgstr_len consecutive empty strings into 1263 nplurals consecutive empty strings. */ 1264 if (nplurals > mp->msgstr_len) 1265 mp->msgstr = untranslated_plural_msgstr; 1266 mp->msgstr_len = nplurals; 1267 } 1268 } 1269 1270 message_list_append (resultmlp, mp); 1271 stats->missing++; 1272 } 1273 } 1274 } 1275 1276 free (search_results); 1277 1278 /* Now postprocess the problematic merges. This is needed because we 1279 want the result to pass the "msgfmt -c -v" check. */ 1280 { 1281 /* message_merge sets mp->used to 1 or 2, depending on the problem. 1282 Compute the bitwise OR of all these. */ 1283 int problematic = 0; 1284 1285 for (j = 0; j < resultmlp->nitems; j++) 1286 problematic |= resultmlp->item[j]->used; 1287 1288 if (problematic) 1289 { 1290 unsigned long int nplurals = 0; 1291 1292 if (problematic & 1) 1293 { 1294 /* Need to know nplurals of the result domain. 
*/ 1295 message_ty *header_entry = 1296 message_list_search (resultmlp, NULL, ""); 1297 1298 nplurals = get_plural_count (header_entry 1299 ? header_entry->msgstr 1300 : NULL); 1301 } 1302 1303 for (j = 0; j < resultmlp->nitems; j++) 1304 { 1305 message_ty *mp = resultmlp->item[j]; 1306 1307 if ((mp->used & 1) && (nplurals > 0)) 1308 { 1309 /* ref->msgid_plural != NULL but def->msgid_plural == NULL. 1310 Use a copy of def->msgstr for each possible plural form. */ 1311 size_t new_msgstr_len; 1312 char *new_msgstr; 1313 char *p; 1314 unsigned long i; 1315 1316 if (verbosity_level > 1) 1317 { 1318 po_gram_error_at_line (&mp->pos, _("\ 1319 this message should define plural forms")); 1320 } 1321 1322 new_msgstr_len = nplurals * mp->msgstr_len; 1323 new_msgstr = (char *) xmalloc (new_msgstr_len); 1324 for (i = 0, p = new_msgstr; i < nplurals; i++) 1325 { 1326 memcpy (p, mp->msgstr, mp->msgstr_len); 1327 p += mp->msgstr_len; 1328 } 1329 mp->msgstr = new_msgstr; 1330 mp->msgstr_len = new_msgstr_len; 1331 mp->is_fuzzy = true; 1332 } 1333 1334 if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1)) 1335 { 1336 /* ref->msgid_plural == NULL but def->msgid_plural != NULL. 1337 Use only the first among the plural forms. */ 1338 1339 if (verbosity_level > 1) 1340 { 1341 po_gram_error_at_line (&mp->pos, _("\ 1342 this message should not define plural forms")); 1343 } 1344 1345 mp->msgstr_len = strlen (mp->msgstr) + 1; 1346 mp->is_fuzzy = true; 1347 } 1348 1349 /* Postprocessing of this message is done. 
*/ 1350 mp->used = 0; 1351 } 1352 } 1353 } 1354 } 1355 1356 static msgdomain_list_ty * 1357 merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax, 1358 msgdomain_list_ty **defp) 1359 { 1360 msgdomain_list_ty *def; 1361 msgdomain_list_ty *ref; 1362 size_t j, k; 1363 unsigned int processed; 1364 struct statistics stats; 1365 msgdomain_list_ty *result; 1366 definitions_ty definitions; 1367 message_list_ty *empty_list; 1368 1369 stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0; 1370 1371 /* This is the definitions file, created by a human. */ 1372 def = read_catalog_file (fn1, input_syntax); 1373 1374 /* This is the references file, created by groping the sources with 1375 the xgettext program. */ 1376 ref = read_catalog_file (fn2, input_syntax); 1377 /* Add a dummy header entry, if the references file contains none. */ 1378 for (k = 0; k < ref->nitems; k++) 1379 if (message_list_search (ref->item[k]->messages, NULL, "") == NULL) 1380 { 1381 static lex_pos_ty pos = { __FILE__, __LINE__ }; 1382 message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos); 1383 1384 message_list_prepend (ref->item[k]->messages, refheader); 1385 } 1386 1387 /* The references file can be either in ASCII or in UTF-8. If it is 1388 in UTF-8, we have to convert the definitions and the compendiums to 1389 UTF-8 as well. 
*/ 1390 { 1391 bool was_utf8 = false; 1392 for (k = 0; k < ref->nitems; k++) 1393 { 1394 message_list_ty *mlp = ref->item[k]->messages; 1395 1396 for (j = 0; j < mlp->nitems; j++) 1397 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 1398 { 1399 const char *header = mlp->item[j]->msgstr; 1400 1401 if (header != NULL) 1402 { 1403 const char *charsetstr = c_strstr (header, "charset="); 1404 1405 if (charsetstr != NULL) 1406 { 1407 size_t len; 1408 1409 charsetstr += strlen ("charset="); 1410 len = strcspn (charsetstr, " \t\n"); 1411 if (len == strlen ("UTF-8") 1412 && c_strncasecmp (charsetstr, "UTF-8", len) == 0) 1413 was_utf8 = true; 1414 } 1415 } 1416 } 1417 } 1418 if (was_utf8) 1419 { 1420 def = iconv_msgdomain_list (def, "UTF-8", fn1); 1421 if (compendiums != NULL) 1422 for (k = 0; k < compendiums->nitems; k++) 1423 iconv_message_list (compendiums->item[k], NULL, po_charset_utf8, 1424 compendium_filenames->item[k]); 1425 } 1426 else if (compendiums != NULL && compendiums->nitems > 0) 1427 { 1428 /* Ensure that the definitions and the compendiums are in the same 1429 encoding. Prefer the encoding of the definitions file, if 1430 possible; otherwise, if the definitions file is empty and the 1431 compendiums are all in the same encoding, use that encoding; 1432 otherwise, use UTF-8. */ 1433 bool conversion_done = false; 1434 { 1435 char *charset = NULL; 1436 1437 /* Get the encoding of the definitions file. 
*/ 1438 for (k = 0; k < def->nitems; k++) 1439 { 1440 message_list_ty *mlp = def->item[k]->messages; 1441 1442 for (j = 0; j < mlp->nitems; j++) 1443 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 1444 { 1445 const char *header = mlp->item[j]->msgstr; 1446 1447 if (header != NULL) 1448 { 1449 const char *charsetstr = c_strstr (header, "charset="); 1450 1451 if (charsetstr != NULL) 1452 { 1453 size_t len; 1454 1455 charsetstr += strlen ("charset="); 1456 len = strcspn (charsetstr, " \t\n"); 1457 charset = (char *) xallocsa (len + 1); 1458 memcpy (charset, charsetstr, len); 1459 charset[len] = '\0'; 1460 break; 1461 } 1462 } 1463 } 1464 if (charset != NULL) 1465 break; 1466 } 1467 if (charset != NULL) 1468 { 1469 const char *canon_charset = po_charset_canonicalize (charset); 1470 1471 if (canon_charset != NULL) 1472 { 1473 bool all_compendiums_iconvable = true; 1474 1475 if (compendiums != NULL) 1476 for (k = 0; k < compendiums->nitems; k++) 1477 if (!is_message_list_iconvable (compendiums->item[k], 1478 NULL, canon_charset)) 1479 { 1480 all_compendiums_iconvable = false; 1481 break; 1482 } 1483 1484 if (all_compendiums_iconvable) 1485 { 1486 /* Convert the compendiums to def's encoding. */ 1487 if (compendiums != NULL) 1488 for (k = 0; k < compendiums->nitems; k++) 1489 iconv_message_list (compendiums->item[k], 1490 NULL, canon_charset, 1491 compendium_filenames->item[k]); 1492 conversion_done = true; 1493 } 1494 } 1495 freesa (charset); 1496 } 1497 } 1498 if (!conversion_done) 1499 { 1500 if (def->nitems == 0 1501 || (def->nitems == 1 && def->item[0]->messages->nitems == 0)) 1502 { 1503 /* The definitions file is empty. 1504 Compare the encodings of the compendiums. 
*/ 1505 const char *common_canon_charset = NULL; 1506 1507 for (k = 0; k < compendiums->nitems; k++) 1508 { 1509 message_list_ty *mlp = compendiums->item[k]; 1510 char *charset = NULL; 1511 const char *canon_charset = NULL; 1512 1513 for (j = 0; j < mlp->nitems; j++) 1514 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 1515 { 1516 const char *header = mlp->item[j]->msgstr; 1517 1518 if (header != NULL) 1519 { 1520 const char *charsetstr = 1521 c_strstr (header, "charset="); 1522 1523 if (charsetstr != NULL) 1524 { 1525 size_t len; 1526 1527 charsetstr += strlen ("charset="); 1528 len = strcspn (charsetstr, " \t\n"); 1529 charset = (char *) xallocsa (len + 1); 1530 memcpy (charset, charsetstr, len); 1531 charset[len] = '\0'; 1532 1533 break; 1534 } 1535 } 1536 } 1537 if (charset != NULL) 1538 { 1539 canon_charset = po_charset_canonicalize (charset); 1540 freesa (charset); 1541 } 1542 /* If no charset declaration was found in this file, 1543 or if it is not a valid encoding name, or if it 1544 differs from the common charset found so far, 1545 we have no common charset. */ 1546 if (canon_charset == NULL 1547 || (common_canon_charset != NULL 1548 && canon_charset != common_canon_charset)) 1549 { 1550 common_canon_charset = NULL; 1551 break; 1552 } 1553 common_canon_charset = canon_charset; 1554 } 1555 1556 if (common_canon_charset != NULL) 1557 /* No conversion needed in this case. */ 1558 conversion_done = true; 1559 } 1560 if (!conversion_done) 1561 { 1562 /* It's too hairy to find out what would be the optimal target 1563 encoding. So, convert everything to UTF-8. */ 1564 def = iconv_msgdomain_list (def, "UTF-8", fn1); 1565 if (compendiums != NULL) 1566 for (k = 0; k < compendiums->nitems; k++) 1567 iconv_message_list (compendiums->item[k], 1568 NULL, po_charset_utf8, 1569 compendium_filenames->item[k]); 1570 } 1571 } 1572 } 1573 } 1574 1575 /* Initialize and preprocess the total set of message definitions. 
*/ 1576 definitions_init (&definitions, po_charset_utf8); 1577 empty_list = message_list_alloc (false); 1578 1579 result = msgdomain_list_alloc (false); 1580 processed = 0; 1581 1582 /* Every reference must be matched with its definition. */ 1583 if (!multi_domain_mode) 1584 for (k = 0; k < ref->nitems; k++) 1585 { 1586 const char *domain = ref->item[k]->domain; 1587 message_list_ty *refmlp = ref->item[k]->messages; 1588 message_list_ty *resultmlp = 1589 msgdomain_list_sublist (result, domain, true); 1590 message_list_ty *defmlp; 1591 1592 defmlp = msgdomain_list_sublist (def, domain, false); 1593 if (defmlp == NULL) 1594 defmlp = empty_list; 1595 definitions_set_current_list (&definitions, defmlp); 1596 1597 match_domain (fn1, fn2, &definitions, refmlp, resultmlp, 1598 &stats, &processed); 1599 } 1600 else 1601 { 1602 /* Apply the references messages in the default domain to each of 1603 the definition domains. */ 1604 message_list_ty *refmlp = ref->item[0]->messages; 1605 1606 for (k = 0; k < def->nitems; k++) 1607 { 1608 const char *domain = def->item[k]->domain; 1609 message_list_ty *defmlp = def->item[k]->messages; 1610 1611 /* Ignore the default message domain if it has no messages. */ 1612 if (k > 0 || defmlp->nitems > 0) 1613 { 1614 message_list_ty *resultmlp = 1615 msgdomain_list_sublist (result, domain, true); 1616 1617 definitions_set_current_list (&definitions, defmlp); 1618 1619 match_domain (fn1, fn2, &definitions, refmlp, resultmlp, 1620 &stats, &processed); 1621 } 1622 } 1623 } 1624 1625 definitions_destroy (&definitions); 1626 1627 /* Look for messages in the definition file, which are not present 1628 in the reference file, indicating messages which defined but not 1629 used in the program. Don't scan the compendium(s). 
*/ 1630 for (k = 0; k < def->nitems; ++k) 1631 { 1632 const char *domain = def->item[k]->domain; 1633 message_list_ty *defmlp = def->item[k]->messages; 1634 1635 for (j = 0; j < defmlp->nitems; j++) 1636 { 1637 message_ty *defmsg = defmlp->item[j]; 1638 1639 if (!defmsg->used) 1640 { 1641 /* Remember the old translation although it is not used anymore. 1642 But we mark it as obsolete. */ 1643 message_ty *mp; 1644 1645 mp = message_copy (defmsg); 1646 /* Clear the extracted comments. */ 1647 if (mp->comment_dot != NULL) 1648 { 1649 string_list_free (mp->comment_dot); 1650 mp->comment_dot = NULL; 1651 } 1652 /* Clear the file position comments. */ 1653 if (mp->filepos != NULL) 1654 { 1655 size_t i; 1656 1657 for (i = 0; i < mp->filepos_count; i++) 1658 free ((char *) mp->filepos[i].file_name); 1659 mp->filepos_count = 0; 1660 free (mp->filepos); 1661 mp->filepos = NULL; 1662 } 1663 /* Mark as obsolete. */ 1664 mp->obsolete = true; 1665 1666 message_list_append (msgdomain_list_sublist (result, domain, true), 1667 mp); 1668 stats.obsolete++; 1669 } 1670 } 1671 } 1672 1673 /* Determine the known a-priori encoding, if any. */ 1674 if (def->encoding == ref->encoding) 1675 result->encoding = def->encoding; 1676 1677 /* Report some statistics. */ 1678 if (verbosity_level > 0) 1679 fprintf (stderr, _("%s\ 1680 Read %ld old + %ld reference, \ 1681 merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"), 1682 !quiet && verbosity_level <= 1 ? "\n" : "", 1683 (long) def->nitems, (long) ref->nitems, 1684 (long) stats.merged, (long) stats.fuzzied, (long) stats.missing, 1685 (long) stats.obsolete); 1686 else if (!quiet) 1687 fputs (_(" done.\n"), stderr); 1688 1689 /* Return results. */ 1690 *defp = def; 1691 return result; 1692 } 1693