xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/msgcmp.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* GNU gettext - internationalization aids
2    Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc.
3    This file was written by Peter Miller <millerp@canb.auug.org.au>
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <getopt.h>
24 #include <limits.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <locale.h>
29 
30 #include "closeout.h"
31 #include "dir-list.h"
32 #include "error.h"
33 #include "error-progname.h"
34 #include "progname.h"
35 #include "relocatable.h"
36 #include "basename.h"
37 #include "message.h"
38 #include "exit.h"
39 #include "read-catalog.h"
40 #include "read-po.h"
41 #include "read-properties.h"
42 #include "read-stringtable.h"
43 #include "msgl-iconv.h"
44 #include "c-strstr.h"
45 #include "c-strcase.h"
46 #include "propername.h"
47 #include "gettext.h"
48 
49 #define _(str) gettext (str)
50 
51 
/* Mode flags, all off by default; main turns them on from the command
   line.  */

/* Set by -m / --multi-domain: match the reference messages against
   every domain of the definitions file.  */
static bool multi_domain_mode = false;

/* Set by --use-fuzzy: a fuzzy entry counts as a valid translation.  */
static bool include_fuzzies = false;

/* Set by --use-untranslated: an entry with an empty msgstr counts as a
   valid translation.  */
static bool include_untranslated = false;
60 
/* Long option table passed to getopt_long.  Options that have no
   single-letter form use values above CHAR_MAX, which no option
   character can take.  The all-zero entry terminates the table.  */
static const struct option long_options[] =
{
  { "directory",         required_argument, NULL, 'D' },
  { "help",              no_argument,       NULL, 'h' },
  { "multi-domain",      no_argument,       NULL, 'm' },
  { "properties-input",  no_argument,       NULL, 'P' },
  { "stringtable-input", no_argument,       NULL, CHAR_MAX + 1 },
  { "use-fuzzy",         no_argument,       NULL, CHAR_MAX + 2 },
  { "use-untranslated",  no_argument,       NULL, CHAR_MAX + 3 },
  { "version",           no_argument,       NULL, 'V' },
  { NULL,                0,                 NULL, 0 }
};
74 
75 
76 /* Forward declaration of local functions.  */
77 static void usage (int status)
78 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
79 	__attribute__ ((noreturn))
80 #endif
81 ;
82 static void compare (const char *fn1, const char *fn2,
83 		     catalog_input_format_ty input_syntax);
84 
85 
86 int
main(int argc,char * argv[])87 main (int argc, char *argv[])
88 {
89   int optchar;
90   bool do_help;
91   bool do_version;
92   catalog_input_format_ty input_syntax = &input_format_po;
93 
94   /* Set program name for messages.  */
95   set_program_name (argv[0]);
96   error_print_progname = maybe_print_progname;
97   gram_max_allowed_errors = UINT_MAX;
98 
99 #ifdef HAVE_SETLOCALE
100   /* Set locale via LC_ALL.  */
101   setlocale (LC_ALL, "");
102 #endif
103 
104   /* Set the text message domain.  */
105   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
106   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
107   textdomain (PACKAGE);
108 
109   /* Ensure that write errors on stdout are detected.  */
110   atexit (close_stdout);
111 
112   do_help = false;
113   do_version = false;
114   while ((optchar = getopt_long (argc, argv, "D:hmPV", long_options, NULL))
115 	 != EOF)
116     switch (optchar)
117       {
118       case '\0':		/* long option */
119 	break;
120 
121       case 'D':
122 	dir_list_append (optarg);
123 	break;
124 
125       case 'h':
126 	do_help = true;
127 	break;
128 
129       case 'm':
130 	multi_domain_mode = true;
131 	break;
132 
133       case 'P':
134 	input_syntax = &input_format_properties;
135 	break;
136 
137       case 'V':
138 	do_version = true;
139 	break;
140 
141       case CHAR_MAX + 1:	/* --stringtable-input */
142 	input_syntax = &input_format_stringtable;
143 	break;
144 
145       case CHAR_MAX + 2:	/* --use-fuzzy */
146 	include_fuzzies = true;
147 	break;
148 
149       case CHAR_MAX + 3:	/* --use-untranslated */
150 	include_untranslated = true;
151 	break;
152 
153       default:
154 	usage (EXIT_FAILURE);
155 	break;
156       }
157 
158   /* Version information is requested.  */
159   if (do_version)
160     {
161       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
162       /* xgettext: no-wrap */
163       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
164 This is free software; see the source for copying conditions.  There is NO\n\
165 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
166 "),
167 	      "1995-1998, 2000-2006");
168       printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
169       exit (EXIT_SUCCESS);
170     }
171 
172   /* Help is requested.  */
173   if (do_help)
174     usage (EXIT_SUCCESS);
175 
176   /* Test whether we have an .po file name as argument.  */
177   if (optind >= argc)
178     {
179       error (EXIT_SUCCESS, 0, _("no input files given"));
180       usage (EXIT_FAILURE);
181     }
182   if (optind + 2 != argc)
183     {
184       error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
185       usage (EXIT_FAILURE);
186     }
187 
188   /* compare the two files */
189   compare (argv[optind], argv[optind + 1], input_syntax);
190   exit (EXIT_SUCCESS);
191 }
192 
193 
194 /* Display usage information and exit.  */
195 static void
usage(int status)196 usage (int status)
197 {
198   if (status != EXIT_SUCCESS)
199     fprintf (stderr, _("Try `%s --help' for more information.\n"),
200 	     program_name);
201   else
202     {
203       printf (_("\
204 Usage: %s [OPTION] def.po ref.pot\n\
205 "), program_name);
206       printf ("\n");
207       /* xgettext: no-wrap */
208       printf (_("\
209 Compare two Uniforum style .po files to check that both contain the same\n\
210 set of msgid strings.  The def.po file is an existing PO file with the\n\
211 translations.  The ref.pot file is the last created PO file, or a PO Template\n\
212 file (generally created by xgettext).  This is useful for checking that\n\
213 you have translated each and every message in your program.  Where an exact\n\
214 match cannot be found, fuzzy matching is used to produce better diagnostics.\n\
215 "));
216       printf ("\n");
217       printf (_("\
218 Mandatory arguments to long options are mandatory for short options too.\n"));
219       printf ("\n");
220       printf (_("\
221 Input file location:\n"));
222       printf (_("\
223   def.po                      translations\n"));
224       printf (_("\
225   ref.pot                     references to the sources\n"));
226       printf (_("\
227   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
228       printf ("\n");
229       printf (_("\
230 Operation modifiers:\n"));
231       printf (_("\
232   -m, --multi-domain          apply ref.pot to each of the domains in def.po\n"));
233       printf (_("\
234       --use-fuzzy             consider fuzzy entries\n"));
235       printf (_("\
236       --use-untranslated      consider untranslated entries\n"));
237       printf ("\n");
238       printf (_("\
239 Input file syntax:\n"));
240       printf (_("\
241   -P, --properties-input      input files are in Java .properties syntax\n"));
242       printf (_("\
243       --stringtable-input     input files are in NeXTstep/GNUstep .strings\n\
244                               syntax\n"));
245       printf ("\n");
246       printf (_("\
247 Informative output:\n"));
248       printf (_("\
249   -h, --help                  display this help and exit\n"));
250       printf (_("\
251   -V, --version               output version information and exit\n"));
252       printf ("\n");
253       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
254     }
255 
256   exit (status);
257 }
258 
259 
260 /* Return true if a message should be kept.  */
261 static bool
is_message_selected(const message_ty * mp)262 is_message_selected (const message_ty *mp)
263 {
264   /* Always keep the header entry.  */
265   if (is_header (mp))
266     return true;
267 
268   return !mp->obsolete;
269 }
270 
271 
272 /* Remove obsolete messages from a message list.  Return the modified list.  */
273 static msgdomain_list_ty *
remove_obsoletes(msgdomain_list_ty * mdlp)274 remove_obsoletes (msgdomain_list_ty *mdlp)
275 {
276   size_t k;
277 
278   for (k = 0; k < mdlp->nitems; k++)
279     message_list_remove_if_not (mdlp->item[k]->messages, is_message_selected);
280 
281   return mdlp;
282 }
283 
284 
285 static void
match_domain(const char * fn1,const char * fn2,message_list_ty * defmlp,message_list_ty * refmlp,int * nerrors)286 match_domain (const char *fn1, const char *fn2,
287 	      message_list_ty *defmlp, message_list_ty *refmlp,
288 	      int *nerrors)
289 {
290   size_t j;
291 
292   for (j = 0; j < refmlp->nitems; j++)
293     {
294       message_ty *refmsg;
295       message_ty *defmsg;
296 
297       refmsg = refmlp->item[j];
298 
299       /* See if it is in the other file.  */
300       defmsg = message_list_search (defmlp, refmsg->msgctxt, refmsg->msgid);
301       if (defmsg)
302 	{
303 	  if (!include_untranslated && defmsg->msgstr[0] == '\0')
304 	    {
305 	      (*nerrors)++;
306 	      po_gram_error_at_line (&defmsg->pos, _("\
307 this message is untranslated"));
308 	    }
309 	  else if (!include_fuzzies && defmsg->is_fuzzy && !is_header (defmsg))
310 	    {
311 	      (*nerrors)++;
312 	      po_gram_error_at_line (&defmsg->pos, _("\
313 this message needs to be reviewed by the translator"));
314 	    }
315 	  else
316 	    defmsg->used = 1;
317 	}
318       else
319 	{
320 	  /* If the message was not defined at all, try to find a very
321 	     similar message, it could be a typo, or the suggestion may
322 	     help.  */
323 	  (*nerrors)++;
324 	  defmsg =
325 	    message_list_search_fuzzy (defmlp, refmsg->msgctxt, refmsg->msgid);
326 	  if (defmsg)
327 	    {
328 	      po_gram_error_at_line (&refmsg->pos, _("\
329 this message is used but not defined..."));
330 	      error_message_count--;
331 	      po_gram_error_at_line (&defmsg->pos, _("\
332 ...but this definition is similar"));
333 	      defmsg->used = 1;
334 	    }
335 	  else
336 	    po_gram_error_at_line (&refmsg->pos, _("\
337 this message is used but not defined in %s"), fn1);
338 	}
339     }
340 }
341 
342 
343 static void
compare(const char * fn1,const char * fn2,catalog_input_format_ty input_syntax)344 compare (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax)
345 {
346   msgdomain_list_ty *def;
347   msgdomain_list_ty *ref;
348   int nerrors;
349   size_t j, k;
350   message_list_ty *empty_list;
351 
352   /* This is the master file, created by a human.  */
353   def = remove_obsoletes (read_catalog_file (fn1, input_syntax));
354 
355   /* This is the generated file, created by groping the sources with
356      the xgettext program.  */
357   ref = remove_obsoletes (read_catalog_file (fn2, input_syntax));
358 
359   /* The references file can be either in ASCII or in UTF-8.  If it is
360      in UTF-8, we have to convert the definitions to UTF-8 as well.  */
361   {
362     bool was_utf8 = false;
363     for (k = 0; k < ref->nitems; k++)
364       {
365 	message_list_ty *mlp = ref->item[k]->messages;
366 
367 	for (j = 0; j < mlp->nitems; j++)
368 	  if (is_header (mlp->item[j]) /* && !mlp->item[j]->obsolete */)
369 	    {
370 	      const char *header = mlp->item[j]->msgstr;
371 
372 	      if (header != NULL)
373 		{
374 		  const char *charsetstr = c_strstr (header, "charset=");
375 
376 		  if (charsetstr != NULL)
377 		    {
378 		      size_t len;
379 
380 		      charsetstr += strlen ("charset=");
381 		      len = strcspn (charsetstr, " \t\n");
382 		      if (len == strlen ("UTF-8")
383 			  && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
384 			was_utf8 = true;
385 		    }
386 		}
387 	    }
388 	}
389     if (was_utf8)
390       def = iconv_msgdomain_list (def, "UTF-8", fn1);
391   }
392 
393   empty_list = message_list_alloc (false);
394 
395   /* Every entry in the xgettext generated file must be matched by a
396      (single) entry in the human created file.  */
397   nerrors = 0;
398   if (!multi_domain_mode)
399     for (k = 0; k < ref->nitems; k++)
400       {
401 	const char *domain = ref->item[k]->domain;
402 	message_list_ty *refmlp = ref->item[k]->messages;
403 	message_list_ty *defmlp;
404 
405 	defmlp = msgdomain_list_sublist (def, domain, false);
406 	if (defmlp == NULL)
407 	  defmlp = empty_list;
408 
409 	match_domain (fn1, fn2, defmlp, refmlp, &nerrors);
410       }
411   else
412     {
413       /* Apply the references messages in the default domain to each of
414 	 the definition domains.  */
415       message_list_ty *refmlp = ref->item[0]->messages;
416 
417       for (k = 0; k < def->nitems; k++)
418 	{
419 	  message_list_ty *defmlp = def->item[k]->messages;
420 
421 	  /* Ignore the default message domain if it has no messages.  */
422 	  if (k > 0 || defmlp->nitems > 0)
423 	    match_domain (fn1, fn2, defmlp, refmlp, &nerrors);
424 	}
425     }
426 
427   /* Look for messages in the definition file, which are not present
428      in the reference file, indicating messages which defined but not
429      used in the program.  */
430   for (k = 0; k < def->nitems; ++k)
431     {
432       message_list_ty *defmlp = def->item[k]->messages;
433 
434       for (j = 0; j < defmlp->nitems; j++)
435 	{
436 	  message_ty *defmsg = defmlp->item[j];
437 
438 	  if (!defmsg->used)
439 	    po_gram_error_at_line (&defmsg->pos,
440 				   _("warning: this message is not used"));
441 	}
442     }
443 
444   /* Exit with status 1 on any error.  */
445   if (nerrors > 0)
446     error (EXIT_FAILURE, 0,
447 	   ngettext ("found %d fatal error", "found %d fatal errors", nerrors),
448 	   nerrors);
449 }
450