xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/read-catalog-abstract.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Reading PO files, abstract class.
2    Copyright (C) 1995-1996, 1998, 2000-2006 Free Software Foundation, Inc.
3 
4    This file was written by Peter Miller <millerp@canb.auug.org.au>
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 
20 
21 #ifdef HAVE_CONFIG_H
22 # include "config.h"
23 #endif
24 
25 /* Specification.  */
26 #include "read-catalog-abstract.h"
27 
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "xalloc.h"
32 #include "xvasprintf.h"
33 #include "po-xerror.h"
34 #include "gettext.h"
35 
36 /* Local variables.  */
37 static abstract_catalog_reader_ty *callback_arg;
38 
39 
40 /* ========================================================================= */
41 /* Allocating and freeing instances of abstract_catalog_reader_ty.  */
42 
43 
44 abstract_catalog_reader_ty *
catalog_reader_alloc(abstract_catalog_reader_class_ty * method_table)45 catalog_reader_alloc (abstract_catalog_reader_class_ty *method_table)
46 {
47   abstract_catalog_reader_ty *pop;
48 
49   pop = (abstract_catalog_reader_ty *) xmalloc (method_table->size);
50   pop->methods = method_table;
51   if (method_table->constructor)
52     method_table->constructor (pop);
53   return pop;
54 }
55 
56 
57 void
catalog_reader_free(abstract_catalog_reader_ty * pop)58 catalog_reader_free (abstract_catalog_reader_ty *pop)
59 {
60   if (pop->methods->destructor)
61     pop->methods->destructor (pop);
62   free (pop);
63 }
64 
65 
66 /* ========================================================================= */
67 /* Inline functions to invoke the methods.  */
68 
69 
70 static inline void
call_parse_brief(abstract_catalog_reader_ty * pop)71 call_parse_brief (abstract_catalog_reader_ty *pop)
72 {
73   if (pop->methods->parse_brief)
74     pop->methods->parse_brief (pop);
75 }
76 
77 static inline void
call_parse_debrief(abstract_catalog_reader_ty * pop)78 call_parse_debrief (abstract_catalog_reader_ty *pop)
79 {
80   if (pop->methods->parse_debrief)
81     pop->methods->parse_debrief (pop);
82 }
83 
84 static inline void
call_directive_domain(abstract_catalog_reader_ty * pop,char * name)85 call_directive_domain (abstract_catalog_reader_ty *pop, char *name)
86 {
87   if (pop->methods->directive_domain)
88     pop->methods->directive_domain (pop, name);
89 }
90 
91 static inline void
call_directive_message(abstract_catalog_reader_ty * pop,char * msgctxt,char * msgid,lex_pos_ty * msgid_pos,char * msgid_plural,char * msgstr,size_t msgstr_len,lex_pos_ty * msgstr_pos,char * prev_msgctxt,char * prev_msgid,char * prev_msgid_plural,bool force_fuzzy,bool obsolete)92 call_directive_message (abstract_catalog_reader_ty *pop,
93 			char *msgctxt,
94 			char *msgid,
95 			lex_pos_ty *msgid_pos,
96 			char *msgid_plural,
97 			char *msgstr, size_t msgstr_len,
98 			lex_pos_ty *msgstr_pos,
99 			char *prev_msgctxt,
100 			char *prev_msgid,
101 			char *prev_msgid_plural,
102 			bool force_fuzzy, bool obsolete)
103 {
104   if (pop->methods->directive_message)
105     pop->methods->directive_message (pop, msgctxt,
106 				     msgid, msgid_pos, msgid_plural,
107 				     msgstr, msgstr_len, msgstr_pos,
108 				     prev_msgctxt,
109 				     prev_msgid,
110 				     prev_msgid_plural,
111 				     force_fuzzy, obsolete);
112 }
113 
114 static inline void
call_comment(abstract_catalog_reader_ty * pop,const char * s)115 call_comment (abstract_catalog_reader_ty *pop, const char *s)
116 {
117   if (pop->methods->comment != NULL)
118     pop->methods->comment (pop, s);
119 }
120 
121 static inline void
call_comment_dot(abstract_catalog_reader_ty * pop,const char * s)122 call_comment_dot (abstract_catalog_reader_ty *pop, const char *s)
123 {
124   if (pop->methods->comment_dot != NULL)
125     pop->methods->comment_dot (pop, s);
126 }
127 
128 static inline void
call_comment_filepos(abstract_catalog_reader_ty * pop,const char * name,size_t line)129 call_comment_filepos (abstract_catalog_reader_ty *pop, const char *name,
130 		      size_t line)
131 {
132   if (pop->methods->comment_filepos)
133     pop->methods->comment_filepos (pop, name, line);
134 }
135 
136 static inline void
call_comment_special(abstract_catalog_reader_ty * pop,const char * s)137 call_comment_special (abstract_catalog_reader_ty *pop, const char *s)
138 {
139   if (pop->methods->comment_special != NULL)
140     pop->methods->comment_special (pop, s);
141 }
142 
143 
144 /* ========================================================================= */
145 /* Exported functions.  */
146 
147 
148 static inline void
parse_start(abstract_catalog_reader_ty * pop)149 parse_start (abstract_catalog_reader_ty *pop)
150 {
151   /* The parse will call the po_callback_... functions (see below)
152      when the various directive are recognised.  The callback_arg
153      variable is used to tell these functions which instance is to
154      have the relevant method invoked.  */
155   callback_arg = pop;
156 
157   call_parse_brief (pop);
158 }
159 
160 static inline void
parse_end(abstract_catalog_reader_ty * pop)161 parse_end (abstract_catalog_reader_ty *pop)
162 {
163   call_parse_debrief (pop);
164   callback_arg = NULL;
165 }
166 
167 
168 void
catalog_reader_parse(abstract_catalog_reader_ty * pop,FILE * fp,const char * real_filename,const char * logical_filename,catalog_input_format_ty input_syntax)169 catalog_reader_parse (abstract_catalog_reader_ty *pop, FILE *fp,
170 		      const char *real_filename, const char *logical_filename,
171 		      catalog_input_format_ty input_syntax)
172 {
173   /* Parse the stream's content.  */
174   parse_start (pop);
175   input_syntax->parse (pop, fp, real_filename, logical_filename);
176   parse_end (pop);
177 
178   if (error_message_count > 0)
179     po_xerror (PO_SEVERITY_FATAL_ERROR, NULL,
180 	       /*real_filename*/ NULL, (size_t)(-1), (size_t)(-1), false,
181 	       xasprintf (ngettext ("found %d fatal error",
182 				    "found %d fatal errors",
183 				    error_message_count),
184 			  error_message_count));
185   error_message_count = 0;
186 }
187 
188 
189 /* ========================================================================= */
190 /* Callbacks used by po-gram.y or po-lex.c, indirectly from
191    catalog_reader_parse.  */
192 
193 
194 /* This function is called by po_gram_lex() whenever a domain directive
195    has been seen.  */
196 void
po_callback_domain(char * name)197 po_callback_domain (char *name)
198 {
199   /* assert(callback_arg); */
200   call_directive_domain (callback_arg, name);
201 }
202 
203 
204 /* This function is called by po_gram_lex() whenever a message has been
205    seen.  */
206 void
po_callback_message(char * msgctxt,char * msgid,lex_pos_ty * msgid_pos,char * msgid_plural,char * msgstr,size_t msgstr_len,lex_pos_ty * msgstr_pos,char * prev_msgctxt,char * prev_msgid,char * prev_msgid_plural,bool force_fuzzy,bool obsolete)207 po_callback_message (char *msgctxt,
208 		     char *msgid, lex_pos_ty *msgid_pos, char *msgid_plural,
209 		     char *msgstr, size_t msgstr_len, lex_pos_ty *msgstr_pos,
210 		     char *prev_msgctxt,
211 		     char *prev_msgid,
212 		     char *prev_msgid_plural,
213 		     bool force_fuzzy, bool obsolete)
214 {
215   /* assert(callback_arg); */
216   call_directive_message (callback_arg, msgctxt,
217 			  msgid, msgid_pos, msgid_plural,
218 			  msgstr, msgstr_len, msgstr_pos,
219 			  prev_msgctxt, prev_msgid, prev_msgid_plural,
220 			  force_fuzzy, obsolete);
221 }
222 
223 
224 void
po_callback_comment(const char * s)225 po_callback_comment (const char *s)
226 {
227   /* assert(callback_arg); */
228   call_comment (callback_arg, s);
229 }
230 
231 
232 void
po_callback_comment_dot(const char * s)233 po_callback_comment_dot (const char *s)
234 {
235   /* assert(callback_arg); */
236   call_comment_dot (callback_arg, s);
237 }
238 
239 
240 /* This function is called by po_parse_comment_filepos(), once for each
241    filename.  */
242 void
po_callback_comment_filepos(const char * name,size_t line)243 po_callback_comment_filepos (const char *name, size_t line)
244 {
245   /* assert(callback_arg); */
246   call_comment_filepos (callback_arg, name, line);
247 }
248 
249 
250 void
po_callback_comment_special(const char * s)251 po_callback_comment_special (const char *s)
252 {
253   /* assert(callback_arg); */
254   call_comment_special (callback_arg, s);
255 }
256 
257 
258 /* Parse a special comment and put the result in *fuzzyp, formatp, *wrapp.  */
259 void
po_parse_comment_special(const char * s,bool * fuzzyp,enum is_format formatp[NFORMATS],enum is_wrap * wrapp)260 po_parse_comment_special (const char *s,
261 			  bool *fuzzyp, enum is_format formatp[NFORMATS],
262 			  enum is_wrap *wrapp)
263 {
264   size_t i;
265 
266   *fuzzyp = false;
267   for (i = 0; i < NFORMATS; i++)
268     formatp[i] = undecided;
269   *wrapp = undecided;
270 
271   while (*s != '\0')
272     {
273       const char *t;
274 
275       /* Skip whitespace.  */
276       while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
277 	s++;
278 
279       /* Collect a token.  */
280       t = s;
281       while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
282 	s++;
283       if (s != t)
284 	{
285 	  size_t len = s - t;
286 
287 	  /* Accept fuzzy flag.  */
288 	  if (len == 5 && memcmp (t, "fuzzy", 5) == 0)
289 	    {
290 	      *fuzzyp = true;
291 	      continue;
292 	    }
293 
294 	  /* Accept format description.  */
295 	  if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0)
296 	    {
297 	      const char *p;
298 	      size_t n;
299 	      enum is_format value;
300 
301 	      p = t;
302 	      n = len - 7;
303 
304 	      if (n >= 3 && memcmp (p, "no-", 3) == 0)
305 		{
306 		  p += 3;
307 		  n -= 3;
308 		  value = no;
309 		}
310 	      else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
311 		{
312 		  p += 9;
313 		  n -= 9;
314 		  value = possible;
315 		}
316 	      else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
317 		{
318 		  p += 11;
319 		  n -= 11;
320 		  value = impossible;
321 		}
322 	      else
323 		value = yes;
324 
325 	      for (i = 0; i < NFORMATS; i++)
326 		if (strlen (format_language[i]) == n
327 		    && memcmp (format_language[i], p, n) == 0)
328 		  {
329 		    formatp[i] = value;
330 		    break;
331 		  }
332 	      if (i < NFORMATS)
333 		continue;
334 	    }
335 
336 	  /* Accept wrap description.  */
337 	  if (len == 4 && memcmp (t, "wrap", 4) == 0)
338 	    {
339 	      *wrapp = yes;
340 	      continue;
341 	    }
342 	  if (len == 7 && memcmp (t, "no-wrap", 7) == 0)
343 	    {
344 	      *wrapp = no;
345 	      continue;
346 	    }
347 
348 	  /* Unknown special comment marker.  It may have been generated
349 	     from a future xgettext version.  Ignore it.  */
350 	}
351     }
352 }
353 
354 
/* Return true if C is one of the blanks that separate the items of a GNU
   style file comment: space, tab or newline.  */
static inline bool
filepos_is_blank (char c)
{
  return c == ' ' || c == '\t' || c == '\n';
}

/* Return true if C is an ASCII decimal digit.  */
static inline bool
filepos_is_digit (char c)
{
  return c >= '0' && c <= '9';
}

/* Return the address of the first non-blank character at or after P.  */
static inline const char *
filepos_skip_blanks (const char *p)
{
  while (filepos_is_blank (*p))
    p++;
  return p;
}

/* Try to read a decimal NUMBER starting exactly at P.
   Succeeds only if P starts with a digit and the run of digits is followed
   by a blank or by the end of the string.  On success, store the value in
   *LINEP and return the address just after the digits; otherwise return
   NULL and leave *LINEP alone.  */
static const char *
filepos_scan_line_number (const char *p, size_t *linep)
{
  size_t value = 0;

  if (!filepos_is_digit (*p))
    return NULL;

  do
    {
      value = value * 10 + (*p - '0');
      p++;
    }
  while (filepos_is_digit (*p));

  if (!(*p == '\0' || filepos_is_blank (*p)))
    return NULL;

  *linep = value;
  return p;
}

/* Call po_callback_comment_filepos with the file name made of the LENGTH
   characters starting at START, and with the line number LINE.
   The name is copied into a temporary NUL-terminated buffer that is freed
   again once the callback has returned.  */
static void
filepos_report (const char *start, size_t length, size_t line)
{
  char *name = (char *) xmalloc (length + 1);

  memcpy (name, start, length);
  name[length] = '\0';

  po_callback_comment_filepos (name, line);

  free (name);
}

/* Parse a GNU style file comment.
   Syntax: an arbitrary number of
             STRING COLON NUMBER
           or
             STRING
   The latter style, without line number, occurs in PO files converted e.g.
   from Pascal .rst files or from OpenOffice resource files.
   Call po_callback_comment_filepos for each of them; a STRING without
   line number is reported with the line number (size_t)(-1).  */
static void
po_parse_comment_filepos (const char *s)
{
  for (;;)
    {
      const char *name_start;
      const char *after;
      size_t line;

      s = filepos_skip_blanks (s);
      if (*s == '\0')
        break;

      /* Collect STRING: at least one character, up to the next blank or
         the end of the input.  */
      name_start = s;
      do
        s++;
      while (!(*s == '\0' || filepos_is_blank (*s)));

      /* See if there is a COLON and NUMBER after the STRING, separated
         through optional spaces.  */
      after = filepos_skip_blanks (s);
      if (*after == ':')
        {
          after = filepos_scan_line_number (filepos_skip_blanks (after + 1),
                                            &line);
          if (after != NULL)
            {
              /* Parsed a GNU style file comment with spaces.  */
              filepos_report (name_start, s - name_start, line);
              s = after;
              continue;
            }
        }

      /* See if there is a COLON at the end of STRING and a NUMBER after
         it, separated through optional spaces.  s > name_start holds here,
         so s[-1] is part of STRING.  */
      if (s[-1] == ':')
        {
          after = filepos_scan_line_number (filepos_skip_blanks (s), &line);
          if (after != NULL)
            {
              /* Parsed a GNU style file comment with spaces.  The trailing
                 colon is not part of the file name.  */
              filepos_report (name_start, (s - 1) - name_start, line);
              s = after;
              continue;
            }
        }

      /* See if there is a COLON and NUMBER at the end of the STRING,
         without separating spaces.  */
      {
        const char *digits = s;

        /* Move back to the beginning of the trailing digits segment at
           the end of STRING.  */
        while (digits > name_start && filepos_is_digit (digits[-1]))
          digits--;

        /* Require a non-empty digits segment, preceded by a colon, which
           in turn is preceded by a non-empty file name.  */
        if (digits < s
            && digits > name_start + 1
            && digits[-1] == ':')
          {
            /* Parsed a GNU style file comment without spaces.  */
            const char *d;

            line = 0;
            for (d = digits; d < s; d++)
              line = line * 10 + (*d - '0');

            filepos_report (name_start, (digits - 1) - name_start, line);
            continue;
          }
      }

      /* Parsed a file comment without line number.  */
      filepos_report (name_start, s - name_start, (size_t)(-1));
    }
}
536 
537 
/* Parse a SunOS or Solaris style file comment.
   Syntax of SunOS style:
     FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD COLON NUMBER
   Syntax of Solaris style:
     FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD NUMBER_KEYWORD COLON NUMBER
   where
     FILE_KEYWORD ::= "file" | "File"
     COLON ::= ":"
     COMMA ::= ","
     LINE_KEYWORD ::= "line"
     NUMBER_KEYWORD ::= "number"
     NUMBER ::= [0-9]+
   S must start with exactly one space followed by FILE_KEYWORD and COLON.
   After that, spaces and tabs are allowed between any two tokens,
   including between the last COLON and NUMBER.  After NUMBER only
   spaces, tabs and newlines may follow.
   On success, po_callback_comment_filepos is called once with STRING and
   NUMBER.
   Return true if parsed, false if not a comment of this form. */
static bool
po_parse_comment_solaris_filepos (const char *s)
{
  const char *string_start;
  const char *string_end;

  /* The && chain stops at the first mismatch, so nothing past the
     terminating NUL of a short S is read.  */
  if (!(s[0] == ' '
        && (s[1] == 'F' || s[1] == 'f')
        && s[2] == 'i' && s[3] == 'l' && s[4] == 'e'
        && s[5] == ':'))
    return false;

  string_start = s + 6;
  while (*string_start == ' ' || *string_start == '\t')
    string_start++;

  /* Try every possible end of STRING, shortest first, until the rest of
     the line matches  COMMA LINE_KEYWORD [NUMBER_KEYWORD] COLON NUMBER.  */
  for (string_end = string_start; *string_end != '\0'; string_end++)
    {
      const char *p = string_end;
      size_t n;

      while (*p == ' ' || *p == '\t')
        p++;

      if (*p != ',')
        continue;
      p++;

      while (*p == ' ' || *p == '\t')
        p++;

      /* strncmp stops at the first difference, so it does not read past
         the terminating NUL.  */
      if (strncmp (p, "line", 4) != 0)
        continue;
      p += 4;

      while (*p == ' ' || *p == '\t')
        p++;

      /* NUMBER_KEYWORD is optional (Solaris style only).  */
      if (strncmp (p, "number", 6) == 0)
        {
          p += 6;
          while (*p == ' ' || *p == '\t')
            p++;
        }

      if (*p != ':')
        continue;
      p++;

      /* Blanks are skipped between all other tokens; skip them between
         COLON and NUMBER as well, so that "line: 12" is accepted just
         like "line:12".  */
      while (*p == ' ' || *p == '\t')
        p++;

      if (!(*p >= '0' && *p <= '9'))
        continue;

      /* Accumulate a number.  */
      n = 0;
      do
        {
          n = n * 10 + (*p - '0');
          p++;
        }
      while (*p >= '0' && *p <= '9');

      /* Only trailing whitespace may follow the number.  */
      while (*p == ' ' || *p == '\t' || *p == '\n')
        p++;

      if (*p != '\0')
        continue;

      /* Parsed a Sun style file comment.  Hand a NUL-terminated copy of
         STRING to the callback.  */
      {
        size_t string_length = string_end - string_start;
        char *string = (char *) xmalloc (string_length + 1);

        memcpy (string, string_start, string_length);
        string[string_length] = '\0';

        po_callback_comment_filepos (string, n);

        free (string);
        return true;
      }
    }

  return false;
}
642 
643 
/* This function is called by po_gram_lex() whenever a comment is
   seen.  It looks at the first character of S to see what sort of comment
   it is, and then dispatches it to the appropriate method: call_comment,
   call_comment_dot, call_comment_filepos (via po_parse_comment_filepos or
   po_parse_comment_solaris_filepos), or call_comment_special.  */
void
po_callback_comment_dispatcher (const char *s)
{
  switch (*s)
    {
    case '.':
      po_callback_comment_dot (s + 1);
      break;

    case ':':
      /* Parse the file location string.  The appropriate callback will be
         invoked.  */
      po_parse_comment_filepos (s + 1);
      break;

    case ',':
    case '!':
      /* Get all entries in the special comment line.  */
      po_callback_comment_special (s + 1);
      break;

    default:
      /* It looks like a plain vanilla comment, but Solaris-style file
         position lines do, too.  Try to parse the lot.  If the parse
         succeeds, the appropriate callback has already been invoked;
         otherwise it is an ordinary comment.  */
      if (!po_parse_comment_solaris_filepos (s))
        po_callback_comment (s);
      break;
    }
}
676