xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/read-stringtable.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Reading NeXTstep/GNUstep .strings files.
2    Copyright (C) 2003, 2005-2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2003.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 /* Specification.  */
24 #include "read-stringtable.h"
25 
26 #include <assert.h>
27 #include <errno.h>
28 #include <stdbool.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "error.h"
34 #include "error-progname.h"
35 #include "read-catalog-abstract.h"
36 #include "xalloc.h"
37 #include "xvasprintf.h"
38 #include "po-xerror.h"
39 #include "utf8-ucs4.h"
40 #include "ucs4-utf8.h"
41 #include "gettext.h"
42 
43 #define _(str) gettext (str)
44 
45 /* The format of NeXTstep/GNUstep .strings files is documented in
46      gnustep-base-1.8.0/Tools/make_strings/Using.txt
47    and in the comments of method propertyListFromStringsFileFormat in
48      gnustep-base-1.8.0/Source/NSString.m
49    In summary, it's an Objective-C-like file with pseudo-assignments of the form
50           "key" = "value";
51    where the key is the msgid and the value is the msgstr.
52 
53    The implementation of the parser of .strings files is in
54      gnustep-base-1.8.0/Source/NSString.m
55      function GSPropertyListFromStringsFormat
56      (indirectly called from NSBundle's method localizedStringForKey).
57 
58    A test case is in
59      gnustep-base-1.8.0/Testing/English.lproj/NXStringTable.example
60  */
61 
62 /* Handling of comments: We copy all comments from the .strings file to
63    the PO file. This is not really needed; it's a service for translators
64    who don't like PO files and prefer to maintain the .strings file.  */
65 
66 
67 /* Real filename, used in error messages about the input file.  */
68 static const char *real_file_name;
69 
70 /* File name and line number.  */
71 extern lex_pos_ty gram_pos;
72 
73 /* The input file stream.  */
74 static FILE *fp;
75 
76 
77 /* Phase 1: Read a byte.
78    Max. 4 pushback characters.  */
79 
80 static unsigned char phase1_pushback[4];
81 static int phase1_pushback_length;
82 
83 static int
phase1_getc()84 phase1_getc ()
85 {
86   int c;
87 
88   if (phase1_pushback_length)
89     return phase1_pushback[--phase1_pushback_length];
90 
91   c = getc (fp);
92 
93   if (c == EOF)
94     {
95       if (ferror (fp))
96 	{
97 	  const char *errno_description = strerror (errno);
98 	  po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
99 		     xasprintf ("%s: %s",
100 				xasprintf (_("error while reading \"%s\""),
101 					   real_file_name),
102 				errno_description));
103 	}
104       return EOF;
105     }
106 
107   return c;
108 }
109 
110 static void
phase1_ungetc(int c)111 phase1_ungetc (int c)
112 {
113   if (c != EOF)
114     phase1_pushback[phase1_pushback_length++] = c;
115 }
116 
117 
118 /* Phase 2: Read an UCS-4 character.
119    Max. 2 pushback characters.  */
120 
121 /* End-of-file indicator for functions returning an UCS-4 character.  */
122 #define UEOF -1
123 
124 static int phase2_pushback[4];
125 static int phase2_pushback_length;
126 
127 /* The input file can be in Unicode encoding (UCS-2BE, UCS-2LE, UTF-8, each
128    with a BOM!), or otherwise the locale-dependent default encoding is used.
129    Since we don't want to depend on the locale here, we use ISO-8859-1
130    instead.  */
131 enum enc
132 {
133   enc_undetermined,
134   enc_ucs2be,
135   enc_ucs2le,
136   enc_utf8,
137   enc_iso8859_1
138 };
139 static enum enc encoding;
140 
141 static int
phase2_getc()142 phase2_getc ()
143 {
144   if (phase2_pushback_length)
145     return phase2_pushback[--phase2_pushback_length];
146 
147   if (encoding == enc_undetermined)
148     {
149       /* Determine the input file's encoding.  */
150       int c0, c1;
151 
152       c0 = phase1_getc ();
153       if (c0 == EOF)
154 	return UEOF;
155       c1 = phase1_getc ();
156       if (c1 == EOF)
157 	{
158 	  phase1_ungetc (c0);
159 	  encoding = enc_iso8859_1;
160 	}
161       else if (c0 == 0xfe && c1 == 0xff)
162 	encoding = enc_ucs2be;
163       else if (c0 == 0xff && c1 == 0xfe)
164 	encoding = enc_ucs2le;
165       else
166 	{
167 	  int c2;
168 
169 	  c2 = phase1_getc ();
170 	  if (c2 == EOF)
171 	    {
172 	      phase1_ungetc (c1);
173 	      phase1_ungetc (c0);
174 	      encoding = enc_iso8859_1;
175 	    }
176 	  else if (c0 == 0xef && c1 == 0xbb && c2 == 0xbf)
177 	    encoding = enc_utf8;
178 	  else
179 	    {
180 	      phase1_ungetc (c2);
181 	      phase1_ungetc (c1);
182 	      phase1_ungetc (c0);
183 	      encoding = enc_iso8859_1;
184 	    }
185 	}
186     }
187 
188   switch (encoding)
189     {
190     case enc_ucs2be:
191       /* Read an UCS-2BE encoded character.  */
192       {
193 	int c0, c1;
194 
195 	c0 = phase1_getc ();
196 	if (c0 == EOF)
197 	  return UEOF;
198 	c1 = phase1_getc ();
199 	if (c1 == EOF)
200 	  return UEOF;
201 	return (c0 << 8) + c1;
202       }
203 
204     case enc_ucs2le:
205       /* Read an UCS-2LE encoded character.  */
206       {
207 	int c0, c1;
208 
209 	c0 = phase1_getc ();
210 	if (c0 == EOF)
211 	  return UEOF;
212 	c1 = phase1_getc ();
213 	if (c1 == EOF)
214 	  return UEOF;
215 	return c0 + (c1 << 8);
216       }
217 
218     case enc_utf8:
219       /* Read an UTF-8 encoded character.  */
220       {
221 	unsigned char buf[6];
222 	unsigned int count;
223 	int c;
224 	unsigned int uc;
225 
226 	c = phase1_getc ();
227 	if (c == EOF)
228 	  return UEOF;
229 	buf[0] = c;
230 	count = 1;
231 
232 	if (buf[0] >= 0xc0)
233 	  {
234 	    c = phase1_getc ();
235 	    if (c == EOF)
236 	      return UEOF;
237 	    buf[1] = c;
238 	    count = 2;
239 
240 	    if (buf[0] >= 0xe0
241 		&& ((buf[1] ^ 0x80) < 0x40))
242 	      {
243 		c = phase1_getc ();
244 		if (c == EOF)
245 		  return UEOF;
246 		buf[2] = c;
247 		count = 3;
248 
249 		if (buf[0] >= 0xf0
250 		    && ((buf[2] ^ 0x80) < 0x40))
251 		  {
252 		    c = phase1_getc ();
253 		    if (c == EOF)
254 		      return UEOF;
255 		    buf[3] = c;
256 		    count = 4;
257 
258 		    if (buf[0] >= 0xf8
259 			&& ((buf[3] ^ 0x80) < 0x40))
260 		      {
261 			c = phase1_getc ();
262 			if (c == EOF)
263 			  return UEOF;
264 			buf[4] = c;
265 			count = 5;
266 
267 			if (buf[0] >= 0xfc
268 			    && ((buf[4] ^ 0x80) < 0x40))
269 			  {
270 			    c = phase1_getc ();
271 			    if (c == EOF)
272 			      return UEOF;
273 			    buf[5] = c;
274 			    count = 6;
275 			  }
276 		      }
277 		  }
278 	      }
279 	  }
280 
281 	u8_mbtouc (&uc, buf, count);
282 	return uc;
283       }
284 
285     case enc_iso8859_1:
286       /* Read an ISO-8859-1 encoded character.  */
287       {
288 	int c = phase1_getc ();
289 
290 	if (c == EOF)
291 	  return UEOF;
292 	return c;
293       }
294 
295     default:
296       abort ();
297     }
298 }
299 
300 static void
phase2_ungetc(int c)301 phase2_ungetc (int c)
302 {
303   if (c != UEOF)
304     phase2_pushback[phase2_pushback_length++] = c;
305 }
306 
307 
308 /* Phase 3: Read an UCS-4 character, with line number handling.  */
309 
310 static int
phase3_getc()311 phase3_getc ()
312 {
313   int c = phase2_getc ();
314 
315   if (c == '\n')
316     gram_pos.line_number++;
317 
318   return c;
319 }
320 
321 static void
phase3_ungetc(int c)322 phase3_ungetc (int c)
323 {
324   if (c == '\n')
325     --gram_pos.line_number;
326   phase2_ungetc (c);
327 }
328 
329 
/* Convert BUFLEN UCS-4 words starting at BUFFER into a freshly allocated,
   NUL-terminated UTF-8 string.  Each word is encoded with u8_uctomb; a
   non-positive result from it trips an assertion.  */
static char *
conv_from_ucs4 (const int *buffer, size_t buflen)
{
  /* One UCS-4 word expands to at most 6 UTF-8 bytes.  */
  size_t capacity = 6 * buflen;
  unsigned char *result = (unsigned char *) xmalloc (capacity + 1);
  unsigned char *out = result;
  size_t i;

  for (i = 0; i < buflen; i++)
    {
      unsigned int uc = buffer[i];
      int written = u8_uctomb (out, uc, 6);

      assert (written > 0);
      out += written;
    }

  *out = '\0';
  assert (out - result <= capacity);

  return (char *) result;
}
356 
357 
/* Decode a double-quoted string literal held in STRING[0..LENGTH-1]
   (UCS-4).  Understood escapes: up to three octal digits, \u or \U
   followed by up to four hex digits, and the single-letter escapes
   a b t r n v f; any other escaped character stands for itself.
   Returns the contents as a freshly allocated UTF-8 string, or NULL when
   the input does not start with a double-quote or the closing
   double-quote is missing.  Whatever follows the closing quote is
   ignored.  */
static char *
parse_escaped_string (const int *string, size_t length)
{
  /* Scratch space, kept and grown across calls.  */
  static int *out;
  static size_t out_allocated;
  static size_t out_count;
  const int *cursor = string;
  const int *const end = string + length;

  /* The literal must open with a double-quote.  */
  if (cursor == end || *cursor != '"')
    return NULL;
  cursor++;

  out_count = 0;
  for (;;)
    {
      int c;

      if (cursor == end)
        /* Ran out of input before the closing quote.  */
        return NULL;
      c = *cursor++;
      if (c == '"')
        break;

      if (c == '\\')
        {
          if (cursor == end)
            return NULL;
          c = *cursor++;

          if (c >= '0' && c <= '7')
            {
              /* Octal escape: this digit plus at most two more.  */
              unsigned int value = c - '0';
              int digits;

              for (digits = 1; digits < 3 && cursor != end; digits++)
                {
                  int d = *cursor;

                  if (d < '0' || d > '7')
                    break;
                  value = value * 8 + (d - '0');
                  cursor++;
                }
              c = value;
            }
          else if (c == 'u' || c == 'U')
            {
              /* Unicode escape: zero to four hex digits.  */
              unsigned int value = 0;
              int digits;

              for (digits = 0; digits < 4 && cursor != end; digits++)
                {
                  int d = *cursor;

                  if (d >= '0' && d <= '9')
                    value = value * 16 + (d - '0');
                  else if (d >= 'A' && d <= 'F')
                    value = value * 16 + (d - 'A' + 10);
                  else if (d >= 'a' && d <= 'f')
                    value = value * 16 + (d - 'a' + 10);
                  else
                    break;
                  cursor++;
                }
              c = value;
            }
          else
            {
              switch (c)
                {
                case 'a': c = '\a'; break;
                case 'b': c = '\b'; break;
                case 't': c = '\t'; break;
                case 'r': c = '\r'; break;
                case 'n': c = '\n'; break;
                case 'v': c = '\v'; break;
                case 'f': c = '\f'; break;
                default: break;
                }
            }
        }

      if (out_allocated <= out_count)
        {
          out_allocated = 2 * out_allocated + 10;
          out = xrealloc (out, out_allocated * sizeof (int));
        }
      out[out_count] = c;
      out_count++;
    }

  return conv_from_ucs4 (out, out_count);
}
449 
450 
451 /* Accumulating flag comments.  */
452 
453 static char *special_comment;
454 
455 static inline void
special_comment_reset()456 special_comment_reset ()
457 {
458   if (special_comment != NULL)
459     free (special_comment);
460   special_comment = NULL;
461 }
462 
463 static void
special_comment_add(const char * flag)464 special_comment_add (const char *flag)
465 {
466   if (special_comment == NULL)
467     special_comment = xstrdup (flag);
468   else
469     {
470       size_t total_len = strlen (special_comment) + 2 + strlen (flag) + 1;
471       special_comment = xrealloc (special_comment, total_len);
472       strcat (special_comment, ", ");
473       strcat (special_comment, flag);
474     }
475 }
476 
477 static inline void
special_comment_finish()478 special_comment_finish ()
479 {
480   if (special_comment != NULL)
481     {
482       po_callback_comment_special (special_comment);
483       free (special_comment);
484       special_comment = NULL;
485     }
486 }
487 
488 
489 /* Accumulating comments.  */
490 
491 static int *buffer;
492 static size_t bufmax;
493 static size_t buflen;
494 static bool next_is_obsolete;
495 static bool next_is_fuzzy;
496 static char *fuzzy_msgstr;
497 static bool expect_fuzzy_msgstr_as_c_comment;
498 static bool expect_fuzzy_msgstr_as_cxx_comment;
499 
500 static inline void
comment_start()501 comment_start ()
502 {
503   buflen = 0;
504 }
505 
506 static inline void
comment_add(int c)507 comment_add (int c)
508 {
509   if (buflen >= bufmax)
510     {
511       bufmax = 2 * bufmax + 10;
512       buffer = xrealloc (buffer, bufmax * sizeof (int));
513     }
514   buffer[buflen++] = c;
515 }
516 
517 static inline void
comment_line_end(size_t chars_to_remove,bool test_for_fuzzy_msgstr)518 comment_line_end (size_t chars_to_remove, bool test_for_fuzzy_msgstr)
519 {
520   char *line;
521 
522   buflen -= chars_to_remove;
523   /* Drop trailing white space, but not EOLs.  */
524   while (buflen >= 1
525 	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
526     --buflen;
527 
528   /* At special positions we interpret a comment of the form
529        = "escaped string"
530      with an optional trailing semicolon as being the fuzzy msgstr, not a
531      regular comment.  */
532   if (test_for_fuzzy_msgstr
533       && buflen > 2 && buffer[0] == '=' && buffer[1] == ' '
534       && (fuzzy_msgstr =
535 	  parse_escaped_string (buffer + 2,
536 				buflen - (buffer[buflen - 1] == ';') - 2)))
537     return;
538 
539   line = conv_from_ucs4 (buffer, buflen);
540 
541   if (strcmp (line, "Flag: untranslated") == 0)
542     {
543       special_comment_add ("fuzzy");
544       next_is_fuzzy = true;
545     }
546   else if (strcmp (line, "Flag: unmatched") == 0)
547     next_is_obsolete = true;
548   else if (strlen (line) >= 6 && memcmp (line, "Flag: ", 6) == 0)
549     special_comment_add (line + 6);
550   else if (strlen (line) >= 9 && memcmp (line, "Comment: ", 9) == 0)
551     /* A comment extracted from the source.  */
552     po_callback_comment_dot (line + 9);
553   else
554     {
555       char *last_colon;
556       unsigned long number;
557       char *endp;
558 
559       if (strlen (line) >= 6 && memcmp (line, "File: ", 6) == 0
560 	  && (last_colon = strrchr (line + 6, ':')) != NULL
561 	  && *(last_colon + 1) != '\0'
562 	  && (number = strtoul (last_colon + 1, &endp, 10), *endp == '\0'))
563 	{
564 	  /* A "File: <filename>:<number>" type comment.  */
565 	  *last_colon = '\0';
566 	  po_callback_comment_filepos (line + 6, number);
567 	}
568       else
569 	po_callback_comment (line);
570     }
571 }
572 
573 
574 /* Phase 4: Replace each comment that is not inside a string with a space
575    character.  */
576 
577 static int
phase4_getc()578 phase4_getc ()
579 {
580   int c;
581 
582   c = phase3_getc ();
583   if (c != '/')
584     return c;
585   c = phase3_getc ();
586   switch (c)
587     {
588     default:
589       phase3_ungetc (c);
590       return '/';
591 
592     case '*':
593       /* C style comment.  */
594       {
595 	bool last_was_star;
596 	size_t trailing_stars;
597 	bool seen_newline;
598 
599 	comment_start ();
600 	last_was_star = false;
601 	trailing_stars = 0;
602 	seen_newline = false;
603 	/* Drop additional stars at the beginning of the comment.  */
604 	for (;;)
605 	  {
606 	    c = phase3_getc ();
607 	    if (c != '*')
608 	      break;
609 	    last_was_star = true;
610 	  }
611 	phase3_ungetc (c);
612 	for (;;)
613 	  {
614 	    c = phase3_getc ();
615 	    if (c == UEOF)
616 	      break;
617 	    /* We skip all leading white space, but not EOLs.  */
618 	    if (!(buflen == 0 && (c == ' ' || c == '\t')))
619 	      comment_add (c);
620 	    switch (c)
621 	      {
622 	      case '\n':
623 		seen_newline = true;
624 		comment_line_end (1, false);
625 		comment_start ();
626 		last_was_star = false;
627 		trailing_stars = 0;
628 		continue;
629 
630 	      case '*':
631 		last_was_star = true;
632 		trailing_stars++;
633 		continue;
634 
635 	      case '/':
636 		if (last_was_star)
637 		  {
638 		    /* Drop additional stars at the end of the comment.  */
639 		    comment_line_end (trailing_stars + 1,
640 				      expect_fuzzy_msgstr_as_c_comment
641 				      && !seen_newline);
642 		    break;
643 		  }
644 		/* FALLTHROUGH */
645 
646 	      default:
647 		last_was_star = false;
648 		trailing_stars = 0;
649 		continue;
650 	      }
651 	    break;
652 	  }
653 	return ' ';
654       }
655 
656     case '/':
657       /* C++ style comment.  */
658       comment_start ();
659       for (;;)
660 	{
661 	  c = phase3_getc ();
662 	  if (c == '\n' || c == UEOF)
663 	    break;
664 	  /* We skip all leading white space, but not EOLs.  */
665 	  if (!(buflen == 0 && (c == ' ' || c == '\t')))
666 	    comment_add (c);
667 	}
668       comment_line_end (0, expect_fuzzy_msgstr_as_cxx_comment);
669       return '\n';
670     }
671 }
672 
/* Return a character obtained from phase4_getc to the input.  Phase 4
   keeps no pushback of its own; the character goes back to phase 3.  */
static inline void
phase4_ungetc (int c)
{
  phase3_ungetc (c);
}
678 
679 
/* Tell whether C separates tokens.  Besides space, tab, CR, LF and form
   feed, the backspace character counts as whitespace too.  */
static bool
is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
    case '\f':
    case '\b':
      return true;
    default:
      return false;
    }
}
687 
/* Tell whether C may only appear inside a quoted string.  ASCII letters,
   digits and the punctuation characters
     ! # $ % & * + - . / : ? @ | ~ _ ^
   are allowed in unquoted tokens; every other value needs quoting.  */
static bool
is_quotable (int c)
{
  bool is_digit = ('0' <= c && c <= '9');
  bool is_upper = ('A' <= c && c <= 'Z');
  bool is_lower = ('a' <= c && c <= 'z');

  if (is_digit || is_upper || is_lower)
    return false;

  if (c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '*'
      || c == '+' || c == '-' || c == '.' || c == '/' || c == ':' || c == '?'
      || c == '@' || c == '|' || c == '~' || c == '_' || c == '^')
    return false;

  return true;
}
706 
707 
708 /* Read a key or value string.
709    Return the string in UTF-8 encoding, or NULL if no string is seen.
710    Return the start position of the string in *pos.  */
711 static char *
read_string(lex_pos_ty * pos)712 read_string (lex_pos_ty *pos)
713 {
714   static int *buffer;
715   static size_t bufmax;
716   static size_t buflen;
717   int c;
718 
719   /* Skip whitespace before the string.  */
720   do
721     c = phase4_getc ();
722   while (is_whitespace (c));
723 
724   if (c == UEOF)
725     /* No more string.  */
726     return NULL;
727 
728   *pos = gram_pos;
729   buflen = 0;
730   if (c == '"')
731     {
732       /* Read a string enclosed in double-quotes.  */
733       for (;;)
734 	{
735 	  c = phase3_getc ();
736 	  if (c == UEOF || c == '"')
737 	    break;
738 	  if (c == '\\')
739 	    {
740 	      c = phase3_getc ();
741 	      if (c == UEOF)
742 		break;
743 	      if (c >= '0' && c <= '7')
744 		{
745 		  unsigned int n = 0;
746 		  int j = 0;
747 		  for (;;)
748 		    {
749 		      n = n * 8 + (c - '0');
750 		      if (++j == 3)
751 			break;
752 		      c = phase3_getc ();
753 		      if (!(c >= '0' && c <= '7'))
754 			{
755 			  phase3_ungetc (c);
756 			  break;
757 			}
758 		    }
759 		  c = n;
760 		}
761 	      else if (c == 'u' || c == 'U')
762 		{
763 		  unsigned int n = 0;
764 		  int j;
765 		  for (j = 0; j < 4; j++)
766 		    {
767 		      c = phase3_getc ();
768 		      if (c >= '0' && c <= '9')
769 			n = n * 16 + (c - '0');
770 		      else if (c >= 'A' && c <= 'F')
771 			n = n * 16 + (c - 'A' + 10);
772 		      else if (c >= 'a' && c <= 'f')
773 			n = n * 16 + (c - 'a' + 10);
774 		      else
775 			{
776 			  phase3_ungetc (c);
777 			  break;
778 			}
779 		    }
780 		  c = n;
781 		}
782 	      else
783 		switch (c)
784 		  {
785 		  case 'a': c = '\a'; break;
786 		  case 'b': c = '\b'; break;
787 		  case 't': c = '\t'; break;
788 		  case 'r': c = '\r'; break;
789 		  case 'n': c = '\n'; break;
790 		  case 'v': c = '\v'; break;
791 		  case 'f': c = '\f'; break;
792 		  }
793 	    }
794 	  if (buflen >= bufmax)
795 	    {
796 	      bufmax = 2 * bufmax + 10;
797 	      buffer = xrealloc (buffer, bufmax * sizeof (int));
798 	    }
799 	  buffer[buflen++] = c;
800 	}
801       if (c == UEOF)
802 	po_xerror (PO_SEVERITY_ERROR, NULL,
803 		   real_file_name, gram_pos.line_number, (size_t)(-1), false,
804 		   _("warning: unterminated string"));
805     }
806   else
807     {
808       /* Read a token outside quotes.  */
809       if (is_quotable (c))
810 	po_xerror (PO_SEVERITY_ERROR, NULL,
811 		   real_file_name, gram_pos.line_number, (size_t)(-1), false,
812 		   _("warning: syntax error"));
813       for (; c != UEOF && !is_quotable (c); c = phase4_getc ())
814 	{
815 	  if (buflen >= bufmax)
816 	    {
817 	      bufmax = 2 * bufmax + 10;
818 	      buffer = xrealloc (buffer, bufmax * sizeof (int));
819 	    }
820 	  buffer[buflen++] = c;
821 	}
822     }
823 
824   return conv_from_ucs4 (buffer, buflen);
825 }
826 
827 
828 /* Read a .strings file from a stream, and dispatch to the various
829    abstract_catalog_reader_class_ty methods.  */
830 static void
stringtable_parse(abstract_catalog_reader_ty * pop,FILE * file,const char * real_filename,const char * logical_filename)831 stringtable_parse (abstract_catalog_reader_ty *pop, FILE *file,
832 		   const char *real_filename, const char *logical_filename)
833 {
834   fp = file;
835   real_file_name = real_filename;
836   gram_pos.file_name = xstrdup (real_file_name);
837   gram_pos.line_number = 1;
838   encoding = enc_undetermined;
839   expect_fuzzy_msgstr_as_c_comment = false;
840   expect_fuzzy_msgstr_as_cxx_comment = false;
841 
842   for (;;)
843     {
844       char *msgid;
845       lex_pos_ty msgid_pos;
846       char *msgstr;
847       lex_pos_ty msgstr_pos;
848       int c;
849 
850       /* Prepare for next msgid/msgstr pair.  */
851       special_comment_reset ();
852       next_is_obsolete = false;
853       next_is_fuzzy = false;
854       fuzzy_msgstr = NULL;
855 
856       /* Read the key and all the comments preceding it.  */
857       msgid = read_string (&msgid_pos);
858       if (msgid == NULL)
859 	break;
860 
861       special_comment_finish ();
862 
863       /* Skip whitespace.  */
864       do
865 	c = phase4_getc ();
866       while (is_whitespace (c));
867 
868       /* Expect a '=' or ';'.  */
869       if (c == UEOF)
870 	{
871 	  po_xerror (PO_SEVERITY_ERROR, NULL,
872 		     real_file_name, gram_pos.line_number, (size_t)(-1), false,
873 		     _("warning: unterminated key/value pair"));
874 	  break;
875 	}
876       if (c == ';')
877 	{
878 	  /* "key"; is an abbreviation for "key"=""; and does not
879 	     necessarily designate an untranslated entry.  */
880 	  msgstr = xstrdup ("");
881 	  msgstr_pos = msgid_pos;
882 	  po_callback_message (NULL, msgid, &msgid_pos, NULL,
883 			       msgstr, strlen (msgstr) + 1, &msgstr_pos,
884 			       NULL, NULL, NULL,
885 			       false, next_is_obsolete);
886 	}
887       else if (c == '=')
888 	{
889 	  /* Read the value.  */
890 	  msgstr = read_string (&msgstr_pos);
891 	  if (msgstr == NULL)
892 	    {
893 	      po_xerror (PO_SEVERITY_ERROR, NULL,
894 			 real_file_name, gram_pos.line_number, (size_t)(-1),
895 			 false, _("warning: unterminated key/value pair"));
896 	      break;
897 	    }
898 
899 	  /* Skip whitespace.  But for fuzzy key/value pairs, look for the
900 	     tentative msgstr in the form of a C style comment.  */
901 	  expect_fuzzy_msgstr_as_c_comment = next_is_fuzzy;
902 	  do
903 	    {
904 	      c = phase4_getc ();
905 	      if (fuzzy_msgstr != NULL)
906 		expect_fuzzy_msgstr_as_c_comment = false;
907 	    }
908 	  while (is_whitespace (c));
909 	  expect_fuzzy_msgstr_as_c_comment = false;
910 
911 	  /* Expect a ';'.  */
912 	  if (c == ';')
913 	    {
914 	      /* But for fuzzy key/value pairs, look for the tentative msgstr
915 		 in the form of a C++ style comment. */
916 	      if (fuzzy_msgstr == NULL && next_is_fuzzy)
917 		{
918 		  do
919 		    c = phase3_getc ();
920 		  while (c == ' ');
921 		  phase3_ungetc (c);
922 
923 		  expect_fuzzy_msgstr_as_cxx_comment = true;
924 		  c = phase4_getc ();
925 		  phase4_ungetc (c);
926 		  expect_fuzzy_msgstr_as_cxx_comment = false;
927 		}
928 	      if (fuzzy_msgstr != NULL && strcmp (msgstr, msgid) == 0)
929 		msgstr = fuzzy_msgstr;
930 
931 	      /* A key/value pair.  */
932 	      po_callback_message (NULL, msgid, &msgid_pos, NULL,
933 				   msgstr, strlen (msgstr) + 1, &msgstr_pos,
934 				   NULL, NULL, NULL,
935 				   false, next_is_obsolete);
936 	    }
937 	  else
938 	    {
939 	      po_xerror (PO_SEVERITY_ERROR, NULL,
940 			 real_file_name, gram_pos.line_number, (size_t)(-1),
941 			 false, _("\
942 warning: syntax error, expected ';' after string"));
943 	      break;
944 	    }
945 	}
946       else
947 	{
948 	  po_xerror (PO_SEVERITY_ERROR, NULL,
949 		     real_file_name, gram_pos.line_number, (size_t)(-1), false,
950 		     _("\
951 warning: syntax error, expected '=' or ';' after string"));
952 	  break;
953 	}
954     }
955 
956   fp = NULL;
957   real_file_name = NULL;
958   gram_pos.line_number = 0;
959 }
960 
961 const struct catalog_input_format input_format_stringtable =
962 {
963   stringtable_parse,			/* parse */
964   true					/* produces_utf8 */
965 };
966