xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/x-librep.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* xgettext librep backend.
2    Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc.
3 
4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "c-ctype.h"
31 #include "message.h"
32 #include "xgettext.h"
33 #include "x-librep.h"
34 #include "error.h"
35 #include "xalloc.h"
36 #include "exit.h"
37 #include "hash.h"
38 #include "gettext.h"
39 
40 #define _(s) gettext(s)
41 
42 
/* Summary of librep syntax:
   - ';' starts a comment until end of line.
   - Block comments start with '#|' and end with '|#'.
   - Numbers are constituted of an optional prefix (#b, #B for binary,
     #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
     #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
     the digits.
   - Characters are written as '?' followed by the character, possibly
     with an escape sequence, for example '?a', '?\n', '?\177'.
   - Strings are delimited by double quotes. Backslash introduces an escape
     sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
     '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
   - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
     if preceded by backslash or enclosed in |...|.
   - Keywords: written as #:SYMBOL.
   - () delimit lists.
   - [] delimit vectors.
   The reader is implemented in librep-0.14/src/lisp.c.  */
61 
62 
63 /* ====================== Keyword set customization.  ====================== */
64 
65 /* If true extract all strings.  */
66 static bool extract_all = false;
67 
68 static hash_table keywords;
69 static bool default_keywords = true;
70 
71 
72 void
x_librep_extract_all()73 x_librep_extract_all ()
74 {
75   extract_all = true;
76 }
77 
78 
79 void
x_librep_keyword(const char * name)80 x_librep_keyword (const char *name)
81 {
82   if (name == NULL)
83     default_keywords = false;
84   else
85     {
86       const char *end;
87       struct callshape shape;
88       const char *colon;
89 
90       if (keywords.table == NULL)
91 	hash_init (&keywords, 100);
92 
93       split_keywordspec (name, &end, &shape);
94 
95       /* The characters between name and end should form a valid Lisp
96 	 symbol.  */
97       colon = strchr (name, ':');
98       if (colon == NULL || colon >= end)
99 	insert_keyword_callshape (&keywords, name, end - name, &shape);
100     }
101 }
102 
103 /* Finish initializing the keywords hash table.
104    Called after argument processing, before each file is processed.  */
105 static void
init_keywords()106 init_keywords ()
107 {
108   if (default_keywords)
109     {
110       /* When adding new keywords here, also update the documentation in
111 	 xgettext.texi!  */
112       x_librep_keyword ("_");
113       default_keywords = false;
114     }
115 }
116 
/* Record the librep format-string flags: argument 1 of "_" is marked
   pass-librep-format and argument 2 of "format" is marked librep-format.  */
void
init_flag_table_librep ()
{
  xgettext_record_flag ("_:1:pass-librep-format");
  xgettext_record_flag ("format:2:librep-format");
}
123 
124 
125 /* ======================== Reading of characters.  ======================== */
126 
/* Name of the file actually being read; it appears in the read-error
   message emitted by do_getc.  */
static const char *real_file_name;

/* File name and current line number attached to the extracted messages.
   do_getc and do_ungetc keep line_number in step with the newlines
   consumed from and pushed back to the stream.  */
static char *logical_file_name;
static int line_number;

/* Stream the scanner reads from.  */
static FILE *fp;
136 
137 
138 /* Fetch the next character from the input file.  */
139 static int
do_getc()140 do_getc ()
141 {
142   int c = getc (fp);
143 
144   if (c == EOF)
145     {
146       if (ferror (fp))
147 	error (EXIT_FAILURE, errno, _("\
148 error while reading \"%s\""), real_file_name);
149     }
150   else if (c == '\n')
151    line_number++;
152 
153   return c;
154 }
155 
156 /* Put back the last fetched character, not EOF.  */
157 static void
do_ungetc(int c)158 do_ungetc (int c)
159 {
160   if (c == '\n')
161     line_number--;
162   ungetc (c, fp);
163 }
164 
165 
166 /* ========================== Reading of tokens.  ========================== */
167 
168 
/* A token is a growable sequence of characters.  The sequence is not
   NUL terminated; charcount gives its length.  */
struct token
{
  int allocated;                /* capacity of 'chars', in characters */
  int charcount;                /* number of characters currently stored */
  char *chars;                  /* heap buffer holding the characters */
};
176 
177 /* Initialize a 'struct token'.  */
178 static inline void
init_token(struct token * tp)179 init_token (struct token *tp)
180 {
181   tp->allocated = 10;
182   tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
183   tp->charcount = 0;
184 }
185 
186 /* Free the memory pointed to by a 'struct token'.  */
187 static inline void
free_token(struct token * tp)188 free_token (struct token *tp)
189 {
190   free (tp->chars);
191 }
192 
193 /* Ensure there is enough room in the token for one more character.  */
194 static inline void
grow_token(struct token * tp)195 grow_token (struct token *tp)
196 {
197   if (tp->charcount == tp->allocated)
198     {
199       tp->allocated *= 2;
200       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
201     }
202 }
203 
204 /* Read the next token.  If 'first' is given, it points to the first
205    character, which has already been read.  Returns true for a symbol,
206    false for a number.  */
207 static bool
read_token(struct token * tp,const int * first)208 read_token (struct token *tp, const int *first)
209 {
210   int c;
211   /* Variables for speculative number parsing:  */
212   int radix = -1;
213   int nfirst = 0;
214   bool exact = true;
215   bool rational = false;
216   bool exponent = false;
217   bool had_sign = false;
218   bool expecting_prefix = false;
219 
220   init_token (tp);
221 
222   if (first)
223     c = *first;
224   else
225     c = do_getc ();
226 
227   for (;; c = do_getc ())
228     {
229       switch (c)
230 	{
231 	case EOF:
232 	  goto done;
233 
234 	case ' ': case '\t': case '\n': case '\f': case '\r':
235 	case '(': case ')': case '[': case ']':
236 	case '\'': case '"': case ';': case ',': case '`':
237 	  goto done;
238 
239 	case '\\':
240 	  radix = 0;
241 	  c = do_getc ();
242 	  if (c == EOF)
243 	    /* Invalid, but be tolerant.  */
244 	    break;
245 	  grow_token (tp);
246 	  tp->chars[tp->charcount++] = c;
247 	  break;
248 
249 	case '|':
250 	  radix = 0;
251 	  for (;;)
252 	    {
253 	      c = do_getc ();
254 	      if (c == EOF || c == '|')
255 		break;
256 	      grow_token (tp);
257 	      tp->chars[tp->charcount++] = c;
258 	    }
259 	  break;
260 
261 	default:
262 	  if (radix != 0)
263 	    {
264 	      if (expecting_prefix)
265 		{
266 		  switch (c)
267 		    {
268 		    case 'B': case 'b':
269 		      radix = 2;
270 		      break;
271 		    case 'O': case 'o':
272 		      radix = 8;
273 		      break;
274 		    case 'D': case 'd':
275 		      radix = 10;
276 		      break;
277 		    case 'X': case 'x':
278 		      radix = 16;
279 		      break;
280 		    case 'E': case 'e':
281 		    case 'I': case 'i':
282 		      break;
283 		    default:
284 		      radix = 0;
285 		      break;
286 		    }
287 		  expecting_prefix = false;
288 		  nfirst = tp->charcount + 1;
289 		}
290 	      else if (tp->charcount == nfirst
291 		       && (c == '+' || c == '-' || c == '#'))
292 		{
293 		  if (c == '#')
294 		    {
295 		      if (had_sign)
296 			radix = 0;
297 		      else
298 			expecting_prefix = true;
299 		    }
300 		  else
301 		    had_sign = true;
302 		  nfirst = tp->charcount + 1;
303 		}
304 	      else
305 		{
306 		  switch (radix)
307 		    {
308 		    case -1:
309 		      if (c == '.')
310 			{
311 			  radix = 10;
312 			  exact = false;
313 			}
314 		      else if (!(c >= '0' && c <= '9'))
315 			radix = 0;
316 		      else if (c == '0')
317 			radix = 1;
318 		      else
319 			radix = 10;
320 		      break;
321 
322 		    case 1:
323 		      switch (c)
324 			{
325 			case 'X': case 'x':
326 			  radix = 16;
327 			  nfirst = tp->charcount + 1;
328 			  break;
329 			case '0': case '1': case '2': case '3': case '4':
330 			case '5': case '6': case '7':
331 			  radix = 8;
332 			  nfirst = tp->charcount;
333 			  break;
334 			case '.': case 'E': case 'e':
335 			  radix = 10;
336 			  exact = false;
337 			  break;
338 			case '/':
339 			  radix = 10;
340 			  rational = true;
341 			  break;
342 			default:
343 			  radix = 0;
344 			  break;
345 			}
346 		      break;
347 
348 		    default:
349 		      switch (c)
350 			{
351 			case '.':
352 			  if (exact && radix == 10 && !rational)
353 			    exact = false;
354 			  else
355 			    radix = 0;
356 			  break;
357 			case '/':
358 			  if (exact && !rational)
359 			    rational = true;
360 			  else
361 			    radix = 0;
362 			  break;
363 			case 'E': case 'e':
364 			  if (radix == 10)
365 			    {
366 			      if (!rational && !exponent)
367 				{
368 				  exponent = true;
369 				  exact = false;
370 				}
371 			      else
372 				radix = 0;
373 			      break;
374 			    }
375 			  /*FALLTHROUGH*/
376 			default:
377 			  if (exponent && (c == '+' || c == '-'))
378 			    break;
379 			  if ((radix <= 10
380 			       && !(c >= '0' && c <= '0' + radix - 1))
381 			      || (radix == 16 && !c_isxdigit (c)))
382 			    radix = 0;
383 			  break;
384 			}
385 		      break;
386 		    }
387 		}
388 	    }
389 	  else
390 	    {
391 	      if (c == '#')
392 		goto done;
393 	    }
394 	  grow_token (tp);
395 	  tp->chars[tp->charcount++] = c;
396 	}
397     }
398  done:
399   if (c != EOF)
400     do_ungetc (c);
401   if (radix > 0 && nfirst < tp->charcount)
402     return false; /* number */
403   else
404     return true; /* symbol */
405 }
406 
407 
408 /* ========================= Accumulating comments ========================= */
409 
410 
/* Growable buffer collecting the text of the comment line currently
   being read: 'bufmax' is its allocated size and 'buflen' the number of
   characters stored so far.  */
static char *buffer;
static size_t bufmax;
static size_t buflen;
414 
415 static inline void
comment_start()416 comment_start ()
417 {
418   buflen = 0;
419 }
420 
421 static inline void
comment_add(int c)422 comment_add (int c)
423 {
424   if (buflen >= bufmax)
425     {
426       bufmax = 2 * bufmax + 10;
427       buffer = xrealloc (buffer, bufmax);
428     }
429   buffer[buflen++] = c;
430 }
431 
432 static inline void
comment_line_end(size_t chars_to_remove)433 comment_line_end (size_t chars_to_remove)
434 {
435   buflen -= chars_to_remove;
436   while (buflen >= 1
437 	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
438     --buflen;
439   if (chars_to_remove == 0 && buflen >= bufmax)
440     {
441       bufmax = 2 * bufmax + 10;
442       buffer = xrealloc (buffer, bufmax);
443     }
444   buffer[buflen] = '\0';
445   savable_comment_add (buffer);
446 }
447 
448 
/* Line numbers of the most recent comment and of the most recent
   non-comment object.  They are compared at each newline to decide
   whether the saved comments still immediately precede what follows.  */
static int last_comment_line;
static int last_non_comment_line;
453 
454 
455 /* ========================= Accumulating messages ========================= */
456 
457 
458 static message_list_ty *mlp;
459 
460 
461 /* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
462 
463 
/* Only symbols (e.g. GETTEXT or NGETTEXT) and strings matter to the
   extractor.  All other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,     /* a symbol */
  t_string,     /* a string literal */
  t_other,      /* any other real object */
  t_dot,        /* the '.' pseudo object */
  t_close,      /* the ')' or ']' pseudo object */
  t_eof         /* end of input */
};
475 
476 struct object
477 {
478   enum object_type type;
479   struct token *token;		/* for t_symbol and t_string */
480   int line_number_at_start;	/* for t_string */
481 };
482 
483 /* Free the memory pointed to by a 'struct object'.  */
484 static inline void
free_object(struct object * op)485 free_object (struct object *op)
486 {
487   if (op->type == t_symbol || op->type == t_string)
488     {
489       free_token (op->token);
490       free (op->token);
491     }
492 }
493 
494 /* Convert a t_symbol/t_string token to a char*.  */
495 static char *
string_of_object(const struct object * op)496 string_of_object (const struct object *op)
497 {
498   char *str;
499   int n;
500 
501   if (!(op->type == t_symbol || op->type == t_string))
502     abort ();
503   n = op->token->charcount;
504   str = (char *) xmalloc (n + 1);
505   memcpy (str, op->token->chars, n);
506   str[n] = '\0';
507   return str;
508 }
509 
510 /* Context lookup table.  */
511 static flag_context_list_table_ty *flag_context_list_table;
512 
/* Return the character denoted by an escape sequence.  C is the
   character that followed the backslash; any further characters that
   belong to the sequence are read from the input.

   Recognized sequences: n r f t v a (control characters), ^X (X with
   only its low five bits kept), up to three octal digits, and x followed
   by any number of hexadecimal digits.  Any other C stands for itself.
   Octal and hexadecimal values are reduced to one 'unsigned char'.

   Returns EOF only when the input ends right after '^'.  */
static int
do_getc_escaped (int c)
{
  switch (c)
    {
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 'f':
      return '\f';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case 'a':
      return '\a';
    case '^':
      c = do_getc ();
      if (c == EOF)
        return EOF;
      return c & 0x1f;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
        /* At most three octal digits, so the value cannot exceed 0777.  */
        int n = c - '0';

        c = do_getc ();
        if (c != EOF)
          {
            if (c >= '0' && c <= '7')
              {
                n = (n << 3) + (c - '0');
                c = do_getc ();
                if (c != EOF)
                  {
                    if (c >= '0' && c <= '7')
                      n = (n << 3) + (c - '0');
                    else
                      do_ungetc (c);
                  }
              }
            else
              do_ungetc (c);
          }
        return (unsigned char) n;
      }
    case 'x':
      {
        /* The number of hexadecimal digits is unbounded.  Accumulate in
           an unsigned type so that a long digit run wraps around instead
           of overflowing a signed shift, which is undefined behavior.
           Only the low 8 bits are returned, so the result for short
           sequences is unchanged.  */
        unsigned int n = 0;

        for (;;)
          {
            c = do_getc ();
            if (c == EOF)
              break;
            else if (c >= '0' && c <= '9')
              n = (n << 4) + (unsigned int) (c - '0');
            else if (c >= 'A' && c <= 'F')
              n = (n << 4) + (unsigned int) (c - 'A' + 10);
            else if (c >= 'a' && c <= 'f')
              n = (n << 4) + (unsigned int) (c - 'a' + 10);
            else
              {
                /* First non-digit ends the sequence and is not part
                   of it.  */
                do_ungetc (c);
                break;
              }
          }
        return (unsigned char) n;
      }
    default:
      return c;
    }
}
588 
589 /* Read the next object.  */
590 static void
read_object(struct object * op,flag_context_ty outer_context)591 read_object (struct object *op, flag_context_ty outer_context)
592 {
593   for (;;)
594     {
595       int c;
596 
597       c = do_getc ();
598 
599       switch (c)
600 	{
601 	case EOF:
602 	  op->type = t_eof;
603 	  return;
604 
605 	case '\n':
606 	  /* Comments assumed to be grouped with a message must immediately
607 	     precede it, with no non-whitespace token on a line between
608 	     both.  */
609 	  if (last_non_comment_line > last_comment_line)
610 	    savable_comment_reset ();
611 	  continue;
612 
613 	case ' ': case '\t': case '\f': case '\r':
614 	  continue;
615 
616 	case '(':
617 	  {
618 	    int arg = 0;		/* Current argument number.  */
619 	    flag_context_list_iterator_ty context_iter;
620 	    const struct callshapes *shapes = NULL;
621 	    struct arglist_parser *argparser = NULL;
622 
623 	    for (;; arg++)
624 	      {
625 		struct object inner;
626 		flag_context_ty inner_context;
627 
628 		if (arg == 0)
629 		  inner_context = null_context;
630 		else
631 		  inner_context =
632 		    inherited_context (outer_context,
633 				       flag_context_list_iterator_advance (
634 					 &context_iter));
635 
636 		read_object (&inner, inner_context);
637 
638 		/* Recognize end of list.  */
639 		if (inner.type == t_close)
640 		  {
641 		    op->type = t_other;
642 		    /* Don't bother converting "()" to "NIL".  */
643 		    last_non_comment_line = line_number;
644 		    if (argparser != NULL)
645 		      arglist_parser_done (argparser, arg);
646 		    return;
647 		  }
648 
649 		/* Dots are not allowed in every position.
650 		   But be tolerant.  */
651 
652 		/* EOF inside list is illegal.  But be tolerant.  */
653 		if (inner.type == t_eof)
654 		  break;
655 
656 		if (arg == 0)
657 		  {
658 		    /* This is the function position.  */
659 		    if (inner.type == t_symbol)
660 		      {
661 			char *symbol_name = string_of_object (&inner);
662 			void *keyword_value;
663 
664 			if (hash_find_entry (&keywords,
665 					     symbol_name, strlen (symbol_name),
666 					     &keyword_value)
667 			    == 0)
668 			  shapes = (const struct callshapes *) keyword_value;
669 
670 			argparser = arglist_parser_alloc (mlp, shapes);
671 
672 			context_iter =
673 			  flag_context_list_iterator (
674 			    flag_context_list_table_lookup (
675 			      flag_context_list_table,
676 			      symbol_name, strlen (symbol_name)));
677 
678 			free (symbol_name);
679 		      }
680 		    else
681 		      context_iter = null_context_list_iterator;
682 		  }
683 		else
684 		  {
685 		    /* These are the argument positions.  */
686 		    if (argparser != NULL && inner.type == t_string)
687 		      arglist_parser_remember (argparser, arg,
688 					       string_of_object (&inner),
689 					       inner_context,
690 					       logical_file_name,
691 					       inner.line_number_at_start,
692 					       savable_comment);
693 		  }
694 
695 		free_object (&inner);
696 	      }
697 
698 	    if (argparser != NULL)
699 	      arglist_parser_done (argparser, arg);
700 	  }
701 	  op->type = t_other;
702 	  last_non_comment_line = line_number;
703 	  return;
704 
705 	case '[':
706 	  {
707 	    for (;;)
708 	      {
709 		struct object inner;
710 
711 		read_object (&inner, null_context);
712 
713 		/* Recognize end of vector.  */
714 		if (inner.type == t_close)
715 		  {
716 		    op->type = t_other;
717 		    last_non_comment_line = line_number;
718 		    return;
719 		  }
720 
721 		/* Dots are not allowed.  But be tolerant.  */
722 
723 		/* EOF inside vector is illegal.  But be tolerant.  */
724 		if (inner.type == t_eof)
725 		  break;
726 
727 		free_object (&inner);
728 	      }
729 	  }
730 	  op->type = t_other;
731 	  last_non_comment_line = line_number;
732 	  return;
733 
734 	case ')': case ']':
735 	  /* Tell the caller about the end of list or vector.
736 	     Unmatched closing parenthesis is illegal.  But be tolerant.  */
737 	  op->type = t_close;
738 	  last_non_comment_line = line_number;
739 	  return;
740 
741 	case ',':
742 	  {
743 	    int c = do_getc ();
744 	    /* The ,@ handling inside lists is wrong anyway, because
745 	       ,@form expands to an unknown number of elements.  */
746 	    if (c != EOF && c != '@')
747 	      do_ungetc (c);
748 	  }
749 	  /*FALLTHROUGH*/
750 	case '\'':
751 	case '`':
752 	  {
753 	    struct object inner;
754 
755 	    read_object (&inner, null_context);
756 
757 	    /* Dots and EOF are not allowed here.  But be tolerant.  */
758 
759 	    free_object (&inner);
760 
761 	    op->type = t_other;
762 	    last_non_comment_line = line_number;
763 	    return;
764 	  }
765 
766 	case ';':
767 	  {
768 	    bool all_semicolons = true;
769 
770 	    last_comment_line = line_number;
771 	    comment_start ();
772 	    for (;;)
773 	      {
774 		int c = do_getc ();
775 		if (c == EOF || c == '\n' || c == '\f' || c == '\r')
776 		  break;
777 		if (c != ';')
778 		  all_semicolons = false;
779 		if (!all_semicolons)
780 		  {
781 		    /* We skip all leading white space, but not EOLs.  */
782 		    if (!(buflen == 0 && (c == ' ' || c == '\t')))
783 		      comment_add (c);
784 		  }
785 	      }
786 	    comment_line_end (0);
787 	    continue;
788 	  }
789 
790 	case '"':
791 	  {
792 	    op->token = (struct token *) xmalloc (sizeof (struct token));
793 	    init_token (op->token);
794 	    op->line_number_at_start = line_number;
795 	    for (;;)
796 	      {
797 		int c = do_getc ();
798 		if (c == EOF)
799 		  /* Invalid input.  Be tolerant, no error message.  */
800 		  break;
801 		if (c == '"')
802 		  break;
803 		if (c == '\\')
804 		  {
805 		    c = do_getc ();
806 		    if (c == EOF)
807 		      /* Invalid input.  Be tolerant, no error message.  */
808 		      break;
809 		    if (c == '\n')
810 		      /* Ignore escaped newline.  */
811 		      ;
812 		    else
813 		      {
814 			c = do_getc_escaped (c);
815 			if (c == EOF)
816 			  /* Invalid input.  Be tolerant, no error message.  */
817 			  break;
818 			grow_token (op->token);
819 			op->token->chars[op->token->charcount++] = c;
820 		      }
821 		  }
822 		else
823 		  {
824 		    grow_token (op->token);
825 		    op->token->chars[op->token->charcount++] = c;
826 		  }
827 	      }
828 	    op->type = t_string;
829 
830 	    if (extract_all)
831 	      {
832 		lex_pos_ty pos;
833 
834 		pos.file_name = logical_file_name;
835 		pos.line_number = op->line_number_at_start;
836 		remember_a_message (mlp, NULL, string_of_object (op),
837 				    null_context, &pos, savable_comment);
838 	      }
839 	    last_non_comment_line = line_number;
840 	    return;
841 	  }
842 
843 	case '?':
844 	  c = do_getc ();
845 	  if (c == EOF)
846 	    /* Invalid input.  Be tolerant, no error message.  */
847 	    ;
848 	  else if (c == '\\')
849 	    {
850 	      c = do_getc ();
851 	      if (c == EOF)
852 		/* Invalid input.  Be tolerant, no error message.  */
853 		;
854 	      else
855 		{
856 		  c = do_getc_escaped (c);
857 		  if (c == EOF)
858 		    /* Invalid input.  Be tolerant, no error message.  */
859 		    ;
860 		}
861 	    }
862 	  op->type = t_other;
863 	  last_non_comment_line = line_number;
864 	  return;
865 
866 	case '#':
867 	  /* Dispatch macro handling.  */
868 	  c = do_getc ();
869 	  if (c == EOF)
870 	    /* Invalid input.  Be tolerant, no error message.  */
871 	    {
872 	      op->type = t_other;
873 	      return;
874 	    }
875 
876 	  switch (c)
877 	    {
878 	    case '!':
879 	      if (ftell (fp) == 2)
880 		/* Skip comment until !# */
881 		{
882 		  c = do_getc ();
883 		  for (;;)
884 		    {
885 		      if (c == EOF)
886 			break;
887 		      if (c == '!')
888 			{
889 			  c = do_getc ();
890 			  if (c == EOF || c == '#')
891 			    break;
892 			}
893 		      else
894 			c = do_getc ();
895 		    }
896 		  if (c == EOF)
897 		    {
898 		      /* EOF not allowed here.  But be tolerant.  */
899 		      op->type = t_eof;
900 		      return;
901 		    }
902 		  continue;
903 		}
904 	      /*FALLTHROUGH*/
905 	    case '\'':
906 	    case ':':
907 	      {
908 		struct object inner;
909 		read_object (&inner, null_context);
910 		/* Dots and EOF are not allowed here.
911 		   But be tolerant.  */
912 		free_object (&inner);
913 		op->type = t_other;
914 		last_non_comment_line = line_number;
915 		return;
916 	      }
917 
918 	    case '[':
919 	    case '(':
920 	      {
921 		struct object inner;
922 		do_ungetc (c);
923 		read_object (&inner, null_context);
924 		/* Dots and EOF are not allowed here.
925 		   But be tolerant.  */
926 		free_object (&inner);
927 		op->type = t_other;
928 		last_non_comment_line = line_number;
929 		return;
930 	      }
931 
932 	    case '|':
933 	      {
934 		int depth = 0;
935 
936 		comment_start ();
937 		c = do_getc ();
938 		for (;;)
939 		  {
940 		    if (c == EOF)
941 		      break;
942 		    if (c == '|')
943 		      {
944 			c = do_getc ();
945 			if (c == EOF)
946 			  break;
947 			if (c == '#')
948 			  {
949 			    if (depth == 0)
950 			      {
951 				comment_line_end (0);
952 				break;
953 			      }
954 			    depth--;
955 			    comment_add ('|');
956 			    comment_add ('#');
957 			    c = do_getc ();
958 			  }
959 			else
960 			  comment_add ('|');
961 		      }
962 		    else if (c == '#')
963 		      {
964 			c = do_getc ();
965 			if (c == EOF)
966 			  break;
967 			comment_add ('#');
968 			if (c == '|')
969 			  {
970 			    depth++;
971 			    comment_add ('|');
972 			    c = do_getc ();
973 			  }
974 		      }
975 		    else
976 		      {
977 			/* We skip all leading white space.  */
978 			if (!(buflen == 0 && (c == ' ' || c == '\t')))
979 			  comment_add (c);
980 			if (c == '\n')
981 			  {
982 			    comment_line_end (1);
983 			    comment_start ();
984 			  }
985 			c = do_getc ();
986 		      }
987 		  }
988 		if (c == EOF)
989 		  {
990 		    /* EOF not allowed here.  But be tolerant.  */
991 		    op->type = t_eof;
992 		    return;
993 		  }
994 		last_comment_line = line_number;
995 		continue;
996 	      }
997 
998 	    case '\\':
999 	      {
1000 		struct token token;
1001 		int first = '\\';
1002 		read_token (&token, &first);
1003 		free_token (&token);
1004 		op->type = t_other;
1005 		last_non_comment_line = line_number;
1006 		return;
1007 	      }
1008 
1009 	    case 'T': case 't':
1010 	    case 'F': case 'f':
1011 	      op->type = t_other;
1012 	      last_non_comment_line = line_number;
1013 	      return;
1014 
1015 	    case 'B': case 'b':
1016 	    case 'O': case 'o':
1017 	    case 'D': case 'd':
1018 	    case 'X': case 'x':
1019 	    case 'E': case 'e':
1020 	    case 'I': case 'i':
1021 	      {
1022 		struct token token;
1023 		do_ungetc (c);
1024 		c = '#';
1025 		read_token (&token, &c);
1026 		free_token (&token);
1027 		op->type = t_other;
1028 		last_non_comment_line = line_number;
1029 		return;
1030 	      }
1031 
1032 	    default:
1033 	      /* Invalid input.  Be tolerant, no error message.  */
1034 	      op->type = t_other;
1035 	      last_non_comment_line = line_number;
1036 	      return;
1037 	    }
1038 
1039 	  /*NOTREACHED*/
1040 	  abort ();
1041 
1042 	default:
1043 	  /* Read a token.  */
1044 	  {
1045 	    bool symbol;
1046 
1047 	    op->token = (struct token *) xmalloc (sizeof (struct token));
1048 	    symbol = read_token (op->token, &c);
1049 	    if (op->token->charcount == 1 && op->token->chars[0] == '.')
1050 	      {
1051 		free_token (op->token);
1052 		free (op->token);
1053 		op->type = t_dot;
1054 		last_non_comment_line = line_number;
1055 		return;
1056 	      }
1057 	    if (!symbol)
1058 	      {
1059 		free_token (op->token);
1060 		free (op->token);
1061 		op->type = t_other;
1062 		last_non_comment_line = line_number;
1063 		return;
1064 	      }
1065 	    /* Distinguish between "foo" and "foo#bar".  */
1066 	    c = do_getc ();
1067 	    if (c == '#')
1068 	      {
1069 		struct token second_token;
1070 
1071 		free_token (op->token);
1072 		free (op->token);
1073 		read_token (&second_token, NULL);
1074 		free_token (&second_token);
1075 		op->type = t_other;
1076 		last_non_comment_line = line_number;
1077 		return;
1078 	      }
1079 	    else
1080 	      {
1081 		if (c != EOF)
1082 		  do_ungetc (c);
1083 		op->type = t_symbol;
1084 		last_non_comment_line = line_number;
1085 		return;
1086 	      }
1087 	  }
1088 	}
1089     }
1090 }
1091 
1092 
1093 void
extract_librep(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)1094 extract_librep (FILE *f,
1095 		const char *real_filename, const char *logical_filename,
1096 		flag_context_list_table_ty *flag_table,
1097 		msgdomain_list_ty *mdlp)
1098 {
1099   mlp = mdlp->item[0]->messages;
1100 
1101   fp = f;
1102   real_file_name = real_filename;
1103   logical_file_name = xstrdup (logical_filename);
1104   line_number = 1;
1105 
1106   last_comment_line = -1;
1107   last_non_comment_line = -1;
1108 
1109   flag_context_list_table = flag_table;
1110 
1111   init_keywords ();
1112 
1113   /* Eat tokens until eof is seen.  When read_object returns
1114      due to an unbalanced closing parenthesis, just restart it.  */
1115   do
1116     {
1117       struct object toplevel_object;
1118 
1119       read_object (&toplevel_object, null_context);
1120 
1121       if (toplevel_object.type == t_eof)
1122 	break;
1123 
1124       free_object (&toplevel_object);
1125     }
1126   while (!feof (fp));
1127 
1128   /* Close scanner.  */
1129   fp = NULL;
1130   real_file_name = NULL;
1131   logical_file_name = NULL;
1132   line_number = 0;
1133 }
1134