1 /* xgettext librep backend.
2 Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc.
3
4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "c-ctype.h"
31 #include "message.h"
32 #include "xgettext.h"
33 #include "x-librep.h"
34 #include "error.h"
35 #include "xalloc.h"
36 #include "exit.h"
37 #include "hash.h"
38 #include "gettext.h"
39
40 #define _(s) gettext(s)
41
42
43 /* Summary of librep syntax:
44 - ';' starts a comment until end of line.
45 - Block comments start with '#|' and end with '|#'.
46 - Numbers are constituted of an optional prefix (#b, #B for binary,
47 #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
48 #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
49 the digits.
   - Characters are written as '?' followed by the character, possibly
     with an escape sequence, for example '?a', '?\n', '?\177'.
52 - Strings are delimited by double quotes. Backslash introduces an escape
53 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
54 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
55 - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
56 if preceded by backslash or enclosed in |...|.
57 - Keywords: written as #:SYMBOL.
58 - () delimit lists.
59 - [] delimit vectors.
60 The reader is implemented in librep-0.14/src/lisp.c. */
61
62
63 /* ====================== Keyword set customization. ====================== */
64
/* If true extract all strings.  Set by x_librep_extract_all.  */
static bool extract_all = false;

/* Table of the keywords registered through x_librep_keyword; it is
   consulted by read_object for the symbol in function position.  */
static hash_table keywords;
/* True until either x_librep_keyword (NULL) has been called or
   init_keywords has installed the default keyword.  */
static bool default_keywords = true;
70
71
72 void
x_librep_extract_all()73 x_librep_extract_all ()
74 {
75 extract_all = true;
76 }
77
78
79 void
x_librep_keyword(const char * name)80 x_librep_keyword (const char *name)
81 {
82 if (name == NULL)
83 default_keywords = false;
84 else
85 {
86 const char *end;
87 struct callshape shape;
88 const char *colon;
89
90 if (keywords.table == NULL)
91 hash_init (&keywords, 100);
92
93 split_keywordspec (name, &end, &shape);
94
95 /* The characters between name and end should form a valid Lisp
96 symbol. */
97 colon = strchr (name, ':');
98 if (colon == NULL || colon >= end)
99 insert_keyword_callshape (&keywords, name, end - name, &shape);
100 }
101 }
102
103 /* Finish initializing the keywords hash table.
104 Called after argument processing, before each file is processed. */
105 static void
init_keywords()106 init_keywords ()
107 {
108 if (default_keywords)
109 {
110 /* When adding new keywords here, also update the documentation in
111 xgettext.texi! */
112 x_librep_keyword ("_");
113 default_keywords = false;
114 }
115 }
116
/* Record the format-string flags of the librep backend through
   xgettext_record_flag: argument 1 of "_" is flagged pass-librep-format
   and argument 2 of "format" is flagged librep-format.  */
void
init_flag_table_librep (void)
{
  xgettext_record_flag ("_:1:pass-librep-format");
  xgettext_record_flag ("format:2:librep-format");
}
123
124
125 /* ======================== Reading of characters. ======================== */
126
/* Real filename, used in error messages about the input file.  */
static const char *real_file_name;

/* Logical filename and line number, used to label the extracted messages.
   line_number is incremented by do_getc for every newline read and
   decremented again by do_ungetc when a newline is pushed back.  */
static char *logical_file_name;
static int line_number;

/* The input file stream.  Set by extract_librep for the duration of a
   scan and reset to NULL afterwards.  */
static FILE *fp;
136
137
138 /* Fetch the next character from the input file. */
139 static int
do_getc()140 do_getc ()
141 {
142 int c = getc (fp);
143
144 if (c == EOF)
145 {
146 if (ferror (fp))
147 error (EXIT_FAILURE, errno, _("\
148 error while reading \"%s\""), real_file_name);
149 }
150 else if (c == '\n')
151 line_number++;
152
153 return c;
154 }
155
156 /* Put back the last fetched character, not EOF. */
157 static void
do_ungetc(int c)158 do_ungetc (int c)
159 {
160 if (c == '\n')
161 line_number--;
162 ungetc (c, fp);
163 }
164
165
166 /* ========================== Reading of tokens. ========================== */
167
168
/* A token consists of a sequence of characters, kept in a growable
   array.  The array carries no terminating NUL; string_of_object adds
   one when it copies the characters out.  */
struct token
{
  int allocated;                /* number of chars allocated for 'chars' */
  int charcount;                /* number of chars of 'chars' in use */
  char *chars;                  /* the token's constituents */
};
176
177 /* Initialize a 'struct token'. */
178 static inline void
init_token(struct token * tp)179 init_token (struct token *tp)
180 {
181 tp->allocated = 10;
182 tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
183 tp->charcount = 0;
184 }
185
186 /* Free the memory pointed to by a 'struct token'. */
187 static inline void
free_token(struct token * tp)188 free_token (struct token *tp)
189 {
190 free (tp->chars);
191 }
192
193 /* Ensure there is enough room in the token for one more character. */
194 static inline void
grow_token(struct token * tp)195 grow_token (struct token *tp)
196 {
197 if (tp->charcount == tp->allocated)
198 {
199 tp->allocated *= 2;
200 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
201 }
202 }
203
/* Read the next token.  If 'first' is given, it points to the first
   character, which has already been read.  Returns true for a symbol,
   false for a number.

   TP is initialized here (init_token); the caller releases it with
   free_token.  The character that terminated the token is pushed back
   onto the input, unless it is EOF.

   While the characters are collected, the function speculatively checks
   whether they form a number.  The state of that check is kept in
   'radix':
     -1  nothing seen yet that decides the matter,
      0  definitely not a number,
      1  a leading '0' was seen, the radix is not yet known,
     >1  the radix of the number being read.
   'nfirst' is the index in the token at which the digits are expected
   to start, i.e. after any sign or '#' prefix.  */
static bool
read_token (struct token *tp, const int *first)
{
  int c;
  /* Variables for speculative number parsing:  */
  int radix = -1;
  int nfirst = 0;
  bool exact = true;
  bool rational = false;
  bool exponent = false;
  bool had_sign = false;
  bool expecting_prefix = false;

  init_token (tp);

  if (first)
    c = *first;
  else
    c = do_getc ();

  for (;; c = do_getc ())
    {
      switch (c)
        {
        case EOF:
          goto done;

        /* Whitespace and syntactic delimiters terminate the token.  */
        case ' ': case '\t': case '\n': case '\f': case '\r':
        case '(': case ')': case '[': case ']':
        case '\'': case '"': case ';': case ',': case '`':
          goto done;

        case '\\':
          /* A backslash makes the next character an ordinary symbol
             constituent; the token can no longer be a number.  */
          radix = 0;
          c = do_getc ();
          if (c == EOF)
            /* Invalid, but be tolerant.  */
            break;
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
          break;

        case '|':
          /* |...| encloses arbitrary symbol constituents; the token can
             no longer be a number.  */
          radix = 0;
          for (;;)
            {
              c = do_getc ();
              if (c == EOF || c == '|')
                break;
              grow_token (tp);
              tp->chars[tp->charcount++] = c;
            }
          break;

        default:
          if (radix != 0)
            {
              /* The token may still turn out to be a number.  */
              if (expecting_prefix)
                {
                  /* C is the letter that follows a '#'.  */
                  switch (c)
                    {
                    case 'B': case 'b':
                      radix = 2;
                      break;
                    case 'O': case 'o':
                      radix = 8;
                      break;
                    case 'D': case 'd':
                      radix = 10;
                      break;
                    case 'X': case 'x':
                      radix = 16;
                      break;
                    case 'E': case 'e':
                    case 'I': case 'i':
                      /* Exactness prefix; the radix stays undecided.  */
                      break;
                    default:
                      radix = 0;
                      break;
                    }
                  expecting_prefix = false;
                  nfirst = tp->charcount + 1;
                }
              else if (tp->charcount == nfirst
                       && (c == '+' || c == '-' || c == '#'))
                {
                  /* A sign or a '#' at the position where the digits
                     would otherwise begin.  */
                  if (c == '#')
                    {
                      if (had_sign)
                        /* A '#' prefix after a sign is not a number.  */
                        radix = 0;
                      else
                        expecting_prefix = true;
                    }
                  else
                    had_sign = true;
                  nfirst = tp->charcount + 1;
                }
              else
                {
                  switch (radix)
                    {
                    case -1:
                      /* First character that decides about the radix.  */
                      if (c == '.')
                        {
                          radix = 10;
                          exact = false;
                        }
                      else if (!(c >= '0' && c <= '9'))
                        radix = 0;
                      else if (c == '0')
                        radix = 1;
                      else
                        radix = 10;
                      break;

                    case 1:
                      /* Character immediately after a leading '0'.  */
                      switch (c)
                        {
                        case 'X': case 'x':
                          radix = 16;
                          nfirst = tp->charcount + 1;
                          break;
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7':
                          radix = 8;
                          nfirst = tp->charcount;
                          break;
                        case '.': case 'E': case 'e':
                          radix = 10;
                          exact = false;
                          break;
                        case '/':
                          radix = 10;
                          rational = true;
                          break;
                        default:
                          radix = 0;
                          break;
                        }
                      break;

                    default:
                      /* The radix is known; check that C may continue
                         the number.  */
                      switch (c)
                        {
                        case '.':
                          if (exact && radix == 10 && !rational)
                            exact = false;
                          else
                            radix = 0;
                          break;
                        case '/':
                          if (exact && !rational)
                            rational = true;
                          else
                            radix = 0;
                          break;
                        case 'E': case 'e':
                          if (radix == 10)
                            {
                              if (!rational && !exponent)
                                {
                                  exponent = true;
                                  exact = false;
                                }
                              else
                                radix = 0;
                              break;
                            }
                          /* In other radixes 'E'/'e' is checked as a
                             digit below.  */
                          /*FALLTHROUGH*/
                        default:
                          if (exponent && (c == '+' || c == '-'))
                            break;
                          if ((radix <= 10
                               && !(c >= '0' && c <= '0' + radix - 1))
                              || (radix == 16 && !c_isxdigit (c)))
                            radix = 0;
                          break;
                        }
                      break;
                    }
                }
            }
          else
            {
              /* Already known to be a symbol.  A '#' ends it here, so
                 that the caller can tell "foo" from "foo#bar".  */
              if (c == '#')
                goto done;
            }
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
        }
    }
 done:
  if (c != EOF)
    do_ungetc (c);
  /* It is a number only if a radix was determined and at least one
     character follows the sign/prefix part.  */
  if (radix > 0 && nfirst < tp->charcount)
    return false; /* number */
  else
    return true; /* symbol */
}
406
407
408 /* ========================= Accumulating comments ========================= */
409
410
/* Buffer in which the text of the current comment line is accumulated:
   'buffer' holds 'buflen' characters in use and has 'bufmax' bytes
   allocated.  */
static char *buffer;
static size_t bufmax;
static size_t buflen;
414
415 static inline void
comment_start()416 comment_start ()
417 {
418 buflen = 0;
419 }
420
421 static inline void
comment_add(int c)422 comment_add (int c)
423 {
424 if (buflen >= bufmax)
425 {
426 bufmax = 2 * bufmax + 10;
427 buffer = xrealloc (buffer, bufmax);
428 }
429 buffer[buflen++] = c;
430 }
431
432 static inline void
comment_line_end(size_t chars_to_remove)433 comment_line_end (size_t chars_to_remove)
434 {
435 buflen -= chars_to_remove;
436 while (buflen >= 1
437 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
438 --buflen;
439 if (chars_to_remove == 0 && buflen >= bufmax)
440 {
441 bufmax = 2 * bufmax + 10;
442 buffer = xrealloc (buffer, bufmax);
443 }
444 buffer[buflen] = '\0';
445 savable_comment_add (buffer);
446 }
447
448
/* These are for tracking whether comments count as immediately before
   keyword.  read_object stores the current line number into
   last_comment_line when it reads a comment and into
   last_non_comment_line when it finishes any other object; at each
   newline the saved comments are reset if the latter is greater.  */
static int last_comment_line;
static int last_non_comment_line;
453
454
455 /* ========================= Accumulating messages ========================= */
456
457
/* The message list to which extracted messages are added.  Set by
   extract_librep from the first domain of the list it is given.  */
static message_list_ty *mlp;
459
460
461 /* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
462
463
/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
   Other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,     /* symbol */
  t_string,     /* string */
  t_other,      /* other kind of real object */
  t_dot,        /* '.' pseudo object */
  t_close,      /* ')' or ']' pseudo object */
  t_eof         /* EOF marker */
};
475
/* The result of read_object.  'token' is heap-allocated and owned by
   the object only when type is t_symbol or t_string; free_object
   releases it.  */
struct object
{
  enum object_type type;
  struct token *token;          /* for t_symbol and t_string */
  int line_number_at_start;     /* for t_string */
};
482
483 /* Free the memory pointed to by a 'struct object'. */
484 static inline void
free_object(struct object * op)485 free_object (struct object *op)
486 {
487 if (op->type == t_symbol || op->type == t_string)
488 {
489 free_token (op->token);
490 free (op->token);
491 }
492 }
493
494 /* Convert a t_symbol/t_string token to a char*. */
495 static char *
string_of_object(const struct object * op)496 string_of_object (const struct object *op)
497 {
498 char *str;
499 int n;
500
501 if (!(op->type == t_symbol || op->type == t_string))
502 abort ();
503 n = op->token->charcount;
504 str = (char *) xmalloc (n + 1);
505 memcpy (str, op->token->chars, n);
506 str[n] = '\0';
507 return str;
508 }
509
/* Context lookup table.  Set by extract_librep from its flag_table
   argument and looked up by read_object for the symbol in function
   position.  */
static flag_context_list_table_ty *flag_context_list_table;
512
/* Returns the character represented by an escape sequence.
   C is the character that followed the backslash; the callers in this
   file check it for EOF before calling.  Any further characters that
   the sequence needs are read with do_getc, and a character that does
   not belong to the sequence is pushed back with do_ungetc.

   Recognized: \n \r \f \t \v \a, \^C (control character), up to three
   octal digits, and \x followed by any number of hexadecimal digits.
   Any other character stands for itself.  Octal and hexadecimal values
   are reduced to an unsigned char.

   Returns EOF only when "\^" is followed by end of input.  */
static int
do_getc_escaped (int c)
{
  switch (c)
    {
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 'f':
      return '\f';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case 'a':
      return '\a';
    case '^':
      c = do_getc ();
      if (c == EOF)
        return EOF;
      return c & 0x1f;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
        /* At most three octal digits, so N cannot overflow.  */
        int n = c - '0';

        c = do_getc ();
        if (c != EOF)
          {
            if (c >= '0' && c <= '7')
              {
                n = (n << 3) + (c - '0');
                c = do_getc ();
                if (c != EOF)
                  {
                    if (c >= '0' && c <= '7')
                      n = (n << 3) + (c - '0');
                    else
                      do_ungetc (c);
                  }
              }
            else
              do_ungetc (c);
          }
        return (unsigned char) n;
      }
    case 'x':
      {
        /* The number of hex digits is unbounded, so accumulate in an
           unsigned type: shifting a signed int past its range would be
           undefined behavior.  Only the low 8 bits are returned, and
           those are unaffected by the wrap-around.  */
        unsigned int n = 0;

        for (;;)
          {
            c = do_getc ();
            if (c == EOF)
              break;
            else if (c >= '0' && c <= '9')
              n = (n << 4) + (unsigned int) (c - '0');
            else if (c >= 'A' && c <= 'F')
              n = (n << 4) + (unsigned int) (c - 'A' + 10);
            else if (c >= 'a' && c <= 'f')
              n = (n << 4) + (unsigned int) (c - 'a' + 10);
            else
              {
                do_ungetc (c);
                break;
              }
          }
        return (unsigned char) n;
      }
    default:
      return c;
    }
}
588
/* Read the next object.
   OP receives the result: its type is always set; 'token' is set (and
   owned by *OP, to be released with free_object) only for t_symbol and
   t_string.  OUTER_CONTEXT is the flag context of the position in which
   the object occurs; it is passed on, through inherited_context, to the
   arguments of a list.

   Whitespace and comments are skipped.  Comment text is handed to the
   savable-comment machinery, and last_comment_line and
   last_non_comment_line are updated as objects are read.  Strings found
   in argument positions of a list whose first element is a registered
   keyword are recorded through the arglist parser; when extract_all is
   set, every string is recorded directly.

   Lists and vectors are read by recursion.  Malformed input is
   tolerated silently.  */
static void
read_object (struct object *op, flag_context_ty outer_context)
{
  for (;;)
    {
      int c;

      c = do_getc ();

      switch (c)
        {
        case EOF:
          op->type = t_eof;
          return;

        case '\n':
          /* Comments assumed to be grouped with a message must immediately
             precede it, with no non-whitespace token on a line between
             both.  */
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          continue;

        case ' ': case '\t': case '\f': case '\r':
          continue;

        case '(':
          {
            int arg = 0;                /* Current argument number.  */
            flag_context_list_iterator_ty context_iter;
            const struct callshapes *shapes = NULL;
            struct arglist_parser *argparser = NULL;

            for (;; arg++)
              {
                struct object inner;
                flag_context_ty inner_context;

                /* context_iter is assigned while handling element 0, so
                   it is only consulted for the later elements.  */
                if (arg == 0)
                  inner_context = null_context;
                else
                  inner_context =
                    inherited_context (outer_context,
                                       flag_context_list_iterator_advance (
                                         &context_iter));

                read_object (&inner, inner_context);

                /* Recognize end of list.  */
                if (inner.type == t_close)
                  {
                    op->type = t_other;
                    /* Don't bother converting "()" to "NIL".  */
                    last_non_comment_line = line_number;
                    if (argparser != NULL)
                      arglist_parser_done (argparser, arg);
                    return;
                  }

                /* Dots are not allowed in every position.
                   But be tolerant.  */

                /* EOF inside list is illegal.  But be tolerant.  */
                if (inner.type == t_eof)
                  break;

                if (arg == 0)
                  {
                    /* This is the function position.  */
                    if (inner.type == t_symbol)
                      {
                        char *symbol_name = string_of_object (&inner);
                        void *keyword_value;

                        /* 'shapes' stays NULL when the symbol is not a
                           registered keyword.  */
                        if (hash_find_entry (&keywords,
                                             symbol_name, strlen (symbol_name),
                                             &keyword_value)
                            == 0)
                          shapes = (const struct callshapes *) keyword_value;

                        argparser = arglist_parser_alloc (mlp, shapes);

                        context_iter =
                          flag_context_list_iterator (
                            flag_context_list_table_lookup (
                              flag_context_list_table,
                              symbol_name, strlen (symbol_name)));

                        free (symbol_name);
                      }
                    else
                      context_iter = null_context_list_iterator;
                  }
                else
                  {
                    /* These are the argument positions.  */
                    if (argparser != NULL && inner.type == t_string)
                      arglist_parser_remember (argparser, arg,
                                               string_of_object (&inner),
                                               inner_context,
                                               logical_file_name,
                                               inner.line_number_at_start,
                                               savable_comment);
                  }

                free_object (&inner);
              }

            /* Reached only when the list was cut short by EOF.  */
            if (argparser != NULL)
              arglist_parser_done (argparser, arg);
          }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case '[':
          {
            /* Vector: the elements are read and discarded.  */
            for (;;)
              {
                struct object inner;

                read_object (&inner, null_context);

                /* Recognize end of vector.  */
                if (inner.type == t_close)
                  {
                    op->type = t_other;
                    last_non_comment_line = line_number;
                    return;
                  }

                /* Dots are not allowed.  But be tolerant.  */

                /* EOF inside vector is illegal.  But be tolerant.  */
                if (inner.type == t_eof)
                  break;

                free_object (&inner);
              }
          }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case ')': case ']':
          /* Tell the caller about the end of list or vector.
             Unmatched closing parenthesis is illegal.  But be tolerant.  */
          op->type = t_close;
          last_non_comment_line = line_number;
          return;

        case ',':
          {
            int c = do_getc ();
            /* The ,@ handling inside lists is wrong anyway, because
               ,@form expands to an unknown number of elements.  */
            if (c != EOF && c != '@')
              do_ungetc (c);
          }
          /*FALLTHROUGH*/
        case '\'':
        case '`':
          {
            /* A prefix character followed by one object; the object is
               read and discarded.  */
            struct object inner;

            read_object (&inner, null_context);

            /* Dots and EOF are not allowed here.  But be tolerant.  */

            free_object (&inner);

            op->type = t_other;
            last_non_comment_line = line_number;
            return;
          }

        case ';':
          {
            /* Line comment.  The run of semicolons at its start is not
               part of the saved comment text.  */
            bool all_semicolons = true;

            last_comment_line = line_number;
            comment_start ();
            for (;;)
              {
                int c = do_getc ();
                if (c == EOF || c == '\n' || c == '\f' || c == '\r')
                  break;
                if (c != ';')
                  all_semicolons = false;
                if (!all_semicolons)
                  {
                    /* We skip all leading white space, but not EOLs.  */
                    if (!(buflen == 0 && (c == ' ' || c == '\t')))
                      comment_add (c);
                  }
              }
            comment_line_end (0);
            continue;
          }

        case '"':
          {
            /* String literal.  */
            op->token = (struct token *) xmalloc (sizeof (struct token));
            init_token (op->token);
            op->line_number_at_start = line_number;
            for (;;)
              {
                int c = do_getc ();
                if (c == EOF)
                  /* Invalid input.  Be tolerant, no error message.  */
                  break;
                if (c == '"')
                  break;
                if (c == '\\')
                  {
                    c = do_getc ();
                    if (c == EOF)
                      /* Invalid input.  Be tolerant, no error message.  */
                      break;
                    if (c == '\n')
                      /* Ignore escaped newline.  */
                      ;
                    else
                      {
                        c = do_getc_escaped (c);
                        if (c == EOF)
                          /* Invalid input.  Be tolerant, no error message.  */
                          break;
                        grow_token (op->token);
                        op->token->chars[op->token->charcount++] = c;
                      }
                  }
                else
                  {
                    grow_token (op->token);
                    op->token->chars[op->token->charcount++] = c;
                  }
              }
            op->type = t_string;

            if (extract_all)
              {
                /* Record the string right away, independently of any
                   enclosing keyword call.  */
                lex_pos_ty pos;

                pos.file_name = logical_file_name;
                pos.line_number = op->line_number_at_start;
                remember_a_message (mlp, NULL, string_of_object (op),
                                    null_context, &pos, savable_comment);
              }
            last_non_comment_line = line_number;
            return;
          }

        case '?':
          /* Character literal; its value is read and discarded.  */
          c = do_getc ();
          if (c == EOF)
            /* Invalid input.  Be tolerant, no error message.  */
            ;
          else if (c == '\\')
            {
              c = do_getc ();
              if (c == EOF)
                /* Invalid input.  Be tolerant, no error message.  */
                ;
              else
                {
                  c = do_getc_escaped (c);
                  if (c == EOF)
                    /* Invalid input.  Be tolerant, no error message.  */
                    ;
                }
            }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case '#':
          /* Dispatch macro handling.  */
          c = do_getc ();
          if (c == EOF)
            /* Invalid input.  Be tolerant, no error message.  */
            {
              op->type = t_other;
              return;
            }

          switch (c)
            {
            case '!':
              /* "#!" is treated as a comment only when the stream
                 position is 2, i.e. right after these two characters.  */
              if (ftell (fp) == 2)
                /* Skip comment until !# */
                {
                  c = do_getc ();
                  for (;;)
                    {
                      if (c == EOF)
                        break;
                      if (c == '!')
                        {
                          c = do_getc ();
                          if (c == EOF || c == '#')
                            break;
                        }
                      else
                        c = do_getc ();
                    }
                  if (c == EOF)
                    {
                      /* EOF not allowed here.  But be tolerant.  */
                      op->type = t_eof;
                      return;
                    }
                  continue;
                }
              /*FALLTHROUGH*/
            case '\'':
            case ':':
              {
                /* A prefix followed by one object, which is read and
                   discarded.  */
                struct object inner;
                read_object (&inner, null_context);
                /* Dots and EOF are not allowed here.
                   But be tolerant.  */
                free_object (&inner);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            case '[':
            case '(':
              {
                /* Push the bracket back so that the recursive call
                   reads the whole vector or list.  */
                struct object inner;
                do_ungetc (c);
                read_object (&inner, null_context);
                /* Dots and EOF are not allowed here.
                   But be tolerant.  */
                free_object (&inner);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            case '|':
              {
                /* Block comment #| ... |#.  'depth' counts the nested
                   "#|" openers that are still unclosed.  The text is
                   saved line by line.  */
                int depth = 0;

                comment_start ();
                c = do_getc ();
                for (;;)
                  {
                    if (c == EOF)
                      break;
                    if (c == '|')
                      {
                        c = do_getc ();
                        if (c == EOF)
                          break;
                        if (c == '#')
                          {
                            if (depth == 0)
                              {
                                /* End of the outermost comment.  */
                                comment_line_end (0);
                                break;
                              }
                            depth--;
                            comment_add ('|');
                            comment_add ('#');
                            c = do_getc ();
                          }
                        else
                          comment_add ('|');
                      }
                    else if (c == '#')
                      {
                        c = do_getc ();
                        if (c == EOF)
                          break;
                        comment_add ('#');
                        if (c == '|')
                          {
                            depth++;
                            comment_add ('|');
                            c = do_getc ();
                          }
                      }
                    else
                      {
                        /* We skip all leading white space.  */
                        if (!(buflen == 0 && (c == ' ' || c == '\t')))
                          comment_add (c);
                        if (c == '\n')
                          {
                            /* Save the line without its newline and
                               start the next one.  */
                            comment_line_end (1);
                            comment_start ();
                          }
                        c = do_getc ();
                      }
                  }
                if (c == EOF)
                  {
                    /* EOF not allowed here.  But be tolerant.  */
                    op->type = t_eof;
                    return;
                  }
                last_comment_line = line_number;
                continue;
              }

            case '\\':
              {
                /* "#\" followed by a token, which is read and
                   discarded.  */
                struct token token;
                int first = '\\';
                read_token (&token, &first);
                free_token (&token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            case 'T': case 't':
            case 'F': case 'f':
              op->type = t_other;
              last_non_comment_line = line_number;
              return;

            case 'B': case 'b':
            case 'O': case 'o':
            case 'D': case 'd':
            case 'X': case 'x':
            case 'E': case 'e':
            case 'I': case 'i':
              {
                /* Number with a '#' prefix: push the letter back and
                   let read_token read the token starting at the '#'.  */
                struct token token;
                do_ungetc (c);
                c = '#';
                read_token (&token, &c);
                free_token (&token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            default:
              /* Invalid input.  Be tolerant, no error message.  */
              op->type = t_other;
              last_non_comment_line = line_number;
              return;
            }

          /*NOTREACHED*/
          abort ();

        default:
          /* Read a token.  */
          {
            bool symbol;

            op->token = (struct token *) xmalloc (sizeof (struct token));
            symbol = read_token (op->token, &c);
            if (op->token->charcount == 1 && op->token->chars[0] == '.')
              {
                free_token (op->token);
                free (op->token);
                op->type = t_dot;
                last_non_comment_line = line_number;
                return;
              }
            if (!symbol)
              {
                /* A number.  */
                free_token (op->token);
                free (op->token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }
            /* Distinguish between "foo" and "foo#bar".  */
            c = do_getc ();
            if (c == '#')
              {
                /* "foo#bar": read the part after the '#' as well and
                   report the whole thing as t_other.  */
                struct token second_token;

                free_token (op->token);
                free (op->token);
                read_token (&second_token, NULL);
                free_token (&second_token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }
            else
              {
                if (c != EOF)
                  do_ungetc (c);
                op->type = t_symbol;
                last_non_comment_line = line_number;
                return;
              }
          }
        }
    }
}
1091
1092
1093 void
extract_librep(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)1094 extract_librep (FILE *f,
1095 const char *real_filename, const char *logical_filename,
1096 flag_context_list_table_ty *flag_table,
1097 msgdomain_list_ty *mdlp)
1098 {
1099 mlp = mdlp->item[0]->messages;
1100
1101 fp = f;
1102 real_file_name = real_filename;
1103 logical_file_name = xstrdup (logical_filename);
1104 line_number = 1;
1105
1106 last_comment_line = -1;
1107 last_non_comment_line = -1;
1108
1109 flag_context_list_table = flag_table;
1110
1111 init_keywords ();
1112
1113 /* Eat tokens until eof is seen. When read_object returns
1114 due to an unbalanced closing parenthesis, just restart it. */
1115 do
1116 {
1117 struct object toplevel_object;
1118
1119 read_object (&toplevel_object, null_context);
1120
1121 if (toplevel_object.type == t_eof)
1122 break;
1123
1124 free_object (&toplevel_object);
1125 }
1126 while (!feof (fp));
1127
1128 /* Close scanner. */
1129 fp = NULL;
1130 real_file_name = NULL;
1131 logical_file_name = NULL;
1132 line_number = 0;
1133 }
1134