xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/x-awk.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* xgettext awk backend.
2    Copyright (C) 2002-2003, 2005-2006 Free Software Foundation, Inc.
3 
4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "message.h"
31 #include "xgettext.h"
32 #include "x-awk.h"
33 #include "error.h"
34 #include "error-progname.h"
35 #include "xalloc.h"
36 #include "exit.h"
37 #include "gettext.h"
38 
/* Shorthand: _(s) expands to gettext (s); used for the diagnostics below.  */
#define _(s) gettext(s)
40 
41 
42 /* The awk syntax is defined in the gawk manual page and documentation.
43    See also gawk/awkgram.y.  */
44 
45 
46 /* ====================== Keyword set customization.  ====================== */
47 
/* If true extract all strings.  Set by x_awk_extract_all ().  */
static bool extract_all = false;

/* Table of keywords registered through x_awk_keyword ().  It is initialized
   lazily on the first registration.  */
static hash_table keywords;
/* True until either x_awk_keyword (NULL) is called or init_keywords () has
   installed the built-in keywords.  */
static bool default_keywords = true;
53 
54 
55 void
x_awk_extract_all()56 x_awk_extract_all ()
57 {
58   extract_all = true;
59 }
60 
61 
62 void
x_awk_keyword(const char * name)63 x_awk_keyword (const char *name)
64 {
65   if (name == NULL)
66     default_keywords = false;
67   else
68     {
69       const char *end;
70       struct callshape shape;
71       const char *colon;
72 
73       if (keywords.table == NULL)
74 	hash_init (&keywords, 100);
75 
76       split_keywordspec (name, &end, &shape);
77 
78       /* The characters between name and end should form a valid C identifier.
79 	 A colon means an invalid parse in split_keywordspec().  */
80       colon = strchr (name, ':');
81       if (colon == NULL || colon >= end)
82 	insert_keyword_callshape (&keywords, name, end - name, &shape);
83     }
84 }
85 
86 /* Finish initializing the keywords hash table.
87    Called after argument processing, before each file is processed.  */
88 static void
init_keywords()89 init_keywords ()
90 {
91   if (default_keywords)
92     {
93       /* When adding new keywords here, also update the documentation in
94 	 xgettext.texi!  */
95       x_awk_keyword ("dcgettext");
96       x_awk_keyword ("dcngettext:1,2");
97       default_keywords = false;
98     }
99 }
100 
/* Record the format-string flags of the awk functions known to this
   backend: the message arguments of dcgettext/dcngettext pass the
   awk-format property through, and the first argument of printf is an awk
   format string.  */
void
init_flag_table_awk (void)
{
  xgettext_record_flag ("dcgettext:1:pass-awk-format");
  xgettext_record_flag ("dcngettext:1:pass-awk-format");
  xgettext_record_flag ("dcngettext:2:pass-awk-format");
  xgettext_record_flag ("printf:1:awk-format");
}
109 
110 
111 /* ======================== Reading of characters.  ======================== */
112 
/* Real filename, used in error messages about the input file.  */
static const char *real_file_name;

/* Logical filename and line number, used to label the extracted messages.
   line_number is maintained by phase1_getc () and phase1_ungetc ().  */
static char *logical_file_name;
static int line_number;

/* The input file stream.  */
static FILE *fp;

/* These are for tracking whether comments count as immediately before
   keyword.  last_comment_line is updated by phase2_getc (),
   last_non_comment_line by x_awk_lex ().  */
static int last_comment_line;
static int last_non_comment_line;
127 
128 
129 /* 1. line_number handling.  */
130 
131 static int
phase1_getc()132 phase1_getc ()
133 {
134   int c = getc (fp);
135 
136   if (c == EOF)
137     {
138       if (ferror (fp))
139 	error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
140 	       real_file_name);
141       return EOF;
142     }
143 
144   if (c == '\n')
145     line_number++;
146 
147   return c;
148 }
149 
150 /* Supports only one pushback character.  */
151 static void
phase1_ungetc(int c)152 phase1_ungetc (int c)
153 {
154   if (c != EOF)
155     {
156       if (c == '\n')
157 	--line_number;
158 
159       ungetc (c, fp);
160     }
161 }
162 
163 
164 /* 2. Replace each comment that is not inside a string literal or regular
165    expression with a newline character.  We need to remember the comment
166    for later, because it may be attached to a keyword string.  */
167 
168 static int
phase2_getc()169 phase2_getc ()
170 {
171   static char *buffer;
172   static size_t bufmax;
173   size_t buflen;
174   int lineno;
175   int c;
176 
177   c = phase1_getc ();
178   if (c == '#')
179     {
180       buflen = 0;
181       lineno = line_number;
182       for (;;)
183 	{
184 	  c = phase1_getc ();
185 	  if (c == '\n' || c == EOF)
186 	    break;
187 	  /* We skip all leading white space, but not EOLs.  */
188 	  if (!(buflen == 0 && (c == ' ' || c == '\t')))
189 	    {
190 	      if (buflen >= bufmax)
191 		{
192 		  bufmax = 2 * bufmax + 10;
193 		  buffer = xrealloc (buffer, bufmax);
194 		}
195 	      buffer[buflen++] = c;
196 	    }
197 	}
198       if (buflen >= bufmax)
199 	{
200 	  bufmax = 2 * bufmax + 10;
201 	  buffer = xrealloc (buffer, bufmax);
202 	}
203       buffer[buflen] = '\0';
204       savable_comment_add (buffer);
205       last_comment_line = lineno;
206     }
207   return c;
208 }
209 
/* Push C back for the next phase2_getc () call; EOF is ignored.
   Supports only one pushback character.  */
static void
phase2_ungetc (int c)
{
  if (c == EOF)
    return;

  phase1_ungetc (c);
}
217 
218 
219 /* ========================== Reading of tokens.  ========================== */
220 
221 
/* Kinds of token produced by x_awk_lex ().  */
enum token_type_ty
{
  token_type_eof,
  token_type_lparen,		/* ( */
  token_type_rparen,		/* ) */
  token_type_comma,		/* , */
  token_type_string,		/* "abc" */
  token_type_i18nstring,	/* _"abc" */
  token_type_symbol,		/* symbol, number */
  token_type_semicolon,		/* ; */
  token_type_other		/* regexp, misc. operator */
};
typedef enum token_type_ty token_type_ty;

/* A token as filled in by x_awk_lex ().  */
typedef struct token_ty token_ty;
struct token_ty
{
  token_type_ty type;
  char *string;		/* for token_type_{symbol,string,i18nstring} */
  int line_number;	/* value of line_number when the token was started */
};
243 
244 
245 /* 7. Replace escape sequences within character strings with their
246    single character equivalents.  */
247 
248 #define P7_QUOTES (1000 + '"')
249 
250 static int
phase7_getc()251 phase7_getc ()
252 {
253   int c;
254 
255   for (;;)
256     {
257       /* Use phase 1, because phase 2 elides comments.  */
258       c = phase1_getc ();
259 
260       if (c == EOF || c == '\n')
261 	break;
262       if (c == '"')
263 	return P7_QUOTES;
264       if (c != '\\')
265 	return c;
266       c = phase1_getc ();
267       if (c == EOF)
268 	break;
269       if (c != '\n')
270 	switch (c)
271 	  {
272 	  case 'a':
273 	    return '\a';
274 	  case 'b':
275 	    return '\b';
276 	  case 'f':
277 	    return '\f';
278 	  case 'n':
279 	    return '\n';
280 	  case 'r':
281 	    return '\r';
282 	  case 't':
283 	    return '\t';
284 	  case 'v':
285 	    return '\v';
286 	  case '0': case '1': case '2': case '3': case '4':
287 	  case '5': case '6': case '7':
288 	    {
289 	      int n = c - '0';
290 
291 	      c = phase1_getc ();
292 	      if (c != EOF)
293 		{
294 		  if (c >= '0' && c <= '7')
295 		    {
296 		      n = (n << 3) + (c - '0');
297 		      c = phase1_getc ();
298 		      if (c != EOF)
299 			{
300 			  if (c >= '0' && c <= '7')
301 			    n = (n << 3) + (c - '0');
302 			  else
303 			    phase1_ungetc (c);
304 			}
305 		    }
306 		  else
307 		    phase1_ungetc (c);
308 		}
309 	      return (unsigned char) n;
310 	    }
311 	  case 'x':
312 	    {
313 	      int n = 0;
314 
315 	      for (;;)
316 		{
317 		  c = phase1_getc ();
318 		  if (c == EOF)
319 		    break;
320 		  else if (c >= '0' && c <= '9')
321 		    n = (n << 4) + (c - '0');
322 		  else if (c >= 'A' && c <= 'F')
323 		    n = (n << 4) + (c - 'A' + 10);
324 		  else if (c >= 'a' && c <= 'f')
325 		    n = (n << 4) + (c - 'a' + 10);
326 		  else
327 		    {
328 		      phase1_ungetc (c);
329 		      break;
330 		    }
331 		}
332 	      return (unsigned char) n;
333 	    }
334 	  default:
335 	    return c;
336 	  }
337     }
338 
339   phase1_ungetc (c);
340   error_with_progname = false;
341   error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
342 	 line_number);
343   error_with_progname = true;
344   return P7_QUOTES;
345 }
346 
347 
348 /* Free the memory pointed to by a 'struct token_ty'.  */
349 static inline void
free_token(token_ty * tp)350 free_token (token_ty *tp)
351 {
352   switch (tp->type)
353     {
354     case token_type_string:
355     case token_type_i18nstring:
356     case token_type_symbol:
357       free (tp->string);
358       break;
359     default:
360       break;
361     }
362 }
363 
364 
365 /* Combine characters into tokens.  Discard whitespace.  */
366 
/* There is an ambiguity about '/': It can start a division operator ('/' or
   '/=') or it can start a regular expression.  The distinction is important
   because inside regular expressions, '#' and '"' lose their special
   meanings.
   If you look at the awk grammar, you see that the operator is only allowed
   right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
   can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
   So we prefer the division operator interpretation only right after
   symbol, string, number, ')', ']', with whitespace but no newline allowed
   in between.  */
static bool prefer_division_over_regexp;
377 
/* Read the next token from the input and store it in *TP.
   Blanks, newlines, comments (which phase2_getc () turns into newlines) and
   stray backslashes are skipped.  TP->line_number is set for every token;
   TP->string is set to a freshly allocated copy only for symbol, string and
   i18nstring tokens.
   As side effects, prefer_division_over_regexp is updated for the token
   just read and last_non_comment_line records the line of the last
   token.  */
static void
x_awk_lex (token_ty *tp)
{
  /* Scratch buffer for symbol and string contents, reused across calls.  */
  static char *buffer;
  static int bufmax;
  int bufpos;
  int c;

  for (;;)
    {
      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
	{
	case EOF:
	  tp->type = token_type_eof;
	  return;

	case '\n':
	  /* Drop the saved comments once a token has appeared after the
	     last comment line.  */
	  if (last_non_comment_line > last_comment_line)
	    savable_comment_reset ();
	  /* Newline is not allowed inside expressions.  It usually
	     introduces a fresh statement.
	     FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
	     do *not* introduce a fresh statement.  */
	  prefer_division_over_regexp = false;
	  /* FALLTHROUGH */
	case '\t':
	case ' ':
	  /* Ignore whitespace and comments.  */
	  continue;

	case '\\':
	  /* Backslash ought to be immediately followed by a newline.  */
	  continue;
	}

      last_non_comment_line = tp->line_number;

      switch (c)
	{
	case '.':
	  {
	    /* Peek at the next character: a '.' followed by a digit starts
	       a number, any other '.' is an operator.  */
	    int c2 = phase2_getc ();
	    phase2_ungetc (c2);
	    if (!(c2 >= '0' && c2 <= '9'))
	      {

		tp->type = token_type_other;
		prefer_division_over_regexp = false;
		return;
	      }
	  }
	  /* FALLTHROUGH */
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	  /* Symbol, or part of a number.  */
	  bufpos = 0;
	  for (;;)
	    {
	      if (bufpos >= bufmax)
		{
		  bufmax = 2 * bufmax + 10;
		  buffer = xrealloc (buffer, bufmax);
		}
	      buffer[bufpos++] = c;
	      c = phase2_getc ();
	      switch (c)
		{
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
		case 'Y': case 'Z':
		case '_':
		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
		case 'y': case 'z':
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		  continue;
		default:
		  /* A lone '_' directly followed by '"' introduces an
		     i18n string _"...".  */
		  if (bufpos == 1 && buffer[0] == '_' && c == '"')
		    {
		      tp->type = token_type_i18nstring;
		      goto case_string;
		    }
		  phase2_ungetc (c);
		  break;
		}
	      break;
	    }
	  /* Make room for the terminating NUL.  */
	  if (bufpos >= bufmax)
	    {
	      bufmax = 2 * bufmax + 10;
	      buffer = xrealloc (buffer, bufmax);
	    }
	  buffer[bufpos] = '\0';
	  tp->string = xstrdup (buffer);
	  tp->type = token_type_symbol;
	  /* Most identifiers can be variable names; after them we must
	     interpret '/' as division operator.  But for awk's builtin
	     keywords we have three cases:
	     (a) Must interpret '/' as division operator. "length".
	     (b) Must interpret '/' as start of a regular expression.
		 "do", "exit", "print", "printf", "return".
	     (c) '/' after this keyword is invalid anyway. All others.
	     I used the following script for the distinction.
		for k in $awk_keywords; do
		  echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
		done
	   */
	  if (strcmp (buffer, "do") == 0
	      || strcmp (buffer, "exit") == 0
	      || strcmp (buffer, "print") == 0
	      || strcmp (buffer, "printf") == 0
	      || strcmp (buffer, "return") == 0)
	    prefer_division_over_regexp = false;
	  else
	    prefer_division_over_regexp = true;
	  return;

	case '"':
	  tp->type = token_type_string;
	case_string:
	  /* Also entered via goto from the symbol case, with tp->type
	     already set to token_type_i18nstring.  */
	  bufpos = 0;
	  for (;;)
	    {
	      c = phase7_getc ();
	      if (c == EOF || c == P7_QUOTES)
		break;
	      if (bufpos >= bufmax)
		{
		  bufmax = 2 * bufmax + 10;
		  buffer = xrealloc (buffer, bufmax);
		}
	      buffer[bufpos++] = c;
	    }
	  /* Make room for the terminating NUL.  */
	  if (bufpos >= bufmax)
	    {
	      bufmax = 2 * bufmax + 10;
	      buffer = xrealloc (buffer, bufmax);
	    }
	  buffer[bufpos] = '\0';
	  tp->string = xstrdup (buffer);
	  prefer_division_over_regexp = true;
	  return;

	case '(':
	  tp->type = token_type_lparen;
	  prefer_division_over_regexp = false;
	  return;

	case ')':
	  tp->type = token_type_rparen;
	  prefer_division_over_regexp = true;
	  return;

	case ',':
	  tp->type = token_type_comma;
	  prefer_division_over_regexp = false;
	  return;

	case ';':
	  tp->type = token_type_semicolon;
	  prefer_division_over_regexp = false;
	  return;

	case ']':
	  tp->type = token_type_other;
	  prefer_division_over_regexp = true;
	  return;

	case '/':
	  if (!prefer_division_over_regexp)
	    {
	      /* Regular expression.
	         Counting brackets is non-trivial. [[] is balanced, and so is
	         [\]]. Also, /[/]/ is balanced and ends at the third slash.
	         Do not count [ or ] if either one is preceded by a \.
	         A '[' should be counted if
	          a) it is the first one so far (brackets == 0), or
	          b) it is the '[' in '[:'.
	         A ']' should be counted if not preceded by a \.
	         According to POSIX, []] is how you put a ] into a set.
	         Try to handle that too.
	       */
	      int brackets = 0;
	      bool pos0 = true;		/* true at start of regexp */
	      bool pos1_open = false;	/* true after [ at start of regexp */
	      bool pos2_open_not = false; /* true after [^ at start of regexp */

	      /* The regexp body is read with phase 1, so '#' and '"' get no
		 special treatment here.  */
	      for (;;)
		{
		  c = phase1_getc ();

		  if (c == EOF || c == '\n')
		    {
		      phase1_ungetc (c);
		      error_with_progname = false;
		      error (0, 0, _("%s:%d: warning: unterminated regular expression"),
			     logical_file_name, line_number);
		      error_with_progname = true;
		      break;
		    }
		  else if (c == '[')
		    {
		      if (brackets == 0)
			brackets++;
		      else
			{
			  /* Inside a bracket expression only '[:' opens a
			     nested level; peek and push back.  */
			  c = phase1_getc ();
			  if (c == ':')
			    brackets++;
			  phase1_ungetc (c);
			}
		      if (pos0)
			{
			  pos0 = false;
			  pos1_open = true;
			  continue;
			}
		    }
		  else if (c == ']')
		    {
		      /* A ']' right after the leading '[' or '[^' is a
			 literal and does not close the set.  */
		      if (!(pos1_open || pos2_open_not))
			brackets--;
		    }
		  else if (c == '^')
		    {
		      if (pos1_open)
			{
			  pos1_open = false;
			  pos2_open_not = true;
			  continue;
			}
		    }
		  else if (c == '\\')
		    {
		      /* Consume the escaped character without interpreting
			 it.  */
		      c = phase1_getc ();
		      /* Backslash-newline is valid and ignored.  */
		    }
		  else if (c == '/')
		    {
		      /* A slash ends the regexp only outside brackets.  */
		      if (brackets <= 0)
			break;
		    }

		  pos0 = false;
		  pos1_open = false;
		  pos2_open_not = false;
		}

	      tp->type = token_type_other;
	      prefer_division_over_regexp = false;
	      return;
	    }
	  /* FALLTHROUGH */

	default:
	  /* We could carefully recognize each of the 2 and 3 character
	     operators, but it is not necessary, as we only need to recognize
	     gettext invocations.  Don't bother.  */
	  tp->type = token_type_other;
	  prefer_division_over_regexp = false;
	  return;
	}
    }
}
661 
662 
663 /* ========================= Extracting strings.  ========================== */
664 
665 
/* Context lookup table.  Set by extract_awk () from its flag_table argument
   and consulted by extract_parenthesized () for each symbol.  */
static flag_context_list_table_ty *flag_context_list_table;
668 
669 
/* The file is broken into tokens.  Scan the token stream, looking for
   a keyword, followed by a left paren, followed by a string.  When we
   see this sequence, we have something to remember.  We assume we are
   looking at a valid awk program, and leave the complaints about
   the grammar to the awk interpreter.
675 
676      Normal handling: Look for
677        keyword ( ... msgid ... )
678      Plural handling: Look for
679        keyword ( ... msgid ... msgid_plural ... )
680 
681    We use recursion because the arguments before msgid or between msgid
682    and msgid_plural can contain subexpressions of the same form.  */
683 
684 
685 /* Extract messages until the next balanced closing parenthesis.
686    Extracted messages are added to MLP.
687    Return true upon eof, false upon closing parenthesis.  */
688 static bool
extract_parenthesized(message_list_ty * mlp,flag_context_ty outer_context,flag_context_list_iterator_ty context_iter,struct arglist_parser * argparser)689 extract_parenthesized (message_list_ty *mlp,
690 		       flag_context_ty outer_context,
691 		       flag_context_list_iterator_ty context_iter,
692 		       struct arglist_parser *argparser)
693 {
694   /* Current argument number.  */
695   int arg = 1;
696   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
697   int state;
698   /* Parameters of the keyword just seen.  Defined only in state 1.  */
699   const struct callshapes *next_shapes = NULL;
700   /* Whether to implicitly assume the next tokens are arguments even without
701      a '('.  */
702   bool next_is_argument = false;
703   /* Context iterator that will be used if the next token is a '('.  */
704   flag_context_list_iterator_ty next_context_iter =
705     passthrough_context_list_iterator;
706   /* Current context.  */
707   flag_context_ty inner_context =
708     inherited_context (outer_context,
709 		       flag_context_list_iterator_advance (&context_iter));
710 
711   /* Start state is 0.  */
712   state = 0;
713 
714   for (;;)
715     {
716       token_ty token;
717 
718       x_awk_lex (&token);
719 
720       if (next_is_argument && token.type != token_type_lparen)
721 	{
722 	  /* An argument list starts, even though there is no '('.  */
723 	  context_iter = next_context_iter;
724 	  outer_context = inner_context;
725 	  inner_context =
726 	    inherited_context (outer_context,
727 			       flag_context_list_iterator_advance (
728 				 &context_iter));
729 	}
730 
731       switch (token.type)
732 	{
733 	case token_type_symbol:
734 	  {
735 	    void *keyword_value;
736 
737 	    if (hash_find_entry (&keywords, token.string, strlen (token.string),
738 				 &keyword_value)
739 		== 0)
740 	      {
741 		next_shapes = (const struct callshapes *) keyword_value;
742 		state = 1;
743 	      }
744 	    else
745 	      state = 0;
746 	  }
747 	  next_is_argument =
748 	    (strcmp (token.string, "print") == 0
749 	     || strcmp (token.string, "printf") == 0);
750 	  next_context_iter =
751 	    flag_context_list_iterator (
752 	      flag_context_list_table_lookup (
753 		flag_context_list_table,
754 		token.string, strlen (token.string)));
755 	  free (token.string);
756 	  continue;
757 
758 	case token_type_lparen:
759 	  if (extract_parenthesized (mlp, inner_context, next_context_iter,
760 				     arglist_parser_alloc (mlp,
761 							   state ? next_shapes : NULL)))
762 	    {
763 	      arglist_parser_done (argparser, arg);
764 	      return true;
765 	    }
766 	  next_is_argument = false;
767 	  next_context_iter = null_context_list_iterator;
768 	  state = 0;
769 	  continue;
770 
771 	case token_type_rparen:
772 	  arglist_parser_done (argparser, arg);
773 	  return false;
774 
775 	case token_type_comma:
776 	  arg++;
777 	  inner_context =
778 	    inherited_context (outer_context,
779 			       flag_context_list_iterator_advance (
780 				 &context_iter));
781 	  next_is_argument = false;
782 	  next_context_iter = passthrough_context_list_iterator;
783 	  state = 0;
784 	  continue;
785 
786 	case token_type_string:
787 	  {
788 	    lex_pos_ty pos;
789 	    pos.file_name = logical_file_name;
790 	    pos.line_number = token.line_number;
791 
792 	    if (extract_all)
793 	      remember_a_message (mlp, NULL, token.string, inner_context, &pos,
794 				  savable_comment);
795 	    else
796 	      arglist_parser_remember (argparser, arg, token.string,
797 				       inner_context,
798 				       pos.file_name, pos.line_number,
799 				       savable_comment);
800 	  }
801 	  next_is_argument = false;
802 	  next_context_iter = null_context_list_iterator;
803 	  state = 0;
804 	  continue;
805 
806 	case token_type_i18nstring:
807 	  {
808 	    lex_pos_ty pos;
809 	    pos.file_name = logical_file_name;
810 	    pos.line_number = token.line_number;
811 
812 	    remember_a_message (mlp, NULL, token.string, inner_context, &pos,
813 				savable_comment);
814 	  }
815 	  next_is_argument = false;
816 	  next_context_iter = null_context_list_iterator;
817 	  state = 0;
818 	  continue;
819 
820 	case token_type_semicolon:
821 	  /* An argument list ends, and a new statement begins.  */
822 	  /* FIXME: Should handle newline that acts as statement separator
823 	     in the same way.  */
824 	  /* FIXME: Instead of resetting outer_context here, it may be better
825 	     to recurse in the next_is_argument handling above, waiting for
826 	     the next semicolon or other statement terminator.  */
827 	  outer_context = null_context;
828 	  context_iter = null_context_list_iterator;
829 	  next_is_argument = false;
830 	  next_context_iter = passthrough_context_list_iterator;
831 	  inner_context =
832 	    inherited_context (outer_context,
833 			       flag_context_list_iterator_advance (
834 				 &context_iter));
835 	  state = 0;
836 	  continue;
837 
838 	case token_type_eof:
839 	  arglist_parser_done (argparser, arg);
840 	  return true;
841 
842 	case token_type_other:
843 	  next_is_argument = false;
844 	  next_context_iter = null_context_list_iterator;
845 	  state = 0;
846 	  continue;
847 
848 	default:
849 	  abort ();
850 	}
851     }
852 }
853 
854 
855 void
extract_awk(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)856 extract_awk (FILE *f,
857 	     const char *real_filename, const char *logical_filename,
858 	     flag_context_list_table_ty *flag_table,
859 	     msgdomain_list_ty *mdlp)
860 {
861   message_list_ty *mlp = mdlp->item[0]->messages;
862 
863   fp = f;
864   real_file_name = real_filename;
865   logical_file_name = xstrdup (logical_filename);
866   line_number = 1;
867 
868   last_comment_line = -1;
869   last_non_comment_line = -1;
870 
871   prefer_division_over_regexp = false;
872 
873   flag_context_list_table = flag_table;
874 
875   init_keywords ();
876 
877   /* Eat tokens until eof is seen.  When extract_parenthesized returns
878      due to an unbalanced closing parenthesis, just restart it.  */
879   while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
880 				 arglist_parser_alloc (mlp, NULL)))
881     ;
882 
883   fp = NULL;
884   real_file_name = NULL;
885   logical_file_name = NULL;
886   line_number = 0;
887 }
888