xref: /openbsd-src/gnu/usr.bin/gcc/gcc/cpptrad.c (revision dd6081ec6b253c97e7202ea58b8ccc493dc34c6c)
1 /* CPP Library - traditional lexical analysis and macro expansion.
2    Copyright (C) 2002 Free Software Foundation, Inc.
3    Contributed by Neil Booth, May 2002
4 
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
8 later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
18 
19 #include "config.h"
20 #include "system.h"
21 #include "cpplib.h"
22 #include "cpphash.h"
23 
24 /* The replacement text of a function-like macro is stored as a
25    contiguous sequence of aligned blocks, each representing the text
26    between subsequent parameters.
27 
28    Each block comprises the text between its surrounding parameters,
29    the length of that text, and the one-based index of the following
30    parameter.  The final block in the replacement text is easily
31    recognizable as it has an argument index of zero.  */
32 
33 struct block
34 {
35   unsigned int text_len;
36   unsigned short arg_index;
37   uchar text[1];
38 };
39 
40 #define BLOCK_HEADER_LEN offsetof (struct block, text)
41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42 
43 /* Structure holding information about a function-like macro
44    invocation.  */
45 struct fun_macro
46 {
47   /* Memory buffer holding the trad_arg array.  */
48   _cpp_buff *buff;
49 
50   /* An array of size the number of macro parameters + 1, containing
51      the offsets of the start of each macro argument in the output
52      buffer.  The argument continues until the character before the
53      start of the next one.  */
54   size_t *args;
55 
56   /* The hashnode of the macro.  */
57   cpp_hashnode *node;
58 
59   /* The offset of the macro name in the output buffer.  */
60   size_t offset;
61 
62   /* The line the macro name appeared on.  */
63   unsigned int line;
64 
65   /* Zero-based index of argument being currently lexed.  */
66   unsigned int argc;
67 };
68 
/* State of the lexer.  Its main purpose is to suppress macro
   expansion in the places where it must not happen.  */
enum ls
{
  ls_none = 0,		/* Normal state.  */
  ls_fun_open,		/* Looking for the '(' of a macro invocation.  */
  ls_fun_close,		/* Looking for the ')' of a macro invocation.  */
  ls_defined,		/* Just seen "defined".  */
  ls_defined_close,	/* Looking for the ')' of defined().  */
  ls_hash,		/* Just seen '#' in a preprocessor conditional.  */
  ls_predicate,		/* Just seen the predicate; a '(' may follow.  */
  ls_answer		/* Inside the answer to a predicate.  */
};
78 
79 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop cpplex.c
80    from recognizing comments and directives during its lexing pass.  */
81 
82 static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
83 static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
84 						   const uchar *));
85 static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *,
86 					     int));
87 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
88 static const uchar *copy_comment PARAMS ((cpp_reader *, const uchar *, int));
89 static void scan_out_logical_line PARAMS ((cpp_reader *pfile, cpp_macro *));
90 static void check_output_buffer PARAMS ((cpp_reader *, size_t));
91 static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
92 static bool scan_parameters PARAMS ((cpp_reader *, cpp_macro *));
93 static bool recursive_macro PARAMS ((cpp_reader *, cpp_hashnode *));
94 static void save_replacement_text PARAMS ((cpp_reader *, cpp_macro *,
95 					   unsigned int));
96 static void maybe_start_funlike PARAMS ((cpp_reader *, cpp_hashnode *,
97 					 const uchar *, struct fun_macro *));
98 static void save_argument PARAMS ((struct fun_macro *, size_t));
99 static void replace_args_and_push PARAMS ((cpp_reader *, struct fun_macro *));
100 static size_t canonicalize_text PARAMS ((uchar *, const uchar *, size_t,
101 					 uchar *));
102 
103 /* Ensures we have N bytes' space in the output buffer, and
104    reallocates it if not.  */
105 static void
check_output_buffer(pfile,n)106 check_output_buffer (pfile, n)
107      cpp_reader *pfile;
108      size_t n;
109 {
110   /* We might need two bytes to terminate an unterminated comment, and
111      one more to terminate the line with a NUL.  */
112   n += 2 + 1;
113 
114   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
115     {
116       size_t size = pfile->out.cur - pfile->out.base;
117       size_t new_size = (size + n) * 3 / 2;
118 
119       pfile->out.base
120 	= (uchar *) xrealloc (pfile->out.base, new_size);
121       pfile->out.limit = pfile->out.base + new_size;
122       pfile->out.cur = pfile->out.base + size;
123     }
124 }
125 
126 /* To be called whenever a newline character is encountered in the
127    input file, at CUR.  Handles DOS, Mac and Unix ends of line, and
128    increments pfile->line.
129 
130    Returns a pointer the character after the newline sequence.  */
131 static const uchar *
handle_newline(pfile,cur)132 handle_newline (pfile, cur)
133      cpp_reader *pfile;
134      const uchar *cur;
135 {
136   pfile->line++;
137   if (cur[0] + cur[1] == '\r' + '\n')
138     cur++;
139   return cur + 1;
140 }
141 
142 /* CUR points to any character in the current context, not necessarily
143    a backslash.  Advances CUR until all escaped newlines are skipped,
144    and returns the new position without updating the context.
145 
146    Warns if a file buffer ends in an escaped newline.  */
147 static const uchar *
skip_escaped_newlines(pfile,cur)148 skip_escaped_newlines (pfile, cur)
149      cpp_reader *pfile;
150      const uchar *cur;
151 {
152   const uchar *orig_cur = cur;
153 
154   while (*cur == '\\' && is_vspace (cur[1]))
155     cur = handle_newline (pfile, cur + 1);
156 
157   if (cur != orig_cur && cur == RLIMIT (pfile->context) && pfile->buffer->inc)
158     cpp_error (pfile, DL_PEDWARN, "backslash-newline at end of file");
159 
160   return cur;
161 }
162 
163 /* CUR points to the asterisk introducing a comment in the current
164    context.  IN_DEFINE is true if we are in the replacement text of a
165    macro.
166 
167    The asterisk and following comment is copied to the buffer pointed
168    to by pfile->out.cur, which must be of sufficient size.
169    Unterminated comments are diagnosed, and correctly terminated in
170    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
171    -C, -CC and pfile->state.in_directive.
172 
173    Returns a pointer to the first character after the comment in the
174    input buffer.  */
175 static const uchar *
copy_comment(pfile,cur,in_define)176 copy_comment (pfile, cur, in_define)
177      cpp_reader *pfile;
178      const uchar *cur;
179      int in_define;
180 {
181   unsigned int from_line = pfile->line;
182   const uchar *limit = RLIMIT (pfile->context);
183   uchar *out = pfile->out.cur;
184 
185   do
186     {
187       unsigned int c = *cur++;
188       *out++ = c;
189 
190       if (c == '/')
191 	{
192 	  /* An immediate slash does not terminate the comment.  */
193 	  if (out[-2] == '*' && out - 2 > pfile->out.cur)
194 	    goto done;
195 
196 	  if (*cur == '*' && cur[1] != '/'
197 	      && CPP_OPTION (pfile, warn_comments))
198 	    cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
199 				 "\"/*\" within comment");
200 	}
201       else if (is_vspace (c))
202 	{
203 	  cur = handle_newline (pfile, cur - 1);
204 	  /* Canonicalize newline sequences and skip escaped ones.  */
205 	  if (out[-2] == '\\')
206 	    out -= 2;
207 	  else
208 	    out[-1] = '\n';
209 	}
210     }
211   while (cur < limit);
212 
213   cpp_error_with_line (pfile, DL_ERROR, from_line, 0, "unterminated comment");
214   *out++ = '*';
215   *out++ = '/';
216 
217  done:
218   /* Comments in directives become spaces so that tokens are properly
219      separated when the ISO preprocessor re-lexes the line.  The
220      exception is #define.  */
221   if (pfile->state.in_directive)
222     {
223       if (in_define)
224 	{
225 	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
226 	    pfile->out.cur--;
227 	  else
228 	    pfile->out.cur = out;
229 	}
230       else
231 	pfile->out.cur[-1] = ' ';
232     }
233   else if (CPP_OPTION (pfile, discard_comments))
234     pfile->out.cur--;
235   else
236     pfile->out.cur = out;
237 
238   return cur;
239 }
240 
241 /* CUR points to any character in the input buffer.  Skips over all
242    contiguous horizontal white space and NULs, including comments if
243    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
244    character or the end of the current context.  Escaped newlines are
245    removed.
246 
247    The whitespace is copied verbatim to the output buffer, except that
248    comments are handled as described in copy_comment().
249    pfile->out.cur is updated.
250 
251    Returns a pointer to the first character after the whitespace in
252    the input buffer.  */
253 static const uchar *
skip_whitespace(pfile,cur,skip_comments)254 skip_whitespace (pfile, cur, skip_comments)
255      cpp_reader *pfile;
256      const uchar *cur;
257      int skip_comments;
258 {
259   uchar *out = pfile->out.cur;
260 
261   for (;;)
262     {
263       unsigned int c = *cur++;
264       *out++ = c;
265 
266       if (is_nvspace (c) && c)
267 	continue;
268 
269       if (!c && cur - 1 != RLIMIT (pfile->context))
270 	continue;
271 
272       if (c == '/' && skip_comments)
273 	{
274 	  const uchar *tmp = skip_escaped_newlines (pfile, cur);
275 	  if (*tmp == '*')
276 	    {
277 	      pfile->out.cur = out;
278 	      cur = copy_comment (pfile, tmp, false /* in_define */);
279 	      out = pfile->out.cur;
280 	      continue;
281 	    }
282 	}
283 
284       out--;
285       if (c == '\\' && is_vspace (*cur))
286 	{
287 	  cur = skip_escaped_newlines (pfile, cur - 1);
288 	  continue;
289 	}
290 
291       break;
292     }
293 
294   pfile->out.cur = out;
295   return cur - 1;
296 }
297 
298 /* Lexes and outputs an identifier starting at CUR, which is assumed
299    to point to a valid first character of an identifier.  Returns
300    the hashnode, and updates out.cur.  */
301 static cpp_hashnode *
lex_identifier(pfile,cur)302 lex_identifier (pfile, cur)
303      cpp_reader *pfile;
304      const uchar *cur;
305 {
306   size_t len;
307   uchar *out = pfile->out.cur;
308   cpp_hashnode *result;
309 
310   do
311     {
312       do
313 	*out++ = *cur++;
314       while (is_numchar (*cur));
315       cur = skip_escaped_newlines (pfile, cur);
316     }
317   while (is_numchar (*cur));
318 
319   CUR (pfile->context) = cur;
320   len = out - pfile->out.cur;
321   result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->out.cur,
322 				       len, HT_ALLOC);
323   pfile->out.cur = out;
324   return result;
325 }
326 
327 /* Overlays the true file buffer temporarily with text of length LEN
328    starting at START.  The true buffer is restored upon calling
329    restore_buff().  */
330 void
_cpp_overlay_buffer(pfile,start,len)331 _cpp_overlay_buffer (pfile, start, len)
332      cpp_reader *pfile;
333      const uchar *start;
334      size_t len;
335 {
336   cpp_buffer *buffer = pfile->buffer;
337 
338   pfile->overlaid_buffer = buffer;
339   buffer->saved_cur = buffer->cur;
340   buffer->saved_rlimit = buffer->rlimit;
341 
342   buffer->cur = start;
343   buffer->rlimit = start + len;
344 
345   pfile->saved_line = pfile->line;
346 }
347 
348 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
349 void
_cpp_remove_overlay(pfile)350 _cpp_remove_overlay (pfile)
351      cpp_reader *pfile;
352 {
353   cpp_buffer *buffer = pfile->overlaid_buffer;
354 
355   buffer->cur = buffer->saved_cur;
356   buffer->rlimit = buffer->saved_rlimit;
357 
358   pfile->line = pfile->saved_line;
359 }
360 
361 /* Reads a logical line into the output buffer.  Returns TRUE if there
362    is more text left in the buffer.  */
363 bool
_cpp_read_logical_line_trad(pfile)364 _cpp_read_logical_line_trad (pfile)
365      cpp_reader *pfile;
366 {
367   do
368     {
369       if (pfile->buffer->cur == pfile->buffer->rlimit)
370 	{
371 	  bool stop = true;
372 
373 	  /* Don't pop the last buffer.  */
374 	  if (pfile->buffer->prev)
375 	    {
376 	      stop = pfile->buffer->return_at_eof;
377 	      _cpp_pop_buffer (pfile);
378 	    }
379 
380 	  if (stop)
381 	    return false;
382 	}
383 
384       scan_out_logical_line (pfile, NULL);
385     }
386   while (pfile->state.skipping);
387 
388   return true;
389 }
390 
391 /* Set up state for finding the opening '(' of a function-like
392    macro.  */
393 static void
maybe_start_funlike(pfile,node,start,macro)394 maybe_start_funlike (pfile, node, start, macro)
395      cpp_reader *pfile;
396      cpp_hashnode *node;
397      const uchar *start;
398      struct fun_macro *macro;
399 {
400   unsigned int n = node->value.macro->paramc + 1;
401 
402   if (macro->buff)
403     _cpp_release_buff (pfile, macro->buff);
404   macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
405   macro->args = (size_t *) BUFF_FRONT (macro->buff);
406   macro->node = node;
407   macro->offset = start - pfile->out.base;
408   macro->argc = 0;
409 }
410 
411 /* Save the OFFSET of the start of the next argument to MACRO.  */
412 static void
save_argument(macro,offset)413 save_argument (macro, offset)
414      struct fun_macro *macro;
415      size_t offset;
416 {
417   macro->argc++;
418   if (macro->argc <= macro->node->value.macro->paramc)
419     macro->args[macro->argc] = offset;
420 }
421 
422 /* Copies the next logical line in the current buffer (starting at
423    buffer->cur) to the output buffer.  The output is guaranteed to
424    terminate with a NUL character.  buffer->cur is updated.
425 
426    If MACRO is non-NULL, then we are scanning the replacement list of
427    MACRO, and we call save_replacement_text() every time we meet an
428    argument.  */
429 static void
scan_out_logical_line(pfile,macro)430 scan_out_logical_line (pfile, macro)
431      cpp_reader *pfile;
432      cpp_macro *macro;
433 {
434   cpp_context *context;
435   const uchar *cur;
436   uchar *out;
437   struct fun_macro fmacro;
438   unsigned int c, paren_depth = 0, quote;
439   enum ls lex_state = ls_none;
440   bool header_ok;
441   const uchar *start_of_input_line;
442 
443   fmacro.buff = NULL;
444 
445  start_logical_line:
446   quote = 0;
447   header_ok = pfile->state.angled_headers;
448   CUR (pfile->context) = pfile->buffer->cur;
449   RLIMIT (pfile->context) = pfile->buffer->rlimit;
450   pfile->out.cur = pfile->out.base;
451   pfile->out.first_line = pfile->line;
452   /* start_of_input_line is needed to make sure that directives really,
453      really start at the first character of the line. */
454   start_of_input_line = pfile->buffer->cur;
455  new_context:
456   context = pfile->context;
457   cur = CUR (context);
458   check_output_buffer (pfile, RLIMIT (context) - cur);
459   out = pfile->out.cur;
460 
461   for (;;)
462     {
463       c = *cur++;
464       *out++ = c;
465 
466       /* Whitespace should "continue" out of the switch,
467 	 non-whitespace should "break" out of it.  */
468       switch (c)
469 	{
470 	case ' ':
471 	case '\t':
472 	case '\f':
473 	case '\v':
474 	  continue;
475 
476 	case '\0':
477 	  if (cur - 1 != RLIMIT (context))
478 	    continue;
479 
480 	  /* If this is a macro's expansion, pop it.  */
481 	  if (context->prev)
482 	    {
483 	      pfile->out.cur = out - 1;
484 	      _cpp_pop_context (pfile);
485 	      goto new_context;
486 	    }
487 
488 	  /* Premature end of file.  Fake a new line.  */
489 	  cur--;
490 	  pfile->line++;
491 	  goto done;
492 
493 	case '\r': case '\n':
494 	  cur = handle_newline (pfile, cur - 1);
495 	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
496 	      && !pfile->state.in_directive)
497 	    {
498 	      /* Newlines in arguments become a space, but we don't
499 		 clear any in-progress quote.  */
500 	      if (lex_state == ls_fun_close)
501 		out[-1] = ' ';
502 	      continue;
503 	    }
504 	  goto done;
505 
506 	case '<':
507 	  if (header_ok)
508 	    quote = '>';
509 	  break;
510 	case '>':
511 	  if (c == quote)
512 	    quote = 0;
513 	  break;
514 
515 	case '"':
516 	case '\'':
517 	  if (c == quote)
518 	    quote = 0;
519 	  else if (!quote)
520 	    quote = c;
521 	  break;
522 
523 	case '\\':
524 	  if (is_vspace (*cur))
525 	    {
526 	      out--;
527 	      cur = skip_escaped_newlines (pfile, cur - 1);
528 	      continue;
529 	    }
530 	  else
531 	    {
532 	      /* Skip escaped quotes here, it's easier than above, but
533 		 take care to first skip escaped newlines.  */
534 	      cur = skip_escaped_newlines (pfile, cur);
535 	      if (*cur == '\\' || *cur == '"' || *cur == '\'')
536 		*out++ = *cur++;
537 	    }
538 	  break;
539 
540 	case '/':
541 	  /* Traditional CPP does not recognize comments within
542 	     literals.  */
543 	  if (!quote)
544 	    {
545 	      cur = skip_escaped_newlines (pfile, cur);
546 	      if (*cur == '*')
547 		{
548 		  pfile->out.cur = out;
549 		  cur = copy_comment (pfile, cur, macro != 0);
550 		  out = pfile->out.cur;
551 		  continue;
552 		}
553 	    }
554 	  break;
555 
556 	case '_':
557 	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
558 	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
559 	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
560 	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
561 	case 'y': case 'z':
562 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
563 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
564 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
565 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
566 	case 'Y': case 'Z':
567 	  if (!pfile->state.skipping && (quote == 0 || macro))
568 	    {
569 	      cpp_hashnode *node;
570 	      uchar *out_start = out - 1;
571 
572 	      pfile->out.cur = out_start;
573 	      node = lex_identifier (pfile, cur - 1);
574 	      out = pfile->out.cur;
575 	      cur = CUR (context);
576 
577 	      if (node->type == NT_MACRO
578 		  /* Should we expand for ls_answer?  */
579 		  && (lex_state == ls_none || lex_state == ls_fun_open)
580 		  && !pfile->state.prevent_expansion)
581 		{
582 		  /* Macros invalidate MI optimization.  */
583 		  pfile->mi_valid = false;
584 		  if (! (node->flags & NODE_BUILTIN)
585 		      && node->value.macro->fun_like)
586 		    {
587 		      maybe_start_funlike (pfile, node, out_start, &fmacro);
588 		      lex_state = ls_fun_open;
589 		      fmacro.line = pfile->line;
590 		      continue;
591 		    }
592 		  else if (!recursive_macro (pfile, node))
593 		    {
594 		      /* Remove the object-like macro's name from the
595 			 output, and push its replacement text.  */
596 		      pfile->out.cur = out_start;
597 		      push_replacement_text (pfile, node);
598 		      lex_state = ls_none;
599 		      goto new_context;
600 		    }
601 		}
602 	      else if (macro && node->arg_index)
603 		{
604 		  /* Found a parameter in the replacement text of a
605 		     #define.  Remove its name from the output.  */
606 		  pfile->out.cur = out_start;
607 		  save_replacement_text (pfile, macro, node->arg_index);
608 		  out = pfile->out.base;
609 		}
610 	      else if (lex_state == ls_hash)
611 		{
612 		  lex_state = ls_predicate;
613 		  continue;
614 		}
615 	      else if (pfile->state.in_expression
616 		       && node == pfile->spec_nodes.n_defined)
617 		{
618 		  lex_state = ls_defined;
619 		  continue;
620 		}
621 	    }
622 	  break;
623 
624 	case '(':
625 	  if (quote == 0)
626 	    {
627 	      paren_depth++;
628 	      if (lex_state == ls_fun_open)
629 		{
630 		  if (recursive_macro (pfile, fmacro.node))
631 		    lex_state = ls_none;
632 		  else
633 		    {
634 		      lex_state = ls_fun_close;
635 		      paren_depth = 1;
636 		      out = pfile->out.base + fmacro.offset;
637 		      fmacro.args[0] = fmacro.offset;
638 		    }
639 		}
640 	      else if (lex_state == ls_predicate)
641 		lex_state = ls_answer;
642 	      else if (lex_state == ls_defined)
643 		lex_state = ls_defined_close;
644 	    }
645 	  break;
646 
647 	case ',':
648 	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
649 	    save_argument (&fmacro, out - pfile->out.base);
650 	  break;
651 
652 	case ')':
653 	  if (quote == 0)
654 	    {
655 	      paren_depth--;
656 	      if (lex_state == ls_fun_close && paren_depth == 0)
657 		{
658 		  cpp_macro *m = fmacro.node->value.macro;
659 
660 		  m->used = 1;
661 		  lex_state = ls_none;
662 		  save_argument (&fmacro, out - pfile->out.base);
663 
664 		  /* A single zero-length argument is no argument.  */
665 		  if (fmacro.argc == 1
666 		      && m->paramc == 0
667 		      && out == pfile->out.base + fmacro.offset + 1)
668 		    fmacro.argc = 0;
669 
670 		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
671 		    {
672 		      /* Remove the macro's invocation from the
673 			 output, and push its replacement text.  */
674 		      pfile->out.cur = (pfile->out.base
675 					     + fmacro.offset);
676 		      CUR (context) = cur;
677 		      replace_args_and_push (pfile, &fmacro);
678 		      goto new_context;
679 		    }
680 		}
681 	      else if (lex_state == ls_answer || lex_state == ls_defined_close)
682 		lex_state = ls_none;
683 	    }
684 	  break;
685 
686 	case '#':
687 	  if (cur - 1 == start_of_input_line
688 	      /* A '#' from a macro doesn't start a directive.  */
689 	      && !pfile->context->prev
690 	      && !pfile->state.in_directive)
691 	    {
692 	      /* A directive.  With the way _cpp_handle_directive
693 		 currently works, we only want to call it if either we
694 		 know the directive is OK, or we want it to fail and
695 		 be removed from the output.  If we want it to be
696 		 passed through (the assembler case) then we must not
697 		 call _cpp_handle_directive.  */
698 	      pfile->out.cur = out;
699 	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
700 	      out = pfile->out.cur;
701 
702 	      if (is_vspace (*cur))
703 		{
704 		  /* Null directive.  Ignore it and don't invalidate
705 		     the MI optimization.  */
706 		  out = pfile->out.base;
707 		  continue;
708 		}
709 	      else
710 		{
711 		  bool do_it = false;
712 
713 		  if (is_numstart (*cur)
714 		      && CPP_OPTION (pfile, lang) != CLK_ASM)
715 		    do_it = true;
716 		  else if (is_idstart (*cur))
717 		    /* Check whether we know this directive, but don't
718 		       advance.  */
719 		    do_it = lex_identifier (pfile, cur)->directive_index != 0;
720 
721 		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
722 		    {
723 		      /* This is a kludge.  We want to have the ISO
724 			 preprocessor lex the next token.  */
725 		      pfile->buffer->cur = cur;
726 		      _cpp_handle_directive (pfile, false /* indented */);
727 		      /* #include changes pfile->buffer so we need to
728 			 update the limits of the current context.  */
729 		      goto start_logical_line;
730 		    }
731 		}
732 	    }
733 
734 	  if (pfile->state.in_expression)
735 	    {
736 	      lex_state = ls_hash;
737 	      continue;
738 	    }
739 	  break;
740 
741 	default:
742 	  break;
743 	}
744 
745       /* Non-whitespace disables MI optimization and stops treating
746 	 '<' as a quote in #include.  */
747       header_ok = false;
748       if (!pfile->state.in_directive)
749 	pfile->mi_valid = false;
750 
751       if (lex_state == ls_none)
752 	continue;
753 
754       /* Some of these transitions of state are syntax errors.  The
755 	 ISO preprocessor will issue errors later.  */
756       if (lex_state == ls_fun_open)
757 	/* Missing '('.  */
758 	lex_state = ls_none;
759       else if (lex_state == ls_hash
760 	       || lex_state == ls_predicate
761 	       || lex_state == ls_defined)
762 	lex_state = ls_none;
763 
764       /* ls_answer and ls_defined_close keep going until ')'.  */
765     }
766 
767  done:
768   out[-1] = '\0';
769   pfile->buffer->cur = cur;
770   pfile->out.cur = out - 1;
771   if (fmacro.buff)
772     _cpp_release_buff (pfile, fmacro.buff);
773 
774   if (lex_state == ls_fun_close)
775     cpp_error_with_line (pfile, DL_ERROR, fmacro.line, 0,
776 			 "unterminated argument list invoking macro \"%s\"",
777 			 NODE_NAME (fmacro.node));
778 }
779 
780 /* Push a context holding the replacement text of the macro NODE on
781    the context stack.  NODE is either object-like, or a function-like
782    macro with no arguments.  */
783 static void
push_replacement_text(pfile,node)784 push_replacement_text (pfile, node)
785      cpp_reader *pfile;
786      cpp_hashnode *node;
787 {
788   size_t len;
789   const uchar *text;
790 
791   if (node->flags & NODE_BUILTIN)
792     {
793       text = _cpp_builtin_macro_text (pfile, node);
794       len = ustrlen (text);
795     }
796   else
797     {
798       cpp_macro *macro = node->value.macro;
799       macro->used = 1;
800       text = macro->exp.text;
801       len = macro->count;
802     }
803 
804   _cpp_push_text_context (pfile, node, text, len);
805 }
806 
807 /* Returns TRUE if traditional macro recursion is detected.  */
808 static bool
recursive_macro(pfile,node)809 recursive_macro (pfile, node)
810      cpp_reader *pfile;
811      cpp_hashnode *node;
812 {
813   bool recursing = !!(node->flags & NODE_DISABLED);
814 
815   /* Object-like macros that are already expanding are necessarily
816      recursive.
817 
818      However, it is possible to have traditional function-like macros
819      that are not infinitely recursive but recurse to any given depth.
820      Further, it is easy to construct examples that get ever longer
821      until the point they stop recursing.  So there is no easy way to
822      detect true recursion; instead we assume any expansion more than
823      20 deep since the first invocation of this macro must be
824      recursing.  */
825   if (recursing && node->value.macro->fun_like)
826     {
827       size_t depth = 0;
828       cpp_context *context = pfile->context;
829 
830       do
831 	{
832 	  depth++;
833 	  if (context->macro == node && depth > 20)
834 	    break;
835 	  context = context->prev;
836 	}
837       while (context);
838       recursing = context != NULL;
839     }
840 
841   if (recursing)
842     cpp_error (pfile, DL_ERROR,
843 	       "detected recursion whilst expanding macro \"%s\"",
844 	       NODE_NAME (node));
845 
846   return recursing;
847 }
848 
849 /* Return the length of the replacement text of a function-like or
850    object-like non-builtin macro.  */
851 size_t
_cpp_replacement_text_len(macro)852 _cpp_replacement_text_len (macro)
853      const cpp_macro *macro;
854 {
855   size_t len;
856 
857   if (macro->fun_like && (macro->paramc != 0))
858     {
859       const uchar *exp;
860 
861       len = 0;
862       for (exp = macro->exp.text;;)
863 	{
864 	  struct block *b = (struct block *) exp;
865 
866 	  len += b->text_len;
867 	  if (b->arg_index == 0)
868 	    break;
869 	  len += NODE_LEN (macro->params[b->arg_index - 1]);
870 	  exp += BLOCK_LEN (b->text_len);
871 	}
872     }
873   else
874     len = macro->count;
875 
876   return len;
877 }
878 
879 /* Copy the replacement text of MACRO to DEST, which must be of
880    sufficient size.  It is not NUL-terminated.  The next character is
881    returned.  */
882 uchar *
_cpp_copy_replacement_text(macro,dest)883 _cpp_copy_replacement_text (macro, dest)
884      const cpp_macro *macro;
885      uchar *dest;
886 {
887   if (macro->fun_like && (macro->paramc != 0))
888     {
889       const uchar *exp;
890 
891       for (exp = macro->exp.text;;)
892 	{
893 	  struct block *b = (struct block *) exp;
894 	  cpp_hashnode *param;
895 
896 	  memcpy (dest, b->text, b->text_len);
897 	  dest += b->text_len;
898 	  if (b->arg_index == 0)
899 	    break;
900 	  param = macro->params[b->arg_index - 1];
901 	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
902 	  dest += NODE_LEN (param);
903 	  exp += BLOCK_LEN (b->text_len);
904 	}
905     }
906   else
907     {
908       memcpy (dest, macro->exp.text, macro->count);
909       dest += macro->count;
910     }
911 
912   return dest;
913 }
914 
915 /* Push a context holding the replacement text of the macro NODE on
916    the context stack.  NODE is either object-like, or a function-like
917    macro with no arguments.  */
918 static void
replace_args_and_push(pfile,fmacro)919 replace_args_and_push (pfile, fmacro)
920      cpp_reader *pfile;
921      struct fun_macro *fmacro;
922 {
923   cpp_macro *macro = fmacro->node->value.macro;
924 
925   if (macro->paramc == 0)
926     push_replacement_text (pfile, fmacro->node);
927   else
928     {
929       const uchar *exp;
930       uchar *p;
931       _cpp_buff *buff;
932       size_t len = 0;
933 
934       /* Calculate the length of the argument-replaced text.  */
935       for (exp = macro->exp.text;;)
936 	{
937 	  struct block *b = (struct block *) exp;
938 
939 	  len += b->text_len;
940 	  if (b->arg_index == 0)
941 	    break;
942 	  len += (fmacro->args[b->arg_index]
943 		  - fmacro->args[b->arg_index - 1] - 1);
944 	  exp += BLOCK_LEN (b->text_len);
945 	}
946 
947       /* Allocate room for the expansion plus NUL.  */
948       buff = _cpp_get_buff (pfile, len + 1);
949 
950       /* Copy the expansion and replace arguments.  */
951       p = BUFF_FRONT (buff);
952       for (exp = macro->exp.text;;)
953 	{
954 	  struct block *b = (struct block *) exp;
955 	  size_t arglen;
956 
957 	  memcpy (p, b->text, b->text_len);
958 	  p += b->text_len;
959 	  if (b->arg_index == 0)
960 	    break;
961 	  arglen = (fmacro->args[b->arg_index]
962 		    - fmacro->args[b->arg_index - 1] - 1);
963 	  memcpy (p, pfile->out.base + fmacro->args[b->arg_index - 1],
964 		  arglen);
965 	  p += arglen;
966 	  exp += BLOCK_LEN (b->text_len);
967 	}
968 
969       /* NUL-terminate.  */
970       *p = '\0';
971       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
972 
973       /* So we free buffer allocation when macro is left.  */
974       pfile->context->buff = buff;
975     }
976 }
977 
978 /* Read and record the parameters, if any, of a function-like macro
979    definition.  Destroys pfile->out.cur.
980 
981    Returns true on success, false on failure (syntax error or a
982    duplicate parameter).  On success, CUR (pfile->context) is just
983    past the closing parenthesis.  */
984 static bool
scan_parameters(pfile,macro)985 scan_parameters (pfile, macro)
986      cpp_reader *pfile;
987      cpp_macro *macro;
988 {
989   const uchar *cur = CUR (pfile->context) + 1;
990   bool ok;
991 
992   for (;;)
993     {
994       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
995 
996       if (is_idstart (*cur))
997 	{
998 	  ok = false;
999 	  if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
1000 	    break;
1001 	  cur = skip_whitespace (pfile, CUR (pfile->context),
1002 				 true /* skip_comments */);
1003 	  if (*cur == ',')
1004 	    {
1005 	      cur++;
1006 	      continue;
1007 	    }
1008 	  ok = (*cur == ')');
1009 	  break;
1010 	}
1011 
1012       ok = (*cur == ')' && macro->paramc == 0);
1013       break;
1014     }
1015 
1016   CUR (pfile->context) = cur + (*cur == ')');
1017 
1018   return ok;
1019 }
1020 
1021 /* Save the text from pfile->out.base to pfile->out.cur as
1022    the replacement text for the current macro, followed by argument
1023    ARG_INDEX, with zero indicating the end of the replacement
1024    text.  */
1025 static void
save_replacement_text(pfile,macro,arg_index)1026 save_replacement_text (pfile, macro, arg_index)
1027      cpp_reader *pfile;
1028      cpp_macro *macro;
1029      unsigned int arg_index;
1030 {
1031   size_t len = pfile->out.cur - pfile->out.base;
1032   uchar *exp;
1033 
1034   if (macro->paramc == 0)
1035     {
1036       /* Object-like and function-like macros without parameters
1037 	 simply store their NUL-terminated replacement text.  */
1038       exp = _cpp_unaligned_alloc (pfile, len + 1);
1039       memcpy (exp, pfile->out.base, len);
1040       exp[len] = '\0';
1041       macro->exp.text = exp;
1042       macro->count = len;
1043     }
1044   else
1045     {
1046       /* Store the text's length (unsigned int), the argument index
1047 	 (unsigned short, base 1) and then the text.  */
1048       size_t blen = BLOCK_LEN (len);
1049       struct block *block;
1050 
1051       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1052 	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1053 
1054       exp = BUFF_FRONT (pfile->a_buff);
1055       block = (struct block *) (exp + macro->count);
1056       macro->exp.text = exp;
1057 
1058       /* Write out the block information.  */
1059       block->text_len = len;
1060       block->arg_index = arg_index;
1061       memcpy (block->text, pfile->out.base, len);
1062 
1063       /* Lex the rest into the start of the output buffer.  */
1064       pfile->out.cur = pfile->out.base;
1065 
1066       macro->count += blen;
1067 
1068       /* If we've finished, commit the memory.  */
1069       if (arg_index == 0)
1070 	BUFF_FRONT (pfile->a_buff) += macro->count;
1071     }
1072 }
1073 
1074 /* Analyze and save the replacement text of a macro.  Returns true on
1075    success.  */
1076 bool
_cpp_create_trad_definition(pfile,macro)1077 _cpp_create_trad_definition (pfile, macro)
1078      cpp_reader *pfile;
1079      cpp_macro *macro;
1080 {
1081   const uchar *cur;
1082   uchar *limit;
1083   cpp_context *context = pfile->context;
1084 
1085   /* The context has not been set up for command line defines, and CUR
1086      has not been updated for the macro name for in-file defines.  */
1087   pfile->out.cur = pfile->out.base;
1088   CUR (context) = pfile->buffer->cur;
1089   RLIMIT (context) = pfile->buffer->rlimit;
1090   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1091 
1092   /* Is this a function-like macro?  */
1093   if (* CUR (context) == '(')
1094     {
1095       /* Setting macro to NULL indicates an error occurred, and
1096 	 prevents unnecessary work in scan_out_logical_line.  */
1097       if (!scan_parameters (pfile, macro))
1098 	macro = NULL;
1099       else
1100 	{
1101 	  /* Success.  Commit the parameter array.  */
1102 	  macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
1103 	  BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
1104 	  macro->fun_like = 1;
1105 	}
1106     }
1107 
1108   /* Skip leading whitespace in the replacement text.  */
1109   pfile->buffer->cur
1110     = skip_whitespace (pfile, CUR (context),
1111 		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1112 
1113   pfile->state.prevent_expansion++;
1114   scan_out_logical_line (pfile, macro);
1115   pfile->state.prevent_expansion--;
1116 
1117   if (!macro)
1118     return false;
1119 
1120   /* Skip trailing white space.  */
1121   cur = pfile->out.base;
1122   limit = pfile->out.cur;
1123   while (limit > cur && is_space (limit[-1]))
1124     limit--;
1125   pfile->out.cur = limit;
1126   save_replacement_text (pfile, macro, 0);
1127 
1128   return true;
1129 }
1130 
1131 /* Copy SRC of length LEN to DEST, but convert all contiguous
1132    whitespace to a single space, provided it is not in quotes.  The
1133    quote currently in effect is pointed to by PQUOTE, and is updated
1134    by the function.  Returns the number of bytes copied.  */
1135 static size_t
canonicalize_text(dest,src,len,pquote)1136 canonicalize_text (dest, src, len, pquote)
1137      uchar *dest;
1138      const uchar *src;
1139      size_t len;
1140      uchar *pquote;
1141 {
1142   uchar *orig_dest = dest;
1143   uchar quote = *pquote;
1144 
1145   while (len)
1146     {
1147       if (is_space (*src) && !quote)
1148 	{
1149 	  do
1150 	    src++, len--;
1151 	  while (len && is_space (*src));
1152 	  *dest++ = ' ';
1153 	}
1154       else
1155 	{
1156 	  if (*src == '\'' || *src == '"')
1157 	    {
1158 	      if (!quote)
1159 		quote = *src;
1160 	      else if (quote == *src)
1161 		quote = 0;
1162 	    }
1163 	  *dest++ = *src++, len--;
1164 	}
1165     }
1166 
1167   *pquote = quote;
1168   return dest - orig_dest;
1169 }
1170 
1171 /* Returns true if MACRO1 and MACRO2 have expansions different other
1172    than in the form of their whitespace.  */
1173 bool
_cpp_expansions_different_trad(macro1,macro2)1174 _cpp_expansions_different_trad (macro1, macro2)
1175      const cpp_macro *macro1, *macro2;
1176 {
1177   uchar *p1 = xmalloc (macro1->count + macro2->count);
1178   uchar *p2 = p1 + macro1->count;
1179   uchar quote1 = 0, quote2 = 0;
1180   bool mismatch;
1181   size_t len1, len2;
1182 
1183   if (macro1->paramc > 0)
1184     {
1185       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1186 
1187       mismatch = true;
1188       for (;;)
1189 	{
1190 	  struct block *b1 = (struct block *) exp1;
1191 	  struct block *b2 = (struct block *) exp2;
1192 
1193 	  if (b1->arg_index != b2->arg_index)
1194 	    break;
1195 
1196 	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1197 	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1198 	  if (len1 != len2 || memcmp (p1, p2, len1))
1199 	    break;
1200 	  if (b1->arg_index == 0)
1201 	    {
1202 	      mismatch = false;
1203 	      break;
1204 	    }
1205 	  exp1 += BLOCK_LEN (b1->text_len);
1206 	  exp2 += BLOCK_LEN (b2->text_len);
1207 	}
1208     }
1209   else
1210     {
1211       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1212       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1213       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1214     }
1215 
1216   free (p1);
1217   return mismatch;
1218 }
1219