xref: /netbsd-src/external/gpl3/gdb.old/dist/gdb/macroexp.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* C preprocessor macro expansion for GDB.
2    Copyright (C) 2002-2016 Free Software Foundation, Inc.
3    Contributed by Red Hat, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "defs.h"
21 #include "gdb_obstack.h"
22 #include "bcache.h"
23 #include "macrotab.h"
24 #include "macroexp.h"
25 #include "c-lang.h"
26 
27 
28 
29 /* A resizeable, substringable string type.  */
30 
31 
/* A character buffer used throughout macro expansion.  It serves two
   roles: a growable, heap-backed string that can be appended to
   cheaply, and a read-only window ("shared" buffer) onto part of some
   other string.  */
struct macro_buffer
{
  /* The characters themselves.  Only the first LEN bytes are text;
     SIZE bytes are allocated.  When SIZE is zero this does not point
     to a malloc'ed block.  When SHARED is non-zero this points into
     somebody else's string and must not be appended to or
     reallocated.  Since a shared buffer may end in the middle of a
     larger string, the text is not necessarily null-terminated.  */
  char *text;

  /* How many characters of TEXT are in use.  */
  int len;

  /* How many characters are allocated for TEXT.  Meaningless for a
     shared buffer, where it is kept at zero so that every "is there
     room to append?" test fails without a separate check of
     SHARED.  */
  int size;

  /* Zero when TEXT is its own malloc block and may be realloc'ed;
     non-zero when TEXT points into the middle of another block.  */
  int shared;

  /* Used to detect token splicing.

     The index within TEXT of the first character of the token that
     touches the end of TEXT.  Equal to LEN when no token touches the
     end --- either because TEXT holds no tokens at all, or because
     TEXT ends in whitespace.  -1 when nothing is known about the
     contents of TEXT.  */
  int last_token;

  /* Only meaningful for a buffer filled in by get_token: non-zero if
     the token is an identifier, zero for any other kind of token.  */
  int is_identifier;
};
73 
74 
75 /* Set the macro buffer *B to the empty string, guessing that its
76    final contents will fit in N bytes.  (It'll get resized if it
77    doesn't, so the guess doesn't have to be right.)  Allocate the
78    initial storage with xmalloc.  */
79 static void
80 init_buffer (struct macro_buffer *b, int n)
81 {
82   b->size = n;
83   if (n > 0)
84     b->text = (char *) xmalloc (n);
85   else
86     b->text = NULL;
87   b->len = 0;
88   b->shared = 0;
89   b->last_token = -1;
90 }
91 
92 
93 /* Set the macro buffer *BUF to refer to the LEN bytes at ADDR, as a
94    shared substring.  */
95 static void
96 init_shared_buffer (struct macro_buffer *buf, char *addr, int len)
97 {
98   buf->text = addr;
99   buf->len = len;
100   buf->shared = 1;
101   buf->size = 0;
102   buf->last_token = -1;
103 }
104 
105 
106 /* Free the text of the buffer B.  Raise an error if B is shared.  */
107 static void
108 free_buffer (struct macro_buffer *b)
109 {
110   gdb_assert (! b->shared);
111   if (b->size)
112     xfree (b->text);
113 }
114 
115 /* Like free_buffer, but return the text as an xstrdup()d string.
116    This only exists to try to make the API relatively clean.  */
117 
118 static char *
119 free_buffer_return_text (struct macro_buffer *b)
120 {
121   gdb_assert (! b->shared);
122   gdb_assert (b->size);
123   /* Nothing to do.  */
124   return b->text;
125 }
126 
/* Cleanup callback for macro buffers: UNTYPED_BUF is a pointer to a
   struct macro_buffer, which is released with free_buffer.  */
static void
cleanup_macro_buffer (void *untyped_buf)
{
  struct macro_buffer *buf = (struct macro_buffer *) untyped_buf;

  free_buffer (buf);
}
133 
134 
135 /* Resize the buffer B to be at least N bytes long.  Raise an error if
136    B shouldn't be resized.  */
137 static void
138 resize_buffer (struct macro_buffer *b, int n)
139 {
140   /* We shouldn't be trying to resize shared strings.  */
141   gdb_assert (! b->shared);
142 
143   if (b->size == 0)
144     b->size = n;
145   else
146     while (b->size <= n)
147       b->size *= 2;
148 
149   b->text = (char *) xrealloc (b->text, b->size);
150 }
151 
152 
153 /* Append the character C to the buffer B.  */
154 static void
155 appendc (struct macro_buffer *b, int c)
156 {
157   int new_len = b->len + 1;
158 
159   if (new_len > b->size)
160     resize_buffer (b, new_len);
161 
162   b->text[b->len] = c;
163   b->len = new_len;
164 }
165 
166 
167 /* Append the LEN bytes at ADDR to the buffer B.  */
168 static void
169 appendmem (struct macro_buffer *b, char *addr, int len)
170 {
171   int new_len = b->len + len;
172 
173   if (new_len > b->size)
174     resize_buffer (b, new_len);
175 
176   memcpy (b->text + b->len, addr, len);
177   b->len = new_len;
178 }
179 
180 
181 
182 /* Recognizing preprocessor tokens.  */
183 
184 
/* Return 1 if C is a space, horizontal tab, newline, vertical tab or
   form feed; return 0 for any other character.  */
int
macro_is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
      return 1;

    default:
      return 0;
    }
}
194 
195 
/* Return 1 if C is one of the decimal digits '0' through '9', and 0
   otherwise.  */
int
macro_is_digit (int c)
{
  if (c < '0')
    return 0;

  return c <= '9';
}
201 
202 
/* Return 1 if C may appear in an identifier and is not a digit: an
   underscore or an ASCII letter of either case.  Return 0
   otherwise.  */
int
macro_is_identifier_nondigit (int c)
{
  if (c == '_')
    return 1;

  if (c >= 'a' && c <= 'z')
    return 1;

  return c >= 'A' && c <= 'Z';
}
210 
211 
212 static void
213 set_token (struct macro_buffer *tok, char *start, char *end)
214 {
215   init_shared_buffer (tok, start, end - start);
216   tok->last_token = 0;
217 
218   /* Presumed; get_identifier may overwrite this.  */
219   tok->is_identifier = 0;
220 }
221 
222 
/* If the text from P up to (but not including) END begins with a
   comment, make *TOK refer to the whole comment and return 1;
   otherwise return 0.  A block comment that is never closed is an
   error.  A line comment extends to, but does not include, the next
   newline, or to END if there is none.  */
static int
get_comment (struct macro_buffer *tok, char *p, char *end)
{
  char *scan;

  /* Both comment openers are two characters long and start with a
     slash.  */
  if (p + 2 > end || p[0] != '/')
    return 0;

  if (p[1] == '*')
    {
      /* Start looking for the closer after the opener, so that the
	 text slash-star-slash is not mistaken for a complete
	 comment.  */
      for (scan = p + 2; scan < end; scan++)
	if (scan + 2 <= end
	    && scan[0] == '*'
	    && scan[1] == '/')
	  {
	    set_token (tok, p, scan + 2);
	    return 1;
	  }

      error (_("Unterminated comment in macro expansion."));
    }

  if (p[1] == '/')
    {
      scan = p + 2;
      while (scan < end && *scan != '\n')
	scan++;

      set_token (tok, p, scan);
      return 1;
    }

  return 0;
}
263 
264 
265 static int
266 get_identifier (struct macro_buffer *tok, char *p, char *end)
267 {
268   if (p < end
269       && macro_is_identifier_nondigit (*p))
270     {
271       char *tok_start = p;
272 
273       while (p < end
274              && (macro_is_identifier_nondigit (*p)
275                  || macro_is_digit (*p)))
276         p++;
277 
278       set_token (tok, tok_start, p);
279       tok->is_identifier = 1;
280       return 1;
281     }
282   else
283     return 0;
284 }
285 
286 
/* If the text from P up to (but not including) END begins with a
   preprocessing number, make *TOK refer to it and return 1.
   Otherwise return 0.

   A preprocessing number starts with a digit, or with a period
   followed by a digit.  It then continues through any run of digits,
   letters, underscores and periods, and also through a sign
   character when that sign directly follows one of e, E, p or P
   (an exponent).  */
static int
get_pp_number (struct macro_buffer *tok, char *p, char *end)
{
  if (p < end
      && (macro_is_digit (*p)
          || (*p == '.'
	      && p + 2 <= end
	      && macro_is_digit (p[1]))))
    {
      char *tok_start = p;

      while (p < end)
        {
	  /* Compare against the exponent letters explicitly.  A
	     strchr lookup in "eEpP" would also succeed for a NUL
	     byte, because strchr treats the terminator as part of
	     the string it searches.  */
	  if (p + 2 <= end
	      && (*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P')
	      && (p[1] == '+' || p[1] == '-'))
            p += 2;
          else if (macro_is_digit (*p)
		   || macro_is_identifier_nondigit (*p)
		   || *p == '.')
            p++;
          else
            break;
        }

      set_token (tok, tok_start, p);
      return 1;
    }
  else
    return 0;
}
318 
319 
320 
/* If the text from P up to (but not including) END begins with a
   character constant --- either a plain one or one carrying an L, u
   or U prefix --- make *TOK refer to it and return 1.  Return 0 if
   the text does not begin with a character constant.  Signal an
   error if the constant is empty or its closing quote is missing.  */
static int
get_character_constant (struct macro_buffer *tok, char *p, char *end)
{
  /* ISO/IEC 9899:1999 (E)  Section 6.4.4.4  paragraph 1
     What really matters, though, is that we treat these the way
     GDB's C/C++ lexer does, so escape sequences are handed to
     c_parse_escape.  */
  char *tok_start = p;
  int char_count = 0;
  int done = 0;
  int plain = (p + 1 <= end && *p == '\'');
  int prefixed = (! plain
		  && p + 2 <= end
		  && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
		  && p[1] == '\'');

  if (! plain && ! prefixed)
    return 0;

  /* Step over the opening quote, and the prefix if there is one.  */
  if (*p == '\'')
    p++;
  else if (*p == 'L' || *p == 'u' || *p == 'U')
    p += 2;
  else
    gdb_assert_not_reached ("unexpected character constant");

  while (! done)
    {
      if (p >= end)
	error (_("Unmatched single quote."));
      else if (*p == '\'')
	{
	  if (char_count == 0)
	    error (_("A character constant must contain at least one "
		   "character."));
	  p++;
	  done = 1;
	}
      else if (*p == '\\')
	{
	  /* Let c_parse_escape consume the text after the backslash;
	     it advances ESCAPE_END past the escape sequence, and its
	     return value is added to the character count.  */
	  const char *escape_start = p + 1;
	  const char *escape_end = escape_start;

	  char_count += c_parse_escape (&escape_end, NULL);
	  p = p + 1 + (escape_end - escape_start);
	}
      else
	{
	  p++;
	  char_count++;
	}
    }

  set_token (tok, tok_start, p);
  return 1;
}
381 
382 
/* If the text from P up to (but not including) END begins with a
   string literal --- either a plain one or one carrying an L, u or U
   prefix --- make *TOK refer to it and return 1.  Return 0 if the
   text does not begin with a string literal.  Signal an error if the
   closing quote is missing or the literal contains a newline.  */
static int
get_string_literal (struct macro_buffer *tok, char *p, char *end)
{
  char *tok_start = p;
  int done = 0;
  int plain = (p + 1 <= end && *p == '"');
  int prefixed = (! plain
		  && p + 2 <= end
		  && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
		  && p[1] == '"');

  if (! plain && ! prefixed)
    return 0;

  /* Step over the opening quote, and the prefix if there is one.  */
  if (*p == '"')
    p++;
  else if (*p == 'L' || *p == 'u' || *p == 'U')
    p += 2;
  else
    gdb_assert_not_reached ("unexpected string literal");

  while (! done)
    {
      if (p >= end)
	error (_("Unterminated string in expression."));
      else if (*p == '"')
	{
	  p++;
	  done = 1;
	}
      else if (*p == '\n')
	error (_("Newline characters may not appear in string "
	       "constants."));
      else if (*p == '\\')
	{
	  /* Let c_parse_escape consume the text after the backslash;
	     it advances ESCAPE_END past the escape sequence.  */
	  const char *escape_start = p + 1;
	  const char *escape_end = escape_start;

	  c_parse_escape (&escape_end, NULL);
	  p = p + 1 + (escape_end - escape_start);
	}
      else
	p++;
    }

  set_token (tok, tok_start, p);
  return 1;
}
435 
436 
/* If the text from P up to (but not including) END begins with a
   punctuator, make *TOK refer to the longest punctuator found there
   and return 1.  Otherwise return 0.  */
static int
get_punctuator (struct macro_buffer *tok, char *p, char *end)
{
  /* Correctness and clarity matter far more than speed here.  */

  /* ISO/IEC 9899:1999 (E)  Section 6.4.6  Paragraph 1.
     The order of this table is significant.  The first entry that
     matches wins, so any punctuator that is a prefix of a longer one
     has to come after that longer one; otherwise the shorter token
     would be returned.  */
  static const char * const punctuators[] = {
    "[", "]", "(", ")", "{", "}", "?", ";", ",", "~",
    "...", ".",
    "->", "--", "-=", "-",
    "++", "+=", "+",
    "*=", "*",
    "!=", "!",
    "&&", "&=", "&",
    "/=", "/",
    "%>", "%:%:", "%:", "%=", "%",
    "^=", "^",
    "##", "#",
    ":>", ":",
    "||", "|=", "|",
    "<<=", "<<", "<=", "<:", "<%", "<",
    ">>=", ">>", ">=", ">",
    "==", "=",
    0
  };

  const char * const *entry;

  /* Nothing to match against an empty range.  */
  if (p >= end)
    return 0;

  for (entry = punctuators; *entry != 0; entry++)
    {
      const char *punctuator = *entry;
      int len;

      if (punctuator[0] != p[0])
	continue;

      len = strlen (punctuator);
      if (p + len <= end
	  && memcmp (p, punctuator, len) == 0)
	{
	  set_token (tok, p, p + len);
	  return 1;
	}
    }

  return 0;
}
490 
491 
492 /* Peel the next preprocessor token off of SRC, and put it in TOK.
493    Mutate TOK to refer to the first token in SRC, and mutate SRC to
494    refer to the text after that token.  SRC must be a shared buffer;
495    the resulting TOK will be shared, pointing into the same string SRC
496    does.  Initialize TOK's last_token field.  Return non-zero if we
497    succeed, or 0 if we didn't find any more tokens in SRC.  */
498 static int
499 get_token (struct macro_buffer *tok,
500            struct macro_buffer *src)
501 {
502   char *p = src->text;
503   char *end = p + src->len;
504 
505   gdb_assert (src->shared);
506 
507   /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4:
508 
509      preprocessing-token:
510          header-name
511          identifier
512          pp-number
513          character-constant
514          string-literal
515          punctuator
516          each non-white-space character that cannot be one of the above
517 
518      We don't have to deal with header-name tokens, since those can
519      only occur after a #include, which we will never see.  */
520 
521   while (p < end)
522     if (macro_is_whitespace (*p))
523       p++;
524     else if (get_comment (tok, p, end))
525       p += tok->len;
526     else if (get_pp_number (tok, p, end)
527              || get_character_constant (tok, p, end)
528              || get_string_literal (tok, p, end)
529              /* Note: the grammar in the standard seems to be
530                 ambiguous: L'x' can be either a wide character
531                 constant, or an identifier followed by a normal
532                 character constant.  By trying `get_identifier' after
533                 we try get_character_constant and get_string_literal,
534                 we give the wide character syntax precedence.  Now,
535                 since GDB doesn't handle wide character constants
536                 anyway, is this the right thing to do?  */
537              || get_identifier (tok, p, end)
538              || get_punctuator (tok, p, end))
539       {
540         /* How many characters did we consume, including whitespace?  */
541         int consumed = p - src->text + tok->len;
542 
543         src->text += consumed;
544         src->len -= consumed;
545         return 1;
546       }
547     else
548       {
549         /* We have found a "non-whitespace character that cannot be
550            one of the above."  Make a token out of it.  */
551         int consumed;
552 
553         set_token (tok, p, p + 1);
554         consumed = p - src->text + tok->len;
555         src->text += consumed;
556         src->len -= consumed;
557         return 1;
558       }
559 
560   return 0;
561 }
562 
563 
564 
565 /* Appending token strings, with and without splicing  */
566 
567 
568 /* Append the macro buffer SRC to the end of DEST, and ensure that
569    doing so doesn't splice the token at the end of SRC with the token
570    at the beginning of DEST.  SRC and DEST must have their last_token
571    fields set.  Upon return, DEST's last_token field is set correctly.
572 
573    For example:
574 
575    If DEST is "(" and SRC is "y", then we can return with
576    DEST set to "(y" --- we've simply appended the two buffers.
577 
578    However, if DEST is "x" and SRC is "y", then we must not return
579    with DEST set to "xy" --- that would splice the two tokens "x" and
580    "y" together to make a single token "xy".  However, it would be
581    fine to return with DEST set to "x y".  Similarly, "<" and "<" must
582    yield "< <", not "<<", etc.  */
583 static void
584 append_tokens_without_splicing (struct macro_buffer *dest,
585                                 struct macro_buffer *src)
586 {
587   int original_dest_len = dest->len;
588   struct macro_buffer dest_tail, new_token;
589 
590   gdb_assert (src->last_token != -1);
591   gdb_assert (dest->last_token != -1);
592 
593   /* First, just try appending the two, and call get_token to see if
594      we got a splice.  */
595   appendmem (dest, src->text, src->len);
596 
597   /* If DEST originally had no token abutting its end, then we can't
598      have spliced anything, so we're done.  */
599   if (dest->last_token == original_dest_len)
600     {
601       dest->last_token = original_dest_len + src->last_token;
602       return;
603     }
604 
605   /* Set DEST_TAIL to point to the last token in DEST, followed by
606      all the stuff we just appended.  */
607   init_shared_buffer (&dest_tail,
608                       dest->text + dest->last_token,
609                       dest->len - dest->last_token);
610 
611   /* Re-parse DEST's last token.  We know that DEST used to contain
612      at least one token, so if it doesn't contain any after the
613      append, then we must have spliced "/" and "*" or "/" and "/" to
614      make a comment start.  (Just for the record, I got this right
615      the first time.  This is not a bug fix.)  */
616   if (get_token (&new_token, &dest_tail)
617       && (new_token.text + new_token.len
618           == dest->text + original_dest_len))
619     {
620       /* No splice, so we're done.  */
621       dest->last_token = original_dest_len + src->last_token;
622       return;
623     }
624 
625   /* Okay, a simple append caused a splice.  Let's chop dest back to
626      its original length and try again, but separate the texts with a
627      space.  */
628   dest->len = original_dest_len;
629   appendc (dest, ' ');
630   appendmem (dest, src->text, src->len);
631 
632   init_shared_buffer (&dest_tail,
633                       dest->text + dest->last_token,
634                       dest->len - dest->last_token);
635 
636   /* Try to re-parse DEST's last token, as above.  */
637   if (get_token (&new_token, &dest_tail)
638       && (new_token.text + new_token.len
639           == dest->text + original_dest_len))
640     {
641       /* No splice, so we're done.  */
642       dest->last_token = original_dest_len + 1 + src->last_token;
643       return;
644     }
645 
646   /* As far as I know, there's no case where inserting a space isn't
647      enough to prevent a splice.  */
648   internal_error (__FILE__, __LINE__,
649                   _("unable to avoid splicing tokens during macro expansion"));
650 }
651 
652 /* Stringify an argument, and insert it into DEST.  ARG is the text to
653    stringify; it is LEN bytes long.  */
654 
655 static void
656 stringify (struct macro_buffer *dest, const char *arg, int len)
657 {
658   /* Trim initial whitespace from ARG.  */
659   while (len > 0 && macro_is_whitespace (*arg))
660     {
661       ++arg;
662       --len;
663     }
664 
665   /* Trim trailing whitespace from ARG.  */
666   while (len > 0 && macro_is_whitespace (arg[len - 1]))
667     --len;
668 
669   /* Insert the string.  */
670   appendc (dest, '"');
671   while (len > 0)
672     {
673       /* We could try to handle strange cases here, like control
674 	 characters, but there doesn't seem to be much point.  */
675       if (macro_is_whitespace (*arg))
676 	{
677 	  /* Replace a sequence of whitespace with a single space.  */
678 	  appendc (dest, ' ');
679 	  while (len > 1 && macro_is_whitespace (arg[1]))
680 	    {
681 	      ++arg;
682 	      --len;
683 	    }
684 	}
685       else if (*arg == '\\' || *arg == '"')
686 	{
687 	  appendc (dest, '\\');
688 	  appendc (dest, *arg);
689 	}
690       else
691 	appendc (dest, *arg);
692       ++arg;
693       --len;
694     }
695   appendc (dest, '"');
696   dest->last_token = dest->len;
697 }
698 
699 /* See macroexp.h.  */
700 
701 char *
702 macro_stringify (const char *str)
703 {
704   struct macro_buffer buffer;
705   int len = strlen (str);
706 
707   init_buffer (&buffer, len);
708   stringify (&buffer, str, len);
709   appendc (&buffer, '\0');
710 
711   return free_buffer_return_text (&buffer);
712 }
713 
714 
715 /* Expanding macros!  */
716 
717 
/* One link in a singly-linked list naming the macros whose
   expansions are currently in progress.  The list is consulted to
   detect expansion loops.  */
struct macro_name_list
{
  /* The name of a macro being expanded.  */
  const char *name;

  /* The rest of the list, or a null pointer at the end.  */
  struct macro_name_list *next;
};
724 
725 
726 /* Return non-zero if we are currently expanding the macro named NAME,
727    according to LIST; otherwise, return zero.
728 
729    You know, it would be possible to get rid of all the NO_LOOP
730    arguments to these functions by simply generating a new lookup
731    function and baton which refuses to find the definition for a
732    particular macro, and otherwise delegates the decision to another
733    function/baton pair.  But that makes the linked list of excluded
734    macros chained through untyped baton pointers, which will make it
735    harder to debug.  :(  */
736 static int
737 currently_rescanning (struct macro_name_list *list, const char *name)
738 {
739   for (; list; list = list->next)
740     if (strcmp (name, list->name) == 0)
741       return 1;
742 
743   return 0;
744 }
745 
746 
747 /* Gather the arguments to a macro expansion.
748 
749    NAME is the name of the macro being invoked.  (It's only used for
750    printing error messages.)
751 
752    Assume that SRC is the text of the macro invocation immediately
753    following the macro name.  For example, if we're processing the
754    text foo(bar, baz), then NAME would be foo and SRC will be (bar,
755    baz).
756 
757    If SRC doesn't start with an open paren ( token at all, return
758    zero, leave SRC unchanged, and don't set *ARGC_P to anything.
759 
760    If SRC doesn't contain a properly terminated argument list, then
761    raise an error.
762 
763    For a variadic macro, NARGS holds the number of formal arguments to
764    the macro.  For a GNU-style variadic macro, this should be the
765    number of named arguments.  For a non-variadic macro, NARGS should
766    be -1.
767 
768    Otherwise, return a pointer to the first element of an array of
769    macro buffers referring to the argument texts, and set *ARGC_P to
770    the number of arguments we found --- the number of elements in the
771    array.  The macro buffers share their text with SRC, and their
772    last_token fields are initialized.  The array is allocated with
773    xmalloc, and the caller is responsible for freeing it.
774 
775    NOTE WELL: if SRC starts with a open paren ( token followed
776    immediately by a close paren ) token (e.g., the invocation looks
777    like "foo()"), we treat that as one argument, which happens to be
778    the empty list of tokens.  The caller should keep in mind that such
779    a sequence of tokens is a valid way to invoke one-parameter
780    function-like macros, but also a valid way to invoke zero-parameter
781    function-like macros.  Eeew.
782 
783    Consume the tokens from SRC; after this call, SRC contains the text
784    following the invocation.  */
785 
786 static struct macro_buffer *
787 gather_arguments (const char *name, struct macro_buffer *src,
788 		  int nargs, int *argc_p)
789 {
790   struct macro_buffer tok;
791   int args_len, args_size;
792   struct macro_buffer *args = NULL;
793   struct cleanup *back_to = make_cleanup (free_current_contents, &args);
794 
795   /* Does SRC start with an opening paren token?  Read from a copy of
796      SRC, so SRC itself is unaffected if we don't find an opening
797      paren.  */
798   {
799     struct macro_buffer temp;
800 
801     init_shared_buffer (&temp, src->text, src->len);
802 
803     if (! get_token (&tok, &temp)
804         || tok.len != 1
805         || tok.text[0] != '(')
806       {
807         discard_cleanups (back_to);
808         return 0;
809       }
810   }
811 
812   /* Consume SRC's opening paren.  */
813   get_token (&tok, src);
814 
815   args_len = 0;
816   args_size = 6;
817   args = XNEWVEC (struct macro_buffer, args_size);
818 
819   for (;;)
820     {
821       struct macro_buffer *arg;
822       int depth;
823 
824       /* Make sure we have room for the next argument.  */
825       if (args_len >= args_size)
826         {
827           args_size *= 2;
828           args = XRESIZEVEC (struct macro_buffer, args, args_size);
829         }
830 
831       /* Initialize the next argument.  */
832       arg = &args[args_len++];
833       set_token (arg, src->text, src->text);
834 
835       /* Gather the argument's tokens.  */
836       depth = 0;
837       for (;;)
838         {
839           if (! get_token (&tok, src))
840             error (_("Malformed argument list for macro `%s'."), name);
841 
842           /* Is tok an opening paren?  */
843           if (tok.len == 1 && tok.text[0] == '(')
844             depth++;
845 
846           /* Is tok is a closing paren?  */
847           else if (tok.len == 1 && tok.text[0] == ')')
848             {
849               /* If it's a closing paren at the top level, then that's
850                  the end of the argument list.  */
851               if (depth == 0)
852                 {
853 		  /* In the varargs case, the last argument may be
854 		     missing.  Add an empty argument in this case.  */
855 		  if (nargs != -1 && args_len == nargs - 1)
856 		    {
857 		      /* Make sure we have room for the argument.  */
858 		      if (args_len >= args_size)
859 			{
860 			  args_size++;
861 			  args = XRESIZEVEC (struct macro_buffer, args,
862 					     args_size);
863 			}
864 		      arg = &args[args_len++];
865 		      set_token (arg, src->text, src->text);
866 		    }
867 
868                   discard_cleanups (back_to);
869                   *argc_p = args_len;
870                   return args;
871                 }
872 
873               depth--;
874             }
875 
876           /* If tok is a comma at top level, then that's the end of
877              the current argument.  However, if we are handling a
878              variadic macro and we are computing the last argument, we
879              want to include the comma and remaining tokens.  */
880           else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
881 		   && (nargs == -1 || args_len < nargs))
882             break;
883 
884           /* Extend the current argument to enclose this token.  If
885              this is the current argument's first token, leave out any
886              leading whitespace, just for aesthetics.  */
887           if (arg->len == 0)
888             {
889               arg->text = tok.text;
890               arg->len = tok.len;
891               arg->last_token = 0;
892             }
893           else
894             {
895               arg->len = (tok.text + tok.len) - arg->text;
896               arg->last_token = tok.text - arg->text;
897             }
898         }
899     }
900 }
901 
902 
903 /* The `expand' and `substitute_args' functions both invoke `scan'
904    recursively, so we need a forward declaration somewhere.  */
905 static void scan (struct macro_buffer *dest,
906                   struct macro_buffer *src,
907                   struct macro_name_list *no_loop,
908                   macro_lookup_ftype *lookup_func,
909                   void *lookup_baton);
910 
911 
912 /* A helper function for substitute_args.
913 
914    ARGV is a vector of all the arguments; ARGC is the number of
915    arguments.  IS_VARARGS is true if the macro being substituted is a
916    varargs macro; in this case VA_ARG_NAME is the name of the
917    "variable" argument.  VA_ARG_NAME is ignored if IS_VARARGS is
918    false.
919 
920    If the token TOK is the name of a parameter, return the parameter's
921    index.  If TOK is not an argument, return -1.  */
922 
923 static int
924 find_parameter (const struct macro_buffer *tok,
925 		int is_varargs, const struct macro_buffer *va_arg_name,
926 		int argc, const char * const *argv)
927 {
928   int i;
929 
930   if (! tok->is_identifier)
931     return -1;
932 
933   for (i = 0; i < argc; ++i)
934     if (tok->len == strlen (argv[i])
935 	&& !memcmp (tok->text, argv[i], tok->len))
936       return i;
937 
938   if (is_varargs && tok->len == va_arg_name->len
939       && ! memcmp (tok->text, va_arg_name->text, tok->len))
940     return argc - 1;
941 
942   return -1;
943 }
944 
945 /* Given the macro definition DEF, being invoked with the actual
946    arguments given by ARGC and ARGV, substitute the arguments into the
947    replacement list, and store the result in DEST.
948 
949    IS_VARARGS should be true if DEF is a varargs macro.  In this case,
950    VA_ARG_NAME should be the name of the "variable" argument -- either
951    __VA_ARGS__ for c99-style varargs, or the final argument name, for
952    GNU-style varargs.  If IS_VARARGS is false, this parameter is
953    ignored.
954 
955    If it is necessary to expand macro invocations in one of the
956    arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
957    definitions, and don't expand invocations of the macros listed in
958    NO_LOOP.  */
959 
960 static void
961 substitute_args (struct macro_buffer *dest,
962                  struct macro_definition *def,
963 		 int is_varargs, const struct macro_buffer *va_arg_name,
964                  int argc, struct macro_buffer *argv,
965                  struct macro_name_list *no_loop,
966                  macro_lookup_ftype *lookup_func,
967                  void *lookup_baton)
968 {
969   /* A macro buffer for the macro's replacement list.  */
970   struct macro_buffer replacement_list;
971   /* The token we are currently considering.  */
972   struct macro_buffer tok;
973   /* The replacement list's pointer from just before TOK was lexed.  */
974   char *original_rl_start;
975   /* We have a single lookahead token to handle token splicing.  */
976   struct macro_buffer lookahead;
977   /* The lookahead token might not be valid.  */
978   int lookahead_valid;
979   /* The replacement list's pointer from just before LOOKAHEAD was
980      lexed.  */
981   char *lookahead_rl_start;
982 
983   init_shared_buffer (&replacement_list, (char *) def->replacement,
984                       strlen (def->replacement));
985 
986   gdb_assert (dest->len == 0);
987   dest->last_token = 0;
988 
989   original_rl_start = replacement_list.text;
990   if (! get_token (&tok, &replacement_list))
991     return;
992   lookahead_rl_start = replacement_list.text;
993   lookahead_valid = get_token (&lookahead, &replacement_list);
994 
995   for (;;)
996     {
997       /* Just for aesthetics.  If we skipped some whitespace, copy
998          that to DEST.  */
999       if (tok.text > original_rl_start)
1000         {
1001           appendmem (dest, original_rl_start, tok.text - original_rl_start);
1002           dest->last_token = dest->len;
1003         }
1004 
1005       /* Is this token the stringification operator?  */
1006       if (tok.len == 1
1007           && tok.text[0] == '#')
1008 	{
1009 	  int arg;
1010 
1011 	  if (!lookahead_valid)
1012 	    error (_("Stringification operator requires an argument."));
1013 
1014 	  arg = find_parameter (&lookahead, is_varargs, va_arg_name,
1015 				def->argc, def->argv);
1016 	  if (arg == -1)
1017 	    error (_("Argument to stringification operator must name "
1018 		     "a macro parameter."));
1019 
1020 	  stringify (dest, argv[arg].text, argv[arg].len);
1021 
1022 	  /* Read one token and let the loop iteration code handle the
1023 	     rest.  */
1024 	  lookahead_rl_start = replacement_list.text;
1025 	  lookahead_valid = get_token (&lookahead, &replacement_list);
1026 	}
1027       /* Is this token the splicing operator?  */
1028       else if (tok.len == 2
1029 	       && tok.text[0] == '#'
1030 	       && tok.text[1] == '#')
1031 	error (_("Stray splicing operator"));
1032       /* Is the next token the splicing operator?  */
1033       else if (lookahead_valid
1034 	       && lookahead.len == 2
1035 	       && lookahead.text[0] == '#'
1036 	       && lookahead.text[1] == '#')
1037 	{
1038 	  int finished = 0;
1039 	  int prev_was_comma = 0;
1040 
1041 	  /* Note that GCC warns if the result of splicing is not a
1042 	     token.  In the debugger there doesn't seem to be much
1043 	     benefit from doing this.  */
1044 
1045 	  /* Insert the first token.  */
1046 	  if (tok.len == 1 && tok.text[0] == ',')
1047 	    prev_was_comma = 1;
1048 	  else
1049 	    {
1050 	      int arg = find_parameter (&tok, is_varargs, va_arg_name,
1051 					def->argc, def->argv);
1052 
1053 	      if (arg != -1)
1054 		appendmem (dest, argv[arg].text, argv[arg].len);
1055 	      else
1056 		appendmem (dest, tok.text, tok.len);
1057 	    }
1058 
1059 	  /* Apply a possible sequence of ## operators.  */
1060 	  for (;;)
1061 	    {
1062 	      if (! get_token (&tok, &replacement_list))
1063 		error (_("Splicing operator at end of macro"));
1064 
1065 	      /* Handle a comma before a ##.  If we are handling
1066 		 varargs, and the token on the right hand side is the
1067 		 varargs marker, and the final argument is empty or
1068 		 missing, then drop the comma.  This is a GNU
1069 		 extension.  There is one ambiguous case here,
1070 		 involving pedantic behavior with an empty argument,
1071 		 but we settle that in favor of GNU-style (GCC uses an
1072 		 option).  If we aren't dealing with varargs, we
1073 		 simply insert the comma.  */
1074 	      if (prev_was_comma)
1075 		{
1076 		  if (! (is_varargs
1077 			 && tok.len == va_arg_name->len
1078 			 && !memcmp (tok.text, va_arg_name->text, tok.len)
1079 			 && argv[argc - 1].len == 0))
1080 		    appendmem (dest, ",", 1);
1081 		  prev_was_comma = 0;
1082 		}
1083 
1084 	      /* Insert the token.  If it is a parameter, insert the
1085 		 argument.  If it is a comma, treat it specially.  */
1086 	      if (tok.len == 1 && tok.text[0] == ',')
1087 		prev_was_comma = 1;
1088 	      else
1089 		{
1090 		  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1091 					    def->argc, def->argv);
1092 
1093 		  if (arg != -1)
1094 		    appendmem (dest, argv[arg].text, argv[arg].len);
1095 		  else
1096 		    appendmem (dest, tok.text, tok.len);
1097 		}
1098 
1099 	      /* Now read another token.  If it is another splice, we
1100 		 loop.  */
1101 	      original_rl_start = replacement_list.text;
1102 	      if (! get_token (&tok, &replacement_list))
1103 		{
1104 		  finished = 1;
1105 		  break;
1106 		}
1107 
1108 	      if (! (tok.len == 2
1109 		     && tok.text[0] == '#'
1110 		     && tok.text[1] == '#'))
1111 		break;
1112 	    }
1113 
1114 	  if (prev_was_comma)
1115 	    {
1116 	      /* We saw a comma.  Insert it now.  */
1117 	      appendmem (dest, ",", 1);
1118 	    }
1119 
1120           dest->last_token = dest->len;
1121 	  if (finished)
1122 	    lookahead_valid = 0;
1123 	  else
1124 	    {
1125 	      /* Set up for the loop iterator.  */
1126 	      lookahead = tok;
1127 	      lookahead_rl_start = original_rl_start;
1128 	      lookahead_valid = 1;
1129 	    }
1130 	}
1131       else
1132 	{
1133 	  /* Is this token an identifier?  */
1134 	  int substituted = 0;
1135 	  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1136 				    def->argc, def->argv);
1137 
1138 	  if (arg != -1)
1139 	    {
1140 	      struct macro_buffer arg_src;
1141 
1142 	      /* Expand any macro invocations in the argument text,
1143 		 and append the result to dest.  Remember that scan
1144 		 mutates its source, so we need to scan a new buffer
1145 		 referring to the argument's text, not the argument
1146 		 itself.  */
1147 	      init_shared_buffer (&arg_src, argv[arg].text, argv[arg].len);
1148 	      scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
1149 	      substituted = 1;
1150 	    }
1151 
1152 	  /* If it wasn't a parameter, then just copy it across.  */
1153 	  if (! substituted)
1154 	    append_tokens_without_splicing (dest, &tok);
1155 	}
1156 
1157       if (! lookahead_valid)
1158 	break;
1159 
1160       tok = lookahead;
1161       original_rl_start = lookahead_rl_start;
1162 
1163       lookahead_rl_start = replacement_list.text;
1164       lookahead_valid = get_token (&lookahead, &replacement_list);
1165     }
1166 }
1167 
1168 
1169 /* Expand a call to a macro named ID, whose definition is DEF.  Append
1170    its expansion to DEST.  SRC is the input text following the ID
1171    token.  We are currently rescanning the expansions of the macros
1172    named in NO_LOOP; don't re-expand them.  Use LOOKUP_FUNC and
1173    LOOKUP_BATON to find definitions for any nested macro references.
1174 
1175    Return 1 if we decided to expand it, zero otherwise.  (If it's a
1176    function-like macro name that isn't followed by an argument list,
1177    we don't expand it.)  If we return zero, leave SRC unchanged.  */
1178 static int
1179 expand (const char *id,
1180         struct macro_definition *def,
1181         struct macro_buffer *dest,
1182         struct macro_buffer *src,
1183         struct macro_name_list *no_loop,
1184         macro_lookup_ftype *lookup_func,
1185         void *lookup_baton)
1186 {
1187   struct macro_name_list new_no_loop;
1188 
1189   /* Create a new node to be added to the front of the no-expand list.
1190      This list is appropriate for re-scanning replacement lists, but
1191      it is *not* appropriate for scanning macro arguments; invocations
1192      of the macro whose arguments we are gathering *do* get expanded
1193      there.  */
1194   new_no_loop.name = id;
1195   new_no_loop.next = no_loop;
1196 
1197   /* What kind of macro are we expanding?  */
1198   if (def->kind == macro_object_like)
1199     {
1200       struct macro_buffer replacement_list;
1201 
1202       init_shared_buffer (&replacement_list, (char *) def->replacement,
1203                           strlen (def->replacement));
1204 
1205       scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton);
1206       return 1;
1207     }
1208   else if (def->kind == macro_function_like)
1209     {
1210       struct cleanup *back_to = make_cleanup (null_cleanup, 0);
1211       int argc = 0;
1212       struct macro_buffer *argv = NULL;
1213       struct macro_buffer substituted;
1214       struct macro_buffer substituted_src;
1215       struct macro_buffer va_arg_name = {0};
1216       int is_varargs = 0;
1217 
1218       if (def->argc >= 1)
1219 	{
1220 	  if (strcmp (def->argv[def->argc - 1], "...") == 0)
1221 	    {
1222 	      /* In C99-style varargs, substitution is done using
1223 		 __VA_ARGS__.  */
1224 	      init_shared_buffer (&va_arg_name, "__VA_ARGS__",
1225 				  strlen ("__VA_ARGS__"));
1226 	      is_varargs = 1;
1227 	    }
1228 	  else
1229 	    {
1230 	      int len = strlen (def->argv[def->argc - 1]);
1231 
1232 	      if (len > 3
1233 		  && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0)
1234 		{
1235 		  /* In GNU-style varargs, the name of the
1236 		     substitution parameter is the name of the formal
1237 		     argument without the "...".  */
1238 		  init_shared_buffer (&va_arg_name,
1239 				      (char *) def->argv[def->argc - 1],
1240 				      len - 3);
1241 		  is_varargs = 1;
1242 		}
1243 	    }
1244 	}
1245 
1246       make_cleanup (free_current_contents, &argv);
1247       argv = gather_arguments (id, src, is_varargs ? def->argc : -1,
1248 			       &argc);
1249 
1250       /* If we couldn't find any argument list, then we don't expand
1251          this macro.  */
1252       if (! argv)
1253         {
1254           do_cleanups (back_to);
1255           return 0;
1256         }
1257 
1258       /* Check that we're passing an acceptable number of arguments for
1259          this macro.  */
1260       if (argc != def->argc)
1261         {
1262 	  if (is_varargs && argc >= def->argc - 1)
1263 	    {
1264 	      /* Ok.  */
1265 	    }
1266           /* Remember that a sequence of tokens like "foo()" is a
1267              valid invocation of a macro expecting either zero or one
1268              arguments.  */
1269           else if (! (argc == 1
1270 		      && argv[0].len == 0
1271 		      && def->argc == 0))
1272             error (_("Wrong number of arguments to macro `%s' "
1273                    "(expected %d, got %d)."),
1274                    id, def->argc, argc);
1275         }
1276 
1277       /* Note that we don't expand macro invocations in the arguments
1278          yet --- we let subst_args take care of that.  Parameters that
1279          appear as operands of the stringifying operator "#" or the
1280          splicing operator "##" don't get macro references expanded,
1281          so we can't really tell whether it's appropriate to macro-
1282          expand an argument until we see how it's being used.  */
1283       init_buffer (&substituted, 0);
1284       make_cleanup (cleanup_macro_buffer, &substituted);
1285       substitute_args (&substituted, def, is_varargs, &va_arg_name,
1286 		       argc, argv, no_loop, lookup_func, lookup_baton);
1287 
1288       /* Now `substituted' is the macro's replacement list, with all
1289          argument values substituted into it properly.  Re-scan it for
1290          macro references, but don't expand invocations of this macro.
1291 
1292          We create a new buffer, `substituted_src', which points into
1293          `substituted', and scan that.  We can't scan `substituted'
1294          itself, since the tokenization process moves the buffer's
1295          text pointer around, and we still need to be able to find
1296          `substituted's original text buffer after scanning it so we
1297          can free it.  */
1298       init_shared_buffer (&substituted_src, substituted.text, substituted.len);
1299       scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton);
1300 
1301       do_cleanups (back_to);
1302 
1303       return 1;
1304     }
1305   else
1306     internal_error (__FILE__, __LINE__, _("bad macro definition kind"));
1307 }
1308 
1309 
1310 /* If the single token in SRC_FIRST followed by the tokens in SRC_REST
1311    constitute a macro invokation not forbidden in NO_LOOP, append its
1312    expansion to DEST and return non-zero.  Otherwise, return zero, and
1313    leave DEST unchanged.
1314 
1315    SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one.
1316    SRC_FIRST must be a string built by get_token.  */
1317 static int
1318 maybe_expand (struct macro_buffer *dest,
1319               struct macro_buffer *src_first,
1320               struct macro_buffer *src_rest,
1321               struct macro_name_list *no_loop,
1322               macro_lookup_ftype *lookup_func,
1323               void *lookup_baton)
1324 {
1325   gdb_assert (src_first->shared);
1326   gdb_assert (src_rest->shared);
1327   gdb_assert (! dest->shared);
1328 
1329   /* Is this token an identifier?  */
1330   if (src_first->is_identifier)
1331     {
1332       /* Make a null-terminated copy of it, since that's what our
1333          lookup function expects.  */
1334       char *id = (char *) xmalloc (src_first->len + 1);
1335       struct cleanup *back_to = make_cleanup (xfree, id);
1336 
1337       memcpy (id, src_first->text, src_first->len);
1338       id[src_first->len] = 0;
1339 
1340       /* If we're currently re-scanning the result of expanding
1341          this macro, don't expand it again.  */
1342       if (! currently_rescanning (no_loop, id))
1343         {
1344           /* Does this identifier have a macro definition in scope?  */
1345           struct macro_definition *def = lookup_func (id, lookup_baton);
1346 
1347           if (def && expand (id, def, dest, src_rest, no_loop,
1348                              lookup_func, lookup_baton))
1349             {
1350               do_cleanups (back_to);
1351               return 1;
1352             }
1353         }
1354 
1355       do_cleanups (back_to);
1356     }
1357 
1358   return 0;
1359 }
1360 
1361 
1362 /* Expand macro references in SRC, appending the results to DEST.
1363    Assume we are re-scanning the result of expanding the macros named
1364    in NO_LOOP, and don't try to re-expand references to them.
1365 
1366    SRC must be a shared buffer; DEST must not be one.  */
1367 static void
1368 scan (struct macro_buffer *dest,
1369       struct macro_buffer *src,
1370       struct macro_name_list *no_loop,
1371       macro_lookup_ftype *lookup_func,
1372       void *lookup_baton)
1373 {
1374   gdb_assert (src->shared);
1375   gdb_assert (! dest->shared);
1376 
1377   for (;;)
1378     {
1379       struct macro_buffer tok;
1380       char *original_src_start = src->text;
1381 
1382       /* Find the next token in SRC.  */
1383       if (! get_token (&tok, src))
1384         break;
1385 
1386       /* Just for aesthetics.  If we skipped some whitespace, copy
1387          that to DEST.  */
1388       if (tok.text > original_src_start)
1389         {
1390           appendmem (dest, original_src_start, tok.text - original_src_start);
1391           dest->last_token = dest->len;
1392         }
1393 
1394       if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton))
1395         /* We didn't end up expanding tok as a macro reference, so
1396            simply append it to dest.  */
1397         append_tokens_without_splicing (dest, &tok);
1398     }
1399 
1400   /* Just for aesthetics.  If there was any trailing whitespace in
1401      src, copy it to dest.  */
1402   if (src->len)
1403     {
1404       appendmem (dest, src->text, src->len);
1405       dest->last_token = dest->len;
1406     }
1407 }
1408 
1409 
1410 char *
1411 macro_expand (const char *source,
1412               macro_lookup_ftype *lookup_func,
1413               void *lookup_func_baton)
1414 {
1415   struct macro_buffer src, dest;
1416   struct cleanup *back_to;
1417 
1418   init_shared_buffer (&src, (char *) source, strlen (source));
1419 
1420   init_buffer (&dest, 0);
1421   dest.last_token = 0;
1422   back_to = make_cleanup (cleanup_macro_buffer, &dest);
1423 
1424   scan (&dest, &src, 0, lookup_func, lookup_func_baton);
1425 
1426   appendc (&dest, '\0');
1427 
1428   discard_cleanups (back_to);
1429   return dest.text;
1430 }
1431 
1432 
1433 char *
1434 macro_expand_once (const char *source,
1435                    macro_lookup_ftype *lookup_func,
1436                    void *lookup_func_baton)
1437 {
1438   error (_("Expand-once not implemented yet."));
1439 }
1440 
1441 
1442 char *
1443 macro_expand_next (const char **lexptr,
1444                    macro_lookup_ftype *lookup_func,
1445                    void *lookup_baton)
1446 {
1447   struct macro_buffer src, dest, tok;
1448   struct cleanup *back_to;
1449 
1450   /* Set up SRC to refer to the input text, pointed to by *lexptr.  */
1451   init_shared_buffer (&src, (char *) *lexptr, strlen (*lexptr));
1452 
1453   /* Set up DEST to receive the expansion, if there is one.  */
1454   init_buffer (&dest, 0);
1455   dest.last_token = 0;
1456   back_to = make_cleanup (cleanup_macro_buffer, &dest);
1457 
1458   /* Get the text's first preprocessing token.  */
1459   if (! get_token (&tok, &src))
1460     {
1461       do_cleanups (back_to);
1462       return 0;
1463     }
1464 
1465   /* If it's a macro invocation, expand it.  */
1466   if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton))
1467     {
1468       /* It was a macro invocation!  Package up the expansion as a
1469          null-terminated string and return it.  Set *lexptr to the
1470          start of the next token in the input.  */
1471       appendc (&dest, '\0');
1472       discard_cleanups (back_to);
1473       *lexptr = src.text;
1474       return dest.text;
1475     }
1476   else
1477     {
1478       /* It wasn't a macro invocation.  */
1479       do_cleanups (back_to);
1480       return 0;
1481     }
1482 }
1483