xref: /netbsd-src/external/gpl3/gdb.old/dist/gdb/macroexp.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /* C preprocessor macro expansion for GDB.
2    Copyright (C) 2002-2015 Free Software Foundation, Inc.
3    Contributed by Red Hat, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "defs.h"
21 #include "gdb_obstack.h"
22 #include "bcache.h"
23 #include "macrotab.h"
24 #include "macroexp.h"
25 #include "c-lang.h"
26 
27 
28 
29 /* A resizeable, substringable string type.  */
30 
31 
/* A growable string which can alternatively act as a non-owning view
   onto a piece of some other string.  */
struct macro_buffer
{
  /* The characters themselves.  Only the first LEN bytes are
     meaningful, although SIZE bytes are allocated.  A SIZE of zero
     means TEXT is not a malloc'ed block.  A non-zero SHARED means
     TEXT points into some larger string, so nothing may be appended
     to it.  Because a view can stop anywhere, never rely on TEXT
     being null-terminated.  */
  char *text;

  /* How many characters of TEXT are in use.  */
  int len;

  /* How many characters are allocated for TEXT.  This has no meaning
     for a shared buffer; there it is kept at zero so that every "is
     there room to append?" test fails without SHARED having to be
     consulted first.  */
  int size;

  /* Zero when TEXT is its own malloc block and may be realloc'ed.
     Non-zero when TEXT points into the middle of another block and
     must not be reallocated.  */
  int shared;

  /* Bookkeeping for detecting token splicing.

     The index in TEXT of the first character of the token that
     touches the end of TEXT.  Equal to LEN when TEXT holds no tokens
     at all, and also when TEXT ends in whitespace (so no token
     touches its end).  -1 when nothing is known about the contents
     of TEXT.  */
  int last_token;

  /* Meaningful when this buffer holds a result from get_token:
     non-zero for an identifier token, zero for anything else.  */
  int is_identifier;
};
73 
74 
75 /* Set the macro buffer *B to the empty string, guessing that its
76    final contents will fit in N bytes.  (It'll get resized if it
77    doesn't, so the guess doesn't have to be right.)  Allocate the
78    initial storage with xmalloc.  */
79 static void
80 init_buffer (struct macro_buffer *b, int n)
81 {
82   b->size = n;
83   if (n > 0)
84     b->text = (char *) xmalloc (n);
85   else
86     b->text = NULL;
87   b->len = 0;
88   b->shared = 0;
89   b->last_token = -1;
90 }
91 
92 
93 /* Set the macro buffer *BUF to refer to the LEN bytes at ADDR, as a
94    shared substring.  */
95 static void
96 init_shared_buffer (struct macro_buffer *buf, char *addr, int len)
97 {
98   buf->text = addr;
99   buf->len = len;
100   buf->shared = 1;
101   buf->size = 0;
102   buf->last_token = -1;
103 }
104 
105 
106 /* Free the text of the buffer B.  Raise an error if B is shared.  */
107 static void
108 free_buffer (struct macro_buffer *b)
109 {
110   gdb_assert (! b->shared);
111   if (b->size)
112     xfree (b->text);
113 }
114 
115 /* Like free_buffer, but return the text as an xstrdup()d string.
116    This only exists to try to make the API relatively clean.  */
117 
118 static char *
119 free_buffer_return_text (struct macro_buffer *b)
120 {
121   gdb_assert (! b->shared);
122   gdb_assert (b->size);
123   /* Nothing to do.  */
124   return b->text;
125 }
126 
/* Cleanup callback for macro buffers.  UNTYPED_BUF is a pointer to a
   struct macro_buffer, which is handed to free_buffer.  */
static void
cleanup_macro_buffer (void *untyped_buf)
{
  struct macro_buffer *buf = (struct macro_buffer *) untyped_buf;

  free_buffer (buf);
}
133 
134 
135 /* Resize the buffer B to be at least N bytes long.  Raise an error if
136    B shouldn't be resized.  */
137 static void
138 resize_buffer (struct macro_buffer *b, int n)
139 {
140   /* We shouldn't be trying to resize shared strings.  */
141   gdb_assert (! b->shared);
142 
143   if (b->size == 0)
144     b->size = n;
145   else
146     while (b->size <= n)
147       b->size *= 2;
148 
149   b->text = xrealloc (b->text, b->size);
150 }
151 
152 
153 /* Append the character C to the buffer B.  */
154 static void
155 appendc (struct macro_buffer *b, int c)
156 {
157   int new_len = b->len + 1;
158 
159   if (new_len > b->size)
160     resize_buffer (b, new_len);
161 
162   b->text[b->len] = c;
163   b->len = new_len;
164 }
165 
166 
167 /* Append the LEN bytes at ADDR to the buffer B.  */
168 static void
169 appendmem (struct macro_buffer *b, char *addr, int len)
170 {
171   int new_len = b->len + len;
172 
173   if (new_len > b->size)
174     resize_buffer (b, new_len);
175 
176   memcpy (b->text + b->len, addr, len);
177   b->len = new_len;
178 }
179 
180 
181 
182 /* Recognizing preprocessor tokens.  */
183 
184 
/* Return 1 if C is a space, horizontal tab, newline, vertical tab or
   form feed; return 0 for any other value.  */
int
macro_is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
      return 1;

    default:
      return 0;
    }
}
194 
195 
/* Return 1 if C is one of the decimal digits '0' through '9', and 0
   otherwise.  */
int
macro_is_digit (int c)
{
  if (c < '0')
    return 0;

  return c <= '9';
}
201 
202 
/* Return 1 if C may appear in an identifier other than as a digit:
   an underscore, or a letter in 'a'..'z' or 'A'..'Z'.  Return 0
   otherwise.  */
int
macro_is_identifier_nondigit (int c)
{
  if (c == '_')
    return 1;

  if (c >= 'a' && c <= 'z')
    return 1;

  return c >= 'A' && c <= 'Z';
}
210 
211 
212 static void
213 set_token (struct macro_buffer *tok, char *start, char *end)
214 {
215   init_shared_buffer (tok, start, end - start);
216   tok->last_token = 0;
217 
218   /* Presumed; get_identifier may overwrite this.  */
219   tok->is_identifier = 0;
220 }
221 
222 
/* If the text starting at P going up to (but not including) END
   starts with a comment, set *TOK to cover that comment and return
   1.  Otherwise, return zero.

   A block comment runs from the opening slash-star through the
   matching star-slash; it is an error if the closing delimiter is
   missing.  A line comment runs from the two slashes up to, but not
   including, the next newline, or up to END if there is none.  */
static int
get_comment (struct macro_buffer *tok, char *p, char *end)
{
  char *cur;

  /* Either kind of comment needs two characters to get started.  */
  if (p + 2 > end || p[0] != '/')
    return 0;

  if (p[1] == '*')
    {
      /* Start looking after the opener, so that its own star cannot
         be taken for part of the closing delimiter.  */
      for (cur = p + 2; cur < end; cur++)
        if (cur + 2 <= end
            && cur[0] == '*'
            && cur[1] == '/')
          {
            set_token (tok, p, cur + 2);
            return 1;
          }

      error (_("Unterminated comment in macro expansion."));
    }

  if (p[1] == '/')
    {
      cur = p + 2;
      while (cur < end && *cur != '\n')
        cur++;

      set_token (tok, p, cur);
      return 1;
    }

  return 0;
}
263 
264 
265 static int
266 get_identifier (struct macro_buffer *tok, char *p, char *end)
267 {
268   if (p < end
269       && macro_is_identifier_nondigit (*p))
270     {
271       char *tok_start = p;
272 
273       while (p < end
274              && (macro_is_identifier_nondigit (*p)
275                  || macro_is_digit (*p)))
276         p++;
277 
278       set_token (tok, tok_start, p);
279       tok->is_identifier = 1;
280       return 1;
281     }
282   else
283     return 0;
284 }
285 
286 
/* If the text starting at P going up to (but not including) END
   starts with a preprocessing number, set *TOK to cover it and
   return 1.  Otherwise, return zero.

   A preprocessing number starts with a digit, or with a '.' that is
   immediately followed by a digit.  It then continues through
   digits, identifier-nondigit characters, '.' characters, and
   exponent markers ('e', 'E', 'p' or 'P') immediately followed by a
   sign.  */
static int
get_pp_number (struct macro_buffer *tok, char *p, char *end)
{
  char *tok_start = p;

  if (p >= end)
    return 0;

  if (! macro_is_digit (*p)
      && ! (*p == '.'
            && p + 2 <= end
            && macro_is_digit (p[1])))
    return 0;

  while (p < end)
    {
      /* Compare against the exponent letters explicitly.  Looking
         the character up with strchr would also succeed for a NUL
         byte, since strchr considers the terminator to be part of
         the string it searches.  */
      if (p + 2 <= end
          && (*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P')
          && (p[1] == '+' || p[1] == '-'))
        p += 2;
      else if (macro_is_digit (*p)
               || macro_is_identifier_nondigit (*p)
               || *p == '.')
        p++;
      else
        break;
    }

  set_token (tok, tok_start, p);
  return 1;
}
318 
319 
320 
321 /* If the text starting at P going up to (but not including) END
322    starts with a character constant, set *TOK to point to that
323    character constant, and return 1.  Otherwise, return zero.
324    Signal an error if it contains a malformed or incomplete character
325    constant.  */
326 static int
327 get_character_constant (struct macro_buffer *tok, char *p, char *end)
328 {
329   /* ISO/IEC 9899:1999 (E)  Section 6.4.4.4  paragraph 1
330      But of course, what really matters is that we handle it the same
331      way GDB's C/C++ lexer does.  So we call parse_escape in utils.c
332      to handle escape sequences.  */
333   if ((p + 1 <= end && *p == '\'')
334       || (p + 2 <= end
335 	  && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
336 	  && p[1] == '\''))
337     {
338       char *tok_start = p;
339       int char_count = 0;
340 
341       if (*p == '\'')
342         p++;
343       else if (*p == 'L' || *p == 'u' || *p == 'U')
344         p += 2;
345       else
346         gdb_assert_not_reached ("unexpected character constant");
347 
348       for (;;)
349         {
350           if (p >= end)
351             error (_("Unmatched single quote."));
352           else if (*p == '\'')
353             {
354               if (!char_count)
355                 error (_("A character constant must contain at least one "
356                        "character."));
357               p++;
358               break;
359             }
360           else if (*p == '\\')
361             {
362 	      const char *s, *o;
363 
364 	      s = o = ++p;
365 	      char_count += c_parse_escape (&s, NULL);
366 	      p += s - o;
367             }
368           else
369 	    {
370 	      p++;
371 	      char_count++;
372 	    }
373         }
374 
375       set_token (tok, tok_start, p);
376       return 1;
377     }
378   else
379     return 0;
380 }
381 
382 
383 /* If the text starting at P going up to (but not including) END
384    starts with a string literal, set *TOK to point to that string
385    literal, and return 1.  Otherwise, return zero.  Signal an error if
386    it contains a malformed or incomplete string literal.  */
387 static int
388 get_string_literal (struct macro_buffer *tok, char *p, char *end)
389 {
390   if ((p + 1 <= end
391        && *p == '"')
392       || (p + 2 <= end
393           && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
394           && p[1] == '"'))
395     {
396       char *tok_start = p;
397 
398       if (*p == '"')
399         p++;
400       else if (*p == 'L' || *p == 'u' || *p == 'U')
401         p += 2;
402       else
403         gdb_assert_not_reached ("unexpected string literal");
404 
405       for (;;)
406         {
407           if (p >= end)
408             error (_("Unterminated string in expression."));
409           else if (*p == '"')
410             {
411               p++;
412               break;
413             }
414           else if (*p == '\n')
415             error (_("Newline characters may not appear in string "
416                    "constants."));
417           else if (*p == '\\')
418             {
419 	      const char *s, *o;
420 
421 	      s = o = ++p;
422 	      c_parse_escape (&s, NULL);
423 	      p += s - o;
424             }
425           else
426             p++;
427         }
428 
429       set_token (tok, tok_start, p);
430       return 1;
431     }
432   else
433     return 0;
434 }
435 
436 
/* If the text starting at P going up to (but not including) END
   starts with a punctuator, set *TOK to cover it and return 1.
   Otherwise, return zero.  The first table entry that matches
   wins.  */
static int
get_punctuator (struct macro_buffer *tok, char *p, char *end)
{
  /* Here, speed is much less important than correctness and clarity.  */

  /* ISO/IEC 9899:1999 (E)  Section 6.4.6  Paragraph 1.
     The order of this table matters.  Because the first match wins,
     a punctuator that is a prefix of another punctuator has to come
     after its "extension"; otherwise the shorter one would be
     returned.  */
  static const char * const punctuators[] = {
    "[", "]", "(", ")", "{", "}", "?", ";", ",", "~",
    "...", ".",
    "->", "--", "-=", "-",
    "++", "+=", "+",
    "*=", "*",
    "!=", "!",
    "&&", "&=", "&",
    "/=", "/",
    "%>", "%:%:", "%:", "%=", "%",
    "^=", "^",
    "##", "#",
    ":>", ":",
    "||", "|=", "|",
    "<<=", "<<", "<=", "<:", "<%", "<",
    ">>=", ">>", ">=", ">",
    "==", "=",
    0
  };

  const char * const *entry;

  /* There must be at least one character to look at.  */
  if (p >= end)
    return 0;

  for (entry = punctuators; *entry; entry++)
    {
      int len;

      /* Cheap rejection before measuring and comparing.  */
      if (p[0] != (*entry)[0])
        continue;

      len = strlen (*entry);
      if (p + len <= end
          && memcmp (p, *entry, len) == 0)
        {
          set_token (tok, p, p + len);
          return 1;
        }
    }

  return 0;
}
490 
491 
492 /* Peel the next preprocessor token off of SRC, and put it in TOK.
493    Mutate TOK to refer to the first token in SRC, and mutate SRC to
494    refer to the text after that token.  SRC must be a shared buffer;
495    the resulting TOK will be shared, pointing into the same string SRC
496    does.  Initialize TOK's last_token field.  Return non-zero if we
497    succeed, or 0 if we didn't find any more tokens in SRC.  */
498 static int
499 get_token (struct macro_buffer *tok,
500            struct macro_buffer *src)
501 {
502   char *p = src->text;
503   char *end = p + src->len;
504 
505   gdb_assert (src->shared);
506 
507   /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4:
508 
509      preprocessing-token:
510          header-name
511          identifier
512          pp-number
513          character-constant
514          string-literal
515          punctuator
516          each non-white-space character that cannot be one of the above
517 
518      We don't have to deal with header-name tokens, since those can
519      only occur after a #include, which we will never see.  */
520 
521   while (p < end)
522     if (macro_is_whitespace (*p))
523       p++;
524     else if (get_comment (tok, p, end))
525       p += tok->len;
526     else if (get_pp_number (tok, p, end)
527              || get_character_constant (tok, p, end)
528              || get_string_literal (tok, p, end)
529              /* Note: the grammar in the standard seems to be
530                 ambiguous: L'x' can be either a wide character
531                 constant, or an identifier followed by a normal
532                 character constant.  By trying `get_identifier' after
533                 we try get_character_constant and get_string_literal,
534                 we give the wide character syntax precedence.  Now,
535                 since GDB doesn't handle wide character constants
536                 anyway, is this the right thing to do?  */
537              || get_identifier (tok, p, end)
538              || get_punctuator (tok, p, end))
539       {
540         /* How many characters did we consume, including whitespace?  */
541         int consumed = p - src->text + tok->len;
542 
543         src->text += consumed;
544         src->len -= consumed;
545         return 1;
546       }
547     else
548       {
549         /* We have found a "non-whitespace character that cannot be
550            one of the above."  Make a token out of it.  */
551         int consumed;
552 
553         set_token (tok, p, p + 1);
554         consumed = p - src->text + tok->len;
555         src->text += consumed;
556         src->len -= consumed;
557         return 1;
558       }
559 
560   return 0;
561 }
562 
563 
564 
565 /* Appending token strings, with and without splicing  */
566 
567 
568 /* Append the macro buffer SRC to the end of DEST, and ensure that
569    doing so doesn't splice the token at the end of SRC with the token
570    at the beginning of DEST.  SRC and DEST must have their last_token
571    fields set.  Upon return, DEST's last_token field is set correctly.
572 
573    For example:
574 
575    If DEST is "(" and SRC is "y", then we can return with
576    DEST set to "(y" --- we've simply appended the two buffers.
577 
578    However, if DEST is "x" and SRC is "y", then we must not return
579    with DEST set to "xy" --- that would splice the two tokens "x" and
580    "y" together to make a single token "xy".  However, it would be
581    fine to return with DEST set to "x y".  Similarly, "<" and "<" must
582    yield "< <", not "<<", etc.  */
583 static void
584 append_tokens_without_splicing (struct macro_buffer *dest,
585                                 struct macro_buffer *src)
586 {
587   int original_dest_len = dest->len;
588   struct macro_buffer dest_tail, new_token;
589 
590   gdb_assert (src->last_token != -1);
591   gdb_assert (dest->last_token != -1);
592 
593   /* First, just try appending the two, and call get_token to see if
594      we got a splice.  */
595   appendmem (dest, src->text, src->len);
596 
597   /* If DEST originally had no token abutting its end, then we can't
598      have spliced anything, so we're done.  */
599   if (dest->last_token == original_dest_len)
600     {
601       dest->last_token = original_dest_len + src->last_token;
602       return;
603     }
604 
605   /* Set DEST_TAIL to point to the last token in DEST, followed by
606      all the stuff we just appended.  */
607   init_shared_buffer (&dest_tail,
608                       dest->text + dest->last_token,
609                       dest->len - dest->last_token);
610 
611   /* Re-parse DEST's last token.  We know that DEST used to contain
612      at least one token, so if it doesn't contain any after the
613      append, then we must have spliced "/" and "*" or "/" and "/" to
614      make a comment start.  (Just for the record, I got this right
615      the first time.  This is not a bug fix.)  */
616   if (get_token (&new_token, &dest_tail)
617       && (new_token.text + new_token.len
618           == dest->text + original_dest_len))
619     {
620       /* No splice, so we're done.  */
621       dest->last_token = original_dest_len + src->last_token;
622       return;
623     }
624 
625   /* Okay, a simple append caused a splice.  Let's chop dest back to
626      its original length and try again, but separate the texts with a
627      space.  */
628   dest->len = original_dest_len;
629   appendc (dest, ' ');
630   appendmem (dest, src->text, src->len);
631 
632   init_shared_buffer (&dest_tail,
633                       dest->text + dest->last_token,
634                       dest->len - dest->last_token);
635 
636   /* Try to re-parse DEST's last token, as above.  */
637   if (get_token (&new_token, &dest_tail)
638       && (new_token.text + new_token.len
639           == dest->text + original_dest_len))
640     {
641       /* No splice, so we're done.  */
642       dest->last_token = original_dest_len + 1 + src->last_token;
643       return;
644     }
645 
646   /* As far as I know, there's no case where inserting a space isn't
647      enough to prevent a splice.  */
648   internal_error (__FILE__, __LINE__,
649                   _("unable to avoid splicing tokens during macro expansion"));
650 }
651 
652 /* Stringify an argument, and insert it into DEST.  ARG is the text to
653    stringify; it is LEN bytes long.  */
654 
655 static void
656 stringify (struct macro_buffer *dest, const char *arg, int len)
657 {
658   /* Trim initial whitespace from ARG.  */
659   while (len > 0 && macro_is_whitespace (*arg))
660     {
661       ++arg;
662       --len;
663     }
664 
665   /* Trim trailing whitespace from ARG.  */
666   while (len > 0 && macro_is_whitespace (arg[len - 1]))
667     --len;
668 
669   /* Insert the string.  */
670   appendc (dest, '"');
671   while (len > 0)
672     {
673       /* We could try to handle strange cases here, like control
674 	 characters, but there doesn't seem to be much point.  */
675       if (macro_is_whitespace (*arg))
676 	{
677 	  /* Replace a sequence of whitespace with a single space.  */
678 	  appendc (dest, ' ');
679 	  while (len > 1 && macro_is_whitespace (arg[1]))
680 	    {
681 	      ++arg;
682 	      --len;
683 	    }
684 	}
685       else if (*arg == '\\' || *arg == '"')
686 	{
687 	  appendc (dest, '\\');
688 	  appendc (dest, *arg);
689 	}
690       else
691 	appendc (dest, *arg);
692       ++arg;
693       --len;
694     }
695   appendc (dest, '"');
696   dest->last_token = dest->len;
697 }
698 
699 /* See macroexp.h.  */
700 
701 char *
702 macro_stringify (const char *str)
703 {
704   struct macro_buffer buffer;
705   int len = strlen (str);
706 
707   init_buffer (&buffer, len);
708   stringify (&buffer, str, len);
709   appendc (&buffer, '\0');
710 
711   return free_buffer_return_text (&buffer);
712 }
713 
714 
715 /* Expanding macros!  */
716 
717 
/* One node in a singly-linked list holding the names of the macros
   that are being expanded at the moment.  The list is used to detect
   expansion loops.  */
struct macro_name_list
{
  /* The name of a macro currently being expanded.  */
  const char *name;

  /* The rest of the list.  */
  struct macro_name_list *next;
};
724 
725 
726 /* Return non-zero if we are currently expanding the macro named NAME,
727    according to LIST; otherwise, return zero.
728 
729    You know, it would be possible to get rid of all the NO_LOOP
730    arguments to these functions by simply generating a new lookup
731    function and baton which refuses to find the definition for a
732    particular macro, and otherwise delegates the decision to another
733    function/baton pair.  But that makes the linked list of excluded
734    macros chained through untyped baton pointers, which will make it
735    harder to debug.  :(  */
736 static int
737 currently_rescanning (struct macro_name_list *list, const char *name)
738 {
739   for (; list; list = list->next)
740     if (strcmp (name, list->name) == 0)
741       return 1;
742 
743   return 0;
744 }
745 
746 
747 /* Gather the arguments to a macro expansion.
748 
749    NAME is the name of the macro being invoked.  (It's only used for
750    printing error messages.)
751 
752    Assume that SRC is the text of the macro invocation immediately
753    following the macro name.  For example, if we're processing the
754    text foo(bar, baz), then NAME would be foo and SRC will be (bar,
755    baz).
756 
757    If SRC doesn't start with an open paren ( token at all, return
758    zero, leave SRC unchanged, and don't set *ARGC_P to anything.
759 
760    If SRC doesn't contain a properly terminated argument list, then
761    raise an error.
762 
763    For a variadic macro, NARGS holds the number of formal arguments to
764    the macro.  For a GNU-style variadic macro, this should be the
765    number of named arguments.  For a non-variadic macro, NARGS should
766    be -1.
767 
768    Otherwise, return a pointer to the first element of an array of
769    macro buffers referring to the argument texts, and set *ARGC_P to
770    the number of arguments we found --- the number of elements in the
771    array.  The macro buffers share their text with SRC, and their
772    last_token fields are initialized.  The array is allocated with
773    xmalloc, and the caller is responsible for freeing it.
774 
775    NOTE WELL: if SRC starts with a open paren ( token followed
776    immediately by a close paren ) token (e.g., the invocation looks
777    like "foo()"), we treat that as one argument, which happens to be
778    the empty list of tokens.  The caller should keep in mind that such
779    a sequence of tokens is a valid way to invoke one-parameter
780    function-like macros, but also a valid way to invoke zero-parameter
781    function-like macros.  Eeew.
782 
783    Consume the tokens from SRC; after this call, SRC contains the text
784    following the invocation.  */
785 
786 static struct macro_buffer *
787 gather_arguments (const char *name, struct macro_buffer *src,
788 		  int nargs, int *argc_p)
789 {
790   struct macro_buffer tok;
791   int args_len, args_size;
792   struct macro_buffer *args = NULL;
793   struct cleanup *back_to = make_cleanup (free_current_contents, &args);
794 
795   /* Does SRC start with an opening paren token?  Read from a copy of
796      SRC, so SRC itself is unaffected if we don't find an opening
797      paren.  */
798   {
799     struct macro_buffer temp;
800 
801     init_shared_buffer (&temp, src->text, src->len);
802 
803     if (! get_token (&tok, &temp)
804         || tok.len != 1
805         || tok.text[0] != '(')
806       {
807         discard_cleanups (back_to);
808         return 0;
809       }
810   }
811 
812   /* Consume SRC's opening paren.  */
813   get_token (&tok, src);
814 
815   args_len = 0;
816   args_size = 6;
817   args = (struct macro_buffer *) xmalloc (sizeof (*args) * args_size);
818 
819   for (;;)
820     {
821       struct macro_buffer *arg;
822       int depth;
823 
824       /* Make sure we have room for the next argument.  */
825       if (args_len >= args_size)
826         {
827           args_size *= 2;
828           args = xrealloc (args, sizeof (*args) * args_size);
829         }
830 
831       /* Initialize the next argument.  */
832       arg = &args[args_len++];
833       set_token (arg, src->text, src->text);
834 
835       /* Gather the argument's tokens.  */
836       depth = 0;
837       for (;;)
838         {
839           if (! get_token (&tok, src))
840             error (_("Malformed argument list for macro `%s'."), name);
841 
842           /* Is tok an opening paren?  */
843           if (tok.len == 1 && tok.text[0] == '(')
844             depth++;
845 
846           /* Is tok is a closing paren?  */
847           else if (tok.len == 1 && tok.text[0] == ')')
848             {
849               /* If it's a closing paren at the top level, then that's
850                  the end of the argument list.  */
851               if (depth == 0)
852                 {
853 		  /* In the varargs case, the last argument may be
854 		     missing.  Add an empty argument in this case.  */
855 		  if (nargs != -1 && args_len == nargs - 1)
856 		    {
857 		      /* Make sure we have room for the argument.  */
858 		      if (args_len >= args_size)
859 			{
860 			  args_size++;
861 			  args = xrealloc (args, sizeof (*args) * args_size);
862 			}
863 		      arg = &args[args_len++];
864 		      set_token (arg, src->text, src->text);
865 		    }
866 
867                   discard_cleanups (back_to);
868                   *argc_p = args_len;
869                   return args;
870                 }
871 
872               depth--;
873             }
874 
875           /* If tok is a comma at top level, then that's the end of
876              the current argument.  However, if we are handling a
877              variadic macro and we are computing the last argument, we
878              want to include the comma and remaining tokens.  */
879           else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
880 		   && (nargs == -1 || args_len < nargs))
881             break;
882 
883           /* Extend the current argument to enclose this token.  If
884              this is the current argument's first token, leave out any
885              leading whitespace, just for aesthetics.  */
886           if (arg->len == 0)
887             {
888               arg->text = tok.text;
889               arg->len = tok.len;
890               arg->last_token = 0;
891             }
892           else
893             {
894               arg->len = (tok.text + tok.len) - arg->text;
895               arg->last_token = tok.text - arg->text;
896             }
897         }
898     }
899 }
900 
901 
902 /* The `expand' and `substitute_args' functions both invoke `scan'
903    recursively, so we need a forward declaration somewhere.  */
904 static void scan (struct macro_buffer *dest,
905                   struct macro_buffer *src,
906                   struct macro_name_list *no_loop,
907                   macro_lookup_ftype *lookup_func,
908                   void *lookup_baton);
909 
910 
911 /* A helper function for substitute_args.
912 
913    ARGV is a vector of all the arguments; ARGC is the number of
914    arguments.  IS_VARARGS is true if the macro being substituted is a
915    varargs macro; in this case VA_ARG_NAME is the name of the
916    "variable" argument.  VA_ARG_NAME is ignored if IS_VARARGS is
917    false.
918 
919    If the token TOK is the name of a parameter, return the parameter's
920    index.  If TOK is not an argument, return -1.  */
921 
922 static int
923 find_parameter (const struct macro_buffer *tok,
924 		int is_varargs, const struct macro_buffer *va_arg_name,
925 		int argc, const char * const *argv)
926 {
927   int i;
928 
929   if (! tok->is_identifier)
930     return -1;
931 
932   for (i = 0; i < argc; ++i)
933     if (tok->len == strlen (argv[i])
934 	&& !memcmp (tok->text, argv[i], tok->len))
935       return i;
936 
937   if (is_varargs && tok->len == va_arg_name->len
938       && ! memcmp (tok->text, va_arg_name->text, tok->len))
939     return argc - 1;
940 
941   return -1;
942 }
943 
944 /* Given the macro definition DEF, being invoked with the actual
945    arguments given by ARGC and ARGV, substitute the arguments into the
946    replacement list, and store the result in DEST.
947 
948    IS_VARARGS should be true if DEF is a varargs macro.  In this case,
949    VA_ARG_NAME should be the name of the "variable" argument -- either
950    __VA_ARGS__ for c99-style varargs, or the final argument name, for
951    GNU-style varargs.  If IS_VARARGS is false, this parameter is
952    ignored.
953 
954    If it is necessary to expand macro invocations in one of the
955    arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
956    definitions, and don't expand invocations of the macros listed in
957    NO_LOOP.  */
958 
959 static void
960 substitute_args (struct macro_buffer *dest,
961                  struct macro_definition *def,
962 		 int is_varargs, const struct macro_buffer *va_arg_name,
963                  int argc, struct macro_buffer *argv,
964                  struct macro_name_list *no_loop,
965                  macro_lookup_ftype *lookup_func,
966                  void *lookup_baton)
967 {
968   /* A macro buffer for the macro's replacement list.  */
969   struct macro_buffer replacement_list;
970   /* The token we are currently considering.  */
971   struct macro_buffer tok;
972   /* The replacement list's pointer from just before TOK was lexed.  */
973   char *original_rl_start;
974   /* We have a single lookahead token to handle token splicing.  */
975   struct macro_buffer lookahead;
976   /* The lookahead token might not be valid.  */
977   int lookahead_valid;
978   /* The replacement list's pointer from just before LOOKAHEAD was
979      lexed.  */
980   char *lookahead_rl_start;
981 
982   init_shared_buffer (&replacement_list, (char *) def->replacement,
983                       strlen (def->replacement));
984 
985   gdb_assert (dest->len == 0);
986   dest->last_token = 0;
987 
988   original_rl_start = replacement_list.text;
989   if (! get_token (&tok, &replacement_list))
990     return;
991   lookahead_rl_start = replacement_list.text;
992   lookahead_valid = get_token (&lookahead, &replacement_list);
993 
994   for (;;)
995     {
996       /* Just for aesthetics.  If we skipped some whitespace, copy
997          that to DEST.  */
998       if (tok.text > original_rl_start)
999         {
1000           appendmem (dest, original_rl_start, tok.text - original_rl_start);
1001           dest->last_token = dest->len;
1002         }
1003 
1004       /* Is this token the stringification operator?  */
1005       if (tok.len == 1
1006           && tok.text[0] == '#')
1007 	{
1008 	  int arg;
1009 
1010 	  if (!lookahead_valid)
1011 	    error (_("Stringification operator requires an argument."));
1012 
1013 	  arg = find_parameter (&lookahead, is_varargs, va_arg_name,
1014 				def->argc, def->argv);
1015 	  if (arg == -1)
1016 	    error (_("Argument to stringification operator must name "
1017 		     "a macro parameter."));
1018 
1019 	  stringify (dest, argv[arg].text, argv[arg].len);
1020 
1021 	  /* Read one token and let the loop iteration code handle the
1022 	     rest.  */
1023 	  lookahead_rl_start = replacement_list.text;
1024 	  lookahead_valid = get_token (&lookahead, &replacement_list);
1025 	}
1026       /* Is this token the splicing operator?  */
1027       else if (tok.len == 2
1028 	       && tok.text[0] == '#'
1029 	       && tok.text[1] == '#')
1030 	error (_("Stray splicing operator"));
1031       /* Is the next token the splicing operator?  */
1032       else if (lookahead_valid
1033 	       && lookahead.len == 2
1034 	       && lookahead.text[0] == '#'
1035 	       && lookahead.text[1] == '#')
1036 	{
1037 	  int finished = 0;
1038 	  int prev_was_comma = 0;
1039 
1040 	  /* Note that GCC warns if the result of splicing is not a
1041 	     token.  In the debugger there doesn't seem to be much
1042 	     benefit from doing this.  */
1043 
1044 	  /* Insert the first token.  */
1045 	  if (tok.len == 1 && tok.text[0] == ',')
1046 	    prev_was_comma = 1;
1047 	  else
1048 	    {
1049 	      int arg = find_parameter (&tok, is_varargs, va_arg_name,
1050 					def->argc, def->argv);
1051 
1052 	      if (arg != -1)
1053 		appendmem (dest, argv[arg].text, argv[arg].len);
1054 	      else
1055 		appendmem (dest, tok.text, tok.len);
1056 	    }
1057 
1058 	  /* Apply a possible sequence of ## operators.  */
1059 	  for (;;)
1060 	    {
1061 	      if (! get_token (&tok, &replacement_list))
1062 		error (_("Splicing operator at end of macro"));
1063 
1064 	      /* Handle a comma before a ##.  If we are handling
1065 		 varargs, and the token on the right hand side is the
1066 		 varargs marker, and the final argument is empty or
1067 		 missing, then drop the comma.  This is a GNU
1068 		 extension.  There is one ambiguous case here,
1069 		 involving pedantic behavior with an empty argument,
1070 		 but we settle that in favor of GNU-style (GCC uses an
1071 		 option).  If we aren't dealing with varargs, we
1072 		 simply insert the comma.  */
1073 	      if (prev_was_comma)
1074 		{
1075 		  if (! (is_varargs
1076 			 && tok.len == va_arg_name->len
1077 			 && !memcmp (tok.text, va_arg_name->text, tok.len)
1078 			 && argv[argc - 1].len == 0))
1079 		    appendmem (dest, ",", 1);
1080 		  prev_was_comma = 0;
1081 		}
1082 
1083 	      /* Insert the token.  If it is a parameter, insert the
1084 		 argument.  If it is a comma, treat it specially.  */
1085 	      if (tok.len == 1 && tok.text[0] == ',')
1086 		prev_was_comma = 1;
1087 	      else
1088 		{
1089 		  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1090 					    def->argc, def->argv);
1091 
1092 		  if (arg != -1)
1093 		    appendmem (dest, argv[arg].text, argv[arg].len);
1094 		  else
1095 		    appendmem (dest, tok.text, tok.len);
1096 		}
1097 
1098 	      /* Now read another token.  If it is another splice, we
1099 		 loop.  */
1100 	      original_rl_start = replacement_list.text;
1101 	      if (! get_token (&tok, &replacement_list))
1102 		{
1103 		  finished = 1;
1104 		  break;
1105 		}
1106 
1107 	      if (! (tok.len == 2
1108 		     && tok.text[0] == '#'
1109 		     && tok.text[1] == '#'))
1110 		break;
1111 	    }
1112 
1113 	  if (prev_was_comma)
1114 	    {
1115 	      /* We saw a comma.  Insert it now.  */
1116 	      appendmem (dest, ",", 1);
1117 	    }
1118 
1119           dest->last_token = dest->len;
1120 	  if (finished)
1121 	    lookahead_valid = 0;
1122 	  else
1123 	    {
1124 	      /* Set up for the loop iterator.  */
1125 	      lookahead = tok;
1126 	      lookahead_rl_start = original_rl_start;
1127 	      lookahead_valid = 1;
1128 	    }
1129 	}
1130       else
1131 	{
1132 	  /* Is this token an identifier?  */
1133 	  int substituted = 0;
1134 	  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1135 				    def->argc, def->argv);
1136 
1137 	  if (arg != -1)
1138 	    {
1139 	      struct macro_buffer arg_src;
1140 
1141 	      /* Expand any macro invocations in the argument text,
1142 		 and append the result to dest.  Remember that scan
1143 		 mutates its source, so we need to scan a new buffer
1144 		 referring to the argument's text, not the argument
1145 		 itself.  */
1146 	      init_shared_buffer (&arg_src, argv[arg].text, argv[arg].len);
1147 	      scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
1148 	      substituted = 1;
1149 	    }
1150 
1151 	  /* If it wasn't a parameter, then just copy it across.  */
1152 	  if (! substituted)
1153 	    append_tokens_without_splicing (dest, &tok);
1154 	}
1155 
1156       if (! lookahead_valid)
1157 	break;
1158 
1159       tok = lookahead;
1160       original_rl_start = lookahead_rl_start;
1161 
1162       lookahead_rl_start = replacement_list.text;
1163       lookahead_valid = get_token (&lookahead, &replacement_list);
1164     }
1165 }
1166 
1167 
1168 /* Expand a call to a macro named ID, whose definition is DEF.  Append
1169    its expansion to DEST.  SRC is the input text following the ID
1170    token.  We are currently rescanning the expansions of the macros
1171    named in NO_LOOP; don't re-expand them.  Use LOOKUP_FUNC and
1172    LOOKUP_BATON to find definitions for any nested macro references.
1173 
1174    Return 1 if we decided to expand it, zero otherwise.  (If it's a
1175    function-like macro name that isn't followed by an argument list,
1176    we don't expand it.)  If we return zero, leave SRC unchanged.  */
1177 static int
1178 expand (const char *id,
1179         struct macro_definition *def,
1180         struct macro_buffer *dest,
1181         struct macro_buffer *src,
1182         struct macro_name_list *no_loop,
1183         macro_lookup_ftype *lookup_func,
1184         void *lookup_baton)
1185 {
1186   struct macro_name_list new_no_loop;
1187 
1188   /* Create a new node to be added to the front of the no-expand list.
1189      This list is appropriate for re-scanning replacement lists, but
1190      it is *not* appropriate for scanning macro arguments; invocations
1191      of the macro whose arguments we are gathering *do* get expanded
1192      there.  */
1193   new_no_loop.name = id;
1194   new_no_loop.next = no_loop;
1195 
1196   /* What kind of macro are we expanding?  */
1197   if (def->kind == macro_object_like)
1198     {
1199       struct macro_buffer replacement_list;
1200 
1201       init_shared_buffer (&replacement_list, (char *) def->replacement,
1202                           strlen (def->replacement));
1203 
1204       scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton);
1205       return 1;
1206     }
1207   else if (def->kind == macro_function_like)
1208     {
1209       struct cleanup *back_to = make_cleanup (null_cleanup, 0);
1210       int argc = 0;
1211       struct macro_buffer *argv = NULL;
1212       struct macro_buffer substituted;
1213       struct macro_buffer substituted_src;
1214       struct macro_buffer va_arg_name = {0};
1215       int is_varargs = 0;
1216 
1217       if (def->argc >= 1)
1218 	{
1219 	  if (strcmp (def->argv[def->argc - 1], "...") == 0)
1220 	    {
1221 	      /* In C99-style varargs, substitution is done using
1222 		 __VA_ARGS__.  */
1223 	      init_shared_buffer (&va_arg_name, "__VA_ARGS__",
1224 				  strlen ("__VA_ARGS__"));
1225 	      is_varargs = 1;
1226 	    }
1227 	  else
1228 	    {
1229 	      int len = strlen (def->argv[def->argc - 1]);
1230 
1231 	      if (len > 3
1232 		  && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0)
1233 		{
1234 		  /* In GNU-style varargs, the name of the
1235 		     substitution parameter is the name of the formal
1236 		     argument without the "...".  */
1237 		  init_shared_buffer (&va_arg_name,
1238 				      (char *) def->argv[def->argc - 1],
1239 				      len - 3);
1240 		  is_varargs = 1;
1241 		}
1242 	    }
1243 	}
1244 
1245       make_cleanup (free_current_contents, &argv);
1246       argv = gather_arguments (id, src, is_varargs ? def->argc : -1,
1247 			       &argc);
1248 
1249       /* If we couldn't find any argument list, then we don't expand
1250          this macro.  */
1251       if (! argv)
1252         {
1253           do_cleanups (back_to);
1254           return 0;
1255         }
1256 
1257       /* Check that we're passing an acceptable number of arguments for
1258          this macro.  */
1259       if (argc != def->argc)
1260         {
1261 	  if (is_varargs && argc >= def->argc - 1)
1262 	    {
1263 	      /* Ok.  */
1264 	    }
1265           /* Remember that a sequence of tokens like "foo()" is a
1266              valid invocation of a macro expecting either zero or one
1267              arguments.  */
1268           else if (! (argc == 1
1269 		      && argv[0].len == 0
1270 		      && def->argc == 0))
1271             error (_("Wrong number of arguments to macro `%s' "
1272                    "(expected %d, got %d)."),
1273                    id, def->argc, argc);
1274         }
1275 
1276       /* Note that we don't expand macro invocations in the arguments
1277          yet --- we let subst_args take care of that.  Parameters that
1278          appear as operands of the stringifying operator "#" or the
1279          splicing operator "##" don't get macro references expanded,
1280          so we can't really tell whether it's appropriate to macro-
1281          expand an argument until we see how it's being used.  */
1282       init_buffer (&substituted, 0);
1283       make_cleanup (cleanup_macro_buffer, &substituted);
1284       substitute_args (&substituted, def, is_varargs, &va_arg_name,
1285 		       argc, argv, no_loop, lookup_func, lookup_baton);
1286 
1287       /* Now `substituted' is the macro's replacement list, with all
1288          argument values substituted into it properly.  Re-scan it for
1289          macro references, but don't expand invocations of this macro.
1290 
1291          We create a new buffer, `substituted_src', which points into
1292          `substituted', and scan that.  We can't scan `substituted'
1293          itself, since the tokenization process moves the buffer's
1294          text pointer around, and we still need to be able to find
1295          `substituted's original text buffer after scanning it so we
1296          can free it.  */
1297       init_shared_buffer (&substituted_src, substituted.text, substituted.len);
1298       scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton);
1299 
1300       do_cleanups (back_to);
1301 
1302       return 1;
1303     }
1304   else
1305     internal_error (__FILE__, __LINE__, _("bad macro definition kind"));
1306 }
1307 
1308 
1309 /* If the single token in SRC_FIRST followed by the tokens in SRC_REST
1310    constitute a macro invokation not forbidden in NO_LOOP, append its
1311    expansion to DEST and return non-zero.  Otherwise, return zero, and
1312    leave DEST unchanged.
1313 
1314    SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one.
1315    SRC_FIRST must be a string built by get_token.  */
1316 static int
1317 maybe_expand (struct macro_buffer *dest,
1318               struct macro_buffer *src_first,
1319               struct macro_buffer *src_rest,
1320               struct macro_name_list *no_loop,
1321               macro_lookup_ftype *lookup_func,
1322               void *lookup_baton)
1323 {
1324   gdb_assert (src_first->shared);
1325   gdb_assert (src_rest->shared);
1326   gdb_assert (! dest->shared);
1327 
1328   /* Is this token an identifier?  */
1329   if (src_first->is_identifier)
1330     {
1331       /* Make a null-terminated copy of it, since that's what our
1332          lookup function expects.  */
1333       char *id = xmalloc (src_first->len + 1);
1334       struct cleanup *back_to = make_cleanup (xfree, id);
1335 
1336       memcpy (id, src_first->text, src_first->len);
1337       id[src_first->len] = 0;
1338 
1339       /* If we're currently re-scanning the result of expanding
1340          this macro, don't expand it again.  */
1341       if (! currently_rescanning (no_loop, id))
1342         {
1343           /* Does this identifier have a macro definition in scope?  */
1344           struct macro_definition *def = lookup_func (id, lookup_baton);
1345 
1346           if (def && expand (id, def, dest, src_rest, no_loop,
1347                              lookup_func, lookup_baton))
1348             {
1349               do_cleanups (back_to);
1350               return 1;
1351             }
1352         }
1353 
1354       do_cleanups (back_to);
1355     }
1356 
1357   return 0;
1358 }
1359 
1360 
1361 /* Expand macro references in SRC, appending the results to DEST.
1362    Assume we are re-scanning the result of expanding the macros named
1363    in NO_LOOP, and don't try to re-expand references to them.
1364 
1365    SRC must be a shared buffer; DEST must not be one.  */
1366 static void
1367 scan (struct macro_buffer *dest,
1368       struct macro_buffer *src,
1369       struct macro_name_list *no_loop,
1370       macro_lookup_ftype *lookup_func,
1371       void *lookup_baton)
1372 {
1373   gdb_assert (src->shared);
1374   gdb_assert (! dest->shared);
1375 
1376   for (;;)
1377     {
1378       struct macro_buffer tok;
1379       char *original_src_start = src->text;
1380 
1381       /* Find the next token in SRC.  */
1382       if (! get_token (&tok, src))
1383         break;
1384 
1385       /* Just for aesthetics.  If we skipped some whitespace, copy
1386          that to DEST.  */
1387       if (tok.text > original_src_start)
1388         {
1389           appendmem (dest, original_src_start, tok.text - original_src_start);
1390           dest->last_token = dest->len;
1391         }
1392 
1393       if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton))
1394         /* We didn't end up expanding tok as a macro reference, so
1395            simply append it to dest.  */
1396         append_tokens_without_splicing (dest, &tok);
1397     }
1398 
1399   /* Just for aesthetics.  If there was any trailing whitespace in
1400      src, copy it to dest.  */
1401   if (src->len)
1402     {
1403       appendmem (dest, src->text, src->len);
1404       dest->last_token = dest->len;
1405     }
1406 }
1407 
1408 
1409 char *
1410 macro_expand (const char *source,
1411               macro_lookup_ftype *lookup_func,
1412               void *lookup_func_baton)
1413 {
1414   struct macro_buffer src, dest;
1415   struct cleanup *back_to;
1416 
1417   init_shared_buffer (&src, (char *) source, strlen (source));
1418 
1419   init_buffer (&dest, 0);
1420   dest.last_token = 0;
1421   back_to = make_cleanup (cleanup_macro_buffer, &dest);
1422 
1423   scan (&dest, &src, 0, lookup_func, lookup_func_baton);
1424 
1425   appendc (&dest, '\0');
1426 
1427   discard_cleanups (back_to);
1428   return dest.text;
1429 }
1430 
1431 
1432 char *
1433 macro_expand_once (const char *source,
1434                    macro_lookup_ftype *lookup_func,
1435                    void *lookup_func_baton)
1436 {
1437   error (_("Expand-once not implemented yet."));
1438 }
1439 
1440 
1441 char *
1442 macro_expand_next (const char **lexptr,
1443                    macro_lookup_ftype *lookup_func,
1444                    void *lookup_baton)
1445 {
1446   struct macro_buffer src, dest, tok;
1447   struct cleanup *back_to;
1448 
1449   /* Set up SRC to refer to the input text, pointed to by *lexptr.  */
1450   init_shared_buffer (&src, (char *) *lexptr, strlen (*lexptr));
1451 
1452   /* Set up DEST to receive the expansion, if there is one.  */
1453   init_buffer (&dest, 0);
1454   dest.last_token = 0;
1455   back_to = make_cleanup (cleanup_macro_buffer, &dest);
1456 
1457   /* Get the text's first preprocessing token.  */
1458   if (! get_token (&tok, &src))
1459     {
1460       do_cleanups (back_to);
1461       return 0;
1462     }
1463 
1464   /* If it's a macro invocation, expand it.  */
1465   if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton))
1466     {
1467       /* It was a macro invocation!  Package up the expansion as a
1468          null-terminated string and return it.  Set *lexptr to the
1469          start of the next token in the input.  */
1470       appendc (&dest, '\0');
1471       discard_cleanups (back_to);
1472       *lexptr = src.text;
1473       return dest.text;
1474     }
1475   else
1476     {
1477       /* It wasn't a macro invocation.  */
1478       do_cleanups (back_to);
1479       return 0;
1480     }
1481 }
1482