xref: /netbsd-src/external/gpl3/gcc.old/dist/libcpp/lex.c (revision 6cf6fe02a981b55727c49c3d37b0d8191a98c0ee)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3    Free Software Foundation, Inc.
4    Contributed by Per Bothner, 1994-95.
5    Based on CCCP program by Paul Rubin, June 1986
6    Adapted to ANSI C, Richard Stallman, Jan 1987
7    Broken out to separate file, Zack Weinberg, Mar 2000
8 
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "internal.h"
27 
/* Spelling category of a token type; stored in the `category' member
   of struct token_spelling and read back through TOKEN_SPELL.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Assigned to every OP() entry of TTYPE_TABLE.  */
  /* The remaining categories are selected by the second argument of
     the TK() entries of TTYPE_TABLE (pasted as SPELL_ ## s).
     NOTE(review): their precise meaning is set by the code that
     consumes TOKEN_SPELL, which is not visible here -- confirm.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
35 
/* One row of the token_spellings table, indexed by token type.  */
struct token_spelling
{
  /* How tokens of this type are spelled (see enum spell_type).  */
  enum spell_type category;
  /* For OP() entries the operator's spelling; for TK() entries the
     stringified name of the token-type enumerator.  */
  const unsigned char *name;
};
41 
/* Spellings of the digraph forms of punctuators.
   NOTE(review): the table is presumably indexed relative to the first
   digraph-capable token type by code outside this view -- the order of
   the entries must therefore not be changed without checking users.  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44 
/* Build the spelling table by expanding TTYPE_TABLE with local
   definitions of OP and TK: an OP(e, s) entry becomes a SPELL_OPERATOR
   row spelled S, a TK(e, s) entry becomes a SPELL_<s> row named after
   the enumerator E.  Both helper macros are undefined again
   immediately afterwards.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
#define TK(e, s) { SPELL_ ## s,    UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Accessors for the spelling category and name of TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 
/* Forward declarations of file-local helpers, so that they can be
   used before their definitions appear.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
67 
68 
69 /* Utility routine:
70 
71    Compares, the token TOKEN to the NUL-terminated string STRING.
72    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
73 int
74 cpp_ideq (const cpp_token *token, const char *string)
75 {
76   if (token->type != CPP_NAME)
77     return 0;
78 
79   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
80 }
81 
82 /* Record a note TYPE at byte POS into the current cleaned logical
83    line.  */
84 static void
85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 {
87   if (buffer->notes_used == buffer->notes_cap)
88     {
89       buffer->notes_cap = buffer->notes_cap * 2 + 200;
90       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91                                   buffer->notes_cap);
92     }
93 
94   buffer->notes[buffer->notes_used].pos = pos;
95   buffer->notes[buffer->notes_used].type = type;
96   buffer->notes_used++;
97 }
98 
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line to clean starts at buffer->next_line.  On return
   buffer->cur and buffer->line_base point at the start of that line,
   the cleaned text is terminated by a '\n' stored at its end,
   buffer->next_line points just past the physical line terminator at
   which scanning stopped, and buffer->need_line is cleared.

   Every escaped newline and every trigraph met on the way is recorded
   with add_line_note; a final sentinel note (type '\n', positioned one
   past the terminating newline) is always appended.  Trigraphs are
   only replaced in the text when the `trigraphs' option is on.

   For a buffer marked from_stage3 no splicing or trigraph handling is
   done at all; only the end of the line is located.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;	/* Read cursor.  */
  uchar c, *d, *p;	/* D is the write cursor, P a scratch pointer.  */

  buffer = pfile->buffer;
  /* Start a fresh set of notes for this line.  */
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* Both loops below pre-increment S, so start one byte early.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* Position of the most recent backslash seen on the fast path,
	 or NULL if there was none.  */
      const uchar *pbackslash = NULL;

      /* Short circuit for the common case of an un-escaped line with
	 no trigraphs.  The primary win here is by not writing any
	 data back to memory until we have to.  */
      for (;;)
	{
	  c = *++s;
	  if (__builtin_expect (c == '\n', false)
	      || __builtin_expect (c == '\r', false))
	    {
	      /* Nothing has been rewritten yet, so the terminating
		 newline goes where the line ending was found.  */
	      d = (uchar *) s;

	      if (__builtin_expect (s == buffer->rlimit, false))
		goto done;

	      /* DOS line ending? */
	      if (__builtin_expect (c == '\r', false)
		  && s[1] == '\n')
		{
		  s++;
		  if (s == buffer->rlimit)
		    goto done;
		}

	      /* Without a backslash on the line there cannot be an
		 escaped newline.  */
	      if (__builtin_expect (pbackslash == NULL, true))
		goto done;

	      /* Check for escaped newline.  Horizontal white space is
		 tolerated between the backslash and the newline.  */
	      p = d;
	      while (is_nvspace (p[-1]))
		p--;
	      if (p - 1 != pbackslash)
		goto done;

	      /* Have an escaped newline; process it and proceed to
		 the slow path.  The note sits on the backslash; its
		 type is ' ' when white space separated backslash and
		 newline, '\\' otherwise.  */
	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
	      /* The slow path pre-increments D, so the next byte
		 copied overwrites the backslash.  */
	      d = p - 2;
	      /* Used by the slow path as the lower bound of its
		 backwards scan for a backslash.  */
	      buffer->next_line = p - 1;
	      break;
	    }
	  if (__builtin_expect (c == '\\', false))
	    pbackslash = s;
	  else if (__builtin_expect (c == '?', false)
		   && __builtin_expect (s[1] == '?', false)
		   && _cpp_trigraph_map[s[2]])
	    {
	      /* Have a trigraph.  We may or may not have to convert
		 it.  Add a line note regardless, for -Wtrigraphs.  */
	      add_line_note (buffer, s, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* We do, and that means we have to switch to the
		     slow path.  */
		  d = (uchar *) s;
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		  break;
		}
	    }
	}


      /* Slow path: from here on every byte read at S is copied down
	 to D, closing the gaps left by removed escaped newlines and
	 converted trigraphs.  */
      for (;;)
	{
	  c = *++s;
	  *++d = c;

	  if (c == '\n' || c == '\r')
	    {
	      /* Handle DOS line endings.  */
	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
		s++;
	      if (s == buffer->rlimit)
		break;

	      /* Escaped?  Scan back over horizontal white space, but
		 never beyond the previous splice point.  */
	      p = d;
	      while (p != buffer->next_line && is_nvspace (p[-1]))
		p--;
	      if (p == buffer->next_line || p[-1] != '\\')
		break;

	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
	      d = p - 2;
	      buffer->next_line = p - 1;
	    }
	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
	    {
	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
	      add_line_note (buffer, d, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* Replace the '?' just copied by the character the
		     trigraph stands for and skip its other two bytes.  */
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		}
	    }
	}
    }
  else
    {
      /* No cleaning wanted; just find the end of the line.  */
      do
	s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
	s++;
    }

 done:
  /* Normalize whatever ended the line to a single '\n'.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
234 
235 /* Return true if the trigraph indicated by NOTE should be warned
236    about in a comment.  */
237 static bool
238 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
239 {
240   const uchar *p;
241 
242   /* Within comments we don't warn about trigraphs, unless the
243      trigraph forms an escaped newline, as that may change
244      behavior.  */
245   if (note->type != '/')
246     return false;
247 
248   /* If -trigraphs, then this was an escaped newline iff the next note
249      is coincident.  */
250   if (CPP_OPTION (pfile, trigraphs))
251     return note[1].pos == note->pos;
252 
253   /* Otherwise, see if this forms an escaped newline.  */
254   p = note->pos + 3;
255   while (is_nvspace (*p))
256     p++;
257 
258   /* There might have been escaped newlines between the trigraph and the
259      newline we found.  Hence the position test.  */
260   return (*p == '\n' && p < note[1].pos);
261 }
262 
263 /* Process the notes created by add_line_note as far as the current
264    location.  */
265 void
266 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
267 {
268   cpp_buffer *buffer = pfile->buffer;
269 
270   for (;;)
271     {
272       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273       unsigned int col;
274 
275       if (note->pos > buffer->cur)
276 	break;
277 
278       buffer->cur_note++;
279       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
280 
281       if (note->type == '\\' || note->type == ' ')
282 	{
283 	  if (note->type == ' ' && !in_comment)
284 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
285 				 "backslash and newline separated by space");
286 
287 	  if (buffer->next_line > buffer->rlimit)
288 	    {
289 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
290 				   "backslash-newline at end of file");
291 	      /* Prevent "no newline at end of file" warning.  */
292 	      buffer->next_line = buffer->rlimit;
293 	    }
294 
295 	  buffer->line_base = note->pos;
296 	  CPP_INCREMENT_LINE (pfile, 0);
297 	}
298       else if (_cpp_trigraph_map[note->type])
299 	{
300 	  if (CPP_OPTION (pfile, warn_trigraphs)
301 	      && (!in_comment || warn_in_comment (pfile, note)))
302 	    {
303 	      if (CPP_OPTION (pfile, trigraphs))
304 		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
305 				     "trigraph ??%c converted to %c",
306 				     note->type,
307 				     (int) _cpp_trigraph_map[note->type]);
308 	      else
309 		{
310 		  cpp_error_with_line
311 		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
312 		     "trigraph ??%c ignored, use -trigraphs to enable",
313 		     note->type);
314 		}
315 	    }
316 	}
317       else if (note->type == 0)
318 	/* Already processed in lex_raw_string.  */;
319       else
320 	abort ();
321     }
322 }
323 
324 /* Skip a C-style block comment.  We find the end of the comment by
325    seeing if an asterisk is before every '/' we encounter.  Returns
326    nonzero if comment terminated by EOF, zero otherwise.
327 
328    Buffer->cur points to the initial asterisk of the comment.  */
329 bool
330 _cpp_skip_block_comment (cpp_reader *pfile)
331 {
332   cpp_buffer *buffer = pfile->buffer;
333   const uchar *cur = buffer->cur;
334   uchar c;
335 
336   cur++;
337   if (*cur == '/')
338     cur++;
339 
340   for (;;)
341     {
342       /* People like decorating comments with '*', so check for '/'
343 	 instead for efficiency.  */
344       c = *cur++;
345 
346       if (c == '/')
347 	{
348 	  if (cur[-2] == '*')
349 	    break;
350 
351 	  /* Warn about potential nested comments, but not if the '/'
352 	     comes immediately before the true comment delimiter.
353 	     Don't bother to get it right across escaped newlines.  */
354 	  if (CPP_OPTION (pfile, warn_comments)
355 	      && cur[0] == '*' && cur[1] != '/')
356 	    {
357 	      buffer->cur = cur;
358 	      cpp_error_with_line (pfile, CPP_DL_WARNING,
359 				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
360 				   "\"/*\" within comment");
361 	    }
362 	}
363       else if (c == '\n')
364 	{
365 	  unsigned int cols;
366 	  buffer->cur = cur - 1;
367 	  _cpp_process_line_notes (pfile, true);
368 	  if (buffer->next_line >= buffer->rlimit)
369 	    return true;
370 	  _cpp_clean_line (pfile);
371 
372 	  cols = buffer->next_line - buffer->line_base;
373 	  CPP_INCREMENT_LINE (pfile, cols);
374 
375 	  cur = buffer->cur;
376 	}
377     }
378 
379   buffer->cur = cur;
380   _cpp_process_line_notes (pfile, true);
381   return false;
382 }
383 
384 /* Skip a C++ line comment, leaving buffer->cur pointing to the
385    terminating newline.  Handles escaped newlines.  Returns nonzero
386    if a multiline comment.  */
387 static int
388 skip_line_comment (cpp_reader *pfile)
389 {
390   cpp_buffer *buffer = pfile->buffer;
391   source_location orig_line = pfile->line_table->highest_line;
392 
393   while (*buffer->cur != '\n')
394     buffer->cur++;
395 
396   _cpp_process_line_notes (pfile, true);
397   return orig_line != pfile->line_table->highest_line;
398 }
399 
400 /* Skips whitespace, saving the next non-whitespace character.  */
401 static void
402 skip_whitespace (cpp_reader *pfile, cppchar_t c)
403 {
404   cpp_buffer *buffer = pfile->buffer;
405   bool saw_NUL = false;
406 
407   do
408     {
409       /* Horizontal space always OK.  */
410       if (c == ' ' || c == '\t')
411 	;
412       /* Just \f \v or \0 left.  */
413       else if (c == '\0')
414 	saw_NUL = true;
415       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
416 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
417 			     CPP_BUF_COL (buffer),
418 			     "%s in preprocessing directive",
419 			     c == '\f' ? "form feed" : "vertical tab");
420 
421       c = *buffer->cur++;
422     }
423   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
424   while (is_nvspace (c));
425 
426   if (saw_NUL)
427     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
428 
429   buffer->cur--;
430 }
431 
432 /* See if the characters of a number token are valid in a name (no
433    '.', '+' or '-').  */
434 static int
435 name_p (cpp_reader *pfile, const cpp_string *string)
436 {
437   unsigned int i;
438 
439   for (i = 0; i < string->len; i++)
440     if (!is_idchar (string->text[i]))
441       return 0;
442 
443   return 1;
444 }
445 
446 /* After parsing an identifier or other sequence, produce a warning about
447    sequences not in NFC/NFKC.  */
448 static void
449 warn_about_normalization (cpp_reader *pfile,
450 			  const cpp_token *token,
451 			  const struct normalize_state *s)
452 {
453   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
454       && !pfile->state.skipping)
455     {
456       /* Make sure that the token is printed using UCNs, even
457 	 if we'd otherwise happily print UTF-8.  */
458       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
459       size_t sz;
460 
461       sz = cpp_spell_token (pfile, token, buf, false) - buf;
462       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
463 	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
464 			     "`%.*s' is not in NFKC", (int) sz, buf);
465       else
466 	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
467 			     "`%.*s' is not in NFC", (int) sz, buf);
468     }
469 }
470 
471 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
472    an identifier.  FIRST is TRUE if this starts an identifier.  */
473 static bool
474 forms_identifier_p (cpp_reader *pfile, int first,
475 		    struct normalize_state *state)
476 {
477   cpp_buffer *buffer = pfile->buffer;
478 
479   if (*buffer->cur == '$')
480     {
481       if (!CPP_OPTION (pfile, dollars_in_ident))
482 	return false;
483 
484       buffer->cur++;
485       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
486 	{
487 	  CPP_OPTION (pfile, warn_dollars) = 0;
488 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
489 	}
490 
491       return true;
492     }
493 
494   /* Is this a syntactically valid UCN?  */
495   if (CPP_OPTION (pfile, extended_identifiers)
496       && *buffer->cur == '\\'
497       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
498     {
499       buffer->cur += 2;
500       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
501 			  state))
502 	return true;
503       buffer->cur -= 2;
504     }
505 
506   return false;
507 }
508 
509 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
510 static cpp_hashnode *
511 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
512 {
513   cpp_hashnode *result;
514   const uchar *cur;
515   unsigned int len;
516   unsigned int hash = HT_HASHSTEP (0, *base);
517 
518   cur = base + 1;
519   while (ISIDNUM (*cur))
520     {
521       hash = HT_HASHSTEP (hash, *cur);
522       cur++;
523     }
524   len = cur - base;
525   hash = HT_HASHFINISH (hash, len);
526   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
527 					      base, len, hash, HT_ALLOC));
528 
529   /* Rarely, identifiers require diagnostics when lexed.  */
530   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
531 			&& !pfile->state.skipping, 0))
532     {
533       /* It is allowed to poison the same identifier twice.  */
534       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
535 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
536 		   NODE_NAME (result));
537 
538       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
539 	 replacement list of a variadic macro.  */
540       if (result == pfile->spec_nodes.n__VA_ARGS__
541 	  && !pfile->state.va_args_ok)
542 	cpp_error (pfile, CPP_DL_PEDWARN,
543 		   "__VA_ARGS__ can only appear in the expansion"
544 		   " of a C99 variadic macro");
545 
546       /* For -Wc++-compat, warn about use of C++ named operators.  */
547       if (result->flags & NODE_WARN_OPERATOR)
548 	cpp_error (pfile, CPP_DL_WARNING,
549 		   "identifier \"%s\" is a special operator name in C++",
550 		   NODE_NAME (result));
551     }
552 
553   return result;
554 }
555 
556 /* Get the cpp_hashnode of an identifier specified by NAME in
557    the current cpp_reader object.  If none is found, NULL is returned.  */
558 cpp_hashnode *
559 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
560 {
561   cpp_hashnode *result;
562   result = lex_identifier_intern (pfile, (uchar *) name);
563   return result;
564 }
565 
566 /* Lex an identifier starting at BUFFER->CUR - 1.  */
567 static cpp_hashnode *
568 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
569 		struct normalize_state *nst)
570 {
571   cpp_hashnode *result;
572   const uchar *cur;
573   unsigned int len;
574   unsigned int hash = HT_HASHSTEP (0, *base);
575 
576   cur = pfile->buffer->cur;
577   if (! starts_ucn)
578     while (ISIDNUM (*cur))
579       {
580 	hash = HT_HASHSTEP (hash, *cur);
581 	cur++;
582       }
583   pfile->buffer->cur = cur;
584   if (starts_ucn || forms_identifier_p (pfile, false, nst))
585     {
586       /* Slower version for identifiers containing UCNs (or $).  */
587       do {
588 	while (ISIDNUM (*pfile->buffer->cur))
589 	  {
590 	    pfile->buffer->cur++;
591 	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
592 	  }
593       } while (forms_identifier_p (pfile, false, nst));
594       result = _cpp_interpret_identifier (pfile, base,
595 					  pfile->buffer->cur - base);
596     }
597   else
598     {
599       len = cur - base;
600       hash = HT_HASHFINISH (hash, len);
601 
602       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
603 						  base, len, hash, HT_ALLOC));
604     }
605 
606   /* Rarely, identifiers require diagnostics when lexed.  */
607   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
608 			&& !pfile->state.skipping, 0))
609     {
610       /* It is allowed to poison the same identifier twice.  */
611       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
612 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
613 		   NODE_NAME (result));
614 
615       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
616 	 replacement list of a variadic macro.  */
617       if (result == pfile->spec_nodes.n__VA_ARGS__
618 	  && !pfile->state.va_args_ok)
619 	cpp_error (pfile, CPP_DL_PEDWARN,
620 		   "__VA_ARGS__ can only appear in the expansion"
621 		   " of a C99 variadic macro");
622 
623       /* For -Wc++-compat, warn about use of C++ named operators.  */
624       if (result->flags & NODE_WARN_OPERATOR)
625 	cpp_error (pfile, CPP_DL_WARNING,
626 		   "identifier \"%s\" is a special operator name in C++",
627 		   NODE_NAME (result));
628     }
629 
630   return result;
631 }
632 
633 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
634 static void
635 lex_number (cpp_reader *pfile, cpp_string *number,
636 	    struct normalize_state *nst)
637 {
638   const uchar *cur;
639   const uchar *base;
640   uchar *dest;
641 
642   base = pfile->buffer->cur - 1;
643   do
644     {
645       cur = pfile->buffer->cur;
646 
647       /* N.B. ISIDNUM does not include $.  */
648       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
649 	{
650 	  cur++;
651 	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
652 	}
653 
654       pfile->buffer->cur = cur;
655     }
656   while (forms_identifier_p (pfile, false, nst));
657 
658   number->len = cur - base;
659   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
660   memcpy (dest, base, number->len);
661   dest[number->len] = '\0';
662   number->text = dest;
663 }
664 
665 /* Create a token of type TYPE with a literal spelling.  */
666 static void
667 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
668 		unsigned int len, enum cpp_ttype type)
669 {
670   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
671 
672   memcpy (dest, base, len);
673   dest[len] = '\0';
674   token->type = type;
675   token->val.str.len = len;
676   token->val.str.text = dest;
677 }
678 
679 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
680    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
681 
682 static void
683 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
684 		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
685 {
686   _cpp_buff *first_buff = *first_buff_p;
687   _cpp_buff *last_buff = *last_buff_p;
688 
689   if (first_buff == NULL)
690     first_buff = last_buff = _cpp_get_buff (pfile, len);
691   else if (len > BUFF_ROOM (last_buff))
692     {
693       size_t room = BUFF_ROOM (last_buff);
694       memcpy (BUFF_FRONT (last_buff), base, room);
695       BUFF_FRONT (last_buff) += room;
696       base += room;
697       len -= room;
698       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
699     }
700 
701   memcpy (BUFF_FRONT (last_buff), base, len);
702   BUFF_FRONT (last_buff) += len;
703 
704   *first_buff_p = first_buff;
705   *last_buff_p = last_buff;
706 }
707 
/* Lexes a raw string.  The stored string contains the spelling, including
   double quotes, delimiter string, '(' and ')', any leading
   'L', 'u', 'U' or 'u8' and 'R' modifier.

   BASE points at the first character of the literal (prefix included)
   and CUR at the character following the 'R'.  Nothing is returned;
   the outcome is stored in TOKEN: its type is the string type selected
   by the prefix, CPP_OTHER if the delimiter is malformed or the string
   is unterminated where it may not span lines (in a directive, while
   parsing macro arguments or in a deferred pragma), or CPP_EOF if the
   input ends inside the string.

   Because the lines were already cleaned, trigraph replacements and
   line splices recorded as line notes are undone inside the body.  A
   body that needs such repairs, or that spans several lines, is
   assembled piecewise in a chain of _cpp_buff's.

   The spelling is NUL-terminated, but it is not guaranteed that this
   is the first NUL since embedded NULs are preserved.  */

static void
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
		const uchar *cur)
{
  /* Location of the first NUL seen in the body, or 0 if none.  */
  source_location saw_NUL = 0;
  /* The delimiter between the opening quote and '(' and its length.  */
  const uchar *raw_prefix;
  unsigned int raw_prefix_len = 0;
  enum cpp_ttype type;
  /* Number of bytes accumulated in the buffer chain so far.  */
  size_t total_len = 0;
  _cpp_buff *first_buff = NULL, *last_buff = NULL;
  /* Next line note that may apply to the text being scanned.  */
  _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];

  type = (*base == 'L' ? CPP_WSTRING :
	  *base == 'U' ? CPP_STRING32 :
	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
	  : CPP_STRING);

  /* Measure the delimiter: up to 16 characters, none of which may be
     white space, a parenthesis or a backslash.  The loop stops at the
     first character that cannot belong to a delimiter.  */
  raw_prefix = cur + 1;
  while (raw_prefix_len < 16)
    {
      switch (raw_prefix[raw_prefix_len])
	{
	case ' ': case '(': case ')': case '\\': case '\t':
	case '\v': case '\f': case '\n': default:
	  break;
	/* Basic source charset except the above chars.  */
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	case '0': case '1': case '2': case '3': case '4': case '5':
	case '6': case '7': case '8': case '9':
	case '_': case '{': case '}': case '#': case '[': case ']':
	case '<': case '>': case '%': case ':': case ';': case '.':
	case '?': case '*': case '+': case '-': case '/': case '^':
	case '&': case '|': case '~': case '!': case '=': case ',':
	case '"': case '\'':
	  raw_prefix_len++;
	  continue;
	}
      /* Reached only from the first group of cases above.  */
      break;
    }

  /* The delimiter must be followed by '('.  If it is not, report why
     and produce a CPP_OTHER token covering just the prefix and 'R',
     leaving buffer->cur on the opening quote.  */
  if (raw_prefix[raw_prefix_len] != '(')
    {
      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
		+ 1;
      if (raw_prefix_len == 16)
	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
			     "raw string delimiter longer than 16 characters");
      else
	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
			     "invalid character '%c' in raw string delimiter",
			     (int) raw_prefix[raw_prefix_len]);
      pfile->buffer->cur = raw_prefix - 1;
      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
      return;
    }

  /* Scan the body, starting after the '('.  BASE..CUR is the part of
     the current line not yet copied to the buffer chain.  */
  cur = raw_prefix + raw_prefix_len + 1;
  for (;;)
    {
      /* Append LEN bytes at STR to the buffer chain and account for
	 them in total_len.
	 NOTE(review): the definition ends in a semicolon, so each use
	 expands to an extra empty statement; harmless for the uses
	 below, but it would break an unbraced if/else.  The macro is
	 also left defined after the function.  */
#define BUF_APPEND(STR,LEN)					\
      do {							\
	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
			&first_buff, &last_buff);		\
	total_len += (LEN);					\
      } while (0);

      cppchar_t c;

      /* If we previously performed any trigraph or line splicing
	 transformations, undo them within the body of the raw string.  */
      while (note->pos < cur)
	++note;
      for (; note->pos == cur; ++note)
	{
	  switch (note->type)
	    {
	    case '\\':
	    case ' ':
	      /* Restore backslash followed by newline.  */
	      BUF_APPEND (base, cur - base);
	      base = cur;
	      BUF_APPEND ("\\", 1);
	    after_backslash:
	      if (note->type == ' ')
		{
		  /* GNU backslash whitespace newline extension.  FIXME
		     could be any sequence of non-vertical space.  When we
		     can properly restore any such sequence, we should mark
		     this note as handled so _cpp_process_line_notes
		     doesn't warn.  */
		  BUF_APPEND (" ", 1);
		}

	      BUF_APPEND ("\n", 1);
	      break;

	    case 0:
	      /* Already handled.  */
	      break;

	    default:
	      if (_cpp_trigraph_map[note->type])
		{
		  /* Don't warn about this trigraph in
		     _cpp_process_line_notes, since trigraphs show up as
		     trigraphs in raw strings.  */
		  uchar type = note->type;
		  note->type = 0;

		  if (!CPP_OPTION (pfile, trigraphs))
		    /* If we didn't convert the trigraph in the first
		       place, don't do anything now either.  */
		    break;

		  /* Flush what precedes the replacement character and
		     write the trigraph's two question marks back.  */
		  BUF_APPEND (base, cur - base);
		  base = cur;
		  BUF_APPEND ("??", 2);

		  /* ??/ followed by newline gets two line notes, one for
		     the trigraph and one for the backslash/newline.  */
		  if (type == '/' && note[1].pos == cur)
		    {
		      if (note[1].type != '\\'
			  && note[1].type != ' ')
			abort ();
		      BUF_APPEND ("/", 1);
		      ++note;
		      goto after_backslash;
		    }
		  /* The ) from ??) could be part of the suffix.  */
		  else if (type == ')'
			   && strncmp ((const char *) cur+1,
				       (const char *) raw_prefix,
				       raw_prefix_len) == 0
			   && cur[raw_prefix_len+1] == '"')
		    {
		      /* It is: emit the ')' here, skip it in the source
			 text and finish after the closing quote.  */
		      BUF_APPEND (")", 1);
		      base++;
		      cur += raw_prefix_len + 2;
		      goto break_outer_loop;
		    }
		  else
		    {
		      /* Skip the replacement character.  */
		      base = ++cur;
		      BUF_APPEND (&type, 1);
		    }
		}
	      else
		abort ();
	      break;
	    }
	}
      c = *cur++;

      /* The string ends with ')', the delimiter again and '"'.  */
      if (c == ')'
	  && strncmp ((const char *) cur, (const char *) raw_prefix,
		      raw_prefix_len) == 0
	  && cur[raw_prefix_len] == '"')
	{
	  cur += raw_prefix_len + 1;
	  break;
	}
      else if (c == '\n')
	{
	  /* In these contexts the string may not continue on the next
	     line; give up and leave the newline unread.  */
	  if (pfile->state.in_directive
	      || pfile->state.parsing_args
	      || pfile->state.in_deferred_pragma)
	    {
	      cur--;
	      type = CPP_OTHER;
	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
				   "unterminated raw string");
	      break;
	    }

	  /* Save this line, newline included, then move on to the
	     next one.  */
	  BUF_APPEND (base, cur - base);

	  if (pfile->buffer->cur < pfile->buffer->rlimit)
	    CPP_INCREMENT_LINE (pfile, 0);
	  pfile->buffer->need_line = true;

	  pfile->buffer->cur = cur-1;
	  _cpp_process_line_notes (pfile, false);
	  if (!_cpp_get_fresh_line (pfile))
	    {
	      /* No more input: the token becomes CPP_EOF and the
		 partial spelling is discarded.  */
	      source_location src_loc = token->src_loc;
	      token->type = CPP_EOF;
	      /* Tell the compiler the line number of the EOF token.  */
	      token->src_loc = pfile->line_table->highest_line;
	      token->flags = BOL;
	      if (first_buff != NULL)
		_cpp_release_buff (pfile, first_buff);
	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
				   "unterminated raw string");
	      return;
	    }

	  /* Restart scanning, and note tracking, on the new line.  */
	  cur = base = pfile->buffer->cur;
	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
	}
      else if (c == '\0' && !saw_NUL)
	LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
				     CPP_BUF_COLUMN (pfile->buffer, cur));
    }
 break_outer_loop:

  if (saw_NUL && !pfile->state.skipping)
    cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
	       "null character(s) preserved in literal");

  pfile->buffer->cur = cur;
  if (first_buff == NULL)
    /* Nothing was buffered: the spelling is one contiguous piece of
       the current line.  */
    create_literal (pfile, token, base, cur - base, type);
  else
    {
      /* Concatenate the buffered pieces and the unbuffered tail
	 BASE..CUR into one NUL-terminated spelling.  */
      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);

      token->type = type;
      token->val.str.len = total_len + (cur - base);
      token->val.str.text = dest;
      last_buff = first_buff;
      while (last_buff != NULL)
	{
	  memcpy (dest, last_buff->base,
		  BUFF_FRONT (last_buff) - last_buff->base);
	  dest += BUFF_FRONT (last_buff) - last_buff->base;
	  last_buff = last_buff->next;
	}
      _cpp_release_buff (pfile, first_buff);
      memcpy (dest, base, cur - base);
      dest[cur - base] = '\0';
    }
}
959 
960 /* Lexes a string, character constant, or angle-bracketed header file
961    name.  The stored string contains the spelling, including opening
962    quote and any leading 'L', 'u', 'U' or 'u8' and optional
963    'R' modifier.  It returns the type of the literal, or CPP_OTHER
964    if it was not properly terminated, or CPP_LESS for an unterminated
965    header name which must be relexed as normal tokens.
966 
967    The spelling is NUL-terminated, but it is not guaranteed that this
968    is the first NUL since embedded NULs are preserved.  */
969 static void
970 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
971 {
972   bool saw_NUL = false;
973   const uchar *cur;
974   cppchar_t terminator;
975   enum cpp_ttype type;
976 
977   cur = base;
978   terminator = *cur++;
979   if (terminator == 'L' || terminator == 'U')
980     terminator = *cur++;
981   else if (terminator == 'u')
982     {
983       terminator = *cur++;
984       if (terminator == '8')
985 	terminator = *cur++;
986     }
987   if (terminator == 'R')
988     {
989       lex_raw_string (pfile, token, base, cur);
990       return;
991     }
992   if (terminator == '"')
993     type = (*base == 'L' ? CPP_WSTRING :
994 	    *base == 'U' ? CPP_STRING32 :
995 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
996 			 : CPP_STRING);
997   else if (terminator == '\'')
998     type = (*base == 'L' ? CPP_WCHAR :
999 	    *base == 'U' ? CPP_CHAR32 :
1000 	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1001   else
1002     terminator = '>', type = CPP_HEADER_NAME;
1003 
1004   for (;;)
1005     {
1006       cppchar_t c = *cur++;
1007 
1008       /* In #include-style directives, terminators are not escapable.  */
1009       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1010 	cur++;
1011       else if (c == terminator)
1012 	break;
1013       else if (c == '\n')
1014 	{
1015 	  cur--;
1016 	  /* Unmatched quotes always yield undefined behavior, but
1017 	     greedy lexing means that what appears to be an unterminated
1018 	     header name may actually be a legitimate sequence of tokens.  */
1019 	  if (terminator == '>')
1020 	    {
1021 	      token->type = CPP_LESS;
1022 	      return;
1023 	    }
1024 	  type = CPP_OTHER;
1025 	  break;
1026 	}
1027       else if (c == '\0')
1028 	saw_NUL = true;
1029     }
1030 
1031   if (saw_NUL && !pfile->state.skipping)
1032     cpp_error (pfile, CPP_DL_WARNING,
1033 	       "null character(s) preserved in literal");
1034 
1035   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1036     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1037 	       (int) terminator);
1038 
1039   pfile->buffer->cur = cur;
1040   create_literal (pfile, token, base, cur - base, type);
1041 }
1042 
1043 /* Return the comment table. The client may not make any assumption
1044    about the ordering of the table.  */
1045 cpp_comment_table *
1046 cpp_get_comments (cpp_reader *pfile)
1047 {
1048   return &pfile->comments;
1049 }
1050 
1051 /* Append a comment to the end of the comment table. */
1052 static void
1053 store_comment (cpp_reader *pfile, cpp_token *token)
1054 {
1055   int len;
1056 
1057   if (pfile->comments.allocated == 0)
1058     {
1059       pfile->comments.allocated = 256;
1060       pfile->comments.entries = (cpp_comment *) xmalloc
1061 	(pfile->comments.allocated * sizeof (cpp_comment));
1062     }
1063 
1064   if (pfile->comments.count == pfile->comments.allocated)
1065     {
1066       pfile->comments.allocated *= 2;
1067       pfile->comments.entries = (cpp_comment *) xrealloc
1068 	(pfile->comments.entries,
1069 	 pfile->comments.allocated * sizeof (cpp_comment));
1070     }
1071 
1072   len = token->val.str.len;
1073 
1074   /* Copy comment. Note, token may not be NULL terminated. */
1075   pfile->comments.entries[pfile->comments.count].comment =
1076     (char *) xmalloc (sizeof (char) * (len + 1));
1077   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1078 	  token->val.str.text, len);
1079   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1080 
1081   /* Set source location. */
1082   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1083 
1084   /* Increment the count of entries in the comment table. */
1085   pfile->comments.count++;
1086 }
1087 
1088 /* The stored comment includes the comment start and any terminator.  */
1089 static void
1090 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1091 	      cppchar_t type)
1092 {
1093   unsigned char *buffer;
1094   unsigned int len, clen;
1095   int convert_to_c = (pfile->state.in_directive || pfile->state.collecting_args)
1096     && type == '/';
1097 
1098   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1099 
1100   /* C++ comments probably (not definitely) have moved past a new
1101      line, which we don't want to save in the comment.  */
1102   if (is_vspace (pfile->buffer->cur[-1]))
1103     len--;
1104 
1105   /* If we are currently in a directive, then we need to store all
1106      C++ comments as C comments internally, and so we need to
1107      allocate a little extra space in that case.
1108 
1109      Note that the only time we encounter a directive here is
1110      when we are saving comments in a "#define".  */
1111   clen = convert_to_c ? len + 2 : len;
1112 
1113   buffer = _cpp_unaligned_alloc (pfile, clen);
1114 
1115   token->type = CPP_COMMENT;
1116   token->val.str.len = clen;
1117   token->val.str.text = buffer;
1118 
1119   buffer[0] = '/';
1120   memcpy (buffer + 1, from, len - 1);
1121 
1122   /* Finish conversion to a C comment, if necessary.  */
1123   if (convert_to_c)
1124     {
1125       buffer[1] = '*';
1126       buffer[clen - 2] = '*';
1127       buffer[clen - 1] = '/';
1128     }
1129 
1130   /* Finally store this comment for use by clients of libcpp. */
1131   store_comment (pfile, token);
1132 }
1133 
1134 /* Allocate COUNT tokens for RUN.  */
1135 void
1136 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1137 {
1138   run->base = XNEWVEC (cpp_token, count);
1139   run->limit = run->base + count;
1140   run->next = NULL;
1141 }
1142 
1143 /* Returns the next tokenrun, or creates one if there is none.  */
1144 static tokenrun *
1145 next_tokenrun (tokenrun *run)
1146 {
1147   if (run->next == NULL)
1148     {
1149       run->next = XNEW (tokenrun);
1150       run->next->prev = run;
1151       _cpp_init_tokenrun (run->next, 250);
1152     }
1153 
1154   return run->next;
1155 }
1156 
1157 /* Look ahead in the input stream.  */
1158 const cpp_token *
1159 cpp_peek_token (cpp_reader *pfile, int index)
1160 {
1161   cpp_context *context = pfile->context;
1162   const cpp_token *peektok;
1163   int count;
1164 
1165   /* First, scan through any pending cpp_context objects.  */
1166   while (context->prev)
1167     {
1168       ptrdiff_t sz = (context->direct_p
1169                       ? LAST (context).token - FIRST (context).token
1170                       : LAST (context).ptoken - FIRST (context).ptoken);
1171 
1172       if (index < (int) sz)
1173         return (context->direct_p
1174                 ? FIRST (context).token + index
1175                 : *(FIRST (context).ptoken + index));
1176 
1177       index -= (int) sz;
1178       context = context->prev;
1179     }
1180 
1181   /* We will have to read some new tokens after all (and do so
1182      without invalidating preceding tokens).  */
1183   count = index;
1184   pfile->keep_tokens++;
1185 
1186   do
1187     {
1188       peektok = _cpp_lex_token (pfile);
1189       if (peektok->type == CPP_EOF)
1190 	return peektok;
1191     }
1192   while (index--);
1193 
1194   _cpp_backup_tokens_direct (pfile, count + 1);
1195   pfile->keep_tokens--;
1196 
1197   return peektok;
1198 }
1199 
1200 /* Allocate a single token that is invalidated at the same time as the
1201    rest of the tokens on the line.  Has its line and col set to the
1202    same as the last lexed token, so that diagnostics appear in the
1203    right place.  */
1204 cpp_token *
1205 _cpp_temp_token (cpp_reader *pfile)
1206 {
1207   cpp_token *old, *result;
1208   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1209   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1210 
1211   old = pfile->cur_token - 1;
1212   /* Any pre-existing lookaheads must not be clobbered.  */
1213   if (la)
1214     {
1215       if (sz <= la)
1216         {
1217           tokenrun *next = next_tokenrun (pfile->cur_run);
1218 
1219           if (sz < la)
1220             memmove (next->base + 1, next->base,
1221                      (la - sz) * sizeof (cpp_token));
1222 
1223           next->base[0] = pfile->cur_run->limit[-1];
1224         }
1225 
1226       if (sz > 1)
1227         memmove (pfile->cur_token + 1, pfile->cur_token,
1228                  MIN (la, sz - 1) * sizeof (cpp_token));
1229     }
1230 
1231   if (!sz && pfile->cur_token == pfile->cur_run->limit)
1232     {
1233       pfile->cur_run = next_tokenrun (pfile->cur_run);
1234       pfile->cur_token = pfile->cur_run->base;
1235     }
1236 
1237   result = pfile->cur_token++;
1238   result->src_loc = old->src_loc;
1239   return result;
1240 }
1241 
1242 /* Lex a token into RESULT (external interface).  Takes care of issues
1243    like directive handling, token lookahead, multiple include
1244    optimization and skipping.  */
1245 const cpp_token *
1246 _cpp_lex_token (cpp_reader *pfile)
1247 {
1248   cpp_token *result;
1249 
1250   for (;;)
1251     {
1252       if (pfile->cur_token == pfile->cur_run->limit)
1253 	{
1254 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
1255 	  pfile->cur_token = pfile->cur_run->base;
1256 	}
1257       /* We assume that the current token is somewhere in the current
1258 	 run.  */
1259       if (pfile->cur_token < pfile->cur_run->base
1260 	  || pfile->cur_token >= pfile->cur_run->limit)
1261 	abort ();
1262 
1263       if (pfile->lookaheads)
1264 	{
1265 	  pfile->lookaheads--;
1266 	  result = pfile->cur_token++;
1267 	}
1268       else
1269 	result = _cpp_lex_direct (pfile);
1270 
1271       if (result->flags & BOL)
1272 	{
1273 	  /* Is this a directive.  If _cpp_handle_directive returns
1274 	     false, it is an assembler #.  */
1275 	  if (result->type == CPP_HASH
1276 	      /* 6.10.3 p 11: Directives in a list of macro arguments
1277 		 gives undefined behavior.  This implementation
1278 		 handles the directive as normal.  */
1279 	      && pfile->state.parsing_args != 1)
1280 	    {
1281 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1282 		{
1283 		  if (pfile->directive_result.type == CPP_PADDING)
1284 		    continue;
1285 		  result = &pfile->directive_result;
1286 		}
1287 	    }
1288 	  else if (pfile->state.in_deferred_pragma)
1289 	    result = &pfile->directive_result;
1290 
1291 	  if (pfile->cb.line_change && !pfile->state.skipping)
1292 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1293 	}
1294 
1295       /* We don't skip tokens in directives.  */
1296       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1297 	break;
1298 
1299       /* Outside a directive, invalidate controlling macros.  At file
1300 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1301 	 get here and MI optimization works.  */
1302       pfile->mi_valid = false;
1303 
1304       if (!pfile->state.skipping || result->type == CPP_EOF)
1305 	break;
1306     }
1307 
1308   return result;
1309 }
1310 
1311 /* Returns true if a fresh line has been loaded.  */
1312 bool
1313 _cpp_get_fresh_line (cpp_reader *pfile)
1314 {
1315   int return_at_eof;
1316 
1317   /* We can't get a new line until we leave the current directive.  */
1318   if (pfile->state.in_directive)
1319     return false;
1320 
1321   for (;;)
1322     {
1323       cpp_buffer *buffer = pfile->buffer;
1324 
1325       if (!buffer->need_line)
1326 	return true;
1327 
1328       if (buffer->next_line < buffer->rlimit)
1329 	{
1330 	  _cpp_clean_line (pfile);
1331 	  return true;
1332 	}
1333 
1334       /* First, get out of parsing arguments state.  */
1335       if (pfile->state.parsing_args)
1336 	return false;
1337 
1338       /* End of buffer.  Non-empty files should end in a newline.  */
1339       if (buffer->buf != buffer->rlimit
1340 	  && buffer->next_line > buffer->rlimit
1341 	  && !buffer->from_stage3)
1342 	{
1343 	  /* Clip to buffer size.  */
1344 	  buffer->next_line = buffer->rlimit;
1345 	}
1346 
1347       return_at_eof = buffer->return_at_eof;
1348       _cpp_pop_buffer (pfile);
1349       if (pfile->buffer == NULL || return_at_eof)
1350 	return false;
1351     }
1352 }
1353 
/* Helper for lexing two-character operators.  Expects `buffer' and
   `result' to be in scope at the point of use.  If the next input
   character equals CHAR, consume it and set the token type to
   THEN_TYPE; otherwise leave the input alone and set the type to
   ELSE_TYPE.  Arguments are parenthesized so that any expression may
   be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = (ELSE_TYPE);			\
      if (*buffer->cur == (CHAR))			\
	{						\
	  buffer->cur++;				\
	  result->type = (THEN_TYPE);			\
	}						\
    }							\
  while (0)
1362 
1363 /* Lex a token into pfile->cur_token, which is also incremented, to
1364    get diagnostics pointing to the correct location.
1365 
1366    Does not handle issues such as token lookahead, multiple-include
1367    optimization, directives, skipping etc.  This function is only
1368    suitable for use by _cpp_lex_token, and in special cases like
1369    lex_expansion_token which doesn't care for any of these issues.
1370 
1371    When meeting a newline, returns CPP_EOF if parsing a directive,
1372    otherwise returns to the start of the token buffer if permissible.
1373    Returns the location of the lexed token.  */
1374 cpp_token *
1375 _cpp_lex_direct (cpp_reader *pfile)
1376 {
1377   cppchar_t c;
1378   cpp_buffer *buffer;
1379   const unsigned char *comment_start;
1380   cpp_token *result = pfile->cur_token++;
1381 
1382  fresh_line:
1383   result->flags = 0;
1384   buffer = pfile->buffer;
1385   if (buffer->need_line)
1386     {
1387       if (pfile->state.in_deferred_pragma)
1388 	{
1389 	  result->type = CPP_PRAGMA_EOL;
1390 	  pfile->state.in_deferred_pragma = false;
1391 	  if (!pfile->state.pragma_allow_expansion)
1392 	    pfile->state.prevent_expansion--;
1393 	  return result;
1394 	}
1395       if (!_cpp_get_fresh_line (pfile))
1396 	{
1397 	  result->type = CPP_EOF;
1398 	  if (!pfile->state.in_directive)
1399 	    {
1400 	      /* Tell the compiler the line number of the EOF token.  */
1401 	      result->src_loc = pfile->line_table->highest_line;
1402 	      result->flags = BOL;
1403 	    }
1404 	  return result;
1405 	}
1406       if (!pfile->keep_tokens)
1407 	{
1408 	  pfile->cur_run = &pfile->base_run;
1409 	  result = pfile->base_run.base;
1410 	  pfile->cur_token = result + 1;
1411 	}
1412       result->flags = BOL;
1413       if (pfile->state.parsing_args == 2)
1414 	result->flags |= PREV_WHITE;
1415     }
1416   buffer = pfile->buffer;
1417  update_tokens_line:
1418   result->src_loc = pfile->line_table->highest_line;
1419 
1420  skipped_white:
1421   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1422       && !pfile->overlaid_buffer)
1423     {
1424       _cpp_process_line_notes (pfile, false);
1425       result->src_loc = pfile->line_table->highest_line;
1426     }
1427   c = *buffer->cur++;
1428 
1429   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1430 			       CPP_BUF_COLUMN (buffer, buffer->cur));
1431 
1432   switch (c)
1433     {
1434     case ' ': case '\t': case '\f': case '\v': case '\0':
1435       result->flags |= PREV_WHITE;
1436       skip_whitespace (pfile, c);
1437       goto skipped_white;
1438 
1439     case '\n':
1440       if (buffer->cur < buffer->rlimit)
1441 	CPP_INCREMENT_LINE (pfile, 0);
1442       buffer->need_line = true;
1443       goto fresh_line;
1444 
1445     case '0': case '1': case '2': case '3': case '4':
1446     case '5': case '6': case '7': case '8': case '9':
1447       {
1448 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1449 	result->type = CPP_NUMBER;
1450 	lex_number (pfile, &result->val.str, &nst);
1451 	warn_about_normalization (pfile, result, &nst);
1452 	break;
1453       }
1454 
1455     case 'L':
1456     case 'u':
1457     case 'U':
1458     case 'R':
1459       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
1460 	 wide strings or raw strings.  */
1461       if (c == 'L' || CPP_OPTION (pfile, uliterals))
1462 	{
1463 	  if ((*buffer->cur == '\'' && c != 'R')
1464 	      || *buffer->cur == '"'
1465 	      || (*buffer->cur == 'R'
1466 		  && c != 'R'
1467 		  && buffer->cur[1] == '"'
1468 		  && CPP_OPTION (pfile, uliterals))
1469 	      || (*buffer->cur == '8'
1470 		  && c == 'u'
1471 		  && (buffer->cur[1] == '"'
1472 		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
1473 	    {
1474 	      lex_string (pfile, result, buffer->cur - 1);
1475 	      break;
1476 	    }
1477 	}
1478       /* Fall through.  */
1479 
1480     case '_':
1481     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1482     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1483     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1484     case 's': case 't':           case 'v': case 'w': case 'x':
1485     case 'y': case 'z':
1486     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1487     case 'G': case 'H': case 'I': case 'J': case 'K':
1488     case 'M': case 'N': case 'O': case 'P': case 'Q':
1489     case 'S': case 'T':           case 'V': case 'W': case 'X':
1490     case 'Y': case 'Z':
1491       result->type = CPP_NAME;
1492       {
1493 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1494 	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1495 						&nst);
1496 	warn_about_normalization (pfile, result, &nst);
1497       }
1498 
1499       /* Convert named operators to their proper types.  */
1500       if (result->val.node.node->flags & NODE_OPERATOR)
1501 	{
1502 	  result->flags |= NAMED_OP;
1503 	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
1504 	}
1505       break;
1506 
1507     case '\'':
1508     case '"':
1509       lex_string (pfile, result, buffer->cur - 1);
1510       break;
1511 
1512     case '/':
1513       /* A potential block or line comment.  */
1514       comment_start = buffer->cur;
1515       c = *buffer->cur;
1516 
1517       if (c == '*')
1518 	{
1519 	  if (_cpp_skip_block_comment (pfile))
1520 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1521 	}
1522       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1523 			    || cpp_in_system_header (pfile)))
1524 	{
1525 	  /* Warn about comments only if pedantically GNUC89, and not
1526 	     in system headers.  */
1527 	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1528 	      && ! buffer->warned_cplusplus_comments)
1529 	    {
1530 	      cpp_error (pfile, CPP_DL_PEDWARN,
1531 			 "C++ style comments are not allowed in ISO C90");
1532 	      cpp_error (pfile, CPP_DL_PEDWARN,
1533 			 "(this will be reported only once per input file)");
1534 	      buffer->warned_cplusplus_comments = 1;
1535 	    }
1536 
1537 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1538 	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1539 	}
1540       else if (c == '=')
1541 	{
1542 	  buffer->cur++;
1543 	  result->type = CPP_DIV_EQ;
1544 	  break;
1545 	}
1546       else
1547 	{
1548 	  result->type = CPP_DIV;
1549 	  break;
1550 	}
1551 
1552       if (!pfile->state.save_comments)
1553 	{
1554 	  result->flags |= PREV_WHITE;
1555 	  goto update_tokens_line;
1556 	}
1557 
1558       /* Save the comment as a token in its own right.  */
1559       save_comment (pfile, result, comment_start, c);
1560       break;
1561 
1562     case '<':
1563       if (pfile->state.angled_headers)
1564 	{
1565 	  lex_string (pfile, result, buffer->cur - 1);
1566 	  if (result->type != CPP_LESS)
1567 	    break;
1568 	}
1569 
1570       result->type = CPP_LESS;
1571       if (*buffer->cur == '=')
1572 	buffer->cur++, result->type = CPP_LESS_EQ;
1573       else if (*buffer->cur == '<')
1574 	{
1575 	  buffer->cur++;
1576 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1577 	}
1578       else if (CPP_OPTION (pfile, digraphs))
1579 	{
1580 	  if (*buffer->cur == ':')
1581 	    {
1582 	      buffer->cur++;
1583 	      result->flags |= DIGRAPH;
1584 	      result->type = CPP_OPEN_SQUARE;
1585 	    }
1586 	  else if (*buffer->cur == '%')
1587 	    {
1588 	      buffer->cur++;
1589 	      result->flags |= DIGRAPH;
1590 	      result->type = CPP_OPEN_BRACE;
1591 	    }
1592 	}
1593       break;
1594 
1595     case '>':
1596       result->type = CPP_GREATER;
1597       if (*buffer->cur == '=')
1598 	buffer->cur++, result->type = CPP_GREATER_EQ;
1599       else if (*buffer->cur == '>')
1600 	{
1601 	  buffer->cur++;
1602 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1603 	}
1604       break;
1605 
1606     case '%':
1607       result->type = CPP_MOD;
1608       if (*buffer->cur == '=')
1609 	buffer->cur++, result->type = CPP_MOD_EQ;
1610       else if (CPP_OPTION (pfile, digraphs))
1611 	{
1612 	  if (*buffer->cur == ':')
1613 	    {
1614 	      buffer->cur++;
1615 	      result->flags |= DIGRAPH;
1616 	      result->type = CPP_HASH;
1617 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1618 		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
1619 	    }
1620 	  else if (*buffer->cur == '>')
1621 	    {
1622 	      buffer->cur++;
1623 	      result->flags |= DIGRAPH;
1624 	      result->type = CPP_CLOSE_BRACE;
1625 	    }
1626 	}
1627       break;
1628 
1629     case '.':
1630       result->type = CPP_DOT;
1631       if (ISDIGIT (*buffer->cur))
1632 	{
1633 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1634 	  result->type = CPP_NUMBER;
1635 	  lex_number (pfile, &result->val.str, &nst);
1636 	  warn_about_normalization (pfile, result, &nst);
1637 	}
1638       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1639 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1640       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1641 	buffer->cur++, result->type = CPP_DOT_STAR;
1642       break;
1643 
1644     case '+':
1645       result->type = CPP_PLUS;
1646       if (*buffer->cur == '+')
1647 	buffer->cur++, result->type = CPP_PLUS_PLUS;
1648       else if (*buffer->cur == '=')
1649 	buffer->cur++, result->type = CPP_PLUS_EQ;
1650       break;
1651 
1652     case '-':
1653       result->type = CPP_MINUS;
1654       if (*buffer->cur == '>')
1655 	{
1656 	  buffer->cur++;
1657 	  result->type = CPP_DEREF;
1658 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1659 	    buffer->cur++, result->type = CPP_DEREF_STAR;
1660 	}
1661       else if (*buffer->cur == '-')
1662 	buffer->cur++, result->type = CPP_MINUS_MINUS;
1663       else if (*buffer->cur == '=')
1664 	buffer->cur++, result->type = CPP_MINUS_EQ;
1665       break;
1666 
1667     case '&':
1668       result->type = CPP_AND;
1669       if (*buffer->cur == '&')
1670 	buffer->cur++, result->type = CPP_AND_AND;
1671       else if (*buffer->cur == '=')
1672 	buffer->cur++, result->type = CPP_AND_EQ;
1673       break;
1674 
1675     case '|':
1676       result->type = CPP_OR;
1677       if (*buffer->cur == '|')
1678 	buffer->cur++, result->type = CPP_OR_OR;
1679       else if (*buffer->cur == '=')
1680 	buffer->cur++, result->type = CPP_OR_EQ;
1681       break;
1682 
1683     case ':':
1684       result->type = CPP_COLON;
1685       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1686 	buffer->cur++, result->type = CPP_SCOPE;
1687       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1688 	{
1689 	  buffer->cur++;
1690 	  result->flags |= DIGRAPH;
1691 	  result->type = CPP_CLOSE_SQUARE;
1692 	}
1693       break;
1694 
1695     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1696     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1697     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1698     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1699     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1700 
1701     case '?': result->type = CPP_QUERY; break;
1702     case '~': result->type = CPP_COMPL; break;
1703     case ',': result->type = CPP_COMMA; break;
1704     case '(': result->type = CPP_OPEN_PAREN; break;
1705     case ')': result->type = CPP_CLOSE_PAREN; break;
1706     case '[': result->type = CPP_OPEN_SQUARE; break;
1707     case ']': result->type = CPP_CLOSE_SQUARE; break;
1708     case '{': result->type = CPP_OPEN_BRACE; break;
1709     case '}': result->type = CPP_CLOSE_BRACE; break;
1710     case ';': result->type = CPP_SEMICOLON; break;
1711 
1712       /* @ is a punctuator in Objective-C.  */
1713     case '@': result->type = CPP_ATSIGN; break;
1714 
1715     case '$':
1716     case '\\':
1717       {
1718 	const uchar *base = --buffer->cur;
1719 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1720 
1721 	if (forms_identifier_p (pfile, true, &nst))
1722 	  {
1723 	    result->type = CPP_NAME;
1724 	    result->val.node.node = lex_identifier (pfile, base, true, &nst);
1725 	    warn_about_normalization (pfile, result, &nst);
1726 	    break;
1727 	  }
1728 	buffer->cur++;
1729       }
1730 
1731     default:
1732       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1733       break;
1734     }
1735 
1736   return result;
1737 }
1738 
1739 /* An upper bound on the number of bytes needed to spell TOKEN.
1740    Does not include preceding whitespace.  */
1741 unsigned int
1742 cpp_token_len (const cpp_token *token)
1743 {
1744   unsigned int len;
1745 
1746   switch (TOKEN_SPELL (token))
1747     {
1748     default:		len = 6;				break;
1749     case SPELL_LITERAL:	len = token->val.str.len;		break;
1750     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
1751     }
1752 
1753   return len;
1754 }
1755 
/* Decode the UTF-8 sequence starting at NAME and write it to BUFFER
   as a \U escape with eight lower-case hexadecimal digits.  Exactly
   ten bytes are written to BUFFER.  Returns the number of bytes of
   NAME that were consumed.  Aborts on an ill-formed continuation
   byte.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int idx;
  int shift;

  /* The number of leading one bits in the first byte gives the
     length of the sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* Payload bits of the first byte, then six from each trailer.  */
  code_point = *name & (0x7F >> seq_len);
  for (idx = 1; idx < seq_len; idx++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & 0xC0) != 0x80)
	abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1789 
1790 /* Given a token TYPE corresponding to a digraph, return a pointer to
1791    the spelling of the digraph.  */
1792 static const unsigned char *
1793 cpp_digraph2name (enum cpp_ttype type)
1794 {
1795   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1796 }
1797 
1798 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1799    already contain the enough space to hold the token's spelling.
1800    Returns a pointer to the character after the last character written.
1801    FORSTRING is true if this is to be the spelling after translation
1802    phase 1 (this is different for UCNs).
1803    FIXME: Would be nice if we didn't need the PFILE argument.  */
1804 unsigned char *
1805 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1806 		 unsigned char *buffer, bool forstring)
1807 {
1808   switch (TOKEN_SPELL (token))
1809     {
1810     case SPELL_OPERATOR:
1811       {
1812 	const unsigned char *spelling;
1813 	unsigned char c;
1814 
1815 	if (token->flags & DIGRAPH)
1816 	  spelling = cpp_digraph2name (token->type);
1817 	else if (token->flags & NAMED_OP)
1818 	  goto spell_ident;
1819 	else
1820 	  spelling = TOKEN_NAME (token);
1821 
1822 	while ((c = *spelling++) != '\0')
1823 	  *buffer++ = c;
1824       }
1825       break;
1826 
1827     spell_ident:
1828     case SPELL_IDENT:
1829       if (forstring)
1830 	{
1831 	  memcpy (buffer, NODE_NAME (token->val.node.node),
1832 		  NODE_LEN (token->val.node.node));
1833 	  buffer += NODE_LEN (token->val.node.node);
1834 	}
1835       else
1836 	{
1837 	  size_t i;
1838 	  const unsigned char * name = NODE_NAME (token->val.node.node);
1839 
1840 	  for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1841 	    if (name[i] & ~0x7F)
1842 	      {
1843 		i += utf8_to_ucn (buffer, name + i) - 1;
1844 		buffer += 10;
1845 	      }
1846 	    else
1847 	      *buffer++ = NODE_NAME (token->val.node.node)[i];
1848 	}
1849       break;
1850 
1851     case SPELL_LITERAL:
1852       memcpy (buffer, token->val.str.text, token->val.str.len);
1853       buffer += token->val.str.len;
1854       break;
1855 
1856     case SPELL_NONE:
1857       cpp_error (pfile, CPP_DL_ICE,
1858 		 "unspellable token %s", TOKEN_NAME (token));
1859       break;
1860     }
1861 
1862   return buffer;
1863 }
1864 
1865 /* Returns TOKEN spelt as a null-terminated string.  The string is
1866    freed when the reader is destroyed.  Useful for diagnostics.  */
1867 unsigned char *
1868 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1869 {
1870   unsigned int len = cpp_token_len (token) + 1;
1871   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1872 
1873   end = cpp_spell_token (pfile, token, start, false);
1874   end[0] = '\0';
1875 
1876   return start;
1877 }
1878 
1879 /* Returns a pointer to a string which spells the token defined by
1880    TYPE and FLAGS.  Used by C front ends, which really should move to
1881    using cpp_token_as_text.  */
1882 const char *
1883 cpp_type2name (enum cpp_ttype type, unsigned char flags)
1884 {
1885   if (flags & DIGRAPH)
1886     return (const char *) cpp_digraph2name (type);
1887   else if (flags & NAMED_OP)
1888     return cpp_named_operator2name (type);
1889 
1890   return (const char *) token_spellings[type].name;
1891 }
1892 
1893 /* Writes the spelling of token to FP, without any preceding space.
1894    Separated from cpp_spell_token for efficiency - to avoid stdio
1895    double-buffering.  */
1896 void
1897 cpp_output_token (const cpp_token *token, FILE *fp)
1898 {
1899   switch (TOKEN_SPELL (token))
1900     {
1901     case SPELL_OPERATOR:
1902       {
1903 	const unsigned char *spelling;
1904 	int c;
1905 
1906 	if (token->flags & DIGRAPH)
1907 	  spelling = cpp_digraph2name (token->type);
1908 	else if (token->flags & NAMED_OP)
1909 	  goto spell_ident;
1910 	else
1911 	  spelling = TOKEN_NAME (token);
1912 
1913 	c = *spelling;
1914 	do
1915 	  putc (c, fp);
1916 	while ((c = *++spelling) != '\0');
1917       }
1918       break;
1919 
1920     spell_ident:
1921     case SPELL_IDENT:
1922       {
1923 	size_t i;
1924 	const unsigned char * name = NODE_NAME (token->val.node.node);
1925 
1926 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1927 	  if (name[i] & ~0x7F)
1928 	    {
1929 	      unsigned char buffer[10];
1930 	      i += utf8_to_ucn (buffer, name + i) - 1;
1931 	      fwrite (buffer, 1, 10, fp);
1932 	    }
1933 	  else
1934 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
1935       }
1936       break;
1937 
1938     case SPELL_LITERAL:
1939       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1940       break;
1941 
1942     case SPELL_NONE:
1943       /* An error, most probably.  */
1944       break;
1945     }
1946 }
1947 
1948 /* Compare two tokens.  */
1949 int
1950 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1951 {
1952   if (a->type == b->type && a->flags == b->flags)
1953     switch (TOKEN_SPELL (a))
1954       {
1955       default:			/* Keep compiler happy.  */
1956       case SPELL_OPERATOR:
1957 	/* token_no is used to track where multiple consecutive ##
1958 	   tokens were originally located.  */
1959 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
1960       case SPELL_NONE:
1961 	return (a->type != CPP_MACRO_ARG
1962 		|| a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
1963       case SPELL_IDENT:
1964 	return a->val.node.node == b->val.node.node;
1965       case SPELL_LITERAL:
1966 	return (a->val.str.len == b->val.str.len
1967 		&& !memcmp (a->val.str.text, b->val.str.text,
1968 			    a->val.str.len));
1969       }
1970 
1971   return 0;
1972 }
1973 
1974 /* Returns nonzero if a space should be inserted to avoid an
1975    accidental token paste for output.  For simplicity, it is
1976    conservative, and occasionally advises a space where one is not
1977    needed, e.g. "." and ".2".  */
1978 int
1979 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1980 		 const cpp_token *token2)
1981 {
1982   enum cpp_ttype a = token1->type, b = token2->type;
1983   cppchar_t c;
1984 
1985   if (token1->flags & NAMED_OP)
1986     a = CPP_NAME;
1987   if (token2->flags & NAMED_OP)
1988     b = CPP_NAME;
1989 
1990   c = EOF;
1991   if (token2->flags & DIGRAPH)
1992     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1993   else if (token_spellings[b].category == SPELL_OPERATOR)
1994     c = token_spellings[b].name[0];
1995 
1996   /* Quickly get everything that can paste with an '='.  */
1997   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1998     return 1;
1999 
2000   switch (a)
2001     {
2002     case CPP_GREATER:	return c == '>';
2003     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
2004     case CPP_PLUS:	return c == '+';
2005     case CPP_MINUS:	return c == '-' || c == '>';
2006     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
2007     case CPP_MOD:	return c == ':' || c == '>';
2008     case CPP_AND:	return c == '&';
2009     case CPP_OR:	return c == '|';
2010     case CPP_COLON:	return c == ':' || c == '>';
2011     case CPP_DEREF:	return c == '*';
2012     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
2013     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
2014     case CPP_NAME:	return ((b == CPP_NUMBER
2015 				 && name_p (pfile, &token2->val.str))
2016 				|| b == CPP_NAME
2017 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
2018     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
2019 				|| c == '.' || c == '+' || c == '-');
2020 				      /* UCNs */
2021     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
2022 				 && b == CPP_NAME)
2023 				|| (CPP_OPTION (pfile, objc)
2024 				    && token1->val.str.text[0] == '@'
2025 				    && (b == CPP_NAME || b == CPP_STRING)));
2026     default:		break;
2027     }
2028 
2029   return 0;
2030 }
2031 
2032 /* Output all the remaining tokens on the current line, and a newline
2033    character, to FP.  Leading whitespace is removed.  If there are
2034    macros, special token padding is not performed.  */
2035 void
2036 cpp_output_line (cpp_reader *pfile, FILE *fp)
2037 {
2038   const cpp_token *token;
2039 
2040   token = cpp_get_token (pfile);
2041   while (token->type != CPP_EOF)
2042     {
2043       cpp_output_token (token, fp);
2044       token = cpp_get_token (pfile);
2045       if (token->flags & PREV_WHITE)
2046 	putc (' ', fp);
2047     }
2048 
2049   putc ('\n', fp);
2050 }
2051 
2052 /* Return a string representation of all the remaining tokens on the
2053    current line.  The result is allocated using xmalloc and must be
2054    freed by the caller.  */
2055 unsigned char *
2056 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2057 {
2058   const cpp_token *token;
2059   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2060   unsigned int alloced = 120 + out;
2061   unsigned char *result = (unsigned char *) xmalloc (alloced);
2062 
2063   /* If DIR_NAME is empty, there are no initial contents.  */
2064   if (dir_name)
2065     {
2066       sprintf ((char *) result, "#%s ", dir_name);
2067       out += 2;
2068     }
2069 
2070   token = cpp_get_token (pfile);
2071   while (token->type != CPP_EOF)
2072     {
2073       unsigned char *last;
2074       /* Include room for a possible space and the terminating nul.  */
2075       unsigned int len = cpp_token_len (token) + 2;
2076 
2077       if (out + len > alloced)
2078 	{
2079 	  alloced *= 2;
2080 	  if (out + len > alloced)
2081 	    alloced = out + len;
2082 	  result = (unsigned char *) xrealloc (result, alloced);
2083 	}
2084 
2085       last = cpp_spell_token (pfile, token, &result[out], 0);
2086       out = last - result;
2087 
2088       token = cpp_get_token (pfile);
2089       if (token->flags & PREV_WHITE)
2090 	result[out++] = ' ';
2091     }
2092 
2093   result[out] = '\0';
2094   return result;
2095 }
2096 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that any
   expression may be passed; BUFF is evaluated twice.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2111 
2112 /* Create a new allocation buffer.  Place the control block at the end
2113    of the buffer, so that buffer overflows will cause immediate chaos.  */
2114 static _cpp_buff *
2115 new_buff (size_t len)
2116 {
2117   _cpp_buff *result;
2118   unsigned char *base;
2119 
2120   if (len < MIN_BUFF_SIZE)
2121     len = MIN_BUFF_SIZE;
2122   len = CPP_ALIGN (len);
2123 
2124   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2125   result = (_cpp_buff *) (base + len);
2126   result->base = base;
2127   result->cur = base;
2128   result->limit = base + len;
2129   result->next = NULL;
2130   return result;
2131 }
2132 
2133 /* Place a chain of unwanted allocation buffers on the free list.  */
2134 void
2135 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2136 {
2137   _cpp_buff *end = buff;
2138 
2139   while (end->next)
2140     end = end->next;
2141   end->next = pfile->free_buffs;
2142   pfile->free_buffs = buff;
2143 }
2144 
2145 /* Return a free buffer of size at least MIN_SIZE.  */
2146 _cpp_buff *
2147 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2148 {
2149   _cpp_buff *result, **p;
2150 
2151   for (p = &pfile->free_buffs;; p = &(*p)->next)
2152     {
2153       size_t size;
2154 
2155       if (*p == NULL)
2156 	return new_buff (min_size);
2157       result = *p;
2158       size = result->limit - result->base;
2159       /* Return a buffer that's big enough, but don't waste one that's
2160          way too big.  */
2161       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2162 	break;
2163     }
2164 
2165   *p = result->next;
2166   result->next = NULL;
2167   result->cur = result->base;
2168   return result;
2169 }
2170 
2171 /* Creates a new buffer with enough space to hold the uncommitted
2172    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2173    the excess bytes to the new buffer.  Chains the new buffer after
2174    BUFF, and returns the new buffer.  */
2175 _cpp_buff *
2176 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2177 {
2178   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2179   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2180 
2181   buff->next = new_buff;
2182   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2183   return new_buff;
2184 }
2185 
2186 /* Creates a new buffer with enough space to hold the uncommitted
2187    remaining bytes of the buffer pointed to by BUFF, and at least
2188    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2189    Chains the new buffer before the buffer pointed to by BUFF, and
2190    updates the pointer to point to the new buffer.  */
2191 void
2192 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2193 {
2194   _cpp_buff *new_buff, *old_buff = *pbuff;
2195   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2196 
2197   new_buff = _cpp_get_buff (pfile, size);
2198   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2199   new_buff->next = old_buff;
2200   *pbuff = new_buff;
2201 }
2202 
2203 /* Free a chain of buffers starting at BUFF.  */
2204 void
2205 _cpp_free_buff (_cpp_buff *buff)
2206 {
2207   _cpp_buff *next;
2208 
2209   for (; buff; buff = next)
2210     {
2211       next = buff->next;
2212       free (buff->base);
2213     }
2214 }
2215 
2216 /* Allocate permanent, unaligned storage of length LEN.  */
2217 unsigned char *
2218 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2219 {
2220   _cpp_buff *buff = pfile->u_buff;
2221   unsigned char *result = buff->cur;
2222 
2223   if (len > (size_t) (buff->limit - result))
2224     {
2225       buff = _cpp_get_buff (pfile, len);
2226       buff->next = pfile->u_buff;
2227       pfile->u_buff = buff;
2228       result = buff->cur;
2229     }
2230 
2231   buff->cur = result + len;
2232   return result;
2233 }
2234 
2235 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2236    That buffer is used for growing allocations when saving macro
2237    replacement lists in a #define, and when parsing an answer to an
2238    assertion in #assert, #unassert or #if (and therefore possibly
2239    whilst expanding macros).  It therefore must not be used by any
2240    code that they might call: specifically the lexer and the guts of
2241    the macro expander.
2242 
2243    All existing other uses clearly fit this restriction: storing
2244    registered pragmas during initialization.  */
2245 unsigned char *
2246 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2247 {
2248   _cpp_buff *buff = pfile->a_buff;
2249   unsigned char *result = buff->cur;
2250 
2251   if (len > (size_t) (buff->limit - result))
2252     {
2253       buff = _cpp_get_buff (pfile, len);
2254       buff->next = pfile->a_buff;
2255       pfile->a_buff = buff;
2256       result = buff->cur;
2257     }
2258 
2259   buff->cur = result + len;
2260   return result;
2261 }
2262 
2263 /* Say which field of TOK is in use.  */
2264 
2265 enum cpp_token_fld_kind
2266 cpp_token_val_index (cpp_token *tok)
2267 {
2268   switch (TOKEN_SPELL (tok))
2269     {
2270     case SPELL_IDENT:
2271       return CPP_TOKEN_FLD_NODE;
2272     case SPELL_LITERAL:
2273       return CPP_TOKEN_FLD_STR;
2274     case SPELL_OPERATOR:
2275       if (tok->type == CPP_PASTE)
2276 	return CPP_TOKEN_FLD_TOKEN_NO;
2277       else
2278 	return CPP_TOKEN_FLD_NONE;
2279     case SPELL_NONE:
2280       if (tok->type == CPP_MACRO_ARG)
2281 	return CPP_TOKEN_FLD_ARG_NO;
2282       else if (tok->type == CPP_PADDING)
2283 	return CPP_TOKEN_FLD_SOURCE;
2284       else if (tok->type == CPP_PRAGMA)
2285 	return CPP_TOKEN_FLD_PRAGMA;
2286       /* else fall through */
2287     default:
2288       return CPP_TOKEN_FLD_NONE;
2289     }
2290 }
2291