xref: /netbsd-src/external/gpl3/gcc.old/dist/libcpp/lex.c (revision afab4e300d3a9fb07dd8c80daf53d0feb3345706)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000-2020 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7 
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26 
/* Spelling category of a token type.  One of these is stored in the
   CATEGORY field of every token_spellings entry.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Category given to every OP() table entry.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34 
/* One entry of the token_spellings table.  */
struct token_spelling
{
  /* Spelling category of the token type.  */
  enum spell_type category;
  /* For OP() entries the operator text; for TK() entries the
     stringified enumerator name.  */
  const unsigned char *name;
};
40 
/* Spellings of the digraph forms.  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };

/* Expand TTYPE_TABLE into one token_spelling initializer per entry:
   OP() entries become SPELL_OPERATOR with the operator text S, TK()
   entries take their category from S and their name from the
   stringified enumerator E.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
#define TK(e, s) { SPELL_ ## s,    UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category / name recorded for TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52 
/* Forward declarations of helpers that are local to this file.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
66 
67 
68 /* Utility routine:
69 
70    Compares, the token TOKEN to the NUL-terminated string STRING.
71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
72 int
73 cpp_ideq (const cpp_token *token, const char *string)
74 {
75   if (token->type != CPP_NAME)
76     return 0;
77 
78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
79 }
80 
81 /* Record a note TYPE at byte POS into the current cleaned logical
82    line.  */
83 static void
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 {
86   if (buffer->notes_used == buffer->notes_cap)
87     {
88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90                                   buffer->notes_cap);
91     }
92 
93   buffer->notes[buffer->notes_used].pos = pos;
94   buffer->notes[buffer->notes_used].type = type;
95   buffer->notes_used++;
96 }
97 
98 
99 /* Fast path to find line special characters using optimized character
100    scanning algorithms.  Anything complicated falls back to the slow
101    path below.  Since this loop is very hot it's worth doing these kinds
102    of optimizations.
103 
104    One of the paths through the ifdefs should provide
105 
106      const uchar *search_line_fast (const uchar *s, const uchar *end);
107 
108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
109    the found character.
110 
111    Note that the last character of the buffer is *always* a newline,
112    as forced by _cpp_convert_input.  This fact can be used to avoid
113    explicitly looking for the end of the buffer.  */
114 
/* Configure gives us an ifdef test.  Turn it into a macro that always
   has a value, so that it can be used in ordinary `if' conditions.  */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  There's nothing
   in <stdint.h> that gives us that.  For most hosts this is unsigned long,
   but MS decided on an LLP64 model.  Thankfully when building with GCC we
   can get the "real" word size.  */
#ifdef __GNUC__
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below is only expecting sizes 4 or 8.
   Die at compile-time if this expectation is violated: the array bound
   evaluates to 1 when the size is 4 or 8 and to -1 otherwise.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
134 
135 /* Return X with the first N bytes forced to values that won't match one
136    of the interesting characters.  Note that NUL is not interesting.  */
137 
138 static inline word_type
139 acc_char_mask_misalign (word_type val, unsigned int n)
140 {
141   word_type mask = -1;
142   if (WORDS_BIGENDIAN)
143     mask >>= n * 8;
144   else
145     mask <<= n * 8;
146   return val & mask;
147 }
148 
149 /* Return X replicated to all byte positions within WORD_TYPE.  */
150 
151 static inline word_type
152 acc_char_replicate (uchar x)
153 {
154   word_type ret;
155 
156   ret = (x << 24) | (x << 16) | (x << 8) | x;
157   if (sizeof(word_type) == 8)
158     ret = (ret << 16 << 16) | ret;
159   return ret;
160 }
161 
162 /* Return non-zero if some byte of VAL is (probably) C.  */
163 
164 static inline word_type
165 acc_char_cmp (word_type val, word_type c)
166 {
167 #if defined(__GNUC__) && defined(__alpha__)
168   /* We can get exact results using a compare-bytes instruction.
169      Get (val == c) via (0 >= (val ^ c)).  */
170   return __builtin_alpha_cmpbge (0, val ^ c);
171 #else
172   word_type magic = 0x7efefefeU;
173   if (sizeof(word_type) == 8)
174     magic = (magic << 16 << 16) | 0xfefefefeU;
175   magic |= 1;
176 
177   val ^= c;
178   return ((val + magic) ^ ~val) & ~magic;
179 #endif
180 }
181 
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183    the found character.  If this was a false positive, return -1.  */
184 
185 static inline int
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 		word_type val ATTRIBUTE_UNUSED)
188 {
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190   /* The cmpbge instruction sets *bits* of the result corresponding to
191      matches in the bytes with no false positives.  */
192   return __builtin_ctzl (cmp);
193 #else
194   unsigned int i;
195 
196   /* ??? It would be nice to force unrolling here,
197      and have all of these constants folded.  */
198   for (i = 0; i < sizeof(word_type); ++i)
199     {
200       uchar c;
201       if (WORDS_BIGENDIAN)
202 	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203       else
204 	c = (val >> i * 8) & 0xff;
205 
206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 	return i;
208     }
209 
210   return -1;
211 #endif
212 }
213 
214 /* A version of the fast scanner using bit fiddling techniques.
215 
216    For 32-bit words, one would normally perform 16 comparisons and
217    16 branches.  With this algorithm one performs 24 arithmetic
218    operations and one branch.  Whether this is faster with a 32-bit
219    word size is going to be somewhat system dependent.
220 
221    For 64-bit words, we eliminate twice the number of comparisons
222    and branches without increasing the number of arithmetic operations.
223    It's almost certainly going to be a win with 64-bit word size.  */
224 
225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
226   ATTRIBUTE_UNUSED;
227 
228 static const uchar *
229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230 {
231   const word_type repl_nl = acc_char_replicate ('\n');
232   const word_type repl_cr = acc_char_replicate ('\r');
233   const word_type repl_bs = acc_char_replicate ('\\');
234   const word_type repl_qm = acc_char_replicate ('?');
235 
236   unsigned int misalign;
237   const word_type *p;
238   word_type val, t;
239 
240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242   val = *p;
243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244   if (misalign)
245     val = acc_char_mask_misalign (val, misalign);
246 
247   /* Main loop.  */
248   while (1)
249     {
250       t  = acc_char_cmp (val, repl_nl);
251       t |= acc_char_cmp (val, repl_cr);
252       t |= acc_char_cmp (val, repl_bs);
253       t |= acc_char_cmp (val, repl_qm);
254 
255       if (__builtin_expect (t != 0, 0))
256 	{
257 	  int i = acc_char_index (t, val);
258 	  if (i >= 0)
259 	    return (const uchar *)p + i;
260 	}
261 
262       val = *++p;
263     }
264 }
265 
266 /* Disable on Solaris 2/x86 until the following problem can be properly
267    autoconfed:
268 
269    The Solaris 10+ assembler tags objects with the instruction set
270    extensions used, so SSE4.2 executables cannot run on machines that
271    don't support that extension.  */
272 
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
274 
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.  Row 0 is \n, row 1 is \r, row 2 is \\
   and row 3 is ?; each row is 16 bytes long and 16-byte aligned.  */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
  { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
  { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
  { '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?' },
};
289 
/* A version of the fast scanner using MMX vectorized byte compare insns.
   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   unused: the loop only terminates on a match.

   This uses the PMOVMSKB instruction which was introduced with "MMX2",
   which was packaged into SSE1; it is also present in the AMD MMX
   extension.  Mark the function as using "sse" so that we emit a real
   "emms" instruction, rather than the 3dNOW "femms" instruction.  */

static const uchar *
#ifndef __SSE__
__attribute__((__target__("sse")))
#endif
search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef char v8qi __attribute__ ((__vector_size__ (8)));
  typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));

  /* Only the first 8 bytes of each 16-byte row of repl_chars are
     loaded here.  */
  const v8qi repl_nl = *(const v8qi *)repl_chars[0];
  const v8qi repl_cr = *(const v8qi *)repl_chars[1];
  const v8qi repl_bs = *(const v8qi *)repl_chars[2];
  const v8qi repl_qm = *(const v8qi *)repl_chars[3];

  unsigned int misalign, found, mask;
  const v8qi *p;
  v8qi data, t, c;

  /* Align the source pointer.  While MMX doesn't generate unaligned data
     faults, this allows us to safely scan to the end of the buffer without
     reading beyond the end of the last page.  */
  misalign = (uintptr_t)s & 7;
  p = (const v8qi *)((uintptr_t)s & -8);
  data = *p;

  /* Create a mask for the bytes that are valid within the first
     8-byte block, i.e. with the bits for the MISALIGN bytes that
     precede S cleared.  The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = -1u << misalign;

  /* Main loop processing 8 bytes at a time.  The first block has
     already been loaded, so jump over the load at the top of the
     loop.  */
  goto start;
  do
    {
      data = *++p;
      /* Every byte of the second and later blocks is valid.  */
      mask = -1;

    start:
      t = __builtin_ia32_pcmpeqb(data, repl_nl);
      c = __builtin_ia32_pcmpeqb(data, repl_cr);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      c = __builtin_ia32_pcmpeqb(data, repl_bs);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      c = __builtin_ia32_pcmpeqb(data, repl_qm);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      found = __builtin_ia32_pmovmskb (t);
      found &= mask;
    }
  while (!found);

  __builtin_ia32_emms ();

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz(found);
  return (const uchar *)p + found;
}
355 
/* A version of the fast scanner using SSE2 vectorized byte compare insns.
   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   unused: the loop only terminates on a match.  */

static const uchar *
#ifndef __SSE2__
__attribute__((__target__("sse2")))
#endif
search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));

  const v16qi repl_nl = *(const v16qi *)repl_chars[0];
  const v16qi repl_cr = *(const v16qi *)repl_chars[1];
  const v16qi repl_bs = *(const v16qi *)repl_chars[2];
  const v16qi repl_qm = *(const v16qi *)repl_chars[3];

  unsigned int misalign, found, mask;
  const v16qi *p;
  v16qi data, t;

  /* Align the source pointer.  The first load therefore starts up to
     15 bytes before S.  */
  misalign = (uintptr_t)s & 15;
  p = (const v16qi *)((uintptr_t)s & -16);
  data = *p;

  /* Create a mask for the bytes that are valid within the first
     16-byte block, i.e. with the bits for the MISALIGN bytes that
     precede S cleared.  The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = -1u << misalign;

  /* Main loop processing 16 bytes at a time.  The first block has
     already been loaded, so jump over the load at the top of the
     loop.  */
  goto start;
  do
    {
      data = *++p;
      /* Every byte of the second and later blocks is valid.  */
      mask = -1;

    start:
      t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
      t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
      t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
      t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
      found = __builtin_ia32_pmovmskb128 (t);
      found &= mask;
    }
  while (!found);

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz(found);
  return (const uchar *)p + found;
}
408 
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.
   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   only used to decide whether an unaligned 16-byte read at S is
   safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  /* The set of characters to look for; only the first four elements
     are meaningful, which is why the length 4 is passed below.  */
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      if (__builtin_expect (end - s < 16, 0)
	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
	{
	  /* There are less than 16 bytes left in the buffer, and less
	     than 16 bytes left on the page.  Reading 16 bytes at this
	     point might generate a spurious page fault.  Defer to the
	     SSE2 implementation, which already handles alignment.  */
	  return search_line_sse2 (s, end);
	}

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
	 memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      /* An index below 16 means one of the 16 bytes just read
	 matched.  */
      if (__builtin_expect (index < 16, 0))
	goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
	 few bytes, but we no longer need care for reading past the
	 end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 15) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  while (1)
    {
      char f;

      /* By using inline assembly instead of the builtin,
	 we can use the result, as well as the flags set.  F receives
	 the carry flag; the loop stops once it is set.  */
      __asm ("%vpcmpestri\t$0, %2, %3"
	     : "=c"(index), "=@ccc"(f)
	     : "m"(*s), "x"(search), "a"(4), "d"(16));
      if (f)
	break;

      s += 16;
    }
#else
  /* The asm loop below adds 16 before its first compare.  */
  s -= 16;
  /* By doing the whole loop in inline assembly,
     we can make proper use of the flags set.  */
  __asm (      ".balign 16\n"
	"0:	add $16, %1\n"
	"	%vpcmpestri\t$0, (%1), %2\n"
	"	jnc 0b"
	: "=&c"(index), "+r"(s)
	: "x"(search), "a"(4), "d"(16));
#endif

 found:
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
489 
490 /* Check the CPU capabilities.  */
491 
492 #include "../gcc/config/i386/cpuid.h"
493 
494 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
495 static search_line_fast_type search_line_fast;
496 
497 #define HAVE_init_vectorized_lexer 1
498 static inline void
499 init_vectorized_lexer (void)
500 {
501   unsigned dummy, ecx = 0, edx = 0;
502   search_line_fast_type impl = search_line_acc_char;
503   int minimum = 0;
504 
505 #if defined(__SSE4_2__)
506   minimum = 3;
507 #elif defined(__SSE2__)
508   minimum = 2;
509 #elif defined(__SSE__)
510   minimum = 1;
511 #endif
512 
513   if (minimum == 3)
514     impl = search_line_sse42;
515   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
516     {
517       if (minimum == 3 || (ecx & bit_SSE4_2))
518         impl = search_line_sse42;
519       else if (minimum == 2 || (edx & bit_SSE2))
520 	impl = search_line_sse2;
521       else if (minimum == 1 || (edx & bit_SSE))
522 	impl = search_line_mmx;
523     }
524   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
525     {
526       if (minimum == 1
527 	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
528 	impl = search_line_mmx;
529     }
530 
531   search_line_fast = impl;
532 }
533 
534 #elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
535 
/* A version of the fast scanner using AltiVec vectorized byte compares
   and VSX unaligned loads (when VSX is available).  This is otherwise
   the same as the AltiVec version.  Starting at S, return a pointer to
   the first \n, \r, \\ or ?.  END is unused: the loop only terminates
   on a match.  */

ATTRIBUTE_NO_SANITIZE_UNDEFINED
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef __attribute__((altivec(vector))) unsigned char vc;

  const vc repl_nl = {
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
  };
  const vc repl_cr = {
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
  };
  const vc repl_bs = {
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
  };
  const vc repl_qm = {
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
  };
  const vc zero = { 0 };

  vc data, t;

  /* Main loop processing 16 bytes at a time.  The loads start at S
     itself, so no alignment mask is needed.  */
  do
    {
      vc m_nl, m_cr, m_bs, m_qm;

      data = __builtin_vec_vsx_ld (0, s);
      s += 16;

      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
      t = (m_nl | m_cr) | (m_bs | m_qm);

      /* T now contains 0xff in bytes for which we matched one of the relevant
	 characters.  We want to exit the loop if any byte in T is non-zero.
	 Below is the expansion of vec_any_ne(t, zero).  */
    }
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));

  /* Restore s to point to the 16 bytes we just processed.  */
  s -= 16;

  {
#define N  (sizeof(vc) / sizeof(long))

    union {
      vc v;
      /* Statically assert that N is 2 or 4.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } u;
    unsigned long l, i = 0;

    u.v = t;

    /* Find the first word of T that is non-zero, advancing S past
       every all-zero word that precedes it.  */
    switch (N)
      {
      case 4:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	/* FALLTHRU */
      case 2:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i];
      }

    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  */
#ifdef __BIG_ENDIAN__
    l = __builtin_clzl(l) >> 3;
#else
    l = __builtin_ctzl(l) >> 3;
#endif
    return s + l;

#undef N
  }
}
635 
636 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
637 
/* A version of the fast scanner using AltiVec vectorized byte compares.
   This cannot be used for little endian because vec_lvsl/lvsr are
   deprecated for little endian and the code won't work properly.
   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   unused: the loop only terminates on a match.  */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
   so we can't compile this function without -maltivec on the command line
   (or implied by some other switch).  */

static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef __attribute__((altivec(vector))) unsigned char vc;

  const vc repl_nl = {
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
  };
  const vc repl_cr = {
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
  };
  const vc repl_bs = {
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
  };
  const vc repl_qm = {
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
  };
  const vc ones = {
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
  };
  const vc zero = { 0 };

  vc data, mask, t;

  /* Altivec loads automatically mask addresses with -16.  This lets us
     issue the first load as early as possible.  */
  data = __builtin_vec_ld(0, (const vc *)s);

  /* Discard bytes before the beginning of the buffer.  Do this by
     beginning with all ones and shifting in zeros according to the
     mis-alignment.  The LVSR instruction pulls the exact shift we
     want from the address.  */
  mask = __builtin_vec_lvsr(0, s);
  mask = __builtin_vec_perm(zero, ones, mask);
  data &= mask;

  /* While altivec loads mask addresses, we still need to align S so
     that the offset we compute at the end is correct.  */
  s = (const uchar *)((uintptr_t)s & -16);

  /* Main loop processing 16 bytes at a time.  The first block has
     already been loaded and masked, so jump over the load at the top
     of the loop.  */
  goto start;
  do
    {
      vc m_nl, m_cr, m_bs, m_qm;

      s += 16;
      data = __builtin_vec_ld(0, (const vc *)s);

    start:
      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
      t = (m_nl | m_cr) | (m_bs | m_qm);

      /* T now contains 0xff in bytes for which we matched one of the relevant
	 characters.  We want to exit the loop if any byte in T is non-zero.
	 Below is the expansion of vec_any_ne(t, zero).  */
    }
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));

  {
#define N  (sizeof(vc) / sizeof(long))

    union {
      vc v;
      /* Statically assert that N is 2 or 4.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } u;
    unsigned long l, i = 0;

    u.v = t;

    /* Find the first word of T that is non-zero, advancing S past
       every all-zero word that precedes it.  */
    switch (N)
      {
      case 4:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	/* FALLTHROUGH */
      case 2:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i];
      }

    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  */
    l = __builtin_clzl(l) >> 3;
    return s + l;

#undef N
  }
}
754 
755 #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
756 #include "arm_neon.h"
757 
/* This doesn't have to be the exact page size, but no system may use
   a size smaller than this.  ARMv8 requires a minimum page size of
   4k.  The impact of being conservative here is a small number of
   cases will take the slightly slower entry path into the main
   loop.  */

#define AARCH64_MIN_PAGE_SIZE 4096

/* A version of the fast scanner using AArch64 NEON byte compares.
   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   unused: the loop only terminates on a match.  */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
  const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
  const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
  const uint8x16_t repl_qm = vdupq_n_u8 ('?');
  /* XMASK and SHIFT are used to condense the 16 per-byte compare
     results into the integer FOUND, whose lowest set bit is converted
     to a byte index with __builtin_ctz.  */
  const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);

#ifdef __ARM_BIG_ENDIAN
  const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
#else
  const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
#endif

  unsigned int found;
  const uint8_t *p;
  uint8x16_t data;
  uint8x16_t t;
  uint16x8_t m;
  uint8x16_t u, v, w;

  /* Align the source pointer.  */
  p = (const uint8_t *)((uintptr_t)s & -16);

  /* Assuming random string start positions, with a 4k page size we'll take
     the slow path about 0.37% of the time.  */
  if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
			 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
			< 16, 0))
    {
      /* Slow path: the string starts near a possible page boundary.
	 Load the aligned block that contains S and ignore matches in
	 the bytes that precede S.  */
      uint32_t misalign, mask;

      misalign = (uintptr_t)s & 15;
      mask = (-1u << misalign) & 0xffff;
      data = vld1q_u8 (p);
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vorrq_u8 (v, w);
      t = vandq_u8 (t, xmask);
      m = vpaddlq_u8 (t);
      m = vshlq_u16 (m, shift);
      found = vaddvq_u16 (m);
      found &= mask;
      if (found)
	return (const uchar*)p + __builtin_ctz (found);
    }
  else
    {
      /* Fast path: load 16 bytes starting at S itself.  On a match we
	 jump to DONE with P still at the aligned-down address.  */
      data = vld1q_u8 ((const uint8_t *) s);
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vorrq_u8 (v, w);
      if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
	goto done;
    }

  /* Main loop over aligned 16-byte blocks, starting with the block
     that follows the one containing S.  */
  do
    {
      p += 16;
      data = vld1q_u8 (p);
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vorrq_u8 (v, w);
    } while (!vpaddd_u64 ((uint64x2_t)t));

done:
  /* Now that we've found the terminating substring, work out precisely where
     we need to stop.  If P is still below S the match came from the
     unaligned load at S, so the index is relative to S rather than
     to P.  */
  t = vandq_u8 (t, xmask);
  m = vpaddlq_u8 (t);
  m = vshlq_u16 (m, shift);
  found = vaddvq_u16 (m);
  return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
	  + __builtin_ctz (found));
}
849 
850 #elif defined (__ARM_NEON)
851 #include "arm_neon.h"
852 
/* A version of the fast scanner using ARM NEON byte compares.
   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   unused: the loop only terminates on a match.  */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
  const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
  const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
  const uint8x16_t repl_qm = vdupq_n_u8 ('?');
  /* Used to condense the 16 per-byte compare results into the integer
     FOUND, whose lowest set bit is converted to a byte index with
     __builtin_ctz.  */
  const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);

  unsigned int misalign, found, mask;
  const uint8_t *p;
  uint8x16_t data;

  /* Align the source pointer.  The first load therefore starts up to
     15 bytes before S.  */
  misalign = (uintptr_t)s & 15;
  p = (const uint8_t *)((uintptr_t)s & -16);
  data = vld1q_u8 (p);

  /* Create a mask for the bytes that are valid within the first
     16-byte block, i.e. with the bits for the MISALIGN bytes that
     precede S cleared.  The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = (-1u << misalign) & 0xffff;

  /* Main loop, processing 16 bytes at a time.  The first block has
     already been loaded, so jump over the load at the top of the
     loop.  */
  goto start;

  do
    {
      uint8x8_t l;
      uint16x4_t m;
      uint32x2_t n;
      uint8x16_t t, u, v, w;

      p += 16;
      data = vld1q_u8 (p);
      /* Every byte of the second and later blocks is valid.  */
      mask = 0xffff;

    start:
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vandq_u8 (vorrq_u8 (v, w), xmask);
      l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
      m = vpaddl_u8 (l);
      n = vpaddl_u16 (m);

      found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
      found &= mask;
    }
  while (!found);

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz (found);
  return (const uchar *)p + found;
}
912 
913 #else
914 
915 /* We only have one accelerated alternative.  Use a direct call so that
916    we encourage inlining.  */
917 
918 #define search_line_fast  search_line_acc_char
919 
920 #endif
921 
922 /* Initialize the lexer if needed.  */
923 
/* One-time lexer setup.  When a run-time selectable vectorized scanner
   was compiled in, choose the implementation now; otherwise there is
   nothing to do.  */
void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
931 
932 /* Returns with a logical line that contains no escaped newlines or
933    trigraphs.  This is a time-critical inner loop.  */
/* The line to clean starts at pfile->buffer->next_line.  On return
   buffer->cur and buffer->line_base point at the start of that line, a
   '\n' has been stored at the end of the cleaned text, and
   buffer->next_line has been advanced past the line ending that was
   consumed (one past buffer->rlimit when the buffer ran out).  A line
   note is recorded through add_line_note for every trigraph and every
   escaped newline seen, followed by a sentinel '\n' note.  When
   buffer->from_stage3 is set only the end of line is located; no
   trigraph or escaped-newline handling is performed.  */
934 void
935 _cpp_clean_line (cpp_reader *pfile)
936 {
937   cpp_buffer *buffer;
938   const uchar *s;
939   uchar c, *d, *p;
940 
941   buffer = pfile->buffer;
  /* Start a fresh line: forget the previous line's notes and make the
     upcoming line the current one.  */
942   buffer->cur_note = buffer->notes_used = 0;
943   buffer->cur = buffer->line_base = buffer->next_line;
944   buffer->need_line = false;
945   s = buffer->next_line;
946 
947   if (!buffer->from_stage3)
948     {
949       const uchar *pbackslash = NULL;
950 
951       /* Fast path.  This is the common case of an un-escaped line with
952 	 no trigraphs.  The primary win here is by not writing any
953 	 data back to memory until we have to.  */
954       while (1)
955 	{
956 	  /* Perform an optimized search for \n, \r, \\, ?.  */
957 	  s = search_line_fast (s, buffer->rlimit);
958 
959 	  c = *s;
960 	  if (c == '\\')
961 	    {
962 	      /* Record the location of the backslash and continue.  */
963 	      pbackslash = s++;
964 	    }
965 	  else if (__builtin_expect (c == '?', 0))
966 	    {
967 	      if (__builtin_expect (s[1] == '?', false)
968 		   && _cpp_trigraph_map[s[2]])
969 		{
970 		  /* Have a trigraph.  We may or may not have to convert
971 		     it.  Add a line note regardless, for -Wtrigraphs.  */
972 		  add_line_note (buffer, s, s[2]);
973 		  if (CPP_OPTION (pfile, trigraphs))
974 		    {
975 		      /* We do, and that means we have to switch to the
976 		         slow path.  */
		      /* D aliases the input buffer (const is cast away) so
			 the replacement overwrites the first '?' in place.
			 S is left on the last character of the trigraph;
			 the slow path pre-increments past it.  */
977 		      d = (uchar *) s;
978 		      *d = _cpp_trigraph_map[s[2]];
979 		      s += 2;
980 		      goto slow_path;
981 		    }
982 		}
983 	      /* Not a trigraph.  Continue on fast-path.  */
984 	      s++;
985 	    }
986 	  else
987 	    break;
988 	}
989 
990       /* This must be \r or \n.  We're either done, or we'll be forced
991 	 to write back to the buffer and continue on the slow path.  */
992       d = (uchar *) s;
993 
994       if (__builtin_expect (s == buffer->rlimit, false))
995 	goto done;
996 
997       /* DOS line ending? */
998       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
999 	{
1000 	  s++;
1001 	  if (s == buffer->rlimit)
1002 	    goto done;
1003 	}
1004 
1005       if (__builtin_expect (pbackslash == NULL, true))
1006 	goto done;
1007 
1008       /* Check for escaped newline.  */
      /* PBACKSLASH is non-NULL here, so this backward scan over
	 non-vertical space stops at that backslash at the latest.  */
1009       p = d;
1010       while (is_nvspace (p[-1]))
1011 	p--;
1012       if (p - 1 != pbackslash)
1013 	goto done;
1014 
1015       /* Have an escaped newline; process it and proceed to
1016 	 the slow path.  */
      /* The note type is ' ' when space was skipped between the backslash
	 and the newline, '\\' when the newline followed directly.  D is
	 then parked just before the backslash so the slow path's
	 pre-increment overwrites it, and next_line is reused as the lower
	 bound for the slow path's backward scans.  */
1017       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
1018       d = p - 2;
1019       buffer->next_line = p - 1;
1020 
      /* Slow path: copy characters down from S to D, dropping escaped
	 newlines and, when the trigraphs option is set, replacing each
	 trigraph with its single-character equivalent.  */
1021     slow_path:
1022       while (1)
1023 	{
1024 	  c = *++s;
1025 	  *++d = c;
1026 
1027 	  if (c == '\n' || c == '\r')
1028 	    {
1029 	      /* Handle DOS line endings.  */
1030 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
1031 		s++;
1032 	      if (s == buffer->rlimit)
1033 		break;
1034 
1035 	      /* Escaped?  */
1036 	      p = d;
1037 	      while (p != buffer->next_line && is_nvspace (p[-1]))
1038 		p--;
1039 	      if (p == buffer->next_line || p[-1] != '\\')
1040 		break;
1041 
1042 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
1043 	      d = p - 2;
1044 	      buffer->next_line = p - 1;
1045 	    }
1046 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
1047 	    {
1048 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
1049 	      add_line_note (buffer, d, s[2]);
1050 	      if (CPP_OPTION (pfile, trigraphs))
1051 		{
1052 		  *d = _cpp_trigraph_map[s[2]];
1053 		  s += 2;
1054 		}
1055 	    }
1056 	}
1057     }
1058   else
1059     {
      /* from_stage3 input: only find the end of the line.  Nothing is
	 rewritten apart from the terminator stored at DONE below.  */
1060       while (*s != '\n' && *s != '\r')
1061 	s++;
1062       d = (uchar *) s;
1063 
1064       /* Handle DOS line endings.  */
1065       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
1066 	s++;
1067     }
1068 
1069  done:
  /* D marks the end of the cleaned text and S the last input character
     consumed; terminate the former and resume after the latter.  */
1070   *d = '\n';
1071   /* A sentinel note that should never be processed.  */
1072   add_line_note (buffer, d + 1, '\n');
1073   buffer->next_line = s + 1;
1074 }
1075 
1076 /* Return true if the trigraph indicated by NOTE should be warned
1077    about in a comment.  */
1078 static bool
1079 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
1080 {
1081   const uchar *p;
1082 
1083   /* Within comments we don't warn about trigraphs, unless the
1084      trigraph forms an escaped newline, as that may change
1085      behavior.  */
1086   if (note->type != '/')
1087     return false;
1088 
1089   /* If -trigraphs, then this was an escaped newline iff the next note
1090      is coincident.  */
1091   if (CPP_OPTION (pfile, trigraphs))
1092     return note[1].pos == note->pos;
1093 
1094   /* Otherwise, see if this forms an escaped newline.  */
1095   p = note->pos + 3;
1096   while (is_nvspace (*p))
1097     p++;
1098 
1099   /* There might have been escaped newlines between the trigraph and the
1100      newline we found.  Hence the position test.  */
1101   return (*p == '\n' && p < note[1].pos);
1102 }
1103 
1104 /* Process the notes created by add_line_note as far as the current
1105    location.  */
1106 void
1107 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
1108 {
1109   cpp_buffer *buffer = pfile->buffer;
1110 
1111   for (;;)
1112     {
1113       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1114       unsigned int col;
1115 
1116       if (note->pos > buffer->cur)
1117 	break;
1118 
1119       buffer->cur_note++;
1120       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1121 
1122       if (note->type == '\\' || note->type == ' ')
1123 	{
1124 	  if (note->type == ' ' && !in_comment)
1125 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1126 				 "backslash and newline separated by space");
1127 
1128 	  if (buffer->next_line > buffer->rlimit)
1129 	    {
1130 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1131 				   "backslash-newline at end of file");
1132 	      /* Prevent "no newline at end of file" warning.  */
1133 	      buffer->next_line = buffer->rlimit;
1134 	    }
1135 
1136 	  buffer->line_base = note->pos;
1137 	  CPP_INCREMENT_LINE (pfile, 0);
1138 	}
1139       else if (_cpp_trigraph_map[note->type])
1140 	{
1141 	  if (CPP_OPTION (pfile, warn_trigraphs)
1142 	      && (!in_comment || warn_in_comment (pfile, note)))
1143 	    {
1144 	      if (CPP_OPTION (pfile, trigraphs))
1145 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1146                                        pfile->line_table->highest_line, col,
1147 				       "trigraph ??%c converted to %c",
1148 				       note->type,
1149 				       (int) _cpp_trigraph_map[note->type]);
1150 	      else
1151 		{
1152 		  cpp_warning_with_line
1153 		    (pfile, CPP_W_TRIGRAPHS,
1154                      pfile->line_table->highest_line, col,
1155 		     "trigraph ??%c ignored, use -trigraphs to enable",
1156 		     note->type);
1157 		}
1158 	    }
1159 	}
1160       else if (note->type == 0)
1161 	/* Already processed in lex_raw_string.  */;
1162       else
1163 	abort ();
1164     }
1165 }
1166 
1167 /* Skip a C-style block comment.  We find the end of the comment by
1168    seeing if an asterisk is before every '/' we encounter.  Returns
1169    nonzero if comment terminated by EOF, zero otherwise.
1170 
1171    Buffer->cur points to the initial asterisk of the comment.  */
1172 bool
1173 _cpp_skip_block_comment (cpp_reader *pfile)
1174 {
1175   cpp_buffer *buffer = pfile->buffer;
1176   const uchar *cur = buffer->cur;
1177   uchar c;
1178 
1179   cur++;
1180   if (*cur == '/')
1181     cur++;
1182 
1183   for (;;)
1184     {
1185       /* People like decorating comments with '*', so check for '/'
1186 	 instead for efficiency.  */
1187       c = *cur++;
1188 
1189       if (c == '/')
1190 	{
1191 	  if (cur[-2] == '*')
1192 	    break;
1193 
1194 	  /* Warn about potential nested comments, but not if the '/'
1195 	     comes immediately before the true comment delimiter.
1196 	     Don't bother to get it right across escaped newlines.  */
1197 	  if (CPP_OPTION (pfile, warn_comments)
1198 	      && cur[0] == '*' && cur[1] != '/')
1199 	    {
1200 	      buffer->cur = cur;
1201 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1202 				     pfile->line_table->highest_line,
1203 				     CPP_BUF_COL (buffer),
1204 				     "\"/*\" within comment");
1205 	    }
1206 	}
1207       else if (c == '\n')
1208 	{
1209 	  unsigned int cols;
1210 	  buffer->cur = cur - 1;
1211 	  _cpp_process_line_notes (pfile, true);
1212 	  if (buffer->next_line >= buffer->rlimit)
1213 	    return true;
1214 	  _cpp_clean_line (pfile);
1215 
1216 	  cols = buffer->next_line - buffer->line_base;
1217 	  CPP_INCREMENT_LINE (pfile, cols);
1218 
1219 	  cur = buffer->cur;
1220 	}
1221     }
1222 
1223   buffer->cur = cur;
1224   _cpp_process_line_notes (pfile, true);
1225   return false;
1226 }
1227 
1228 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1229    terminating newline.  Handles escaped newlines.  Returns nonzero
1230    if a multiline comment.  */
1231 static int
1232 skip_line_comment (cpp_reader *pfile)
1233 {
1234   cpp_buffer *buffer = pfile->buffer;
1235   location_t orig_line = pfile->line_table->highest_line;
1236 
1237   while (*buffer->cur != '\n')
1238     buffer->cur++;
1239 
1240   _cpp_process_line_notes (pfile, true);
1241   return orig_line != pfile->line_table->highest_line;
1242 }
1243 
1244 /* Skips whitespace, saving the next non-whitespace character.  */
1245 static void
1246 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1247 {
1248   cpp_buffer *buffer = pfile->buffer;
1249   bool saw_NUL = false;
1250 
1251   do
1252     {
1253       /* Horizontal space always OK.  */
1254       if (c == ' ' || c == '\t')
1255 	;
1256       /* Just \f \v or \0 left.  */
1257       else if (c == '\0')
1258 	saw_NUL = true;
1259       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1260 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1261 			     CPP_BUF_COL (buffer),
1262 			     "%s in preprocessing directive",
1263 			     c == '\f' ? "form feed" : "vertical tab");
1264 
1265       c = *buffer->cur++;
1266     }
1267   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1268   while (is_nvspace (c));
1269 
1270   if (saw_NUL)
1271     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1272 
1273   buffer->cur--;
1274 }
1275 
1276 /* See if the characters of a number token are valid in a name (no
1277    '.', '+' or '-').  */
1278 static int
1279 name_p (cpp_reader *pfile, const cpp_string *string)
1280 {
1281   unsigned int i;
1282 
1283   for (i = 0; i < string->len; i++)
1284     if (!is_idchar (string->text[i]))
1285       return 0;
1286 
1287   return 1;
1288 }
1289 
1290 /* After parsing an identifier or other sequence, produce a warning about
1291    sequences not in NFC/NFKC.  */
1292 static void
1293 warn_about_normalization (cpp_reader *pfile,
1294 			  const cpp_token *token,
1295 			  const struct normalize_state *s)
1296 {
1297   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1298       && !pfile->state.skipping)
1299     {
1300       /* Make sure that the token is printed using UCNs, even
1301 	 if we'd otherwise happily print UTF-8.  */
1302       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1303       size_t sz;
1304 
1305       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1306       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1307 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1308 			       "`%.*s' is not in NFKC", (int) sz, buf);
1309       else
1310 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1311 			       "`%.*s' is not in NFC", (int) sz, buf);
1312       free (buf);
1313     }
1314 }
1315 
1316 static const cppchar_t utf8_signifier = 0xC0;
1317 
1318 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1319    an identifier.  FIRST is TRUE if this starts an identifier.  */
1320 static bool
1321 forms_identifier_p (cpp_reader *pfile, int first,
1322 		    struct normalize_state *state)
1323 {
1324   cpp_buffer *buffer = pfile->buffer;
1325 
1326   if (*buffer->cur == '$')
1327     {
1328       if (!CPP_OPTION (pfile, dollars_in_ident))
1329 	return false;
1330 
1331       buffer->cur++;
1332       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1333 	{
1334 	  CPP_OPTION (pfile, warn_dollars) = 0;
1335 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1336 	}
1337 
1338       return true;
1339     }
1340 
1341   /* Is this a syntactically valid UCN or a valid UTF-8 char?  */
1342   if (CPP_OPTION (pfile, extended_identifiers))
1343     {
1344       cppchar_t s;
1345       if (*buffer->cur >= utf8_signifier)
1346 	{
1347 	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1348 			       state, &s))
1349 	    return true;
1350 	}
1351       else if (*buffer->cur == '\\'
1352 	       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1353 	{
1354 	  buffer->cur += 2;
1355 	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1356 			      state, &s, NULL, NULL))
1357 	    return true;
1358 	  buffer->cur -= 2;
1359 	}
1360     }
1361 
1362   return false;
1363 }
1364 
1365 /* Helper function to issue error about improper __VA_OPT__ use.  */
1366 static void
1367 maybe_va_opt_error (cpp_reader *pfile)
1368 {
1369   if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
1370     {
1371       /* __VA_OPT__ should not be accepted at all, but allow it in
1372 	 system headers.  */
1373       if (!cpp_in_system_header (pfile))
1374 	cpp_error (pfile, CPP_DL_PEDWARN,
1375 		   "__VA_OPT__ is not available until C++2a");
1376     }
1377   else if (!pfile->state.va_args_ok)
1378     {
1379       /* __VA_OPT__ should only appear in the replacement list of a
1380 	 variadic macro.  */
1381       cpp_error (pfile, CPP_DL_PEDWARN,
1382 		 "__VA_OPT__ can only appear in the expansion"
1383 		 " of a C++2a variadic macro");
1384     }
1385 }
1386 
1387 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1388 static cpp_hashnode *
1389 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1390 {
1391   cpp_hashnode *result;
1392   const uchar *cur;
1393   unsigned int len;
1394   unsigned int hash = HT_HASHSTEP (0, *base);
1395 
1396   cur = base + 1;
1397   while (ISIDNUM (*cur))
1398     {
1399       hash = HT_HASHSTEP (hash, *cur);
1400       cur++;
1401     }
1402   len = cur - base;
1403   hash = HT_HASHFINISH (hash, len);
1404   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1405 					      base, len, hash, HT_ALLOC));
1406 
1407   /* Rarely, identifiers require diagnostics when lexed.  */
1408   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1409 			&& !pfile->state.skipping, 0))
1410     {
1411       /* It is allowed to poison the same identifier twice.  */
1412       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1413 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1414 		   NODE_NAME (result));
1415 
1416       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1417 	 replacement list of a variadic macro.  */
1418       if (result == pfile->spec_nodes.n__VA_ARGS__
1419 	  && !pfile->state.va_args_ok)
1420 	{
1421 	  if (CPP_OPTION (pfile, cplusplus))
1422 	    cpp_error (pfile, CPP_DL_PEDWARN,
1423 		       "__VA_ARGS__ can only appear in the expansion"
1424 		       " of a C++11 variadic macro");
1425 	  else
1426 	    cpp_error (pfile, CPP_DL_PEDWARN,
1427 		       "__VA_ARGS__ can only appear in the expansion"
1428 		       " of a C99 variadic macro");
1429 	}
1430 
1431       if (result == pfile->spec_nodes.n__VA_OPT__)
1432 	maybe_va_opt_error (pfile);
1433 
1434       /* For -Wc++-compat, warn about use of C++ named operators.  */
1435       if (result->flags & NODE_WARN_OPERATOR)
1436 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1437 		     "identifier \"%s\" is a special operator name in C++",
1438 		     NODE_NAME (result));
1439     }
1440 
1441   return result;
1442 }
1443 
1444 /* Get the cpp_hashnode of an identifier specified by NAME in
1445    the current cpp_reader object.  If none is found, NULL is returned.  */
1446 cpp_hashnode *
1447 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1448 {
1449   cpp_hashnode *result;
1450   result = lex_identifier_intern (pfile, (uchar *) name);
1451   return result;
1452 }
1453 
1454 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1455 static cpp_hashnode *
1456 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1457 		struct normalize_state *nst, cpp_hashnode **spelling)
1458 {
1459   cpp_hashnode *result;
1460   const uchar *cur;
1461   unsigned int len;
1462   unsigned int hash = HT_HASHSTEP (0, *base);
1463 
1464   cur = pfile->buffer->cur;
1465   if (! starts_ucn)
1466     {
1467       while (ISIDNUM (*cur))
1468 	{
1469 	  hash = HT_HASHSTEP (hash, *cur);
1470 	  cur++;
1471 	}
1472       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1473     }
1474   pfile->buffer->cur = cur;
1475   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1476     {
1477       /* Slower version for identifiers containing UCNs
1478 	 or extended chars (including $).  */
1479       do {
1480 	while (ISIDNUM (*pfile->buffer->cur))
1481 	  {
1482 	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1483 	    pfile->buffer->cur++;
1484 	  }
1485       } while (forms_identifier_p (pfile, false, nst));
1486       result = _cpp_interpret_identifier (pfile, base,
1487 					  pfile->buffer->cur - base);
1488       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1489     }
1490   else
1491     {
1492       len = cur - base;
1493       hash = HT_HASHFINISH (hash, len);
1494 
1495       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1496 						  base, len, hash, HT_ALLOC));
1497       *spelling = result;
1498     }
1499 
1500   /* Rarely, identifiers require diagnostics when lexed.  */
1501   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1502 			&& !pfile->state.skipping, 0))
1503     {
1504       /* It is allowed to poison the same identifier twice.  */
1505       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1506 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1507 		   NODE_NAME (result));
1508 
1509       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1510 	 replacement list of a variadic macro.  */
1511       if (result == pfile->spec_nodes.n__VA_ARGS__
1512 	  && !pfile->state.va_args_ok)
1513 	{
1514 	  if (CPP_OPTION (pfile, cplusplus))
1515 	    cpp_error (pfile, CPP_DL_PEDWARN,
1516 		       "__VA_ARGS__ can only appear in the expansion"
1517 		       " of a C++11 variadic macro");
1518 	  else
1519 	    cpp_error (pfile, CPP_DL_PEDWARN,
1520 		       "__VA_ARGS__ can only appear in the expansion"
1521 		       " of a C99 variadic macro");
1522 	}
1523 
1524       /* __VA_OPT__ should only appear in the replacement list of a
1525 	 variadic macro.  */
1526       if (result == pfile->spec_nodes.n__VA_OPT__)
1527 	maybe_va_opt_error (pfile);
1528 
1529       /* For -Wc++-compat, warn about use of C++ named operators.  */
1530       if (result->flags & NODE_WARN_OPERATOR)
1531 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1532 		     "identifier \"%s\" is a special operator name in C++",
1533 		     NODE_NAME (result));
1534     }
1535 
1536   return result;
1537 }
1538 
1539 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1540 static void
1541 lex_number (cpp_reader *pfile, cpp_string *number,
1542 	    struct normalize_state *nst)
1543 {
1544   const uchar *cur;
1545   const uchar *base;
1546   uchar *dest;
1547 
1548   base = pfile->buffer->cur - 1;
1549   do
1550     {
1551       cur = pfile->buffer->cur;
1552 
1553       /* N.B. ISIDNUM does not include $.  */
1554       while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1555 	     || VALID_SIGN (*cur, cur[-1]))
1556 	{
1557 	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1558 	  cur++;
1559 	}
1560       /* A number can't end with a digit separator.  */
1561       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1562 	--cur;
1563 
1564       pfile->buffer->cur = cur;
1565     }
1566   while (forms_identifier_p (pfile, false, nst));
1567 
1568   number->len = cur - base;
1569   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1570   memcpy (dest, base, number->len);
1571   dest[number->len] = '\0';
1572   number->text = dest;
1573 }
1574 
1575 /* Create a token of type TYPE with a literal spelling.  */
1576 static void
1577 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1578 		unsigned int len, enum cpp_ttype type)
1579 {
1580   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1581 
1582   memcpy (dest, base, len);
1583   dest[len] = '\0';
1584   token->type = type;
1585   token->val.str.len = len;
1586   token->val.str.text = dest;
1587 }
1588 
1589 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1590    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1591 
1592 static void
1593 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1594 		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1595 {
1596   _cpp_buff *first_buff = *first_buff_p;
1597   _cpp_buff *last_buff = *last_buff_p;
1598 
1599   if (first_buff == NULL)
1600     first_buff = last_buff = _cpp_get_buff (pfile, len);
1601   else if (len > BUFF_ROOM (last_buff))
1602     {
1603       size_t room = BUFF_ROOM (last_buff);
1604       memcpy (BUFF_FRONT (last_buff), base, room);
1605       BUFF_FRONT (last_buff) += room;
1606       base += room;
1607       len -= room;
1608       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1609     }
1610 
1611   memcpy (BUFF_FRONT (last_buff), base, len);
1612   BUFF_FRONT (last_buff) += len;
1613 
1614   *first_buff_p = first_buff;
1615   *last_buff_p = last_buff;
1616 }
1617 
1618 
1619 /* Returns true if a macro has been defined.
1620    This might not work if compile with -save-temps,
1621    or preprocess separately from compilation.  */
1622 
1623 static bool
1624 is_macro(cpp_reader *pfile, const uchar *base)
1625 {
1626   const uchar *cur = base;
1627   if (! ISIDST (*cur))
1628     return false;
1629   unsigned int hash = HT_HASHSTEP (0, *cur);
1630   ++cur;
1631   while (ISIDNUM (*cur))
1632     {
1633       hash = HT_HASHSTEP (hash, *cur);
1634       ++cur;
1635     }
1636   hash = HT_HASHFINISH (hash, cur - base);
1637 
1638   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1639 					base, cur - base, hash, HT_NO_INSERT));
1640 
1641   return result && cpp_macro_p (result);
1642 }
1643 
1644 /* Returns true if a literal suffix does not have the expected form
1645    and is defined as a macro.  */
1646 
1647 static bool
1648 is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
1649 {
1650   /* User-defined literals outside of namespace std must start with a single
1651      underscore, so assume anything of that form really is a UDL suffix.
1652      We don't need to worry about UDLs defined inside namespace std because
1653      their names are reserved, so cannot be used as macro names in valid
1654      programs.  */
1655   if (base[0] == '_' && base[1] != '_')
1656     return false;
1657   return is_macro (pfile, base);
1658 }
1659 
1660 /* Lexes a raw string.  The stored string contains the spelling, including
1661    double quotes, delimiter string, '(' and ')', any leading
1662    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1663    literal, or CPP_OTHER if it was not properly terminated.
1664 
1665    The spelling is NUL-terminated, but it is not guaranteed that this
1666    is the first NUL since embedded NULs are preserved.  */
1667 
1668 static void
1669 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1670 		const uchar *cur)
1671 {
1672   uchar raw_prefix[17];
1673   uchar temp_buffer[18];
1674   const uchar *orig_base;
1675   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1676   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1677   raw_str_phase phase = RAW_STR_PREFIX;
1678   enum cpp_ttype type;
1679   size_t total_len = 0;
1680   /* Index into temp_buffer during phases other than RAW_STR,
1681      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1682      be appended to temp_buffer.  */
1683   size_t temp_buffer_len = 0;
1684   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1685   size_t raw_prefix_start;
1686   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1687 
1688   type = (*base == 'L' ? CPP_WSTRING :
1689 	  *base == 'U' ? CPP_STRING32 :
1690 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1691 	  : CPP_STRING);
1692 
1693 #define BUF_APPEND(STR,LEN)					\
1694       do {							\
1695 	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
1696 			&first_buff, &last_buff);		\
1697 	total_len += (LEN);					\
1698 	if (__builtin_expect (temp_buffer_len < 17, 0)		\
1699 	    && (const uchar *)(STR) != base			\
1700 	    && (LEN) <= 2)					\
1701 	  {							\
1702 	    memcpy (temp_buffer + temp_buffer_len,		\
1703 		    (const uchar *)(STR), (LEN));		\
1704 	    temp_buffer_len += (LEN);				\
1705 	  }							\
1706       } while (0)
1707 
1708   orig_base = base;
1709   ++cur;
1710   raw_prefix_start = cur - base;
1711   for (;;)
1712     {
1713       cppchar_t c;
1714 
1715       /* If we previously performed any trigraph or line splicing
1716 	 transformations, undo them in between the opening and closing
1717 	 double quote.  */
1718       while (note->pos < cur)
1719 	++note;
1720       for (; note->pos == cur; ++note)
1721 	{
1722 	  switch (note->type)
1723 	    {
1724 	    case '\\':
1725 	    case ' ':
1726 	      /* Restore backslash followed by newline.  */
1727 	      BUF_APPEND (base, cur - base);
1728 	      base = cur;
1729 	      BUF_APPEND ("\\", 1);
1730 	    after_backslash:
1731 	      if (note->type == ' ')
1732 		{
1733 		  /* GNU backslash whitespace newline extension.  FIXME
1734 		     could be any sequence of non-vertical space.  When we
1735 		     can properly restore any such sequence, we should mark
1736 		     this note as handled so _cpp_process_line_notes
1737 		     doesn't warn.  */
1738 		  BUF_APPEND (" ", 1);
1739 		}
1740 
1741 	      BUF_APPEND ("\n", 1);
1742 	      break;
1743 
1744 	    case 0:
1745 	      /* Already handled.  */
1746 	      break;
1747 
1748 	    default:
1749 	      if (_cpp_trigraph_map[note->type])
1750 		{
1751 		  /* Don't warn about this trigraph in
1752 		     _cpp_process_line_notes, since trigraphs show up as
1753 		     trigraphs in raw strings.  */
1754 		  uchar type = note->type;
1755 		  note->type = 0;
1756 
1757 		  if (!CPP_OPTION (pfile, trigraphs))
1758 		    /* If we didn't convert the trigraph in the first
1759 		       place, don't do anything now either.  */
1760 		    break;
1761 
1762 		  BUF_APPEND (base, cur - base);
1763 		  base = cur;
1764 		  BUF_APPEND ("??", 2);
1765 
1766 		  /* ??/ followed by newline gets two line notes, one for
1767 		     the trigraph and one for the backslash/newline.  */
1768 		  if (type == '/' && note[1].pos == cur)
1769 		    {
1770 		      if (note[1].type != '\\'
1771 			  && note[1].type != ' ')
1772 			abort ();
1773 		      BUF_APPEND ("/", 1);
1774 		      ++note;
1775 		      goto after_backslash;
1776 		    }
1777 		  else
1778 		    {
1779 		      /* Skip the replacement character.  */
1780 		      base = ++cur;
1781 		      BUF_APPEND (&type, 1);
1782 		      c = type;
1783 		      goto check_c;
1784 		    }
1785 		}
1786 	      else
1787 		abort ();
1788 	      break;
1789 	    }
1790 	}
1791       c = *cur++;
1792       if (__builtin_expect (temp_buffer_len < 17, 0))
1793 	temp_buffer[temp_buffer_len++] = c;
1794 
1795      check_c:
1796       if (phase == RAW_STR_PREFIX)
1797 	{
1798 	  while (raw_prefix_len < temp_buffer_len)
1799 	    {
1800 	      raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1801 	      switch (raw_prefix[raw_prefix_len])
1802 		{
1803 		case ' ': case '(': case ')': case '\\': case '\t':
1804 		case '\v': case '\f': case '\n': default:
1805 		  break;
1806 		/* Basic source charset except the above chars.  */
1807 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1808 		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1809 		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1810 		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1811 		case 'y': case 'z':
1812 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1813 		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1814 		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1815 		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1816 		case 'Y': case 'Z':
1817 		case '0': case '1': case '2': case '3': case '4': case '5':
1818 		case '6': case '7': case '8': case '9':
1819 		case '_': case '{': case '}': case '#': case '[': case ']':
1820 		case '<': case '>': case '%': case ':': case ';': case '.':
1821 		case '?': case '*': case '+': case '-': case '/': case '^':
1822 		case '&': case '|': case '~': case '!': case '=': case ',':
1823 		case '"': case '\'':
1824 		  if (raw_prefix_len < 16)
1825 		    {
1826 		      raw_prefix_len++;
1827 		      continue;
1828 		    }
1829 		  break;
1830 		}
1831 
1832 	      if (raw_prefix[raw_prefix_len] != '(')
1833 		{
1834 		  int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1835 		  if (raw_prefix_len == 16)
1836 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1837 					 col, "raw string delimiter longer "
1838 					      "than 16 characters");
1839 		  else if (raw_prefix[raw_prefix_len] == '\n')
1840 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1841 					 col, "invalid new-line in raw "
1842 					      "string delimiter");
1843 		  else
1844 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1845 					 col, "invalid character '%c' in "
1846 					      "raw string delimiter",
1847 					 (int) raw_prefix[raw_prefix_len]);
1848 		  pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1849 		  create_literal (pfile, token, orig_base,
1850 				  raw_prefix_start - 1, CPP_OTHER);
1851 		  if (first_buff)
1852 		    _cpp_release_buff (pfile, first_buff);
1853 		  return;
1854 		}
1855 	      raw_prefix[raw_prefix_len] = '"';
1856 	      phase = RAW_STR;
1857 	      /* Nothing should be appended to temp_buffer during
1858 		 RAW_STR phase.  */
1859 	      temp_buffer_len = 17;
1860 	      break;
1861 	    }
1862 	  continue;
1863 	}
1864       else if (phase == RAW_STR_SUFFIX)
1865 	{
1866 	  while (raw_suffix_len <= raw_prefix_len
1867 		 && raw_suffix_len < temp_buffer_len
1868 		 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1869 	    raw_suffix_len++;
1870 	  if (raw_suffix_len > raw_prefix_len)
1871 	    break;
1872 	  if (raw_suffix_len == temp_buffer_len)
1873 	    continue;
1874 	  phase = RAW_STR;
1875 	  /* Nothing should be appended to temp_buffer during
1876 	     RAW_STR phase.  */
1877 	  temp_buffer_len = 17;
1878 	}
1879       if (c == ')')
1880 	{
1881 	  phase = RAW_STR_SUFFIX;
1882 	  raw_suffix_len = 0;
1883 	  temp_buffer_len = 0;
1884 	}
1885       else if (c == '\n')
1886 	{
1887 	  if (pfile->state.in_directive
1888 	      || (pfile->state.parsing_args
1889 		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
1890 	    {
1891 	      cur--;
1892 	      type = CPP_OTHER;
1893 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1894 				   "unterminated raw string");
1895 	      break;
1896 	    }
1897 
1898 	  BUF_APPEND (base, cur - base);
1899 
1900 	  if (pfile->buffer->cur < pfile->buffer->rlimit)
1901 	    CPP_INCREMENT_LINE (pfile, 0);
1902 	  pfile->buffer->need_line = true;
1903 
1904 	  pfile->buffer->cur = cur-1;
1905 	  _cpp_process_line_notes (pfile, false);
1906 	  if (!_cpp_get_fresh_line (pfile))
1907 	    {
1908 	      location_t src_loc = token->src_loc;
1909 	      token->type = CPP_EOF;
1910 	      /* Tell the compiler the line number of the EOF token.  */
1911 	      token->src_loc = pfile->line_table->highest_line;
1912 	      token->flags = BOL;
1913 	      if (first_buff != NULL)
1914 		_cpp_release_buff (pfile, first_buff);
1915 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1916 				   "unterminated raw string");
1917 	      return;
1918 	    }
1919 
1920 	  cur = base = pfile->buffer->cur;
1921 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
1922 	}
1923     }
1924 
1925   if (CPP_OPTION (pfile, user_literals))
1926     {
1927       /* If a string format macro, say from inttypes.h, is placed touching
1928 	 a string literal it could be parsed as a C++11 user-defined string
1929 	 literal thus breaking the program.  */
1930       if (is_macro_not_literal_suffix (pfile, cur))
1931 	{
1932 	  /* Raise a warning, but do not consume subsequent tokens.  */
1933 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1934 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1935 				   token->src_loc, 0,
1936 				   "invalid suffix on literal; C++11 requires "
1937 				   "a space between literal and string macro");
1938 	}
1939       /* Grab user defined literal suffix.  */
1940       else if (ISIDST (*cur))
1941 	{
1942 	  type = cpp_userdef_string_add_type (type);
1943 	  ++cur;
1944 
1945 	  while (ISIDNUM (*cur))
1946 	    ++cur;
1947 	}
1948     }
1949 
1950   pfile->buffer->cur = cur;
1951   if (first_buff == NULL)
1952     create_literal (pfile, token, base, cur - base, type);
1953   else
1954     {
1955       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1956 
1957       token->type = type;
1958       token->val.str.len = total_len + (cur - base);
1959       token->val.str.text = dest;
1960       last_buff = first_buff;
1961       while (last_buff != NULL)
1962 	{
1963 	  memcpy (dest, last_buff->base,
1964 		  BUFF_FRONT (last_buff) - last_buff->base);
1965 	  dest += BUFF_FRONT (last_buff) - last_buff->base;
1966 	  last_buff = last_buff->next;
1967 	}
1968       _cpp_release_buff (pfile, first_buff);
1969       memcpy (dest, base, cur - base);
1970       dest[cur - base] = '\0';
1971     }
1972 }
1973 
1974 /* Lexes a string, character constant, or angle-bracketed header file
1975    name.  The stored string contains the spelling, including opening
1976    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1977    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1978    if it was not properly terminated, or CPP_LESS for an unterminated
1979    header name which must be relexed as normal tokens.
1980 
1981    The spelling is NUL-terminated, but it is not guaranteed that this
1982    is the first NUL since embedded NULs are preserved.  */
1983 static void
1984 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1985 {
1986   bool saw_NUL = false;
1987   const uchar *cur;
1988   cppchar_t terminator;
1989   enum cpp_ttype type;
1990 
1991   cur = base;
1992   terminator = *cur++;
1993   if (terminator == 'L' || terminator == 'U')
1994     terminator = *cur++;
1995   else if (terminator == 'u')
1996     {
1997       terminator = *cur++;
1998       if (terminator == '8')
1999 	terminator = *cur++;
2000     }
2001   if (terminator == 'R')
2002     {
2003       lex_raw_string (pfile, token, base, cur);
2004       return;
2005     }
2006   if (terminator == '"')
2007     type = (*base == 'L' ? CPP_WSTRING :
2008 	    *base == 'U' ? CPP_STRING32 :
2009 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
2010 			 : CPP_STRING);
2011   else if (terminator == '\'')
2012     type = (*base == 'L' ? CPP_WCHAR :
2013 	    *base == 'U' ? CPP_CHAR32 :
2014 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
2015 			 : CPP_CHAR);
2016   else
2017     terminator = '>', type = CPP_HEADER_NAME;
2018 
2019   for (;;)
2020     {
2021       cppchar_t c = *cur++;
2022 
2023       /* In #include-style directives, terminators are not escapable.  */
2024       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
2025 	cur++;
2026       else if (c == terminator)
2027 	break;
2028       else if (c == '\n')
2029 	{
2030 	  cur--;
2031 	  /* Unmatched quotes always yield undefined behavior, but
2032 	     greedy lexing means that what appears to be an unterminated
2033 	     header name may actually be a legitimate sequence of tokens.  */
2034 	  if (terminator == '>')
2035 	    {
2036 	      token->type = CPP_LESS;
2037 	      return;
2038 	    }
2039 	  type = CPP_OTHER;
2040 	  break;
2041 	}
2042       else if (c == '\0')
2043 	saw_NUL = true;
2044     }
2045 
2046   if (saw_NUL && !pfile->state.skipping)
2047     cpp_error (pfile, CPP_DL_WARNING,
2048 	       "null character(s) preserved in literal");
2049 
2050   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
2051     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
2052 	       (int) terminator);
2053 
2054   if (CPP_OPTION (pfile, user_literals))
2055     {
2056       /* If a string format macro, say from inttypes.h, is placed touching
2057 	 a string literal it could be parsed as a C++11 user-defined string
2058 	 literal thus breaking the program.  */
2059       if (is_macro_not_literal_suffix (pfile, cur))
2060 	{
2061 	  /* Raise a warning, but do not consume subsequent tokens.  */
2062 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
2063 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
2064 				   token->src_loc, 0,
2065 				   "invalid suffix on literal; C++11 requires "
2066 				   "a space between literal and string macro");
2067 	}
2068       /* Grab user defined literal suffix.  */
2069       else if (ISIDST (*cur))
2070 	{
2071 	  type = cpp_userdef_char_add_type (type);
2072 	  type = cpp_userdef_string_add_type (type);
2073           ++cur;
2074 
2075 	  while (ISIDNUM (*cur))
2076 	    ++cur;
2077 	}
2078     }
2079   else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
2080 	   && is_macro (pfile, cur)
2081 	   && !pfile->state.skipping)
2082     cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
2083 			   token->src_loc, 0, "C++11 requires a space "
2084 			   "between string literal and macro");
2085 
2086   pfile->buffer->cur = cur;
2087   create_literal (pfile, token, base, cur - base, type);
2088 }
2089 
2090 /* Return the comment table. The client may not make any assumption
2091    about the ordering of the table.  */
2092 cpp_comment_table *
2093 cpp_get_comments (cpp_reader *pfile)
2094 {
2095   return &pfile->comments;
2096 }
2097 
2098 /* Append a comment to the end of the comment table. */
2099 static void
2100 store_comment (cpp_reader *pfile, cpp_token *token)
2101 {
2102   int len;
2103 
2104   if (pfile->comments.allocated == 0)
2105     {
2106       pfile->comments.allocated = 256;
2107       pfile->comments.entries = (cpp_comment *) xmalloc
2108 	(pfile->comments.allocated * sizeof (cpp_comment));
2109     }
2110 
2111   if (pfile->comments.count == pfile->comments.allocated)
2112     {
2113       pfile->comments.allocated *= 2;
2114       pfile->comments.entries = (cpp_comment *) xrealloc
2115 	(pfile->comments.entries,
2116 	 pfile->comments.allocated * sizeof (cpp_comment));
2117     }
2118 
2119   len = token->val.str.len;
2120 
2121   /* Copy comment. Note, token may not be NULL terminated. */
2122   pfile->comments.entries[pfile->comments.count].comment =
2123     (char *) xmalloc (sizeof (char) * (len + 1));
2124   memcpy (pfile->comments.entries[pfile->comments.count].comment,
2125 	  token->val.str.text, len);
2126   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
2127 
2128   /* Set source location. */
2129   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
2130 
2131   /* Increment the count of entries in the comment table. */
2132   pfile->comments.count++;
2133 }
2134 
2135 /* The stored comment includes the comment start and any terminator.  */
2136 static void
2137 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
2138 	      cppchar_t type)
2139 {
2140   unsigned char *buffer;
2141   unsigned int len, clen, i;
2142   int convert_to_c = (pfile->state.in_directive || pfile->state.parsing_args)
2143     && type == '/';
2144 
2145   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
2146 
2147   /* C++ comments probably (not definitely) have moved past a new
2148      line, which we don't want to save in the comment.  */
2149   if (is_vspace (pfile->buffer->cur[-1]))
2150     len--;
2151 
2152   /* If we are currently in a directive or in argument parsing, then
2153      we need to store all C++ comments as C comments internally, and
2154      so we need to allocate a little extra space in that case.
2155 
2156      Note that the only time we encounter a directive here is
2157      when we are saving comments in a "#define".  */
2158   clen = convert_to_c ? len + 2 : len;
2159 
2160   buffer = _cpp_unaligned_alloc (pfile, clen);
2161 
2162   token->type = CPP_COMMENT;
2163   token->val.str.len = clen;
2164   token->val.str.text = buffer;
2165 
2166   buffer[0] = '/';
2167   memcpy (buffer + 1, from, len - 1);
2168 
2169   /* Finish conversion to a C comment, if necessary.  */
2170   if (convert_to_c)
2171     {
2172       buffer[1] = '*';
2173       buffer[clen - 2] = '*';
2174       buffer[clen - 1] = '/';
2175       /* As there can be in a C++ comments illegal sequences for C comments
2176          we need to filter them out.  */
2177       for (i = 2; i < (clen - 2); i++)
2178         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2179           buffer[i] = '|';
2180     }
2181 
2182   /* Finally store this comment for use by clients of libcpp. */
2183   store_comment (pfile, token);
2184 }
2185 
2186 /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
2187    comment.  */
2188 
2189 static bool
2190 fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
2191 {
2192   const unsigned char *from = comment_start + 1;
2193 
2194   switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
2195     {
2196       /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
2197 	 don't recognize any comments.  The latter only checks attributes,
2198 	 the former doesn't warn.  */
2199     case 0:
2200     default:
2201       return false;
2202       /* -Wimplicit-fallthrough=1 considers any comment, no matter what
2203 	 content it has.  */
2204     case 1:
2205       return true;
2206     case 2:
2207       /* -Wimplicit-fallthrough=2 looks for (case insensitive)
2208 	 .*falls?[ \t-]*thr(u|ough).* regex.  */
2209       for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
2210 	   from++)
2211 	{
2212 	  /* Is there anything like strpbrk with upper boundary, or
2213 	     memchr looking for 2 characters rather than just one?  */
2214 	  if (from[0] != 'f' && from[0] != 'F')
2215 	    continue;
2216 	  if (from[1] != 'a' && from[1] != 'A')
2217 	    continue;
2218 	  if (from[2] != 'l' && from[2] != 'L')
2219 	    continue;
2220 	  if (from[3] != 'l' && from[3] != 'L')
2221 	    continue;
2222 	  from += sizeof "fall" - 1;
2223 	  if (from[0] == 's' || from[0] == 'S')
2224 	    from++;
2225 	  while (*from == ' ' || *from == '\t' || *from == '-')
2226 	    from++;
2227 	  if (from[0] != 't' && from[0] != 'T')
2228 	    continue;
2229 	  if (from[1] != 'h' && from[1] != 'H')
2230 	    continue;
2231 	  if (from[2] != 'r' && from[2] != 'R')
2232 	    continue;
2233 	  if (from[3] == 'u' || from[3] == 'U')
2234 	    return true;
2235 	  if (from[3] != 'o' && from[3] != 'O')
2236 	    continue;
2237 	  if (from[4] != 'u' && from[4] != 'U')
2238 	    continue;
2239 	  if (from[5] != 'g' && from[5] != 'G')
2240 	    continue;
2241 	  if (from[6] != 'h' && from[6] != 'H')
2242 	    continue;
2243 	  return true;
2244 	}
2245       return false;
2246     case 3:
2247     case 4:
2248       break;
2249     }
2250 
2251   /* Whole comment contents:
2252      -fallthrough
2253      @fallthrough@
2254    */
2255   if (*from == '-' || *from == '@')
2256     {
2257       size_t len = sizeof "fallthrough" - 1;
2258       if ((size_t) (pfile->buffer->cur - from - 1) < len)
2259 	return false;
2260       if (memcmp (from + 1, "fallthrough", len))
2261 	return false;
2262       if (*from == '@')
2263 	{
2264 	  if (from[len + 1] != '@')
2265 	    return false;
2266 	  len++;
2267 	}
2268       from += 1 + len;
2269     }
2270   /* Whole comment contents (regex):
2271      lint -fallthrough[ \t]*
2272    */
2273   else if (*from == 'l')
2274     {
2275       size_t len = sizeof "int -fallthrough" - 1;
2276       if ((size_t) (pfile->buffer->cur - from - 1) < len)
2277 	return false;
2278       if (memcmp (from + 1, "int -fallthrough", len))
2279 	return false;
2280       from += 1 + len;
2281       while (*from == ' ' || *from == '\t')
2282 	from++;
2283     }
2284   /* Whole comment contents (regex):
2285      [ \t]*FALLTHR(U|OUGH)[ \t]*
2286    */
2287   else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
2288     {
2289       while (*from == ' ' || *from == '\t')
2290 	from++;
2291       if ((size_t) (pfile->buffer->cur - from)  < sizeof "FALLTHRU" - 1)
2292 	return false;
2293       if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
2294 	return false;
2295       from += sizeof "FALLTHR" - 1;
2296       if (*from == 'U')
2297 	from++;
2298       else if ((size_t) (pfile->buffer->cur - from)  < sizeof "OUGH" - 1)
2299 	return false;
2300       else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
2301 	return false;
2302       else
2303 	from += sizeof "OUGH" - 1;
2304       while (*from == ' ' || *from == '\t')
2305 	from++;
2306     }
2307   /* Whole comment contents (regex):
2308      [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
2309      [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
2310      [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
2311    */
2312   else
2313     {
2314       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
2315 	from++;
2316       unsigned char f = *from;
2317       bool all_upper = false;
2318       if (f == 'E' || f == 'e')
2319 	{
2320 	  if ((size_t) (pfile->buffer->cur - from)
2321 	      < sizeof "else fallthru" - 1)
2322 	    return false;
2323 	  if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
2324 	    all_upper = true;
2325 	  else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
2326 	    return false;
2327 	  from += sizeof "else" - 1;
2328 	  if (*from == ',')
2329 	    from++;
2330 	  if (*from != ' ')
2331 	    return false;
2332 	  from++;
2333 	  if (all_upper && *from == 'f')
2334 	    return false;
2335 	  if (f == 'e' && *from == 'F')
2336 	    return false;
2337 	  f = *from;
2338 	}
2339       else if (f == 'I' || f == 'i')
2340 	{
2341 	  if ((size_t) (pfile->buffer->cur - from)
2342 	      < sizeof "intentional fallthru" - 1)
2343 	    return false;
2344 	  if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
2345 				  sizeof "NTENTIONAL" - 1) == 0)
2346 	    all_upper = true;
2347 	  else if (memcmp (from + 1, "ntentional",
2348 			   sizeof "ntentional" - 1))
2349 	    return false;
2350 	  from += sizeof "intentional" - 1;
2351 	  if (*from == ' ')
2352 	    {
2353 	      from++;
2354 	      if (all_upper && *from == 'f')
2355 		return false;
2356 	    }
2357 	  else if (all_upper)
2358 	    {
2359 	      if (memcmp (from, "LY F", sizeof "LY F" - 1))
2360 		return false;
2361 	      from += sizeof "LY " - 1;
2362 	    }
2363 	  else
2364 	    {
2365 	      if (memcmp (from, "ly ", sizeof "ly " - 1))
2366 		return false;
2367 	      from += sizeof "ly " - 1;
2368 	    }
2369 	  if (f == 'i' && *from == 'F')
2370 	    return false;
2371 	  f = *from;
2372 	}
2373       if (f != 'F' && f != 'f')
2374 	return false;
2375       if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
2376 	return false;
2377       if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
2378 	all_upper = true;
2379       else if (all_upper)
2380 	return false;
2381       else if (memcmp (from + 1, "all", sizeof "all" - 1))
2382 	return false;
2383       from += sizeof "fall" - 1;
2384       if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
2385 	from += 2;
2386       else if (*from == ' ' || *from == '-')
2387 	from++;
2388       else if (*from != (all_upper ? 'T' : 't'))
2389 	return false;
2390       if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
2391 	return false;
2392       if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
2393 	return false;
2394       if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
2395 	{
2396 	  if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
2397 	    return false;
2398 	  if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
2399 		      sizeof "hrough" - 1))
2400 	    return false;
2401 	  from += sizeof "through" - 1;
2402 	}
2403       else
2404 	from += sizeof "thru" - 1;
2405       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
2406 	from++;
2407       if (*from == '-')
2408 	{
2409 	  from++;
2410 	  if (*comment_start == '*')
2411 	    {
2412 	      do
2413 		{
2414 		  while (*from && *from != '*'
2415 			 && *from != '\n' && *from != '\r')
2416 		    from++;
2417 		  if (*from != '*' || from[1] == '/')
2418 		    break;
2419 		  from++;
2420 		}
2421 	      while (1);
2422 	    }
2423 	  else
2424 	    while (*from && *from != '\n' && *from != '\r')
2425 	      from++;
2426 	}
2427     }
2428   /* C block comment.  */
2429   if (*comment_start == '*')
2430     {
2431       if (*from != '*' || from[1] != '/')
2432 	return false;
2433     }
2434   /* C++ line comment.  */
2435   else if (*from != '\n')
2436     return false;
2437 
2438   return true;
2439 }
2440 
2441 /* Allocate COUNT tokens for RUN.  */
2442 void
2443 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2444 {
2445   run->base = XNEWVEC (cpp_token, count);
2446   run->limit = run->base + count;
2447   run->next = NULL;
2448 }
2449 
2450 /* Returns the next tokenrun, or creates one if there is none.  */
2451 static tokenrun *
2452 next_tokenrun (tokenrun *run)
2453 {
2454   if (run->next == NULL)
2455     {
2456       run->next = XNEW (tokenrun);
2457       run->next->prev = run;
2458       _cpp_init_tokenrun (run->next, 250);
2459     }
2460 
2461   return run->next;
2462 }
2463 
2464 /* Return the number of not yet processed token in a given
2465    context.  */
2466 int
2467 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2468 {
2469   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2470     return (LAST (context).token - FIRST (context).token);
2471   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2472 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2473     return (LAST (context).ptoken - FIRST (context).ptoken);
2474   else
2475       abort ();
2476 }
2477 
2478 /* Returns the token present at index INDEX in a given context.  If
2479    INDEX is zero, the next token to be processed is returned.  */
2480 static const cpp_token*
2481 _cpp_token_from_context_at (cpp_context *context, int index)
2482 {
2483   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2484     return &(FIRST (context).token[index]);
2485   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2486 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2487     return FIRST (context).ptoken[index];
2488  else
2489    abort ();
2490 }
2491 
2492 /* Look ahead in the input stream.  */
2493 const cpp_token *
2494 cpp_peek_token (cpp_reader *pfile, int index)
2495 {
2496   cpp_context *context = pfile->context;
2497   const cpp_token *peektok;
2498   int count;
2499 
2500   /* First, scan through any pending cpp_context objects.  */
2501   while (context->prev)
2502     {
2503       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2504 
2505       if (index < (int) sz)
2506         return _cpp_token_from_context_at (context, index);
2507       index -= (int) sz;
2508       context = context->prev;
2509     }
2510 
2511   /* We will have to read some new tokens after all (and do so
2512      without invalidating preceding tokens).  */
2513   count = index;
2514   pfile->keep_tokens++;
2515 
2516   /* For peeked tokens temporarily disable line_change reporting,
2517      until the tokens are parsed for real.  */
2518   void (*line_change) (cpp_reader *, const cpp_token *, int)
2519     = pfile->cb.line_change;
2520   pfile->cb.line_change = NULL;
2521 
2522   do
2523     {
2524       peektok = _cpp_lex_token (pfile);
2525       if (peektok->type == CPP_EOF)
2526 	{
2527 	  index--;
2528 	  break;
2529 	}
2530     }
2531   while (index--);
2532 
2533   _cpp_backup_tokens_direct (pfile, count - index);
2534   pfile->keep_tokens--;
2535   pfile->cb.line_change = line_change;
2536 
2537   return peektok;
2538 }
2539 
2540 /* Allocate a single token that is invalidated at the same time as the
2541    rest of the tokens on the line.  Has its line and col set to the
2542    same as the last lexed token, so that diagnostics appear in the
2543    right place.  */
2544 cpp_token *
2545 _cpp_temp_token (cpp_reader *pfile)
2546 {
2547   cpp_token *old, *result;
2548   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2549   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2550 
2551   old = pfile->cur_token - 1;
2552   /* Any pre-existing lookaheads must not be clobbered.  */
2553   if (la)
2554     {
2555       if (sz <= la)
2556         {
2557           tokenrun *next = next_tokenrun (pfile->cur_run);
2558 
2559           if (sz < la)
2560             memmove (next->base + 1, next->base,
2561                      (la - sz) * sizeof (cpp_token));
2562 
2563           next->base[0] = pfile->cur_run->limit[-1];
2564         }
2565 
2566       if (sz > 1)
2567         memmove (pfile->cur_token + 1, pfile->cur_token,
2568                  MIN (la, sz - 1) * sizeof (cpp_token));
2569     }
2570 
2571   if (!sz && pfile->cur_token == pfile->cur_run->limit)
2572     {
2573       pfile->cur_run = next_tokenrun (pfile->cur_run);
2574       pfile->cur_token = pfile->cur_run->base;
2575     }
2576 
2577   result = pfile->cur_token++;
2578   result->src_loc = old->src_loc;
2579   return result;
2580 }
2581 
2582 /* Lex a token into RESULT (external interface).  Takes care of issues
2583    like directive handling, token lookahead, multiple include
2584    optimization and skipping.  */
2585 const cpp_token *
2586 _cpp_lex_token (cpp_reader *pfile)
2587 {
2588   cpp_token *result;
2589 
2590   for (;;)
2591     {
2592       if (pfile->cur_token == pfile->cur_run->limit)
2593 	{
2594 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
2595 	  pfile->cur_token = pfile->cur_run->base;
2596 	}
2597       /* We assume that the current token is somewhere in the current
2598 	 run.  */
2599       if (pfile->cur_token < pfile->cur_run->base
2600 	  || pfile->cur_token >= pfile->cur_run->limit)
2601 	abort ();
2602 
2603       if (pfile->lookaheads)
2604 	{
2605 	  pfile->lookaheads--;
2606 	  result = pfile->cur_token++;
2607 	}
2608       else
2609 	result = _cpp_lex_direct (pfile);
2610 
2611       if (result->flags & BOL)
2612 	{
2613 	  /* Is this a directive.  If _cpp_handle_directive returns
2614 	     false, it is an assembler #.  */
2615 	  if (result->type == CPP_HASH
2616 	      /* 6.10.3 p 11: Directives in a list of macro arguments
2617 		 gives undefined behavior.  This implementation
2618 		 handles the directive as normal.  */
2619 	      && pfile->state.parsing_args != 1)
2620 	    {
2621 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2622 		{
2623 		  if (pfile->directive_result.type == CPP_PADDING)
2624 		    continue;
2625 		  result = &pfile->directive_result;
2626 		}
2627 	    }
2628 	  else if (pfile->state.in_deferred_pragma)
2629 	    result = &pfile->directive_result;
2630 
2631 	  if (pfile->cb.line_change && !pfile->state.skipping)
2632 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2633 	}
2634 
2635       /* We don't skip tokens in directives.  */
2636       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2637 	break;
2638 
2639       /* Outside a directive, invalidate controlling macros.  At file
2640 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2641 	 get here and MI optimization works.  */
2642       pfile->mi_valid = false;
2643 
2644       if (!pfile->state.skipping || result->type == CPP_EOF)
2645 	break;
2646     }
2647 
2648   return result;
2649 }
2650 
2651 /* Returns true if a fresh line has been loaded.  */
2652 bool
2653 _cpp_get_fresh_line (cpp_reader *pfile)
2654 {
2655   int return_at_eof;
2656 
2657   /* We can't get a new line until we leave the current directive.  */
2658   if (pfile->state.in_directive)
2659     return false;
2660 
2661   for (;;)
2662     {
2663       cpp_buffer *buffer = pfile->buffer;
2664 
2665       if (!buffer->need_line)
2666 	return true;
2667 
2668       if (buffer->next_line < buffer->rlimit)
2669 	{
2670 	  _cpp_clean_line (pfile);
2671 	  return true;
2672 	}
2673 
2674       /* First, get out of parsing arguments state.  */
2675       if (pfile->state.parsing_args)
2676 	return false;
2677 
2678       /* End of buffer.  Non-empty files should end in a newline.  */
2679       if (buffer->buf != buffer->rlimit
2680 	  && buffer->next_line > buffer->rlimit
2681 	  && !buffer->from_stage3)
2682 	{
2683 	  /* Clip to buffer size.  */
2684 	  buffer->next_line = buffer->rlimit;
2685 	}
2686 
2687       return_at_eof = buffer->return_at_eof;
2688       _cpp_pop_buffer (pfile);
2689       if (pfile->buffer == NULL || return_at_eof)
2690 	return false;
2691     }
2692 }
2693 
/* Helper for the lexer's operator cases; relies on `buffer' and
   `result' being in scope at the point of use.  If the next input
   character is CHAR, consume it and give the token type THEN_TYPE;
   otherwise leave the input alone and give it type ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == CHAR)                         \
        {                                               \
          buffer->cur++;                                \
          result->type = THEN_TYPE;                     \
        }                                               \
      else                                              \
        result->type = ELSE_TYPE;                       \
    }                                                   \
  while (0)
2702 
2703 /* Lex a token into pfile->cur_token, which is also incremented, to
2704    get diagnostics pointing to the correct location.
2705 
2706    Does not handle issues such as token lookahead, multiple-include
2707    optimization, directives, skipping etc.  This function is only
2708    suitable for use by _cpp_lex_token, and in special cases like
2709    lex_expansion_token which doesn't care for any of these issues.
2710 
2711    When meeting a newline, returns CPP_EOF if parsing a directive,
2712    otherwise returns to the start of the token buffer if permissible.
2713    Returns the location of the lexed token.  */
2714 cpp_token *
2715 _cpp_lex_direct (cpp_reader *pfile)
2716 {
2717   cppchar_t c;
2718   cpp_buffer *buffer;
2719   const unsigned char *comment_start;
2720   bool fallthrough_comment = false;
2721   cpp_token *result = pfile->cur_token++;
2722 
2723  fresh_line:
2724   result->flags = 0;
2725   buffer = pfile->buffer;
2726   if (buffer->need_line)
2727     {
2728       if (pfile->state.in_deferred_pragma)
2729 	{
2730 	  result->type = CPP_PRAGMA_EOL;
2731 	  pfile->state.in_deferred_pragma = false;
2732 	  if (!pfile->state.pragma_allow_expansion)
2733 	    pfile->state.prevent_expansion--;
2734 	  return result;
2735 	}
2736       if (!_cpp_get_fresh_line (pfile))
2737 	{
2738 	  result->type = CPP_EOF;
2739 	  if (!pfile->state.in_directive)
2740 	    {
2741 	      /* Tell the compiler the line number of the EOF token.  */
2742 	      result->src_loc = pfile->line_table->highest_line;
2743 	      result->flags = BOL;
2744 	    }
2745 	  return result;
2746 	}
2747       if (buffer != pfile->buffer)
2748 	fallthrough_comment = false;
2749       if (!pfile->keep_tokens)
2750 	{
2751 	  pfile->cur_run = &pfile->base_run;
2752 	  result = pfile->base_run.base;
2753 	  pfile->cur_token = result + 1;
2754 	}
2755       result->flags = BOL;
2756       if (pfile->state.parsing_args == 2)
2757 	result->flags |= PREV_WHITE;
2758     }
2759   buffer = pfile->buffer;
2760  update_tokens_line:
2761   result->src_loc = pfile->line_table->highest_line;
2762 
2763  skipped_white:
2764   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2765       && !pfile->overlaid_buffer)
2766     {
2767       _cpp_process_line_notes (pfile, false);
2768       result->src_loc = pfile->line_table->highest_line;
2769     }
2770   c = *buffer->cur++;
2771 
2772   if (pfile->forced_token_location)
2773     result->src_loc = pfile->forced_token_location;
2774   else
2775     result->src_loc = linemap_position_for_column (pfile->line_table,
2776 					  CPP_BUF_COLUMN (buffer, buffer->cur));
2777 
2778   switch (c)
2779     {
2780     case ' ': case '\t': case '\f': case '\v': case '\0':
2781       result->flags |= PREV_WHITE;
2782       skip_whitespace (pfile, c);
2783       goto skipped_white;
2784 
2785     case '\n':
2786       /* Increment the line, unless this is the last line ...  */
2787       if (buffer->cur < buffer->rlimit
2788 	  /* ... or this is a #include, (where _cpp_stack_file needs to
2789 	     unwind by one line) ...  */
2790 	  || (pfile->state.in_directive > 1
2791 	      /* ... except traditional-cpp increments this elsewhere.  */
2792 	      && !CPP_OPTION (pfile, traditional)))
2793 	CPP_INCREMENT_LINE (pfile, 0);
2794       buffer->need_line = true;
2795       goto fresh_line;
2796 
2797     case '0': case '1': case '2': case '3': case '4':
2798     case '5': case '6': case '7': case '8': case '9':
2799       {
2800 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2801 	result->type = CPP_NUMBER;
2802 	lex_number (pfile, &result->val.str, &nst);
2803 	warn_about_normalization (pfile, result, &nst);
2804 	break;
2805       }
2806 
2807     case 'L':
2808     case 'u':
2809     case 'U':
2810     case 'R':
2811       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2812 	 wide strings or raw strings.  */
2813       if (c == 'L' || CPP_OPTION (pfile, rliterals)
2814 	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2815 	{
2816 	  if ((*buffer->cur == '\'' && c != 'R')
2817 	      || *buffer->cur == '"'
2818 	      || (*buffer->cur == 'R'
2819 		  && c != 'R'
2820 		  && buffer->cur[1] == '"'
2821 		  && CPP_OPTION (pfile, rliterals))
2822 	      || (*buffer->cur == '8'
2823 		  && c == 'u'
2824 		  && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
2825 				&& CPP_OPTION (pfile, utf8_char_literals)))
2826 		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2827 			  && CPP_OPTION (pfile, rliterals)))))
2828 	    {
2829 	      lex_string (pfile, result, buffer->cur - 1);
2830 	      break;
2831 	    }
2832 	}
2833       /* Fall through.  */
2834 
2835     case '_':
2836     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2837     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2838     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2839     case 's': case 't':           case 'v': case 'w': case 'x':
2840     case 'y': case 'z':
2841     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2842     case 'G': case 'H': case 'I': case 'J': case 'K':
2843     case 'M': case 'N': case 'O': case 'P': case 'Q':
2844     case 'S': case 'T':           case 'V': case 'W': case 'X':
2845     case 'Y': case 'Z':
2846       result->type = CPP_NAME;
2847       {
2848 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2849 	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2850 						&nst,
2851 						&result->val.node.spelling);
2852 	warn_about_normalization (pfile, result, &nst);
2853       }
2854 
2855       /* Convert named operators to their proper types.  */
2856       if (result->val.node.node->flags & NODE_OPERATOR)
2857 	{
2858 	  result->flags |= NAMED_OP;
2859 	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2860 	}
2861 
2862       /* Signal FALLTHROUGH comment followed by another token.  */
2863       if (fallthrough_comment)
2864 	result->flags |= PREV_FALLTHROUGH;
2865       break;
2866 
2867     case '\'':
2868     case '"':
2869       lex_string (pfile, result, buffer->cur - 1);
2870       break;
2871 
2872     case '/':
2873       /* A potential block or line comment.  */
2874       comment_start = buffer->cur;
2875       c = *buffer->cur;
2876 
2877       if (c == '*')
2878 	{
2879 	  if (_cpp_skip_block_comment (pfile))
2880 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2881 	}
2882       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2883 	{
2884 	  /* Don't warn for system headers.  */
2885 	  if (cpp_in_system_header (pfile))
2886 	    ;
2887 	  /* Warn about comments if pedantically GNUC89, and not
2888 	     in system headers.  */
2889 	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2890 		   && CPP_PEDANTIC (pfile)
2891 		   && ! buffer->warned_cplusplus_comments)
2892 	    {
2893 	      if (cpp_error (pfile, CPP_DL_PEDWARN,
2894 			     "C++ style comments are not allowed in ISO C90"))
2895 		cpp_error (pfile, CPP_DL_NOTE,
2896 			   "(this will be reported only once per input file)");
2897 	      buffer->warned_cplusplus_comments = 1;
2898 	    }
2899 	  /* Or if specifically desired via -Wc90-c99-compat.  */
2900 	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2901 		   && ! CPP_OPTION (pfile, cplusplus)
2902 		   && ! buffer->warned_cplusplus_comments)
2903 	    {
2904 	      if (cpp_error (pfile, CPP_DL_WARNING,
2905 			     "C++ style comments are incompatible with C90"))
2906 		cpp_error (pfile, CPP_DL_NOTE,
2907 			   "(this will be reported only once per input file)");
2908 	      buffer->warned_cplusplus_comments = 1;
2909 	    }
2910 	  /* In C89/C94, C++ style comments are forbidden.  */
2911 	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2912 		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
2913 	    {
2914 	      /* But don't be confused about valid code such as
2915 	         - // immediately followed by *,
2916 		 - // in a preprocessing directive,
2917 		 - // in an #if 0 block.  */
2918 	      if (buffer->cur[1] == '*'
2919 		  || pfile->state.in_directive
2920 		  || pfile->state.skipping)
2921 		{
2922 		  result->type = CPP_DIV;
2923 		  break;
2924 		}
2925 	      else if (! buffer->warned_cplusplus_comments)
2926 		{
2927 		  if (cpp_error (pfile, CPP_DL_ERROR,
2928 				 "C++ style comments are not allowed in "
2929 				 "ISO C90"))
2930 		    cpp_error (pfile, CPP_DL_NOTE,
2931 			       "(this will be reported only once per input "
2932 			       "file)");
2933 		  buffer->warned_cplusplus_comments = 1;
2934 		}
2935 	    }
2936 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2937 	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2938 	}
2939       else if (c == '=')
2940 	{
2941 	  buffer->cur++;
2942 	  result->type = CPP_DIV_EQ;
2943 	  break;
2944 	}
2945       else
2946 	{
2947 	  result->type = CPP_DIV;
2948 	  break;
2949 	}
2950 
2951       if (fallthrough_comment_p (pfile, comment_start))
2952 	fallthrough_comment = true;
2953 
2954       if (pfile->cb.comment)
2955 	{
2956 	  size_t len = pfile->buffer->cur - comment_start;
2957 	  pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
2958 			     len + 1);
2959 	}
2960 
2961       if (!pfile->state.save_comments)
2962 	{
2963 	  result->flags |= PREV_WHITE;
2964 	  goto update_tokens_line;
2965 	}
2966 
2967       if (fallthrough_comment)
2968 	result->flags |= PREV_FALLTHROUGH;
2969 
2970       /* Save the comment as a token in its own right.  */
2971       save_comment (pfile, result, comment_start, c);
2972       break;
2973 
2974     case '<':
2975       if (pfile->state.angled_headers)
2976 	{
2977 	  lex_string (pfile, result, buffer->cur - 1);
2978 	  if (result->type != CPP_LESS)
2979 	    break;
2980 	}
2981 
2982       result->type = CPP_LESS;
2983       if (*buffer->cur == '=')
2984 	{
2985 	  buffer->cur++, result->type = CPP_LESS_EQ;
2986 	  if (*buffer->cur == '>'
2987 	      && CPP_OPTION (pfile, cplusplus)
2988 	      && CPP_OPTION (pfile, lang) >= CLK_GNUCXX2A)
2989 	    buffer->cur++, result->type = CPP_SPACESHIP;
2990 	}
2991       else if (*buffer->cur == '<')
2992 	{
2993 	  buffer->cur++;
2994 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2995 	}
2996       else if (CPP_OPTION (pfile, digraphs))
2997 	{
2998 	  if (*buffer->cur == ':')
2999 	    {
3000 	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
3001 		 three characters are <:: and the subsequent character
3002 		 is neither : nor >, the < is treated as a preprocessor
3003 		 token by itself".  */
3004 	      if (CPP_OPTION (pfile, cplusplus)
3005 		  && CPP_OPTION (pfile, lang) != CLK_CXX98
3006 		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
3007 		  && buffer->cur[1] == ':'
3008 		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
3009 		break;
3010 
3011 	      buffer->cur++;
3012 	      result->flags |= DIGRAPH;
3013 	      result->type = CPP_OPEN_SQUARE;
3014 	    }
3015 	  else if (*buffer->cur == '%')
3016 	    {
3017 	      buffer->cur++;
3018 	      result->flags |= DIGRAPH;
3019 	      result->type = CPP_OPEN_BRACE;
3020 	    }
3021 	}
3022       break;
3023 
3024     case '>':
3025       result->type = CPP_GREATER;
3026       if (*buffer->cur == '=')
3027 	buffer->cur++, result->type = CPP_GREATER_EQ;
3028       else if (*buffer->cur == '>')
3029 	{
3030 	  buffer->cur++;
3031 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
3032 	}
3033       break;
3034 
3035     case '%':
3036       result->type = CPP_MOD;
3037       if (*buffer->cur == '=')
3038 	buffer->cur++, result->type = CPP_MOD_EQ;
3039       else if (CPP_OPTION (pfile, digraphs))
3040 	{
3041 	  if (*buffer->cur == ':')
3042 	    {
3043 	      buffer->cur++;
3044 	      result->flags |= DIGRAPH;
3045 	      result->type = CPP_HASH;
3046 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
3047 		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
3048 	    }
3049 	  else if (*buffer->cur == '>')
3050 	    {
3051 	      buffer->cur++;
3052 	      result->flags |= DIGRAPH;
3053 	      result->type = CPP_CLOSE_BRACE;
3054 	    }
3055 	}
3056       break;
3057 
3058     case '.':
3059       result->type = CPP_DOT;
3060       if (ISDIGIT (*buffer->cur))
3061 	{
3062 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
3063 	  result->type = CPP_NUMBER;
3064 	  lex_number (pfile, &result->val.str, &nst);
3065 	  warn_about_normalization (pfile, result, &nst);
3066 	}
3067       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
3068 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
3069       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
3070 	buffer->cur++, result->type = CPP_DOT_STAR;
3071       break;
3072 
3073     case '+':
3074       result->type = CPP_PLUS;
3075       if (*buffer->cur == '+')
3076 	buffer->cur++, result->type = CPP_PLUS_PLUS;
3077       else if (*buffer->cur == '=')
3078 	buffer->cur++, result->type = CPP_PLUS_EQ;
3079       break;
3080 
3081     case '-':
3082       result->type = CPP_MINUS;
3083       if (*buffer->cur == '>')
3084 	{
3085 	  buffer->cur++;
3086 	  result->type = CPP_DEREF;
3087 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
3088 	    buffer->cur++, result->type = CPP_DEREF_STAR;
3089 	}
3090       else if (*buffer->cur == '-')
3091 	buffer->cur++, result->type = CPP_MINUS_MINUS;
3092       else if (*buffer->cur == '=')
3093 	buffer->cur++, result->type = CPP_MINUS_EQ;
3094       break;
3095 
3096     case '&':
3097       result->type = CPP_AND;
3098       if (*buffer->cur == '&')
3099 	buffer->cur++, result->type = CPP_AND_AND;
3100       else if (*buffer->cur == '=')
3101 	buffer->cur++, result->type = CPP_AND_EQ;
3102       break;
3103 
3104     case '|':
3105       result->type = CPP_OR;
3106       if (*buffer->cur == '|')
3107 	buffer->cur++, result->type = CPP_OR_OR;
3108       else if (*buffer->cur == '=')
3109 	buffer->cur++, result->type = CPP_OR_EQ;
3110       break;
3111 
3112     case ':':
3113       result->type = CPP_COLON;
3114       if (*buffer->cur == ':' && CPP_OPTION (pfile, scope))
3115 	buffer->cur++, result->type = CPP_SCOPE;
3116       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
3117 	{
3118 	  buffer->cur++;
3119 	  result->flags |= DIGRAPH;
3120 	  result->type = CPP_CLOSE_SQUARE;
3121 	}
3122       break;
3123 
3124     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
3125     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
3126     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
3127     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
3128     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
3129 
3130     case '?': result->type = CPP_QUERY; break;
3131     case '~': result->type = CPP_COMPL; break;
3132     case ',': result->type = CPP_COMMA; break;
3133     case '(': result->type = CPP_OPEN_PAREN; break;
3134     case ')': result->type = CPP_CLOSE_PAREN; break;
3135     case '[': result->type = CPP_OPEN_SQUARE; break;
3136     case ']': result->type = CPP_CLOSE_SQUARE; break;
3137     case '{': result->type = CPP_OPEN_BRACE; break;
3138     case '}': result->type = CPP_CLOSE_BRACE; break;
3139     case ';': result->type = CPP_SEMICOLON; break;
3140 
3141       /* @ is a punctuator in Objective-C.  */
3142     case '@': result->type = CPP_ATSIGN; break;
3143 
3144     default:
3145       {
3146 	const uchar *base = --buffer->cur;
3147 
3148 	/* Check for an extended identifier ($ or UCN or UTF-8).  */
3149 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
3150 	if (forms_identifier_p (pfile, true, &nst))
3151 	  {
3152 	    result->type = CPP_NAME;
3153 	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
3154 						    &result->val.node.spelling);
3155 	    warn_about_normalization (pfile, result, &nst);
3156 	    break;
3157 	  }
3158 
3159 	/* Otherwise this will form a CPP_OTHER token.  Parse valid UTF-8 as a
3160 	   single token.  */
3161 	buffer->cur++;
3162 	if (c >= utf8_signifier)
3163 	  {
3164 	    const uchar *pstr = base;
3165 	    cppchar_t s;
3166 	    if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
3167 	      buffer->cur = pstr;
3168 	  }
3169 	create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
3170 	break;
3171       }
3172 
3173     }
3174 
3175   /* Potentially convert the location of the token to a range.  */
3176   if (result->src_loc >= RESERVED_LOCATION_COUNT
3177       && result->type != CPP_EOF)
3178     {
3179       /* Ensure that any line notes are processed, so that we have the
3180 	 correct physical line/column for the end-point of the token even
3181 	 when a logical line is split via one or more backslashes.  */
3182       if (buffer->cur >= buffer->notes[buffer->cur_note].pos
3183 	  && !pfile->overlaid_buffer)
3184 	_cpp_process_line_notes (pfile, false);
3185 
3186       source_range tok_range;
3187       tok_range.m_start = result->src_loc;
3188       tok_range.m_finish
3189 	= linemap_position_for_column (pfile->line_table,
3190 				       CPP_BUF_COLUMN (buffer, buffer->cur));
3191 
3192       result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
3193 					       result->src_loc,
3194 					       tok_range, NULL);
3195     }
3196 
3197   return result;
3198 }
3199 
3200 /* An upper bound on the number of bytes needed to spell TOKEN.
3201    Does not include preceding whitespace.  */
3202 unsigned int
3203 cpp_token_len (const cpp_token *token)
3204 {
3205   unsigned int len;
3206 
3207   switch (TOKEN_SPELL (token))
3208     {
3209     default:		len = 6;				break;
3210     case SPELL_LITERAL:	len = token->val.str.len;		break;
3211     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
3212     }
3213 
3214   return len;
3215 }
3216 
/* Decode the single UTF-8 sequence starting at NAME and store the
   equivalent \U escape (a backslash, 'U' and eight lower-case hex
   digits) in BUFFER.  Exactly 10 bytes are written to BUFFER and no
   terminating NUL is added.  Returns the number of bytes consumed from
   NAME, which is the count of leading 1 bits in its first byte.
   Calls abort if a continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned int lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int k;

  /* The number of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* Whatever follows the length marker in the first byte forms the
     most significant payload bits.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte contributes six more payload bits.  */
  for (k = 1; k < seq_len; k++)
    {
      const unsigned char cont = name[k];

      /* Ill-formed UTF-8.  */
      if ((cont & 0xC0) != 0x80)
	abort ();

      code_point = (code_point << 6) | (cont & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
3250 
3251 /* Given a token TYPE corresponding to a digraph, return a pointer to
3252    the spelling of the digraph.  */
3253 static const unsigned char *
3254 cpp_digraph2name (enum cpp_ttype type)
3255 {
3256   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
3257 }
3258 
3259 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
3260    The buffer must already contain the enough space to hold the
3261    token's spelling.  Returns a pointer to the character after the
3262    last character written.  */
3263 unsigned char *
3264 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
3265 {
3266   size_t i;
3267   const unsigned char *name = NODE_NAME (ident);
3268 
3269   for (i = 0; i < NODE_LEN (ident); i++)
3270     if (name[i] & ~0x7F)
3271       {
3272 	i += utf8_to_ucn (buffer, name + i) - 1;
3273 	buffer += 10;
3274       }
3275     else
3276       *buffer++ = name[i];
3277 
3278   return buffer;
3279 }
3280 
3281 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
3282    already contain the enough space to hold the token's spelling.
3283    Returns a pointer to the character after the last character written.
3284    FORSTRING is true if this is to be the spelling after translation
3285    phase 1 (with the original spelling of extended identifiers), false
3286    if extended identifiers should always be written using UCNs (there is
3287    no option for always writing them in the internal UTF-8 form).
3288    FIXME: Would be nice if we didn't need the PFILE argument.  */
3289 unsigned char *
3290 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
3291 		 unsigned char *buffer, bool forstring)
3292 {
3293   switch (TOKEN_SPELL (token))
3294     {
3295     case SPELL_OPERATOR:
3296       {
3297 	const unsigned char *spelling;
3298 	unsigned char c;
3299 
3300 	if (token->flags & DIGRAPH)
3301 	  spelling = cpp_digraph2name (token->type);
3302 	else if (token->flags & NAMED_OP)
3303 	  goto spell_ident;
3304 	else
3305 	  spelling = TOKEN_NAME (token);
3306 
3307 	while ((c = *spelling++) != '\0')
3308 	  *buffer++ = c;
3309       }
3310       break;
3311 
3312     spell_ident:
3313     case SPELL_IDENT:
3314       if (forstring)
3315 	{
3316 	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
3317 		  NODE_LEN (token->val.node.spelling));
3318 	  buffer += NODE_LEN (token->val.node.spelling);
3319 	}
3320       else
3321 	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
3322       break;
3323 
3324     case SPELL_LITERAL:
3325       memcpy (buffer, token->val.str.text, token->val.str.len);
3326       buffer += token->val.str.len;
3327       break;
3328 
3329     case SPELL_NONE:
3330       cpp_error (pfile, CPP_DL_ICE,
3331 		 "unspellable token %s", TOKEN_NAME (token));
3332       break;
3333     }
3334 
3335   return buffer;
3336 }
3337 
3338 /* Returns TOKEN spelt as a null-terminated string.  The string is
3339    freed when the reader is destroyed.  Useful for diagnostics.  */
3340 unsigned char *
3341 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
3342 {
3343   unsigned int len = cpp_token_len (token) + 1;
3344   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
3345 
3346   end = cpp_spell_token (pfile, token, start, false);
3347   end[0] = '\0';
3348 
3349   return start;
3350 }
3351 
3352 /* Returns a pointer to a string which spells the token defined by
3353    TYPE and FLAGS.  Used by C front ends, which really should move to
3354    using cpp_token_as_text.  */
3355 const char *
3356 cpp_type2name (enum cpp_ttype type, unsigned char flags)
3357 {
3358   if (flags & DIGRAPH)
3359     return (const char *) cpp_digraph2name (type);
3360   else if (flags & NAMED_OP)
3361     return cpp_named_operator2name (type);
3362 
3363   return (const char *) token_spellings[type].name;
3364 }
3365 
3366 /* Writes the spelling of token to FP, without any preceding space.
3367    Separated from cpp_spell_token for efficiency - to avoid stdio
3368    double-buffering.  */
3369 void
3370 cpp_output_token (const cpp_token *token, FILE *fp)
3371 {
3372   switch (TOKEN_SPELL (token))
3373     {
3374     case SPELL_OPERATOR:
3375       {
3376 	const unsigned char *spelling;
3377 	int c;
3378 
3379 	if (token->flags & DIGRAPH)
3380 	  spelling = cpp_digraph2name (token->type);
3381 	else if (token->flags & NAMED_OP)
3382 	  goto spell_ident;
3383 	else
3384 	  spelling = TOKEN_NAME (token);
3385 
3386 	c = *spelling;
3387 	do
3388 	  putc (c, fp);
3389 	while ((c = *++spelling) != '\0');
3390       }
3391       break;
3392 
3393     spell_ident:
3394     case SPELL_IDENT:
3395       {
3396 	size_t i;
3397 	const unsigned char * name = NODE_NAME (token->val.node.node);
3398 
3399 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
3400 	  if (name[i] & ~0x7F)
3401 	    {
3402 	      unsigned char buffer[10];
3403 	      i += utf8_to_ucn (buffer, name + i) - 1;
3404 	      fwrite (buffer, 1, 10, fp);
3405 	    }
3406 	  else
3407 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
3408       }
3409       break;
3410 
3411     case SPELL_LITERAL:
3412       fwrite (token->val.str.text, 1, token->val.str.len, fp);
3413       break;
3414 
3415     case SPELL_NONE:
3416       /* An error, most probably.  */
3417       break;
3418     }
3419 }
3420 
3421 /* Compare two tokens.  */
3422 int
3423 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
3424 {
3425   if (a->type == b->type && a->flags == b->flags)
3426     switch (TOKEN_SPELL (a))
3427       {
3428       default:			/* Keep compiler happy.  */
3429       case SPELL_OPERATOR:
3430 	/* token_no is used to track where multiple consecutive ##
3431 	   tokens were originally located.  */
3432 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
3433       case SPELL_NONE:
3434 	return (a->type != CPP_MACRO_ARG
3435 		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
3436 		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
3437       case SPELL_IDENT:
3438 	return (a->val.node.node == b->val.node.node
3439 		&& a->val.node.spelling == b->val.node.spelling);
3440       case SPELL_LITERAL:
3441 	return (a->val.str.len == b->val.str.len
3442 		&& !memcmp (a->val.str.text, b->val.str.text,
3443 			    a->val.str.len));
3444       }
3445 
3446   return 0;
3447 }
3448 
3449 /* Returns nonzero if a space should be inserted to avoid an
3450    accidental token paste for output.  For simplicity, it is
3451    conservative, and occasionally advises a space where one is not
3452    needed, e.g. "." and ".2".  */
3453 int
3454 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
3455 		 const cpp_token *token2)
3456 {
3457   enum cpp_ttype a = token1->type, b = token2->type;
3458   cppchar_t c;
3459 
3460   if (token1->flags & NAMED_OP)
3461     a = CPP_NAME;
3462   if (token2->flags & NAMED_OP)
3463     b = CPP_NAME;
3464 
3465   c = EOF;
3466   if (token2->flags & DIGRAPH)
3467     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
3468   else if (token_spellings[b].category == SPELL_OPERATOR)
3469     c = token_spellings[b].name[0];
3470 
3471   /* Quickly get everything that can paste with an '='.  */
3472   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
3473     return 1;
3474 
3475   switch (a)
3476     {
3477     case CPP_GREATER:	return c == '>';
3478     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
3479     case CPP_PLUS:	return c == '+';
3480     case CPP_MINUS:	return c == '-' || c == '>';
3481     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
3482     case CPP_MOD:	return c == ':' || c == '>';
3483     case CPP_AND:	return c == '&';
3484     case CPP_OR:	return c == '|';
3485     case CPP_COLON:	return c == ':' || c == '>';
3486     case CPP_DEREF:	return c == '*';
3487     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
3488     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
3489     case CPP_PRAGMA:
3490     case CPP_NAME:	return ((b == CPP_NUMBER
3491 				 && name_p (pfile, &token2->val.str))
3492 				|| b == CPP_NAME
3493 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
3494     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
3495 				|| c == '.' || c == '+' || c == '-');
3496 				      /* UCNs */
3497     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
3498 				 && b == CPP_NAME)
3499 				|| (CPP_OPTION (pfile, objc)
3500 				    && token1->val.str.text[0] == '@'
3501 				    && (b == CPP_NAME || b == CPP_STRING)));
3502     case CPP_LESS_EQ:	return c == '>';
3503     case CPP_STRING:
3504     case CPP_WSTRING:
3505     case CPP_UTF8STRING:
3506     case CPP_STRING16:
3507     case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
3508 				&& (b == CPP_NAME
3509 				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
3510 					&& ISIDST (token2->val.str.text[0]))));
3511 
3512     default:		break;
3513     }
3514 
3515   return 0;
3516 }
3517 
3518 /* Output all the remaining tokens on the current line, and a newline
3519    character, to FP.  Leading whitespace is removed.  If there are
3520    macros, special token padding is not performed.  */
3521 void
3522 cpp_output_line (cpp_reader *pfile, FILE *fp)
3523 {
3524   const cpp_token *token;
3525 
3526   token = cpp_get_token (pfile);
3527   while (token->type != CPP_EOF)
3528     {
3529       cpp_output_token (token, fp);
3530       token = cpp_get_token (pfile);
3531       if (token->flags & PREV_WHITE)
3532 	putc (' ', fp);
3533     }
3534 
3535   putc ('\n', fp);
3536 }
3537 
3538 /* Return a string representation of all the remaining tokens on the
3539    current line.  The result is allocated using xmalloc and must be
3540    freed by the caller.  */
3541 unsigned char *
3542 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3543 {
3544   const cpp_token *token;
3545   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3546   unsigned int alloced = 120 + out;
3547   unsigned char *result = (unsigned char *) xmalloc (alloced);
3548 
3549   /* If DIR_NAME is empty, there are no initial contents.  */
3550   if (dir_name)
3551     {
3552       sprintf ((char *) result, "#%s ", dir_name);
3553       out += 2;
3554     }
3555 
3556   token = cpp_get_token (pfile);
3557   while (token->type != CPP_EOF)
3558     {
3559       unsigned char *last;
3560       /* Include room for a possible space and the terminating nul.  */
3561       unsigned int len = cpp_token_len (token) + 2;
3562 
3563       if (out + len > alloced)
3564 	{
3565 	  alloced *= 2;
3566 	  if (out + len > alloced)
3567 	    alloced = out + len;
3568 	  result = (unsigned char *) xrealloc (result, alloced);
3569 	}
3570 
3571       last = cpp_spell_token (pfile, token, &result[out], 0);
3572       out = last - result;
3573 
3574       token = cpp_get_token (pfile);
3575       if (token->flags & PREV_WHITE)
3576 	result[out++] = ' ';
3577     }
3578 
3579   result[out] = '\0';
3580   return result;
3581 }
3582 
/* Memory buffers.  Changing these three definitions can have a
   dramatic effect on performance.  The values here are reasonable
   defaults, but might be tuned.  If you adjust them, be sure to test
   across a range of uses of cpplib, including heavy nested
   function-like macro expansion.  Also check the change in peak memory
   usage (NJAMD is a good tool for this).

   MIN_BUFF_SIZE is the smallest payload size new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free-list buffer
   that _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   pointer to its limit.  Every macro argument is parenthesized so
   that an argument containing operators expands as a single operand.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3597 
3598 /* Create a new allocation buffer.  Place the control block at the end
3599    of the buffer, so that buffer overflows will cause immediate chaos.  */
3600 static _cpp_buff *
3601 new_buff (size_t len)
3602 {
3603   _cpp_buff *result;
3604   unsigned char *base;
3605 
3606   if (len < MIN_BUFF_SIZE)
3607     len = MIN_BUFF_SIZE;
3608   len = CPP_ALIGN (len);
3609 
3610 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3611   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3612      struct first.  */
3613   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3614   base = XNEWVEC (unsigned char, len + slen);
3615   result = (_cpp_buff *) base;
3616   base += slen;
3617 #else
3618   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3619   result = (_cpp_buff *) (base + len);
3620 #endif
3621   result->base = base;
3622   result->cur = base;
3623   result->limit = base + len;
3624   result->next = NULL;
3625   return result;
3626 }
3627 
3628 /* Place a chain of unwanted allocation buffers on the free list.  */
3629 void
3630 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3631 {
3632   _cpp_buff *end = buff;
3633 
3634   while (end->next)
3635     end = end->next;
3636   end->next = pfile->free_buffs;
3637   pfile->free_buffs = buff;
3638 }
3639 
3640 /* Return a free buffer of size at least MIN_SIZE.  */
3641 _cpp_buff *
3642 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3643 {
3644   _cpp_buff *result, **p;
3645 
3646   for (p = &pfile->free_buffs;; p = &(*p)->next)
3647     {
3648       size_t size;
3649 
3650       if (*p == NULL)
3651 	return new_buff (min_size);
3652       result = *p;
3653       size = result->limit - result->base;
3654       /* Return a buffer that's big enough, but don't waste one that's
3655          way too big.  */
3656       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3657 	break;
3658     }
3659 
3660   *p = result->next;
3661   result->next = NULL;
3662   result->cur = result->base;
3663   return result;
3664 }
3665 
3666 /* Creates a new buffer with enough space to hold the uncommitted
3667    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3668    the excess bytes to the new buffer.  Chains the new buffer after
3669    BUFF, and returns the new buffer.  */
3670 _cpp_buff *
3671 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3672 {
3673   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3674   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3675 
3676   buff->next = new_buff;
3677   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3678   return new_buff;
3679 }
3680 
3681 /* Creates a new buffer with enough space to hold the uncommitted
3682    remaining bytes of the buffer pointed to by BUFF, and at least
3683    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3684    Chains the new buffer before the buffer pointed to by BUFF, and
3685    updates the pointer to point to the new buffer.  */
3686 void
3687 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3688 {
3689   _cpp_buff *new_buff, *old_buff = *pbuff;
3690   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3691 
3692   new_buff = _cpp_get_buff (pfile, size);
3693   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3694   new_buff->next = old_buff;
3695   *pbuff = new_buff;
3696 }
3697 
3698 /* Free a chain of buffers starting at BUFF.  */
3699 void
3700 _cpp_free_buff (_cpp_buff *buff)
3701 {
3702   _cpp_buff *next;
3703 
3704   for (; buff; buff = next)
3705     {
3706       next = buff->next;
3707 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3708       free (buff);
3709 #else
3710       free (buff->base);
3711 #endif
3712     }
3713 }
3714 
3715 /* Allocate permanent, unaligned storage of length LEN.  */
3716 unsigned char *
3717 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3718 {
3719   _cpp_buff *buff = pfile->u_buff;
3720   unsigned char *result = buff->cur;
3721 
3722   if (len > (size_t) (buff->limit - result))
3723     {
3724       buff = _cpp_get_buff (pfile, len);
3725       buff->next = pfile->u_buff;
3726       pfile->u_buff = buff;
3727       result = buff->cur;
3728     }
3729 
3730   buff->cur = result + len;
3731   return result;
3732 }
3733 
3734 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3735    That buffer is used for growing allocations when saving macro
3736    replacement lists in a #define, and when parsing an answer to an
3737    assertion in #assert, #unassert or #if (and therefore possibly
3738    whilst expanding macros).  It therefore must not be used by any
3739    code that they might call: specifically the lexer and the guts of
3740    the macro expander.
3741 
3742    All existing other uses clearly fit this restriction: storing
3743    registered pragmas during initialization.  */
3744 unsigned char *
3745 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3746 {
3747   _cpp_buff *buff = pfile->a_buff;
3748   unsigned char *result = buff->cur;
3749 
3750   if (len > (size_t) (buff->limit - result))
3751     {
3752       buff = _cpp_get_buff (pfile, len);
3753       buff->next = pfile->a_buff;
3754       pfile->a_buff = buff;
3755       result = buff->cur;
3756     }
3757 
3758   buff->cur = result + len;
3759   return result;
3760 }
3761 
3762 /* Commit or allocate storage from a buffer.  */
3763 
3764 void *
3765 _cpp_commit_buff (cpp_reader *pfile, size_t size)
3766 {
3767   void *ptr = BUFF_FRONT (pfile->a_buff);
3768 
3769   if (pfile->hash_table->alloc_subobject)
3770     {
3771       void *copy = pfile->hash_table->alloc_subobject (size);
3772       memcpy (copy, ptr, size);
3773       ptr = copy;
3774     }
3775   else
3776     BUFF_FRONT (pfile->a_buff) += size;
3777 
3778   return ptr;
3779 }
3780 
3781 /* Say which field of TOK is in use.  */
3782 
3783 enum cpp_token_fld_kind
3784 cpp_token_val_index (const cpp_token *tok)
3785 {
3786   switch (TOKEN_SPELL (tok))
3787     {
3788     case SPELL_IDENT:
3789       return CPP_TOKEN_FLD_NODE;
3790     case SPELL_LITERAL:
3791       return CPP_TOKEN_FLD_STR;
3792     case SPELL_OPERATOR:
3793       /* Operands which were originally spelled as ident keep around
3794          the node for the exact spelling.  */
3795       if (tok->flags & NAMED_OP)
3796 	return CPP_TOKEN_FLD_NODE;
3797       else if (tok->type == CPP_PASTE)
3798 	return CPP_TOKEN_FLD_TOKEN_NO;
3799       else
3800 	return CPP_TOKEN_FLD_NONE;
3801     case SPELL_NONE:
3802       if (tok->type == CPP_MACRO_ARG)
3803 	return CPP_TOKEN_FLD_ARG_NO;
3804       else if (tok->type == CPP_PADDING)
3805 	return CPP_TOKEN_FLD_SOURCE;
3806       else if (tok->type == CPP_PRAGMA)
3807 	return CPP_TOKEN_FLD_PRAGMA;
3808       /* fall through */
3809     default:
3810       return CPP_TOKEN_FLD_NONE;
3811     }
3812 }
3813 
/* All tokens lexed in R after calling this function will be forced to
   have their location_t to be LOC, until
   cpp_stop_forcing_token_locations is called for R.  This function
   itself only records LOC in R's forced_token_location field.  */

void
cpp_force_token_locations (cpp_reader *r, location_t loc)
{
  r->forced_token_location = loc;
}
3823 
/* Go back to assigning locations naturally for lexed tokens, by
   resetting R's forced_token_location field to 0.  */

void
cpp_stop_forcing_token_locations (cpp_reader *r)
{
  r->forced_token_location = 0;
}
3830 }
3831