xref: /netbsd-src/external/gpl3/gcc.old/dist/libcpp/lex.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7 
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26 
/* Spelling category of a token type.  Each entry of the token_spellings
   table carries one of these values.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Category given to every OP () table entry.  */
  SPELL_IDENT,		/* Selected by TK (e, IDENT) table entries.  */
  SPELL_LITERAL,	/* Selected by TK (e, LITERAL) table entries.  */
  SPELL_NONE		/* Selected by TK (e, NONE) table entries.  */
};
34 
35 struct token_spelling
36 {
37   enum spell_type category;
38   const unsigned char *name;
39 };
40 
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
43 
44 #define OP(e, s) { SPELL_OPERATOR, UC s  },
45 #define TK(e, s) { SPELL_ ## s,    UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49 
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52 
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void store_comment (cpp_reader *, cpp_token *);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 			    unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
64 
65 static _cpp_buff *new_buff (size_t);
66 
67 
68 /* Utility routine:
69 
70    Compares, the token TOKEN to the NUL-terminated string STRING.
71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
72 int
73 cpp_ideq (const cpp_token *token, const char *string)
74 {
75   if (token->type != CPP_NAME)
76     return 0;
77 
78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
79 }
80 
81 /* Record a note TYPE at byte POS into the current cleaned logical
82    line.  */
83 static void
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 {
86   if (buffer->notes_used == buffer->notes_cap)
87     {
88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90                                   buffer->notes_cap);
91     }
92 
93   buffer->notes[buffer->notes_used].pos = pos;
94   buffer->notes[buffer->notes_used].type = type;
95   buffer->notes_used++;
96 }
97 
98 
/* Fast scanning for the characters that are special to line cleaning.
   The loop that uses this is very hot, so it is worth providing
   optimized character-scanning implementations; anything complicated
   is left to the slow path in the caller.

   Exactly one of the preprocessor paths below must provide

     const uchar *search_line_fast (const uchar *s, const uchar *end);

   which searches between S and END for \n, \r, \\ or ? and returns a
   pointer to the character found.

   The last character of the buffer is *always* a newline, as forced by
   _cpp_convert_input, so the implementations need not test for the end
   of the buffer explicitly.  */

/* Configure only defines WORDS_BIGENDIAN on big-endian hosts; give it a
   value of zero elsewhere so it can be used in ordinary expressions.  */
#if !defined (WORDS_BIGENDIAN)
#define WORDS_BIGENDIAN 0
#endif
119 
/* word_type should be the widest integer that fits in a register.
   <stdint.h> has no such type.  On most hosts unsigned long would do,
   but LLP64 (MS) hosts make long narrower than a register, so when
   building with GCC ask for the machine word mode directly.  */
#if defined (__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The word-at-a-time code handles only 4- and 8-byte words.  The array
   bound becomes negative, and compilation fails, for any other size.  */
typedef char check_word_type_size
  [(sizeof (word_type) == 4 || sizeof (word_type) == 8) ? 1 : -1];
134 
135 /* Return X with the first N bytes forced to values that won't match one
136    of the interesting characters.  Note that NUL is not interesting.  */
137 
138 static inline word_type
139 acc_char_mask_misalign (word_type val, unsigned int n)
140 {
141   word_type mask = -1;
142   if (WORDS_BIGENDIAN)
143     mask >>= n * 8;
144   else
145     mask <<= n * 8;
146   return val & mask;
147 }
148 
149 /* Return X replicated to all byte positions within WORD_TYPE.  */
150 
151 static inline word_type
152 acc_char_replicate (uchar x)
153 {
154   word_type ret;
155 
156   ret = (x << 24) | (x << 16) | (x << 8) | x;
157   if (sizeof(word_type) == 8)
158     ret = (ret << 16 << 16) | ret;
159   return ret;
160 }
161 
162 /* Return non-zero if some byte of VAL is (probably) C.  */
163 
164 static inline word_type
165 acc_char_cmp (word_type val, word_type c)
166 {
167 #if defined(__GNUC__) && defined(__alpha__)
168   /* We can get exact results using a compare-bytes instruction.
169      Get (val == c) via (0 >= (val ^ c)).  */
170   return __builtin_alpha_cmpbge (0, val ^ c);
171 #else
172   word_type magic = 0x7efefefeU;
173   if (sizeof(word_type) == 8)
174     magic = (magic << 16 << 16) | 0xfefefefeU;
175   magic |= 1;
176 
177   val ^= c;
178   return ((val + magic) ^ ~val) & ~magic;
179 #endif
180 }
181 
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183    the found character.  If this was a false positive, return -1.  */
184 
185 static inline int
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 		word_type val ATTRIBUTE_UNUSED)
188 {
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190   /* The cmpbge instruction sets *bits* of the result corresponding to
191      matches in the bytes with no false positives.  */
192   return __builtin_ctzl (cmp);
193 #else
194   unsigned int i;
195 
196   /* ??? It would be nice to force unrolling here,
197      and have all of these constants folded.  */
198   for (i = 0; i < sizeof(word_type); ++i)
199     {
200       uchar c;
201       if (WORDS_BIGENDIAN)
202 	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203       else
204 	c = (val >> i * 8) & 0xff;
205 
206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 	return i;
208     }
209 
210   return -1;
211 #endif
212 }
213 
214 /* A version of the fast scanner using bit fiddling techniques.
215 
216    For 32-bit words, one would normally perform 16 comparisons and
217    16 branches.  With this algorithm one performs 24 arithmetic
218    operations and one branch.  Whether this is faster with a 32-bit
219    word size is going to be somewhat system dependent.
220 
221    For 64-bit words, we eliminate twice the number of comparisons
222    and branches without increasing the number of arithmetic operations.
223    It's almost certainly going to be a win with 64-bit word size.  */
224 
225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
226   ATTRIBUTE_UNUSED;
227 
228 static const uchar *
229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230 {
231   const word_type repl_nl = acc_char_replicate ('\n');
232   const word_type repl_cr = acc_char_replicate ('\r');
233   const word_type repl_bs = acc_char_replicate ('\\');
234   const word_type repl_qm = acc_char_replicate ('?');
235 
236   unsigned int misalign;
237   const word_type *p;
238   word_type val, t;
239 
240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242   val = *p;
243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244   if (misalign)
245     val = acc_char_mask_misalign (val, misalign);
246 
247   /* Main loop.  */
248   while (1)
249     {
250       t  = acc_char_cmp (val, repl_nl);
251       t |= acc_char_cmp (val, repl_cr);
252       t |= acc_char_cmp (val, repl_bs);
253       t |= acc_char_cmp (val, repl_qm);
254 
255       if (__builtin_expect (t != 0, 0))
256 	{
257 	  int i = acc_char_index (t, val);
258 	  if (i >= 0)
259 	    return (const uchar *)p + i;
260 	}
261 
262       val = *++p;
263     }
264 }
265 
266 /* Disable on Solaris 2/x86 until the following problem can be properly
267    autoconfed:
268 
269    The Solaris 10+ assembler tags objects with the instruction set
270    extensions used, so SSE4.2 executables cannot run on machines that
271    don't support that extension.  */
272 
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
274 
/* Replicated character data shared between the vector implementations:
   row 0 is newline, row 1 carriage return, row 2 backslash and row 3
   question mark.  Outside of a context with vector support we cannot
   define compatible vector types, so the rows are plain characters.  */
#define REPL16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
289 
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
291 
292    This uses the PMOVMSKB instruction which was introduced with "MMX2",
293    which was packaged into SSE1; it is also present in the AMD MMX
294    extension.  Mark the function as using "sse" so that we emit a real
295    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
296 
297 static const uchar *
298 #ifndef __SSE__
299 __attribute__((__target__("sse")))
300 #endif
301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
302 {
303   typedef char v8qi __attribute__ ((__vector_size__ (8)));
304   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
305 
306   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
307   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
308   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
309   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
310 
311   unsigned int misalign, found, mask;
312   const v8qi *p;
313   v8qi data, t, c;
314 
315   /* Align the source pointer.  While MMX doesn't generate unaligned data
316      faults, this allows us to safely scan to the end of the buffer without
317      reading beyond the end of the last page.  */
318   misalign = (uintptr_t)s & 7;
319   p = (const v8qi *)((uintptr_t)s & -8);
320   data = *p;
321 
322   /* Create a mask for the bytes that are valid within the first
323      16-byte block.  The Idea here is that the AND with the mask
324      within the loop is "free", since we need some AND or TEST
325      insn in order to set the flags for the branch anyway.  */
326   mask = -1u << misalign;
327 
328   /* Main loop processing 8 bytes at a time.  */
329   goto start;
330   do
331     {
332       data = *++p;
333       mask = -1;
334 
335     start:
336       t = __builtin_ia32_pcmpeqb(data, repl_nl);
337       c = __builtin_ia32_pcmpeqb(data, repl_cr);
338       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
339       c = __builtin_ia32_pcmpeqb(data, repl_bs);
340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341       c = __builtin_ia32_pcmpeqb(data, repl_qm);
342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343       found = __builtin_ia32_pmovmskb (t);
344       found &= mask;
345     }
346   while (!found);
347 
348   __builtin_ia32_emms ();
349 
350   /* FOUND contains 1 in bits for which we matched a relevant
351      character.  Conversion to the byte index is trivial.  */
352   found = __builtin_ctz(found);
353   return (const uchar *)p + found;
354 }
355 
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
357 
358 static const uchar *
359 #ifndef __SSE2__
360 __attribute__((__target__("sse2")))
361 #endif
362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
363 {
364   typedef char v16qi __attribute__ ((__vector_size__ (16)));
365 
366   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
367   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
368   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
369   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
370 
371   unsigned int misalign, found, mask;
372   const v16qi *p;
373   v16qi data, t;
374 
375   /* Align the source pointer.  */
376   misalign = (uintptr_t)s & 15;
377   p = (const v16qi *)((uintptr_t)s & -16);
378   data = *p;
379 
380   /* Create a mask for the bytes that are valid within the first
381      16-byte block.  The Idea here is that the AND with the mask
382      within the loop is "free", since we need some AND or TEST
383      insn in order to set the flags for the branch anyway.  */
384   mask = -1u << misalign;
385 
386   /* Main loop processing 16 bytes at a time.  */
387   goto start;
388   do
389     {
390       data = *++p;
391       mask = -1;
392 
393     start:
394       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
395       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
396       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
397       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
398       found = __builtin_ia32_pmovmskb128 (t);
399       found &= mask;
400     }
401   while (!found);
402 
403   /* FOUND contains 1 in bits for which we matched a relevant
404      character.  Conversion to the byte index is trivial.  */
405   found = __builtin_ctz(found);
406   return (const uchar *)p + found;
407 }
408 
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.
   Returns a pointer to the first \n, \r, ? or \\ at or after S.  END is
   only used to decide whether an unaligned 16-byte load is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  const uintptr_t addr = (uintptr_t)s;
  uintptr_t index;

  /* Unaligned input gets one unaligned probe before the main loop.  */
  if (addr & 15)
    {
      v16qi head;

      /* With fewer than 16 bytes left both in the buffer and on the
	 page, a 16-byte read might generate a spurious page fault.
	 Defer to the SSE2 implementation, which already handles
	 alignment.  */
      if (__builtin_expect (end - s < 16, 0)
	  && __builtin_expect ((addr & 0xfff) > 0xff0, 0))
	return search_line_sse2 (s, end);

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
	 memory need not be aligned.  */
      head = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, head, 16, 0);

      if (__builtin_expect (index < 16, 0))
	return s + index;

      /* Advance the pointer to an aligned address.  We will re-scan a
	 few bytes, but we no longer need care for reading past the
	 end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((addr + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
     in inline assembly, we can make proper use of the flags set.  */
  __asm (      "sub $16, %1\n"
	"	.balign 16\n"
	"0:	add $16, %1\n"
	"	%vpcmpestri $0, (%1), %2\n"
	"	jnc 0b"
	: "=&c"(index), "+r"(s)
	: "x"(search), "a"(4), "d"(16));

  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
471 
472 /* Check the CPU capabilities.  */
473 
474 #include "../gcc/config/i386/cpuid.h"
475 
476 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
477 static search_line_fast_type search_line_fast;
478 
479 #define HAVE_init_vectorized_lexer 1
480 static inline void
481 init_vectorized_lexer (void)
482 {
483   unsigned dummy, ecx = 0, edx = 0;
484   search_line_fast_type impl = search_line_acc_char;
485   int minimum = 0;
486 
487 #if defined(__SSE4_2__)
488   minimum = 3;
489 #elif defined(__SSE2__)
490   minimum = 2;
491 #elif defined(__SSE__)
492   minimum = 1;
493 #endif
494 
495   if (minimum == 3)
496     impl = search_line_sse42;
497   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
498     {
499       if (minimum == 3 || (ecx & bit_SSE4_2))
500         impl = search_line_sse42;
501       else if (minimum == 2 || (edx & bit_SSE2))
502 	impl = search_line_sse2;
503       else if (minimum == 1 || (edx & bit_SSE))
504 	impl = search_line_mmx;
505     }
506   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
507     {
508       if (minimum == 1
509 	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
510 	impl = search_line_mmx;
511     }
512 
513   search_line_fast = impl;
514 }
515 
516 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
517 
518 /* A vection of the fast scanner using AltiVec vectorized byte compares
519    and VSX unaligned loads (when VSX is available).  This is otherwise
520    the same as the pre-GCC 5 version.  */
521 
522 ATTRIBUTE_NO_SANITIZE_UNDEFINED
523 static const uchar *
524 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
525 {
526   typedef __attribute__((altivec(vector))) unsigned char vc;
527 
528   const vc repl_nl = {
529     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
530     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
531   };
532   const vc repl_cr = {
533     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
534     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
535   };
536   const vc repl_bs = {
537     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
538     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
539   };
540   const vc repl_qm = {
541     '?', '?', '?', '?', '?', '?', '?', '?',
542     '?', '?', '?', '?', '?', '?', '?', '?',
543   };
544   const vc zero = { 0 };
545 
546   vc data, t;
547 
548   /* Main loop processing 16 bytes at a time.  */
549   do
550     {
551       vc m_nl, m_cr, m_bs, m_qm;
552 
553       data = *((const vc *)s);
554       s += 16;
555 
556       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
557       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
558       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
559       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
560       t = (m_nl | m_cr) | (m_bs | m_qm);
561 
562       /* T now contains 0xff in bytes for which we matched one of the relevant
563 	 characters.  We want to exit the loop if any byte in T is non-zero.
564 	 Below is the expansion of vec_any_ne(t, zero).  */
565     }
566   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
567 
568   /* Restore s to to point to the 16 bytes we just processed.  */
569   s -= 16;
570 
571   {
572 #define N  (sizeof(vc) / sizeof(long))
573 
574     union {
575       vc v;
576       /* Statically assert that N is 2 or 4.  */
577       unsigned long l[(N == 2 || N == 4) ? N : -1];
578     } u;
579     unsigned long l, i = 0;
580 
581     u.v = t;
582 
583     /* Find the first word of T that is non-zero.  */
584     switch (N)
585       {
586       case 4:
587 	l = u.l[i++];
588 	if (l != 0)
589 	  break;
590 	s += sizeof(unsigned long);
591 	l = u.l[i++];
592 	if (l != 0)
593 	  break;
594 	s += sizeof(unsigned long);
595       case 2:
596 	l = u.l[i++];
597 	if (l != 0)
598 	  break;
599 	s += sizeof(unsigned long);
600 	l = u.l[i];
601       }
602 
603     /* L now contains 0xff in bytes for which we matched one of the
604        relevant characters.  We can find the byte index by finding
605        its bit index and dividing by 8.  */
606 #ifdef __BIG_ENDIAN__
607     l = __builtin_clzl(l) >> 3;
608 #else
609     l = __builtin_ctzl(l) >> 3;
610 #endif
611     return s + l;
612 
613 #undef N
614   }
615 }
616 
617 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
618 
619 /* A vection of the fast scanner using AltiVec vectorized byte compares.
620    This cannot be used for little endian because vec_lvsl/lvsr are
621    deprecated for little endian and the code won't work properly.  */
622 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
623    so we can't compile this function without -maltivec on the command line
624    (or implied by some other switch).  */
625 
626 static const uchar *
627 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
628 {
629   typedef __attribute__((altivec(vector))) unsigned char vc;
630 
631   const vc repl_nl = {
632     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
633     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
634   };
635   const vc repl_cr = {
636     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
637     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
638   };
639   const vc repl_bs = {
640     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
641     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
642   };
643   const vc repl_qm = {
644     '?', '?', '?', '?', '?', '?', '?', '?',
645     '?', '?', '?', '?', '?', '?', '?', '?',
646   };
647   const vc ones = {
648     -1, -1, -1, -1, -1, -1, -1, -1,
649     -1, -1, -1, -1, -1, -1, -1, -1,
650   };
651   const vc zero = { 0 };
652 
653   vc data, mask, t;
654 
655   /* Altivec loads automatically mask addresses with -16.  This lets us
656      issue the first load as early as possible.  */
657   data = __builtin_vec_ld(0, (const vc *)s);
658 
659   /* Discard bytes before the beginning of the buffer.  Do this by
660      beginning with all ones and shifting in zeros according to the
661      mis-alignment.  The LVSR instruction pulls the exact shift we
662      want from the address.  */
663   mask = __builtin_vec_lvsr(0, s);
664   mask = __builtin_vec_perm(zero, ones, mask);
665   data &= mask;
666 
667   /* While altivec loads mask addresses, we still need to align S so
668      that the offset we compute at the end is correct.  */
669   s = (const uchar *)((uintptr_t)s & -16);
670 
671   /* Main loop processing 16 bytes at a time.  */
672   goto start;
673   do
674     {
675       vc m_nl, m_cr, m_bs, m_qm;
676 
677       s += 16;
678       data = __builtin_vec_ld(0, (const vc *)s);
679 
680     start:
681       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
682       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
683       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
684       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
685       t = (m_nl | m_cr) | (m_bs | m_qm);
686 
687       /* T now contains 0xff in bytes for which we matched one of the relevant
688 	 characters.  We want to exit the loop if any byte in T is non-zero.
689 	 Below is the expansion of vec_any_ne(t, zero).  */
690     }
691   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
692 
693   {
694 #define N  (sizeof(vc) / sizeof(long))
695 
696     union {
697       vc v;
698       /* Statically assert that N is 2 or 4.  */
699       unsigned long l[(N == 2 || N == 4) ? N : -1];
700     } u;
701     unsigned long l, i = 0;
702 
703     u.v = t;
704 
705     /* Find the first word of T that is non-zero.  */
706     switch (N)
707       {
708       case 4:
709 	l = u.l[i++];
710 	if (l != 0)
711 	  break;
712 	s += sizeof(unsigned long);
713 	l = u.l[i++];
714 	if (l != 0)
715 	  break;
716 	s += sizeof(unsigned long);
717       case 2:
718 	l = u.l[i++];
719 	if (l != 0)
720 	  break;
721 	s += sizeof(unsigned long);
722 	l = u.l[i];
723       }
724 
725     /* L now contains 0xff in bytes for which we matched one of the
726        relevant characters.  We can find the byte index by finding
727        its bit index and dividing by 8.  */
728     l = __builtin_clzl(l) >> 3;
729     return s + l;
730 
731 #undef N
732   }
733 }
734 
735 #elif defined (__ARM_NEON)
736 #include "arm_neon.h"
737 
738 static const uchar *
739 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
740 {
741   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
742   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
743   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
744   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
745   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
746 
747   unsigned int misalign, found, mask;
748   const uint8_t *p;
749   uint8x16_t data;
750 
751   /* Align the source pointer.  */
752   misalign = (uintptr_t)s & 15;
753   p = (const uint8_t *)((uintptr_t)s & -16);
754   data = vld1q_u8 (p);
755 
756   /* Create a mask for the bytes that are valid within the first
757      16-byte block.  The Idea here is that the AND with the mask
758      within the loop is "free", since we need some AND or TEST
759      insn in order to set the flags for the branch anyway.  */
760   mask = (-1u << misalign) & 0xffff;
761 
762   /* Main loop, processing 16 bytes at a time.  */
763   goto start;
764 
765   do
766     {
767       uint8x8_t l;
768       uint16x4_t m;
769       uint32x2_t n;
770       uint8x16_t t, u, v, w;
771 
772       p += 16;
773       data = vld1q_u8 (p);
774       mask = 0xffff;
775 
776     start:
777       t = vceqq_u8 (data, repl_nl);
778       u = vceqq_u8 (data, repl_cr);
779       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
780       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
781       t = vandq_u8 (vorrq_u8 (v, w), xmask);
782       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
783       m = vpaddl_u8 (l);
784       n = vpaddl_u16 (m);
785 
786       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
787 	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
788       found &= mask;
789     }
790   while (!found);
791 
792   /* FOUND contains 1 in bits for which we matched a relevant
793      character.  Conversion to the byte index is trivial.  */
794   found = __builtin_ctz (found);
795   return (const uchar *)p + found;
796 }
797 
798 #else
799 
/* Only one accelerated alternative is available here.  Make
   search_line_fast a direct name for it, rather than a pointer, so
   that inlining is encouraged.  */

#define search_line_fast  search_line_acc_char
804 
805 #endif
806 
/* Initialize the lexer if needed.  Only configurations that define
   HAVE_init_vectorized_lexer have any work to do: they select the
   search_line_fast implementation here.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
816 
817 /* Returns with a logical line that contains no escaped newlines or
818    trigraphs.  This is a time-critical inner loop.  */
819 void
820 _cpp_clean_line (cpp_reader *pfile)
821 {
822   cpp_buffer *buffer;
823   const uchar *s;
824   uchar c, *d, *p;
825 
826   buffer = pfile->buffer;
827   buffer->cur_note = buffer->notes_used = 0;
828   buffer->cur = buffer->line_base = buffer->next_line;
829   buffer->need_line = false;
830   s = buffer->next_line;
831 
832   if (!buffer->from_stage3)
833     {
834       const uchar *pbackslash = NULL;
835 
836       /* Fast path.  This is the common case of an un-escaped line with
837 	 no trigraphs.  The primary win here is by not writing any
838 	 data back to memory until we have to.  */
839       while (1)
840 	{
841 	  /* Perform an optimized search for \n, \r, \\, ?.  */
842 	  s = search_line_fast (s, buffer->rlimit);
843 
844 	  c = *s;
845 	  if (c == '\\')
846 	    {
847 	      /* Record the location of the backslash and continue.  */
848 	      pbackslash = s++;
849 	    }
850 	  else if (__builtin_expect (c == '?', 0))
851 	    {
852 	      if (__builtin_expect (s[1] == '?', false)
853 		   && _cpp_trigraph_map[s[2]])
854 		{
855 		  /* Have a trigraph.  We may or may not have to convert
856 		     it.  Add a line note regardless, for -Wtrigraphs.  */
857 		  add_line_note (buffer, s, s[2]);
858 		  if (CPP_OPTION (pfile, trigraphs))
859 		    {
860 		      /* We do, and that means we have to switch to the
861 		         slow path.  */
862 		      d = (uchar *) s;
863 		      *d = _cpp_trigraph_map[s[2]];
864 		      s += 2;
865 		      goto slow_path;
866 		    }
867 		}
868 	      /* Not a trigraph.  Continue on fast-path.  */
869 	      s++;
870 	    }
871 	  else
872 	    break;
873 	}
874 
875       /* This must be \r or \n.  We're either done, or we'll be forced
876 	 to write back to the buffer and continue on the slow path.  */
877       d = (uchar *) s;
878 
879       if (__builtin_expect (s == buffer->rlimit, false))
880 	goto done;
881 
882       /* DOS line ending? */
883       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
884 	{
885 	  s++;
886 	  if (s == buffer->rlimit)
887 	    goto done;
888 	}
889 
890       if (__builtin_expect (pbackslash == NULL, true))
891 	goto done;
892 
893       /* Check for escaped newline.  */
894       p = d;
895       while (is_nvspace (p[-1]))
896 	p--;
897       if (p - 1 != pbackslash)
898 	goto done;
899 
900       /* Have an escaped newline; process it and proceed to
901 	 the slow path.  */
902       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
903       d = p - 2;
904       buffer->next_line = p - 1;
905 
906     slow_path:
907       while (1)
908 	{
909 	  c = *++s;
910 	  *++d = c;
911 
912 	  if (c == '\n' || c == '\r')
913 	    {
914 	      /* Handle DOS line endings.  */
915 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
916 		s++;
917 	      if (s == buffer->rlimit)
918 		break;
919 
920 	      /* Escaped?  */
921 	      p = d;
922 	      while (p != buffer->next_line && is_nvspace (p[-1]))
923 		p--;
924 	      if (p == buffer->next_line || p[-1] != '\\')
925 		break;
926 
927 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
928 	      d = p - 2;
929 	      buffer->next_line = p - 1;
930 	    }
931 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
932 	    {
933 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
934 	      add_line_note (buffer, d, s[2]);
935 	      if (CPP_OPTION (pfile, trigraphs))
936 		{
937 		  *d = _cpp_trigraph_map[s[2]];
938 		  s += 2;
939 		}
940 	    }
941 	}
942     }
943   else
944     {
945       while (*s != '\n' && *s != '\r')
946 	s++;
947       d = (uchar *) s;
948 
949       /* Handle DOS line endings.  */
950       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
951 	s++;
952     }
953 
954  done:
955   *d = '\n';
956   /* A sentinel note that should never be processed.  */
957   add_line_note (buffer, d + 1, '\n');
958   buffer->next_line = s + 1;
959 }
960 
961 /* Return true if the trigraph indicated by NOTE should be warned
962    about in a comment.  */
963 static bool
964 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
965 {
966   const uchar *p;
967 
968   /* Within comments we don't warn about trigraphs, unless the
969      trigraph forms an escaped newline, as that may change
970      behavior.  */
971   if (note->type != '/')
972     return false;
973 
974   /* If -trigraphs, then this was an escaped newline iff the next note
975      is coincident.  */
976   if (CPP_OPTION (pfile, trigraphs))
977     return note[1].pos == note->pos;
978 
979   /* Otherwise, see if this forms an escaped newline.  */
980   p = note->pos + 3;
981   while (is_nvspace (*p))
982     p++;
983 
984   /* There might have been escaped newlines between the trigraph and the
985      newline we found.  Hence the position test.  */
986   return (*p == '\n' && p < note[1].pos);
987 }
988 
989 /* Process the notes created by add_line_note as far as the current
990    location.  */
991 void
992 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
993 {
994   cpp_buffer *buffer = pfile->buffer;
995 
996   for (;;)
997     {
998       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
999       unsigned int col;
1000 
1001       if (note->pos > buffer->cur)
1002 	break;
1003 
1004       buffer->cur_note++;
1005       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1006 
1007       if (note->type == '\\' || note->type == ' ')
1008 	{
1009 	  if (note->type == ' ' && !in_comment)
1010 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1011 				 "backslash and newline separated by space");
1012 
1013 	  if (buffer->next_line > buffer->rlimit)
1014 	    {
1015 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1016 				   "backslash-newline at end of file");
1017 	      /* Prevent "no newline at end of file" warning.  */
1018 	      buffer->next_line = buffer->rlimit;
1019 	    }
1020 
1021 	  buffer->line_base = note->pos;
1022 	  CPP_INCREMENT_LINE (pfile, 0);
1023 	}
1024       else if (_cpp_trigraph_map[note->type])
1025 	{
1026 	  if (CPP_OPTION (pfile, warn_trigraphs)
1027 	      && (!in_comment || warn_in_comment (pfile, note)))
1028 	    {
1029 	      if (CPP_OPTION (pfile, trigraphs))
1030 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1031                                        pfile->line_table->highest_line, col,
1032 				       "trigraph ??%c converted to %c",
1033 				       note->type,
1034 				       (int) _cpp_trigraph_map[note->type]);
1035 	      else
1036 		{
1037 		  cpp_warning_with_line
1038 		    (pfile, CPP_W_TRIGRAPHS,
1039                      pfile->line_table->highest_line, col,
1040 		     "trigraph ??%c ignored, use -trigraphs to enable",
1041 		     note->type);
1042 		}
1043 	    }
1044 	}
1045       else if (note->type == 0)
1046 	/* Already processed in lex_raw_string.  */;
1047       else
1048 	abort ();
1049     }
1050 }
1051 
1052 /* Skip a C-style block comment.  We find the end of the comment by
1053    seeing if an asterisk is before every '/' we encounter.  Returns
1054    nonzero if comment terminated by EOF, zero otherwise.
1055 
1056    Buffer->cur points to the initial asterisk of the comment.  */
1057 bool
1058 _cpp_skip_block_comment (cpp_reader *pfile)
1059 {
1060   cpp_buffer *buffer = pfile->buffer;
1061   const uchar *cur = buffer->cur;
1062   uchar c;
1063 
1064   cur++;
1065   if (*cur == '/')
1066     cur++;
1067 
1068   for (;;)
1069     {
1070       /* People like decorating comments with '*', so check for '/'
1071 	 instead for efficiency.  */
1072       c = *cur++;
1073 
1074       if (c == '/')
1075 	{
1076 	  if (cur[-2] == '*')
1077 	    break;
1078 
1079 	  /* Warn about potential nested comments, but not if the '/'
1080 	     comes immediately before the true comment delimiter.
1081 	     Don't bother to get it right across escaped newlines.  */
1082 	  if (CPP_OPTION (pfile, warn_comments)
1083 	      && cur[0] == '*' && cur[1] != '/')
1084 	    {
1085 	      buffer->cur = cur;
1086 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1087 				     pfile->line_table->highest_line,
1088 				     CPP_BUF_COL (buffer),
1089 				     "\"/*\" within comment");
1090 	    }
1091 	}
1092       else if (c == '\n')
1093 	{
1094 	  unsigned int cols;
1095 	  buffer->cur = cur - 1;
1096 	  _cpp_process_line_notes (pfile, true);
1097 	  if (buffer->next_line >= buffer->rlimit)
1098 	    return true;
1099 	  _cpp_clean_line (pfile);
1100 
1101 	  cols = buffer->next_line - buffer->line_base;
1102 	  CPP_INCREMENT_LINE (pfile, cols);
1103 
1104 	  cur = buffer->cur;
1105 	}
1106     }
1107 
1108   buffer->cur = cur;
1109   _cpp_process_line_notes (pfile, true);
1110   return false;
1111 }
1112 
1113 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1114    terminating newline.  Handles escaped newlines.  Returns nonzero
1115    if a multiline comment.  */
1116 static int
1117 skip_line_comment (cpp_reader *pfile)
1118 {
1119   cpp_buffer *buffer = pfile->buffer;
1120   source_location orig_line = pfile->line_table->highest_line;
1121 
1122   while (*buffer->cur != '\n')
1123     buffer->cur++;
1124 
1125   _cpp_process_line_notes (pfile, true);
1126   return orig_line != pfile->line_table->highest_line;
1127 }
1128 
1129 /* Skips whitespace, saving the next non-whitespace character.  */
1130 static void
1131 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1132 {
1133   cpp_buffer *buffer = pfile->buffer;
1134   bool saw_NUL = false;
1135 
1136   do
1137     {
1138       /* Horizontal space always OK.  */
1139       if (c == ' ' || c == '\t')
1140 	;
1141       /* Just \f \v or \0 left.  */
1142       else if (c == '\0')
1143 	saw_NUL = true;
1144       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1145 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1146 			     CPP_BUF_COL (buffer),
1147 			     "%s in preprocessing directive",
1148 			     c == '\f' ? "form feed" : "vertical tab");
1149 
1150       c = *buffer->cur++;
1151     }
1152   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1153   while (is_nvspace (c));
1154 
1155   if (saw_NUL)
1156     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1157 
1158   buffer->cur--;
1159 }
1160 
1161 /* See if the characters of a number token are valid in a name (no
1162    '.', '+' or '-').  */
1163 static int
1164 name_p (cpp_reader *pfile, const cpp_string *string)
1165 {
1166   unsigned int i;
1167 
1168   for (i = 0; i < string->len; i++)
1169     if (!is_idchar (string->text[i]))
1170       return 0;
1171 
1172   return 1;
1173 }
1174 
1175 /* After parsing an identifier or other sequence, produce a warning about
1176    sequences not in NFC/NFKC.  */
1177 static void
1178 warn_about_normalization (cpp_reader *pfile,
1179 			  const cpp_token *token,
1180 			  const struct normalize_state *s)
1181 {
1182   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1183       && !pfile->state.skipping)
1184     {
1185       /* Make sure that the token is printed using UCNs, even
1186 	 if we'd otherwise happily print UTF-8.  */
1187       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1188       size_t sz;
1189 
1190       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1191       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1192 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1193 			       "`%.*s' is not in NFKC", (int) sz, buf);
1194       else
1195 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1196 			       "`%.*s' is not in NFC", (int) sz, buf);
1197       free (buf);
1198     }
1199 }
1200 
1201 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1202    an identifier.  FIRST is TRUE if this starts an identifier.  */
1203 static bool
1204 forms_identifier_p (cpp_reader *pfile, int first,
1205 		    struct normalize_state *state)
1206 {
1207   cpp_buffer *buffer = pfile->buffer;
1208 
1209   if (*buffer->cur == '$')
1210     {
1211       if (!CPP_OPTION (pfile, dollars_in_ident))
1212 	return false;
1213 
1214       buffer->cur++;
1215       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1216 	{
1217 	  CPP_OPTION (pfile, warn_dollars) = 0;
1218 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1219 	}
1220 
1221       return true;
1222     }
1223 
1224   /* Is this a syntactically valid UCN?  */
1225   if (CPP_OPTION (pfile, extended_identifiers)
1226       && *buffer->cur == '\\'
1227       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1228     {
1229       buffer->cur += 2;
1230       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1231 			  state))
1232 	return true;
1233       buffer->cur -= 2;
1234     }
1235 
1236   return false;
1237 }
1238 
1239 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1240 static cpp_hashnode *
1241 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1242 {
1243   cpp_hashnode *result;
1244   const uchar *cur;
1245   unsigned int len;
1246   unsigned int hash = HT_HASHSTEP (0, *base);
1247 
1248   cur = base + 1;
1249   while (ISIDNUM (*cur))
1250     {
1251       hash = HT_HASHSTEP (hash, *cur);
1252       cur++;
1253     }
1254   len = cur - base;
1255   hash = HT_HASHFINISH (hash, len);
1256   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1257 					      base, len, hash, HT_ALLOC));
1258 
1259   /* Rarely, identifiers require diagnostics when lexed.  */
1260   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1261 			&& !pfile->state.skipping, 0))
1262     {
1263       /* It is allowed to poison the same identifier twice.  */
1264       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1265 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1266 		   NODE_NAME (result));
1267 
1268       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1269 	 replacement list of a variadic macro.  */
1270       if (result == pfile->spec_nodes.n__VA_ARGS__
1271 	  && !pfile->state.va_args_ok)
1272 	{
1273 	  if (CPP_OPTION (pfile, cplusplus))
1274 	    cpp_error (pfile, CPP_DL_PEDWARN,
1275 		       "__VA_ARGS__ can only appear in the expansion"
1276 		       " of a C++11 variadic macro");
1277 	  else
1278 	    cpp_error (pfile, CPP_DL_PEDWARN,
1279 		       "__VA_ARGS__ can only appear in the expansion"
1280 		       " of a C99 variadic macro");
1281 	}
1282 
1283       /* For -Wc++-compat, warn about use of C++ named operators.  */
1284       if (result->flags & NODE_WARN_OPERATOR)
1285 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1286 		     "identifier \"%s\" is a special operator name in C++",
1287 		     NODE_NAME (result));
1288     }
1289 
1290   return result;
1291 }
1292 
1293 /* Get the cpp_hashnode of an identifier specified by NAME in
1294    the current cpp_reader object.  If none is found, NULL is returned.  */
1295 cpp_hashnode *
1296 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1297 {
1298   cpp_hashnode *result;
1299   result = lex_identifier_intern (pfile, (uchar *) name);
1300   return result;
1301 }
1302 
1303 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1304 static cpp_hashnode *
1305 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1306 		struct normalize_state *nst, cpp_hashnode **spelling)
1307 {
1308   cpp_hashnode *result;
1309   const uchar *cur;
1310   unsigned int len;
1311   unsigned int hash = HT_HASHSTEP (0, *base);
1312 
1313   cur = pfile->buffer->cur;
1314   if (! starts_ucn)
1315     {
1316       while (ISIDNUM (*cur))
1317 	{
1318 	  hash = HT_HASHSTEP (hash, *cur);
1319 	  cur++;
1320 	}
1321       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1322     }
1323   pfile->buffer->cur = cur;
1324   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1325     {
1326       /* Slower version for identifiers containing UCNs (or $).  */
1327       do {
1328 	while (ISIDNUM (*pfile->buffer->cur))
1329 	  {
1330 	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1331 	    pfile->buffer->cur++;
1332 	  }
1333       } while (forms_identifier_p (pfile, false, nst));
1334       result = _cpp_interpret_identifier (pfile, base,
1335 					  pfile->buffer->cur - base);
1336       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1337     }
1338   else
1339     {
1340       len = cur - base;
1341       hash = HT_HASHFINISH (hash, len);
1342 
1343       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1344 						  base, len, hash, HT_ALLOC));
1345       *spelling = result;
1346     }
1347 
1348   /* Rarely, identifiers require diagnostics when lexed.  */
1349   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1350 			&& !pfile->state.skipping, 0))
1351     {
1352       /* It is allowed to poison the same identifier twice.  */
1353       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1354 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1355 		   NODE_NAME (result));
1356 
1357       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1358 	 replacement list of a variadic macro.  */
1359       if (result == pfile->spec_nodes.n__VA_ARGS__
1360 	  && !pfile->state.va_args_ok)
1361 	{
1362 	  if (CPP_OPTION (pfile, cplusplus))
1363 	    cpp_error (pfile, CPP_DL_PEDWARN,
1364 		       "__VA_ARGS__ can only appear in the expansion"
1365 		       " of a C++11 variadic macro");
1366 	  else
1367 	    cpp_error (pfile, CPP_DL_PEDWARN,
1368 		       "__VA_ARGS__ can only appear in the expansion"
1369 		       " of a C99 variadic macro");
1370 	}
1371 
1372       /* For -Wc++-compat, warn about use of C++ named operators.  */
1373       if (result->flags & NODE_WARN_OPERATOR)
1374 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1375 		     "identifier \"%s\" is a special operator name in C++",
1376 		     NODE_NAME (result));
1377     }
1378 
1379   return result;
1380 }
1381 
1382 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1383 static void
1384 lex_number (cpp_reader *pfile, cpp_string *number,
1385 	    struct normalize_state *nst)
1386 {
1387   const uchar *cur;
1388   const uchar *base;
1389   uchar *dest;
1390 
1391   base = pfile->buffer->cur - 1;
1392   do
1393     {
1394       cur = pfile->buffer->cur;
1395 
1396       /* N.B. ISIDNUM does not include $.  */
1397       while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1398 	     || VALID_SIGN (*cur, cur[-1]))
1399 	{
1400 	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1401 	  cur++;
1402 	}
1403       /* A number can't end with a digit separator.  */
1404       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1405 	--cur;
1406 
1407       pfile->buffer->cur = cur;
1408     }
1409   while (forms_identifier_p (pfile, false, nst));
1410 
1411   number->len = cur - base;
1412   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1413   memcpy (dest, base, number->len);
1414   dest[number->len] = '\0';
1415   number->text = dest;
1416 }
1417 
1418 /* Create a token of type TYPE with a literal spelling.  */
1419 static void
1420 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1421 		unsigned int len, enum cpp_ttype type)
1422 {
1423   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1424 
1425   memcpy (dest, base, len);
1426   dest[len] = '\0';
1427   token->type = type;
1428   token->val.str.len = len;
1429   token->val.str.text = dest;
1430 }
1431 
1432 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1433    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1434 
1435 static void
1436 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1437 		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1438 {
1439   _cpp_buff *first_buff = *first_buff_p;
1440   _cpp_buff *last_buff = *last_buff_p;
1441 
1442   if (first_buff == NULL)
1443     first_buff = last_buff = _cpp_get_buff (pfile, len);
1444   else if (len > BUFF_ROOM (last_buff))
1445     {
1446       size_t room = BUFF_ROOM (last_buff);
1447       memcpy (BUFF_FRONT (last_buff), base, room);
1448       BUFF_FRONT (last_buff) += room;
1449       base += room;
1450       len -= room;
1451       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1452     }
1453 
1454   memcpy (BUFF_FRONT (last_buff), base, len);
1455   BUFF_FRONT (last_buff) += len;
1456 
1457   *first_buff_p = first_buff;
1458   *last_buff_p = last_buff;
1459 }
1460 
1461 
1462 /* Returns true if a macro has been defined.
1463    This might not work if compile with -save-temps,
1464    or preprocess separately from compilation.  */
1465 
1466 static bool
1467 is_macro(cpp_reader *pfile, const uchar *base)
1468 {
1469   const uchar *cur = base;
1470   if (! ISIDST (*cur))
1471     return false;
1472   unsigned int hash = HT_HASHSTEP (0, *cur);
1473   ++cur;
1474   while (ISIDNUM (*cur))
1475     {
1476       hash = HT_HASHSTEP (hash, *cur);
1477       ++cur;
1478     }
1479   hash = HT_HASHFINISH (hash, cur - base);
1480 
1481   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1482 					base, cur - base, hash, HT_NO_INSERT));
1483 
1484   return !result ? false : (result->type == NT_MACRO);
1485 }
1486 
1487 
1488 /* Lexes a raw string.  The stored string contains the spelling, including
1489    double quotes, delimiter string, '(' and ')', any leading
1490    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1491    literal, or CPP_OTHER if it was not properly terminated.
1492 
1493    The spelling is NUL-terminated, but it is not guaranteed that this
1494    is the first NUL since embedded NULs are preserved.  */
1495 
1496 static void
1497 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1498 		const uchar *cur)
1499 {
1500   uchar raw_prefix[17];
1501   uchar temp_buffer[18];
1502   const uchar *orig_base;
1503   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1504   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1505   raw_str_phase phase = RAW_STR_PREFIX;
1506   enum cpp_ttype type;
1507   size_t total_len = 0;
1508   /* Index into temp_buffer during phases other than RAW_STR,
1509      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1510      be appended to temp_buffer.  */
1511   size_t temp_buffer_len = 0;
1512   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1513   size_t raw_prefix_start;
1514   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1515 
1516   type = (*base == 'L' ? CPP_WSTRING :
1517 	  *base == 'U' ? CPP_STRING32 :
1518 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1519 	  : CPP_STRING);
1520 
1521 #define BUF_APPEND(STR,LEN)					\
1522       do {							\
1523 	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
1524 			&first_buff, &last_buff);		\
1525 	total_len += (LEN);					\
1526 	if (__builtin_expect (temp_buffer_len < 17, 0)		\
1527 	    && (const uchar *)(STR) != base			\
1528 	    && (LEN) <= 2)					\
1529 	  {							\
1530 	    memcpy (temp_buffer + temp_buffer_len,		\
1531 		    (const uchar *)(STR), (LEN));		\
1532 	    temp_buffer_len += (LEN);				\
1533 	  }							\
1534       } while (0);
1535 
1536   orig_base = base;
1537   ++cur;
1538   raw_prefix_start = cur - base;
1539   for (;;)
1540     {
1541       cppchar_t c;
1542 
1543       /* If we previously performed any trigraph or line splicing
1544 	 transformations, undo them in between the opening and closing
1545 	 double quote.  */
1546       while (note->pos < cur)
1547 	++note;
1548       for (; note->pos == cur; ++note)
1549 	{
1550 	  switch (note->type)
1551 	    {
1552 	    case '\\':
1553 	    case ' ':
1554 	      /* Restore backslash followed by newline.  */
1555 	      BUF_APPEND (base, cur - base);
1556 	      base = cur;
1557 	      BUF_APPEND ("\\", 1);
1558 	    after_backslash:
1559 	      if (note->type == ' ')
1560 		{
1561 		  /* GNU backslash whitespace newline extension.  FIXME
1562 		     could be any sequence of non-vertical space.  When we
1563 		     can properly restore any such sequence, we should mark
1564 		     this note as handled so _cpp_process_line_notes
1565 		     doesn't warn.  */
1566 		  BUF_APPEND (" ", 1);
1567 		}
1568 
1569 	      BUF_APPEND ("\n", 1);
1570 	      break;
1571 
1572 	    case 0:
1573 	      /* Already handled.  */
1574 	      break;
1575 
1576 	    default:
1577 	      if (_cpp_trigraph_map[note->type])
1578 		{
1579 		  /* Don't warn about this trigraph in
1580 		     _cpp_process_line_notes, since trigraphs show up as
1581 		     trigraphs in raw strings.  */
1582 		  uchar type = note->type;
1583 		  note->type = 0;
1584 
1585 		  if (!CPP_OPTION (pfile, trigraphs))
1586 		    /* If we didn't convert the trigraph in the first
1587 		       place, don't do anything now either.  */
1588 		    break;
1589 
1590 		  BUF_APPEND (base, cur - base);
1591 		  base = cur;
1592 		  BUF_APPEND ("??", 2);
1593 
1594 		  /* ??/ followed by newline gets two line notes, one for
1595 		     the trigraph and one for the backslash/newline.  */
1596 		  if (type == '/' && note[1].pos == cur)
1597 		    {
1598 		      if (note[1].type != '\\'
1599 			  && note[1].type != ' ')
1600 			abort ();
1601 		      BUF_APPEND ("/", 1);
1602 		      ++note;
1603 		      goto after_backslash;
1604 		    }
1605 		  else
1606 		    {
1607 		      /* Skip the replacement character.  */
1608 		      base = ++cur;
1609 		      BUF_APPEND (&type, 1);
1610 		      c = type;
1611 		      goto check_c;
1612 		    }
1613 		}
1614 	      else
1615 		abort ();
1616 	      break;
1617 	    }
1618 	}
1619       c = *cur++;
1620       if (__builtin_expect (temp_buffer_len < 17, 0))
1621 	temp_buffer[temp_buffer_len++] = c;
1622 
1623      check_c:
1624       if (phase == RAW_STR_PREFIX)
1625 	{
1626 	  while (raw_prefix_len < temp_buffer_len)
1627 	    {
1628 	      raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1629 	      switch (raw_prefix[raw_prefix_len])
1630 		{
1631 		case ' ': case '(': case ')': case '\\': case '\t':
1632 		case '\v': case '\f': case '\n': default:
1633 		  break;
1634 		/* Basic source charset except the above chars.  */
1635 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1636 		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1637 		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1638 		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1639 		case 'y': case 'z':
1640 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1641 		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1642 		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1643 		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1644 		case 'Y': case 'Z':
1645 		case '0': case '1': case '2': case '3': case '4': case '5':
1646 		case '6': case '7': case '8': case '9':
1647 		case '_': case '{': case '}': case '#': case '[': case ']':
1648 		case '<': case '>': case '%': case ':': case ';': case '.':
1649 		case '?': case '*': case '+': case '-': case '/': case '^':
1650 		case '&': case '|': case '~': case '!': case '=': case ',':
1651 		case '"': case '\'':
1652 		  if (raw_prefix_len < 16)
1653 		    {
1654 		      raw_prefix_len++;
1655 		      continue;
1656 		    }
1657 		  break;
1658 		}
1659 
1660 	      if (raw_prefix[raw_prefix_len] != '(')
1661 		{
1662 		  int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1663 		  if (raw_prefix_len == 16)
1664 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1665 					 col, "raw string delimiter longer "
1666 					      "than 16 characters");
1667 		  else if (raw_prefix[raw_prefix_len] == '\n')
1668 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1669 					 col, "invalid new-line in raw "
1670 					      "string delimiter");
1671 		  else
1672 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1673 					 col, "invalid character '%c' in "
1674 					      "raw string delimiter",
1675 					 (int) raw_prefix[raw_prefix_len]);
1676 		  pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1677 		  create_literal (pfile, token, orig_base,
1678 				  raw_prefix_start - 1, CPP_OTHER);
1679 		  if (first_buff)
1680 		    _cpp_release_buff (pfile, first_buff);
1681 		  return;
1682 		}
1683 	      raw_prefix[raw_prefix_len] = '"';
1684 	      phase = RAW_STR;
1685 	      /* Nothing should be appended to temp_buffer during
1686 		 RAW_STR phase.  */
1687 	      temp_buffer_len = 17;
1688 	      break;
1689 	    }
1690 	  continue;
1691 	}
1692       else if (phase == RAW_STR_SUFFIX)
1693 	{
1694 	  while (raw_suffix_len <= raw_prefix_len
1695 		 && raw_suffix_len < temp_buffer_len
1696 		 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1697 	    raw_suffix_len++;
1698 	  if (raw_suffix_len > raw_prefix_len)
1699 	    break;
1700 	  if (raw_suffix_len == temp_buffer_len)
1701 	    continue;
1702 	  phase = RAW_STR;
1703 	  /* Nothing should be appended to temp_buffer during
1704 	     RAW_STR phase.  */
1705 	  temp_buffer_len = 17;
1706 	}
1707       if (c == ')')
1708 	{
1709 	  phase = RAW_STR_SUFFIX;
1710 	  raw_suffix_len = 0;
1711 	  temp_buffer_len = 0;
1712 	}
1713       else if (c == '\n')
1714 	{
1715 	  if (pfile->state.in_directive
1716 	      || (pfile->state.parsing_args
1717 		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
1718 	    {
1719 	      cur--;
1720 	      type = CPP_OTHER;
1721 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1722 				   "unterminated raw string");
1723 	      break;
1724 	    }
1725 
1726 	  BUF_APPEND (base, cur - base);
1727 
1728 	  if (pfile->buffer->cur < pfile->buffer->rlimit)
1729 	    CPP_INCREMENT_LINE (pfile, 0);
1730 	  pfile->buffer->need_line = true;
1731 
1732 	  pfile->buffer->cur = cur-1;
1733 	  _cpp_process_line_notes (pfile, false);
1734 	  if (!_cpp_get_fresh_line (pfile))
1735 	    {
1736 	      source_location src_loc = token->src_loc;
1737 	      token->type = CPP_EOF;
1738 	      /* Tell the compiler the line number of the EOF token.  */
1739 	      token->src_loc = pfile->line_table->highest_line;
1740 	      token->flags = BOL;
1741 	      if (first_buff != NULL)
1742 		_cpp_release_buff (pfile, first_buff);
1743 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1744 				   "unterminated raw string");
1745 	      return;
1746 	    }
1747 
1748 	  cur = base = pfile->buffer->cur;
1749 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
1750 	}
1751     }
1752 
1753   if (CPP_OPTION (pfile, user_literals))
1754     {
1755       /* If a string format macro, say from inttypes.h, is placed touching
1756 	 a string literal it could be parsed as a C++11 user-defined string
1757 	 literal thus breaking the program.
1758 	 Try to identify macros with is_macro. A warning is issued. */
1759       if (is_macro (pfile, cur))
1760 	{
1761 	  /* Raise a warning, but do not consume subsequent tokens.  */
1762 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1763 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1764 				   token->src_loc, 0,
1765 				   "invalid suffix on literal; C++11 requires "
1766 				   "a space between literal and string macro");
1767 	}
1768       /* Grab user defined literal suffix.  */
1769       else if (ISIDST (*cur))
1770 	{
1771 	  type = cpp_userdef_string_add_type (type);
1772 	  ++cur;
1773 
1774 	  while (ISIDNUM (*cur))
1775 	    ++cur;
1776 	}
1777     }
1778 
1779   pfile->buffer->cur = cur;
1780   if (first_buff == NULL)
1781     create_literal (pfile, token, base, cur - base, type);
1782   else
1783     {
1784       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1785 
1786       token->type = type;
1787       token->val.str.len = total_len + (cur - base);
1788       token->val.str.text = dest;
1789       last_buff = first_buff;
1790       while (last_buff != NULL)
1791 	{
1792 	  memcpy (dest, last_buff->base,
1793 		  BUFF_FRONT (last_buff) - last_buff->base);
1794 	  dest += BUFF_FRONT (last_buff) - last_buff->base;
1795 	  last_buff = last_buff->next;
1796 	}
1797       _cpp_release_buff (pfile, first_buff);
1798       memcpy (dest, base, cur - base);
1799       dest[cur - base] = '\0';
1800     }
1801 }
1802 
1803 /* Lexes a string, character constant, or angle-bracketed header file
1804    name.  The stored string contains the spelling, including opening
1805    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1806    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1807    if it was not properly terminated, or CPP_LESS for an unterminated
1808    header name which must be relexed as normal tokens.
1809 
1810    The spelling is NUL-terminated, but it is not guaranteed that this
1811    is the first NUL since embedded NULs are preserved.  */
1812 static void
1813 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1814 {
1815   bool saw_NUL = false;
1816   const uchar *cur;
1817   cppchar_t terminator;
1818   enum cpp_ttype type;
1819 
1820   cur = base;
1821   terminator = *cur++;
1822   if (terminator == 'L' || terminator == 'U')
1823     terminator = *cur++;
1824   else if (terminator == 'u')
1825     {
1826       terminator = *cur++;
1827       if (terminator == '8')
1828 	terminator = *cur++;
1829     }
1830   if (terminator == 'R')
1831     {
1832       lex_raw_string (pfile, token, base, cur);
1833       return;
1834     }
1835   if (terminator == '"')
1836     type = (*base == 'L' ? CPP_WSTRING :
1837 	    *base == 'U' ? CPP_STRING32 :
1838 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1839 			 : CPP_STRING);
1840   else if (terminator == '\'')
1841     type = (*base == 'L' ? CPP_WCHAR :
1842 	    *base == 'U' ? CPP_CHAR32 :
1843 	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1844   else
1845     terminator = '>', type = CPP_HEADER_NAME;
1846 
1847   for (;;)
1848     {
1849       cppchar_t c = *cur++;
1850 
1851       /* In #include-style directives, terminators are not escapable.  */
1852       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1853 	cur++;
1854       else if (c == terminator)
1855 	break;
1856       else if (c == '\n')
1857 	{
1858 	  cur--;
1859 	  /* Unmatched quotes always yield undefined behavior, but
1860 	     greedy lexing means that what appears to be an unterminated
1861 	     header name may actually be a legitimate sequence of tokens.  */
1862 	  if (terminator == '>')
1863 	    {
1864 	      token->type = CPP_LESS;
1865 	      return;
1866 	    }
1867 	  type = CPP_OTHER;
1868 	  break;
1869 	}
1870       else if (c == '\0')
1871 	saw_NUL = true;
1872     }
1873 
1874   if (saw_NUL && !pfile->state.skipping)
1875     cpp_error (pfile, CPP_DL_WARNING,
1876 	       "null character(s) preserved in literal");
1877 
1878   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1879     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1880 	       (int) terminator);
1881 
1882   if (CPP_OPTION (pfile, user_literals))
1883     {
1884       /* If a string format macro, say from inttypes.h, is placed touching
1885 	 a string literal it could be parsed as a C++11 user-defined string
1886 	 literal thus breaking the program.
1887 	 Try to identify macros with is_macro. A warning is issued. */
1888       if (is_macro (pfile, cur))
1889 	{
1890 	  /* Raise a warning, but do not consume subsequent tokens.  */
1891 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1892 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1893 				   token->src_loc, 0,
1894 				   "invalid suffix on literal; C++11 requires "
1895 				   "a space between literal and string macro");
1896 	}
1897       /* Grab user defined literal suffix.  */
1898       else if (ISIDST (*cur))
1899 	{
1900 	  type = cpp_userdef_char_add_type (type);
1901 	  type = cpp_userdef_string_add_type (type);
1902           ++cur;
1903 
1904 	  while (ISIDNUM (*cur))
1905 	    ++cur;
1906 	}
1907     }
1908 
1909   pfile->buffer->cur = cur;
1910   create_literal (pfile, token, base, cur - base, type);
1911 }
1912 
1913 /* Return the comment table. The client may not make any assumption
1914    about the ordering of the table.  */
1915 cpp_comment_table *
1916 cpp_get_comments (cpp_reader *pfile)
1917 {
1918   return &pfile->comments;
1919 }
1920 
1921 /* Append a comment to the end of the comment table. */
1922 static void
1923 store_comment (cpp_reader *pfile, cpp_token *token)
1924 {
1925   int len;
1926 
1927   if (pfile->comments.allocated == 0)
1928     {
1929       pfile->comments.allocated = 256;
1930       pfile->comments.entries = (cpp_comment *) xmalloc
1931 	(pfile->comments.allocated * sizeof (cpp_comment));
1932     }
1933 
1934   if (pfile->comments.count == pfile->comments.allocated)
1935     {
1936       pfile->comments.allocated *= 2;
1937       pfile->comments.entries = (cpp_comment *) xrealloc
1938 	(pfile->comments.entries,
1939 	 pfile->comments.allocated * sizeof (cpp_comment));
1940     }
1941 
1942   len = token->val.str.len;
1943 
1944   /* Copy comment. Note, token may not be NULL terminated. */
1945   pfile->comments.entries[pfile->comments.count].comment =
1946     (char *) xmalloc (sizeof (char) * (len + 1));
1947   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1948 	  token->val.str.text, len);
1949   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1950 
1951   /* Set source location. */
1952   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1953 
1954   /* Increment the count of entries in the comment table. */
1955   pfile->comments.count++;
1956 }
1957 
1958 /* The stored comment includes the comment start and any terminator.  */
1959 static void
1960 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1961 	      cppchar_t type)
1962 {
1963   unsigned char *buffer;
1964   unsigned int len, clen, i;
1965   int convert_to_c = (pfile->state.in_directive || pfile->state.parsing_args)
1966     && type == '/';
1967 
1968   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1969 
1970   /* C++ comments probably (not definitely) have moved past a new
1971      line, which we don't want to save in the comment.  */
1972   if (is_vspace (pfile->buffer->cur[-1]))
1973     len--;
1974 
1975   /* If we are currently in a directive or in argument parsing, then
1976      we need to store all C++ comments as C comments internally, and
1977      so we need to allocate a little extra space in that case.
1978 
1979      Note that the only time we encounter a directive here is
1980      when we are saving comments in a "#define".  */
1981   clen = convert_to_c ? len + 2 : len;
1982 
1983   buffer = _cpp_unaligned_alloc (pfile, clen);
1984 
1985   token->type = CPP_COMMENT;
1986   token->val.str.len = clen;
1987   token->val.str.text = buffer;
1988 
1989   buffer[0] = '/';
1990   memcpy (buffer + 1, from, len - 1);
1991 
1992   /* Finish conversion to a C comment, if necessary.  */
1993   if (convert_to_c)
1994     {
1995       buffer[1] = '*';
1996       buffer[clen - 2] = '*';
1997       buffer[clen - 1] = '/';
1998       /* As there can be in a C++ comments illegal sequences for C comments
1999          we need to filter them out.  */
2000       for (i = 2; i < (clen - 2); i++)
2001         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2002           buffer[i] = '|';
2003     }
2004 
2005   /* Finally store this comment for use by clients of libcpp. */
2006   store_comment (pfile, token);
2007 }
2008 
2009 /* Allocate COUNT tokens for RUN.  */
2010 void
2011 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2012 {
2013   run->base = XNEWVEC (cpp_token, count);
2014   run->limit = run->base + count;
2015   run->next = NULL;
2016 }
2017 
2018 /* Returns the next tokenrun, or creates one if there is none.  */
2019 static tokenrun *
2020 next_tokenrun (tokenrun *run)
2021 {
2022   if (run->next == NULL)
2023     {
2024       run->next = XNEW (tokenrun);
2025       run->next->prev = run;
2026       _cpp_init_tokenrun (run->next, 250);
2027     }
2028 
2029   return run->next;
2030 }
2031 
2032 /* Return the number of not yet processed token in a given
2033    context.  */
2034 int
2035 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2036 {
2037   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2038     return (LAST (context).token - FIRST (context).token);
2039   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2040 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2041     return (LAST (context).ptoken - FIRST (context).ptoken);
2042   else
2043       abort ();
2044 }
2045 
2046 /* Returns the token present at index INDEX in a given context.  If
2047    INDEX is zero, the next token to be processed is returned.  */
2048 static const cpp_token*
2049 _cpp_token_from_context_at (cpp_context *context, int index)
2050 {
2051   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2052     return &(FIRST (context).token[index]);
2053   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2054 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2055     return FIRST (context).ptoken[index];
2056  else
2057    abort ();
2058 }
2059 
2060 /* Look ahead in the input stream.  */
2061 const cpp_token *
2062 cpp_peek_token (cpp_reader *pfile, int index)
2063 {
2064   cpp_context *context = pfile->context;
2065   const cpp_token *peektok;
2066   int count;
2067 
2068   /* First, scan through any pending cpp_context objects.  */
2069   while (context->prev)
2070     {
2071       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2072 
2073       if (index < (int) sz)
2074         return _cpp_token_from_context_at (context, index);
2075       index -= (int) sz;
2076       context = context->prev;
2077     }
2078 
2079   /* We will have to read some new tokens after all (and do so
2080      without invalidating preceding tokens).  */
2081   count = index;
2082   pfile->keep_tokens++;
2083 
2084   /* For peeked tokens temporarily disable line_change reporting,
2085      until the tokens are parsed for real.  */
2086   void (*line_change) (cpp_reader *, const cpp_token *, int)
2087     = pfile->cb.line_change;
2088   pfile->cb.line_change = NULL;
2089 
2090   do
2091     {
2092       peektok = _cpp_lex_token (pfile);
2093       if (peektok->type == CPP_EOF)
2094 	{
2095 	  index--;
2096 	  break;
2097 	}
2098     }
2099   while (index--);
2100 
2101   _cpp_backup_tokens_direct (pfile, count - index);
2102   pfile->keep_tokens--;
2103   pfile->cb.line_change = line_change;
2104 
2105   return peektok;
2106 }
2107 
2108 /* Allocate a single token that is invalidated at the same time as the
2109    rest of the tokens on the line.  Has its line and col set to the
2110    same as the last lexed token, so that diagnostics appear in the
2111    right place.  */
2112 cpp_token *
2113 _cpp_temp_token (cpp_reader *pfile)
2114 {
2115   cpp_token *old, *result;
2116   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2117   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2118 
2119   old = pfile->cur_token - 1;
2120   /* Any pre-existing lookaheads must not be clobbered.  */
2121   if (la)
2122     {
2123       if (sz <= la)
2124         {
2125           tokenrun *next = next_tokenrun (pfile->cur_run);
2126 
2127           if (sz < la)
2128             memmove (next->base + 1, next->base,
2129                      (la - sz) * sizeof (cpp_token));
2130 
2131           next->base[0] = pfile->cur_run->limit[-1];
2132         }
2133 
2134       if (sz > 1)
2135         memmove (pfile->cur_token + 1, pfile->cur_token,
2136                  MIN (la, sz - 1) * sizeof (cpp_token));
2137     }
2138 
2139   if (!sz && pfile->cur_token == pfile->cur_run->limit)
2140     {
2141       pfile->cur_run = next_tokenrun (pfile->cur_run);
2142       pfile->cur_token = pfile->cur_run->base;
2143     }
2144 
2145   result = pfile->cur_token++;
2146   result->src_loc = old->src_loc;
2147   return result;
2148 }
2149 
2150 /* Lex a token into RESULT (external interface).  Takes care of issues
2151    like directive handling, token lookahead, multiple include
2152    optimization and skipping.  */
2153 const cpp_token *
2154 _cpp_lex_token (cpp_reader *pfile)
2155 {
2156   cpp_token *result;
2157 
2158   for (;;)
2159     {
2160       if (pfile->cur_token == pfile->cur_run->limit)
2161 	{
2162 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
2163 	  pfile->cur_token = pfile->cur_run->base;
2164 	}
2165       /* We assume that the current token is somewhere in the current
2166 	 run.  */
2167       if (pfile->cur_token < pfile->cur_run->base
2168 	  || pfile->cur_token >= pfile->cur_run->limit)
2169 	abort ();
2170 
2171       if (pfile->lookaheads)
2172 	{
2173 	  pfile->lookaheads--;
2174 	  result = pfile->cur_token++;
2175 	}
2176       else
2177 	result = _cpp_lex_direct (pfile);
2178 
2179       if (result->flags & BOL)
2180 	{
2181 	  /* Is this a directive.  If _cpp_handle_directive returns
2182 	     false, it is an assembler #.  */
2183 	  if (result->type == CPP_HASH
2184 	      /* 6.10.3 p 11: Directives in a list of macro arguments
2185 		 gives undefined behavior.  This implementation
2186 		 handles the directive as normal.  */
2187 	      && pfile->state.parsing_args != 1)
2188 	    {
2189 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2190 		{
2191 		  if (pfile->directive_result.type == CPP_PADDING)
2192 		    continue;
2193 		  result = &pfile->directive_result;
2194 		}
2195 	    }
2196 	  else if (pfile->state.in_deferred_pragma)
2197 	    result = &pfile->directive_result;
2198 
2199 	  if (pfile->cb.line_change && !pfile->state.skipping)
2200 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2201 	}
2202 
2203       /* We don't skip tokens in directives.  */
2204       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2205 	break;
2206 
2207       /* Outside a directive, invalidate controlling macros.  At file
2208 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2209 	 get here and MI optimization works.  */
2210       pfile->mi_valid = false;
2211 
2212       if (!pfile->state.skipping || result->type == CPP_EOF)
2213 	break;
2214     }
2215 
2216   return result;
2217 }
2218 
2219 /* Returns true if a fresh line has been loaded.  */
2220 bool
2221 _cpp_get_fresh_line (cpp_reader *pfile)
2222 {
2223   int return_at_eof;
2224 
2225   /* We can't get a new line until we leave the current directive.  */
2226   if (pfile->state.in_directive)
2227     return false;
2228 
2229   for (;;)
2230     {
2231       cpp_buffer *buffer = pfile->buffer;
2232 
2233       if (!buffer->need_line)
2234 	return true;
2235 
2236       if (buffer->next_line < buffer->rlimit)
2237 	{
2238 	  _cpp_clean_line (pfile);
2239 	  return true;
2240 	}
2241 
2242       /* First, get out of parsing arguments state.  */
2243       if (pfile->state.parsing_args)
2244 	return false;
2245 
2246       /* End of buffer.  Non-empty files should end in a newline.  */
2247       if (buffer->buf != buffer->rlimit
2248 	  && buffer->next_line > buffer->rlimit
2249 	  && !buffer->from_stage3)
2250 	{
2251 	  /* Clip to buffer size.  */
2252 	  buffer->next_line = buffer->rlimit;
2253 	}
2254 
2255       return_at_eof = buffer->return_at_eof;
2256       _cpp_pop_buffer (pfile);
2257       if (pfile->buffer == NULL || return_at_eof)
2258 	return false;
2259     }
2260 }
2261 
/* Helper for two-character operators.  Sets result->type to
   ELSE_TYPE; if the next unread character of the buffer is CHAR, that
   character is consumed and result->type becomes THEN_TYPE instead.
   The expansion refers to variables named `result' and `buffer',
   which must be in scope where the macro is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = ELSE_TYPE;				\
      if (*buffer->cur == CHAR)				\
	buffer->cur++, result->type = THEN_TYPE;	\
    }							\
  while (0)
2270 
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns a pointer to the lexed token.  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

  /* Re-entered after every newline: fetch a new line if the buffer
     needs one, then lex the first token on it.  */
 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The line that held a deferred pragma has ended.  */
      if (pfile->state.in_deferred_pragma)
	{
	  result->type = CPP_PRAGMA_EOL;
	  pfile->state.in_deferred_pragma = false;
	  if (!pfile->state.pragma_allow_expansion)
	    pfile->state.prevent_expansion--;
	  return result;
	}
      /* No further line can be had right now: report CPP_EOF.  */
      if (!_cpp_get_fresh_line (pfile))
	{
	  result->type = CPP_EOF;
	  if (!pfile->state.in_directive)
	    {
	      /* Tell the compiler the line number of the EOF token.  */
	      result->src_loc = pfile->line_table->highest_line;
	      result->flags = BOL;
	    }
	  return result;
	}
      /* Unless earlier tokens must be kept, start over at the
	 beginning of the base token run.  */
      if (!pfile->keep_tokens)
	{
	  pfile->cur_run = &pfile->base_run;
	  result = pfile->base_run.base;
	  pfile->cur_token = result + 1;
	}
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
	result->flags |= PREV_WHITE;
    }
  /* _cpp_get_fresh_line may have popped a buffer, so reload it.  */
  buffer = pfile->buffer;
  /* Re-entered after a comment that is not saved as a token.  */
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

  /* Re-entered after a run of horizontal whitespace.  */
 skipped_white:
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  if (pfile->forced_token_location_p)
    result->src_loc = *pfile->forced_token_location_p;
  else
    result->src_loc = linemap_position_for_column (pfile->line_table,
					  CPP_BUF_COLUMN (buffer, buffer->cur));

  /* Dispatch on the first character of the token; buffer->cur already
     points at the character after it.  */
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      if (buffer->cur < buffer->rlimit)
	CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->type = CPP_NUMBER;
	lex_number (pfile, &result->val.str, &nst);
	warn_about_normalization (pfile, result, &nst);
	break;
      }

    case 'L':
    case 'u':
    case 'U':
    case 'R':
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
	 wide strings or raw strings.  */
      if (c == 'L' || CPP_OPTION (pfile, rliterals)
	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
	{
	  if ((*buffer->cur == '\'' && c != 'R')
	      || *buffer->cur == '"'
	      || (*buffer->cur == 'R'
		  && c != 'R'
		  && buffer->cur[1] == '"'
		  && CPP_OPTION (pfile, rliterals))
	      || (*buffer->cur == '8'
		  && c == 'u'
		  && (buffer->cur[1] == '"'
		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
			  && CPP_OPTION (pfile, rliterals)))))
	    {
	      lex_string (pfile, result, buffer->cur - 1);
	      break;
	    }
	}
      /* Fall through.  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't':           case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q':
    case 'S': case 'T':           case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
						&nst,
						&result->val.node.spelling);
	warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  */
      if (result->val.node.node->flags & NODE_OPERATOR)
	{
	  result->flags |= NAMED_OP;
	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
	}
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
	{
	  if (_cpp_skip_block_comment (pfile))
	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
	}
      else if (c == '/' && ! CPP_OPTION (pfile, traditional))
	{
	  /* Don't warn for system headers.  */
	  if (cpp_in_system_header (pfile))
	    ;
	  /* Warn about comments if pedantically GNUC89, and not
	     in system headers.  */
	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
		   && CPP_PEDANTIC (pfile)
		   && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "C++ style comments are not allowed in ISO C90");
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }
	  /* Or if specifically desired via -Wc90-c99-compat.  */
	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
		   && ! CPP_OPTION (pfile, cplusplus)
		   && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_WARNING,
			 "C++ style comments are incompatible with C90");
	      cpp_error (pfile, CPP_DL_WARNING,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }
	  /* In C89/C94, C++ style comments are forbidden.  */
	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
	    {
	      /* But don't be confused about valid code such as
	         - // immediately followed by *,
		 - // in a preprocessing directive,
		 - // in an #if 0 block.  */
	      if (buffer->cur[1] == '*'
		  || pfile->state.in_directive
		  || pfile->state.skipping)
		{
		  result->type = CPP_DIV;
		  break;
		}
	      else if (! buffer->warned_cplusplus_comments)
		{
		  cpp_error (pfile, CPP_DL_ERROR,
			     "C++ style comments are not allowed in ISO C90");
		  cpp_error (pfile, CPP_DL_ERROR,
			     "(this will be reported only once per input "
			     "file)");
		  buffer->warned_cplusplus_comments = 1;
		}
	    }
	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
	}
      else if (c == '=')
	{
	  buffer->cur++;
	  result->type = CPP_DIV_EQ;
	  break;
	}
      else
	{
	  result->type = CPP_DIV;
	  break;
	}

      /* Only the two comment branches reach this point.  When
	 comments are not being saved, the comment counts as
	 whitespace and lexing continues with what follows it.  */
      if (!pfile->state.save_comments)
	{
	  result->flags |= PREV_WHITE;
	  goto update_tokens_line;
	}

      /* Save the comment as a token in its own right.  C is the
	 second character of the comment opener, '*' or '/'.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* With angled_headers set, try to lex a <header-name> first;
	 lex_string sets the type to CPP_LESS when that attempt is
	 abandoned, in which case the '<' is lexed as an operator
	 below.  */
      if (pfile->state.angled_headers)
	{
	  lex_string (pfile, result, buffer->cur - 1);
	  if (result->type != CPP_LESS)
	    break;
	}

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
	}
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
		 three characters are <:: and the subsequent character
		 is neither : nor >, the < is treated as a preprocessor
		 token by itself".  */
	      if (CPP_OPTION (pfile, cplusplus)
		  && CPP_OPTION (pfile, lang) != CLK_CXX98
		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
		  && buffer->cur[1] == ':'
		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
		break;

	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_SQUARE;
	    }
	  else if (*buffer->cur == '%')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_BRACE;
	    }
	}
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
	}
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* "%:" is the digraph for '#', "%:%:" the one for "##".  */
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_HASH;
	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
	    }
	  else if (*buffer->cur == '>')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_CLOSE_BRACE;
	    }
	}
      break;

    case '.':
      result->type = CPP_DOT;
      if (ISDIGIT (*buffer->cur))
	{
	  /* A '.' followed by a digit starts a number.  */
	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	  result->type = CPP_NUMBER;
	  lex_number (pfile, &result->val.str, &nst);
	  warn_about_normalization (pfile, result, &nst);
	}
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
	buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
	buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  result->type = CPP_DEREF;
	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	    buffer->cur++, result->type = CPP_DEREF_STAR;
	}
      else if (*buffer->cur == '-')
	buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
	buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
	buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
	{
	  buffer->cur++;
	  result->flags |= DIGRAPH;
	  result->type = CPP_CLOSE_SQUARE;
	}
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
	/* Step back onto the character so that forms_identifier_p can
	   examine it; if it does not start an identifier, step
	   forward again and treat it as a stray character.  */
	const uchar *base = --buffer->cur;
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;

	if (forms_identifier_p (pfile, true, &nst))
	  {
	    result->type = CPP_NAME;
	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
						    &result->val.node.spelling);
	    warn_about_normalization (pfile, result, &nst);
	    break;
	  }
	buffer->cur++;
      }
      /* Fall through.  */

    default:
      /* Any other character becomes a one-character CPP_OTHER token.  */
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}
2702 
2703 /* An upper bound on the number of bytes needed to spell TOKEN.
2704    Does not include preceding whitespace.  */
2705 unsigned int
2706 cpp_token_len (const cpp_token *token)
2707 {
2708   unsigned int len;
2709 
2710   switch (TOKEN_SPELL (token))
2711     {
2712     default:		len = 6;				break;
2713     case SPELL_LITERAL:	len = token->val.str.len;		break;
2714     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
2715     }
2716 
2717   return len;
2718 }
2719 
/* Decode the UTF-8 sequence that starts at NAME and write it to
   BUFFER as a \U escape: a backslash, 'U' and eight lower-case hex
   digits, 10 bytes in all and not NUL-terminated.  Returns the number
   of bytes of NAME that made up the sequence.  Aborts if a
   continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned char *out = buffer;
  unsigned long code_point;
  unsigned lead_bits = *name;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The count of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* The payload of the first byte is whatever follows the length
     marker; every continuation byte then contributes six bits.  */
  code_point = *name & (0x7F >> seq_len);
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8: a continuation byte must look like
	 10xxxxxx.  */
      if ((*name & ~0x3F) != 0x80)
	abort ();
    }

  *out++ = '\\';
  *out++ = 'U';
  /* Emit the eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2753 
2754 /* Given a token TYPE corresponding to a digraph, return a pointer to
2755    the spelling of the digraph.  */
2756 static const unsigned char *
2757 cpp_digraph2name (enum cpp_ttype type)
2758 {
2759   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2760 }
2761 
2762 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2763    The buffer must already contain the enough space to hold the
2764    token's spelling.  Returns a pointer to the character after the
2765    last character written.  */
2766 unsigned char *
2767 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2768 {
2769   size_t i;
2770   const unsigned char *name = NODE_NAME (ident);
2771 
2772   for (i = 0; i < NODE_LEN (ident); i++)
2773     if (name[i] & ~0x7F)
2774       {
2775 	i += utf8_to_ucn (buffer, name + i) - 1;
2776 	buffer += 10;
2777       }
2778     else
2779       *buffer++ = name[i];
2780 
2781   return buffer;
2782 }
2783 
2784 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2785    already contain the enough space to hold the token's spelling.
2786    Returns a pointer to the character after the last character written.
2787    FORSTRING is true if this is to be the spelling after translation
2788    phase 1 (with the original spelling of extended identifiers), false
2789    if extended identifiers should always be written using UCNs (there is
2790    no option for always writing them in the internal UTF-8 form).
2791    FIXME: Would be nice if we didn't need the PFILE argument.  */
2792 unsigned char *
2793 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2794 		 unsigned char *buffer, bool forstring)
2795 {
2796   switch (TOKEN_SPELL (token))
2797     {
2798     case SPELL_OPERATOR:
2799       {
2800 	const unsigned char *spelling;
2801 	unsigned char c;
2802 
2803 	if (token->flags & DIGRAPH)
2804 	  spelling = cpp_digraph2name (token->type);
2805 	else if (token->flags & NAMED_OP)
2806 	  goto spell_ident;
2807 	else
2808 	  spelling = TOKEN_NAME (token);
2809 
2810 	while ((c = *spelling++) != '\0')
2811 	  *buffer++ = c;
2812       }
2813       break;
2814 
2815     spell_ident:
2816     case SPELL_IDENT:
2817       if (forstring)
2818 	{
2819 	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
2820 		  NODE_LEN (token->val.node.spelling));
2821 	  buffer += NODE_LEN (token->val.node.spelling);
2822 	}
2823       else
2824 	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2825       break;
2826 
2827     case SPELL_LITERAL:
2828       memcpy (buffer, token->val.str.text, token->val.str.len);
2829       buffer += token->val.str.len;
2830       break;
2831 
2832     case SPELL_NONE:
2833       cpp_error (pfile, CPP_DL_ICE,
2834 		 "unspellable token %s", TOKEN_NAME (token));
2835       break;
2836     }
2837 
2838   return buffer;
2839 }
2840 
2841 /* Returns TOKEN spelt as a null-terminated string.  The string is
2842    freed when the reader is destroyed.  Useful for diagnostics.  */
2843 unsigned char *
2844 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2845 {
2846   unsigned int len = cpp_token_len (token) + 1;
2847   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2848 
2849   end = cpp_spell_token (pfile, token, start, false);
2850   end[0] = '\0';
2851 
2852   return start;
2853 }
2854 
2855 /* Returns a pointer to a string which spells the token defined by
2856    TYPE and FLAGS.  Used by C front ends, which really should move to
2857    using cpp_token_as_text.  */
2858 const char *
2859 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2860 {
2861   if (flags & DIGRAPH)
2862     return (const char *) cpp_digraph2name (type);
2863   else if (flags & NAMED_OP)
2864     return cpp_named_operator2name (type);
2865 
2866   return (const char *) token_spellings[type].name;
2867 }
2868 
2869 /* Writes the spelling of token to FP, without any preceding space.
2870    Separated from cpp_spell_token for efficiency - to avoid stdio
2871    double-buffering.  */
2872 void
2873 cpp_output_token (const cpp_token *token, FILE *fp)
2874 {
2875   switch (TOKEN_SPELL (token))
2876     {
2877     case SPELL_OPERATOR:
2878       {
2879 	const unsigned char *spelling;
2880 	int c;
2881 
2882 	if (token->flags & DIGRAPH)
2883 	  spelling = cpp_digraph2name (token->type);
2884 	else if (token->flags & NAMED_OP)
2885 	  goto spell_ident;
2886 	else
2887 	  spelling = TOKEN_NAME (token);
2888 
2889 	c = *spelling;
2890 	do
2891 	  putc (c, fp);
2892 	while ((c = *++spelling) != '\0');
2893       }
2894       break;
2895 
2896     spell_ident:
2897     case SPELL_IDENT:
2898       {
2899 	size_t i;
2900 	const unsigned char * name = NODE_NAME (token->val.node.node);
2901 
2902 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2903 	  if (name[i] & ~0x7F)
2904 	    {
2905 	      unsigned char buffer[10];
2906 	      i += utf8_to_ucn (buffer, name + i) - 1;
2907 	      fwrite (buffer, 1, 10, fp);
2908 	    }
2909 	  else
2910 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
2911       }
2912       break;
2913 
2914     case SPELL_LITERAL:
2915       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2916       break;
2917 
2918     case SPELL_NONE:
2919       /* An error, most probably.  */
2920       break;
2921     }
2922 }
2923 
2924 /* Compare two tokens.  */
2925 int
2926 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2927 {
2928   if (a->type == b->type && a->flags == b->flags)
2929     switch (TOKEN_SPELL (a))
2930       {
2931       default:			/* Keep compiler happy.  */
2932       case SPELL_OPERATOR:
2933 	/* token_no is used to track where multiple consecutive ##
2934 	   tokens were originally located.  */
2935 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2936       case SPELL_NONE:
2937 	return (a->type != CPP_MACRO_ARG
2938 		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2939 		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2940       case SPELL_IDENT:
2941 	return (a->val.node.node == b->val.node.node
2942 		&& a->val.node.spelling == b->val.node.spelling);
2943       case SPELL_LITERAL:
2944 	return (a->val.str.len == b->val.str.len
2945 		&& !memcmp (a->val.str.text, b->val.str.text,
2946 			    a->val.str.len));
2947       }
2948 
2949   return 0;
2950 }
2951 
2952 /* Returns nonzero if a space should be inserted to avoid an
2953    accidental token paste for output.  For simplicity, it is
2954    conservative, and occasionally advises a space where one is not
2955    needed, e.g. "." and ".2".  */
2956 int
2957 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2958 		 const cpp_token *token2)
2959 {
2960   enum cpp_ttype a = token1->type, b = token2->type;
2961   cppchar_t c;
2962 
2963   if (token1->flags & NAMED_OP)
2964     a = CPP_NAME;
2965   if (token2->flags & NAMED_OP)
2966     b = CPP_NAME;
2967 
2968   c = EOF;
2969   if (token2->flags & DIGRAPH)
2970     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2971   else if (token_spellings[b].category == SPELL_OPERATOR)
2972     c = token_spellings[b].name[0];
2973 
2974   /* Quickly get everything that can paste with an '='.  */
2975   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2976     return 1;
2977 
2978   switch (a)
2979     {
2980     case CPP_GREATER:	return c == '>';
2981     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
2982     case CPP_PLUS:	return c == '+';
2983     case CPP_MINUS:	return c == '-' || c == '>';
2984     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
2985     case CPP_MOD:	return c == ':' || c == '>';
2986     case CPP_AND:	return c == '&';
2987     case CPP_OR:	return c == '|';
2988     case CPP_COLON:	return c == ':' || c == '>';
2989     case CPP_DEREF:	return c == '*';
2990     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
2991     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
2992     case CPP_NAME:	return ((b == CPP_NUMBER
2993 				 && name_p (pfile, &token2->val.str))
2994 				|| b == CPP_NAME
2995 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
2996     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
2997 				|| c == '.' || c == '+' || c == '-');
2998 				      /* UCNs */
2999     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
3000 				 && b == CPP_NAME)
3001 				|| (CPP_OPTION (pfile, objc)
3002 				    && token1->val.str.text[0] == '@'
3003 				    && (b == CPP_NAME || b == CPP_STRING)));
3004     case CPP_STRING:
3005     case CPP_WSTRING:
3006     case CPP_UTF8STRING:
3007     case CPP_STRING16:
3008     case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
3009 				&& (b == CPP_NAME
3010 				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
3011 					&& ISIDST (token2->val.str.text[0]))));
3012 
3013     default:		break;
3014     }
3015 
3016   return 0;
3017 }
3018 
3019 /* Output all the remaining tokens on the current line, and a newline
3020    character, to FP.  Leading whitespace is removed.  If there are
3021    macros, special token padding is not performed.  */
3022 void
3023 cpp_output_line (cpp_reader *pfile, FILE *fp)
3024 {
3025   const cpp_token *token;
3026 
3027   token = cpp_get_token (pfile);
3028   while (token->type != CPP_EOF)
3029     {
3030       cpp_output_token (token, fp);
3031       token = cpp_get_token (pfile);
3032       if (token->flags & PREV_WHITE)
3033 	putc (' ', fp);
3034     }
3035 
3036   putc ('\n', fp);
3037 }
3038 
3039 /* Return a string representation of all the remaining tokens on the
3040    current line.  The result is allocated using xmalloc and must be
3041    freed by the caller.  */
3042 unsigned char *
3043 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3044 {
3045   const cpp_token *token;
3046   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3047   unsigned int alloced = 120 + out;
3048   unsigned char *result = (unsigned char *) xmalloc (alloced);
3049 
3050   /* If DIR_NAME is empty, there are no initial contents.  */
3051   if (dir_name)
3052     {
3053       sprintf ((char *) result, "#%s ", dir_name);
3054       out += 2;
3055     }
3056 
3057   token = cpp_get_token (pfile);
3058   while (token->type != CPP_EOF)
3059     {
3060       unsigned char *last;
3061       /* Include room for a possible space and the terminating nul.  */
3062       unsigned int len = cpp_token_len (token) + 2;
3063 
3064       if (out + len > alloced)
3065 	{
3066 	  alloced *= 2;
3067 	  if (out + len > alloced)
3068 	    alloced = out + len;
3069 	  result = (unsigned char *) xrealloc (result, alloced);
3070 	}
3071 
3072       last = cpp_spell_token (pfile, token, &result[out], 0);
3073       out = last - result;
3074 
3075       token = cpp_get_token (pfile);
3076       if (token->flags & PREV_WHITE)
3077 	result[out++] = ' ';
3078     }
3079 
3080   result[out] = '\0';
3081   return result;
3082 }
3083 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest data area, in bytes, that new_buff will allocate.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer _cpp_get_buff will reuse for a request of
   MIN_SIZE bytes; anything bigger is left on the free list.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF: MIN_EXTRA bytes plus twice the
   room still available in BUFF.  Both arguments are parenthesized so
   that any expression may be passed; BUFF is evaluated twice.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3098 
3099 /* Create a new allocation buffer.  Place the control block at the end
3100    of the buffer, so that buffer overflows will cause immediate chaos.  */
3101 static _cpp_buff *
3102 new_buff (size_t len)
3103 {
3104   _cpp_buff *result;
3105   unsigned char *base;
3106 
3107   if (len < MIN_BUFF_SIZE)
3108     len = MIN_BUFF_SIZE;
3109   len = CPP_ALIGN (len);
3110 
3111 #ifdef ENABLE_VALGRIND_CHECKING
3112   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3113      struct first.  */
3114   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3115   base = XNEWVEC (unsigned char, len + slen);
3116   result = (_cpp_buff *) base;
3117   base += slen;
3118 #else
3119   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3120   result = (_cpp_buff *) (base + len);
3121 #endif
3122   result->base = base;
3123   result->cur = base;
3124   result->limit = base + len;
3125   result->next = NULL;
3126   return result;
3127 }
3128 
3129 /* Place a chain of unwanted allocation buffers on the free list.  */
3130 void
3131 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3132 {
3133   _cpp_buff *end = buff;
3134 
3135   while (end->next)
3136     end = end->next;
3137   end->next = pfile->free_buffs;
3138   pfile->free_buffs = buff;
3139 }
3140 
3141 /* Return a free buffer of size at least MIN_SIZE.  */
3142 _cpp_buff *
3143 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3144 {
3145   _cpp_buff *result, **p;
3146 
3147   for (p = &pfile->free_buffs;; p = &(*p)->next)
3148     {
3149       size_t size;
3150 
3151       if (*p == NULL)
3152 	return new_buff (min_size);
3153       result = *p;
3154       size = result->limit - result->base;
3155       /* Return a buffer that's big enough, but don't waste one that's
3156          way too big.  */
3157       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3158 	break;
3159     }
3160 
3161   *p = result->next;
3162   result->next = NULL;
3163   result->cur = result->base;
3164   return result;
3165 }
3166 
3167 /* Creates a new buffer with enough space to hold the uncommitted
3168    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3169    the excess bytes to the new buffer.  Chains the new buffer after
3170    BUFF, and returns the new buffer.  */
3171 _cpp_buff *
3172 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3173 {
3174   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3175   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3176 
3177   buff->next = new_buff;
3178   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3179   return new_buff;
3180 }
3181 
3182 /* Creates a new buffer with enough space to hold the uncommitted
3183    remaining bytes of the buffer pointed to by BUFF, and at least
3184    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3185    Chains the new buffer before the buffer pointed to by BUFF, and
3186    updates the pointer to point to the new buffer.  */
3187 void
3188 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3189 {
3190   _cpp_buff *new_buff, *old_buff = *pbuff;
3191   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3192 
3193   new_buff = _cpp_get_buff (pfile, size);
3194   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3195   new_buff->next = old_buff;
3196   *pbuff = new_buff;
3197 }
3198 
3199 /* Free a chain of buffers starting at BUFF.  */
3200 void
3201 _cpp_free_buff (_cpp_buff *buff)
3202 {
3203   _cpp_buff *next;
3204 
3205   for (; buff; buff = next)
3206     {
3207       next = buff->next;
3208 #ifdef ENABLE_VALGRIND_CHECKING
3209       free (buff);
3210 #else
3211       free (buff->base);
3212 #endif
3213     }
3214 }
3215 
3216 /* Allocate permanent, unaligned storage of length LEN.  */
3217 unsigned char *
3218 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3219 {
3220   _cpp_buff *buff = pfile->u_buff;
3221   unsigned char *result = buff->cur;
3222 
3223   if (len > (size_t) (buff->limit - result))
3224     {
3225       buff = _cpp_get_buff (pfile, len);
3226       buff->next = pfile->u_buff;
3227       pfile->u_buff = buff;
3228       result = buff->cur;
3229     }
3230 
3231   buff->cur = result + len;
3232   return result;
3233 }
3234 
3235 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3236    That buffer is used for growing allocations when saving macro
3237    replacement lists in a #define, and when parsing an answer to an
3238    assertion in #assert, #unassert or #if (and therefore possibly
3239    whilst expanding macros).  It therefore must not be used by any
3240    code that they might call: specifically the lexer and the guts of
3241    the macro expander.
3242 
3243    All existing other uses clearly fit this restriction: storing
3244    registered pragmas during initialization.  */
3245 unsigned char *
3246 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3247 {
3248   _cpp_buff *buff = pfile->a_buff;
3249   unsigned char *result = buff->cur;
3250 
3251   if (len > (size_t) (buff->limit - result))
3252     {
3253       buff = _cpp_get_buff (pfile, len);
3254       buff->next = pfile->a_buff;
3255       pfile->a_buff = buff;
3256       result = buff->cur;
3257     }
3258 
3259   buff->cur = result + len;
3260   return result;
3261 }
3262 
3263 /* Say which field of TOK is in use.  */
3264 
3265 enum cpp_token_fld_kind
3266 cpp_token_val_index (const cpp_token *tok)
3267 {
3268   switch (TOKEN_SPELL (tok))
3269     {
3270     case SPELL_IDENT:
3271       return CPP_TOKEN_FLD_NODE;
3272     case SPELL_LITERAL:
3273       return CPP_TOKEN_FLD_STR;
3274     case SPELL_OPERATOR:
3275       if (tok->type == CPP_PASTE)
3276 	return CPP_TOKEN_FLD_TOKEN_NO;
3277       else
3278 	return CPP_TOKEN_FLD_NONE;
3279     case SPELL_NONE:
3280       if (tok->type == CPP_MACRO_ARG)
3281 	return CPP_TOKEN_FLD_ARG_NO;
3282       else if (tok->type == CPP_PADDING)
3283 	return CPP_TOKEN_FLD_SOURCE;
3284       else if (tok->type == CPP_PRAGMA)
3285 	return CPP_TOKEN_FLD_PRAGMA;
3286       /* else fall through */
3287     default:
3288       return CPP_TOKEN_FLD_NONE;
3289     }
3290 }
3291 
3292 /* All tokens lexed in R after calling this function will be forced to have
3293    their source_location the same as the location referenced by P, until
3294    cpp_stop_forcing_token_locations is called for R.  */
3295 
3296 void
3297 cpp_force_token_locations (cpp_reader *r, source_location *p)
3298 {
3299   r->forced_token_location_p = p;
3300 }
3301 
3302 /* Go back to assigning locations naturally for lexed tokens.  */
3303 
3304 void
3305 cpp_stop_forcing_token_locations (cpp_reader *r)
3306 {
3307   r->forced_token_location_p = NULL;
3308 }
3309