xref: /netbsd-src/external/gpl3/gcc.old/dist/libcpp/lex.c (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000-2016 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7 
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26 
/* Category tag stored in each token_spellings entry.  The numeric
   values are spelled out; they match the implicit numbering.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT    = 1,
  SPELL_LITERAL  = 2,
  SPELL_NONE     = 3
};
34 
35 struct token_spelling
36 {
37   enum spell_type category;
38   const unsigned char *name;
39 };
40 
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
43 
44 #define OP(e, s) { SPELL_OPERATOR, UC s  },
45 #define TK(e, s) { SPELL_ ## s,    UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49 
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52 
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void store_comment (cpp_reader *, cpp_token *);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 			    unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
64 
65 static _cpp_buff *new_buff (size_t);
66 
67 
68 /* Utility routine:
69 
70    Compares, the token TOKEN to the NUL-terminated string STRING.
71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
72 int
73 cpp_ideq (const cpp_token *token, const char *string)
74 {
75   if (token->type != CPP_NAME)
76     return 0;
77 
78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
79 }
80 
81 /* Record a note TYPE at byte POS into the current cleaned logical
82    line.  */
83 static void
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 {
86   if (buffer->notes_used == buffer->notes_cap)
87     {
88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90                                   buffer->notes_cap);
91     }
92 
93   buffer->notes[buffer->notes_used].pos = pos;
94   buffer->notes[buffer->notes_used].type = type;
95   buffer->notes_used++;
96 }
97 
98 
99 /* Fast path to find line special characters using optimized character
100    scanning algorithms.  Anything complicated falls back to the slow
101    path below.  Since this loop is very hot it's worth doing these kinds
102    of optimizations.
103 
104    One of the paths through the ifdefs should provide
105 
106      const uchar *search_line_fast (const uchar *s, const uchar *end);
107 
108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
109    the found character.
110 
111    Note that the last character of the buffer is *always* a newline,
112    as forced by _cpp_convert_input.  This fact can be used to avoid
113    explicitly looking for the end of the buffer.  */
114 
/* Configure only gives us an ifdef test for WORDS_BIGENDIAN.  Supply
   a value of 0 when it is absent so that the macro can be used in
   ordinary `if' conditions.  */
#if !defined (WORDS_BIGENDIAN)
# define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  Nothing
   in <stdint.h> provides that.  On most hosts it is unsigned long, but
   MS chose an LLP64 model.  When building with GCC we can ask for the
   "real" word size directly.  */
#if defined (__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below only expects word sizes of 4 or 8.  The array bound
   is 1 when that holds and -1 otherwise, so a violated expectation
   fails at compile time.  */
typedef char check_word_type_size
  [(sizeof (word_type) == 4 || sizeof (word_type) == 8) ? 1 : -1];
134 
135 /* Return X with the first N bytes forced to values that won't match one
136    of the interesting characters.  Note that NUL is not interesting.  */
137 
138 static inline word_type
139 acc_char_mask_misalign (word_type val, unsigned int n)
140 {
141   word_type mask = -1;
142   if (WORDS_BIGENDIAN)
143     mask >>= n * 8;
144   else
145     mask <<= n * 8;
146   return val & mask;
147 }
148 
149 /* Return X replicated to all byte positions within WORD_TYPE.  */
150 
151 static inline word_type
152 acc_char_replicate (uchar x)
153 {
154   word_type ret;
155 
156   ret = (x << 24) | (x << 16) | (x << 8) | x;
157   if (sizeof(word_type) == 8)
158     ret = (ret << 16 << 16) | ret;
159   return ret;
160 }
161 
162 /* Return non-zero if some byte of VAL is (probably) C.  */
163 
164 static inline word_type
165 acc_char_cmp (word_type val, word_type c)
166 {
167 #if defined(__GNUC__) && defined(__alpha__)
168   /* We can get exact results using a compare-bytes instruction.
169      Get (val == c) via (0 >= (val ^ c)).  */
170   return __builtin_alpha_cmpbge (0, val ^ c);
171 #else
172   word_type magic = 0x7efefefeU;
173   if (sizeof(word_type) == 8)
174     magic = (magic << 16 << 16) | 0xfefefefeU;
175   magic |= 1;
176 
177   val ^= c;
178   return ((val + magic) ^ ~val) & ~magic;
179 #endif
180 }
181 
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183    the found character.  If this was a false positive, return -1.  */
184 
185 static inline int
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 		word_type val ATTRIBUTE_UNUSED)
188 {
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190   /* The cmpbge instruction sets *bits* of the result corresponding to
191      matches in the bytes with no false positives.  */
192   return __builtin_ctzl (cmp);
193 #else
194   unsigned int i;
195 
196   /* ??? It would be nice to force unrolling here,
197      and have all of these constants folded.  */
198   for (i = 0; i < sizeof(word_type); ++i)
199     {
200       uchar c;
201       if (WORDS_BIGENDIAN)
202 	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203       else
204 	c = (val >> i * 8) & 0xff;
205 
206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 	return i;
208     }
209 
210   return -1;
211 #endif
212 }
213 
214 /* A version of the fast scanner using bit fiddling techniques.
215 
216    For 32-bit words, one would normally perform 16 comparisons and
217    16 branches.  With this algorithm one performs 24 arithmetic
218    operations and one branch.  Whether this is faster with a 32-bit
219    word size is going to be somewhat system dependent.
220 
221    For 64-bit words, we eliminate twice the number of comparisons
222    and branches without increasing the number of arithmetic operations.
223    It's almost certainly going to be a win with 64-bit word size.  */
224 
225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
226   ATTRIBUTE_UNUSED;
227 
228 static const uchar *
229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230 {
231   const word_type repl_nl = acc_char_replicate ('\n');
232   const word_type repl_cr = acc_char_replicate ('\r');
233   const word_type repl_bs = acc_char_replicate ('\\');
234   const word_type repl_qm = acc_char_replicate ('?');
235 
236   unsigned int misalign;
237   const word_type *p;
238   word_type val, t;
239 
240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242   val = *p;
243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244   if (misalign)
245     val = acc_char_mask_misalign (val, misalign);
246 
247   /* Main loop.  */
248   while (1)
249     {
250       t  = acc_char_cmp (val, repl_nl);
251       t |= acc_char_cmp (val, repl_cr);
252       t |= acc_char_cmp (val, repl_bs);
253       t |= acc_char_cmp (val, repl_qm);
254 
255       if (__builtin_expect (t != 0, 0))
256 	{
257 	  int i = acc_char_index (t, val);
258 	  if (i >= 0)
259 	    return (const uchar *)p + i;
260 	}
261 
262       val = *++p;
263     }
264 }
265 
266 /* Disable on Solaris 2/x86 until the following problem can be properly
267    autoconfed:
268 
269    The Solaris 10+ assembler tags objects with the instruction set
270    extensions used, so SSE4.2 executables cannot run on machines that
271    don't support that extension.  */
272 
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
274 
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.  Row order: \n, \r, \\, ?.  */
#define REPL16(C) { C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
289 
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
291 
292    This uses the PMOVMSKB instruction which was introduced with "MMX2",
293    which was packaged into SSE1; it is also present in the AMD MMX
294    extension.  Mark the function as using "sse" so that we emit a real
295    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
296 
297 static const uchar *
298 #ifndef __SSE__
299 __attribute__((__target__("sse")))
300 #endif
301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
302 {
303   typedef char v8qi __attribute__ ((__vector_size__ (8)));
304   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
305 
306   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
307   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
308   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
309   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
310 
311   unsigned int misalign, found, mask;
312   const v8qi *p;
313   v8qi data, t, c;
314 
315   /* Align the source pointer.  While MMX doesn't generate unaligned data
316      faults, this allows us to safely scan to the end of the buffer without
317      reading beyond the end of the last page.  */
318   misalign = (uintptr_t)s & 7;
319   p = (const v8qi *)((uintptr_t)s & -8);
320   data = *p;
321 
322   /* Create a mask for the bytes that are valid within the first
323      16-byte block.  The Idea here is that the AND with the mask
324      within the loop is "free", since we need some AND or TEST
325      insn in order to set the flags for the branch anyway.  */
326   mask = -1u << misalign;
327 
328   /* Main loop processing 8 bytes at a time.  */
329   goto start;
330   do
331     {
332       data = *++p;
333       mask = -1;
334 
335     start:
336       t = __builtin_ia32_pcmpeqb(data, repl_nl);
337       c = __builtin_ia32_pcmpeqb(data, repl_cr);
338       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
339       c = __builtin_ia32_pcmpeqb(data, repl_bs);
340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341       c = __builtin_ia32_pcmpeqb(data, repl_qm);
342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343       found = __builtin_ia32_pmovmskb (t);
344       found &= mask;
345     }
346   while (!found);
347 
348   __builtin_ia32_emms ();
349 
350   /* FOUND contains 1 in bits for which we matched a relevant
351      character.  Conversion to the byte index is trivial.  */
352   found = __builtin_ctz(found);
353   return (const uchar *)p + found;
354 }
355 
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
357 
358 static const uchar *
359 #ifndef __SSE2__
360 __attribute__((__target__("sse2")))
361 #endif
362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
363 {
364   typedef char v16qi __attribute__ ((__vector_size__ (16)));
365 
366   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
367   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
368   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
369   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
370 
371   unsigned int misalign, found, mask;
372   const v16qi *p;
373   v16qi data, t;
374 
375   /* Align the source pointer.  */
376   misalign = (uintptr_t)s & 15;
377   p = (const v16qi *)((uintptr_t)s & -16);
378   data = *p;
379 
380   /* Create a mask for the bytes that are valid within the first
381      16-byte block.  The Idea here is that the AND with the mask
382      within the loop is "free", since we need some AND or TEST
383      insn in order to set the flags for the branch anyway.  */
384   mask = -1u << misalign;
385 
386   /* Main loop processing 16 bytes at a time.  */
387   goto start;
388   do
389     {
390       data = *++p;
391       mask = -1;
392 
393     start:
394       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
395       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
396       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
397       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
398       found = __builtin_ia32_pmovmskb128 (t);
399       found &= mask;
400     }
401   while (!found);
402 
403   /* FOUND contains 1 in bits for which we matched a relevant
404      character.  Conversion to the byte index is trivial.  */
405   found = __builtin_ctz(found);
406   return (const uchar *)p + found;
407 }
408 
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.
   Returns a pointer to the first \n, \r, ? or \\ at or after S.  END is
   only used to decide whether an unaligned 16-byte read at S is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  const uintptr_t si = (uintptr_t) s;
  uintptr_t index;

  /* Unaligned input gets one unaligned probe before the main loop.  */
  if ((si & 15) != 0)
    {
      v16qi sv;

      /* There are less than 16 bytes left in the buffer, and less
         than 16 bytes left on the page.  Reading 16 bytes at this
         point might generate a spurious page fault.  Defer to the
         SSE2 implementation, which already handles alignment.  */
      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((si & 0xfff) > 0xff0, 0))
        return search_line_sse2 (s, end);

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      if (__builtin_expect (index < 16, 0))
        return s + index;

      /* Advance the pointer to an aligned address.  We will re-scan a
         few bytes, but we no longer need care for reading past the
         end of a page, since we're guaranteed a match.  */
      s = (const uchar *) ((si + 15) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  for (;;)
    {
      char f;

      /* By using inline assembly instead of the builtin,
         we can use the result, as well as the flags set.  */
      __asm ("%vpcmpestri\t$0, %2, %3"
             : "=c"(index), "=@ccc"(f)
             : "m"(*s), "x"(search), "a"(4), "d"(16));
      if (f)
        break;

      s += 16;
    }
#else
  /* The asm loop advances S by 16 before each probe, so start one
     block early.  */
  s -= 16;
  /* By doing the whole loop in inline assembly,
     we can make proper use of the flags set.  */
  __asm (      ".balign 16\n"
        "0:	add $16, %1\n"
        "	%vpcmpestri\t$0, (%1), %2\n"
        "	jnc 0b"
        : "=&c"(index), "+r"(s)
        : "x"(search), "a"(4), "d"(16));
#endif

  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
489 
490 /* Check the CPU capabilities.  */
491 
492 #include "../gcc/config/i386/cpuid.h"
493 
494 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
495 static search_line_fast_type search_line_fast;
496 
497 #define HAVE_init_vectorized_lexer 1
498 static inline void
499 init_vectorized_lexer (void)
500 {
501   unsigned dummy, ecx = 0, edx = 0;
502   search_line_fast_type impl = search_line_acc_char;
503   int minimum = 0;
504 
505 #if defined(__SSE4_2__)
506   minimum = 3;
507 #elif defined(__SSE2__)
508   minimum = 2;
509 #elif defined(__SSE__)
510   minimum = 1;
511 #endif
512 
513   if (minimum == 3)
514     impl = search_line_sse42;
515   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
516     {
517       if (minimum == 3 || (ecx & bit_SSE4_2))
518         impl = search_line_sse42;
519       else if (minimum == 2 || (edx & bit_SSE2))
520 	impl = search_line_sse2;
521       else if (minimum == 1 || (edx & bit_SSE))
522 	impl = search_line_mmx;
523     }
524   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
525     {
526       if (minimum == 1
527 	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
528 	impl = search_line_mmx;
529     }
530 
531   search_line_fast = impl;
532 }
533 
534 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
535 
536 /* A vection of the fast scanner using AltiVec vectorized byte compares
537    and VSX unaligned loads (when VSX is available).  This is otherwise
538    the same as the pre-GCC 5 version.  */
539 
540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
541 static const uchar *
542 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
543 {
544   typedef __attribute__((altivec(vector))) unsigned char vc;
545 
546   const vc repl_nl = {
547     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
549   };
550   const vc repl_cr = {
551     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
553   };
554   const vc repl_bs = {
555     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
557   };
558   const vc repl_qm = {
559     '?', '?', '?', '?', '?', '?', '?', '?',
560     '?', '?', '?', '?', '?', '?', '?', '?',
561   };
562   const vc zero = { 0 };
563 
564   vc data, t;
565 
566   /* Main loop processing 16 bytes at a time.  */
567   do
568     {
569       vc m_nl, m_cr, m_bs, m_qm;
570 
571       data = __builtin_vec_vsx_ld (0, s);
572       s += 16;
573 
574       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
575       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
576       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
577       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
578       t = (m_nl | m_cr) | (m_bs | m_qm);
579 
580       /* T now contains 0xff in bytes for which we matched one of the relevant
581 	 characters.  We want to exit the loop if any byte in T is non-zero.
582 	 Below is the expansion of vec_any_ne(t, zero).  */
583     }
584   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
585 
586   /* Restore s to to point to the 16 bytes we just processed.  */
587   s -= 16;
588 
589   {
590 #define N  (sizeof(vc) / sizeof(long))
591 
592     union {
593       vc v;
594       /* Statically assert that N is 2 or 4.  */
595       unsigned long l[(N == 2 || N == 4) ? N : -1];
596     } u;
597     unsigned long l, i = 0;
598 
599     u.v = t;
600 
601     /* Find the first word of T that is non-zero.  */
602     switch (N)
603       {
604       case 4:
605 	l = u.l[i++];
606 	if (l != 0)
607 	  break;
608 	s += sizeof(unsigned long);
609 	l = u.l[i++];
610 	if (l != 0)
611 	  break;
612 	s += sizeof(unsigned long);
613       case 2:
614 	l = u.l[i++];
615 	if (l != 0)
616 	  break;
617 	s += sizeof(unsigned long);
618 	l = u.l[i];
619       }
620 
621     /* L now contains 0xff in bytes for which we matched one of the
622        relevant characters.  We can find the byte index by finding
623        its bit index and dividing by 8.  */
624 #ifdef __BIG_ENDIAN__
625     l = __builtin_clzl(l) >> 3;
626 #else
627     l = __builtin_ctzl(l) >> 3;
628 #endif
629     return s + l;
630 
631 #undef N
632   }
633 }
634 
635 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
636 
637 /* A vection of the fast scanner using AltiVec vectorized byte compares.
638    This cannot be used for little endian because vec_lvsl/lvsr are
639    deprecated for little endian and the code won't work properly.  */
640 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
641    so we can't compile this function without -maltivec on the command line
642    (or implied by some other switch).  */
643 
644 static const uchar *
645 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
646 {
647   typedef __attribute__((altivec(vector))) unsigned char vc;
648 
649   const vc repl_nl = {
650     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
651     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
652   };
653   const vc repl_cr = {
654     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
655     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
656   };
657   const vc repl_bs = {
658     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
659     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
660   };
661   const vc repl_qm = {
662     '?', '?', '?', '?', '?', '?', '?', '?',
663     '?', '?', '?', '?', '?', '?', '?', '?',
664   };
665   const vc ones = {
666     -1, -1, -1, -1, -1, -1, -1, -1,
667     -1, -1, -1, -1, -1, -1, -1, -1,
668   };
669   const vc zero = { 0 };
670 
671   vc data, mask, t;
672 
673   /* Altivec loads automatically mask addresses with -16.  This lets us
674      issue the first load as early as possible.  */
675   data = __builtin_vec_ld(0, (const vc *)s);
676 
677   /* Discard bytes before the beginning of the buffer.  Do this by
678      beginning with all ones and shifting in zeros according to the
679      mis-alignment.  The LVSR instruction pulls the exact shift we
680      want from the address.  */
681   mask = __builtin_vec_lvsr(0, s);
682   mask = __builtin_vec_perm(zero, ones, mask);
683   data &= mask;
684 
685   /* While altivec loads mask addresses, we still need to align S so
686      that the offset we compute at the end is correct.  */
687   s = (const uchar *)((uintptr_t)s & -16);
688 
689   /* Main loop processing 16 bytes at a time.  */
690   goto start;
691   do
692     {
693       vc m_nl, m_cr, m_bs, m_qm;
694 
695       s += 16;
696       data = __builtin_vec_ld(0, (const vc *)s);
697 
698     start:
699       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
700       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
701       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
702       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
703       t = (m_nl | m_cr) | (m_bs | m_qm);
704 
705       /* T now contains 0xff in bytes for which we matched one of the relevant
706 	 characters.  We want to exit the loop if any byte in T is non-zero.
707 	 Below is the expansion of vec_any_ne(t, zero).  */
708     }
709   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
710 
711   {
712 #define N  (sizeof(vc) / sizeof(long))
713 
714     union {
715       vc v;
716       /* Statically assert that N is 2 or 4.  */
717       unsigned long l[(N == 2 || N == 4) ? N : -1];
718     } u;
719     unsigned long l, i = 0;
720 
721     u.v = t;
722 
723     /* Find the first word of T that is non-zero.  */
724     switch (N)
725       {
726       case 4:
727 	l = u.l[i++];
728 	if (l != 0)
729 	  break;
730 	s += sizeof(unsigned long);
731 	l = u.l[i++];
732 	if (l != 0)
733 	  break;
734 	s += sizeof(unsigned long);
735       case 2:
736 	l = u.l[i++];
737 	if (l != 0)
738 	  break;
739 	s += sizeof(unsigned long);
740 	l = u.l[i];
741       }
742 
743     /* L now contains 0xff in bytes for which we matched one of the
744        relevant characters.  We can find the byte index by finding
745        its bit index and dividing by 8.  */
746     l = __builtin_clzl(l) >> 3;
747     return s + l;
748 
749 #undef N
750   }
751 }
752 
753 #elif defined (__ARM_NEON)
754 #include "arm_neon.h"
755 
756 static const uchar *
757 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
758 {
759   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
760   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
761   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
762   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
763   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
764 
765   unsigned int misalign, found, mask;
766   const uint8_t *p;
767   uint8x16_t data;
768 
769   /* Align the source pointer.  */
770   misalign = (uintptr_t)s & 15;
771   p = (const uint8_t *)((uintptr_t)s & -16);
772   data = vld1q_u8 (p);
773 
774   /* Create a mask for the bytes that are valid within the first
775      16-byte block.  The Idea here is that the AND with the mask
776      within the loop is "free", since we need some AND or TEST
777      insn in order to set the flags for the branch anyway.  */
778   mask = (-1u << misalign) & 0xffff;
779 
780   /* Main loop, processing 16 bytes at a time.  */
781   goto start;
782 
783   do
784     {
785       uint8x8_t l;
786       uint16x4_t m;
787       uint32x2_t n;
788       uint8x16_t t, u, v, w;
789 
790       p += 16;
791       data = vld1q_u8 (p);
792       mask = 0xffff;
793 
794     start:
795       t = vceqq_u8 (data, repl_nl);
796       u = vceqq_u8 (data, repl_cr);
797       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
798       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
799       t = vandq_u8 (vorrq_u8 (v, w), xmask);
800       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
801       m = vpaddl_u8 (l);
802       n = vpaddl_u16 (m);
803 
804       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
805 	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
806       found &= mask;
807     }
808   while (!found);
809 
810   /* FOUND contains 1 in bits for which we matched a relevant
811      character.  Conversion to the byte index is trivial.  */
812   found = __builtin_ctz (found);
813   return (const uchar *)p + found;
814 }
815 
816 #else
817 
818 /* We only have one accelerated alternative.  Use a direct call so that
819    we encourage inlining.  */
820 
821 #define search_line_fast  search_line_acc_char
822 
823 #endif
824 
/* Initialize the lexer if needed.  This does something only when a
   vectorized-lexer initializer has been provided, which is signalled
   by HAVE_init_vectorized_lexer.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
834 
835 /* Returns with a logical line that contains no escaped newlines or
836    trigraphs.  This is a time-critical inner loop.  */
837 void
838 _cpp_clean_line (cpp_reader *pfile)
839 {
840   cpp_buffer *buffer;
841   const uchar *s;
842   uchar c, *d, *p;
843 
844   buffer = pfile->buffer;
845   buffer->cur_note = buffer->notes_used = 0;
846   buffer->cur = buffer->line_base = buffer->next_line;
847   buffer->need_line = false;
848   s = buffer->next_line;
849 
850   if (!buffer->from_stage3)
851     {
852       const uchar *pbackslash = NULL;
853 
854       /* Fast path.  This is the common case of an un-escaped line with
855 	 no trigraphs.  The primary win here is by not writing any
856 	 data back to memory until we have to.  */
857       while (1)
858 	{
859 	  /* Perform an optimized search for \n, \r, \\, ?.  */
860 	  s = search_line_fast (s, buffer->rlimit);
861 
862 	  c = *s;
863 	  if (c == '\\')
864 	    {
865 	      /* Record the location of the backslash and continue.  */
866 	      pbackslash = s++;
867 	    }
868 	  else if (__builtin_expect (c == '?', 0))
869 	    {
870 	      if (__builtin_expect (s[1] == '?', false)
871 		   && _cpp_trigraph_map[s[2]])
872 		{
873 		  /* Have a trigraph.  We may or may not have to convert
874 		     it.  Add a line note regardless, for -Wtrigraphs.  */
875 		  add_line_note (buffer, s, s[2]);
876 		  if (CPP_OPTION (pfile, trigraphs))
877 		    {
878 		      /* We do, and that means we have to switch to the
879 		         slow path.  */
880 		      d = (uchar *) s;
881 		      *d = _cpp_trigraph_map[s[2]];
882 		      s += 2;
883 		      goto slow_path;
884 		    }
885 		}
886 	      /* Not a trigraph.  Continue on fast-path.  */
887 	      s++;
888 	    }
889 	  else
890 	    break;
891 	}
892 
893       /* This must be \r or \n.  We're either done, or we'll be forced
894 	 to write back to the buffer and continue on the slow path.  */
895       d = (uchar *) s;
896 
897       if (__builtin_expect (s == buffer->rlimit, false))
898 	goto done;
899 
900       /* DOS line ending? */
901       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
902 	{
903 	  s++;
904 	  if (s == buffer->rlimit)
905 	    goto done;
906 	}
907 
908       if (__builtin_expect (pbackslash == NULL, true))
909 	goto done;
910 
911       /* Check for escaped newline.  */
912       p = d;
913       while (is_nvspace (p[-1]))
914 	p--;
915       if (p - 1 != pbackslash)
916 	goto done;
917 
918       /* Have an escaped newline; process it and proceed to
919 	 the slow path.  */
920       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
921       d = p - 2;
922       buffer->next_line = p - 1;
923 
924     slow_path:
925       while (1)
926 	{
927 	  c = *++s;
928 	  *++d = c;
929 
930 	  if (c == '\n' || c == '\r')
931 	    {
932 	      /* Handle DOS line endings.  */
933 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
934 		s++;
935 	      if (s == buffer->rlimit)
936 		break;
937 
938 	      /* Escaped?  */
939 	      p = d;
940 	      while (p != buffer->next_line && is_nvspace (p[-1]))
941 		p--;
942 	      if (p == buffer->next_line || p[-1] != '\\')
943 		break;
944 
945 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
946 	      d = p - 2;
947 	      buffer->next_line = p - 1;
948 	    }
949 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
950 	    {
951 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
952 	      add_line_note (buffer, d, s[2]);
953 	      if (CPP_OPTION (pfile, trigraphs))
954 		{
955 		  *d = _cpp_trigraph_map[s[2]];
956 		  s += 2;
957 		}
958 	    }
959 	}
960     }
961   else
962     {
963       while (*s != '\n' && *s != '\r')
964 	s++;
965       d = (uchar *) s;
966 
967       /* Handle DOS line endings.  */
968       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
969 	s++;
970     }
971 
972  done:
973   *d = '\n';
974   /* A sentinel note that should never be processed.  */
975   add_line_note (buffer, d + 1, '\n');
976   buffer->next_line = s + 1;
977 }
978 
979 /* Return true if the trigraph indicated by NOTE should be warned
980    about in a comment.  */
981 static bool
982 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
983 {
984   const uchar *p;
985 
986   /* Within comments we don't warn about trigraphs, unless the
987      trigraph forms an escaped newline, as that may change
988      behavior.  */
989   if (note->type != '/')
990     return false;
991 
992   /* If -trigraphs, then this was an escaped newline iff the next note
993      is coincident.  */
994   if (CPP_OPTION (pfile, trigraphs))
995     return note[1].pos == note->pos;
996 
997   /* Otherwise, see if this forms an escaped newline.  */
998   p = note->pos + 3;
999   while (is_nvspace (*p))
1000     p++;
1001 
1002   /* There might have been escaped newlines between the trigraph and the
1003      newline we found.  Hence the position test.  */
1004   return (*p == '\n' && p < note[1].pos);
1005 }
1006 
1007 /* Process the notes created by add_line_note as far as the current
1008    location.  */
1009 void
1010 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
1011 {
1012   cpp_buffer *buffer = pfile->buffer;
1013 
1014   for (;;)
1015     {
1016       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1017       unsigned int col;
1018 
1019       if (note->pos > buffer->cur)
1020 	break;
1021 
1022       buffer->cur_note++;
1023       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1024 
1025       if (note->type == '\\' || note->type == ' ')
1026 	{
1027 	  if (note->type == ' ' && !in_comment)
1028 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1029 				 "backslash and newline separated by space");
1030 
1031 	  if (buffer->next_line > buffer->rlimit)
1032 	    {
1033 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1034 				   "backslash-newline at end of file");
1035 	      /* Prevent "no newline at end of file" warning.  */
1036 	      buffer->next_line = buffer->rlimit;
1037 	    }
1038 
1039 	  buffer->line_base = note->pos;
1040 	  CPP_INCREMENT_LINE (pfile, 0);
1041 	}
1042       else if (_cpp_trigraph_map[note->type])
1043 	{
1044 	  if (CPP_OPTION (pfile, warn_trigraphs)
1045 	      && (!in_comment || warn_in_comment (pfile, note)))
1046 	    {
1047 	      if (CPP_OPTION (pfile, trigraphs))
1048 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1049                                        pfile->line_table->highest_line, col,
1050 				       "trigraph ??%c converted to %c",
1051 				       note->type,
1052 				       (int) _cpp_trigraph_map[note->type]);
1053 	      else
1054 		{
1055 		  cpp_warning_with_line
1056 		    (pfile, CPP_W_TRIGRAPHS,
1057                      pfile->line_table->highest_line, col,
1058 		     "trigraph ??%c ignored, use -trigraphs to enable",
1059 		     note->type);
1060 		}
1061 	    }
1062 	}
1063       else if (note->type == 0)
1064 	/* Already processed in lex_raw_string.  */;
1065       else
1066 	abort ();
1067     }
1068 }
1069 
1070 /* Skip a C-style block comment.  We find the end of the comment by
1071    seeing if an asterisk is before every '/' we encounter.  Returns
1072    nonzero if comment terminated by EOF, zero otherwise.
1073 
1074    Buffer->cur points to the initial asterisk of the comment.  */
1075 bool
1076 _cpp_skip_block_comment (cpp_reader *pfile)
1077 {
1078   cpp_buffer *buffer = pfile->buffer;
1079   const uchar *cur = buffer->cur;
1080   uchar c;
1081 
1082   cur++;
1083   if (*cur == '/')
1084     cur++;
1085 
1086   for (;;)
1087     {
1088       /* People like decorating comments with '*', so check for '/'
1089 	 instead for efficiency.  */
1090       c = *cur++;
1091 
1092       if (c == '/')
1093 	{
1094 	  if (cur[-2] == '*')
1095 	    break;
1096 
1097 	  /* Warn about potential nested comments, but not if the '/'
1098 	     comes immediately before the true comment delimiter.
1099 	     Don't bother to get it right across escaped newlines.  */
1100 	  if (CPP_OPTION (pfile, warn_comments)
1101 	      && cur[0] == '*' && cur[1] != '/')
1102 	    {
1103 	      buffer->cur = cur;
1104 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1105 				     pfile->line_table->highest_line,
1106 				     CPP_BUF_COL (buffer),
1107 				     "\"/*\" within comment");
1108 	    }
1109 	}
1110       else if (c == '\n')
1111 	{
1112 	  unsigned int cols;
1113 	  buffer->cur = cur - 1;
1114 	  _cpp_process_line_notes (pfile, true);
1115 	  if (buffer->next_line >= buffer->rlimit)
1116 	    return true;
1117 	  _cpp_clean_line (pfile);
1118 
1119 	  cols = buffer->next_line - buffer->line_base;
1120 	  CPP_INCREMENT_LINE (pfile, cols);
1121 
1122 	  cur = buffer->cur;
1123 	}
1124     }
1125 
1126   buffer->cur = cur;
1127   _cpp_process_line_notes (pfile, true);
1128   return false;
1129 }
1130 
1131 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1132    terminating newline.  Handles escaped newlines.  Returns nonzero
1133    if a multiline comment.  */
1134 static int
1135 skip_line_comment (cpp_reader *pfile)
1136 {
1137   cpp_buffer *buffer = pfile->buffer;
1138   source_location orig_line = pfile->line_table->highest_line;
1139 
1140   while (*buffer->cur != '\n')
1141     buffer->cur++;
1142 
1143   _cpp_process_line_notes (pfile, true);
1144   return orig_line != pfile->line_table->highest_line;
1145 }
1146 
1147 /* Skips whitespace, saving the next non-whitespace character.  */
1148 static void
1149 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1150 {
1151   cpp_buffer *buffer = pfile->buffer;
1152   bool saw_NUL = false;
1153 
1154   do
1155     {
1156       /* Horizontal space always OK.  */
1157       if (c == ' ' || c == '\t')
1158 	;
1159       /* Just \f \v or \0 left.  */
1160       else if (c == '\0')
1161 	saw_NUL = true;
1162       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1163 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1164 			     CPP_BUF_COL (buffer),
1165 			     "%s in preprocessing directive",
1166 			     c == '\f' ? "form feed" : "vertical tab");
1167 
1168       c = *buffer->cur++;
1169     }
1170   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1171   while (is_nvspace (c));
1172 
1173   if (saw_NUL)
1174     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1175 
1176   buffer->cur--;
1177 }
1178 
1179 /* See if the characters of a number token are valid in a name (no
1180    '.', '+' or '-').  */
1181 static int
1182 name_p (cpp_reader *pfile, const cpp_string *string)
1183 {
1184   unsigned int i;
1185 
1186   for (i = 0; i < string->len; i++)
1187     if (!is_idchar (string->text[i]))
1188       return 0;
1189 
1190   return 1;
1191 }
1192 
1193 /* After parsing an identifier or other sequence, produce a warning about
1194    sequences not in NFC/NFKC.  */
1195 static void
1196 warn_about_normalization (cpp_reader *pfile,
1197 			  const cpp_token *token,
1198 			  const struct normalize_state *s)
1199 {
1200   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1201       && !pfile->state.skipping)
1202     {
1203       /* Make sure that the token is printed using UCNs, even
1204 	 if we'd otherwise happily print UTF-8.  */
1205       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1206       size_t sz;
1207 
1208       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1209       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1210 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1211 			       "`%.*s' is not in NFKC", (int) sz, buf);
1212       else
1213 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1214 			       "`%.*s' is not in NFC", (int) sz, buf);
1215       free (buf);
1216     }
1217 }
1218 
1219 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1220    an identifier.  FIRST is TRUE if this starts an identifier.  */
1221 static bool
1222 forms_identifier_p (cpp_reader *pfile, int first,
1223 		    struct normalize_state *state)
1224 {
1225   cpp_buffer *buffer = pfile->buffer;
1226 
1227   if (*buffer->cur == '$')
1228     {
1229       if (!CPP_OPTION (pfile, dollars_in_ident))
1230 	return false;
1231 
1232       buffer->cur++;
1233       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1234 	{
1235 	  CPP_OPTION (pfile, warn_dollars) = 0;
1236 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1237 	}
1238 
1239       return true;
1240     }
1241 
1242   /* Is this a syntactically valid UCN?  */
1243   if (CPP_OPTION (pfile, extended_identifiers)
1244       && *buffer->cur == '\\'
1245       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1246     {
1247       cppchar_t s;
1248       buffer->cur += 2;
1249       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1250 			  state, &s))
1251 	return true;
1252       buffer->cur -= 2;
1253     }
1254 
1255   return false;
1256 }
1257 
1258 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1259 static cpp_hashnode *
1260 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1261 {
1262   cpp_hashnode *result;
1263   const uchar *cur;
1264   unsigned int len;
1265   unsigned int hash = HT_HASHSTEP (0, *base);
1266 
1267   cur = base + 1;
1268   while (ISIDNUM (*cur))
1269     {
1270       hash = HT_HASHSTEP (hash, *cur);
1271       cur++;
1272     }
1273   len = cur - base;
1274   hash = HT_HASHFINISH (hash, len);
1275   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1276 					      base, len, hash, HT_ALLOC));
1277 
1278   /* Rarely, identifiers require diagnostics when lexed.  */
1279   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1280 			&& !pfile->state.skipping, 0))
1281     {
1282       /* It is allowed to poison the same identifier twice.  */
1283       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1284 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1285 		   NODE_NAME (result));
1286 
1287       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1288 	 replacement list of a variadic macro.  */
1289       if (result == pfile->spec_nodes.n__VA_ARGS__
1290 	  && !pfile->state.va_args_ok)
1291 	{
1292 	  if (CPP_OPTION (pfile, cplusplus))
1293 	    cpp_error (pfile, CPP_DL_PEDWARN,
1294 		       "__VA_ARGS__ can only appear in the expansion"
1295 		       " of a C++11 variadic macro");
1296 	  else
1297 	    cpp_error (pfile, CPP_DL_PEDWARN,
1298 		       "__VA_ARGS__ can only appear in the expansion"
1299 		       " of a C99 variadic macro");
1300 	}
1301 
1302       /* For -Wc++-compat, warn about use of C++ named operators.  */
1303       if (result->flags & NODE_WARN_OPERATOR)
1304 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1305 		     "identifier \"%s\" is a special operator name in C++",
1306 		     NODE_NAME (result));
1307     }
1308 
1309   return result;
1310 }
1311 
1312 /* Get the cpp_hashnode of an identifier specified by NAME in
1313    the current cpp_reader object.  If none is found, NULL is returned.  */
1314 cpp_hashnode *
1315 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1316 {
1317   cpp_hashnode *result;
1318   result = lex_identifier_intern (pfile, (uchar *) name);
1319   return result;
1320 }
1321 
1322 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1323 static cpp_hashnode *
1324 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1325 		struct normalize_state *nst, cpp_hashnode **spelling)
1326 {
1327   cpp_hashnode *result;
1328   const uchar *cur;
1329   unsigned int len;
1330   unsigned int hash = HT_HASHSTEP (0, *base);
1331 
1332   cur = pfile->buffer->cur;
1333   if (! starts_ucn)
1334     {
1335       while (ISIDNUM (*cur))
1336 	{
1337 	  hash = HT_HASHSTEP (hash, *cur);
1338 	  cur++;
1339 	}
1340       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1341     }
1342   pfile->buffer->cur = cur;
1343   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1344     {
1345       /* Slower version for identifiers containing UCNs (or $).  */
1346       do {
1347 	while (ISIDNUM (*pfile->buffer->cur))
1348 	  {
1349 	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1350 	    pfile->buffer->cur++;
1351 	  }
1352       } while (forms_identifier_p (pfile, false, nst));
1353       result = _cpp_interpret_identifier (pfile, base,
1354 					  pfile->buffer->cur - base);
1355       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1356     }
1357   else
1358     {
1359       len = cur - base;
1360       hash = HT_HASHFINISH (hash, len);
1361 
1362       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1363 						  base, len, hash, HT_ALLOC));
1364       *spelling = result;
1365     }
1366 
1367   /* Rarely, identifiers require diagnostics when lexed.  */
1368   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1369 			&& !pfile->state.skipping, 0))
1370     {
1371       /* It is allowed to poison the same identifier twice.  */
1372       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1373 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1374 		   NODE_NAME (result));
1375 
1376       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1377 	 replacement list of a variadic macro.  */
1378       if (result == pfile->spec_nodes.n__VA_ARGS__
1379 	  && !pfile->state.va_args_ok)
1380 	{
1381 	  if (CPP_OPTION (pfile, cplusplus))
1382 	    cpp_error (pfile, CPP_DL_PEDWARN,
1383 		       "__VA_ARGS__ can only appear in the expansion"
1384 		       " of a C++11 variadic macro");
1385 	  else
1386 	    cpp_error (pfile, CPP_DL_PEDWARN,
1387 		       "__VA_ARGS__ can only appear in the expansion"
1388 		       " of a C99 variadic macro");
1389 	}
1390 
1391       /* For -Wc++-compat, warn about use of C++ named operators.  */
1392       if (result->flags & NODE_WARN_OPERATOR)
1393 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1394 		     "identifier \"%s\" is a special operator name in C++",
1395 		     NODE_NAME (result));
1396     }
1397 
1398   return result;
1399 }
1400 
1401 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1402 static void
1403 lex_number (cpp_reader *pfile, cpp_string *number,
1404 	    struct normalize_state *nst)
1405 {
1406   const uchar *cur;
1407   const uchar *base;
1408   uchar *dest;
1409 
1410   base = pfile->buffer->cur - 1;
1411   do
1412     {
1413       cur = pfile->buffer->cur;
1414 
1415       /* N.B. ISIDNUM does not include $.  */
1416       while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1417 	     || VALID_SIGN (*cur, cur[-1]))
1418 	{
1419 	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1420 	  cur++;
1421 	}
1422       /* A number can't end with a digit separator.  */
1423       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1424 	--cur;
1425 
1426       pfile->buffer->cur = cur;
1427     }
1428   while (forms_identifier_p (pfile, false, nst));
1429 
1430   number->len = cur - base;
1431   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1432   memcpy (dest, base, number->len);
1433   dest[number->len] = '\0';
1434   number->text = dest;
1435 }
1436 
1437 /* Create a token of type TYPE with a literal spelling.  */
1438 static void
1439 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1440 		unsigned int len, enum cpp_ttype type)
1441 {
1442   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1443 
1444   memcpy (dest, base, len);
1445   dest[len] = '\0';
1446   token->type = type;
1447   token->val.str.len = len;
1448   token->val.str.text = dest;
1449 }
1450 
1451 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1452    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1453 
1454 static void
1455 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1456 		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1457 {
1458   _cpp_buff *first_buff = *first_buff_p;
1459   _cpp_buff *last_buff = *last_buff_p;
1460 
1461   if (first_buff == NULL)
1462     first_buff = last_buff = _cpp_get_buff (pfile, len);
1463   else if (len > BUFF_ROOM (last_buff))
1464     {
1465       size_t room = BUFF_ROOM (last_buff);
1466       memcpy (BUFF_FRONT (last_buff), base, room);
1467       BUFF_FRONT (last_buff) += room;
1468       base += room;
1469       len -= room;
1470       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1471     }
1472 
1473   memcpy (BUFF_FRONT (last_buff), base, len);
1474   BUFF_FRONT (last_buff) += len;
1475 
1476   *first_buff_p = first_buff;
1477   *last_buff_p = last_buff;
1478 }
1479 
1480 
1481 /* Returns true if a macro has been defined.
1482    This might not work if compile with -save-temps,
1483    or preprocess separately from compilation.  */
1484 
1485 static bool
1486 is_macro(cpp_reader *pfile, const uchar *base)
1487 {
1488   const uchar *cur = base;
1489   if (! ISIDST (*cur))
1490     return false;
1491   unsigned int hash = HT_HASHSTEP (0, *cur);
1492   ++cur;
1493   while (ISIDNUM (*cur))
1494     {
1495       hash = HT_HASHSTEP (hash, *cur);
1496       ++cur;
1497     }
1498   hash = HT_HASHFINISH (hash, cur - base);
1499 
1500   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1501 					base, cur - base, hash, HT_NO_INSERT));
1502 
1503   return !result ? false : (result->type == NT_MACRO);
1504 }
1505 
1506 
1507 /* Lexes a raw string.  The stored string contains the spelling, including
1508    double quotes, delimiter string, '(' and ')', any leading
1509    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1510    literal, or CPP_OTHER if it was not properly terminated.
1511 
1512    The spelling is NUL-terminated, but it is not guaranteed that this
1513    is the first NUL since embedded NULs are preserved.  */
1514 
1515 static void
1516 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1517 		const uchar *cur)
1518 {
1519   uchar raw_prefix[17];
1520   uchar temp_buffer[18];
1521   const uchar *orig_base;
1522   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1523   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1524   raw_str_phase phase = RAW_STR_PREFIX;
1525   enum cpp_ttype type;
1526   size_t total_len = 0;
1527   /* Index into temp_buffer during phases other than RAW_STR,
1528      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1529      be appended to temp_buffer.  */
1530   size_t temp_buffer_len = 0;
1531   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1532   size_t raw_prefix_start;
1533   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1534 
1535   type = (*base == 'L' ? CPP_WSTRING :
1536 	  *base == 'U' ? CPP_STRING32 :
1537 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1538 	  : CPP_STRING);
1539 
1540 #define BUF_APPEND(STR,LEN)					\
1541       do {							\
1542 	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
1543 			&first_buff, &last_buff);		\
1544 	total_len += (LEN);					\
1545 	if (__builtin_expect (temp_buffer_len < 17, 0)		\
1546 	    && (const uchar *)(STR) != base			\
1547 	    && (LEN) <= 2)					\
1548 	  {							\
1549 	    memcpy (temp_buffer + temp_buffer_len,		\
1550 		    (const uchar *)(STR), (LEN));		\
1551 	    temp_buffer_len += (LEN);				\
1552 	  }							\
1553       } while (0);
1554 
1555   orig_base = base;
1556   ++cur;
1557   raw_prefix_start = cur - base;
1558   for (;;)
1559     {
1560       cppchar_t c;
1561 
1562       /* If we previously performed any trigraph or line splicing
1563 	 transformations, undo them in between the opening and closing
1564 	 double quote.  */
1565       while (note->pos < cur)
1566 	++note;
1567       for (; note->pos == cur; ++note)
1568 	{
1569 	  switch (note->type)
1570 	    {
1571 	    case '\\':
1572 	    case ' ':
1573 	      /* Restore backslash followed by newline.  */
1574 	      BUF_APPEND (base, cur - base);
1575 	      base = cur;
1576 	      BUF_APPEND ("\\", 1);
1577 	    after_backslash:
1578 	      if (note->type == ' ')
1579 		{
1580 		  /* GNU backslash whitespace newline extension.  FIXME
1581 		     could be any sequence of non-vertical space.  When we
1582 		     can properly restore any such sequence, we should mark
1583 		     this note as handled so _cpp_process_line_notes
1584 		     doesn't warn.  */
1585 		  BUF_APPEND (" ", 1);
1586 		}
1587 
1588 	      BUF_APPEND ("\n", 1);
1589 	      break;
1590 
1591 	    case 0:
1592 	      /* Already handled.  */
1593 	      break;
1594 
1595 	    default:
1596 	      if (_cpp_trigraph_map[note->type])
1597 		{
1598 		  /* Don't warn about this trigraph in
1599 		     _cpp_process_line_notes, since trigraphs show up as
1600 		     trigraphs in raw strings.  */
1601 		  uchar type = note->type;
1602 		  note->type = 0;
1603 
1604 		  if (!CPP_OPTION (pfile, trigraphs))
1605 		    /* If we didn't convert the trigraph in the first
1606 		       place, don't do anything now either.  */
1607 		    break;
1608 
1609 		  BUF_APPEND (base, cur - base);
1610 		  base = cur;
1611 		  BUF_APPEND ("??", 2);
1612 
1613 		  /* ??/ followed by newline gets two line notes, one for
1614 		     the trigraph and one for the backslash/newline.  */
1615 		  if (type == '/' && note[1].pos == cur)
1616 		    {
1617 		      if (note[1].type != '\\'
1618 			  && note[1].type != ' ')
1619 			abort ();
1620 		      BUF_APPEND ("/", 1);
1621 		      ++note;
1622 		      goto after_backslash;
1623 		    }
1624 		  else
1625 		    {
1626 		      /* Skip the replacement character.  */
1627 		      base = ++cur;
1628 		      BUF_APPEND (&type, 1);
1629 		      c = type;
1630 		      goto check_c;
1631 		    }
1632 		}
1633 	      else
1634 		abort ();
1635 	      break;
1636 	    }
1637 	}
1638       c = *cur++;
1639       if (__builtin_expect (temp_buffer_len < 17, 0))
1640 	temp_buffer[temp_buffer_len++] = c;
1641 
1642      check_c:
1643       if (phase == RAW_STR_PREFIX)
1644 	{
1645 	  while (raw_prefix_len < temp_buffer_len)
1646 	    {
1647 	      raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1648 	      switch (raw_prefix[raw_prefix_len])
1649 		{
1650 		case ' ': case '(': case ')': case '\\': case '\t':
1651 		case '\v': case '\f': case '\n': default:
1652 		  break;
1653 		/* Basic source charset except the above chars.  */
1654 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1655 		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1656 		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1657 		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1658 		case 'y': case 'z':
1659 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1660 		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1661 		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1662 		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1663 		case 'Y': case 'Z':
1664 		case '0': case '1': case '2': case '3': case '4': case '5':
1665 		case '6': case '7': case '8': case '9':
1666 		case '_': case '{': case '}': case '#': case '[': case ']':
1667 		case '<': case '>': case '%': case ':': case ';': case '.':
1668 		case '?': case '*': case '+': case '-': case '/': case '^':
1669 		case '&': case '|': case '~': case '!': case '=': case ',':
1670 		case '"': case '\'':
1671 		  if (raw_prefix_len < 16)
1672 		    {
1673 		      raw_prefix_len++;
1674 		      continue;
1675 		    }
1676 		  break;
1677 		}
1678 
1679 	      if (raw_prefix[raw_prefix_len] != '(')
1680 		{
1681 		  int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1682 		  if (raw_prefix_len == 16)
1683 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1684 					 col, "raw string delimiter longer "
1685 					      "than 16 characters");
1686 		  else if (raw_prefix[raw_prefix_len] == '\n')
1687 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1688 					 col, "invalid new-line in raw "
1689 					      "string delimiter");
1690 		  else
1691 		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1692 					 col, "invalid character '%c' in "
1693 					      "raw string delimiter",
1694 					 (int) raw_prefix[raw_prefix_len]);
1695 		  pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1696 		  create_literal (pfile, token, orig_base,
1697 				  raw_prefix_start - 1, CPP_OTHER);
1698 		  if (first_buff)
1699 		    _cpp_release_buff (pfile, first_buff);
1700 		  return;
1701 		}
1702 	      raw_prefix[raw_prefix_len] = '"';
1703 	      phase = RAW_STR;
1704 	      /* Nothing should be appended to temp_buffer during
1705 		 RAW_STR phase.  */
1706 	      temp_buffer_len = 17;
1707 	      break;
1708 	    }
1709 	  continue;
1710 	}
1711       else if (phase == RAW_STR_SUFFIX)
1712 	{
1713 	  while (raw_suffix_len <= raw_prefix_len
1714 		 && raw_suffix_len < temp_buffer_len
1715 		 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1716 	    raw_suffix_len++;
1717 	  if (raw_suffix_len > raw_prefix_len)
1718 	    break;
1719 	  if (raw_suffix_len == temp_buffer_len)
1720 	    continue;
1721 	  phase = RAW_STR;
1722 	  /* Nothing should be appended to temp_buffer during
1723 	     RAW_STR phase.  */
1724 	  temp_buffer_len = 17;
1725 	}
1726       if (c == ')')
1727 	{
1728 	  phase = RAW_STR_SUFFIX;
1729 	  raw_suffix_len = 0;
1730 	  temp_buffer_len = 0;
1731 	}
1732       else if (c == '\n')
1733 	{
1734 	  if (pfile->state.in_directive
1735 	      || (pfile->state.parsing_args
1736 		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
1737 	    {
1738 	      cur--;
1739 	      type = CPP_OTHER;
1740 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1741 				   "unterminated raw string");
1742 	      break;
1743 	    }
1744 
1745 	  BUF_APPEND (base, cur - base);
1746 
1747 	  if (pfile->buffer->cur < pfile->buffer->rlimit)
1748 	    CPP_INCREMENT_LINE (pfile, 0);
1749 	  pfile->buffer->need_line = true;
1750 
1751 	  pfile->buffer->cur = cur-1;
1752 	  _cpp_process_line_notes (pfile, false);
1753 	  if (!_cpp_get_fresh_line (pfile))
1754 	    {
1755 	      source_location src_loc = token->src_loc;
1756 	      token->type = CPP_EOF;
1757 	      /* Tell the compiler the line number of the EOF token.  */
1758 	      token->src_loc = pfile->line_table->highest_line;
1759 	      token->flags = BOL;
1760 	      if (first_buff != NULL)
1761 		_cpp_release_buff (pfile, first_buff);
1762 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1763 				   "unterminated raw string");
1764 	      return;
1765 	    }
1766 
1767 	  cur = base = pfile->buffer->cur;
1768 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
1769 	}
1770     }
1771 
1772   if (CPP_OPTION (pfile, user_literals))
1773     {
1774       /* If a string format macro, say from inttypes.h, is placed touching
1775 	 a string literal it could be parsed as a C++11 user-defined string
1776 	 literal thus breaking the program.
1777 	 Try to identify macros with is_macro. A warning is issued. */
1778       if (is_macro (pfile, cur))
1779 	{
1780 	  /* Raise a warning, but do not consume subsequent tokens.  */
1781 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1782 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1783 				   token->src_loc, 0,
1784 				   "invalid suffix on literal; C++11 requires "
1785 				   "a space between literal and string macro");
1786 	}
1787       /* Grab user defined literal suffix.  */
1788       else if (ISIDST (*cur))
1789 	{
1790 	  type = cpp_userdef_string_add_type (type);
1791 	  ++cur;
1792 
1793 	  while (ISIDNUM (*cur))
1794 	    ++cur;
1795 	}
1796     }
1797 
1798   pfile->buffer->cur = cur;
1799   if (first_buff == NULL)
1800     create_literal (pfile, token, base, cur - base, type);
1801   else
1802     {
1803       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1804 
1805       token->type = type;
1806       token->val.str.len = total_len + (cur - base);
1807       token->val.str.text = dest;
1808       last_buff = first_buff;
1809       while (last_buff != NULL)
1810 	{
1811 	  memcpy (dest, last_buff->base,
1812 		  BUFF_FRONT (last_buff) - last_buff->base);
1813 	  dest += BUFF_FRONT (last_buff) - last_buff->base;
1814 	  last_buff = last_buff->next;
1815 	}
1816       _cpp_release_buff (pfile, first_buff);
1817       memcpy (dest, base, cur - base);
1818       dest[cur - base] = '\0';
1819     }
1820 }
1821 
1822 /* Lexes a string, character constant, or angle-bracketed header file
1823    name.  The stored string contains the spelling, including opening
1824    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1825    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1826    if it was not properly terminated, or CPP_LESS for an unterminated
1827    header name which must be relexed as normal tokens.
1828 
1829    The spelling is NUL-terminated, but it is not guaranteed that this
1830    is the first NUL since embedded NULs are preserved.  */
1831 static void
1832 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1833 {
1834   bool saw_NUL = false;
1835   const uchar *cur;
1836   cppchar_t terminator;
1837   enum cpp_ttype type;
1838 
1839   cur = base;
1840   terminator = *cur++;
1841   if (terminator == 'L' || terminator == 'U')
1842     terminator = *cur++;
1843   else if (terminator == 'u')
1844     {
1845       terminator = *cur++;
1846       if (terminator == '8')
1847 	terminator = *cur++;
1848     }
1849   if (terminator == 'R')
1850     {
1851       lex_raw_string (pfile, token, base, cur);
1852       return;
1853     }
1854   if (terminator == '"')
1855     type = (*base == 'L' ? CPP_WSTRING :
1856 	    *base == 'U' ? CPP_STRING32 :
1857 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1858 			 : CPP_STRING);
1859   else if (terminator == '\'')
1860     type = (*base == 'L' ? CPP_WCHAR :
1861 	    *base == 'U' ? CPP_CHAR32 :
1862 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
1863 			 : CPP_CHAR);
1864   else
1865     terminator = '>', type = CPP_HEADER_NAME;
1866 
1867   for (;;)
1868     {
1869       cppchar_t c = *cur++;
1870 
1871       /* In #include-style directives, terminators are not escapable.  */
1872       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1873 	cur++;
1874       else if (c == terminator)
1875 	break;
1876       else if (c == '\n')
1877 	{
1878 	  cur--;
1879 	  /* Unmatched quotes always yield undefined behavior, but
1880 	     greedy lexing means that what appears to be an unterminated
1881 	     header name may actually be a legitimate sequence of tokens.  */
1882 	  if (terminator == '>')
1883 	    {
1884 	      token->type = CPP_LESS;
1885 	      return;
1886 	    }
1887 	  type = CPP_OTHER;
1888 	  break;
1889 	}
1890       else if (c == '\0')
1891 	saw_NUL = true;
1892     }
1893 
1894   if (saw_NUL && !pfile->state.skipping)
1895     cpp_error (pfile, CPP_DL_WARNING,
1896 	       "null character(s) preserved in literal");
1897 
1898   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1899     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1900 	       (int) terminator);
1901 
1902   if (CPP_OPTION (pfile, user_literals))
1903     {
1904       /* If a string format macro, say from inttypes.h, is placed touching
1905 	 a string literal it could be parsed as a C++11 user-defined string
1906 	 literal thus breaking the program.
1907 	 Try to identify macros with is_macro. A warning is issued. */
1908       if (is_macro (pfile, cur))
1909 	{
1910 	  /* Raise a warning, but do not consume subsequent tokens.  */
1911 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1912 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1913 				   token->src_loc, 0,
1914 				   "invalid suffix on literal; C++11 requires "
1915 				   "a space between literal and string macro");
1916 	}
1917       /* Grab user defined literal suffix.  */
1918       else if (ISIDST (*cur))
1919 	{
1920 	  type = cpp_userdef_char_add_type (type);
1921 	  type = cpp_userdef_string_add_type (type);
1922           ++cur;
1923 
1924 	  while (ISIDNUM (*cur))
1925 	    ++cur;
1926 	}
1927     }
1928   else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
1929 	   && is_macro (pfile, cur)
1930 	   && !pfile->state.skipping)
1931     cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
1932 			   token->src_loc, 0, "C++11 requires a space "
1933 			   "between string literal and macro");
1934 
1935   pfile->buffer->cur = cur;
1936   create_literal (pfile, token, base, cur - base, type);
1937 }
1938 
1939 /* Return the comment table. The client may not make any assumption
1940    about the ordering of the table.  */
1941 cpp_comment_table *
1942 cpp_get_comments (cpp_reader *pfile)
1943 {
1944   return &pfile->comments;
1945 }
1946 
1947 /* Append a comment to the end of the comment table. */
1948 static void
1949 store_comment (cpp_reader *pfile, cpp_token *token)
1950 {
1951   int len;
1952 
1953   if (pfile->comments.allocated == 0)
1954     {
1955       pfile->comments.allocated = 256;
1956       pfile->comments.entries = (cpp_comment *) xmalloc
1957 	(pfile->comments.allocated * sizeof (cpp_comment));
1958     }
1959 
1960   if (pfile->comments.count == pfile->comments.allocated)
1961     {
1962       pfile->comments.allocated *= 2;
1963       pfile->comments.entries = (cpp_comment *) xrealloc
1964 	(pfile->comments.entries,
1965 	 pfile->comments.allocated * sizeof (cpp_comment));
1966     }
1967 
1968   len = token->val.str.len;
1969 
1970   /* Copy comment. Note, token may not be NULL terminated. */
1971   pfile->comments.entries[pfile->comments.count].comment =
1972     (char *) xmalloc (sizeof (char) * (len + 1));
1973   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1974 	  token->val.str.text, len);
1975   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1976 
1977   /* Set source location. */
1978   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1979 
1980   /* Increment the count of entries in the comment table. */
1981   pfile->comments.count++;
1982 }
1983 
1984 /* The stored comment includes the comment start and any terminator.  */
1985 static void
1986 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1987 	      cppchar_t type)
1988 {
1989   unsigned char *buffer;
1990   unsigned int len, clen, i;
1991   int convert_to_c = (pfile->state.in_directive || pfile->state.parsing_args)
1992     && type == '/';
1993 
1994   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1995 
1996   /* C++ comments probably (not definitely) have moved past a new
1997      line, which we don't want to save in the comment.  */
1998   if (is_vspace (pfile->buffer->cur[-1]))
1999     len--;
2000 
2001   /* If we are currently in a directive or in argument parsing, then
2002      we need to store all C++ comments as C comments internally, and
2003      so we need to allocate a little extra space in that case.
2004 
2005      Note that the only time we encounter a directive here is
2006      when we are saving comments in a "#define".  */
2007   clen = convert_to_c ? len + 2 : len;
2008 
2009   buffer = _cpp_unaligned_alloc (pfile, clen);
2010 
2011   token->type = CPP_COMMENT;
2012   token->val.str.len = clen;
2013   token->val.str.text = buffer;
2014 
2015   buffer[0] = '/';
2016   memcpy (buffer + 1, from, len - 1);
2017 
2018   /* Finish conversion to a C comment, if necessary.  */
2019   if (convert_to_c)
2020     {
2021       buffer[1] = '*';
2022       buffer[clen - 2] = '*';
2023       buffer[clen - 1] = '/';
2024       /* As there can be in a C++ comments illegal sequences for C comments
2025          we need to filter them out.  */
2026       for (i = 2; i < (clen - 2); i++)
2027         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2028           buffer[i] = '|';
2029     }
2030 
2031   /* Finally store this comment for use by clients of libcpp. */
2032   store_comment (pfile, token);
2033 }
2034 
2035 /* Allocate COUNT tokens for RUN.  */
2036 void
2037 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2038 {
2039   run->base = XNEWVEC (cpp_token, count);
2040   run->limit = run->base + count;
2041   run->next = NULL;
2042 }
2043 
2044 /* Returns the next tokenrun, or creates one if there is none.  */
2045 static tokenrun *
2046 next_tokenrun (tokenrun *run)
2047 {
2048   if (run->next == NULL)
2049     {
2050       run->next = XNEW (tokenrun);
2051       run->next->prev = run;
2052       _cpp_init_tokenrun (run->next, 250);
2053     }
2054 
2055   return run->next;
2056 }
2057 
2058 /* Return the number of not yet processed token in a given
2059    context.  */
2060 int
2061 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2062 {
2063   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2064     return (LAST (context).token - FIRST (context).token);
2065   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2066 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2067     return (LAST (context).ptoken - FIRST (context).ptoken);
2068   else
2069       abort ();
2070 }
2071 
2072 /* Returns the token present at index INDEX in a given context.  If
2073    INDEX is zero, the next token to be processed is returned.  */
2074 static const cpp_token*
2075 _cpp_token_from_context_at (cpp_context *context, int index)
2076 {
2077   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2078     return &(FIRST (context).token[index]);
2079   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2080 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2081     return FIRST (context).ptoken[index];
2082  else
2083    abort ();
2084 }
2085 
2086 /* Look ahead in the input stream.  */
2087 const cpp_token *
2088 cpp_peek_token (cpp_reader *pfile, int index)
2089 {
2090   cpp_context *context = pfile->context;
2091   const cpp_token *peektok;
2092   int count;
2093 
2094   /* First, scan through any pending cpp_context objects.  */
2095   while (context->prev)
2096     {
2097       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2098 
2099       if (index < (int) sz)
2100         return _cpp_token_from_context_at (context, index);
2101       index -= (int) sz;
2102       context = context->prev;
2103     }
2104 
2105   /* We will have to read some new tokens after all (and do so
2106      without invalidating preceding tokens).  */
2107   count = index;
2108   pfile->keep_tokens++;
2109 
2110   /* For peeked tokens temporarily disable line_change reporting,
2111      until the tokens are parsed for real.  */
2112   void (*line_change) (cpp_reader *, const cpp_token *, int)
2113     = pfile->cb.line_change;
2114   pfile->cb.line_change = NULL;
2115 
2116   do
2117     {
2118       peektok = _cpp_lex_token (pfile);
2119       if (peektok->type == CPP_EOF)
2120 	{
2121 	  index--;
2122 	  break;
2123 	}
2124     }
2125   while (index--);
2126 
2127   _cpp_backup_tokens_direct (pfile, count - index);
2128   pfile->keep_tokens--;
2129   pfile->cb.line_change = line_change;
2130 
2131   return peektok;
2132 }
2133 
/* Allocate a single token that is invalidated at the same time as the
   rest of the tokens on the line.  Has its line and col set to the
   same as the last lexed token, so that diagnostics appear in the
   right place.

   The new token is placed at pfile->cur_token, which is then advanced.
   Any lookahead tokens already stored from that position onwards are
   first moved up by one slot, spilling into the next token run when
   the current run has no room left for them.  */
cpp_token *
_cpp_temp_token (cpp_reader *pfile)
{
  cpp_token *old, *result;
  /* SZ: slots left in the current run, counting from cur_token.
     LA: number of lookahead tokens pending.  */
  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;

  /* The token just before cur_token supplies the source location.  */
  old = pfile->cur_token - 1;
  /* Any pre-existing lookaheads must not be clobbered.  */
  if (la)
    {
      if (sz <= la)
        {
          /* Shifting by one pushes a token past the end of this run,
             so the next run is involved.  */
          tokenrun *next = next_tokenrun (pfile->cur_run);

          /* Lookaheads already living in the next run move up first.  */
          if (sz < la)
            memmove (next->base + 1, next->base,
                     (la - sz) * sizeof (cpp_token));

          /* The last slot of this run becomes the first of the next.  */
          next->base[0] = pfile->cur_run->limit[-1];
        }

      /* Shift the lookaheads that stay within the current run.  */
      if (sz > 1)
        memmove (pfile->cur_token + 1, pfile->cur_token,
                 MIN (la, sz - 1) * sizeof (cpp_token));
    }

  /* No slot left in this run at all: continue in the next one.  */
  if (!sz && pfile->cur_token == pfile->cur_run->limit)
    {
      pfile->cur_run = next_tokenrun (pfile->cur_run);
      pfile->cur_token = pfile->cur_run->base;
    }

  result = pfile->cur_token++;
  result->src_loc = old->src_loc;
  return result;
}
2175 
2176 /* Lex a token into RESULT (external interface).  Takes care of issues
2177    like directive handling, token lookahead, multiple include
2178    optimization and skipping.  */
2179 const cpp_token *
2180 _cpp_lex_token (cpp_reader *pfile)
2181 {
2182   cpp_token *result;
2183 
2184   for (;;)
2185     {
2186       if (pfile->cur_token == pfile->cur_run->limit)
2187 	{
2188 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
2189 	  pfile->cur_token = pfile->cur_run->base;
2190 	}
2191       /* We assume that the current token is somewhere in the current
2192 	 run.  */
2193       if (pfile->cur_token < pfile->cur_run->base
2194 	  || pfile->cur_token >= pfile->cur_run->limit)
2195 	abort ();
2196 
2197       if (pfile->lookaheads)
2198 	{
2199 	  pfile->lookaheads--;
2200 	  result = pfile->cur_token++;
2201 	}
2202       else
2203 	result = _cpp_lex_direct (pfile);
2204 
2205       if (result->flags & BOL)
2206 	{
2207 	  /* Is this a directive.  If _cpp_handle_directive returns
2208 	     false, it is an assembler #.  */
2209 	  if (result->type == CPP_HASH
2210 	      /* 6.10.3 p 11: Directives in a list of macro arguments
2211 		 gives undefined behavior.  This implementation
2212 		 handles the directive as normal.  */
2213 	      && pfile->state.parsing_args != 1)
2214 	    {
2215 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2216 		{
2217 		  if (pfile->directive_result.type == CPP_PADDING)
2218 		    continue;
2219 		  result = &pfile->directive_result;
2220 		}
2221 	    }
2222 	  else if (pfile->state.in_deferred_pragma)
2223 	    result = &pfile->directive_result;
2224 
2225 	  if (pfile->cb.line_change && !pfile->state.skipping)
2226 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2227 	}
2228 
2229       /* We don't skip tokens in directives.  */
2230       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2231 	break;
2232 
2233       /* Outside a directive, invalidate controlling macros.  At file
2234 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2235 	 get here and MI optimization works.  */
2236       pfile->mi_valid = false;
2237 
2238       if (!pfile->state.skipping || result->type == CPP_EOF)
2239 	break;
2240     }
2241 
2242   return result;
2243 }
2244 
2245 /* Returns true if a fresh line has been loaded.  */
2246 bool
2247 _cpp_get_fresh_line (cpp_reader *pfile)
2248 {
2249   int return_at_eof;
2250 
2251   /* We can't get a new line until we leave the current directive.  */
2252   if (pfile->state.in_directive)
2253     return false;
2254 
2255   for (;;)
2256     {
2257       cpp_buffer *buffer = pfile->buffer;
2258 
2259       if (!buffer->need_line)
2260 	return true;
2261 
2262       if (buffer->next_line < buffer->rlimit)
2263 	{
2264 	  _cpp_clean_line (pfile);
2265 	  return true;
2266 	}
2267 
2268       /* First, get out of parsing arguments state.  */
2269       if (pfile->state.parsing_args)
2270 	return false;
2271 
2272       /* End of buffer.  Non-empty files should end in a newline.  */
2273       if (buffer->buf != buffer->rlimit
2274 	  && buffer->next_line > buffer->rlimit
2275 	  && !buffer->from_stage3)
2276 	{
2277 	  /* Clip to buffer size.  */
2278 	  buffer->next_line = buffer->rlimit;
2279 	}
2280 
2281       return_at_eof = buffer->return_at_eof;
2282       _cpp_pop_buffer (pfile);
2283       if (pfile->buffer == NULL || return_at_eof)
2284 	return false;
2285     }
2286 }
2287 
/* Set result->type from the next unread character of `buffer':
   THEN_TYPE, consuming that character, when it is CHAR; ELSE_TYPE,
   consuming nothing, otherwise.  Relies on `buffer' and `result' being
   in scope where the macro is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == CHAR)				\
	{						\
	  buffer->cur++;				\
	  result->type = THEN_TYPE;			\
	}						\
      else						\
	result->type = ELSE_TYPE;			\
    }							\
  while (0)
2296 
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   Control flow is driven by three labels: fresh_line (a new line is
   needed or has just been started), update_tokens_line (refresh the
   token's location, used after a discarded comment) and skipped_white
   (continue after whitespace has been skipped).  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The end of a deferred pragma's line is reported as its own
	 token.  */
      if (pfile->state.in_deferred_pragma)
	{
	  result->type = CPP_PRAGMA_EOL;
	  pfile->state.in_deferred_pragma = false;
	  if (!pfile->state.pragma_allow_expansion)
	    pfile->state.prevent_expansion--;
	  return result;
	}
      if (!_cpp_get_fresh_line (pfile))
	{
	  result->type = CPP_EOF;
	  if (!pfile->state.in_directive)
	    {
	      /* Tell the compiler the line number of the EOF token.  */
	      result->src_loc = pfile->line_table->highest_line;
	      result->flags = BOL;
	    }
	  return result;
	}
      /* Unless tokens are being kept, go back to the start of the
	 token buffer for the new line.  */
      if (!pfile->keep_tokens)
	{
	  pfile->cur_run = &pfile->base_run;
	  result = pfile->base_run.base;
	  pfile->cur_token = result + 1;
	}
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
	result->flags |= PREV_WHITE;
    }
  /* _cpp_get_fresh_line may have changed the current buffer.  */
  buffer = pfile->buffer;
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

 skipped_white:
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  if (pfile->forced_token_location_p)
    result->src_loc = *pfile->forced_token_location_p;
  else
    result->src_loc = linemap_position_for_column (pfile->line_table,
					  CPP_BUF_COLUMN (buffer, buffer->cur));

  /* Dispatch on the first character of the token.  */
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      if (buffer->cur < buffer->rlimit)
	CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->type = CPP_NUMBER;
	lex_number (pfile, &result->val.str, &nst);
	warn_about_normalization (pfile, result, &nst);
	break;
      }

    case 'L':
    case 'u':
    case 'U':
    case 'R':
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
	 wide strings or raw strings.  */
      if (c == 'L' || CPP_OPTION (pfile, rliterals)
	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
	{
	  if ((*buffer->cur == '\'' && c != 'R')
	      || *buffer->cur == '"'
	      || (*buffer->cur == 'R'
		  && c != 'R'
		  && buffer->cur[1] == '"'
		  && CPP_OPTION (pfile, rliterals))
	      || (*buffer->cur == '8'
		  && c == 'u'
		  && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
				&& CPP_OPTION (pfile, utf8_char_literals)))
		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
			  && CPP_OPTION (pfile, rliterals)))))
	    {
	      lex_string (pfile, result, buffer->cur - 1);
	      break;
	    }
	}
      /* Fall through.  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't':           case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q':
    case 'S': case 'T':           case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
						&nst,
						&result->val.node.spelling);
	warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  */
      if (result->val.node.node->flags & NODE_OPERATOR)
	{
	  result->flags |= NAMED_OP;
	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
	}
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
	{
	  if (_cpp_skip_block_comment (pfile))
	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
	}
      else if (c == '/' && ! CPP_OPTION (pfile, traditional))
	{
	  /* Don't warn for system headers.  */
	  if (cpp_in_system_header (pfile))
	    ;
	  /* Warn about comments if pedantically GNUC89, and not
	     in system headers.  */
	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
		   && CPP_PEDANTIC (pfile)
		   && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "C++ style comments are not allowed in ISO C90");
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }
	  /* Or if specifically desired via -Wc90-c99-compat.  */
	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
		   && ! CPP_OPTION (pfile, cplusplus)
		   && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_WARNING,
			 "C++ style comments are incompatible with C90");
	      cpp_error (pfile, CPP_DL_WARNING,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }
	  /* In C89/C94, C++ style comments are forbidden.  */
	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
	    {
	      /* But don't be confused about valid code such as
	         - // immediately followed by *,
		 - // in a preprocessing directive,
		 - // in an #if 0 block.  */
	      if (buffer->cur[1] == '*'
		  || pfile->state.in_directive
		  || pfile->state.skipping)
		{
		  result->type = CPP_DIV;
		  break;
		}
	      else if (! buffer->warned_cplusplus_comments)
		{
		  cpp_error (pfile, CPP_DL_ERROR,
			     "C++ style comments are not allowed in ISO C90");
		  cpp_error (pfile, CPP_DL_ERROR,
			     "(this will be reported only once per input "
			     "file)");
		  buffer->warned_cplusplus_comments = 1;
		}
	    }
	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
	}
      else if (c == '=')
	{
	  buffer->cur++;
	  result->type = CPP_DIV_EQ;
	  break;
	}
      else
	{
	  result->type = CPP_DIV;
	  break;
	}

      /* Only a skipped comment reaches this point.  Unless comments
	 are being saved it counts as whitespace and lexing resumes
	 after it.  */
      if (!pfile->state.save_comments)
	{
	  result->flags |= PREV_WHITE;
	  goto update_tokens_line;
	}

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* Where a header name is expected, try that first; lex_string
	 leaves CPP_LESS in the token when it finds none.  */
      if (pfile->state.angled_headers)
	{
	  lex_string (pfile, result, buffer->cur - 1);
	  if (result->type != CPP_LESS)
	    break;
	}

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
	}
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
		 three characters are <:: and the subsequent character
		 is neither : nor >, the < is treated as a preprocessor
		 token by itself".  */
	      if (CPP_OPTION (pfile, cplusplus)
		  && CPP_OPTION (pfile, lang) != CLK_CXX98
		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
		  && buffer->cur[1] == ':'
		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
		break;

	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_SQUARE;
	    }
	  else if (*buffer->cur == '%')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_BRACE;
	    }
	}
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
	}
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* "%:" spells '#', and "%:%:" spells "##".  */
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_HASH;
	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
	    }
	  else if (*buffer->cur == '>')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_CLOSE_BRACE;
	    }
	}
      break;

    case '.':
      result->type = CPP_DOT;
      if (ISDIGIT (*buffer->cur))
	{
	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	  result->type = CPP_NUMBER;
	  lex_number (pfile, &result->val.str, &nst);
	  warn_about_normalization (pfile, result, &nst);
	}
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
	buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
	buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  result->type = CPP_DEREF;
	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	    buffer->cur++, result->type = CPP_DEREF_STAR;
	}
      else if (*buffer->cur == '-')
	buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
	buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
	buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
	{
	  buffer->cur++;
	  result->flags |= DIGRAPH;
	  result->type = CPP_CLOSE_SQUARE;
	}
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
	/* Step back onto the character so that forms_identifier_p can
	   examine it; if it does not start an identifier the character
	   is consumed again below.  */
	const uchar *base = --buffer->cur;
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;

	if (forms_identifier_p (pfile, true, &nst))
	  {
	    result->type = CPP_NAME;
	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
						    &result->val.node.spelling);
	    warn_about_normalization (pfile, result, &nst);
	    break;
	  }
	buffer->cur++;
      }
      /* Fall through: the character becomes a one-character CPP_OTHER
	 token.  */

    default:
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  /* Give the token a location range: it starts at the location
     recorded above and, for non-reserved locations, finishes at the
     current buffer column.  */
  source_range tok_range;
  tok_range.m_start = result->src_loc;
  if (result->src_loc >= RESERVED_LOCATION_COUNT)
    tok_range.m_finish
      = linemap_position_for_column (pfile->line_table,
				     CPP_BUF_COLUMN (buffer, buffer->cur));
  else
    tok_range.m_finish = tok_range.m_start;

  result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
					   result->src_loc,
					   tok_range, NULL);

  return result;
}
2742 
2743 /* An upper bound on the number of bytes needed to spell TOKEN.
2744    Does not include preceding whitespace.  */
2745 unsigned int
2746 cpp_token_len (const cpp_token *token)
2747 {
2748   unsigned int len;
2749 
2750   switch (TOKEN_SPELL (token))
2751     {
2752     default:		len = 6;				break;
2753     case SPELL_LITERAL:	len = token->val.str.len;		break;
2754     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
2755     }
2756 
2757   return len;
2758 }
2759 
/* Decode the UTF-8 sequence that starts at NAME and write it to BUFFER
   as a \U escape with eight lower-case hex digits, so exactly 10 bytes
   are always written.  Returns the number of bytes consumed from NAME.
   Aborts if a continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *in = name;
  unsigned lead = *in;
  unsigned long code;
  int seq_len = 0;
  int k;

  /* The number of leading one bits in the first byte is the length of
     the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The remaining low bits of the first byte start the code point;
     each following byte contributes six more.  */
  code = *in & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      in++;
      code = (code << 6) | (*in & 0x3F);

      /* Continuation bytes must look like 10xxxxxx.  */
      if ((*in & 0xC0) != 0x80)
	abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
2793 
2794 /* Given a token TYPE corresponding to a digraph, return a pointer to
2795    the spelling of the digraph.  */
2796 static const unsigned char *
2797 cpp_digraph2name (enum cpp_ttype type)
2798 {
2799   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2800 }
2801 
2802 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2803    The buffer must already contain the enough space to hold the
2804    token's spelling.  Returns a pointer to the character after the
2805    last character written.  */
2806 unsigned char *
2807 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2808 {
2809   size_t i;
2810   const unsigned char *name = NODE_NAME (ident);
2811 
2812   for (i = 0; i < NODE_LEN (ident); i++)
2813     if (name[i] & ~0x7F)
2814       {
2815 	i += utf8_to_ucn (buffer, name + i) - 1;
2816 	buffer += 10;
2817       }
2818     else
2819       *buffer++ = name[i];
2820 
2821   return buffer;
2822 }
2823 
2824 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2825    already contain the enough space to hold the token's spelling.
2826    Returns a pointer to the character after the last character written.
2827    FORSTRING is true if this is to be the spelling after translation
2828    phase 1 (with the original spelling of extended identifiers), false
2829    if extended identifiers should always be written using UCNs (there is
2830    no option for always writing them in the internal UTF-8 form).
2831    FIXME: Would be nice if we didn't need the PFILE argument.  */
2832 unsigned char *
2833 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2834 		 unsigned char *buffer, bool forstring)
2835 {
2836   switch (TOKEN_SPELL (token))
2837     {
2838     case SPELL_OPERATOR:
2839       {
2840 	const unsigned char *spelling;
2841 	unsigned char c;
2842 
2843 	if (token->flags & DIGRAPH)
2844 	  spelling = cpp_digraph2name (token->type);
2845 	else if (token->flags & NAMED_OP)
2846 	  goto spell_ident;
2847 	else
2848 	  spelling = TOKEN_NAME (token);
2849 
2850 	while ((c = *spelling++) != '\0')
2851 	  *buffer++ = c;
2852       }
2853       break;
2854 
2855     spell_ident:
2856     case SPELL_IDENT:
2857       if (forstring)
2858 	{
2859 	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
2860 		  NODE_LEN (token->val.node.spelling));
2861 	  buffer += NODE_LEN (token->val.node.spelling);
2862 	}
2863       else
2864 	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2865       break;
2866 
2867     case SPELL_LITERAL:
2868       memcpy (buffer, token->val.str.text, token->val.str.len);
2869       buffer += token->val.str.len;
2870       break;
2871 
2872     case SPELL_NONE:
2873       cpp_error (pfile, CPP_DL_ICE,
2874 		 "unspellable token %s", TOKEN_NAME (token));
2875       break;
2876     }
2877 
2878   return buffer;
2879 }
2880 
2881 /* Returns TOKEN spelt as a null-terminated string.  The string is
2882    freed when the reader is destroyed.  Useful for diagnostics.  */
2883 unsigned char *
2884 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2885 {
2886   unsigned int len = cpp_token_len (token) + 1;
2887   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2888 
2889   end = cpp_spell_token (pfile, token, start, false);
2890   end[0] = '\0';
2891 
2892   return start;
2893 }
2894 
2895 /* Returns a pointer to a string which spells the token defined by
2896    TYPE and FLAGS.  Used by C front ends, which really should move to
2897    using cpp_token_as_text.  */
2898 const char *
2899 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2900 {
2901   if (flags & DIGRAPH)
2902     return (const char *) cpp_digraph2name (type);
2903   else if (flags & NAMED_OP)
2904     return cpp_named_operator2name (type);
2905 
2906   return (const char *) token_spellings[type].name;
2907 }
2908 
2909 /* Writes the spelling of token to FP, without any preceding space.
2910    Separated from cpp_spell_token for efficiency - to avoid stdio
2911    double-buffering.  */
2912 void
2913 cpp_output_token (const cpp_token *token, FILE *fp)
2914 {
2915   switch (TOKEN_SPELL (token))
2916     {
2917     case SPELL_OPERATOR:
2918       {
2919 	const unsigned char *spelling;
2920 	int c;
2921 
2922 	if (token->flags & DIGRAPH)
2923 	  spelling = cpp_digraph2name (token->type);
2924 	else if (token->flags & NAMED_OP)
2925 	  goto spell_ident;
2926 	else
2927 	  spelling = TOKEN_NAME (token);
2928 
2929 	c = *spelling;
2930 	do
2931 	  putc (c, fp);
2932 	while ((c = *++spelling) != '\0');
2933       }
2934       break;
2935 
2936     spell_ident:
2937     case SPELL_IDENT:
2938       {
2939 	size_t i;
2940 	const unsigned char * name = NODE_NAME (token->val.node.node);
2941 
2942 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2943 	  if (name[i] & ~0x7F)
2944 	    {
2945 	      unsigned char buffer[10];
2946 	      i += utf8_to_ucn (buffer, name + i) - 1;
2947 	      fwrite (buffer, 1, 10, fp);
2948 	    }
2949 	  else
2950 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
2951       }
2952       break;
2953 
2954     case SPELL_LITERAL:
2955       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2956       break;
2957 
2958     case SPELL_NONE:
2959       /* An error, most probably.  */
2960       break;
2961     }
2962 }
2963 
2964 /* Compare two tokens.  */
2965 int
2966 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2967 {
2968   if (a->type == b->type && a->flags == b->flags)
2969     switch (TOKEN_SPELL (a))
2970       {
2971       default:			/* Keep compiler happy.  */
2972       case SPELL_OPERATOR:
2973 	/* token_no is used to track where multiple consecutive ##
2974 	   tokens were originally located.  */
2975 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2976       case SPELL_NONE:
2977 	return (a->type != CPP_MACRO_ARG
2978 		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2979 		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2980       case SPELL_IDENT:
2981 	return (a->val.node.node == b->val.node.node
2982 		&& a->val.node.spelling == b->val.node.spelling);
2983       case SPELL_LITERAL:
2984 	return (a->val.str.len == b->val.str.len
2985 		&& !memcmp (a->val.str.text, b->val.str.text,
2986 			    a->val.str.len));
2987       }
2988 
2989   return 0;
2990 }
2991 
2992 /* Returns nonzero if a space should be inserted to avoid an
2993    accidental token paste for output.  For simplicity, it is
2994    conservative, and occasionally advises a space where one is not
2995    needed, e.g. "." and ".2".  */
2996 int
2997 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2998 		 const cpp_token *token2)
2999 {
3000   enum cpp_ttype a = token1->type, b = token2->type;
3001   cppchar_t c;
3002 
3003   if (token1->flags & NAMED_OP)
3004     a = CPP_NAME;
3005   if (token2->flags & NAMED_OP)
3006     b = CPP_NAME;
3007 
3008   c = EOF;
3009   if (token2->flags & DIGRAPH)
3010     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
3011   else if (token_spellings[b].category == SPELL_OPERATOR)
3012     c = token_spellings[b].name[0];
3013 
3014   /* Quickly get everything that can paste with an '='.  */
3015   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
3016     return 1;
3017 
3018   switch (a)
3019     {
3020     case CPP_GREATER:	return c == '>';
3021     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
3022     case CPP_PLUS:	return c == '+';
3023     case CPP_MINUS:	return c == '-' || c == '>';
3024     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
3025     case CPP_MOD:	return c == ':' || c == '>';
3026     case CPP_AND:	return c == '&';
3027     case CPP_OR:	return c == '|';
3028     case CPP_COLON:	return c == ':' || c == '>';
3029     case CPP_DEREF:	return c == '*';
3030     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
3031     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
3032     case CPP_NAME:	return ((b == CPP_NUMBER
3033 				 && name_p (pfile, &token2->val.str))
3034 				|| b == CPP_NAME
3035 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
3036     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
3037 				|| c == '.' || c == '+' || c == '-');
3038 				      /* UCNs */
3039     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
3040 				 && b == CPP_NAME)
3041 				|| (CPP_OPTION (pfile, objc)
3042 				    && token1->val.str.text[0] == '@'
3043 				    && (b == CPP_NAME || b == CPP_STRING)));
3044     case CPP_STRING:
3045     case CPP_WSTRING:
3046     case CPP_UTF8STRING:
3047     case CPP_STRING16:
3048     case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
3049 				&& (b == CPP_NAME
3050 				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
3051 					&& ISIDST (token2->val.str.text[0]))));
3052 
3053     default:		break;
3054     }
3055 
3056   return 0;
3057 }
3058 
3059 /* Output all the remaining tokens on the current line, and a newline
3060    character, to FP.  Leading whitespace is removed.  If there are
3061    macros, special token padding is not performed.  */
3062 void
3063 cpp_output_line (cpp_reader *pfile, FILE *fp)
3064 {
3065   const cpp_token *token;
3066 
3067   token = cpp_get_token (pfile);
3068   while (token->type != CPP_EOF)
3069     {
3070       cpp_output_token (token, fp);
3071       token = cpp_get_token (pfile);
3072       if (token->flags & PREV_WHITE)
3073 	putc (' ', fp);
3074     }
3075 
3076   putc ('\n', fp);
3077 }
3078 
3079 /* Return a string representation of all the remaining tokens on the
3080    current line.  The result is allocated using xmalloc and must be
3081    freed by the caller.  */
3082 unsigned char *
3083 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3084 {
3085   const cpp_token *token;
3086   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3087   unsigned int alloced = 120 + out;
3088   unsigned char *result = (unsigned char *) xmalloc (alloced);
3089 
3090   /* If DIR_NAME is empty, there are no initial contents.  */
3091   if (dir_name)
3092     {
3093       sprintf ((char *) result, "#%s ", dir_name);
3094       out += 2;
3095     }
3096 
3097   token = cpp_get_token (pfile);
3098   while (token->type != CPP_EOF)
3099     {
3100       unsigned char *last;
3101       /* Include room for a possible space and the terminating nul.  */
3102       unsigned int len = cpp_token_len (token) + 2;
3103 
3104       if (out + len > alloced)
3105 	{
3106 	  alloced *= 2;
3107 	  if (out + len > alloced)
3108 	    alloced = out + len;
3109 	  result = (unsigned char *) xrealloc (result, alloced);
3110 	}
3111 
3112       last = cpp_spell_token (pfile, token, &result[out], 0);
3113       out = last - result;
3114 
3115       token = cpp_get_token (pfile);
3116       if (token->flags & PREV_WHITE)
3117 	result[out++] = ' ';
3118     }
3119 
3120   result[out] = '\0';
3121   return result;
3122 }
3123 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room still left in BUFF.
   Every macro argument is parenthesized so that arbitrary
   expressions may be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3138 
3139 /* Create a new allocation buffer.  Place the control block at the end
3140    of the buffer, so that buffer overflows will cause immediate chaos.  */
3141 static _cpp_buff *
3142 new_buff (size_t len)
3143 {
3144   _cpp_buff *result;
3145   unsigned char *base;
3146 
3147   if (len < MIN_BUFF_SIZE)
3148     len = MIN_BUFF_SIZE;
3149   len = CPP_ALIGN (len);
3150 
3151 #ifdef ENABLE_VALGRIND_CHECKING
3152   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3153      struct first.  */
3154   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3155   base = XNEWVEC (unsigned char, len + slen);
3156   result = (_cpp_buff *) base;
3157   base += slen;
3158 #else
3159   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3160   result = (_cpp_buff *) (base + len);
3161 #endif
3162   result->base = base;
3163   result->cur = base;
3164   result->limit = base + len;
3165   result->next = NULL;
3166   return result;
3167 }
3168 
3169 /* Place a chain of unwanted allocation buffers on the free list.  */
3170 void
3171 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3172 {
3173   _cpp_buff *end = buff;
3174 
3175   while (end->next)
3176     end = end->next;
3177   end->next = pfile->free_buffs;
3178   pfile->free_buffs = buff;
3179 }
3180 
3181 /* Return a free buffer of size at least MIN_SIZE.  */
3182 _cpp_buff *
3183 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3184 {
3185   _cpp_buff *result, **p;
3186 
3187   for (p = &pfile->free_buffs;; p = &(*p)->next)
3188     {
3189       size_t size;
3190 
3191       if (*p == NULL)
3192 	return new_buff (min_size);
3193       result = *p;
3194       size = result->limit - result->base;
3195       /* Return a buffer that's big enough, but don't waste one that's
3196          way too big.  */
3197       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3198 	break;
3199     }
3200 
3201   *p = result->next;
3202   result->next = NULL;
3203   result->cur = result->base;
3204   return result;
3205 }
3206 
3207 /* Creates a new buffer with enough space to hold the uncommitted
3208    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3209    the excess bytes to the new buffer.  Chains the new buffer after
3210    BUFF, and returns the new buffer.  */
3211 _cpp_buff *
3212 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3213 {
3214   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3215   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3216 
3217   buff->next = new_buff;
3218   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3219   return new_buff;
3220 }
3221 
3222 /* Creates a new buffer with enough space to hold the uncommitted
3223    remaining bytes of the buffer pointed to by BUFF, and at least
3224    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3225    Chains the new buffer before the buffer pointed to by BUFF, and
3226    updates the pointer to point to the new buffer.  */
3227 void
3228 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3229 {
3230   _cpp_buff *new_buff, *old_buff = *pbuff;
3231   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3232 
3233   new_buff = _cpp_get_buff (pfile, size);
3234   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3235   new_buff->next = old_buff;
3236   *pbuff = new_buff;
3237 }
3238 
3239 /* Free a chain of buffers starting at BUFF.  */
3240 void
3241 _cpp_free_buff (_cpp_buff *buff)
3242 {
3243   _cpp_buff *next;
3244 
3245   for (; buff; buff = next)
3246     {
3247       next = buff->next;
3248 #ifdef ENABLE_VALGRIND_CHECKING
3249       free (buff);
3250 #else
3251       free (buff->base);
3252 #endif
3253     }
3254 }
3255 
3256 /* Allocate permanent, unaligned storage of length LEN.  */
3257 unsigned char *
3258 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3259 {
3260   _cpp_buff *buff = pfile->u_buff;
3261   unsigned char *result = buff->cur;
3262 
3263   if (len > (size_t) (buff->limit - result))
3264     {
3265       buff = _cpp_get_buff (pfile, len);
3266       buff->next = pfile->u_buff;
3267       pfile->u_buff = buff;
3268       result = buff->cur;
3269     }
3270 
3271   buff->cur = result + len;
3272   return result;
3273 }
3274 
3275 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3276    That buffer is used for growing allocations when saving macro
3277    replacement lists in a #define, and when parsing an answer to an
3278    assertion in #assert, #unassert or #if (and therefore possibly
3279    whilst expanding macros).  It therefore must not be used by any
3280    code that they might call: specifically the lexer and the guts of
3281    the macro expander.
3282 
3283    All existing other uses clearly fit this restriction: storing
3284    registered pragmas during initialization.  */
3285 unsigned char *
3286 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3287 {
3288   _cpp_buff *buff = pfile->a_buff;
3289   unsigned char *result = buff->cur;
3290 
3291   if (len > (size_t) (buff->limit - result))
3292     {
3293       buff = _cpp_get_buff (pfile, len);
3294       buff->next = pfile->a_buff;
3295       pfile->a_buff = buff;
3296       result = buff->cur;
3297     }
3298 
3299   buff->cur = result + len;
3300   return result;
3301 }
3302 
3303 /* Say which field of TOK is in use.  */
3304 
3305 enum cpp_token_fld_kind
3306 cpp_token_val_index (const cpp_token *tok)
3307 {
3308   switch (TOKEN_SPELL (tok))
3309     {
3310     case SPELL_IDENT:
3311       return CPP_TOKEN_FLD_NODE;
3312     case SPELL_LITERAL:
3313       return CPP_TOKEN_FLD_STR;
3314     case SPELL_OPERATOR:
3315       if (tok->type == CPP_PASTE)
3316 	return CPP_TOKEN_FLD_TOKEN_NO;
3317       else
3318 	return CPP_TOKEN_FLD_NONE;
3319     case SPELL_NONE:
3320       if (tok->type == CPP_MACRO_ARG)
3321 	return CPP_TOKEN_FLD_ARG_NO;
3322       else if (tok->type == CPP_PADDING)
3323 	return CPP_TOKEN_FLD_SOURCE;
3324       else if (tok->type == CPP_PRAGMA)
3325 	return CPP_TOKEN_FLD_PRAGMA;
3326       /* else fall through */
3327     default:
3328       return CPP_TOKEN_FLD_NONE;
3329     }
3330 }
3331 
3332 /* All tokens lexed in R after calling this function will be forced to have
3333    their source_location the same as the location referenced by P, until
3334    cpp_stop_forcing_token_locations is called for R.  */
3335 
3336 void
3337 cpp_force_token_locations (cpp_reader *r, source_location *p)
3338 {
3339   r->forced_token_location_p = p;
3340 }
3341 
3342 /* Go back to assigning locations naturally for lexed tokens.  */
3343 
3344 void
3345 cpp_stop_forcing_token_locations (cpp_reader *r)
3346 {
3347   r->forced_token_location_p = NULL;
3348 }
3349