1 /* CPP Library - lexical analysis. 2 Copyright (C) 2000-2016 Free Software Foundation, Inc. 3 Contributed by Per Bothner, 1994-95. 4 Based on CCCP program by Paul Rubin, June 1986 5 Adapted to ANSI C, Richard Stallman, Jan 1987 6 Broken out to separate file, Zack Weinberg, Mar 2000 7 8 This program is free software; you can redistribute it and/or modify it 9 under the terms of the GNU General Public License as published by the 10 Free Software Foundation; either version 3, or (at your option) any 11 later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "cpplib.h" 25 #include "internal.h" 26 27 enum spell_type 28 { 29 SPELL_OPERATOR = 0, 30 SPELL_IDENT, 31 SPELL_LITERAL, 32 SPELL_NONE 33 }; 34 35 struct token_spelling 36 { 37 enum spell_type category; 38 const unsigned char *name; 39 }; 40 41 static const unsigned char *const digraph_spellings[] = 42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" }; 43 44 #define OP(e, s) { SPELL_OPERATOR, UC s }, 45 #define TK(e, s) { SPELL_ ## s, UC #e }, 46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; 47 #undef OP 48 #undef TK 49 50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) 51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name) 52 53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int); 54 static int skip_line_comment (cpp_reader *); 55 static void skip_whitespace (cpp_reader *, cppchar_t); 56 static void lex_string (cpp_reader *, cpp_token *, const uchar *); 57 static void save_comment (cpp_reader *, 
cpp_token *, const uchar *, cppchar_t); 58 static void store_comment (cpp_reader *, cpp_token *); 59 static void create_literal (cpp_reader *, cpp_token *, const uchar *, 60 unsigned int, enum cpp_ttype); 61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *); 62 static int name_p (cpp_reader *, const cpp_string *); 63 static tokenrun *next_tokenrun (tokenrun *); 64 65 static _cpp_buff *new_buff (size_t); 66 67 68 /* Utility routine: 69 70 Compares, the token TOKEN to the NUL-terminated string STRING. 71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ 72 int 73 cpp_ideq (const cpp_token *token, const char *string) 74 { 75 if (token->type != CPP_NAME) 76 return 0; 77 78 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string); 79 } 80 81 /* Record a note TYPE at byte POS into the current cleaned logical 82 line. */ 83 static void 84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) 85 { 86 if (buffer->notes_used == buffer->notes_cap) 87 { 88 buffer->notes_cap = buffer->notes_cap * 2 + 200; 89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes, 90 buffer->notes_cap); 91 } 92 93 buffer->notes[buffer->notes_used].pos = pos; 94 buffer->notes[buffer->notes_used].type = type; 95 buffer->notes_used++; 96 } 97 98 99 /* Fast path to find line special characters using optimized character 100 scanning algorithms. Anything complicated falls back to the slow 101 path below. Since this loop is very hot it's worth doing these kinds 102 of optimizations. 103 104 One of the paths through the ifdefs should provide 105 106 const uchar *search_line_fast (const uchar *s, const uchar *end); 107 108 Between S and END, search for \n, \r, \\, ?. Return a pointer to 109 the found character. 110 111 Note that the last character of the buffer is *always* a newline, 112 as forced by _cpp_convert_input. This fact can be used to avoid 113 explicitly looking for the end of the buffer. 
*/ 114 115 /* Configure gives us an ifdef test. */ 116 #ifndef WORDS_BIGENDIAN 117 #define WORDS_BIGENDIAN 0 118 #endif 119 120 /* We'd like the largest integer that fits into a register. There's nothing 121 in <stdint.h> that gives us that. For most hosts this is unsigned long, 122 but MS decided on an LLP64 model. Thankfully when building with GCC we 123 can get the "real" word size. */ 124 #ifdef __GNUC__ 125 typedef unsigned int word_type __attribute__((__mode__(__word__))); 126 #else 127 typedef unsigned long word_type; 128 #endif 129 130 /* The code below is only expecting sizes 4 or 8. 131 Die at compile-time if this expectation is violated. */ 132 typedef char check_word_type_size 133 [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1]; 134 135 /* Return X with the first N bytes forced to values that won't match one 136 of the interesting characters. Note that NUL is not interesting. */ 137 138 static inline word_type 139 acc_char_mask_misalign (word_type val, unsigned int n) 140 { 141 word_type mask = -1; 142 if (WORDS_BIGENDIAN) 143 mask >>= n * 8; 144 else 145 mask <<= n * 8; 146 return val & mask; 147 } 148 149 /* Return X replicated to all byte positions within WORD_TYPE. */ 150 151 static inline word_type 152 acc_char_replicate (uchar x) 153 { 154 word_type ret; 155 156 ret = (x << 24) | (x << 16) | (x << 8) | x; 157 if (sizeof(word_type) == 8) 158 ret = (ret << 16 << 16) | ret; 159 return ret; 160 } 161 162 /* Return non-zero if some byte of VAL is (probably) C. */ 163 164 static inline word_type 165 acc_char_cmp (word_type val, word_type c) 166 { 167 #if defined(__GNUC__) && defined(__alpha__) 168 /* We can get exact results using a compare-bytes instruction. 169 Get (val == c) via (0 >= (val ^ c)). 
*/ 170 return __builtin_alpha_cmpbge (0, val ^ c); 171 #else 172 word_type magic = 0x7efefefeU; 173 if (sizeof(word_type) == 8) 174 magic = (magic << 16 << 16) | 0xfefefefeU; 175 magic |= 1; 176 177 val ^= c; 178 return ((val + magic) ^ ~val) & ~magic; 179 #endif 180 } 181 182 /* Given the result of acc_char_cmp is non-zero, return the index of 183 the found character. If this was a false positive, return -1. */ 184 185 static inline int 186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED, 187 word_type val ATTRIBUTE_UNUSED) 188 { 189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN 190 /* The cmpbge instruction sets *bits* of the result corresponding to 191 matches in the bytes with no false positives. */ 192 return __builtin_ctzl (cmp); 193 #else 194 unsigned int i; 195 196 /* ??? It would be nice to force unrolling here, 197 and have all of these constants folded. */ 198 for (i = 0; i < sizeof(word_type); ++i) 199 { 200 uchar c; 201 if (WORDS_BIGENDIAN) 202 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff; 203 else 204 c = (val >> i * 8) & 0xff; 205 206 if (c == '\n' || c == '\r' || c == '\\' || c == '?') 207 return i; 208 } 209 210 return -1; 211 #endif 212 } 213 214 /* A version of the fast scanner using bit fiddling techniques. 215 216 For 32-bit words, one would normally perform 16 comparisons and 217 16 branches. With this algorithm one performs 24 arithmetic 218 operations and one branch. Whether this is faster with a 32-bit 219 word size is going to be somewhat system dependent. 220 221 For 64-bit words, we eliminate twice the number of comparisons 222 and branches without increasing the number of arithmetic operations. 223 It's almost certainly going to be a win with 64-bit word size. 
*/ 224 225 static const uchar * search_line_acc_char (const uchar *, const uchar *) 226 ATTRIBUTE_UNUSED; 227 228 static const uchar * 229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) 230 { 231 const word_type repl_nl = acc_char_replicate ('\n'); 232 const word_type repl_cr = acc_char_replicate ('\r'); 233 const word_type repl_bs = acc_char_replicate ('\\'); 234 const word_type repl_qm = acc_char_replicate ('?'); 235 236 unsigned int misalign; 237 const word_type *p; 238 word_type val, t; 239 240 /* Align the buffer. Mask out any bytes from before the beginning. */ 241 p = (word_type *)((uintptr_t)s & -sizeof(word_type)); 242 val = *p; 243 misalign = (uintptr_t)s & (sizeof(word_type) - 1); 244 if (misalign) 245 val = acc_char_mask_misalign (val, misalign); 246 247 /* Main loop. */ 248 while (1) 249 { 250 t = acc_char_cmp (val, repl_nl); 251 t |= acc_char_cmp (val, repl_cr); 252 t |= acc_char_cmp (val, repl_bs); 253 t |= acc_char_cmp (val, repl_qm); 254 255 if (__builtin_expect (t != 0, 0)) 256 { 257 int i = acc_char_index (t, val); 258 if (i >= 0) 259 return (const uchar *)p + i; 260 } 261 262 val = *++p; 263 } 264 } 265 266 /* Disable on Solaris 2/x86 until the following problem can be properly 267 autoconfed: 268 269 The Solaris 10+ assembler tags objects with the instruction set 270 extensions used, so SSE4.2 executables cannot run on machines that 271 don't support that extension. */ 272 273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__)) 274 275 /* Replicated character data to be shared between implementations. 276 Recall that outside of a context with vector support we can't 277 define compatible vector types, therefore these are all defined 278 in terms of raw characters. 
*/ 279 static const char repl_chars[4][16] __attribute__((aligned(16))) = { 280 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 281 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' }, 282 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', 283 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' }, 284 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', 285 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }, 286 { '?', '?', '?', '?', '?', '?', '?', '?', 287 '?', '?', '?', '?', '?', '?', '?', '?' }, 288 }; 289 290 /* A version of the fast scanner using MMX vectorized byte compare insns. 291 292 This uses the PMOVMSKB instruction which was introduced with "MMX2", 293 which was packaged into SSE1; it is also present in the AMD MMX 294 extension. Mark the function as using "sse" so that we emit a real 295 "emms" instruction, rather than the 3dNOW "femms" instruction. */ 296 297 static const uchar * 298 #ifndef __SSE__ 299 __attribute__((__target__("sse"))) 300 #endif 301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) 302 { 303 typedef char v8qi __attribute__ ((__vector_size__ (8))); 304 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); 305 306 const v8qi repl_nl = *(const v8qi *)repl_chars[0]; 307 const v8qi repl_cr = *(const v8qi *)repl_chars[1]; 308 const v8qi repl_bs = *(const v8qi *)repl_chars[2]; 309 const v8qi repl_qm = *(const v8qi *)repl_chars[3]; 310 311 unsigned int misalign, found, mask; 312 const v8qi *p; 313 v8qi data, t, c; 314 315 /* Align the source pointer. While MMX doesn't generate unaligned data 316 faults, this allows us to safely scan to the end of the buffer without 317 reading beyond the end of the last page. */ 318 misalign = (uintptr_t)s & 7; 319 p = (const v8qi *)((uintptr_t)s & -8); 320 data = *p; 321 322 /* Create a mask for the bytes that are valid within the first 323 16-byte block. 
The Idea here is that the AND with the mask 324 within the loop is "free", since we need some AND or TEST 325 insn in order to set the flags for the branch anyway. */ 326 mask = -1u << misalign; 327 328 /* Main loop processing 8 bytes at a time. */ 329 goto start; 330 do 331 { 332 data = *++p; 333 mask = -1; 334 335 start: 336 t = __builtin_ia32_pcmpeqb(data, repl_nl); 337 c = __builtin_ia32_pcmpeqb(data, repl_cr); 338 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c); 339 c = __builtin_ia32_pcmpeqb(data, repl_bs); 340 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c); 341 c = __builtin_ia32_pcmpeqb(data, repl_qm); 342 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c); 343 found = __builtin_ia32_pmovmskb (t); 344 found &= mask; 345 } 346 while (!found); 347 348 __builtin_ia32_emms (); 349 350 /* FOUND contains 1 in bits for which we matched a relevant 351 character. Conversion to the byte index is trivial. */ 352 found = __builtin_ctz(found); 353 return (const uchar *)p + found; 354 } 355 356 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */ 357 358 static const uchar * 359 #ifndef __SSE2__ 360 __attribute__((__target__("sse2"))) 361 #endif 362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) 363 { 364 typedef char v16qi __attribute__ ((__vector_size__ (16))); 365 366 const v16qi repl_nl = *(const v16qi *)repl_chars[0]; 367 const v16qi repl_cr = *(const v16qi *)repl_chars[1]; 368 const v16qi repl_bs = *(const v16qi *)repl_chars[2]; 369 const v16qi repl_qm = *(const v16qi *)repl_chars[3]; 370 371 unsigned int misalign, found, mask; 372 const v16qi *p; 373 v16qi data, t; 374 375 /* Align the source pointer. */ 376 misalign = (uintptr_t)s & 15; 377 p = (const v16qi *)((uintptr_t)s & -16); 378 data = *p; 379 380 /* Create a mask for the bytes that are valid within the first 381 16-byte block. 
The Idea here is that the AND with the mask 382 within the loop is "free", since we need some AND or TEST 383 insn in order to set the flags for the branch anyway. */ 384 mask = -1u << misalign; 385 386 /* Main loop processing 16 bytes at a time. */ 387 goto start; 388 do 389 { 390 data = *++p; 391 mask = -1; 392 393 start: 394 t = __builtin_ia32_pcmpeqb128(data, repl_nl); 395 t |= __builtin_ia32_pcmpeqb128(data, repl_cr); 396 t |= __builtin_ia32_pcmpeqb128(data, repl_bs); 397 t |= __builtin_ia32_pcmpeqb128(data, repl_qm); 398 found = __builtin_ia32_pmovmskb128 (t); 399 found &= mask; 400 } 401 while (!found); 402 403 /* FOUND contains 1 in bits for which we matched a relevant 404 character. Conversion to the byte index is trivial. */ 405 found = __builtin_ctz(found); 406 return (const uchar *)p + found; 407 } 408 409 #ifdef HAVE_SSE4 410 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */ 411 412 static const uchar * 413 #ifndef __SSE4_2__ 414 __attribute__((__target__("sse4.2"))) 415 #endif 416 search_line_sse42 (const uchar *s, const uchar *end) 417 { 418 typedef char v16qi __attribute__ ((__vector_size__ (16))); 419 static const v16qi search = { '\n', '\r', '?', '\\' }; 420 421 uintptr_t si = (uintptr_t)s; 422 uintptr_t index; 423 424 /* Check for unaligned input. */ 425 if (si & 15) 426 { 427 v16qi sv; 428 429 if (__builtin_expect (end - s < 16, 0) 430 && __builtin_expect ((si & 0xfff) > 0xff0, 0)) 431 { 432 /* There are less than 16 bytes left in the buffer, and less 433 than 16 bytes left on the page. Reading 16 bytes at this 434 point might generate a spurious page fault. Defer to the 435 SSE2 implementation, which already handles alignment. */ 436 return search_line_sse2 (s, end); 437 } 438 439 /* ??? The builtin doesn't understand that the PCMPESTRI read from 440 memory need not be aligned. 
*/ 441 sv = __builtin_ia32_loaddqu ((const char *) s); 442 index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0); 443 444 if (__builtin_expect (index < 16, 0)) 445 goto found; 446 447 /* Advance the pointer to an aligned address. We will re-scan a 448 few bytes, but we no longer need care for reading past the 449 end of a page, since we're guaranteed a match. */ 450 s = (const uchar *)((si + 15) & -16); 451 } 452 453 /* Main loop, processing 16 bytes at a time. */ 454 #ifdef __GCC_ASM_FLAG_OUTPUTS__ 455 while (1) 456 { 457 char f; 458 459 /* By using inline assembly instead of the builtin, 460 we can use the result, as well as the flags set. */ 461 __asm ("%vpcmpestri\t$0, %2, %3" 462 : "=c"(index), "=@ccc"(f) 463 : "m"(*s), "x"(search), "a"(4), "d"(16)); 464 if (f) 465 break; 466 467 s += 16; 468 } 469 #else 470 s -= 16; 471 /* By doing the whole loop in inline assembly, 472 we can make proper use of the flags set. */ 473 __asm ( ".balign 16\n" 474 "0: add $16, %1\n" 475 " %vpcmpestri\t$0, (%1), %2\n" 476 " jnc 0b" 477 : "=&c"(index), "+r"(s) 478 : "x"(search), "a"(4), "d"(16)); 479 #endif 480 481 found: 482 return s + index; 483 } 484 485 #else 486 /* Work around out-dated assemblers without sse4 support. */ 487 #define search_line_sse42 search_line_sse2 488 #endif 489 490 /* Check the CPU capabilities. 
*/ 491 492 #include "../gcc/config/i386/cpuid.h" 493 494 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *); 495 static search_line_fast_type search_line_fast; 496 497 #define HAVE_init_vectorized_lexer 1 498 static inline void 499 init_vectorized_lexer (void) 500 { 501 unsigned dummy, ecx = 0, edx = 0; 502 search_line_fast_type impl = search_line_acc_char; 503 int minimum = 0; 504 505 #if defined(__SSE4_2__) 506 minimum = 3; 507 #elif defined(__SSE2__) 508 minimum = 2; 509 #elif defined(__SSE__) 510 minimum = 1; 511 #endif 512 513 if (minimum == 3) 514 impl = search_line_sse42; 515 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2) 516 { 517 if (minimum == 3 || (ecx & bit_SSE4_2)) 518 impl = search_line_sse42; 519 else if (minimum == 2 || (edx & bit_SSE2)) 520 impl = search_line_sse2; 521 else if (minimum == 1 || (edx & bit_SSE)) 522 impl = search_line_mmx; 523 } 524 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx)) 525 { 526 if (minimum == 1 527 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV)) 528 impl = search_line_mmx; 529 } 530 531 search_line_fast = impl; 532 } 533 534 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__) 535 536 /* A vection of the fast scanner using AltiVec vectorized byte compares 537 and VSX unaligned loads (when VSX is available). This is otherwise 538 the same as the pre-GCC 5 version. 
*/ 539 540 ATTRIBUTE_NO_SANITIZE_UNDEFINED 541 static const uchar * 542 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) 543 { 544 typedef __attribute__((altivec(vector))) unsigned char vc; 545 546 const vc repl_nl = { 547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' 549 }; 550 const vc repl_cr = { 551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', 552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' 553 }; 554 const vc repl_bs = { 555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', 556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' 557 }; 558 const vc repl_qm = { 559 '?', '?', '?', '?', '?', '?', '?', '?', 560 '?', '?', '?', '?', '?', '?', '?', '?', 561 }; 562 const vc zero = { 0 }; 563 564 vc data, t; 565 566 /* Main loop processing 16 bytes at a time. */ 567 do 568 { 569 vc m_nl, m_cr, m_bs, m_qm; 570 571 data = __builtin_vec_vsx_ld (0, s); 572 s += 16; 573 574 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl); 575 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr); 576 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs); 577 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm); 578 t = (m_nl | m_cr) | (m_bs | m_qm); 579 580 /* T now contains 0xff in bytes for which we matched one of the relevant 581 characters. We want to exit the loop if any byte in T is non-zero. 582 Below is the expansion of vec_any_ne(t, zero). */ 583 } 584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero)); 585 586 /* Restore s to to point to the 16 bytes we just processed. */ 587 s -= 16; 588 589 { 590 #define N (sizeof(vc) / sizeof(long)) 591 592 union { 593 vc v; 594 /* Statically assert that N is 2 or 4. */ 595 unsigned long l[(N == 2 || N == 4) ? N : -1]; 596 } u; 597 unsigned long l, i = 0; 598 599 u.v = t; 600 601 /* Find the first word of T that is non-zero. 
*/ 602 switch (N) 603 { 604 case 4: 605 l = u.l[i++]; 606 if (l != 0) 607 break; 608 s += sizeof(unsigned long); 609 l = u.l[i++]; 610 if (l != 0) 611 break; 612 s += sizeof(unsigned long); 613 case 2: 614 l = u.l[i++]; 615 if (l != 0) 616 break; 617 s += sizeof(unsigned long); 618 l = u.l[i]; 619 } 620 621 /* L now contains 0xff in bytes for which we matched one of the 622 relevant characters. We can find the byte index by finding 623 its bit index and dividing by 8. */ 624 #ifdef __BIG_ENDIAN__ 625 l = __builtin_clzl(l) >> 3; 626 #else 627 l = __builtin_ctzl(l) >> 3; 628 #endif 629 return s + l; 630 631 #undef N 632 } 633 } 634 635 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__) 636 637 /* A vection of the fast scanner using AltiVec vectorized byte compares. 638 This cannot be used for little endian because vec_lvsl/lvsr are 639 deprecated for little endian and the code won't work properly. */ 640 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported, 641 so we can't compile this function without -maltivec on the command line 642 (or implied by some other switch). 
*/ 643 644 static const uchar * 645 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) 646 { 647 typedef __attribute__((altivec(vector))) unsigned char vc; 648 649 const vc repl_nl = { 650 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 651 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' 652 }; 653 const vc repl_cr = { 654 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', 655 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' 656 }; 657 const vc repl_bs = { 658 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', 659 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' 660 }; 661 const vc repl_qm = { 662 '?', '?', '?', '?', '?', '?', '?', '?', 663 '?', '?', '?', '?', '?', '?', '?', '?', 664 }; 665 const vc ones = { 666 -1, -1, -1, -1, -1, -1, -1, -1, 667 -1, -1, -1, -1, -1, -1, -1, -1, 668 }; 669 const vc zero = { 0 }; 670 671 vc data, mask, t; 672 673 /* Altivec loads automatically mask addresses with -16. This lets us 674 issue the first load as early as possible. */ 675 data = __builtin_vec_ld(0, (const vc *)s); 676 677 /* Discard bytes before the beginning of the buffer. Do this by 678 beginning with all ones and shifting in zeros according to the 679 mis-alignment. The LVSR instruction pulls the exact shift we 680 want from the address. */ 681 mask = __builtin_vec_lvsr(0, s); 682 mask = __builtin_vec_perm(zero, ones, mask); 683 data &= mask; 684 685 /* While altivec loads mask addresses, we still need to align S so 686 that the offset we compute at the end is correct. */ 687 s = (const uchar *)((uintptr_t)s & -16); 688 689 /* Main loop processing 16 bytes at a time. 
*/ 690 goto start; 691 do 692 { 693 vc m_nl, m_cr, m_bs, m_qm; 694 695 s += 16; 696 data = __builtin_vec_ld(0, (const vc *)s); 697 698 start: 699 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl); 700 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr); 701 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs); 702 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm); 703 t = (m_nl | m_cr) | (m_bs | m_qm); 704 705 /* T now contains 0xff in bytes for which we matched one of the relevant 706 characters. We want to exit the loop if any byte in T is non-zero. 707 Below is the expansion of vec_any_ne(t, zero). */ 708 } 709 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero)); 710 711 { 712 #define N (sizeof(vc) / sizeof(long)) 713 714 union { 715 vc v; 716 /* Statically assert that N is 2 or 4. */ 717 unsigned long l[(N == 2 || N == 4) ? N : -1]; 718 } u; 719 unsigned long l, i = 0; 720 721 u.v = t; 722 723 /* Find the first word of T that is non-zero. */ 724 switch (N) 725 { 726 case 4: 727 l = u.l[i++]; 728 if (l != 0) 729 break; 730 s += sizeof(unsigned long); 731 l = u.l[i++]; 732 if (l != 0) 733 break; 734 s += sizeof(unsigned long); 735 case 2: 736 l = u.l[i++]; 737 if (l != 0) 738 break; 739 s += sizeof(unsigned long); 740 l = u.l[i]; 741 } 742 743 /* L now contains 0xff in bytes for which we matched one of the 744 relevant characters. We can find the byte index by finding 745 its bit index and dividing by 8. 
*/ 746 l = __builtin_clzl(l) >> 3; 747 return s + l; 748 749 #undef N 750 } 751 } 752 753 #elif defined (__ARM_NEON) 754 #include "arm_neon.h" 755 756 static const uchar * 757 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) 758 { 759 const uint8x16_t repl_nl = vdupq_n_u8 ('\n'); 760 const uint8x16_t repl_cr = vdupq_n_u8 ('\r'); 761 const uint8x16_t repl_bs = vdupq_n_u8 ('\\'); 762 const uint8x16_t repl_qm = vdupq_n_u8 ('?'); 763 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL); 764 765 unsigned int misalign, found, mask; 766 const uint8_t *p; 767 uint8x16_t data; 768 769 /* Align the source pointer. */ 770 misalign = (uintptr_t)s & 15; 771 p = (const uint8_t *)((uintptr_t)s & -16); 772 data = vld1q_u8 (p); 773 774 /* Create a mask for the bytes that are valid within the first 775 16-byte block. The Idea here is that the AND with the mask 776 within the loop is "free", since we need some AND or TEST 777 insn in order to set the flags for the branch anyway. */ 778 mask = (-1u << misalign) & 0xffff; 779 780 /* Main loop, processing 16 bytes at a time. */ 781 goto start; 782 783 do 784 { 785 uint8x8_t l; 786 uint16x4_t m; 787 uint32x2_t n; 788 uint8x16_t t, u, v, w; 789 790 p += 16; 791 data = vld1q_u8 (p); 792 mask = 0xffff; 793 794 start: 795 t = vceqq_u8 (data, repl_nl); 796 u = vceqq_u8 (data, repl_cr); 797 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs)); 798 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm)); 799 t = vandq_u8 (vorrq_u8 (v, w), xmask); 800 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t)); 801 m = vpaddl_u8 (l); 802 n = vpaddl_u16 (m); 803 804 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n, 805 vshr_n_u64 ((uint64x1_t) n, 24)), 0); 806 found &= mask; 807 } 808 while (!found); 809 810 /* FOUND contains 1 in bits for which we matched a relevant 811 character. Conversion to the byte index is trivial. 
*/ 812 found = __builtin_ctz (found); 813 return (const uchar *)p + found; 814 } 815 816 #else 817 818 /* We only have one accellerated alternative. Use a direct call so that 819 we encourage inlining. */ 820 821 #define search_line_fast search_line_acc_char 822 823 #endif 824 825 /* Initialize the lexer if needed. */ 826 827 void 828 _cpp_init_lexer (void) 829 { 830 #ifdef HAVE_init_vectorized_lexer 831 init_vectorized_lexer (); 832 #endif 833 } 834 835 /* Returns with a logical line that contains no escaped newlines or 836 trigraphs. This is a time-critical inner loop. */ 837 void 838 _cpp_clean_line (cpp_reader *pfile) 839 { 840 cpp_buffer *buffer; 841 const uchar *s; 842 uchar c, *d, *p; 843 844 buffer = pfile->buffer; 845 buffer->cur_note = buffer->notes_used = 0; 846 buffer->cur = buffer->line_base = buffer->next_line; 847 buffer->need_line = false; 848 s = buffer->next_line; 849 850 if (!buffer->from_stage3) 851 { 852 const uchar *pbackslash = NULL; 853 854 /* Fast path. This is the common case of an un-escaped line with 855 no trigraphs. The primary win here is by not writing any 856 data back to memory until we have to. */ 857 while (1) 858 { 859 /* Perform an optimized search for \n, \r, \\, ?. */ 860 s = search_line_fast (s, buffer->rlimit); 861 862 c = *s; 863 if (c == '\\') 864 { 865 /* Record the location of the backslash and continue. */ 866 pbackslash = s++; 867 } 868 else if (__builtin_expect (c == '?', 0)) 869 { 870 if (__builtin_expect (s[1] == '?', false) 871 && _cpp_trigraph_map[s[2]]) 872 { 873 /* Have a trigraph. We may or may not have to convert 874 it. Add a line note regardless, for -Wtrigraphs. */ 875 add_line_note (buffer, s, s[2]); 876 if (CPP_OPTION (pfile, trigraphs)) 877 { 878 /* We do, and that means we have to switch to the 879 slow path. */ 880 d = (uchar *) s; 881 *d = _cpp_trigraph_map[s[2]]; 882 s += 2; 883 goto slow_path; 884 } 885 } 886 /* Not a trigraph. Continue on fast-path. 
*/ 887 s++; 888 } 889 else 890 break; 891 } 892 893 /* This must be \r or \n. We're either done, or we'll be forced 894 to write back to the buffer and continue on the slow path. */ 895 d = (uchar *) s; 896 897 if (__builtin_expect (s == buffer->rlimit, false)) 898 goto done; 899 900 /* DOS line ending? */ 901 if (__builtin_expect (c == '\r', false) && s[1] == '\n') 902 { 903 s++; 904 if (s == buffer->rlimit) 905 goto done; 906 } 907 908 if (__builtin_expect (pbackslash == NULL, true)) 909 goto done; 910 911 /* Check for escaped newline. */ 912 p = d; 913 while (is_nvspace (p[-1])) 914 p--; 915 if (p - 1 != pbackslash) 916 goto done; 917 918 /* Have an escaped newline; process it and proceed to 919 the slow path. */ 920 add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); 921 d = p - 2; 922 buffer->next_line = p - 1; 923 924 slow_path: 925 while (1) 926 { 927 c = *++s; 928 *++d = c; 929 930 if (c == '\n' || c == '\r') 931 { 932 /* Handle DOS line endings. */ 933 if (c == '\r' && s != buffer->rlimit && s[1] == '\n') 934 s++; 935 if (s == buffer->rlimit) 936 break; 937 938 /* Escaped? */ 939 p = d; 940 while (p != buffer->next_line && is_nvspace (p[-1])) 941 p--; 942 if (p == buffer->next_line || p[-1] != '\\') 943 break; 944 945 add_line_note (buffer, p - 1, p != d ? ' ': '\\'); 946 d = p - 2; 947 buffer->next_line = p - 1; 948 } 949 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 950 { 951 /* Add a note regardless, for the benefit of -Wtrigraphs. */ 952 add_line_note (buffer, d, s[2]); 953 if (CPP_OPTION (pfile, trigraphs)) 954 { 955 *d = _cpp_trigraph_map[s[2]]; 956 s += 2; 957 } 958 } 959 } 960 } 961 else 962 { 963 while (*s != '\n' && *s != '\r') 964 s++; 965 d = (uchar *) s; 966 967 /* Handle DOS line endings. */ 968 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') 969 s++; 970 } 971 972 done: 973 *d = '\n'; 974 /* A sentinel note that should never be processed. 
*/ 975 add_line_note (buffer, d + 1, '\n'); 976 buffer->next_line = s + 1; 977 } 978 979 /* Return true if the trigraph indicated by NOTE should be warned 980 about in a comment. */ 981 static bool 982 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note) 983 { 984 const uchar *p; 985 986 /* Within comments we don't warn about trigraphs, unless the 987 trigraph forms an escaped newline, as that may change 988 behavior. */ 989 if (note->type != '/') 990 return false; 991 992 /* If -trigraphs, then this was an escaped newline iff the next note 993 is coincident. */ 994 if (CPP_OPTION (pfile, trigraphs)) 995 return note[1].pos == note->pos; 996 997 /* Otherwise, see if this forms an escaped newline. */ 998 p = note->pos + 3; 999 while (is_nvspace (*p)) 1000 p++; 1001 1002 /* There might have been escaped newlines between the trigraph and the 1003 newline we found. Hence the position test. */ 1004 return (*p == '\n' && p < note[1].pos); 1005 } 1006 1007 /* Process the notes created by add_line_note as far as the current 1008 location. */ 1009 void 1010 _cpp_process_line_notes (cpp_reader *pfile, int in_comment) 1011 { 1012 cpp_buffer *buffer = pfile->buffer; 1013 1014 for (;;) 1015 { 1016 _cpp_line_note *note = &buffer->notes[buffer->cur_note]; 1017 unsigned int col; 1018 1019 if (note->pos > buffer->cur) 1020 break; 1021 1022 buffer->cur_note++; 1023 col = CPP_BUF_COLUMN (buffer, note->pos + 1); 1024 1025 if (note->type == '\\' || note->type == ' ') 1026 { 1027 if (note->type == ' ' && !in_comment) 1028 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 1029 "backslash and newline separated by space"); 1030 1031 if (buffer->next_line > buffer->rlimit) 1032 { 1033 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, 1034 "backslash-newline at end of file"); 1035 /* Prevent "no newline at end of file" warning. 
*/ 1036 buffer->next_line = buffer->rlimit; 1037 } 1038 1039 buffer->line_base = note->pos; 1040 CPP_INCREMENT_LINE (pfile, 0); 1041 } 1042 else if (_cpp_trigraph_map[note->type]) 1043 { 1044 if (CPP_OPTION (pfile, warn_trigraphs) 1045 && (!in_comment || warn_in_comment (pfile, note))) 1046 { 1047 if (CPP_OPTION (pfile, trigraphs)) 1048 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS, 1049 pfile->line_table->highest_line, col, 1050 "trigraph ??%c converted to %c", 1051 note->type, 1052 (int) _cpp_trigraph_map[note->type]); 1053 else 1054 { 1055 cpp_warning_with_line 1056 (pfile, CPP_W_TRIGRAPHS, 1057 pfile->line_table->highest_line, col, 1058 "trigraph ??%c ignored, use -trigraphs to enable", 1059 note->type); 1060 } 1061 } 1062 } 1063 else if (note->type == 0) 1064 /* Already processed in lex_raw_string. */; 1065 else 1066 abort (); 1067 } 1068 } 1069 1070 /* Skip a C-style block comment. We find the end of the comment by 1071 seeing if an asterisk is before every '/' we encounter. Returns 1072 nonzero if comment terminated by EOF, zero otherwise. 1073 1074 Buffer->cur points to the initial asterisk of the comment. */ 1075 bool 1076 _cpp_skip_block_comment (cpp_reader *pfile) 1077 { 1078 cpp_buffer *buffer = pfile->buffer; 1079 const uchar *cur = buffer->cur; 1080 uchar c; 1081 1082 cur++; 1083 if (*cur == '/') 1084 cur++; 1085 1086 for (;;) 1087 { 1088 /* People like decorating comments with '*', so check for '/' 1089 instead for efficiency. */ 1090 c = *cur++; 1091 1092 if (c == '/') 1093 { 1094 if (cur[-2] == '*') 1095 break; 1096 1097 /* Warn about potential nested comments, but not if the '/' 1098 comes immediately before the true comment delimiter. 1099 Don't bother to get it right across escaped newlines. 
*/ 1100 if (CPP_OPTION (pfile, warn_comments) 1101 && cur[0] == '*' && cur[1] != '/') 1102 { 1103 buffer->cur = cur; 1104 cpp_warning_with_line (pfile, CPP_W_COMMENTS, 1105 pfile->line_table->highest_line, 1106 CPP_BUF_COL (buffer), 1107 "\"/*\" within comment"); 1108 } 1109 } 1110 else if (c == '\n') 1111 { 1112 unsigned int cols; 1113 buffer->cur = cur - 1; 1114 _cpp_process_line_notes (pfile, true); 1115 if (buffer->next_line >= buffer->rlimit) 1116 return true; 1117 _cpp_clean_line (pfile); 1118 1119 cols = buffer->next_line - buffer->line_base; 1120 CPP_INCREMENT_LINE (pfile, cols); 1121 1122 cur = buffer->cur; 1123 } 1124 } 1125 1126 buffer->cur = cur; 1127 _cpp_process_line_notes (pfile, true); 1128 return false; 1129 } 1130 1131 /* Skip a C++ line comment, leaving buffer->cur pointing to the 1132 terminating newline. Handles escaped newlines. Returns nonzero 1133 if a multiline comment. */ 1134 static int 1135 skip_line_comment (cpp_reader *pfile) 1136 { 1137 cpp_buffer *buffer = pfile->buffer; 1138 source_location orig_line = pfile->line_table->highest_line; 1139 1140 while (*buffer->cur != '\n') 1141 buffer->cur++; 1142 1143 _cpp_process_line_notes (pfile, true); 1144 return orig_line != pfile->line_table->highest_line; 1145 } 1146 1147 /* Skips whitespace, saving the next non-whitespace character. */ 1148 static void 1149 skip_whitespace (cpp_reader *pfile, cppchar_t c) 1150 { 1151 cpp_buffer *buffer = pfile->buffer; 1152 bool saw_NUL = false; 1153 1154 do 1155 { 1156 /* Horizontal space always OK. */ 1157 if (c == ' ' || c == '\t') 1158 ; 1159 /* Just \f \v or \0 left. */ 1160 else if (c == '\0') 1161 saw_NUL = true; 1162 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) 1163 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 1164 CPP_BUF_COL (buffer), 1165 "%s in preprocessing directive", 1166 c == '\f' ? "form feed" : "vertical tab"); 1167 1168 c = *buffer->cur++; 1169 } 1170 /* We only want non-vertical space, i.e. 
' ' \t \f \v \0. */ 1171 while (is_nvspace (c)); 1172 1173 if (saw_NUL) 1174 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); 1175 1176 buffer->cur--; 1177 } 1178 1179 /* See if the characters of a number token are valid in a name (no 1180 '.', '+' or '-'). */ 1181 static int 1182 name_p (cpp_reader *pfile, const cpp_string *string) 1183 { 1184 unsigned int i; 1185 1186 for (i = 0; i < string->len; i++) 1187 if (!is_idchar (string->text[i])) 1188 return 0; 1189 1190 return 1; 1191 } 1192 1193 /* After parsing an identifier or other sequence, produce a warning about 1194 sequences not in NFC/NFKC. */ 1195 static void 1196 warn_about_normalization (cpp_reader *pfile, 1197 const cpp_token *token, 1198 const struct normalize_state *s) 1199 { 1200 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) 1201 && !pfile->state.skipping) 1202 { 1203 /* Make sure that the token is printed using UCNs, even 1204 if we'd otherwise happily print UTF-8. */ 1205 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); 1206 size_t sz; 1207 1208 sz = cpp_spell_token (pfile, token, buf, false) - buf; 1209 if (NORMALIZE_STATE_RESULT (s) == normalized_C) 1210 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0, 1211 "`%.*s' is not in NFKC", (int) sz, buf); 1212 else 1213 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0, 1214 "`%.*s' is not in NFC", (int) sz, buf); 1215 free (buf); 1216 } 1217 } 1218 1219 /* Returns TRUE if the sequence starting at buffer->cur is invalid in 1220 an identifier. FIRST is TRUE if this starts an identifier. 
*/ 1221 static bool 1222 forms_identifier_p (cpp_reader *pfile, int first, 1223 struct normalize_state *state) 1224 { 1225 cpp_buffer *buffer = pfile->buffer; 1226 1227 if (*buffer->cur == '$') 1228 { 1229 if (!CPP_OPTION (pfile, dollars_in_ident)) 1230 return false; 1231 1232 buffer->cur++; 1233 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) 1234 { 1235 CPP_OPTION (pfile, warn_dollars) = 0; 1236 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); 1237 } 1238 1239 return true; 1240 } 1241 1242 /* Is this a syntactically valid UCN? */ 1243 if (CPP_OPTION (pfile, extended_identifiers) 1244 && *buffer->cur == '\\' 1245 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) 1246 { 1247 cppchar_t s; 1248 buffer->cur += 2; 1249 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 1250 state, &s)) 1251 return true; 1252 buffer->cur -= 2; 1253 } 1254 1255 return false; 1256 } 1257 1258 /* Helper function to get the cpp_hashnode of the identifier BASE. */ 1259 static cpp_hashnode * 1260 lex_identifier_intern (cpp_reader *pfile, const uchar *base) 1261 { 1262 cpp_hashnode *result; 1263 const uchar *cur; 1264 unsigned int len; 1265 unsigned int hash = HT_HASHSTEP (0, *base); 1266 1267 cur = base + 1; 1268 while (ISIDNUM (*cur)) 1269 { 1270 hash = HT_HASHSTEP (hash, *cur); 1271 cur++; 1272 } 1273 len = cur - base; 1274 hash = HT_HASHFINISH (hash, len); 1275 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, 1276 base, len, hash, HT_ALLOC)); 1277 1278 /* Rarely, identifiers require diagnostics when lexed. */ 1279 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 1280 && !pfile->state.skipping, 0)) 1281 { 1282 /* It is allowed to poison the same identifier twice. 
*/ 1283 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 1284 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", 1285 NODE_NAME (result)); 1286 1287 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 1288 replacement list of a variadic macro. */ 1289 if (result == pfile->spec_nodes.n__VA_ARGS__ 1290 && !pfile->state.va_args_ok) 1291 { 1292 if (CPP_OPTION (pfile, cplusplus)) 1293 cpp_error (pfile, CPP_DL_PEDWARN, 1294 "__VA_ARGS__ can only appear in the expansion" 1295 " of a C++11 variadic macro"); 1296 else 1297 cpp_error (pfile, CPP_DL_PEDWARN, 1298 "__VA_ARGS__ can only appear in the expansion" 1299 " of a C99 variadic macro"); 1300 } 1301 1302 /* For -Wc++-compat, warn about use of C++ named operators. */ 1303 if (result->flags & NODE_WARN_OPERATOR) 1304 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES, 1305 "identifier \"%s\" is a special operator name in C++", 1306 NODE_NAME (result)); 1307 } 1308 1309 return result; 1310 } 1311 1312 /* Get the cpp_hashnode of an identifier specified by NAME in 1313 the current cpp_reader object. If none is found, NULL is returned. */ 1314 cpp_hashnode * 1315 _cpp_lex_identifier (cpp_reader *pfile, const char *name) 1316 { 1317 cpp_hashnode *result; 1318 result = lex_identifier_intern (pfile, (uchar *) name); 1319 return result; 1320 } 1321 1322 /* Lex an identifier starting at BUFFER->CUR - 1. */ 1323 static cpp_hashnode * 1324 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, 1325 struct normalize_state *nst, cpp_hashnode **spelling) 1326 { 1327 cpp_hashnode *result; 1328 const uchar *cur; 1329 unsigned int len; 1330 unsigned int hash = HT_HASHSTEP (0, *base); 1331 1332 cur = pfile->buffer->cur; 1333 if (! 
starts_ucn) 1334 { 1335 while (ISIDNUM (*cur)) 1336 { 1337 hash = HT_HASHSTEP (hash, *cur); 1338 cur++; 1339 } 1340 NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1)); 1341 } 1342 pfile->buffer->cur = cur; 1343 if (starts_ucn || forms_identifier_p (pfile, false, nst)) 1344 { 1345 /* Slower version for identifiers containing UCNs (or $). */ 1346 do { 1347 while (ISIDNUM (*pfile->buffer->cur)) 1348 { 1349 NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur); 1350 pfile->buffer->cur++; 1351 } 1352 } while (forms_identifier_p (pfile, false, nst)); 1353 result = _cpp_interpret_identifier (pfile, base, 1354 pfile->buffer->cur - base); 1355 *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); 1356 } 1357 else 1358 { 1359 len = cur - base; 1360 hash = HT_HASHFINISH (hash, len); 1361 1362 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, 1363 base, len, hash, HT_ALLOC)); 1364 *spelling = result; 1365 } 1366 1367 /* Rarely, identifiers require diagnostics when lexed. */ 1368 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 1369 && !pfile->state.skipping, 0)) 1370 { 1371 /* It is allowed to poison the same identifier twice. */ 1372 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 1373 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", 1374 NODE_NAME (result)); 1375 1376 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 1377 replacement list of a variadic macro. */ 1378 if (result == pfile->spec_nodes.n__VA_ARGS__ 1379 && !pfile->state.va_args_ok) 1380 { 1381 if (CPP_OPTION (pfile, cplusplus)) 1382 cpp_error (pfile, CPP_DL_PEDWARN, 1383 "__VA_ARGS__ can only appear in the expansion" 1384 " of a C++11 variadic macro"); 1385 else 1386 cpp_error (pfile, CPP_DL_PEDWARN, 1387 "__VA_ARGS__ can only appear in the expansion" 1388 " of a C99 variadic macro"); 1389 } 1390 1391 /* For -Wc++-compat, warn about use of C++ named operators. 
*/ 1392 if (result->flags & NODE_WARN_OPERATOR) 1393 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES, 1394 "identifier \"%s\" is a special operator name in C++", 1395 NODE_NAME (result)); 1396 } 1397 1398 return result; 1399 } 1400 1401 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ 1402 static void 1403 lex_number (cpp_reader *pfile, cpp_string *number, 1404 struct normalize_state *nst) 1405 { 1406 const uchar *cur; 1407 const uchar *base; 1408 uchar *dest; 1409 1410 base = pfile->buffer->cur - 1; 1411 do 1412 { 1413 cur = pfile->buffer->cur; 1414 1415 /* N.B. ISIDNUM does not include $. */ 1416 while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur) 1417 || VALID_SIGN (*cur, cur[-1])) 1418 { 1419 NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur); 1420 cur++; 1421 } 1422 /* A number can't end with a digit separator. */ 1423 while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1])) 1424 --cur; 1425 1426 pfile->buffer->cur = cur; 1427 } 1428 while (forms_identifier_p (pfile, false, nst)); 1429 1430 number->len = cur - base; 1431 dest = _cpp_unaligned_alloc (pfile, number->len + 1); 1432 memcpy (dest, base, number->len); 1433 dest[number->len] = '\0'; 1434 number->text = dest; 1435 } 1436 1437 /* Create a token of type TYPE with a literal spelling. */ 1438 static void 1439 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, 1440 unsigned int len, enum cpp_ttype type) 1441 { 1442 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); 1443 1444 memcpy (dest, base, len); 1445 dest[len] = '\0'; 1446 token->type = type; 1447 token->val.str.len = len; 1448 token->val.str.text = dest; 1449 } 1450 1451 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer 1452 sequence from *FIRST_BUFF_P to LAST_BUFF_P. 
*/ 1453 1454 static void 1455 bufring_append (cpp_reader *pfile, const uchar *base, size_t len, 1456 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p) 1457 { 1458 _cpp_buff *first_buff = *first_buff_p; 1459 _cpp_buff *last_buff = *last_buff_p; 1460 1461 if (first_buff == NULL) 1462 first_buff = last_buff = _cpp_get_buff (pfile, len); 1463 else if (len > BUFF_ROOM (last_buff)) 1464 { 1465 size_t room = BUFF_ROOM (last_buff); 1466 memcpy (BUFF_FRONT (last_buff), base, room); 1467 BUFF_FRONT (last_buff) += room; 1468 base += room; 1469 len -= room; 1470 last_buff = _cpp_append_extend_buff (pfile, last_buff, len); 1471 } 1472 1473 memcpy (BUFF_FRONT (last_buff), base, len); 1474 BUFF_FRONT (last_buff) += len; 1475 1476 *first_buff_p = first_buff; 1477 *last_buff_p = last_buff; 1478 } 1479 1480 1481 /* Returns true if a macro has been defined. 1482 This might not work if compile with -save-temps, 1483 or preprocess separately from compilation. */ 1484 1485 static bool 1486 is_macro(cpp_reader *pfile, const uchar *base) 1487 { 1488 const uchar *cur = base; 1489 if (! ISIDST (*cur)) 1490 return false; 1491 unsigned int hash = HT_HASHSTEP (0, *cur); 1492 ++cur; 1493 while (ISIDNUM (*cur)) 1494 { 1495 hash = HT_HASHSTEP (hash, *cur); 1496 ++cur; 1497 } 1498 hash = HT_HASHFINISH (hash, cur - base); 1499 1500 cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, 1501 base, cur - base, hash, HT_NO_INSERT)); 1502 1503 return !result ? false : (result->type == NT_MACRO); 1504 } 1505 1506 1507 /* Lexes a raw string. The stored string contains the spelling, including 1508 double quotes, delimiter string, '(' and ')', any leading 1509 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the 1510 literal, or CPP_OTHER if it was not properly terminated. 1511 1512 The spelling is NUL-terminated, but it is not guaranteed that this 1513 is the first NUL since embedded NULs are preserved. 
*/ 1514 1515 static void 1516 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, 1517 const uchar *cur) 1518 { 1519 uchar raw_prefix[17]; 1520 uchar temp_buffer[18]; 1521 const uchar *orig_base; 1522 unsigned int raw_prefix_len = 0, raw_suffix_len = 0; 1523 enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX }; 1524 raw_str_phase phase = RAW_STR_PREFIX; 1525 enum cpp_ttype type; 1526 size_t total_len = 0; 1527 /* Index into temp_buffer during phases other than RAW_STR, 1528 during RAW_STR phase 17 to tell BUF_APPEND that nothing should 1529 be appended to temp_buffer. */ 1530 size_t temp_buffer_len = 0; 1531 _cpp_buff *first_buff = NULL, *last_buff = NULL; 1532 size_t raw_prefix_start; 1533 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; 1534 1535 type = (*base == 'L' ? CPP_WSTRING : 1536 *base == 'U' ? CPP_STRING32 : 1537 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16) 1538 : CPP_STRING); 1539 1540 #define BUF_APPEND(STR,LEN) \ 1541 do { \ 1542 bufring_append (pfile, (const uchar *)(STR), (LEN), \ 1543 &first_buff, &last_buff); \ 1544 total_len += (LEN); \ 1545 if (__builtin_expect (temp_buffer_len < 17, 0) \ 1546 && (const uchar *)(STR) != base \ 1547 && (LEN) <= 2) \ 1548 { \ 1549 memcpy (temp_buffer + temp_buffer_len, \ 1550 (const uchar *)(STR), (LEN)); \ 1551 temp_buffer_len += (LEN); \ 1552 } \ 1553 } while (0); 1554 1555 orig_base = base; 1556 ++cur; 1557 raw_prefix_start = cur - base; 1558 for (;;) 1559 { 1560 cppchar_t c; 1561 1562 /* If we previously performed any trigraph or line splicing 1563 transformations, undo them in between the opening and closing 1564 double quote. */ 1565 while (note->pos < cur) 1566 ++note; 1567 for (; note->pos == cur; ++note) 1568 { 1569 switch (note->type) 1570 { 1571 case '\\': 1572 case ' ': 1573 /* Restore backslash followed by newline. 
*/ 1574 BUF_APPEND (base, cur - base); 1575 base = cur; 1576 BUF_APPEND ("\\", 1); 1577 after_backslash: 1578 if (note->type == ' ') 1579 { 1580 /* GNU backslash whitespace newline extension. FIXME 1581 could be any sequence of non-vertical space. When we 1582 can properly restore any such sequence, we should mark 1583 this note as handled so _cpp_process_line_notes 1584 doesn't warn. */ 1585 BUF_APPEND (" ", 1); 1586 } 1587 1588 BUF_APPEND ("\n", 1); 1589 break; 1590 1591 case 0: 1592 /* Already handled. */ 1593 break; 1594 1595 default: 1596 if (_cpp_trigraph_map[note->type]) 1597 { 1598 /* Don't warn about this trigraph in 1599 _cpp_process_line_notes, since trigraphs show up as 1600 trigraphs in raw strings. */ 1601 uchar type = note->type; 1602 note->type = 0; 1603 1604 if (!CPP_OPTION (pfile, trigraphs)) 1605 /* If we didn't convert the trigraph in the first 1606 place, don't do anything now either. */ 1607 break; 1608 1609 BUF_APPEND (base, cur - base); 1610 base = cur; 1611 BUF_APPEND ("??", 2); 1612 1613 /* ??/ followed by newline gets two line notes, one for 1614 the trigraph and one for the backslash/newline. */ 1615 if (type == '/' && note[1].pos == cur) 1616 { 1617 if (note[1].type != '\\' 1618 && note[1].type != ' ') 1619 abort (); 1620 BUF_APPEND ("/", 1); 1621 ++note; 1622 goto after_backslash; 1623 } 1624 else 1625 { 1626 /* Skip the replacement character. 
*/ 1627 base = ++cur; 1628 BUF_APPEND (&type, 1); 1629 c = type; 1630 goto check_c; 1631 } 1632 } 1633 else 1634 abort (); 1635 break; 1636 } 1637 } 1638 c = *cur++; 1639 if (__builtin_expect (temp_buffer_len < 17, 0)) 1640 temp_buffer[temp_buffer_len++] = c; 1641 1642 check_c: 1643 if (phase == RAW_STR_PREFIX) 1644 { 1645 while (raw_prefix_len < temp_buffer_len) 1646 { 1647 raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len]; 1648 switch (raw_prefix[raw_prefix_len]) 1649 { 1650 case ' ': case '(': case ')': case '\\': case '\t': 1651 case '\v': case '\f': case '\n': default: 1652 break; 1653 /* Basic source charset except the above chars. */ 1654 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 1655 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 1656 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 1657 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 1658 case 'y': case 'z': 1659 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 1660 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 1661 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 1662 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 1663 case 'Y': case 'Z': 1664 case '0': case '1': case '2': case '3': case '4': case '5': 1665 case '6': case '7': case '8': case '9': 1666 case '_': case '{': case '}': case '#': case '[': case ']': 1667 case '<': case '>': case '%': case ':': case ';': case '.': 1668 case '?': case '*': case '+': case '-': case '/': case '^': 1669 case '&': case '|': case '~': case '!': case '=': case ',': 1670 case '"': case '\'': 1671 if (raw_prefix_len < 16) 1672 { 1673 raw_prefix_len++; 1674 continue; 1675 } 1676 break; 1677 } 1678 1679 if (raw_prefix[raw_prefix_len] != '(') 1680 { 1681 int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1; 1682 if (raw_prefix_len == 16) 1683 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 1684 col, "raw string delimiter longer " 1685 "than 16 characters"); 
1686 else if (raw_prefix[raw_prefix_len] == '\n') 1687 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 1688 col, "invalid new-line in raw " 1689 "string delimiter"); 1690 else 1691 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 1692 col, "invalid character '%c' in " 1693 "raw string delimiter", 1694 (int) raw_prefix[raw_prefix_len]); 1695 pfile->buffer->cur = orig_base + raw_prefix_start - 1; 1696 create_literal (pfile, token, orig_base, 1697 raw_prefix_start - 1, CPP_OTHER); 1698 if (first_buff) 1699 _cpp_release_buff (pfile, first_buff); 1700 return; 1701 } 1702 raw_prefix[raw_prefix_len] = '"'; 1703 phase = RAW_STR; 1704 /* Nothing should be appended to temp_buffer during 1705 RAW_STR phase. */ 1706 temp_buffer_len = 17; 1707 break; 1708 } 1709 continue; 1710 } 1711 else if (phase == RAW_STR_SUFFIX) 1712 { 1713 while (raw_suffix_len <= raw_prefix_len 1714 && raw_suffix_len < temp_buffer_len 1715 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len]) 1716 raw_suffix_len++; 1717 if (raw_suffix_len > raw_prefix_len) 1718 break; 1719 if (raw_suffix_len == temp_buffer_len) 1720 continue; 1721 phase = RAW_STR; 1722 /* Nothing should be appended to temp_buffer during 1723 RAW_STR phase. 
*/ 1724 temp_buffer_len = 17; 1725 } 1726 if (c == ')') 1727 { 1728 phase = RAW_STR_SUFFIX; 1729 raw_suffix_len = 0; 1730 temp_buffer_len = 0; 1731 } 1732 else if (c == '\n') 1733 { 1734 if (pfile->state.in_directive 1735 || (pfile->state.parsing_args 1736 && pfile->buffer->next_line >= pfile->buffer->rlimit)) 1737 { 1738 cur--; 1739 type = CPP_OTHER; 1740 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0, 1741 "unterminated raw string"); 1742 break; 1743 } 1744 1745 BUF_APPEND (base, cur - base); 1746 1747 if (pfile->buffer->cur < pfile->buffer->rlimit) 1748 CPP_INCREMENT_LINE (pfile, 0); 1749 pfile->buffer->need_line = true; 1750 1751 pfile->buffer->cur = cur-1; 1752 _cpp_process_line_notes (pfile, false); 1753 if (!_cpp_get_fresh_line (pfile)) 1754 { 1755 source_location src_loc = token->src_loc; 1756 token->type = CPP_EOF; 1757 /* Tell the compiler the line number of the EOF token. */ 1758 token->src_loc = pfile->line_table->highest_line; 1759 token->flags = BOL; 1760 if (first_buff != NULL) 1761 _cpp_release_buff (pfile, first_buff); 1762 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0, 1763 "unterminated raw string"); 1764 return; 1765 } 1766 1767 cur = base = pfile->buffer->cur; 1768 note = &pfile->buffer->notes[pfile->buffer->cur_note]; 1769 } 1770 } 1771 1772 if (CPP_OPTION (pfile, user_literals)) 1773 { 1774 /* If a string format macro, say from inttypes.h, is placed touching 1775 a string literal it could be parsed as a C++11 user-defined string 1776 literal thus breaking the program. 1777 Try to identify macros with is_macro. A warning is issued. */ 1778 if (is_macro (pfile, cur)) 1779 { 1780 /* Raise a warning, but do not consume subsequent tokens. 
*/ 1781 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping) 1782 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX, 1783 token->src_loc, 0, 1784 "invalid suffix on literal; C++11 requires " 1785 "a space between literal and string macro"); 1786 } 1787 /* Grab user defined literal suffix. */ 1788 else if (ISIDST (*cur)) 1789 { 1790 type = cpp_userdef_string_add_type (type); 1791 ++cur; 1792 1793 while (ISIDNUM (*cur)) 1794 ++cur; 1795 } 1796 } 1797 1798 pfile->buffer->cur = cur; 1799 if (first_buff == NULL) 1800 create_literal (pfile, token, base, cur - base, type); 1801 else 1802 { 1803 uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1); 1804 1805 token->type = type; 1806 token->val.str.len = total_len + (cur - base); 1807 token->val.str.text = dest; 1808 last_buff = first_buff; 1809 while (last_buff != NULL) 1810 { 1811 memcpy (dest, last_buff->base, 1812 BUFF_FRONT (last_buff) - last_buff->base); 1813 dest += BUFF_FRONT (last_buff) - last_buff->base; 1814 last_buff = last_buff->next; 1815 } 1816 _cpp_release_buff (pfile, first_buff); 1817 memcpy (dest, base, cur - base); 1818 dest[cur - base] = '\0'; 1819 } 1820 } 1821 1822 /* Lexes a string, character constant, or angle-bracketed header file 1823 name. The stored string contains the spelling, including opening 1824 quote and any leading 'L', 'u', 'U' or 'u8' and optional 1825 'R' modifier. It returns the type of the literal, or CPP_OTHER 1826 if it was not properly terminated, or CPP_LESS for an unterminated 1827 header name which must be relexed as normal tokens. 1828 1829 The spelling is NUL-terminated, but it is not guaranteed that this 1830 is the first NUL since embedded NULs are preserved. 
*/
static void
lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
{
  bool saw_NUL = false;
  const uchar *cur;
  cppchar_t terminator;
  enum cpp_ttype type;

  /* Step over any encoding prefix to reach the opening delimiter.  */
  cur = base;
  terminator = *cur++;
  if (terminator == 'L' || terminator == 'U')
    terminator = *cur++;
  else if (terminator == 'u')
    {
      terminator = *cur++;
      if (terminator == '8')
        terminator = *cur++;
    }

  /* Raw strings have their own lexer.  */
  if (terminator == 'R')
    {
      lex_raw_string (pfile, token, base, cur);
      return;
    }

  switch (terminator)
    {
    case '"':
      type = (*base == 'L' ? CPP_WSTRING :
              *base == 'U' ? CPP_STRING32 :
              *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
              : CPP_STRING);
      break;

    case '\'':
      type = (*base == 'L' ? CPP_WCHAR :
              *base == 'U' ? CPP_CHAR32 :
              *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
              : CPP_CHAR);
      break;

    default:
      /* Anything else is treated as an angle-bracketed header name.  */
      terminator = '>';
      type = CPP_HEADER_NAME;
      break;
    }

  for (;;)
    {
      cppchar_t c = *cur++;

      /* In #include-style directives, terminators are not escapable.  */
      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
        {
          cur++;
          continue;
        }

      if (c == terminator)
        break;

      if (c == '\n')
        {
          cur--;
          /* Unmatched quotes always yield undefined behavior, but
             greedy lexing means that what appears to be an unterminated
             header name may actually be a legitimate sequence of tokens.  */
          if (terminator == '>')
            {
              token->type = CPP_LESS;
              return;
            }
          type = CPP_OTHER;
          break;
        }

      if (c == '\0')
        saw_NUL = true;
    }

  if (saw_NUL && !pfile->state.skipping)
    cpp_error (pfile, CPP_DL_WARNING,
               "null character(s) preserved in literal");

  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
               (int) terminator);

  if (CPP_OPTION (pfile, user_literals))
    {
      /* If a string format macro, say from inttypes.h, is placed touching
         a string literal it could be parsed as a C++11 user-defined string
         literal thus breaking the program.
         Try to identify macros with is_macro.  A warning is issued.  */
      if (is_macro (pfile, cur))
        {
          /* Raise a warning, but do not consume subsequent tokens.  */
          if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
            cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
                                   token->src_loc, 0,
                                   "invalid suffix on literal; C++11 requires "
                                   "a space between literal and string macro");
        }
      /* Grab user defined literal suffix.  */
      else if (ISIDST (*cur))
        {
          type = cpp_userdef_char_add_type (type);
          type = cpp_userdef_string_add_type (type);
          ++cur;

          while (ISIDNUM (*cur))
            ++cur;
        }
    }
  else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
           && is_macro (pfile, cur)
           && !pfile->state.skipping)
    cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
                           token->src_loc, 0, "C++11 requires a space "
                           "between string literal and macro");

  pfile->buffer->cur = cur;
  create_literal (pfile, token, base, cur - base, type);
}

/* Return the comment table.  The client may not make any assumption
   about the ordering of the table.
*/ 1941 cpp_comment_table * 1942 cpp_get_comments (cpp_reader *pfile) 1943 { 1944 return &pfile->comments; 1945 } 1946 1947 /* Append a comment to the end of the comment table. */ 1948 static void 1949 store_comment (cpp_reader *pfile, cpp_token *token) 1950 { 1951 int len; 1952 1953 if (pfile->comments.allocated == 0) 1954 { 1955 pfile->comments.allocated = 256; 1956 pfile->comments.entries = (cpp_comment *) xmalloc 1957 (pfile->comments.allocated * sizeof (cpp_comment)); 1958 } 1959 1960 if (pfile->comments.count == pfile->comments.allocated) 1961 { 1962 pfile->comments.allocated *= 2; 1963 pfile->comments.entries = (cpp_comment *) xrealloc 1964 (pfile->comments.entries, 1965 pfile->comments.allocated * sizeof (cpp_comment)); 1966 } 1967 1968 len = token->val.str.len; 1969 1970 /* Copy comment. Note, token may not be NULL terminated. */ 1971 pfile->comments.entries[pfile->comments.count].comment = 1972 (char *) xmalloc (sizeof (char) * (len + 1)); 1973 memcpy (pfile->comments.entries[pfile->comments.count].comment, 1974 token->val.str.text, len); 1975 pfile->comments.entries[pfile->comments.count].comment[len] = '\0'; 1976 1977 /* Set source location. */ 1978 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc; 1979 1980 /* Increment the count of entries in the comment table. */ 1981 pfile->comments.count++; 1982 } 1983 1984 /* The stored comment includes the comment start and any terminator. */ 1985 static void 1986 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, 1987 cppchar_t type) 1988 { 1989 unsigned char *buffer; 1990 unsigned int len, clen, i; 1991 int convert_to_c = (pfile->state.in_directive || pfile->state.parsing_args) 1992 && type == '/'; 1993 1994 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ 1995 1996 /* C++ comments probably (not definitely) have moved past a new 1997 line, which we don't want to save in the comment. 
*/ 1998 if (is_vspace (pfile->buffer->cur[-1])) 1999 len--; 2000 2001 /* If we are currently in a directive or in argument parsing, then 2002 we need to store all C++ comments as C comments internally, and 2003 so we need to allocate a little extra space in that case. 2004 2005 Note that the only time we encounter a directive here is 2006 when we are saving comments in a "#define". */ 2007 clen = convert_to_c ? len + 2 : len; 2008 2009 buffer = _cpp_unaligned_alloc (pfile, clen); 2010 2011 token->type = CPP_COMMENT; 2012 token->val.str.len = clen; 2013 token->val.str.text = buffer; 2014 2015 buffer[0] = '/'; 2016 memcpy (buffer + 1, from, len - 1); 2017 2018 /* Finish conversion to a C comment, if necessary. */ 2019 if (convert_to_c) 2020 { 2021 buffer[1] = '*'; 2022 buffer[clen - 2] = '*'; 2023 buffer[clen - 1] = '/'; 2024 /* As there can be in a C++ comments illegal sequences for C comments 2025 we need to filter them out. */ 2026 for (i = 2; i < (clen - 2); i++) 2027 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*')) 2028 buffer[i] = '|'; 2029 } 2030 2031 /* Finally store this comment for use by clients of libcpp. */ 2032 store_comment (pfile, token); 2033 } 2034 2035 /* Allocate COUNT tokens for RUN. */ 2036 void 2037 _cpp_init_tokenrun (tokenrun *run, unsigned int count) 2038 { 2039 run->base = XNEWVEC (cpp_token, count); 2040 run->limit = run->base + count; 2041 run->next = NULL; 2042 } 2043 2044 /* Returns the next tokenrun, or creates one if there is none. */ 2045 static tokenrun * 2046 next_tokenrun (tokenrun *run) 2047 { 2048 if (run->next == NULL) 2049 { 2050 run->next = XNEW (tokenrun); 2051 run->next->prev = run; 2052 _cpp_init_tokenrun (run->next, 250); 2053 } 2054 2055 return run->next; 2056 } 2057 2058 /* Return the number of not yet processed token in a given 2059 context. 
*/ 2060 int 2061 _cpp_remaining_tokens_num_in_context (cpp_context *context) 2062 { 2063 if (context->tokens_kind == TOKENS_KIND_DIRECT) 2064 return (LAST (context).token - FIRST (context).token); 2065 else if (context->tokens_kind == TOKENS_KIND_INDIRECT 2066 || context->tokens_kind == TOKENS_KIND_EXTENDED) 2067 return (LAST (context).ptoken - FIRST (context).ptoken); 2068 else 2069 abort (); 2070 } 2071 2072 /* Returns the token present at index INDEX in a given context. If 2073 INDEX is zero, the next token to be processed is returned. */ 2074 static const cpp_token* 2075 _cpp_token_from_context_at (cpp_context *context, int index) 2076 { 2077 if (context->tokens_kind == TOKENS_KIND_DIRECT) 2078 return &(FIRST (context).token[index]); 2079 else if (context->tokens_kind == TOKENS_KIND_INDIRECT 2080 || context->tokens_kind == TOKENS_KIND_EXTENDED) 2081 return FIRST (context).ptoken[index]; 2082 else 2083 abort (); 2084 } 2085 2086 /* Look ahead in the input stream. */ 2087 const cpp_token * 2088 cpp_peek_token (cpp_reader *pfile, int index) 2089 { 2090 cpp_context *context = pfile->context; 2091 const cpp_token *peektok; 2092 int count; 2093 2094 /* First, scan through any pending cpp_context objects. */ 2095 while (context->prev) 2096 { 2097 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context); 2098 2099 if (index < (int) sz) 2100 return _cpp_token_from_context_at (context, index); 2101 index -= (int) sz; 2102 context = context->prev; 2103 } 2104 2105 /* We will have to read some new tokens after all (and do so 2106 without invalidating preceding tokens). */ 2107 count = index; 2108 pfile->keep_tokens++; 2109 2110 /* For peeked tokens temporarily disable line_change reporting, 2111 until the tokens are parsed for real. 
*/ 2112 void (*line_change) (cpp_reader *, const cpp_token *, int) 2113 = pfile->cb.line_change; 2114 pfile->cb.line_change = NULL; 2115 2116 do 2117 { 2118 peektok = _cpp_lex_token (pfile); 2119 if (peektok->type == CPP_EOF) 2120 { 2121 index--; 2122 break; 2123 } 2124 } 2125 while (index--); 2126 2127 _cpp_backup_tokens_direct (pfile, count - index); 2128 pfile->keep_tokens--; 2129 pfile->cb.line_change = line_change; 2130 2131 return peektok; 2132 } 2133 2134 /* Allocate a single token that is invalidated at the same time as the 2135 rest of the tokens on the line. Has its line and col set to the 2136 same as the last lexed token, so that diagnostics appear in the 2137 right place. */ 2138 cpp_token * 2139 _cpp_temp_token (cpp_reader *pfile) 2140 { 2141 cpp_token *old, *result; 2142 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token; 2143 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads; 2144 2145 old = pfile->cur_token - 1; 2146 /* Any pre-existing lookaheads must not be clobbered. */ 2147 if (la) 2148 { 2149 if (sz <= la) 2150 { 2151 tokenrun *next = next_tokenrun (pfile->cur_run); 2152 2153 if (sz < la) 2154 memmove (next->base + 1, next->base, 2155 (la - sz) * sizeof (cpp_token)); 2156 2157 next->base[0] = pfile->cur_run->limit[-1]; 2158 } 2159 2160 if (sz > 1) 2161 memmove (pfile->cur_token + 1, pfile->cur_token, 2162 MIN (la, sz - 1) * sizeof (cpp_token)); 2163 } 2164 2165 if (!sz && pfile->cur_token == pfile->cur_run->limit) 2166 { 2167 pfile->cur_run = next_tokenrun (pfile->cur_run); 2168 pfile->cur_token = pfile->cur_run->base; 2169 } 2170 2171 result = pfile->cur_token++; 2172 result->src_loc = old->src_loc; 2173 return result; 2174 } 2175 2176 /* Lex a token into RESULT (external interface). Takes care of issues 2177 like directive handling, token lookahead, multiple include 2178 optimization and skipping. 
*/
const cpp_token *
_cpp_lex_token (cpp_reader *pfile)
{
  cpp_token *tok;

  while (1)
    {
      /* Move on to the next token run once this one is used up.  */
      if (pfile->cur_token == pfile->cur_run->limit)
        {
          pfile->cur_run = next_tokenrun (pfile->cur_run);
          pfile->cur_token = pfile->cur_run->base;
        }

      /* We assume that the current token is somewhere in the current
         run.  */
      if (pfile->cur_token < pfile->cur_run->base
          || pfile->cur_token >= pfile->cur_run->limit)
        abort ();

      /* Re-use a backed-up token if there is one, otherwise lex a new
         one.  */
      if (!pfile->lookaheads)
        tok = _cpp_lex_direct (pfile);
      else
        {
          pfile->lookaheads--;
          tok = pfile->cur_token++;
        }

      if (tok->flags & BOL)
        {
          /* Is this a directive?  If _cpp_handle_directive returns
             false, it is an assembler #.
             6.10.3 p 11: Directives in a list of macro arguments
             gives undefined behavior.  This implementation
             handles the directive as normal.  */
          bool maybe_directive = (tok->type == CPP_HASH
                                  && pfile->state.parsing_args != 1);

          if (maybe_directive)
            {
              if (_cpp_handle_directive (pfile, tok->flags & PREV_WHITE))
                {
                  /* A CPP_PADDING directive result means there is no
                     token to hand back; lex again.  */
                  if (pfile->directive_result.type == CPP_PADDING)
                    continue;
                  tok = &pfile->directive_result;
                }
            }
          else if (pfile->state.in_deferred_pragma)
            tok = &pfile->directive_result;

          if (pfile->cb.line_change && !pfile->state.skipping)
            pfile->cb.line_change (pfile, tok, pfile->state.parsing_args);
        }

      /* We don't skip tokens in directives.  */
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
        return tok;

      /* Outside a directive, invalidate controlling macros.  At file
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
         get here and MI optimization works.  */
      pfile->mi_valid = false;

      /* While skipping, only CPP_EOF is handed back to the caller.  */
      if (!pfile->state.skipping || tok->type == CPP_EOF)
        return tok;
    }
}

/* Returns true if a fresh line has been loaded.
*/ 2246 bool 2247 _cpp_get_fresh_line (cpp_reader *pfile) 2248 { 2249 int return_at_eof; 2250 2251 /* We can't get a new line until we leave the current directive. */ 2252 if (pfile->state.in_directive) 2253 return false; 2254 2255 for (;;) 2256 { 2257 cpp_buffer *buffer = pfile->buffer; 2258 2259 if (!buffer->need_line) 2260 return true; 2261 2262 if (buffer->next_line < buffer->rlimit) 2263 { 2264 _cpp_clean_line (pfile); 2265 return true; 2266 } 2267 2268 /* First, get out of parsing arguments state. */ 2269 if (pfile->state.parsing_args) 2270 return false; 2271 2272 /* End of buffer. Non-empty files should end in a newline. */ 2273 if (buffer->buf != buffer->rlimit 2274 && buffer->next_line > buffer->rlimit 2275 && !buffer->from_stage3) 2276 { 2277 /* Clip to buffer size. */ 2278 buffer->next_line = buffer->rlimit; 2279 } 2280 2281 return_at_eof = buffer->return_at_eof; 2282 _cpp_pop_buffer (pfile); 2283 if (pfile->buffer == NULL || return_at_eof) 2284 return false; 2285 } 2286 } 2287 2288 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ 2289 do \ 2290 { \ 2291 result->type = ELSE_TYPE; \ 2292 if (*buffer->cur == CHAR) \ 2293 buffer->cur++, result->type = THEN_TYPE; \ 2294 } \ 2295 while (0) 2296 2297 /* Lex a token into pfile->cur_token, which is also incremented, to 2298 get diagnostics pointing to the correct location. 2299 2300 Does not handle issues such as token lookahead, multiple-include 2301 optimization, directives, skipping etc. This function is only 2302 suitable for use by _cpp_lex_token, and in special cases like 2303 lex_expansion_token which doesn't care for any of these issues. 2304 2305 When meeting a newline, returns CPP_EOF if parsing a directive, 2306 otherwise returns to the start of the token buffer if permissible. 2307 Returns the location of the lexed token. 
*/
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

  /* Reached initially and again after every newline.  */
 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The end of a deferred pragma's line is reported as its own
         token.  */
      if (pfile->state.in_deferred_pragma)
        {
          result->type = CPP_PRAGMA_EOL;
          pfile->state.in_deferred_pragma = false;
          if (!pfile->state.pragma_allow_expansion)
            pfile->state.prevent_expansion--;
          return result;
        }

      /* No further line available: hand back CPP_EOF.  */
      if (!_cpp_get_fresh_line (pfile))
        {
          result->type = CPP_EOF;
          if (!pfile->state.in_directive)
            {
              /* Tell the compiler the line number of the EOF token.  */
              result->src_loc = pfile->line_table->highest_line;
              result->flags = BOL;
            }
          return result;
        }

      /* Unless earlier tokens must be kept, restart at the beginning
         of the base token run.  */
      if (!pfile->keep_tokens)
        {
          pfile->cur_run = &pfile->base_run;
          result = pfile->base_run.base;
          pfile->cur_token = result + 1;
        }

      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
        result->flags |= PREV_WHITE;
    }
  buffer = pfile->buffer;

  /* Re-entered after a comment that is not being saved.  */
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

  /* Re-entered after horizontal whitespace has been skipped.  */
 skipped_white:
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  /* Use the forced location if one is installed, otherwise the
     column of the character just read.  */
  if (pfile->forced_token_location_p)
    result->src_loc = *pfile->forced_token_location_p;
  else
    result->src_loc = linemap_position_for_column (pfile->line_table,
                                          CPP_BUF_COLUMN (buffer, buffer->cur));

  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      if (buffer->cur < buffer->rlimit)
        {
          CPP_INCREMENT_LINE (pfile, 0);
        }
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;

        result->type = CPP_NUMBER;
        lex_number (pfile, &result->val.str, &nst);
        warn_about_normalization (pfile, result, &nst);
        break;
      }

    case 'L':
    case 'u':
    case 'U':
    case 'R':
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
         wide strings or raw strings.  The first group of conditions
         checks that the prefix is enabled, the second that a literal
         really follows it.  */
      if ((c == 'L' || CPP_OPTION (pfile, rliterals)
           || (c != 'R' && CPP_OPTION (pfile, uliterals)))
          && ((*buffer->cur == '\'' && c != 'R')
              || *buffer->cur == '"'
              || (*buffer->cur == 'R'
                  && c != 'R'
                  && buffer->cur[1] == '"'
                  && CPP_OPTION (pfile, rliterals))
              || (*buffer->cur == '8'
                  && c == 'u'
                  && ((buffer->cur[1] == '"'
                       || (buffer->cur[1] == '\''
                           && CPP_OPTION (pfile, utf8_char_literals)))
                      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
                          && CPP_OPTION (pfile, rliterals))))))
        {
          lex_string (pfile, result, buffer->cur - 1);
          break;
        }
      /* Otherwise it is the start of an ordinary identifier.
         Fall through.  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't':           case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q':
    case 'S': case 'T':           case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;

        result->val.node.node
          = lex_identifier (pfile, buffer->cur - 1, false, &nst,
                            &result->val.node.spelling);
        warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  */
      if (result->val.node.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type
            = (enum cpp_ttype) result->val.node.node->directive_index;
        }
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
        {
          if (_cpp_skip_block_comment (pfile))
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
        }
      else if (c == '/' && ! CPP_OPTION (pfile, traditional))
        {
          /* Don't warn for system headers.  */
          if (cpp_in_system_header (pfile))
            ;
          /* Warn about comments if pedantically GNUC89, and not
             in system headers.  */
          else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
                   && CPP_PEDANTIC (pfile)
                   && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "C++ style comments are not allowed in ISO C90");
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }
          /* Or if specifically desired via -Wc90-c99-compat.  */
          else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
                   && ! CPP_OPTION (pfile, cplusplus)
                   && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, CPP_DL_WARNING,
                         "C++ style comments are incompatible with C90");
              cpp_error (pfile, CPP_DL_WARNING,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }
          /* In C89/C94, C++ style comments are forbidden.  */
          else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
                    || CPP_OPTION (pfile, lang) == CLK_STDC94))
            {
              /* But don't be confused about valid code such as
                 - // immediately followed by *,
                 - // in a preprocessing directive,
                 - // in an #if 0 block.
                 In those cases the first '/' is a plain division
                 operator.  */
              if (buffer->cur[1] == '*'
                  || pfile->state.in_directive
                  || pfile->state.skipping)
                {
                  result->type = CPP_DIV;
                  break;
                }
              else if (! buffer->warned_cplusplus_comments)
                {
                  cpp_error (pfile, CPP_DL_ERROR,
                             "C++ style comments are not allowed in ISO C90");
                  cpp_error (pfile, CPP_DL_ERROR,
                             "(this will be reported only once per input "
                             "file)");
                  buffer->warned_cplusplus_comments = 1;
                }
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
        }
      else if (c == '=')
        {
          buffer->cur++;
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          result->type = CPP_DIV;
          break;
        }

      /* Only a skipped comment reaches this point.  Unless comments
         are being saved it merely counts as whitespace before the
         next token.  */
      if (!pfile->state.save_comments)
        {
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      if (pfile->state.angled_headers)
        {
          /* Try to lex an angle-bracketed header name; if lex_string
             leaves the type as CPP_LESS, treat '<' as an operator.  */
          lex_string (pfile, result, buffer->cur - 1);
          if (result->type != CPP_LESS)
            break;
        }

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
        {
          buffer->cur++;
          result->type = CPP_LESS_EQ;
        }
      else if (*buffer->cur == '<')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
        }
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
                 three characters are <:: and the subsequent character
                 is neither : nor >, the < is treated as a preprocessor
                 token by itself".  */
              if (CPP_OPTION (pfile, cplusplus)
                  && CPP_OPTION (pfile, lang) != CLK_CXX98
                  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
                  && buffer->cur[1] == ':'
                  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
                break;

              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_SQUARE;
            }
          else if (*buffer->cur == '%')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_BRACE;
            }
        }
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
        {
          buffer->cur++;
          result->type = CPP_GREATER_EQ;
        }
      else if (*buffer->cur == '>')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
        }
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
        {
          buffer->cur++;
          result->type = CPP_MOD_EQ;
        }
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              /* "%:" spells '#'; "%:%:" spells "##".  */
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_HASH;
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
                {
                  buffer->cur += 2;
                  result->type = CPP_PASTE;
                  result->val.token_no = 0;
                }
            }
          else if (*buffer->cur == '>')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_CLOSE_BRACE;
            }
        }
      break;

    case '.':
      result->type = CPP_DOT;
      if (ISDIGIT (*buffer->cur))
        {
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;

          result->type = CPP_NUMBER;
          lex_number (pfile, &result->val.str, &nst);
          warn_about_normalization (pfile, result, &nst);
        }
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
        {
          buffer->cur += 2;
          result->type = CPP_ELLIPSIS;
        }
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
        {
          buffer->cur++;
          result->type = CPP_DOT_STAR;
        }
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
        {
          buffer->cur++;
          result->type = CPP_PLUS_PLUS;
        }
      else if (*buffer->cur == '=')
        {
          buffer->cur++;
          result->type = CPP_PLUS_EQ;
        }
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
        {
          buffer->cur++;
          result->type = CPP_DEREF;
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
            {
              buffer->cur++;
              result->type = CPP_DEREF_STAR;
            }
        }
      else if (*buffer->cur == '-')
        {
          buffer->cur++;
          result->type = CPP_MINUS_MINUS;
        }
      else if (*buffer->cur == '=')
        {
          buffer->cur++;
          result->type = CPP_MINUS_EQ;
        }
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
        {
          buffer->cur++;
          result->type = CPP_AND_AND;
        }
      else if (*buffer->cur == '=')
        {
          buffer->cur++;
          result->type = CPP_AND_EQ;
        }
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
        {
          buffer->cur++;
          result->type = CPP_OR_OR;
        }
      else if (*buffer->cur == '=')
        {
          buffer->cur++;
          result->type = CPP_OR_EQ;
        }
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
        {
          buffer->cur++;
          result->type = CPP_SCOPE;
        }
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
        {
          buffer->cur++;
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      break;

    case '*':
      IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT);
      break;

    case '=':
      IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ);
      break;

    case '!':
      IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT);
      break;

    case '^':
      IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR);
      break;

    case '#':
      IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH);
      result->val.token_no = 0;
      break;

    case '?':
      result->type = CPP_QUERY;
      break;

    case '~':
      result->type = CPP_COMPL;
      break;

    case ',':
      result->type = CPP_COMMA;
      break;

    case '(':
      result->type = CPP_OPEN_PAREN;
      break;

    case ')':
      result->type = CPP_CLOSE_PAREN;
      break;

    case '[':
      result->type = CPP_OPEN_SQUARE;
      break;

    case ']':
      result->type = CPP_CLOSE_SQUARE;
      break;

    case '{':
      result->type = CPP_OPEN_BRACE;
      break;

    case '}':
      result->type = CPP_CLOSE_BRACE;
      break;

    case ';':
      result->type = CPP_SEMICOLON;
      break;

      /* @ is a punctuator in Objective-C.  */
    case '@':
      result->type = CPP_ATSIGN;
      break;

    case '$':
    case '\\':
      {
        /* Step back onto the character and ask whether an identifier
           starts here.  */
        const uchar *base = --buffer->cur;
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;

        if (forms_identifier_p (pfile, true, &nst))
          {
            result->type = CPP_NAME;
            result->val.node.node
              = lex_identifier (pfile, base, true, &nst,
                                &result->val.node.spelling);
            warn_about_normalization (pfile, result, &nst);
            break;
          }

        /* Not an identifier: consume the character again and treat it
           like any other stray character.  */
        buffer->cur++;
      }
      /* Fall through.  */

    default:
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  /* Record the range covered by the token.  Its end is the column of
     the current buffer position, unless the start is one of the
     reserved locations, in which case the range is just that
     location.  */
  source_range tok_range;
  tok_range.m_start = result->src_loc;
  if (result->src_loc >= RESERVED_LOCATION_COUNT)
    tok_range.m_finish
      = linemap_position_for_column (pfile->line_table,
                                     CPP_BUF_COLUMN (buffer, buffer->cur));
  else
    tok_range.m_finish = tok_range.m_start;

  result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
                                           result->src_loc,
                                           tok_range, NULL);

  return result;
}

/* An upper bound on the number of bytes needed to spell TOKEN.
   Does not include preceding whitespace.
*/ 2745 unsigned int 2746 cpp_token_len (const cpp_token *token) 2747 { 2748 unsigned int len; 2749 2750 switch (TOKEN_SPELL (token)) 2751 { 2752 default: len = 6; break; 2753 case SPELL_LITERAL: len = token->val.str.len; break; 2754 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break; 2755 } 2756 2757 return len; 2758 } 2759 2760 /* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER. 2761 Return the number of bytes read out of NAME. (There are always 2762 10 bytes written to BUFFER.) */ 2763 2764 static size_t 2765 utf8_to_ucn (unsigned char *buffer, const unsigned char *name) 2766 { 2767 int j; 2768 int ucn_len = 0; 2769 int ucn_len_c; 2770 unsigned t; 2771 unsigned long utf32; 2772 2773 /* Compute the length of the UTF-8 sequence. */ 2774 for (t = *name; t & 0x80; t <<= 1) 2775 ucn_len++; 2776 2777 utf32 = *name & (0x7F >> ucn_len); 2778 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++) 2779 { 2780 utf32 = (utf32 << 6) | (*++name & 0x3F); 2781 2782 /* Ill-formed UTF-8. */ 2783 if ((*name & ~0x3F) != 0x80) 2784 abort (); 2785 } 2786 2787 *buffer++ = '\\'; 2788 *buffer++ = 'U'; 2789 for (j = 7; j >= 0; j--) 2790 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF]; 2791 return ucn_len; 2792 } 2793 2794 /* Given a token TYPE corresponding to a digraph, return a pointer to 2795 the spelling of the digraph. */ 2796 static const unsigned char * 2797 cpp_digraph2name (enum cpp_ttype type) 2798 { 2799 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH]; 2800 } 2801 2802 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER. 2803 The buffer must already contain the enough space to hold the 2804 token's spelling. Returns a pointer to the character after the 2805 last character written. 
*/ 2806 unsigned char * 2807 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident) 2808 { 2809 size_t i; 2810 const unsigned char *name = NODE_NAME (ident); 2811 2812 for (i = 0; i < NODE_LEN (ident); i++) 2813 if (name[i] & ~0x7F) 2814 { 2815 i += utf8_to_ucn (buffer, name + i) - 1; 2816 buffer += 10; 2817 } 2818 else 2819 *buffer++ = name[i]; 2820 2821 return buffer; 2822 } 2823 2824 /* Write the spelling of a token TOKEN to BUFFER. The buffer must 2825 already contain the enough space to hold the token's spelling. 2826 Returns a pointer to the character after the last character written. 2827 FORSTRING is true if this is to be the spelling after translation 2828 phase 1 (with the original spelling of extended identifiers), false 2829 if extended identifiers should always be written using UCNs (there is 2830 no option for always writing them in the internal UTF-8 form). 2831 FIXME: Would be nice if we didn't need the PFILE argument. */ 2832 unsigned char * 2833 cpp_spell_token (cpp_reader *pfile, const cpp_token *token, 2834 unsigned char *buffer, bool forstring) 2835 { 2836 switch (TOKEN_SPELL (token)) 2837 { 2838 case SPELL_OPERATOR: 2839 { 2840 const unsigned char *spelling; 2841 unsigned char c; 2842 2843 if (token->flags & DIGRAPH) 2844 spelling = cpp_digraph2name (token->type); 2845 else if (token->flags & NAMED_OP) 2846 goto spell_ident; 2847 else 2848 spelling = TOKEN_NAME (token); 2849 2850 while ((c = *spelling++) != '\0') 2851 *buffer++ = c; 2852 } 2853 break; 2854 2855 spell_ident: 2856 case SPELL_IDENT: 2857 if (forstring) 2858 { 2859 memcpy (buffer, NODE_NAME (token->val.node.spelling), 2860 NODE_LEN (token->val.node.spelling)); 2861 buffer += NODE_LEN (token->val.node.spelling); 2862 } 2863 else 2864 buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node); 2865 break; 2866 2867 case SPELL_LITERAL: 2868 memcpy (buffer, token->val.str.text, token->val.str.len); 2869 buffer += token->val.str.len; 2870 break; 2871 2872 case 
SPELL_NONE: 2873 cpp_error (pfile, CPP_DL_ICE, 2874 "unspellable token %s", TOKEN_NAME (token)); 2875 break; 2876 } 2877 2878 return buffer; 2879 } 2880 2881 /* Returns TOKEN spelt as a null-terminated string. The string is 2882 freed when the reader is destroyed. Useful for diagnostics. */ 2883 unsigned char * 2884 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) 2885 { 2886 unsigned int len = cpp_token_len (token) + 1; 2887 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; 2888 2889 end = cpp_spell_token (pfile, token, start, false); 2890 end[0] = '\0'; 2891 2892 return start; 2893 } 2894 2895 /* Returns a pointer to a string which spells the token defined by 2896 TYPE and FLAGS. Used by C front ends, which really should move to 2897 using cpp_token_as_text. */ 2898 const char * 2899 cpp_type2name (enum cpp_ttype type, unsigned char flags) 2900 { 2901 if (flags & DIGRAPH) 2902 return (const char *) cpp_digraph2name (type); 2903 else if (flags & NAMED_OP) 2904 return cpp_named_operator2name (type); 2905 2906 return (const char *) token_spellings[type].name; 2907 } 2908 2909 /* Writes the spelling of token to FP, without any preceding space. 2910 Separated from cpp_spell_token for efficiency - to avoid stdio 2911 double-buffering. 
*/ 2912 void 2913 cpp_output_token (const cpp_token *token, FILE *fp) 2914 { 2915 switch (TOKEN_SPELL (token)) 2916 { 2917 case SPELL_OPERATOR: 2918 { 2919 const unsigned char *spelling; 2920 int c; 2921 2922 if (token->flags & DIGRAPH) 2923 spelling = cpp_digraph2name (token->type); 2924 else if (token->flags & NAMED_OP) 2925 goto spell_ident; 2926 else 2927 spelling = TOKEN_NAME (token); 2928 2929 c = *spelling; 2930 do 2931 putc (c, fp); 2932 while ((c = *++spelling) != '\0'); 2933 } 2934 break; 2935 2936 spell_ident: 2937 case SPELL_IDENT: 2938 { 2939 size_t i; 2940 const unsigned char * name = NODE_NAME (token->val.node.node); 2941 2942 for (i = 0; i < NODE_LEN (token->val.node.node); i++) 2943 if (name[i] & ~0x7F) 2944 { 2945 unsigned char buffer[10]; 2946 i += utf8_to_ucn (buffer, name + i) - 1; 2947 fwrite (buffer, 1, 10, fp); 2948 } 2949 else 2950 fputc (NODE_NAME (token->val.node.node)[i], fp); 2951 } 2952 break; 2953 2954 case SPELL_LITERAL: 2955 fwrite (token->val.str.text, 1, token->val.str.len, fp); 2956 break; 2957 2958 case SPELL_NONE: 2959 /* An error, most probably. */ 2960 break; 2961 } 2962 } 2963 2964 /* Compare two tokens. */ 2965 int 2966 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) 2967 { 2968 if (a->type == b->type && a->flags == b->flags) 2969 switch (TOKEN_SPELL (a)) 2970 { 2971 default: /* Keep compiler happy. */ 2972 case SPELL_OPERATOR: 2973 /* token_no is used to track where multiple consecutive ## 2974 tokens were originally located. 
*/ 2975 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no); 2976 case SPELL_NONE: 2977 return (a->type != CPP_MACRO_ARG 2978 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no 2979 && a->val.macro_arg.spelling == b->val.macro_arg.spelling)); 2980 case SPELL_IDENT: 2981 return (a->val.node.node == b->val.node.node 2982 && a->val.node.spelling == b->val.node.spelling); 2983 case SPELL_LITERAL: 2984 return (a->val.str.len == b->val.str.len 2985 && !memcmp (a->val.str.text, b->val.str.text, 2986 a->val.str.len)); 2987 } 2988 2989 return 0; 2990 } 2991 2992 /* Returns nonzero if a space should be inserted to avoid an 2993 accidental token paste for output. For simplicity, it is 2994 conservative, and occasionally advises a space where one is not 2995 needed, e.g. "." and ".2". */ 2996 int 2997 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, 2998 const cpp_token *token2) 2999 { 3000 enum cpp_ttype a = token1->type, b = token2->type; 3001 cppchar_t c; 3002 3003 if (token1->flags & NAMED_OP) 3004 a = CPP_NAME; 3005 if (token2->flags & NAMED_OP) 3006 b = CPP_NAME; 3007 3008 c = EOF; 3009 if (token2->flags & DIGRAPH) 3010 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; 3011 else if (token_spellings[b].category == SPELL_OPERATOR) 3012 c = token_spellings[b].name[0]; 3013 3014 /* Quickly get everything that can paste with an '='. */ 3015 if ((int) a <= (int) CPP_LAST_EQ && c == '=') 3016 return 1; 3017 3018 switch (a) 3019 { 3020 case CPP_GREATER: return c == '>'; 3021 case CPP_LESS: return c == '<' || c == '%' || c == ':'; 3022 case CPP_PLUS: return c == '+'; 3023 case CPP_MINUS: return c == '-' || c == '>'; 3024 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ 3025 case CPP_MOD: return c == ':' || c == '>'; 3026 case CPP_AND: return c == '&'; 3027 case CPP_OR: return c == '|'; 3028 case CPP_COLON: return c == ':' || c == '>'; 3029 case CPP_DEREF: return c == '*'; 3030 case CPP_DOT: return c == '.' 
|| c == '%' || b == CPP_NUMBER; 3031 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ 3032 case CPP_NAME: return ((b == CPP_NUMBER 3033 && name_p (pfile, &token2->val.str)) 3034 || b == CPP_NAME 3035 || b == CPP_CHAR || b == CPP_STRING); /* L */ 3036 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME 3037 || c == '.' || c == '+' || c == '-'); 3038 /* UCNs */ 3039 case CPP_OTHER: return ((token1->val.str.text[0] == '\\' 3040 && b == CPP_NAME) 3041 || (CPP_OPTION (pfile, objc) 3042 && token1->val.str.text[0] == '@' 3043 && (b == CPP_NAME || b == CPP_STRING))); 3044 case CPP_STRING: 3045 case CPP_WSTRING: 3046 case CPP_UTF8STRING: 3047 case CPP_STRING16: 3048 case CPP_STRING32: return (CPP_OPTION (pfile, user_literals) 3049 && (b == CPP_NAME 3050 || (TOKEN_SPELL (token2) == SPELL_LITERAL 3051 && ISIDST (token2->val.str.text[0])))); 3052 3053 default: break; 3054 } 3055 3056 return 0; 3057 } 3058 3059 /* Output all the remaining tokens on the current line, and a newline 3060 character, to FP. Leading whitespace is removed. If there are 3061 macros, special token padding is not performed. */ 3062 void 3063 cpp_output_line (cpp_reader *pfile, FILE *fp) 3064 { 3065 const cpp_token *token; 3066 3067 token = cpp_get_token (pfile); 3068 while (token->type != CPP_EOF) 3069 { 3070 cpp_output_token (token, fp); 3071 token = cpp_get_token (pfile); 3072 if (token->flags & PREV_WHITE) 3073 putc (' ', fp); 3074 } 3075 3076 putc ('\n', fp); 3077 } 3078 3079 /* Return a string representation of all the remaining tokens on the 3080 current line. The result is allocated using xmalloc and must be 3081 freed by the caller. */ 3082 unsigned char * 3083 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name) 3084 { 3085 const cpp_token *token; 3086 unsigned int out = dir_name ? 
ustrlen (dir_name) : 0; 3087 unsigned int alloced = 120 + out; 3088 unsigned char *result = (unsigned char *) xmalloc (alloced); 3089 3090 /* If DIR_NAME is empty, there are no initial contents. */ 3091 if (dir_name) 3092 { 3093 sprintf ((char *) result, "#%s ", dir_name); 3094 out += 2; 3095 } 3096 3097 token = cpp_get_token (pfile); 3098 while (token->type != CPP_EOF) 3099 { 3100 unsigned char *last; 3101 /* Include room for a possible space and the terminating nul. */ 3102 unsigned int len = cpp_token_len (token) + 2; 3103 3104 if (out + len > alloced) 3105 { 3106 alloced *= 2; 3107 if (out + len > alloced) 3108 alloced = out + len; 3109 result = (unsigned char *) xrealloc (result, alloced); 3110 } 3111 3112 last = cpp_spell_token (pfile, token, &result[out], 0); 3113 out = last - result; 3114 3115 token = cpp_get_token (pfile); 3116 if (token->flags & PREV_WHITE) 3117 result[out++] = ' '; 3118 } 3119 3120 result[out] = '\0'; 3121 return result; 3122 } 3123 3124 /* Memory buffers. Changing these three constants can have a dramatic 3125 effect on performance. The values here are reasonable defaults, 3126 but might be tuned. If you adjust them, be sure to test across a 3127 range of uses of cpplib, including heavy nested function-like macro 3128 expansion. Also check the change in peak memory usage (NJAMD is a 3129 good tool for this). */ 3130 #define MIN_BUFF_SIZE 8000 3131 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) 3132 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ 3133 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) 3134 3135 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) 3136 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! 3137 #endif 3138 3139 /* Create a new allocation buffer. Place the control block at the end 3140 of the buffer, so that buffer overflows will cause immediate chaos. 
*/ 3141 static _cpp_buff * 3142 new_buff (size_t len) 3143 { 3144 _cpp_buff *result; 3145 unsigned char *base; 3146 3147 if (len < MIN_BUFF_SIZE) 3148 len = MIN_BUFF_SIZE; 3149 len = CPP_ALIGN (len); 3150 3151 #ifdef ENABLE_VALGRIND_CHECKING 3152 /* Valgrind warns about uses of interior pointers, so put _cpp_buff 3153 struct first. */ 3154 size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT); 3155 base = XNEWVEC (unsigned char, len + slen); 3156 result = (_cpp_buff *) base; 3157 base += slen; 3158 #else 3159 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); 3160 result = (_cpp_buff *) (base + len); 3161 #endif 3162 result->base = base; 3163 result->cur = base; 3164 result->limit = base + len; 3165 result->next = NULL; 3166 return result; 3167 } 3168 3169 /* Place a chain of unwanted allocation buffers on the free list. */ 3170 void 3171 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) 3172 { 3173 _cpp_buff *end = buff; 3174 3175 while (end->next) 3176 end = end->next; 3177 end->next = pfile->free_buffs; 3178 pfile->free_buffs = buff; 3179 } 3180 3181 /* Return a free buffer of size at least MIN_SIZE. */ 3182 _cpp_buff * 3183 _cpp_get_buff (cpp_reader *pfile, size_t min_size) 3184 { 3185 _cpp_buff *result, **p; 3186 3187 for (p = &pfile->free_buffs;; p = &(*p)->next) 3188 { 3189 size_t size; 3190 3191 if (*p == NULL) 3192 return new_buff (min_size); 3193 result = *p; 3194 size = result->limit - result->base; 3195 /* Return a buffer that's big enough, but don't waste one that's 3196 way too big. */ 3197 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) 3198 break; 3199 } 3200 3201 *p = result->next; 3202 result->next = NULL; 3203 result->cur = result->base; 3204 return result; 3205 } 3206 3207 /* Creates a new buffer with enough space to hold the uncommitted 3208 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies 3209 the excess bytes to the new buffer. 
Chains the new buffer after 3210 BUFF, and returns the new buffer. */ 3211 _cpp_buff * 3212 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) 3213 { 3214 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); 3215 _cpp_buff *new_buff = _cpp_get_buff (pfile, size); 3216 3217 buff->next = new_buff; 3218 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); 3219 return new_buff; 3220 } 3221 3222 /* Creates a new buffer with enough space to hold the uncommitted 3223 remaining bytes of the buffer pointed to by BUFF, and at least 3224 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. 3225 Chains the new buffer before the buffer pointed to by BUFF, and 3226 updates the pointer to point to the new buffer. */ 3227 void 3228 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) 3229 { 3230 _cpp_buff *new_buff, *old_buff = *pbuff; 3231 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); 3232 3233 new_buff = _cpp_get_buff (pfile, size); 3234 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); 3235 new_buff->next = old_buff; 3236 *pbuff = new_buff; 3237 } 3238 3239 /* Free a chain of buffers starting at BUFF. */ 3240 void 3241 _cpp_free_buff (_cpp_buff *buff) 3242 { 3243 _cpp_buff *next; 3244 3245 for (; buff; buff = next) 3246 { 3247 next = buff->next; 3248 #ifdef ENABLE_VALGRIND_CHECKING 3249 free (buff); 3250 #else 3251 free (buff->base); 3252 #endif 3253 } 3254 } 3255 3256 /* Allocate permanent, unaligned storage of length LEN. 
*/ 3257 unsigned char * 3258 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len) 3259 { 3260 _cpp_buff *buff = pfile->u_buff; 3261 unsigned char *result = buff->cur; 3262 3263 if (len > (size_t) (buff->limit - result)) 3264 { 3265 buff = _cpp_get_buff (pfile, len); 3266 buff->next = pfile->u_buff; 3267 pfile->u_buff = buff; 3268 result = buff->cur; 3269 } 3270 3271 buff->cur = result + len; 3272 return result; 3273 } 3274 3275 /* Allocate permanent, unaligned storage of length LEN from a_buff. 3276 That buffer is used for growing allocations when saving macro 3277 replacement lists in a #define, and when parsing an answer to an 3278 assertion in #assert, #unassert or #if (and therefore possibly 3279 whilst expanding macros). It therefore must not be used by any 3280 code that they might call: specifically the lexer and the guts of 3281 the macro expander. 3282 3283 All existing other uses clearly fit this restriction: storing 3284 registered pragmas during initialization. */ 3285 unsigned char * 3286 _cpp_aligned_alloc (cpp_reader *pfile, size_t len) 3287 { 3288 _cpp_buff *buff = pfile->a_buff; 3289 unsigned char *result = buff->cur; 3290 3291 if (len > (size_t) (buff->limit - result)) 3292 { 3293 buff = _cpp_get_buff (pfile, len); 3294 buff->next = pfile->a_buff; 3295 pfile->a_buff = buff; 3296 result = buff->cur; 3297 } 3298 3299 buff->cur = result + len; 3300 return result; 3301 } 3302 3303 /* Say which field of TOK is in use. 
*/ 3304 3305 enum cpp_token_fld_kind 3306 cpp_token_val_index (const cpp_token *tok) 3307 { 3308 switch (TOKEN_SPELL (tok)) 3309 { 3310 case SPELL_IDENT: 3311 return CPP_TOKEN_FLD_NODE; 3312 case SPELL_LITERAL: 3313 return CPP_TOKEN_FLD_STR; 3314 case SPELL_OPERATOR: 3315 if (tok->type == CPP_PASTE) 3316 return CPP_TOKEN_FLD_TOKEN_NO; 3317 else 3318 return CPP_TOKEN_FLD_NONE; 3319 case SPELL_NONE: 3320 if (tok->type == CPP_MACRO_ARG) 3321 return CPP_TOKEN_FLD_ARG_NO; 3322 else if (tok->type == CPP_PADDING) 3323 return CPP_TOKEN_FLD_SOURCE; 3324 else if (tok->type == CPP_PRAGMA) 3325 return CPP_TOKEN_FLD_PRAGMA; 3326 /* else fall through */ 3327 default: 3328 return CPP_TOKEN_FLD_NONE; 3329 } 3330 } 3331 3332 /* All tokens lexed in R after calling this function will be forced to have 3333 their source_location the same as the location referenced by P, until 3334 cpp_stop_forcing_token_locations is called for R. */ 3335 3336 void 3337 cpp_force_token_locations (cpp_reader *r, source_location *p) 3338 { 3339 r->forced_token_location_p = p; 3340 } 3341 3342 /* Go back to assigning locations naturally for lexed tokens. */ 3343 3344 void 3345 cpp_stop_forcing_token_locations (cpp_reader *r) 3346 { 3347 r->forced_token_location_p = NULL; 3348 } 3349