1 /* CPP Library - traditional lexical analysis and macro expansion. 2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 Contributed by Neil Booth, May 2002 4 5 This program is free software; you can redistribute it and/or modify it 6 under the terms of the GNU General Public License as published by the 7 Free Software Foundation; either version 3, or (at your option) any 8 later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; see the file COPYING3. If not see 17 <http://www.gnu.org/licenses/>. */ 18 19 #include "config.h" 20 #include "system.h" 21 #include "cpplib.h" 22 #include "internal.h" 23 24 /* The replacement text of a function-like macro is stored as a 25 contiguous sequence of aligned blocks, each representing the text 26 between subsequent parameters. 27 28 Each block comprises the text between its surrounding parameters, 29 the length of that text, and the one-based index of the following 30 parameter. The final block in the replacement text is easily 31 recognizable as it has an argument index of zero. */ 32 33 struct block 34 { 35 unsigned int text_len; 36 unsigned short arg_index; 37 uchar text[1]; 38 }; 39 40 #define BLOCK_HEADER_LEN offsetof (struct block, text) 41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN)) 42 43 /* Structure holding information about a function-like macro 44 invocation. */ 45 struct fun_macro 46 { 47 /* Memory buffer holding the trad_arg array. */ 48 _cpp_buff *buff; 49 50 /* An array of size the number of macro parameters + 1, containing 51 the offsets of the start of each macro argument in the output 52 buffer. The argument continues until the character before the 53 start of the next one. 
*/ 54 size_t *args; 55 56 /* The hashnode of the macro. */ 57 cpp_hashnode *node; 58 59 /* The offset of the macro name in the output buffer. */ 60 size_t offset; 61 62 /* The line the macro name appeared on. */ 63 location_t line; 64 65 /* Number of parameters. */ 66 unsigned int paramc; 67 68 /* Zero-based index of argument being currently lexed. */ 69 unsigned int argc; 70 }; 71 72 /* Lexing state. It is mostly used to prevent macro expansion. */ 73 enum ls {ls_none = 0, /* Normal state. */ 74 ls_fun_open, /* When looking for '('. */ 75 ls_fun_close, /* When looking for ')'. */ 76 ls_defined, /* After defined. */ 77 ls_defined_close, /* Looking for ')' of defined(). */ 78 ls_hash, /* After # in preprocessor conditional. */ 79 ls_predicate, /* After the predicate, maybe paren? */ 80 ls_answer /* In answer to predicate. */ 81 }; 82 83 /* Lexing TODO: Maybe handle space in escaped newlines. Stop lex.cc 84 from recognizing comments and directives during its lexing pass. */ 85 86 static const uchar *skip_whitespace (cpp_reader *, const uchar *, int); 87 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *); 88 static const uchar *copy_comment (cpp_reader *, const uchar *, int); 89 static void check_output_buffer (cpp_reader *, size_t); 90 static void push_replacement_text (cpp_reader *, cpp_hashnode *); 91 static bool scan_parameters (cpp_reader *, unsigned *); 92 static bool recursive_macro (cpp_reader *, cpp_hashnode *); 93 static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int); 94 static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *, 95 struct fun_macro *); 96 static void save_argument (struct fun_macro *, size_t); 97 static void replace_args_and_push (cpp_reader *, struct fun_macro *); 98 static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *); 99 100 /* Ensures we have N bytes' space in the output buffer, and 101 reallocates it if not. 
*/ 102 static void 103 check_output_buffer (cpp_reader *pfile, size_t n) 104 { 105 /* We might need two bytes to terminate an unterminated comment, and 106 one more to terminate the line with a NUL. */ 107 n += 2 + 1; 108 109 if (n > (size_t) (pfile->out.limit - pfile->out.cur)) 110 { 111 size_t size = pfile->out.cur - pfile->out.base; 112 size_t new_size = (size + n) * 3 / 2; 113 114 pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size); 115 pfile->out.limit = pfile->out.base + new_size; 116 pfile->out.cur = pfile->out.base + size; 117 } 118 } 119 120 /* Skip a C-style block comment in a macro as a result of -CC. 121 PFILE->buffer->cur points to the initial asterisk of the comment, 122 change it to point to after the '*' and '/' characters that terminate it. 123 Return true if the macro has not been termined, in that case set 124 PFILE->buffer->cur to the end of the buffer. */ 125 static bool 126 skip_macro_block_comment (cpp_reader *pfile) 127 { 128 const uchar *cur = pfile->buffer->cur; 129 130 cur++; 131 if (*cur == '/') 132 cur++; 133 134 /* People like decorating comments with '*', so check for '/' 135 instead for efficiency. */ 136 while (! (*cur++ == '/' && cur[-2] == '*')) 137 if (cur[-1] == '\n') 138 { 139 pfile->buffer->cur = cur - 1; 140 return true; 141 } 142 143 pfile->buffer->cur = cur; 144 return false; 145 } 146 147 /* CUR points to the asterisk introducing a comment in the current 148 context. IN_DEFINE is true if we are in the replacement text of a 149 macro. 150 151 The asterisk and following comment is copied to the buffer pointed 152 to by pfile->out.cur, which must be of sufficient size. 153 Unterminated comments are diagnosed, and correctly terminated in 154 the output. pfile->out.cur is updated depending upon IN_DEFINE, 155 -C, -CC and pfile->state.in_directive. 156 157 Returns a pointer to the first character after the comment in the 158 input buffer. 
*/
static const uchar *
copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
{
  cpp_buffer *buf = pfile->buffer;
  location_t start_loc = pfile->line_table->highest_line;
  bool unterminated;
  bool keep;
  size_t len;

  /* Find the end of the comment.  Text coming from a macro expansion
     is scanned by the local helper, file text by the lexer's one.  */
  buf->cur = cur;
  unterminated = (pfile->context->prev
                  ? skip_macro_block_comment (pfile)
                  : _cpp_skip_block_comment (pfile));

  if (unterminated)
    cpp_error_with_line (pfile, CPP_DL_ERROR, start_loc, 0,
                         "unterminated comment");

  /* Comments in directives become spaces so that tokens are properly
     separated when the ISO preprocessor re-lexes the line.  The
     exception is #define.  The byte overwritten here is the one just
     before pfile->out.cur.  */
  if (pfile->state.in_directive && !in_define)
    {
      pfile->out.cur[-1] = ' ';
      return buf->cur;
    }

  /* Otherwise the comment is either kept or discarded, under the
     control of the option that applies to where we are.  */
  if (pfile->state.in_directive)
    keep = !CPP_OPTION (pfile, discard_comments_in_macro_exp);
  else
    keep = !CPP_OPTION (pfile, discard_comments);

  if (!keep)
    {
      /* Back up over the last byte already written to the output.  */
      pfile->out.cur--;
      return buf->cur;
    }

  /* Copy from the asterisk up to where scanning stopped, and close
     the comment ourselves if the input did not.  */
  len = (size_t) (buf->cur - cur);
  memcpy (pfile->out.cur, cur, len);
  pfile->out.cur += len;
  if (unterminated)
    {
      *pfile->out.cur++ = '*';
      *pfile->out.cur++ = '/';
    }

  return buf->cur;
}

/* CUR points to any character in the input buffer.  Skips over all
   contiguous horizontal white space and NULs, including comments if
   SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
   character or the end of the current context.  Escaped newlines are
   removed.

   The whitespace is copied verbatim to the output buffer, except that
   comments are handled as described in copy_comment().
   pfile->out.cur is updated.

   Returns a pointer to the first character after the whitespace in
   the input buffer.
*/ 223 static const uchar * 224 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments) 225 { 226 uchar *out = pfile->out.cur; 227 228 for (;;) 229 { 230 unsigned int c = *cur++; 231 *out++ = c; 232 233 if (is_nvspace (c)) 234 continue; 235 236 if (c == '/' && *cur == '*' && skip_comments) 237 { 238 pfile->out.cur = out; 239 cur = copy_comment (pfile, cur, false /* in_define */); 240 out = pfile->out.cur; 241 continue; 242 } 243 244 out--; 245 break; 246 } 247 248 pfile->out.cur = out; 249 return cur - 1; 250 } 251 252 /* Lexes and outputs an identifier starting at CUR, which is assumed 253 to point to a valid first character of an identifier. Returns 254 the hashnode, and updates out.cur. */ 255 static cpp_hashnode * 256 lex_identifier (cpp_reader *pfile, const uchar *cur) 257 { 258 size_t len; 259 uchar *out = pfile->out.cur; 260 cpp_hashnode *result; 261 262 do 263 *out++ = *cur++; 264 while (is_numchar (*cur)); 265 266 CUR (pfile->context) = cur; 267 len = out - pfile->out.cur; 268 result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur, 269 len, HT_ALLOC)); 270 pfile->out.cur = out; 271 return result; 272 } 273 274 /* Overlays the true file buffer temporarily with text of length LEN 275 starting at START. The true buffer is restored upon calling 276 restore_buff(). */ 277 void 278 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len) 279 { 280 cpp_buffer *buffer = pfile->buffer; 281 282 pfile->overlaid_buffer = buffer; 283 pfile->saved_cur = buffer->cur; 284 pfile->saved_rlimit = buffer->rlimit; 285 pfile->saved_line_base = buffer->next_line; 286 buffer->need_line = false; 287 288 buffer->cur = start; 289 buffer->line_base = start; 290 buffer->rlimit = start + len; 291 } 292 293 /* Restores a buffer overlaid by _cpp_overlay_buffer(). 
*/ 294 void 295 _cpp_remove_overlay (cpp_reader *pfile) 296 { 297 cpp_buffer *buffer = pfile->overlaid_buffer; 298 299 buffer->cur = pfile->saved_cur; 300 buffer->rlimit = pfile->saved_rlimit; 301 buffer->line_base = pfile->saved_line_base; 302 buffer->need_line = true; 303 304 pfile->overlaid_buffer = NULL; 305 } 306 307 /* Reads a logical line into the output buffer. Returns TRUE if there 308 is more text left in the buffer. */ 309 bool 310 _cpp_read_logical_line_trad (cpp_reader *pfile) 311 { 312 do 313 { 314 if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile)) 315 { 316 /* Now pop the buffer that _cpp_get_fresh_line did not. */ 317 _cpp_pop_buffer (pfile); 318 return false; 319 } 320 } 321 while (!_cpp_scan_out_logical_line (pfile, NULL, false) 322 || pfile->state.skipping); 323 324 return pfile->buffer != NULL; 325 } 326 327 /* Return true if NODE is a fun_like macro. */ 328 static inline bool 329 fun_like_macro (cpp_hashnode *node) 330 { 331 if (cpp_builtin_macro_p (node)) 332 return (node->value.builtin == BT_HAS_ATTRIBUTE 333 || node->value.builtin == BT_HAS_STD_ATTRIBUTE 334 || node->value.builtin == BT_HAS_BUILTIN 335 || node->value.builtin == BT_HAS_INCLUDE 336 || node->value.builtin == BT_HAS_INCLUDE_NEXT); 337 return node->value.macro->fun_like; 338 } 339 340 /* Set up state for finding the opening '(' of a function-like 341 macro. 
*/ 342 static void 343 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start, 344 struct fun_macro *macro) 345 { 346 unsigned int n; 347 if (cpp_builtin_macro_p (node)) 348 n = 1; 349 else 350 n = node->value.macro->paramc; 351 352 if (macro->buff) 353 _cpp_release_buff (pfile, macro->buff); 354 macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t)); 355 macro->args = (size_t *) BUFF_FRONT (macro->buff); 356 macro->node = node; 357 macro->offset = start - pfile->out.base; 358 macro->paramc = n; 359 macro->argc = 0; 360 } 361 362 /* Save the OFFSET of the start of the next argument to MACRO. */ 363 static void 364 save_argument (struct fun_macro *macro, size_t offset) 365 { 366 macro->argc++; 367 if (macro->argc <= macro->paramc) 368 macro->args[macro->argc] = offset; 369 } 370 371 /* Copies the next logical line in the current buffer (starting at 372 buffer->cur) to the output buffer. The output is guaranteed to 373 terminate with a NUL character. buffer->cur is updated. 374 375 If MACRO is non-NULL, then we are scanning the replacement list of 376 MACRO, and we call save_replacement_text() every time we meet an 377 argument. 378 379 If BUILTIN_MACRO_ARG is true, this is called to macro expand 380 arguments of builtin function-like macros. 
*/
bool
_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
                            bool builtin_macro_arg)
{
  /* RESULT stays true unless the line turns out to be a directive
     (a null one, or one passed to _cpp_handle_directive).  */
  bool result = true;
  cpp_context *context;
  const uchar *cur;
  uchar *out;
  struct fun_macro fmacro;
  /* QUOTE is zero outside a literal, otherwise the character that
     will close the literal currently being scanned.  */
  unsigned int c, paren_depth = 0, quote;
  enum ls lex_state = ls_none;
  /* HEADER_OK is true while a '<' may still open an angled header
     name; it is cleared by the first non-whitespace character.  */
  bool header_ok;
  const uchar *start_of_input_line;

  fmacro.buff = NULL;
  fmacro.args = NULL;
  fmacro.node = NULL;
  fmacro.offset = 0;
  fmacro.line = 0;
  fmacro.paramc = 0;
  fmacro.argc = 0;

  quote = 0;
  header_ok = pfile->state.angled_headers;
  CUR (pfile->context) = pfile->buffer->cur;
  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  /* When expanding the argument of a builtin macro the output is
     appended to what is already there rather than restarted.  */
  if (!builtin_macro_arg)
    {
      pfile->out.cur = pfile->out.base;
      pfile->out.first_line = pfile->line_table->highest_line;
    }
  /* start_of_input_line is needed to make sure that directives really,
     really start at the first character of the line.  */
  start_of_input_line = pfile->buffer->cur;
  /* Re-entered whenever the context stack changes (a macro expansion
     is pushed or popped): reload the cursor from the now-current
     context and make sure the output buffer can hold the rest of
     it.  */
 new_context:
  context = pfile->context;
  cur = CUR (context);
  check_output_buffer (pfile, RLIMIT (context) - cur);
  out = pfile->out.cur;

  for (;;)
    {
      /* Line notes are only processed for text read directly from the
         file buffer, i.e. in the base context.  */
      if (!context->prev
          && !builtin_macro_arg
          && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
        {
          pfile->buffer->cur = cur;
          _cpp_process_line_notes (pfile, false);
        }
      /* Every character is copied to the output first; the cases
         below undo or adjust that where necessary.  */
      c = *cur++;
      *out++ = c;

      /* Whitespace should "continue" out of the switch,
         non-whitespace should "break" out of it.  */
      switch (c)
        {
        case ' ':
        case '\t':
        case '\f':
        case '\v':
        case '\0':
          continue;

        case '\n':
          /* If this is a macro's expansion, pop it.  */
          if (context->prev)
            {
              pfile->out.cur = out - 1;
              _cpp_pop_context (pfile);
              goto new_context;
            }

          /* Omit the newline from the output buffer.  */
          pfile->out.cur = out - 1;
          pfile->buffer->cur = cur;
          if (builtin_macro_arg)
            goto done;
          pfile->buffer->need_line = true;
          CPP_INCREMENT_LINE (pfile, 0);

          /* Outside directives, a function-like macro invocation may
             continue on the next line, provided there is one.  */
          if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
              && !pfile->state.in_directive
              && _cpp_get_fresh_line (pfile))
            {
              /* Newlines in arguments become a space, but we don't
                 clear any in-progress quote.  */
              if (lex_state == ls_fun_close)
                out[-1] = ' ';
              cur = pfile->buffer->cur;
              continue;
            }
          goto done;

        case '<':
          if (header_ok)
            quote = '>';
          break;
        case '>':
          if (c == quote)
            quote = 0;
          break;

        case '"':
        case '\'':
          /* A quote character closes a literal it opened, opens one
             if none is in progress, and is otherwise ordinary text
             inside the other kind of literal.  */
          if (c == quote)
            quote = 0;
          else if (!quote)
            quote = c;
          break;

        case '\\':
          /* Skip escaped quotes here, it's easier than above.  */
          if (*cur == '\\' || *cur == '"' || *cur == '\'')
            *out++ = *cur++;
          break;

        case '/':
          /* Traditional CPP does not recognize comments within
             literals.  */
          if (!quote && *cur == '*')
            {
              pfile->out.cur = out;
              cur = copy_comment (pfile, cur, macro != 0);
              out = pfile->out.cur;
              continue;
            }
          break;

        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
          /* Identifiers are looked up unless we are skipping.  Inside
             a literal they are only looked up while scanning a macro
             definition (MACRO non-NULL).  */
          if (!pfile->state.skipping && (quote == 0 || macro))
            {
              cpp_hashnode *node;
              uchar *out_start = out - 1;

              /* Re-lex from the first character so that the whole
                 identifier is copied and hashed in one go.  */
              pfile->out.cur = out_start;
              node = lex_identifier (pfile, cur - 1);
              out = pfile->out.cur;
              cur = CUR (context);

              if (cpp_macro_p (node)
                  /* Should we expand for ls_answer?  */
                  && (lex_state == ls_none || lex_state == ls_fun_open)
                  && !pfile->state.prevent_expansion)
                {
                  /* Macros invalidate MI optimization.  */
                  pfile->mi_valid = false;
                  if (fun_like_macro (node))
                    {
                      /* Nothing is expanded yet: remember where the
                         name is and wait to see whether a '('
                         follows.  */
                      maybe_start_funlike (pfile, node, out_start, &fmacro);
                      lex_state = ls_fun_open;
                      fmacro.line = pfile->line_table->highest_line;
                      continue;
                    }
                  else if (!recursive_macro (pfile, node))
                    {
                      /* Remove the object-like macro's name from the
                         output, and push its replacement text.  */
                      pfile->out.cur = out_start;
                      push_replacement_text (pfile, node);
                      lex_state = ls_none;
                      goto new_context;
                    }
                }
              else if (macro && node->type == NT_MACRO_ARG)
                {
                  /* Found a parameter in the replacement text of a
                     #define.  Remove its name from the output.  The
                     text gathered so far is saved as one block, and
                     scanning of the remainder restarts at the base of
                     the output buffer.  */
                  pfile->out.cur = out_start;
                  save_replacement_text (pfile, macro, node->value.arg_index);
                  out = pfile->out.base;
                }
              else if (lex_state == ls_hash)
                {
                  lex_state = ls_predicate;
                  continue;
                }
              else if (pfile->state.in_expression
                       && node == pfile->spec_nodes.n_defined)
                {
                  lex_state = ls_defined;
                  continue;
                }
            }
          break;

        case '(':
          if (quote == 0)
            {
              paren_depth++;
              if (lex_state == ls_fun_open)
                {
                  if (recursive_macro (pfile, fmacro.node))
                    lex_state = ls_none;
                  else
                    {
                      /* Start collecting arguments.  The output
                         cursor is moved back to where the macro name
                         began, so the argument text that follows
                         overwrites the name and this parenthesis; the
                         first argument therefore starts at that same
                         offset.  */
                      lex_state = ls_fun_close;
                      paren_depth = 1;
                      out = pfile->out.base + fmacro.offset;
                      fmacro.args[0] = fmacro.offset;
                    }
                }
              else if (lex_state == ls_predicate)
                lex_state = ls_answer;
              else if (lex_state == ls_defined)
                lex_state = ls_defined_close;
            }
          break;

        case ',':
          /* Only a comma at the outermost parenthesis level of the
             invocation, and outside literals, separates arguments.  */
          if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
            save_argument (&fmacro, out - pfile->out.base);
          break;

        case ')':
          if (quote == 0)
            {
              paren_depth--;
              if (lex_state == ls_fun_close && paren_depth == 0)
                {
                  if (cpp_builtin_macro_p (fmacro.node))
                    {
                      /* Handle builtin function-like macros like
                         __has_attribute.  The already parsed arguments
                         are put into a buffer, which is then preprocessed
                         and the result is fed to _cpp_push_text_context
                         with disabled expansion, where the ISO preprocessor
                         parses it.  While in traditional preprocessing
                         macro arguments aren't immediately expanded, they in
                         the end are because the macro with replaced arguments
                         is preprocessed again.  For the builtin function-like
                         macros we need the argument immediately though,
                         if we don't preprocess them, they would behave
                         very differently from ISO preprocessor handling
                         of those builtin macros.  So, this handling is
                         more similar to traditional preprocessing of
                         #if directives, where we also keep preprocessing
                         until everything is expanded, and then feed the
                         result with disabled expansion to ISO preprocessor
                         for handling the directives.  */
                      lex_state = ls_none;
                      save_argument (&fmacro, out - pfile->out.base);
                      /* A zeroed dummy macro carrying only the
                         parameter count, used for the argument count
                         check.  */
                      cpp_macro m;
                      memset (&m, '\0', sizeof (m));
                      m.paramc = fmacro.paramc;
                      if (_cpp_arguments_ok (pfile, &m, fmacro.node,
                                             fmacro.argc))
                        {
                          size_t len = fmacro.args[1] - fmacro.args[0];
                          uchar *buf;

                          /* Remove the macro's invocation from the
                             output, and push its replacement text.  */
                          pfile->out.cur = pfile->out.base + fmacro.offset;
                          CUR (context) = cur;
                          /* Build "(" + collected argument text +
                             newline in a scratch buffer.  */
                          buf = _cpp_unaligned_alloc (pfile, len + 2);
                          buf[0] = '(';
                          memcpy (buf + 1, pfile->out.base + fmacro.args[0],
                                  len);
                          buf[len + 1] = '\n';

                          /* Save the reader state that is about to be
                             redirected at the scratch buffer, so it can
                             be put back afterwards.  */
                          const unsigned char *ctx_rlimit = RLIMIT (context);
                          const unsigned char *saved_cur = pfile->buffer->cur;
                          const unsigned char *saved_rlimit
                            = pfile->buffer->rlimit;
                          const unsigned char *saved_line_base
                            = pfile->buffer->line_base;
                          bool saved_need_line = pfile->buffer->need_line;
                          cpp_buffer *saved_overlaid_buffer
                            = pfile->overlaid_buffer;
                          pfile->buffer->cur = buf;
                          pfile->buffer->line_base = buf;
                          pfile->buffer->rlimit = buf + len + 1;
                          pfile->buffer->need_line = false;
                          pfile->overlaid_buffer = pfile->buffer;
                          bool saved_in_directive = pfile->state.in_directive;
                          pfile->state.in_directive = true;
                          cpp_context *saved_prev_context = context->prev;
                          context->prev = NULL;

                          /* Recursively expand the argument text; the
                             result is appended to the output buffer
                             starting at fmacro.offset.  */
                          _cpp_scan_out_logical_line (pfile, NULL, true);

                          /* Newline-terminate the expanded text and
                             make it the input seen while the builtin's
                             value is computed.  */
                          pfile->state.in_directive = saved_in_directive;
                          check_output_buffer (pfile, 1);
                          *pfile->out.cur = '\n';
                          pfile->buffer->cur = pfile->out.base + fmacro.offset;
                          pfile->buffer->line_base = pfile->buffer->cur;
                          pfile->buffer->rlimit = pfile->out.cur;
                          CUR (context) = pfile->buffer->cur;
                          RLIMIT (context) = pfile->buffer->rlimit;

                          pfile->state.prevent_expansion++;
                          const uchar *text
                            = _cpp_builtin_macro_text (pfile, fmacro.node);
                          pfile->state.prevent_expansion--;

                          /* Restore everything saved above and drop
                             the invocation from the output.  */
                          context->prev = saved_prev_context;
                          pfile->buffer->cur = saved_cur;
                          pfile->buffer->rlimit = saved_rlimit;
                          pfile->buffer->line_base = saved_line_base;
                          pfile->buffer->need_line = saved_need_line;
                          pfile->overlaid_buffer = saved_overlaid_buffer;
                          pfile->out.cur = pfile->out.base + fmacro.offset;
                          CUR (context) = cur;
                          RLIMIT (context) = ctx_rlimit;
                          /* Push a newline-terminated copy of the
                             builtin's text as a new context.  */
                          len = ustrlen (text);
                          buf = _cpp_unaligned_alloc (pfile, len + 1);
                          memcpy (buf, text, len);
                          buf[len] = '\n';
                          text = buf;
                          _cpp_push_text_context (pfile, fmacro.node,
                                                  text, len);
                          goto new_context;
                        }
                      break;
                    }

                  cpp_macro *m = fmacro.node->value.macro;

                  m->used = 1;
                  lex_state = ls_none;
                  save_argument (&fmacro, out - pfile->out.base);

                  /* A single zero-length argument is no argument.  */
                  if (fmacro.argc == 1
                      && m->paramc == 0
                      && out == pfile->out.base + fmacro.offset + 1)
                    fmacro.argc = 0;

                  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
                    {
                      /* Remove the macro's invocation from the
                         output, and push its replacement text.  */
                      pfile->out.cur = pfile->out.base + fmacro.offset;
                      CUR (context) = cur;
                      replace_args_and_push (pfile, &fmacro);
                      goto new_context;
                    }
                }
              else if (lex_state == ls_answer || lex_state == ls_defined_close)
                lex_state = ls_none;
            }
          break;

        case '#':
          if (cur - 1 == start_of_input_line
              /* A '#' from a macro doesn't start a directive.  */
              && !pfile->context->prev
              && !pfile->state.in_directive)
            {
              /* A directive.  With the way _cpp_handle_directive
                 currently works, we only want to call it if either we
                 know the directive is OK, or we want it to fail and
                 be removed from the output.  If we want it to be
                 passed through (the assembler case) then we must not
                 call _cpp_handle_directive.  */
              pfile->out.cur = out;
              cur = skip_whitespace (pfile, cur, true /* skip_comments */);
              out = pfile->out.cur;

              if (*cur == '\n')
                {
                  /* Null directive.  Ignore it and don't invalidate
                     the MI optimization.  */
                  pfile->buffer->need_line = true;
                  CPP_INCREMENT_LINE (pfile, 0);
                  result = false;
                  goto done;
                }
              else
                {
                  bool do_it = false;

                  if (is_numstart (*cur)
                      && CPP_OPTION (pfile, lang) != CLK_ASM)
                    do_it = true;
                  else if (is_idstart (*cur))
                    /* Check whether we know this directive, but don't
                       advance.  */
                    do_it = lex_identifier (pfile, cur)->is_directive;

                  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
                    {
                      /* This is a kludge.  We want to have the ISO
                         preprocessor lex the next token.  */
                      pfile->buffer->cur = cur;
                      _cpp_handle_directive (pfile, false /* indented */);
                      result = false;
                      goto done;
                    }
                }
            }

          if (pfile->state.in_expression)
            {
              lex_state = ls_hash;
              continue;
            }
          break;

        default:
          break;
        }

      /* Non-whitespace disables MI optimization and stops treating
         '<' as a quote in #include.  */
      header_ok = false;
      if (!pfile->state.in_directive)
        pfile->mi_valid = false;

      if (lex_state == ls_none)
        continue;

      /* Some of these transitions of state are syntax errors.  The
         ISO preprocessor will issue errors later.  */
      if (lex_state == ls_fun_open)
        /* Missing '('.  */
        lex_state = ls_none;
      else if (lex_state == ls_hash
               || lex_state == ls_predicate
               || lex_state == ls_defined)
        lex_state = ls_none;

      /* ls_answer and ls_defined_close keep going until ')'.  */
    }

 done:
  if (fmacro.buff)
    _cpp_release_buff (pfile, fmacro.buff);

  /* Still collecting arguments when the input ran out.  */
  if (lex_state == ls_fun_close)
    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
                         "unterminated argument list invoking macro \"%s\"",
                         NODE_NAME (fmacro.node));
  return result;
}

/* Push a context holding the replacement text of the macro NODE on
   the context stack.  NODE is either object-like, or a function-like
   macro with no arguments.  */
static void
push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
{
  size_t len;
  const uchar *text;
  uchar *buf;

  if (cpp_builtin_macro_p (node))
    {
      /* The text of a builtin is copied so that it can be given the
         trailing newline that ends a text context.  */
      text = _cpp_builtin_macro_text (pfile, node);
      len = ustrlen (text);
      buf = _cpp_unaligned_alloc (pfile, len + 1);
      memcpy (buf, text, len);
      buf[len] = '\n';
      text = buf;
    }
  else
    {
      /* For these macros save_replacement_text() has stored the text
         newline-terminated, with COUNT holding its length.  */
      cpp_macro *macro = node->value.macro;
      macro->used = 1;
      text = macro->exp.text;
      len = macro->count;
    }

  _cpp_push_text_context (pfile, node, text, len);
}

/* Returns TRUE if traditional macro recursion is detected, in which
   case an error is also issued.  */
static bool
recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
{
  bool recursing = !!(node->flags & NODE_DISABLED);

  /* Object-like macros that are already expanding are necessarily
     recursive.

     However, it is possible to have traditional function-like macros
     that are not infinitely recursive but recurse to any given depth.
     Further, it is easy to construct examples that get ever longer
     until the point they stop recursing.  So there is no easy way to
     detect true recursion; instead we assume any expansion more than
     20 deep since the first invocation of this macro must be
     recursing.  */
  if (recursing && fun_like_macro (node))
    {
      size_t depth = 0;
      cpp_context *context = pfile->context;

      /* Walk outwards through the context stack.  The loop is left
         early, with CONTEXT non-NULL, only on meeting a context for
         NODE that lies more than 20 levels down.  */
      do
        {
          depth++;
          if (context->c.macro == node && depth > 20)
            break;
          context = context->prev;
        }
      while (context);
      recursing = context != NULL;
    }

  if (recursing)
    cpp_error (pfile, CPP_DL_ERROR,
               "detected recursion whilst expanding macro \"%s\"",
               NODE_NAME (node));

  return recursing;
}

/* Return the length of the replacement text of a function-like or
   object-like non-builtin macro.  For a macro with parameters this is
   the sum of the literal text of every block plus the length of the
   name of each parameter referenced.  */
size_t
_cpp_replacement_text_len (const cpp_macro *macro)
{
  size_t len;

  if (macro->fun_like && (macro->paramc != 0))
    {
      const uchar *exp;

      len = 0;
      for (exp = macro->exp.text;;)
        {
          struct block *b = (struct block *) exp;

          len += b->text_len;
          /* An argument index of zero marks the final block.  */
          if (b->arg_index == 0)
            break;
          len += NODE_LEN (macro->parm.params[b->arg_index - 1]);
          exp += BLOCK_LEN (b->text_len);
        }
    }
  else
    len = macro->count;

  return len;
}

/* Copy the replacement text of MACRO to DEST, which must be of
   sufficient size.  It is not NUL-terminated.  A pointer to the
   character after the copied text is returned.
*/ 929 uchar * 930 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest) 931 { 932 if (macro->fun_like && (macro->paramc != 0)) 933 { 934 const uchar *exp; 935 936 for (exp = macro->exp.text;;) 937 { 938 struct block *b = (struct block *) exp; 939 cpp_hashnode *param; 940 941 memcpy (dest, b->text, b->text_len); 942 dest += b->text_len; 943 if (b->arg_index == 0) 944 break; 945 param = macro->parm.params[b->arg_index - 1]; 946 memcpy (dest, NODE_NAME (param), NODE_LEN (param)); 947 dest += NODE_LEN (param); 948 exp += BLOCK_LEN (b->text_len); 949 } 950 } 951 else 952 { 953 memcpy (dest, macro->exp.text, macro->count); 954 dest += macro->count; 955 } 956 957 return dest; 958 } 959 960 /* Push a context holding the replacement text of the macro NODE on 961 the context stack. NODE is either object-like, or a function-like 962 macro with no arguments. */ 963 static void 964 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro) 965 { 966 cpp_macro *macro = fmacro->node->value.macro; 967 968 if (macro->paramc == 0) 969 push_replacement_text (pfile, fmacro->node); 970 else 971 { 972 const uchar *exp; 973 uchar *p; 974 _cpp_buff *buff; 975 size_t len = 0; 976 int cxtquote = 0; 977 978 /* Get an estimate of the length of the argument-replaced text. 979 This is a worst case estimate, assuming that every replacement 980 text character needs quoting. */ 981 for (exp = macro->exp.text;;) 982 { 983 struct block *b = (struct block *) exp; 984 985 len += b->text_len; 986 if (b->arg_index == 0) 987 break; 988 len += 2 * (fmacro->args[b->arg_index] 989 - fmacro->args[b->arg_index - 1] - 1); 990 exp += BLOCK_LEN (b->text_len); 991 } 992 993 /* Allocate room for the expansion plus \n. */ 994 buff = _cpp_get_buff (pfile, len + 1); 995 996 /* Copy the expansion and replace arguments. 
*/ 997 /* Accumulate actual length, including quoting as necessary */ 998 p = BUFF_FRONT (buff); 999 len = 0; 1000 for (exp = macro->exp.text;;) 1001 { 1002 struct block *b = (struct block *) exp; 1003 size_t arglen; 1004 int argquote; 1005 uchar *base; 1006 uchar *in; 1007 1008 len += b->text_len; 1009 /* Copy the non-argument text literally, keeping 1010 track of whether matching quotes have been seen. */ 1011 for (arglen = b->text_len, in = b->text; arglen > 0; arglen--) 1012 { 1013 if (*in == '"') 1014 cxtquote = ! cxtquote; 1015 *p++ = *in++; 1016 } 1017 /* Done if no more arguments */ 1018 if (b->arg_index == 0) 1019 break; 1020 arglen = (fmacro->args[b->arg_index] 1021 - fmacro->args[b->arg_index - 1] - 1); 1022 base = pfile->out.base + fmacro->args[b->arg_index - 1]; 1023 in = base; 1024 #if 0 1025 /* Skip leading whitespace in the text for the argument to 1026 be substituted. To be compatible with gcc 2.95, we would 1027 also need to trim trailing whitespace. Gcc 2.95 trims 1028 leading and trailing whitespace, which may be a bug. The 1029 current gcc testsuite explicitly checks that this leading 1030 and trailing whitespace in actual arguments is 1031 preserved. */ 1032 while (arglen > 0 && is_space (*in)) 1033 { 1034 in++; 1035 arglen--; 1036 } 1037 #endif 1038 for (argquote = 0; arglen > 0; arglen--) 1039 { 1040 if (cxtquote && *in == '"') 1041 { 1042 if (in > base && *(in-1) != '\\') 1043 argquote = ! argquote; 1044 /* Always add backslash before double quote if argument 1045 is expanded in a quoted context */ 1046 *p++ = '\\'; 1047 len++; 1048 } 1049 else if (cxtquote && argquote && *in == '\\') 1050 { 1051 /* Always add backslash before a backslash in an argument 1052 that is expanded in a quoted context and also in the 1053 range of a quoted context in the argument itself. */ 1054 *p++ = '\\'; 1055 len++; 1056 } 1057 *p++ = *in++; 1058 len++; 1059 } 1060 exp += BLOCK_LEN (b->text_len); 1061 } 1062 1063 /* \n-terminate. 
*/ 1064 *p = '\n'; 1065 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len); 1066 1067 /* So we free buffer allocation when macro is left. */ 1068 pfile->context->buff = buff; 1069 } 1070 } 1071 1072 /* Read and record the parameters, if any, of a function-like macro 1073 definition. Destroys pfile->out.cur. 1074 1075 Returns true on success, false on failure (syntax error or a 1076 duplicate parameter). On success, CUR (pfile->context) is just 1077 past the closing parenthesis. */ 1078 static bool 1079 scan_parameters (cpp_reader *pfile, unsigned *n_ptr) 1080 { 1081 const uchar *cur = CUR (pfile->context) + 1; 1082 bool ok; 1083 1084 unsigned nparms = 0; 1085 for (;;) 1086 { 1087 cur = skip_whitespace (pfile, cur, true /* skip_comments */); 1088 1089 if (is_idstart (*cur)) 1090 { 1091 struct cpp_hashnode *id = lex_identifier (pfile, cur); 1092 ok = false; 1093 if (!_cpp_save_parameter (pfile, nparms, id, id)) 1094 break; 1095 nparms++; 1096 cur = skip_whitespace (pfile, CUR (pfile->context), 1097 true /* skip_comments */); 1098 if (*cur == ',') 1099 { 1100 cur++; 1101 continue; 1102 } 1103 ok = (*cur == ')'); 1104 break; 1105 } 1106 1107 ok = (*cur == ')' && !nparms); 1108 break; 1109 } 1110 1111 *n_ptr = nparms; 1112 1113 if (!ok) 1114 cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list"); 1115 1116 CUR (pfile->context) = cur + (*cur == ')'); 1117 1118 return ok; 1119 } 1120 1121 /* Save the text from pfile->out.base to pfile->out.cur as 1122 the replacement text for the current macro, followed by argument 1123 ARG_INDEX, with zero indicating the end of the replacement 1124 text. */ 1125 static void 1126 save_replacement_text (cpp_reader *pfile, cpp_macro *macro, 1127 unsigned int arg_index) 1128 { 1129 size_t len = pfile->out.cur - pfile->out.base; 1130 uchar *exp; 1131 1132 if (macro->paramc == 0) 1133 { 1134 /* Object-like and function-like macros without parameters 1135 simply store their \n-terminated replacement text. 
*/ 1136 exp = _cpp_unaligned_alloc (pfile, len + 1); 1137 memcpy (exp, pfile->out.base, len); 1138 exp[len] = '\n'; 1139 macro->exp.text = exp; 1140 macro->count = len; 1141 } 1142 else 1143 { 1144 /* Store the text's length (unsigned int), the argument index 1145 (unsigned short, base 1) and then the text. */ 1146 size_t blen = BLOCK_LEN (len); 1147 struct block *block; 1148 1149 if (macro->count + blen > BUFF_ROOM (pfile->a_buff)) 1150 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen); 1151 1152 exp = BUFF_FRONT (pfile->a_buff); 1153 block = (struct block *) (exp + macro->count); 1154 macro->exp.text = exp; 1155 1156 /* Write out the block information. */ 1157 block->text_len = len; 1158 block->arg_index = arg_index; 1159 memcpy (block->text, pfile->out.base, len); 1160 1161 /* Lex the rest into the start of the output buffer. */ 1162 pfile->out.cur = pfile->out.base; 1163 1164 macro->count += blen; 1165 1166 /* If we've finished, commit the memory. */ 1167 if (arg_index == 0) 1168 BUFF_FRONT (pfile->a_buff) += macro->count; 1169 } 1170 } 1171 1172 /* Analyze and save the replacement text of a macro. Returns true on 1173 success. */ 1174 cpp_macro * 1175 _cpp_create_trad_definition (cpp_reader *pfile) 1176 { 1177 const uchar *cur; 1178 uchar *limit; 1179 cpp_context *context = pfile->context; 1180 unsigned nparms = 0; 1181 int fun_like = 0; 1182 cpp_hashnode **params = NULL; 1183 1184 /* The context has not been set up for command line defines, and CUR 1185 has not been updated for the macro name for in-file defines. */ 1186 pfile->out.cur = pfile->out.base; 1187 CUR (context) = pfile->buffer->cur; 1188 RLIMIT (context) = pfile->buffer->rlimit; 1189 check_output_buffer (pfile, RLIMIT (context) - CUR (context)); 1190 1191 /* Is this a function-like macro? 
*/ 1192 if (* CUR (context) == '(') 1193 { 1194 fun_like = +1; 1195 if (scan_parameters (pfile, &nparms)) 1196 params = (cpp_hashnode **)_cpp_commit_buff 1197 (pfile, sizeof (cpp_hashnode *) * nparms); 1198 else 1199 fun_like = -1; 1200 } 1201 1202 cpp_macro *macro = NULL; 1203 1204 if (fun_like >= 0) 1205 { 1206 macro = _cpp_new_macro (pfile, cmk_traditional, 1207 _cpp_aligned_alloc (pfile, sizeof (cpp_macro))); 1208 macro->parm.params = params; 1209 macro->paramc = nparms; 1210 macro->fun_like = fun_like != 0; 1211 } 1212 1213 /* Skip leading whitespace in the replacement text. */ 1214 pfile->buffer->cur 1215 = skip_whitespace (pfile, CUR (context), 1216 CPP_OPTION (pfile, discard_comments_in_macro_exp)); 1217 1218 pfile->state.prevent_expansion++; 1219 _cpp_scan_out_logical_line (pfile, macro, false); 1220 pfile->state.prevent_expansion--; 1221 1222 _cpp_unsave_parameters (pfile, nparms); 1223 1224 if (macro) 1225 { 1226 /* Skip trailing white space. */ 1227 cur = pfile->out.base; 1228 limit = pfile->out.cur; 1229 while (limit > cur && is_space (limit[-1])) 1230 limit--; 1231 pfile->out.cur = limit; 1232 save_replacement_text (pfile, macro, 0); 1233 } 1234 1235 return macro; 1236 } 1237 1238 /* Copy SRC of length LEN to DEST, but convert all contiguous 1239 whitespace to a single space, provided it is not in quotes. The 1240 quote currently in effect is pointed to by PQUOTE, and is updated 1241 by the function. Returns the number of bytes copied. 
*/ 1242 static size_t 1243 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote) 1244 { 1245 uchar *orig_dest = dest; 1246 uchar quote = *pquote; 1247 1248 while (len) 1249 { 1250 if (is_space (*src) && !quote) 1251 { 1252 do 1253 src++, len--; 1254 while (len && is_space (*src)); 1255 *dest++ = ' '; 1256 } 1257 else 1258 { 1259 if (*src == '\'' || *src == '"') 1260 { 1261 if (!quote) 1262 quote = *src; 1263 else if (quote == *src) 1264 quote = 0; 1265 } 1266 *dest++ = *src++, len--; 1267 } 1268 } 1269 1270 *pquote = quote; 1271 return dest - orig_dest; 1272 } 1273 1274 /* Returns true if MACRO1 and MACRO2 have expansions different other 1275 than in the form of their whitespace. */ 1276 bool 1277 _cpp_expansions_different_trad (const cpp_macro *macro1, 1278 const cpp_macro *macro2) 1279 { 1280 uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count); 1281 uchar *p2 = p1 + macro1->count; 1282 uchar quote1 = 0, quote2 = 0; 1283 bool mismatch; 1284 size_t len1, len2; 1285 1286 if (macro1->paramc > 0) 1287 { 1288 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text; 1289 1290 mismatch = true; 1291 for (;;) 1292 { 1293 struct block *b1 = (struct block *) exp1; 1294 struct block *b2 = (struct block *) exp2; 1295 1296 if (b1->arg_index != b2->arg_index) 1297 break; 1298 1299 len1 = canonicalize_text (p1, b1->text, b1->text_len, "e1); 1300 len2 = canonicalize_text (p2, b2->text, b2->text_len, "e2); 1301 if (len1 != len2 || memcmp (p1, p2, len1)) 1302 break; 1303 if (b1->arg_index == 0) 1304 { 1305 mismatch = false; 1306 break; 1307 } 1308 exp1 += BLOCK_LEN (b1->text_len); 1309 exp2 += BLOCK_LEN (b2->text_len); 1310 } 1311 } 1312 else 1313 { 1314 len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, "e1); 1315 len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, "e2); 1316 mismatch = (len1 != len2 || memcmp (p1, p2, len1)); 1317 } 1318 1319 free (p1); 1320 return mismatch; 1321 } 1322