1 //===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is the implementation for minimizing header and source files to the 11 /// minimum necessary preprocessor directives for evaluating includes. It 12 /// reduces the source down to #define, #include, #import, @import, and any 13 /// conditional preprocessor logic that contains one of those. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h" 18 #include "clang/Basic/CharInfo.h" 19 #include "clang/Basic/Diagnostic.h" 20 #include "clang/Lex/LexDiagnostic.h" 21 #include "llvm/ADT/StringSwitch.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 24 using namespace llvm; 25 using namespace clang; 26 using namespace clang::minimize_source_to_dependency_directives; 27 28 namespace { 29 30 struct Minimizer { 31 /// Minimized output. 32 SmallVectorImpl<char> &Out; 33 /// The known tokens encountered during the minimization. 34 SmallVectorImpl<Token> &Tokens; 35 36 Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens, 37 StringRef Input, DiagnosticsEngine *Diags, 38 SourceLocation InputSourceLoc) 39 : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), 40 InputSourceLoc(InputSourceLoc) {} 41 42 /// Lex the provided source and emit the minimized output. 43 /// 44 /// \returns True on error. 45 bool minimize(); 46 47 private: 48 struct IdInfo { 49 const char *Last; 50 StringRef Name; 51 }; 52 53 /// Lex an identifier. 54 /// 55 /// \pre First points at a valid identifier head. 56 LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); 57 LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, 58 const char *const End); 59 LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); 60 LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); 61 LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); 62 LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); 63 LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); 64 LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); 65 LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); 66 LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, 67 const char *&First, const char *const End); 68 Token &makeToken(TokenKind K) { 69 Tokens.emplace_back(K, Out.size()); 70 return Tokens.back(); 71 } 72 void popToken() { 73 Out.resize(Tokens.back().Offset); 74 Tokens.pop_back(); 75 } 76 TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; } 77 78 Minimizer &put(char Byte) { 79 Out.push_back(Byte); 80 return *this; 81 } 82 Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } 83 Minimizer &append(const char *First, const char *Last) { 84 Out.append(First, Last); 85 return *this; 86 } 87 88 void printToNewline(const char *&First, const char *const End); 89 void printAdjacentModuleNameParts(const char *&First, const char *const End); 90 LLVM_NODISCARD bool printAtImportBody(const char *&First, 91 const char *const End); 92 void printDirectiveBody(const char *&First, const char *const End); 93 void printAdjacentMacroArgs(const char *&First, const char *const End); 94 LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); 95 96 /// Reports a diagnostic if the diagnostic engine is provided. Always returns 97 /// true at the end. 98 bool reportError(const char *CurPtr, unsigned Err); 99 100 StringMap<char> SplitIds; 101 StringRef Input; 102 DiagnosticsEngine *Diags; 103 SourceLocation InputSourceLoc; 104 }; 105 106 } // end anonymous namespace 107 108 bool Minimizer::reportError(const char *CurPtr, unsigned Err) { 109 if (!Diags) 110 return true; 111 assert(CurPtr >= Input.data() && "invalid buffer ptr"); 112 Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); 113 return true; 114 } 115 116 static void skipOverSpaces(const char *&First, const char *const End) { 117 while (First != End && isHorizontalWhitespace(*First)) 118 ++First; 119 } 120 121 LLVM_NODISCARD static bool isRawStringLiteral(const char *First, 122 const char *Current) { 123 assert(First <= Current); 124 125 // Check if we can even back up. 126 if (*Current != '"' || First == Current) 127 return false; 128 129 // Check for an "R". 130 --Current; 131 if (*Current != 'R') 132 return false; 133 if (First == Current || !isIdentifierBody(*--Current)) 134 return true; 135 136 // Check for a prefix of "u", "U", or "L". 137 if (*Current == 'u' || *Current == 'U' || *Current == 'L') 138 return First == Current || !isIdentifierBody(*--Current); 139 140 // Check for a prefix of "u8". 141 if (*Current != '8' || First == Current || *Current-- != 'u') 142 return false; 143 return First == Current || !isIdentifierBody(*--Current); 144 } 145 146 static void skipRawString(const char *&First, const char *const End) { 147 assert(First[0] == '"'); 148 assert(First[-1] == 'R'); 149 150 const char *Last = ++First; 151 while (Last != End && *Last != '(') 152 ++Last; 153 if (Last == End) { 154 First = Last; // Hit the end... just give up. 155 return; 156 } 157 158 StringRef Terminator(First, Last - First); 159 for (;;) { 160 // Move First to just past the next ")". 161 First = Last; 162 while (First != End && *First != ')') 163 ++First; 164 if (First == End) 165 return; 166 ++First; 167 168 // Look ahead for the terminator sequence. 169 Last = First; 170 while (Last != End && size_t(Last - First) < Terminator.size() && 171 Terminator[Last - First] == *Last) 172 ++Last; 173 174 // Check if we hit it (or the end of the file). 175 if (Last == End) { 176 First = Last; 177 return; 178 } 179 if (size_t(Last - First) < Terminator.size()) 180 continue; 181 if (*Last != '"') 182 continue; 183 First = Last + 1; 184 return; 185 } 186 } 187 188 // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) 189 static unsigned isEOL(const char *First, const char *const End) { 190 if (First == End) 191 return 0; 192 if (End - First > 1 && isVerticalWhitespace(First[0]) && 193 isVerticalWhitespace(First[1]) && First[0] != First[1]) 194 return 2; 195 return !!isVerticalWhitespace(First[0]); 196 } 197 198 static void skipString(const char *&First, const char *const End) { 199 assert(*First == '\'' || *First == '"' || *First == '<'); 200 const char Terminator = *First == '<' ? '>' : *First; 201 for (++First; First != End && *First != Terminator; ++First) { 202 // String and character literals don't extend past the end of the line. 203 if (isVerticalWhitespace(*First)) 204 return; 205 if (*First != '\\') 206 continue; 207 // Skip past backslash to the next character. This ensures that the 208 // character right after it is skipped as well, which matters if it's 209 // the terminator. 210 if (++First == End) 211 return; 212 if (!isWhitespace(*First)) 213 continue; 214 // Whitespace after the backslash might indicate a line continuation. 215 const char *FirstAfterBackslashPastSpace = First; 216 skipOverSpaces(FirstAfterBackslashPastSpace, End); 217 if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { 218 // Advance the character pointer to the next line for the next 219 // iteration. 220 First = FirstAfterBackslashPastSpace + NLSize - 1; 221 } 222 } 223 if (First != End) 224 ++First; // Finish off the string. 225 } 226 227 // Returns the length of the skipped newline 228 static unsigned skipNewline(const char *&First, const char *End) { 229 if (First == End) 230 return 0; 231 assert(isVerticalWhitespace(*First)); 232 unsigned Len = isEOL(First, End); 233 assert(Len && "expected newline"); 234 First += Len; 235 return Len; 236 } 237 238 static bool wasLineContinuation(const char *First, unsigned EOLLen) { 239 return *(First - (int)EOLLen - 1) == '\\'; 240 } 241 242 static void skipToNewlineRaw(const char *&First, const char *const End) { 243 for (;;) { 244 if (First == End) 245 return; 246 247 unsigned Len = isEOL(First, End); 248 if (Len) 249 return; 250 251 do { 252 if (++First == End) 253 return; 254 Len = isEOL(First, End); 255 } while (!Len); 256 257 if (First[-1] != '\\') 258 return; 259 260 First += Len; 261 // Keep skipping lines... 262 } 263 } 264 265 static const char *findLastNonSpace(const char *First, const char *Last) { 266 assert(First <= Last); 267 while (First != Last && isHorizontalWhitespace(Last[-1])) 268 --Last; 269 return Last; 270 } 271 272 static const char *findFirstTrailingSpace(const char *First, 273 const char *Last) { 274 const char *LastNonSpace = findLastNonSpace(First, Last); 275 if (Last == LastNonSpace) 276 return Last; 277 assert(isHorizontalWhitespace(LastNonSpace[0])); 278 return LastNonSpace + 1; 279 } 280 281 static void skipLineComment(const char *&First, const char *const End) { 282 assert(First[0] == '/' && First[1] == '/'); 283 First += 2; 284 skipToNewlineRaw(First, End); 285 } 286 287 static void skipBlockComment(const char *&First, const char *const End) { 288 assert(First[0] == '/' && First[1] == '*'); 289 if (End - First < 4) { 290 First = End; 291 return; 292 } 293 for (First += 3; First != End; ++First) 294 if (First[-1] == '*' && First[0] == '/') { 295 ++First; 296 return; 297 } 298 } 299 300 /// \returns True if the current single quotation mark character is a C++ 14 301 /// digit separator. 302 static bool isQuoteCppDigitSeparator(const char *const Start, 303 const char *const Cur, 304 const char *const End) { 305 assert(*Cur == '\'' && "expected quotation character"); 306 // skipLine called in places where we don't expect a valid number 307 // body before `start` on the same line, so always return false at the start. 308 if (Start == Cur) 309 return false; 310 // The previous character must be a valid PP number character. 311 // Make sure that the L, u, U, u8 prefixes don't get marked as a 312 // separator though. 313 char Prev = *(Cur - 1); 314 if (Prev == 'L' || Prev == 'U' || Prev == 'u') 315 return false; 316 if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') 317 return false; 318 if (!isPreprocessingNumberBody(Prev)) 319 return false; 320 // The next character should be a valid identifier body character. 321 return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); 322 } 323 324 static void skipLine(const char *&First, const char *const End) { 325 for (;;) { 326 assert(First <= End); 327 if (First == End) 328 return; 329 330 if (isVerticalWhitespace(*First)) { 331 skipNewline(First, End); 332 return; 333 } 334 const char *Start = First; 335 while (First != End && !isVerticalWhitespace(*First)) { 336 // Iterate over strings correctly to avoid comments and newlines. 337 if (*First == '"' || 338 (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { 339 if (isRawStringLiteral(Start, First)) 340 skipRawString(First, End); 341 else 342 skipString(First, End); 343 continue; 344 } 345 346 // Iterate over comments correctly. 347 if (*First != '/' || End - First < 2) { 348 ++First; 349 continue; 350 } 351 352 if (First[1] == '/') { 353 // "//...". 354 skipLineComment(First, End); 355 continue; 356 } 357 358 if (First[1] != '*') { 359 ++First; 360 continue; 361 } 362 363 // "/*...*/". 364 skipBlockComment(First, End); 365 } 366 if (First == End) 367 return; 368 369 // Skip over the newline. 370 unsigned Len = skipNewline(First, End); 371 if (!wasLineContinuation(First, Len)) // Continue past line-continuations. 372 break; 373 } 374 } 375 376 static void skipDirective(StringRef Name, const char *&First, 377 const char *const End) { 378 if (llvm::StringSwitch<bool>(Name) 379 .Case("warning", true) 380 .Case("error", true) 381 .Default(false)) 382 // Do not process quotes or comments. 383 skipToNewlineRaw(First, End); 384 else 385 skipLine(First, End); 386 } 387 388 void Minimizer::printToNewline(const char *&First, const char *const End) { 389 while (First != End && !isVerticalWhitespace(*First)) { 390 const char *Last = First; 391 do { 392 // Iterate over strings correctly to avoid comments and newlines. 393 if (*Last == '"' || *Last == '\'' || 394 (*Last == '<' && top() == pp_include)) { 395 if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) 396 skipRawString(Last, End); 397 else 398 skipString(Last, End); 399 continue; 400 } 401 if (*Last != '/' || End - Last < 2) { 402 ++Last; 403 continue; // Gather the rest up to print verbatim. 404 } 405 406 if (Last[1] != '/' && Last[1] != '*') { 407 ++Last; 408 continue; 409 } 410 411 // Deal with "//..." and "/*...*/". 412 append(First, findFirstTrailingSpace(First, Last)); 413 First = Last; 414 415 if (Last[1] == '/') { 416 skipLineComment(First, End); 417 return; 418 } 419 420 put(' '); 421 skipBlockComment(First, End); 422 skipOverSpaces(First, End); 423 Last = First; 424 } while (Last != End && !isVerticalWhitespace(*Last)); 425 426 // Print out the string. 427 const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); 428 if (Last == End || LastBeforeTrailingSpace == First || 429 LastBeforeTrailingSpace[-1] != '\\') { 430 append(First, LastBeforeTrailingSpace); 431 First = Last; 432 skipNewline(First, End); 433 return; 434 } 435 436 // Print up to the backslash, backing up over spaces. Preserve at least one 437 // space, as the space matters when tokens are separated by a line 438 // continuation. 439 append(First, findFirstTrailingSpace( 440 First, LastBeforeTrailingSpace - 1)); 441 442 First = Last; 443 skipNewline(First, End); 444 skipOverSpaces(First, End); 445 } 446 } 447 448 static void skipWhitespace(const char *&First, const char *const End) { 449 for (;;) { 450 assert(First <= End); 451 skipOverSpaces(First, End); 452 453 if (End - First < 2) 454 return; 455 456 if (First[0] == '\\' && isVerticalWhitespace(First[1])) { 457 skipNewline(++First, End); 458 continue; 459 } 460 461 // Check for a non-comment character. 462 if (First[0] != '/') 463 return; 464 465 // "// ...". 466 if (First[1] == '/') { 467 skipLineComment(First, End); 468 return; 469 } 470 471 // Cannot be a comment. 472 if (First[1] != '*') 473 return; 474 475 // "/*...*/". 476 skipBlockComment(First, End); 477 } 478 } 479 480 void Minimizer::printAdjacentModuleNameParts(const char *&First, 481 const char *const End) { 482 // Skip over parts of the body. 483 const char *Last = First; 484 do 485 ++Last; 486 while (Last != End && (isIdentifierBody(*Last) || *Last == '.')); 487 append(First, Last); 488 First = Last; 489 } 490 491 bool Minimizer::printAtImportBody(const char *&First, const char *const End) { 492 for (;;) { 493 skipWhitespace(First, End); 494 if (First == End) 495 return true; 496 497 if (isVerticalWhitespace(*First)) { 498 skipNewline(First, End); 499 continue; 500 } 501 502 // Found a semicolon. 503 if (*First == ';') { 504 put(*First++).put('\n'); 505 return false; 506 } 507 508 // Don't handle macro expansions inside @import for now. 509 if (!isIdentifierBody(*First) && *First != '.') 510 return true; 511 512 printAdjacentModuleNameParts(First, End); 513 } 514 } 515 516 void Minimizer::printDirectiveBody(const char *&First, const char *const End) { 517 skipWhitespace(First, End); // Skip initial whitespace. 518 printToNewline(First, End); 519 while (Out.back() == ' ') 520 Out.pop_back(); 521 put('\n'); 522 } 523 524 LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, 525 const char *const End) { 526 assert(isIdentifierBody(*First) && "invalid identifer"); 527 const char *Last = First + 1; 528 while (Last != End && isIdentifierBody(*Last)) 529 ++Last; 530 return Last; 531 } 532 533 LLVM_NODISCARD static const char * 534 getIdentifierContinuation(const char *First, const char *const End) { 535 if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) 536 return nullptr; 537 538 ++First; 539 skipNewline(First, End); 540 if (First == End) 541 return nullptr; 542 return isIdentifierBody(First[0]) ? First : nullptr; 543 } 544 545 Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, 546 const char *const End) { 547 const char *Last = lexRawIdentifier(First, End); 548 const char *Next = getIdentifierContinuation(Last, End); 549 if (LLVM_LIKELY(!Next)) 550 return IdInfo{Last, StringRef(First, Last - First)}; 551 552 // Slow path, where identifiers are split over lines. 553 SmallVector<char, 64> Id(First, Last); 554 while (Next) { 555 Last = lexRawIdentifier(Next, End); 556 Id.append(Next, Last); 557 Next = getIdentifierContinuation(Last, End); 558 } 559 return IdInfo{ 560 Last, 561 SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; 562 } 563 564 void Minimizer::printAdjacentMacroArgs(const char *&First, 565 const char *const End) { 566 // Skip over parts of the body. 567 const char *Last = First; 568 do 569 ++Last; 570 while (Last != End && 571 (isIdentifierBody(*Last) || *Last == '.' || *Last == ',')); 572 append(First, Last); 573 First = Last; 574 } 575 576 bool Minimizer::printMacroArgs(const char *&First, const char *const End) { 577 assert(*First == '('); 578 put(*First++); 579 for (;;) { 580 skipWhitespace(First, End); 581 if (First == End) 582 return true; 583 584 if (*First == ')') { 585 put(*First++); 586 return false; 587 } 588 589 // This is intentionally fairly liberal. 590 if (!(isIdentifierBody(*First) || *First == '.' || *First == ',')) 591 return true; 592 593 printAdjacentMacroArgs(First, End); 594 } 595 } 596 597 /// Looks for an identifier starting from Last. 598 /// 599 /// Updates "First" to just past the next identifier, if any. Returns true iff 600 /// the identifier matches "Id". 601 bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, 602 const char *const End) { 603 skipWhitespace(First, End); 604 if (First == End || !isIdentifierHead(*First)) 605 return false; 606 607 IdInfo FoundId = lexIdentifier(First, End); 608 First = FoundId.Last; 609 return FoundId.Name == Id; 610 } 611 612 bool Minimizer::lexAt(const char *&First, const char *const End) { 613 // Handle "@import". 614 const char *ImportLoc = First++; 615 if (!isNextIdentifier("import", First, End)) { 616 skipLine(First, End); 617 return false; 618 } 619 makeToken(decl_at_import); 620 append("@import "); 621 if (printAtImportBody(First, End)) 622 return reportError( 623 ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); 624 skipWhitespace(First, End); 625 if (First == End) 626 return false; 627 if (!isVerticalWhitespace(*First)) 628 return reportError( 629 ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); 630 skipNewline(First, End); 631 return false; 632 } 633 634 bool Minimizer::lexModule(const char *&First, const char *const End) { 635 IdInfo Id = lexIdentifier(First, End); 636 First = Id.Last; 637 bool Export = false; 638 if (Id.Name == "export") { 639 Export = true; 640 skipWhitespace(First, End); 641 if (!isIdentifierBody(*First)) { 642 skipLine(First, End); 643 return false; 644 } 645 Id = lexIdentifier(First, End); 646 First = Id.Last; 647 } 648 649 if (Id.Name != "module" && Id.Name != "import") { 650 skipLine(First, End); 651 return false; 652 } 653 654 skipWhitespace(First, End); 655 656 // Ignore this as a module directive if the next character can't be part of 657 // an import. 658 659 switch (*First) { 660 case ':': 661 case '<': 662 case '"': 663 break; 664 default: 665 if (!isIdentifierBody(*First)) { 666 skipLine(First, End); 667 return false; 668 } 669 } 670 671 if (Export) { 672 makeToken(cxx_export_decl); 673 append("export "); 674 } 675 676 if (Id.Name == "module") 677 makeToken(cxx_module_decl); 678 else 679 makeToken(cxx_import_decl); 680 append(Id.Name); 681 append(" "); 682 printToNewline(First, End); 683 append("\n"); 684 return false; 685 } 686 687 bool Minimizer::lexDefine(const char *&First, const char *const End) { 688 makeToken(pp_define); 689 append("#define "); 690 skipWhitespace(First, End); 691 692 if (!isIdentifierHead(*First)) 693 return reportError(First, diag::err_pp_macro_not_identifier); 694 695 IdInfo Id = lexIdentifier(First, End); 696 const char *Last = Id.Last; 697 append(Id.Name); 698 if (Last == End) 699 return false; 700 if (*Last == '(') { 701 size_t Size = Out.size(); 702 if (printMacroArgs(Last, End)) { 703 // Be robust to bad macro arguments, since they can show up in disabled 704 // code. 705 Out.resize(Size); 706 append("(/* invalid */\n"); 707 skipLine(Last, End); 708 return false; 709 } 710 } 711 skipWhitespace(Last, End); 712 if (Last == End) 713 return false; 714 if (!isVerticalWhitespace(*Last)) 715 put(' '); 716 printDirectiveBody(Last, End); 717 First = Last; 718 return false; 719 } 720 721 bool Minimizer::lexPragma(const char *&First, const char *const End) { 722 // #pragma. 723 skipWhitespace(First, End); 724 if (First == End || !isIdentifierHead(*First)) 725 return false; 726 727 IdInfo FoundId = lexIdentifier(First, End); 728 First = FoundId.Last; 729 if (FoundId.Name == "once") { 730 // #pragma once 731 skipLine(First, End); 732 makeToken(pp_pragma_once); 733 append("#pragma once\n"); 734 return false; 735 } 736 737 if (FoundId.Name != "clang") { 738 skipLine(First, End); 739 return false; 740 } 741 742 // #pragma clang. 743 if (!isNextIdentifier("module", First, End)) { 744 skipLine(First, End); 745 return false; 746 } 747 748 // #pragma clang module. 749 if (!isNextIdentifier("import", First, End)) { 750 skipLine(First, End); 751 return false; 752 } 753 754 // #pragma clang module import. 755 makeToken(pp_pragma_import); 756 append("#pragma clang module import "); 757 printDirectiveBody(First, End); 758 return false; 759 } 760 761 bool Minimizer::lexEndif(const char *&First, const char *const End) { 762 // Strip out "#else" if it's empty. 763 if (top() == pp_else) 764 popToken(); 765 766 // Strip out "#elif" if they're empty. 767 while (top() == pp_elif) 768 popToken(); 769 770 // If "#if" is empty, strip it and skip the "#endif". 771 if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) { 772 popToken(); 773 skipLine(First, End); 774 return false; 775 } 776 777 return lexDefault(pp_endif, "endif", First, End); 778 } 779 780 bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, 781 const char *&First, const char *const End) { 782 makeToken(Kind); 783 put('#').append(Directive).put(' '); 784 printDirectiveBody(First, End); 785 return false; 786 } 787 788 static bool isStartOfRelevantLine(char First) { 789 switch (First) { 790 case '#': 791 case '@': 792 case 'i': 793 case 'e': 794 case 'm': 795 return true; 796 } 797 return false; 798 } 799 800 bool Minimizer::lexPPLine(const char *&First, const char *const End) { 801 assert(First != End); 802 803 skipWhitespace(First, End); 804 assert(First <= End); 805 if (First == End) 806 return false; 807 808 if (!isStartOfRelevantLine(*First)) { 809 skipLine(First, End); 810 assert(First <= End); 811 return false; 812 } 813 814 // Handle "@import". 815 if (*First == '@') 816 return lexAt(First, End); 817 818 if (*First == 'i' || *First == 'e' || *First == 'm') 819 return lexModule(First, End); 820 821 // Handle preprocessing directives. 822 ++First; // Skip over '#'. 823 skipWhitespace(First, End); 824 825 if (First == End) 826 return reportError(First, diag::err_pp_expected_eol); 827 828 if (!isIdentifierHead(*First)) { 829 skipLine(First, End); 830 return false; 831 } 832 833 // Figure out the token. 834 IdInfo Id = lexIdentifier(First, End); 835 First = Id.Last; 836 auto Kind = llvm::StringSwitch<TokenKind>(Id.Name) 837 .Case("include", pp_include) 838 .Case("__include_macros", pp___include_macros) 839 .Case("define", pp_define) 840 .Case("undef", pp_undef) 841 .Case("import", pp_import) 842 .Case("include_next", pp_include_next) 843 .Case("if", pp_if) 844 .Case("ifdef", pp_ifdef) 845 .Case("ifndef", pp_ifndef) 846 .Case("elif", pp_elif) 847 .Case("else", pp_else) 848 .Case("endif", pp_endif) 849 .Case("pragma", pp_pragma_import) 850 .Default(pp_none); 851 if (Kind == pp_none) { 852 skipDirective(Id.Name, First, End); 853 return false; 854 } 855 856 if (Kind == pp_endif) 857 return lexEndif(First, End); 858 859 if (Kind == pp_define) 860 return lexDefine(First, End); 861 862 if (Kind == pp_pragma_import) 863 return lexPragma(First, End); 864 865 // Everything else. 866 return lexDefault(Kind, Id.Name, First, End); 867 } 868 869 static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { 870 if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && 871 First[2] == '\xbf') 872 First += 3; 873 } 874 875 bool Minimizer::minimizeImpl(const char *First, const char *const End) { 876 skipUTF8ByteOrderMark(First, End); 877 while (First != End) 878 if (lexPPLine(First, End)) 879 return true; 880 return false; 881 } 882 883 bool Minimizer::minimize() { 884 bool Error = minimizeImpl(Input.begin(), Input.end()); 885 886 if (!Error) { 887 // Add a trailing newline and an EOF on success. 888 if (!Out.empty() && Out.back() != '\n') 889 Out.push_back('\n'); 890 makeToken(pp_eof); 891 } 892 893 // Null-terminate the output. This way the memory buffer that's passed to 894 // Clang will not have to worry about the terminating '\0'. 895 Out.push_back(0); 896 Out.pop_back(); 897 return Error; 898 } 899 900 bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( 901 ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) { 902 struct Directive { 903 enum DirectiveKind { 904 If, // if/ifdef/ifndef 905 Else // elif,else 906 }; 907 int Offset; 908 DirectiveKind Kind; 909 }; 910 llvm::SmallVector<Directive, 32> Offsets; 911 for (const Token &T : Input) { 912 switch (T.K) { 913 case pp_if: 914 case pp_ifdef: 915 case pp_ifndef: 916 Offsets.push_back({T.Offset, Directive::If}); 917 break; 918 919 case pp_elif: 920 case pp_else: { 921 if (Offsets.empty()) 922 return true; 923 int PreviousOffset = Offsets.back().Offset; 924 Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); 925 Offsets.push_back({T.Offset, Directive::Else}); 926 break; 927 } 928 929 case pp_endif: { 930 if (Offsets.empty()) 931 return true; 932 int PreviousOffset = Offsets.back().Offset; 933 Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); 934 do { 935 Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind; 936 if (Kind == Directive::If) 937 break; 938 } while (!Offsets.empty()); 939 break; 940 } 941 default: 942 break; 943 } 944 } 945 return false; 946 } 947 948 bool clang::minimizeSourceToDependencyDirectives( 949 StringRef Input, SmallVectorImpl<char> &Output, 950 SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags, 951 SourceLocation InputSourceLoc) { 952 Output.clear(); 953 Tokens.clear(); 954 return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); 955 } 956