1 //===- DependencyDirectivesScanner.cpp ------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is the interface for scanning header and source files to get the 11 /// minimum necessary preprocessor directives for evaluating includes. It 12 /// reduces the source down to #define, #include, #import, @import, and any 13 /// conditional preprocessor logic that contains one of those. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "clang/Lex/DependencyDirectivesScanner.h" 18 #include "clang/Basic/CharInfo.h" 19 #include "clang/Basic/Diagnostic.h" 20 #include "clang/Lex/LexDiagnostic.h" 21 #include "clang/Lex/Lexer.h" 22 #include "llvm/ADT/ScopeExit.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/StringMap.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include <optional> 27 28 using namespace clang; 29 using namespace clang::dependency_directives_scan; 30 using namespace llvm; 31 32 namespace { 33 34 struct DirectiveWithTokens { 35 DirectiveKind Kind; 36 unsigned NumTokens; 37 38 DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) 39 : Kind(Kind), NumTokens(NumTokens) {} 40 }; 41 42 /// Does an efficient "scan" of the sources to detect the presence of 43 /// preprocessor (or module import) directives and collects the raw lexed tokens 44 /// for those directives so that the \p Lexer can "replay" them when the file is 45 /// included. 46 /// 47 /// Note that the behavior of the raw lexer is affected by the language mode, 48 /// while at this point we want to do a scan and collect tokens once, 49 /// irrespective of the language mode that the file will get included in. To 50 /// compensate for that the \p Lexer, while "replaying", will adjust a token 51 /// where appropriate, when it could affect the preprocessor's state. 52 /// For example in a directive like 53 /// 54 /// \code 55 /// #if __has_cpp_attribute(clang::fallthrough) 56 /// \endcode 57 /// 58 /// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 59 /// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' 60 /// while in C++ mode. 61 struct Scanner { 62 Scanner(StringRef Input, 63 SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 64 DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) 65 : Input(Input), Tokens(Tokens), Diags(Diags), 66 InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), 67 TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), 68 Input.end()) {} 69 70 static LangOptions getLangOptsForDepScanning() { 71 LangOptions LangOpts; 72 // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. 73 LangOpts.ObjC = true; 74 LangOpts.LineComment = true; 75 return LangOpts; 76 } 77 78 /// Lex the provided source and emit the directive tokens. 79 /// 80 /// \returns True on error. 81 bool scan(SmallVectorImpl<Directive> &Directives); 82 83 private: 84 /// Lexes next token and advances \p First and the \p Lexer. 85 [[nodiscard]] dependency_directives_scan::Token & 86 lexToken(const char *&First, const char *const End); 87 88 dependency_directives_scan::Token &lexIncludeFilename(const char *&First, 89 const char *const End); 90 91 void skipLine(const char *&First, const char *const End); 92 void skipDirective(StringRef Name, const char *&First, const char *const End); 93 94 /// Lexes next token and if it is identifier returns its string, otherwise 95 /// it skips the current line and returns \p std::nullopt. 96 /// 97 /// In any case (whatever the token kind) \p First and the \p Lexer will 98 /// advance beyond the token. 99 [[nodiscard]] Optional<StringRef> 100 tryLexIdentifierOrSkipLine(const char *&First, const char *const End); 101 102 /// Used when it is certain that next token is an identifier. 103 [[nodiscard]] StringRef lexIdentifier(const char *&First, 104 const char *const End); 105 106 /// Lexes next token and returns true iff it is an identifier that matches \p 107 /// Id, otherwise it skips the current line and returns false. 108 /// 109 /// In any case (whatever the token kind) \p First and the \p Lexer will 110 /// advance beyond the token. 111 [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id, 112 const char *&First, 113 const char *const End); 114 115 [[nodiscard]] bool scanImpl(const char *First, const char *const End); 116 [[nodiscard]] bool lexPPLine(const char *&First, const char *const End); 117 [[nodiscard]] bool lexAt(const char *&First, const char *const End); 118 [[nodiscard]] bool lexModule(const char *&First, const char *const End); 119 [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First, 120 const char *const End); 121 [[nodiscard]] bool lexPragma(const char *&First, const char *const End); 122 [[nodiscard]] bool lexEndif(const char *&First, const char *const End); 123 [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First, 124 const char *const End); 125 [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind, 126 const char *&First, 127 const char *const End); 128 void lexPPDirectiveBody(const char *&First, const char *const End); 129 130 DirectiveWithTokens &pushDirective(DirectiveKind Kind) { 131 Tokens.append(CurDirToks); 132 DirsWithToks.emplace_back(Kind, CurDirToks.size()); 133 CurDirToks.clear(); 134 return DirsWithToks.back(); 135 } 136 void popDirective() { 137 Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); 138 } 139 DirectiveKind topDirective() const { 140 return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; 141 } 142 143 unsigned getOffsetAt(const char *CurPtr) const { 144 return CurPtr - Input.data(); 145 } 146 147 /// Reports a diagnostic if the diagnostic engine is provided. Always returns 148 /// true at the end. 149 bool reportError(const char *CurPtr, unsigned Err); 150 151 StringMap<char> SplitIds; 152 StringRef Input; 153 SmallVectorImpl<dependency_directives_scan::Token> &Tokens; 154 DiagnosticsEngine *Diags; 155 SourceLocation InputSourceLoc; 156 157 const char *LastTokenPtr = nullptr; 158 /// Keeps track of the tokens for the currently lexed directive. Once a 159 /// directive is fully lexed and "committed" then the tokens get appended to 160 /// \p Tokens and \p CurDirToks is cleared for the next directive. 161 SmallVector<dependency_directives_scan::Token, 32> CurDirToks; 162 /// The directives that were lexed along with the number of tokens that each 163 /// directive contains. The tokens of all the directives are kept in \p Tokens 164 /// vector, in the same order as the directives order in \p DirsWithToks. 165 SmallVector<DirectiveWithTokens, 64> DirsWithToks; 166 LangOptions LangOpts; 167 Lexer TheLexer; 168 }; 169 170 } // end anonymous namespace 171 172 bool Scanner::reportError(const char *CurPtr, unsigned Err) { 173 if (!Diags) 174 return true; 175 assert(CurPtr >= Input.data() && "invalid buffer ptr"); 176 Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); 177 return true; 178 } 179 180 static void skipOverSpaces(const char *&First, const char *const End) { 181 while (First != End && isHorizontalWhitespace(*First)) 182 ++First; 183 } 184 185 [[nodiscard]] static bool isRawStringLiteral(const char *First, 186 const char *Current) { 187 assert(First <= Current); 188 189 // Check if we can even back up. 190 if (*Current != '"' || First == Current) 191 return false; 192 193 // Check for an "R". 194 --Current; 195 if (*Current != 'R') 196 return false; 197 if (First == Current || !isAsciiIdentifierContinue(*--Current)) 198 return true; 199 200 // Check for a prefix of "u", "U", or "L". 201 if (*Current == 'u' || *Current == 'U' || *Current == 'L') 202 return First == Current || !isAsciiIdentifierContinue(*--Current); 203 204 // Check for a prefix of "u8". 205 if (*Current != '8' || First == Current || *Current-- != 'u') 206 return false; 207 return First == Current || !isAsciiIdentifierContinue(*--Current); 208 } 209 210 static void skipRawString(const char *&First, const char *const End) { 211 assert(First[0] == '"'); 212 assert(First[-1] == 'R'); 213 214 const char *Last = ++First; 215 while (Last != End && *Last != '(') 216 ++Last; 217 if (Last == End) { 218 First = Last; // Hit the end... just give up. 219 return; 220 } 221 222 StringRef Terminator(First, Last - First); 223 for (;;) { 224 // Move First to just past the next ")". 225 First = Last; 226 while (First != End && *First != ')') 227 ++First; 228 if (First == End) 229 return; 230 ++First; 231 232 // Look ahead for the terminator sequence. 233 Last = First; 234 while (Last != End && size_t(Last - First) < Terminator.size() && 235 Terminator[Last - First] == *Last) 236 ++Last; 237 238 // Check if we hit it (or the end of the file). 239 if (Last == End) { 240 First = Last; 241 return; 242 } 243 if (size_t(Last - First) < Terminator.size()) 244 continue; 245 if (*Last != '"') 246 continue; 247 First = Last + 1; 248 return; 249 } 250 } 251 252 // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) 253 static unsigned isEOL(const char *First, const char *const End) { 254 if (First == End) 255 return 0; 256 if (End - First > 1 && isVerticalWhitespace(First[0]) && 257 isVerticalWhitespace(First[1]) && First[0] != First[1]) 258 return 2; 259 return !!isVerticalWhitespace(First[0]); 260 } 261 262 static void skipString(const char *&First, const char *const End) { 263 assert(*First == '\'' || *First == '"' || *First == '<'); 264 const char Terminator = *First == '<' ? '>' : *First; 265 for (++First; First != End && *First != Terminator; ++First) { 266 // String and character literals don't extend past the end of the line. 267 if (isVerticalWhitespace(*First)) 268 return; 269 if (*First != '\\') 270 continue; 271 // Skip past backslash to the next character. This ensures that the 272 // character right after it is skipped as well, which matters if it's 273 // the terminator. 274 if (++First == End) 275 return; 276 if (!isWhitespace(*First)) 277 continue; 278 // Whitespace after the backslash might indicate a line continuation. 279 const char *FirstAfterBackslashPastSpace = First; 280 skipOverSpaces(FirstAfterBackslashPastSpace, End); 281 if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { 282 // Advance the character pointer to the next line for the next 283 // iteration. 284 First = FirstAfterBackslashPastSpace + NLSize - 1; 285 } 286 } 287 if (First != End) 288 ++First; // Finish off the string. 289 } 290 291 // Returns the length of the skipped newline 292 static unsigned skipNewline(const char *&First, const char *End) { 293 if (First == End) 294 return 0; 295 assert(isVerticalWhitespace(*First)); 296 unsigned Len = isEOL(First, End); 297 assert(Len && "expected newline"); 298 First += Len; 299 return Len; 300 } 301 302 static bool wasLineContinuation(const char *First, unsigned EOLLen) { 303 return *(First - (int)EOLLen - 1) == '\\'; 304 } 305 306 static void skipToNewlineRaw(const char *&First, const char *const End) { 307 for (;;) { 308 if (First == End) 309 return; 310 311 unsigned Len = isEOL(First, End); 312 if (Len) 313 return; 314 315 do { 316 if (++First == End) 317 return; 318 Len = isEOL(First, End); 319 } while (!Len); 320 321 if (First[-1] != '\\') 322 return; 323 324 First += Len; 325 // Keep skipping lines... 326 } 327 } 328 329 static void skipLineComment(const char *&First, const char *const End) { 330 assert(First[0] == '/' && First[1] == '/'); 331 First += 2; 332 skipToNewlineRaw(First, End); 333 } 334 335 static void skipBlockComment(const char *&First, const char *const End) { 336 assert(First[0] == '/' && First[1] == '*'); 337 if (End - First < 4) { 338 First = End; 339 return; 340 } 341 for (First += 3; First != End; ++First) 342 if (First[-1] == '*' && First[0] == '/') { 343 ++First; 344 return; 345 } 346 } 347 348 /// \returns True if the current single quotation mark character is a C++ 14 349 /// digit separator. 350 static bool isQuoteCppDigitSeparator(const char *const Start, 351 const char *const Cur, 352 const char *const End) { 353 assert(*Cur == '\'' && "expected quotation character"); 354 // skipLine called in places where we don't expect a valid number 355 // body before `start` on the same line, so always return false at the start. 356 if (Start == Cur) 357 return false; 358 // The previous character must be a valid PP number character. 359 // Make sure that the L, u, U, u8 prefixes don't get marked as a 360 // separator though. 361 char Prev = *(Cur - 1); 362 if (Prev == 'L' || Prev == 'U' || Prev == 'u') 363 return false; 364 if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') 365 return false; 366 if (!isPreprocessingNumberBody(Prev)) 367 return false; 368 // The next character should be a valid identifier body character. 369 return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); 370 } 371 372 void Scanner::skipLine(const char *&First, const char *const End) { 373 for (;;) { 374 assert(First <= End); 375 if (First == End) 376 return; 377 378 if (isVerticalWhitespace(*First)) { 379 skipNewline(First, End); 380 return; 381 } 382 const char *Start = First; 383 while (First != End && !isVerticalWhitespace(*First)) { 384 // Iterate over strings correctly to avoid comments and newlines. 385 if (*First == '"' || 386 (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { 387 LastTokenPtr = First; 388 if (isRawStringLiteral(Start, First)) 389 skipRawString(First, End); 390 else 391 skipString(First, End); 392 continue; 393 } 394 395 // Iterate over comments correctly. 396 if (*First != '/' || End - First < 2) { 397 LastTokenPtr = First; 398 ++First; 399 continue; 400 } 401 402 if (First[1] == '/') { 403 // "//...". 404 skipLineComment(First, End); 405 continue; 406 } 407 408 if (First[1] != '*') { 409 LastTokenPtr = First; 410 ++First; 411 continue; 412 } 413 414 // "/*...*/". 415 skipBlockComment(First, End); 416 } 417 if (First == End) 418 return; 419 420 // Skip over the newline. 421 unsigned Len = skipNewline(First, End); 422 if (!wasLineContinuation(First, Len)) // Continue past line-continuations. 423 break; 424 } 425 } 426 427 void Scanner::skipDirective(StringRef Name, const char *&First, 428 const char *const End) { 429 if (llvm::StringSwitch<bool>(Name) 430 .Case("warning", true) 431 .Case("error", true) 432 .Default(false)) 433 // Do not process quotes or comments. 434 skipToNewlineRaw(First, End); 435 else 436 skipLine(First, End); 437 } 438 439 static void skipWhitespace(const char *&First, const char *const End) { 440 for (;;) { 441 assert(First <= End); 442 skipOverSpaces(First, End); 443 444 if (End - First < 2) 445 return; 446 447 if (First[0] == '\\' && isVerticalWhitespace(First[1])) { 448 skipNewline(++First, End); 449 continue; 450 } 451 452 // Check for a non-comment character. 453 if (First[0] != '/') 454 return; 455 456 // "// ...". 457 if (First[1] == '/') { 458 skipLineComment(First, End); 459 return; 460 } 461 462 // Cannot be a comment. 463 if (First[1] != '*') 464 return; 465 466 // "/*...*/". 467 skipBlockComment(First, End); 468 } 469 } 470 471 bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, 472 const char *const End) { 473 const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; 474 for (;;) { 475 const dependency_directives_scan::Token &Tok = lexToken(First, End); 476 if (Tok.is(tok::eof)) 477 return reportError( 478 DirectiveLoc, 479 diag::err_dep_source_scanner_missing_semi_after_at_import); 480 if (Tok.is(tok::semi)) 481 break; 482 } 483 pushDirective(Kind); 484 skipWhitespace(First, End); 485 if (First == End) 486 return false; 487 if (!isVerticalWhitespace(*First)) 488 return reportError( 489 DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); 490 skipNewline(First, End); 491 return false; 492 } 493 494 dependency_directives_scan::Token &Scanner::lexToken(const char *&First, 495 const char *const End) { 496 clang::Token Tok; 497 TheLexer.LexFromRawLexer(Tok); 498 First = Input.data() + TheLexer.getCurrentBufferOffset(); 499 assert(First <= End); 500 501 unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 502 CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 503 Tok.getFlags()); 504 return CurDirToks.back(); 505 } 506 507 dependency_directives_scan::Token & 508 Scanner::lexIncludeFilename(const char *&First, const char *const End) { 509 clang::Token Tok; 510 TheLexer.LexIncludeFilename(Tok); 511 First = Input.data() + TheLexer.getCurrentBufferOffset(); 512 assert(First <= End); 513 514 unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 515 CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 516 Tok.getFlags()); 517 return CurDirToks.back(); 518 } 519 520 void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { 521 while (true) { 522 const dependency_directives_scan::Token &Tok = lexToken(First, End); 523 if (Tok.is(tok::eod)) 524 break; 525 } 526 } 527 528 [[nodiscard]] Optional<StringRef> 529 Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { 530 const dependency_directives_scan::Token &Tok = lexToken(First, End); 531 if (Tok.isNot(tok::raw_identifier)) { 532 if (!Tok.is(tok::eod)) 533 skipLine(First, End); 534 return std::nullopt; 535 } 536 537 bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; 538 if (LLVM_LIKELY(!NeedsCleaning)) 539 return Input.slice(Tok.Offset, Tok.getEnd()); 540 541 SmallString<64> Spelling; 542 Spelling.resize(Tok.Length); 543 544 unsigned SpellingLength = 0; 545 const char *BufPtr = Input.begin() + Tok.Offset; 546 const char *AfterIdent = Input.begin() + Tok.getEnd(); 547 while (BufPtr < AfterIdent) { 548 unsigned Size; 549 Spelling[SpellingLength++] = 550 Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); 551 BufPtr += Size; 552 } 553 554 return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) 555 .first->first(); 556 } 557 558 StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { 559 Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); 560 assert(Id && "expected identifier token"); 561 return *Id; 562 } 563 564 bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, 565 const char *const End) { 566 if (Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End)) { 567 if (*FoundId == Id) 568 return true; 569 skipLine(First, End); 570 } 571 return false; 572 } 573 574 bool Scanner::lexAt(const char *&First, const char *const End) { 575 // Handle "@import". 576 577 // Lex '@'. 578 const dependency_directives_scan::Token &AtTok = lexToken(First, End); 579 assert(AtTok.is(tok::at)); 580 (void)AtTok; 581 582 if (!isNextIdentifierOrSkipLine("import", First, End)) 583 return false; 584 return lexModuleDirectiveBody(decl_at_import, First, End); 585 } 586 587 bool Scanner::lexModule(const char *&First, const char *const End) { 588 StringRef Id = lexIdentifier(First, End); 589 bool Export = false; 590 if (Id == "export") { 591 Export = true; 592 Optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End); 593 if (!NextId) 594 return false; 595 Id = *NextId; 596 } 597 598 if (Id != "module" && Id != "import") { 599 skipLine(First, End); 600 return false; 601 } 602 603 skipWhitespace(First, End); 604 605 // Ignore this as a module directive if the next character can't be part of 606 // an import. 607 608 switch (*First) { 609 case ':': 610 case '<': 611 case '"': 612 break; 613 default: 614 if (!isAsciiIdentifierContinue(*First)) { 615 skipLine(First, End); 616 return false; 617 } 618 } 619 620 TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); 621 622 DirectiveKind Kind; 623 if (Id == "module") 624 Kind = Export ? cxx_export_module_decl : cxx_module_decl; 625 else 626 Kind = Export ? cxx_export_import_decl : cxx_import_decl; 627 628 return lexModuleDirectiveBody(Kind, First, End); 629 } 630 631 bool Scanner::lexPragma(const char *&First, const char *const End) { 632 Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 633 if (!FoundId) 634 return false; 635 636 StringRef Id = *FoundId; 637 auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 638 .Case("once", pp_pragma_once) 639 .Case("push_macro", pp_pragma_push_macro) 640 .Case("pop_macro", pp_pragma_pop_macro) 641 .Case("include_alias", pp_pragma_include_alias) 642 .Default(pp_none); 643 if (Kind != pp_none) { 644 lexPPDirectiveBody(First, End); 645 pushDirective(Kind); 646 return false; 647 } 648 649 if (Id != "clang") { 650 skipLine(First, End); 651 return false; 652 } 653 654 // #pragma clang. 655 if (!isNextIdentifierOrSkipLine("module", First, End)) 656 return false; 657 658 // #pragma clang module. 659 if (!isNextIdentifierOrSkipLine("import", First, End)) 660 return false; 661 662 // #pragma clang module import. 663 lexPPDirectiveBody(First, End); 664 pushDirective(pp_pragma_import); 665 return false; 666 } 667 668 bool Scanner::lexEndif(const char *&First, const char *const End) { 669 // Strip out "#else" if it's empty. 670 if (topDirective() == pp_else) 671 popDirective(); 672 673 // If "#ifdef" is empty, strip it and skip the "#endif". 674 // 675 // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, 676 // we can skip empty `#if` and `#elif` blocks as well after scanning for a 677 // literal __has_include in the condition. Even without that rule we could 678 // drop the tokens if we scan for identifiers in the condition and find none. 679 if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) { 680 popDirective(); 681 skipLine(First, End); 682 return false; 683 } 684 685 return lexDefault(pp_endif, First, End); 686 } 687 688 bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, 689 const char *const End) { 690 lexPPDirectiveBody(First, End); 691 pushDirective(Kind); 692 return false; 693 } 694 695 static bool isStartOfRelevantLine(char First) { 696 switch (First) { 697 case '#': 698 case '@': 699 case 'i': 700 case 'e': 701 case 'm': 702 return true; 703 } 704 return false; 705 } 706 707 bool Scanner::lexPPLine(const char *&First, const char *const End) { 708 assert(First != End); 709 710 skipWhitespace(First, End); 711 assert(First <= End); 712 if (First == End) 713 return false; 714 715 if (!isStartOfRelevantLine(*First)) { 716 skipLine(First, End); 717 assert(First <= End); 718 return false; 719 } 720 721 LastTokenPtr = First; 722 723 TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); 724 725 auto ScEx1 = make_scope_exit([&]() { 726 /// Clear Scanner's CurDirToks before returning, in case we didn't push a 727 /// new directive. 728 CurDirToks.clear(); 729 }); 730 731 // Handle "@import". 732 if (*First == '@') 733 return lexAt(First, End); 734 735 if (*First == 'i' || *First == 'e' || *First == 'm') 736 return lexModule(First, End); 737 738 // Handle preprocessing directives. 739 740 TheLexer.setParsingPreprocessorDirective(true); 741 auto ScEx2 = make_scope_exit( 742 [&]() { TheLexer.setParsingPreprocessorDirective(false); }); 743 744 // Lex '#'. 745 const dependency_directives_scan::Token &HashTok = lexToken(First, End); 746 if (HashTok.is(tok::hashhash)) { 747 // A \p tok::hashhash at this location is passed by the preprocessor to the 748 // parser to interpret, like any other token. So for dependency scanning 749 // skip it like a normal token not affecting the preprocessor. 750 skipLine(First, End); 751 assert(First <= End); 752 return false; 753 } 754 assert(HashTok.is(tok::hash)); 755 (void)HashTok; 756 757 Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 758 if (!FoundId) 759 return false; 760 761 StringRef Id = *FoundId; 762 763 if (Id == "pragma") 764 return lexPragma(First, End); 765 766 auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 767 .Case("include", pp_include) 768 .Case("__include_macros", pp___include_macros) 769 .Case("define", pp_define) 770 .Case("undef", pp_undef) 771 .Case("import", pp_import) 772 .Case("include_next", pp_include_next) 773 .Case("if", pp_if) 774 .Case("ifdef", pp_ifdef) 775 .Case("ifndef", pp_ifndef) 776 .Case("elif", pp_elif) 777 .Case("elifdef", pp_elifdef) 778 .Case("elifndef", pp_elifndef) 779 .Case("else", pp_else) 780 .Case("endif", pp_endif) 781 .Default(pp_none); 782 if (Kind == pp_none) { 783 skipDirective(Id, First, End); 784 return false; 785 } 786 787 if (Kind == pp_endif) 788 return lexEndif(First, End); 789 790 switch (Kind) { 791 case pp_include: 792 case pp___include_macros: 793 case pp_include_next: 794 case pp_import: 795 lexIncludeFilename(First, End); 796 break; 797 default: 798 break; 799 } 800 801 // Everything else. 802 return lexDefault(Kind, First, End); 803 } 804 805 static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { 806 if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && 807 First[2] == '\xbf') 808 First += 3; 809 } 810 811 bool Scanner::scanImpl(const char *First, const char *const End) { 812 skipUTF8ByteOrderMark(First, End); 813 while (First != End) 814 if (lexPPLine(First, End)) 815 return true; 816 return false; 817 } 818 819 bool Scanner::scan(SmallVectorImpl<Directive> &Directives) { 820 bool Error = scanImpl(Input.begin(), Input.end()); 821 822 if (!Error) { 823 // Add an EOF on success. 824 if (LastTokenPtr && 825 (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset)) 826 pushDirective(tokens_present_before_eof); 827 pushDirective(pp_eof); 828 } 829 830 ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens; 831 for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { 832 assert(RemainingTokens.size() >= DirWithToks.NumTokens); 833 Directives.emplace_back(DirWithToks.Kind, 834 RemainingTokens.take_front(DirWithToks.NumTokens)); 835 RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); 836 } 837 assert(RemainingTokens.empty()); 838 839 return Error; 840 } 841 842 bool clang::scanSourceForDependencyDirectives( 843 StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 844 SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags, 845 SourceLocation InputSourceLoc) { 846 return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); 847 } 848 849 void clang::printDependencyDirectivesAsSource( 850 StringRef Source, 851 ArrayRef<dependency_directives_scan::Directive> Directives, 852 llvm::raw_ostream &OS) { 853 // Add a space separator where it is convenient for testing purposes. 854 auto needsSpaceSeparator = 855 [](tok::TokenKind Prev, 856 const dependency_directives_scan::Token &Tok) -> bool { 857 if (Prev == Tok.Kind) 858 return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, 859 tok::r_square); 860 if (Prev == tok::raw_identifier && 861 Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, 862 tok::char_constant, tok::header_name)) 863 return true; 864 if (Prev == tok::r_paren && 865 Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, 866 tok::char_constant, tok::unknown)) 867 return true; 868 if (Prev == tok::comma && 869 Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) 870 return true; 871 return false; 872 }; 873 874 for (const dependency_directives_scan::Directive &Directive : Directives) { 875 if (Directive.Kind == tokens_present_before_eof) 876 OS << "<TokBeforeEOF>"; 877 Optional<tok::TokenKind> PrevTokenKind; 878 for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { 879 if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) 880 OS << ' '; 881 PrevTokenKind = Tok.Kind; 882 OS << Source.slice(Tok.Offset, Tok.getEnd()); 883 } 884 } 885 } 886