1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 15 #define LLVM_CLANG_LEX_PREPROCESSOR_H 16 17 #include "clang/Basic/Diagnostic.h" 18 #include "clang/Basic/DiagnosticIDs.h" 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/LangOptions.h" 22 #include "clang/Basic/Module.h" 23 #include "clang/Basic/SourceLocation.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Basic/TokenKinds.h" 26 #include "clang/Lex/HeaderSearch.h" 27 #include "clang/Lex/Lexer.h" 28 #include "clang/Lex/MacroInfo.h" 29 #include "clang/Lex/ModuleLoader.h" 30 #include "clang/Lex/ModuleMap.h" 31 #include "clang/Lex/PPCallbacks.h" 32 #include "clang/Lex/PPEmbedParameters.h" 33 #include "clang/Lex/Token.h" 34 #include "clang/Lex/TokenLexer.h" 35 #include "clang/Support/Compiler.h" 36 #include "llvm/ADT/APSInt.h" 37 #include "llvm/ADT/ArrayRef.h" 38 #include "llvm/ADT/DenseMap.h" 39 #include "llvm/ADT/FoldingSet.h" 40 #include "llvm/ADT/FunctionExtras.h" 41 #include "llvm/ADT/PointerUnion.h" 42 #include "llvm/ADT/STLExtras.h" 43 #include "llvm/ADT/SmallPtrSet.h" 44 #include "llvm/ADT/SmallVector.h" 45 #include "llvm/ADT/StringRef.h" 46 #include "llvm/ADT/TinyPtrVector.h" 47 #include "llvm/ADT/iterator_range.h" 48 #include "llvm/Support/Allocator.h" 49 #include "llvm/Support/Casting.h" 50 #include "llvm/Support/Registry.h" 51 #include <cassert> 52 #include <cstddef> 53 #include <cstdint> 54 #include <map> 55 
#include <memory> 56 #include <optional> 57 #include <string> 58 #include <utility> 59 #include <vector> 60 61 namespace llvm { 62 63 template<unsigned InternalLen> class SmallString; 64 65 } // namespace llvm 66 67 namespace clang { 68 69 class CodeCompletionHandler; 70 class CommentHandler; 71 class DirectoryEntry; 72 class EmptylineHandler; 73 class ExternalPreprocessorSource; 74 class FileEntry; 75 class FileManager; 76 class HeaderSearch; 77 class MacroArgs; 78 class PragmaHandler; 79 class PragmaNamespace; 80 class PreprocessingRecord; 81 class PreprocessorLexer; 82 class PreprocessorOptions; 83 class ScratchBuffer; 84 class TargetInfo; 85 86 namespace Builtin { 87 class Context; 88 } 89 90 /// Stores token information for comparing actual tokens with 91 /// predefined values. Only handles simple tokens and identifiers. 92 class TokenValue { 93 tok::TokenKind Kind; 94 IdentifierInfo *II; 95 96 public: 97 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 98 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 99 assert(Kind != tok::identifier && 100 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 101 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 102 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 103 } 104 105 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 106 107 bool operator==(const Token &Tok) const { 108 return Tok.getKind() == Kind && 109 (!II || II == Tok.getIdentifierInfo()); 110 } 111 }; 112 113 /// Context in which macro name is used. 114 enum MacroUse { 115 // other than #define or #undef 116 MU_Other = 0, 117 118 // macro name specified in #define 119 MU_Define = 1, 120 121 // macro name specified in #undef 122 MU_Undef = 2 123 }; 124 125 enum class EmbedResult { 126 Invalid = -1, // Parsing error occurred. 
127 NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__ 128 Found = 1, // Corresponds to __STDC_EMBED_FOUND__ 129 Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__ 130 }; 131 132 /// Engages in a tight little dance with the lexer to efficiently 133 /// preprocess tokens. 134 /// 135 /// Lexers know only about tokens within a single source file, and don't 136 /// know anything about preprocessor-level issues like the \#include stack, 137 /// token expansion, etc. 138 class Preprocessor { 139 friend class VAOptDefinitionContext; 140 friend class VariadicMacroScopeGuard; 141 142 llvm::unique_function<void(const clang::Token &)> OnToken; 143 std::shared_ptr<PreprocessorOptions> PPOpts; 144 DiagnosticsEngine *Diags; 145 const LangOptions &LangOpts; 146 const TargetInfo *Target = nullptr; 147 const TargetInfo *AuxTarget = nullptr; 148 FileManager &FileMgr; 149 SourceManager &SourceMgr; 150 std::unique_ptr<ScratchBuffer> ScratchBuf; 151 HeaderSearch &HeaderInfo; 152 ModuleLoader &TheModuleLoader; 153 154 /// External source of macros. 155 ExternalPreprocessorSource *ExternalSource; 156 157 /// A BumpPtrAllocator object used to quickly allocate and release 158 /// objects internal to the Preprocessor. 159 llvm::BumpPtrAllocator BP; 160 161 /// Identifiers for builtin macros and other builtins. 
162 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 163 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 164 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 165 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 166 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__ 167 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 168 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 169 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 170 IdentifierInfo *Ident__identifier; // __identifier 171 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 172 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__ 173 IdentifierInfo *Ident__has_feature; // __has_feature 174 IdentifierInfo *Ident__has_extension; // __has_extension 175 IdentifierInfo *Ident__has_builtin; // __has_builtin 176 IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin 177 IdentifierInfo *Ident__has_attribute; // __has_attribute 178 IdentifierInfo *Ident__has_embed; // __has_embed 179 IdentifierInfo *Ident__has_include; // __has_include 180 IdentifierInfo *Ident__has_include_next; // __has_include_next 181 IdentifierInfo *Ident__has_warning; // __has_warning 182 IdentifierInfo *Ident__is_identifier; // __is_identifier 183 IdentifierInfo *Ident__building_module; // __building_module 184 IdentifierInfo *Ident__MODULE__; // __MODULE__ 185 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 186 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute 187 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 188 IdentifierInfo *Ident__is_target_arch; // __is_target_arch 189 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor 190 IdentifierInfo *Ident__is_target_os; // __is_target_os 191 IdentifierInfo *Ident__is_target_environment; // __is_target_environment 192 IdentifierInfo *Ident__is_target_variant_os; 193 IdentifierInfo *Ident__is_target_variant_environment; 194 IdentifierInfo 
*Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD 195 196 // Weak, only valid (and set) while InMacroArgs is true. 197 Token* ArgMacro; 198 199 SourceLocation DATELoc, TIMELoc; 200 201 // FEM_UnsetOnCommandLine means that an explicit evaluation method was 202 // not specified on the command line. The target is queried to set the 203 // default evaluation method. 204 LangOptions::FPEvalMethodKind CurrentFPEvalMethod = 205 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 206 207 // The most recent pragma location where the floating point evaluation 208 // method was modified. This is used to determine whether the 209 // 'pragma clang fp eval_method' was used whithin the current scope. 210 SourceLocation LastFPEvalPragmaLocation; 211 212 LangOptions::FPEvalMethodKind TUFPEvalMethod = 213 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 214 215 // Next __COUNTER__ value, starts at 0. 216 unsigned CounterValue = 0; 217 218 enum { 219 /// Maximum depth of \#includes. 220 MaxAllowedIncludeStackDepth = 200 221 }; 222 223 // State that is set before the preprocessor begins. 224 bool KeepComments : 1; 225 bool KeepMacroComments : 1; 226 bool SuppressIncludeNotFoundError : 1; 227 228 // State that changes while the preprocessor runs: 229 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 230 231 /// Whether the preprocessor owns the header search object. 232 bool OwnsHeaderSearch : 1; 233 234 /// True if macro expansion is disabled. 235 bool DisableMacroExpansion : 1; 236 237 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 238 /// when parsing preprocessor directives. 239 bool MacroExpansionInDirectivesOverride : 1; 240 241 class ResetMacroExpansionHelper; 242 243 /// Whether we have already loaded macros from the external source. 244 mutable bool ReadMacrosFromExternalSource : 1; 245 246 /// True if pragmas are enabled. 247 bool PragmasEnabled : 1; 248 249 /// True if the current build action is a preprocessing action. 
250 bool PreprocessedOutput : 1; 251 252 /// True if we are currently preprocessing a #if or #elif directive 253 bool ParsingIfOrElifDirective; 254 255 /// True if we are pre-expanding macro arguments. 256 bool InMacroArgPreExpansion; 257 258 /// Mapping/lookup information for all identifiers in 259 /// the program, including program keywords. 260 mutable IdentifierTable Identifiers; 261 262 /// This table contains all the selectors in the program. 263 /// 264 /// Unlike IdentifierTable above, this table *isn't* populated by the 265 /// preprocessor. It is declared/expanded here because its role/lifetime is 266 /// conceptually similar to the IdentifierTable. In addition, the current 267 /// control flow (in clang::ParseAST()), make it convenient to put here. 268 /// 269 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 270 /// the lifetime of the preprocessor. 271 SelectorTable Selectors; 272 273 /// Information about builtins. 274 std::unique_ptr<Builtin::Context> BuiltinInfo; 275 276 /// Tracks all of the pragmas that the client registered 277 /// with this preprocessor. 278 std::unique_ptr<PragmaNamespace> PragmaHandlers; 279 280 /// Pragma handlers of the original source is stored here during the 281 /// parsing of a model file. 282 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 283 284 /// Tracks all of the comment handlers that the client registered 285 /// with this preprocessor. 286 std::vector<CommentHandler *> CommentHandlers; 287 288 /// Empty line handler. 289 EmptylineHandler *Emptyline = nullptr; 290 291 /// True to avoid tearing down the lexer etc on EOF 292 bool IncrementalProcessing = false; 293 294 public: 295 /// The kind of translation unit we are processing. 296 const TranslationUnitKind TUKind; 297 298 /// Returns a pointer into the given file's buffer that's guaranteed 299 /// to be between tokens. The returned pointer is always before \p Start. 
300 /// The maximum distance betweenthe returned pointer and \p Start is 301 /// limited by a constant value, but also an implementation detail. 302 /// If no such check point exists, \c nullptr is returned. 303 const char *getCheckPoint(FileID FID, const char *Start) const; 304 305 private: 306 /// The code-completion handler. 307 CodeCompletionHandler *CodeComplete = nullptr; 308 309 /// The file that we're performing code-completion for, if any. 310 const FileEntry *CodeCompletionFile = nullptr; 311 312 /// The offset in file for the code-completion point. 313 unsigned CodeCompletionOffset = 0; 314 315 /// The location for the code-completion point. This gets instantiated 316 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 317 SourceLocation CodeCompletionLoc; 318 319 /// The start location for the file of the code-completion point. 320 /// 321 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 322 /// for preprocessing. 323 SourceLocation CodeCompletionFileLoc; 324 325 /// The source location of the \c import contextual keyword we just 326 /// lexed, if any. 327 SourceLocation ModuleImportLoc; 328 329 /// The import path for named module that we're currently processing. 330 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath; 331 332 llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints; 333 unsigned CheckPointCounter = 0; 334 335 /// Whether the import is an `@import` or a standard c++ modules import. 336 bool IsAtImport = false; 337 338 /// Whether the last token we lexed was an '@'. 339 bool LastTokenWasAt = false; 340 341 /// A position within a C++20 import-seq. 342 class StdCXXImportSeq { 343 public: 344 enum State : int { 345 // Positive values represent a number of unclosed brackets. 346 AtTopLevel = 0, 347 AfterTopLevelTokenSeq = -1, 348 AfterExport = -2, 349 AfterImportSeq = -3, 350 }; 351 352 StdCXXImportSeq(State S) : S(S) {} 353 354 /// Saw any kind of open bracket. 
355 void handleOpenBracket() { 356 S = static_cast<State>(std::max<int>(S, 0) + 1); 357 } 358 /// Saw any kind of close bracket other than '}'. 359 void handleCloseBracket() { 360 S = static_cast<State>(std::max<int>(S, 1) - 1); 361 } 362 /// Saw a close brace. 363 void handleCloseBrace() { 364 handleCloseBracket(); 365 if (S == AtTopLevel && !AfterHeaderName) 366 S = AfterTopLevelTokenSeq; 367 } 368 /// Saw a semicolon. 369 void handleSemi() { 370 if (atTopLevel()) { 371 S = AfterTopLevelTokenSeq; 372 AfterHeaderName = false; 373 } 374 } 375 376 /// Saw an 'export' identifier. 377 void handleExport() { 378 if (S == AfterTopLevelTokenSeq) 379 S = AfterExport; 380 else if (S <= 0) 381 S = AtTopLevel; 382 } 383 /// Saw an 'import' identifier. 384 void handleImport() { 385 if (S == AfterTopLevelTokenSeq || S == AfterExport) 386 S = AfterImportSeq; 387 else if (S <= 0) 388 S = AtTopLevel; 389 } 390 391 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 392 /// until we reach a top-level semicolon. 393 void handleHeaderName() { 394 if (S == AfterImportSeq) 395 AfterHeaderName = true; 396 handleMisc(); 397 } 398 399 /// Saw any other token. 400 void handleMisc() { 401 if (S <= 0) 402 S = AtTopLevel; 403 } 404 405 bool atTopLevel() { return S <= 0; } 406 bool afterImportSeq() { return S == AfterImportSeq; } 407 bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; } 408 409 private: 410 State S; 411 /// Whether we're in the pp-import-suffix following the header-name in a 412 /// pp-import. If so, a close-brace is not sufficient to end the 413 /// top-level-token-seq of an import-seq. 414 bool AfterHeaderName = false; 415 }; 416 417 /// Our current position within a C++20 import-seq. 
418 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq; 419 420 /// Track whether we are in a Global Module Fragment 421 class TrackGMF { 422 public: 423 enum GMFState : int { 424 GMFActive = 1, 425 MaybeGMF = 0, 426 BeforeGMFIntroducer = -1, 427 GMFAbsentOrEnded = -2, 428 }; 429 430 TrackGMF(GMFState S) : S(S) {} 431 432 /// Saw a semicolon. 433 void handleSemi() { 434 // If it is immediately after the first instance of the module keyword, 435 // then that introduces the GMF. 436 if (S == MaybeGMF) 437 S = GMFActive; 438 } 439 440 /// Saw an 'export' identifier. 441 void handleExport() { 442 // The presence of an 'export' keyword always ends or excludes a GMF. 443 S = GMFAbsentOrEnded; 444 } 445 446 /// Saw an 'import' identifier. 447 void handleImport(bool AfterTopLevelTokenSeq) { 448 // If we see this before any 'module' kw, then we have no GMF. 449 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 450 S = GMFAbsentOrEnded; 451 } 452 453 /// Saw a 'module' identifier. 454 void handleModule(bool AfterTopLevelTokenSeq) { 455 // This was the first module identifier and not preceded by any token 456 // that would exclude a GMF. It could begin a GMF, but only if directly 457 // followed by a semicolon. 458 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 459 S = MaybeGMF; 460 else 461 S = GMFAbsentOrEnded; 462 } 463 464 /// Saw any other token. 465 void handleMisc() { 466 // We saw something other than ; after the 'module' kw, so not a GMF. 467 if (S == MaybeGMF) 468 S = GMFAbsentOrEnded; 469 } 470 471 bool inGMF() { return S == GMFActive; } 472 473 private: 474 /// Track the transitions into and out of a Global Module Fragment, 475 /// if one is present. 476 GMFState S; 477 }; 478 479 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer; 480 481 /// Track the status of the c++20 module decl. 
///
///   module-declaration:
///     'export'[opt] 'module' module-name module-partition[opt]
///     attribute-specifier-seq[opt] ';'
///
///   module-name:
///     module-name-qualifier[opt] identifier
///
///   module-partition:
///     ':' module-name-qualifier[opt] identifier
///
///   module-name-qualifier:
///     identifier '.'
///     module-name-qualifier identifier '.'
///
/// Transition state:
///
///   NotAModuleDecl --- export ---> FoundExport
///   NotAModuleDecl --- module ---> ImplementationCandidate
///   FoundExport --- module ---> InterfaceCandidate
///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
///   ImplementationCandidate --- period ---> ImplementationCandidate
///   ImplementationCandidate --- colon ---> ImplementationCandidate
///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
///   InterfaceCandidate --- period ---> InterfaceCandidate
///   InterfaceCandidate --- colon ---> InterfaceCandidate
///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
///   InterfaceCandidate --- Semi ---> NamedModuleInterface
///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
///   NamedModuleInterface --- Anything ---> NamedModuleInterface
///
/// FIXME: attribute-specifier-seq is not handled here. This is not a problem
/// for now, since we don't support any module attributes yet.
515 class ModuleDeclSeq { 516 enum ModuleDeclState : int { 517 NotAModuleDecl, 518 FoundExport, 519 InterfaceCandidate, 520 ImplementationCandidate, 521 NamedModuleInterface, 522 NamedModuleImplementation, 523 }; 524 525 public: 526 ModuleDeclSeq() = default; 527 528 void handleExport() { 529 if (State == NotAModuleDecl) 530 State = FoundExport; 531 else if (!isNamedModule()) 532 reset(); 533 } 534 535 void handleModule() { 536 if (State == FoundExport) 537 State = InterfaceCandidate; 538 else if (State == NotAModuleDecl) 539 State = ImplementationCandidate; 540 else if (!isNamedModule()) 541 reset(); 542 } 543 544 void handleIdentifier(IdentifierInfo *Identifier) { 545 if (isModuleCandidate() && Identifier) 546 Name += Identifier->getName().str(); 547 else if (!isNamedModule()) 548 reset(); 549 } 550 551 void handleColon() { 552 if (isModuleCandidate()) 553 Name += ":"; 554 else if (!isNamedModule()) 555 reset(); 556 } 557 558 void handlePeriod() { 559 if (isModuleCandidate()) 560 Name += "."; 561 else if (!isNamedModule()) 562 reset(); 563 } 564 565 void handleSemi() { 566 if (!Name.empty() && isModuleCandidate()) { 567 if (State == InterfaceCandidate) 568 State = NamedModuleInterface; 569 else if (State == ImplementationCandidate) 570 State = NamedModuleImplementation; 571 else 572 llvm_unreachable("Unimaged ModuleDeclState."); 573 } else if (!isNamedModule()) 574 reset(); 575 } 576 577 void handleMisc() { 578 if (!isNamedModule()) 579 reset(); 580 } 581 582 bool isModuleCandidate() const { 583 return State == InterfaceCandidate || State == ImplementationCandidate; 584 } 585 586 bool isNamedModule() const { 587 return State == NamedModuleInterface || 588 State == NamedModuleImplementation; 589 } 590 591 bool isNamedInterface() const { return State == NamedModuleInterface; } 592 593 bool isImplementationUnit() const { 594 return State == NamedModuleImplementation && !getName().contains(':'); 595 } 596 597 StringRef getName() const { 598 assert(isNamedModule() && 
"Can't get name from a non named module"); 599 return Name; 600 } 601 602 StringRef getPrimaryName() const { 603 assert(isNamedModule() && "Can't get name from a non named module"); 604 return getName().split(':').first; 605 } 606 607 void reset() { 608 Name.clear(); 609 State = NotAModuleDecl; 610 } 611 612 private: 613 ModuleDeclState State = NotAModuleDecl; 614 std::string Name; 615 }; 616 617 ModuleDeclSeq ModuleDeclState; 618 619 /// Whether the module import expects an identifier next. Otherwise, 620 /// it expects a '.' or ';'. 621 bool ModuleImportExpectsIdentifier = false; 622 623 /// The identifier and source location of the currently-active 624 /// \#pragma clang arc_cf_code_audited begin. 625 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; 626 627 /// The source location of the currently-active 628 /// \#pragma clang assume_nonnull begin. 629 SourceLocation PragmaAssumeNonNullLoc; 630 631 /// Set only for preambles which end with an active 632 /// \#pragma clang assume_nonnull begin. 633 /// 634 /// When the preamble is loaded into the main file, 635 /// `PragmaAssumeNonNullLoc` will be set to this to 636 /// replay the unterminated assume_nonnull. 637 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc; 638 639 /// True if we hit the code-completion point. 640 bool CodeCompletionReached = false; 641 642 /// The code completion token containing the information 643 /// on the stem that is to be code completed. 644 IdentifierInfo *CodeCompletionII = nullptr; 645 646 /// Range for the code completion token. 647 SourceRange CodeCompletionTokenRange; 648 649 /// The directory that the main file should be considered to occupy, 650 /// if it does not correspond to a real file (as happens when building a 651 /// module). 
652 OptionalDirectoryEntryRef MainFileDir; 653 654 /// The number of bytes that we will initially skip when entering the 655 /// main file, along with a flag that indicates whether skipping this number 656 /// of bytes will place the lexer at the start of a line. 657 /// 658 /// This is used when loading a precompiled preamble. 659 std::pair<int, bool> SkipMainFilePreamble; 660 661 /// Whether we hit an error due to reaching max allowed include depth. Allows 662 /// to avoid hitting the same error over and over again. 663 bool HasReachedMaxIncludeDepth = false; 664 665 /// The number of currently-active calls to Lex. 666 /// 667 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 668 /// require asking for multiple additional tokens. This counter makes it 669 /// possible for Lex to detect whether it's producing a token for the end 670 /// of phase 4 of translation or for some other situation. 671 unsigned LexLevel = 0; 672 673 /// The number of (LexLevel 0) preprocessor tokens. 674 unsigned TokenCount = 0; 675 676 /// Preprocess every token regardless of LexLevel. 677 bool PreprocessToken = false; 678 679 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 680 /// warning, or zero for unlimited. 
681 unsigned MaxTokens = 0; 682 SourceLocation MaxTokensOverrideLoc; 683 684 public: 685 struct PreambleSkipInfo { 686 SourceLocation HashTokenLoc; 687 SourceLocation IfTokenLoc; 688 bool FoundNonSkipPortion; 689 bool FoundElse; 690 SourceLocation ElseLoc; 691 692 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 693 bool FoundNonSkipPortion, bool FoundElse, 694 SourceLocation ElseLoc) 695 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 696 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 697 ElseLoc(ElseLoc) {} 698 }; 699 700 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>; 701 702 private: 703 friend class ASTReader; 704 friend class MacroArgs; 705 706 class PreambleConditionalStackStore { 707 enum State { 708 Off = 0, 709 Recording = 1, 710 Replaying = 2, 711 }; 712 713 public: 714 PreambleConditionalStackStore() = default; 715 716 void startRecording() { ConditionalStackState = Recording; } 717 void startReplaying() { ConditionalStackState = Replaying; } 718 bool isRecording() const { return ConditionalStackState == Recording; } 719 bool isReplaying() const { return ConditionalStackState == Replaying; } 720 721 ArrayRef<PPConditionalInfo> getStack() const { 722 return ConditionalStack; 723 } 724 725 void doneReplaying() { 726 ConditionalStack.clear(); 727 ConditionalStackState = Off; 728 } 729 730 void setStack(ArrayRef<PPConditionalInfo> s) { 731 if (!isRecording() && !isReplaying()) 732 return; 733 ConditionalStack.clear(); 734 ConditionalStack.append(s.begin(), s.end()); 735 } 736 737 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 738 739 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); } 740 741 void clearSkipInfo() { SkipInfo.reset(); } 742 743 std::optional<PreambleSkipInfo> SkipInfo; 744 745 private: 746 SmallVector<PPConditionalInfo, 4> ConditionalStack; 747 State ConditionalStackState = Off; 748 } PreambleConditionalStack; 749 750 /// The current top of 
the stack that we're lexing from if 751 /// not expanding a macro and we are lexing directly from source code. 752 /// 753 /// Only one of CurLexer, or CurTokenLexer will be non-null. 754 std::unique_ptr<Lexer> CurLexer; 755 756 /// The current top of the stack that we're lexing from 757 /// if not expanding a macro. 758 /// 759 /// This is an alias for CurLexer. 760 PreprocessorLexer *CurPPLexer = nullptr; 761 762 /// Used to find the current FileEntry, if CurLexer is non-null 763 /// and if applicable. 764 /// 765 /// This allows us to implement \#include_next and find directory-specific 766 /// properties. 767 ConstSearchDirIterator CurDirLookup = nullptr; 768 769 /// The current macro we are expanding, if we are expanding a macro. 770 /// 771 /// One of CurLexer and CurTokenLexer must be null. 772 std::unique_ptr<TokenLexer> CurTokenLexer; 773 774 /// The kind of lexer we're currently working with. 775 typedef bool (*LexerCallback)(Preprocessor &, Token &); 776 LexerCallback CurLexerCallback = &CLK_Lexer; 777 778 /// If the current lexer is for a submodule that is being built, this 779 /// is that submodule. 780 Module *CurLexerSubmodule = nullptr; 781 782 /// Keeps track of the stack of files currently 783 /// \#included, and macros currently being expanded from, not counting 784 /// CurLexer/CurTokenLexer. 785 struct IncludeStackInfo { 786 LexerCallback CurLexerCallback; 787 Module *TheSubmodule; 788 std::unique_ptr<Lexer> TheLexer; 789 PreprocessorLexer *ThePPLexer; 790 std::unique_ptr<TokenLexer> TheTokenLexer; 791 ConstSearchDirIterator TheDirLookup; 792 793 // The following constructors are completely useless copies of the default 794 // versions, only needed to pacify MSVC. 
795 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule, 796 std::unique_ptr<Lexer> &&TheLexer, 797 PreprocessorLexer *ThePPLexer, 798 std::unique_ptr<TokenLexer> &&TheTokenLexer, 799 ConstSearchDirIterator TheDirLookup) 800 : CurLexerCallback(std::move(CurLexerCallback)), 801 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 802 ThePPLexer(std::move(ThePPLexer)), 803 TheTokenLexer(std::move(TheTokenLexer)), 804 TheDirLookup(std::move(TheDirLookup)) {} 805 }; 806 std::vector<IncludeStackInfo> IncludeMacroStack; 807 808 /// Actions invoked when some preprocessor activity is 809 /// encountered (e.g. a file is \#included, etc). 810 std::unique_ptr<PPCallbacks> Callbacks; 811 812 struct MacroExpandsInfo { 813 Token Tok; 814 MacroDefinition MD; 815 SourceRange Range; 816 817 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 818 : Tok(Tok), MD(MD), Range(Range) {} 819 }; 820 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 821 822 /// Information about a name that has been used to define a module macro. 823 struct ModuleMacroInfo { 824 /// The most recent macro directive for this identifier. 825 MacroDirective *MD; 826 827 /// The active module macros for this identifier. 828 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 829 830 /// The generation number at which we last updated ActiveModuleMacros. 831 /// \see Preprocessor::VisibleModules. 832 unsigned ActiveModuleMacrosGeneration = 0; 833 834 /// Whether this macro name is ambiguous. 835 bool IsAmbiguous = false; 836 837 /// The module macros that are overridden by this macro. 838 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 839 840 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 841 }; 842 843 /// The state of a macro for an identifier. 
844 class MacroState { 845 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 846 847 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 848 const IdentifierInfo *II) const { 849 if (II->isOutOfDate()) 850 PP.updateOutOfDateIdentifier(*II); 851 // FIXME: Find a spare bit on IdentifierInfo and store a 852 // HasModuleMacros flag. 853 if (!II->hasMacroDefinition() || 854 (!PP.getLangOpts().Modules && 855 !PP.getLangOpts().ModulesLocalVisibility) || 856 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 857 return nullptr; 858 859 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State); 860 if (!Info) { 861 Info = new (PP.getPreprocessorAllocator()) 862 ModuleMacroInfo(cast<MacroDirective *>(State)); 863 State = Info; 864 } 865 866 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 867 Info->ActiveModuleMacrosGeneration) 868 PP.updateModuleMacroInfo(II, *Info); 869 return Info; 870 } 871 872 public: 873 MacroState() : MacroState(nullptr) {} 874 MacroState(MacroDirective *MD) : State(MD) {} 875 876 MacroState(MacroState &&O) noexcept : State(O.State) { 877 O.State = (MacroDirective *)nullptr; 878 } 879 880 MacroState &operator=(MacroState &&O) noexcept { 881 auto S = O.State; 882 O.State = (MacroDirective *)nullptr; 883 State = S; 884 return *this; 885 } 886 887 ~MacroState() { 888 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State)) 889 Info->~ModuleMacroInfo(); 890 } 891 892 MacroDirective *getLatest() const { 893 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State)) 894 return Info->MD; 895 return cast<MacroDirective *>(State); 896 } 897 898 void setLatest(MacroDirective *MD) { 899 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State)) 900 Info->MD = MD; 901 else 902 State = MD; 903 } 904 905 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 906 auto *Info = getModuleInfo(PP, II); 907 return Info ? 
Info->IsAmbiguous : false; 908 } 909 910 ArrayRef<ModuleMacro *> 911 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 912 if (auto *Info = getModuleInfo(PP, II)) 913 return Info->ActiveModuleMacros; 914 return {}; 915 } 916 917 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 918 SourceManager &SourceMgr) const { 919 // FIXME: Incorporate module macros into the result of this. 920 if (auto *Latest = getLatest()) 921 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 922 return {}; 923 } 924 925 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 926 if (auto *Info = getModuleInfo(PP, II)) { 927 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 928 Info->ActiveModuleMacros.begin(), 929 Info->ActiveModuleMacros.end()); 930 Info->ActiveModuleMacros.clear(); 931 Info->IsAmbiguous = false; 932 } 933 } 934 935 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 936 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 937 return Info->OverriddenMacros; 938 return {}; 939 } 940 941 void setOverriddenMacros(Preprocessor &PP, 942 ArrayRef<ModuleMacro *> Overrides) { 943 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State); 944 if (!Info) { 945 if (Overrides.empty()) 946 return; 947 Info = new (PP.getPreprocessorAllocator()) 948 ModuleMacroInfo(cast<MacroDirective *>(State)); 949 State = Info; 950 } 951 Info->OverriddenMacros.clear(); 952 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 953 Overrides.begin(), Overrides.end()); 954 Info->ActiveModuleMacrosGeneration = 0; 955 } 956 }; 957 958 /// For each IdentifierInfo that was associated with a macro, we 959 /// keep a mapping to the history of all macro definitions and #undefs in 960 /// the reverse order (the latest one is in the head of the list). 961 /// 962 /// This mapping lives within the \p CurSubmoduleState. 
963 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>; 964 965 struct SubmoduleState; 966 967 /// Information about a submodule that we're currently building. 968 struct BuildingSubmoduleInfo { 969 /// The module that we are building. 970 Module *M; 971 972 /// The location at which the module was included. 973 SourceLocation ImportLoc; 974 975 /// Whether we entered this submodule via a pragma. 976 bool IsPragma; 977 978 /// The previous SubmoduleState. 979 SubmoduleState *OuterSubmoduleState; 980 981 /// The number of pending module macro names when we started building this. 982 unsigned OuterPendingModuleMacroNames; 983 984 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 985 SubmoduleState *OuterSubmoduleState, 986 unsigned OuterPendingModuleMacroNames) 987 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 988 OuterSubmoduleState(OuterSubmoduleState), 989 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 990 }; 991 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 992 993 /// Information about a submodule's preprocessor state. 994 struct SubmoduleState { 995 /// The macros for the submodule. 996 MacroMap Macros; 997 998 /// The set of modules that are visible within the submodule. 999 VisibleModuleSet VisibleModules; 1000 1001 // FIXME: CounterValue? 1002 // FIXME: PragmaPushMacroInfo? 1003 }; 1004 std::map<Module *, SubmoduleState> Submodules; 1005 1006 /// The preprocessor state for preprocessing outside of any submodule. 1007 SubmoduleState NullSubmoduleState; 1008 1009 /// The current submodule state. Will be \p NullSubmoduleState if we're not 1010 /// in a submodule. 1011 SubmoduleState *CurSubmoduleState; 1012 1013 /// The files that have been included. 1014 IncludedFilesSet IncludedFiles; 1015 1016 /// The set of top-level modules that affected preprocessing, but were not 1017 /// imported. 
1018 llvm::SmallSetVector<Module *, 2> AffectingClangModules; 1019 1020 /// The set of known macros exported from modules. 1021 llvm::FoldingSet<ModuleMacro> ModuleMacros; 1022 1023 /// The names of potential module macros that we've not yet processed. 1024 llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames; 1025 1026 /// The list of module macros, for each identifier, that are not overridden by 1027 /// any other module macro. 1028 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>> 1029 LeafModuleMacros; 1030 1031 /// Macros that we want to warn because they are not used at the end 1032 /// of the translation unit. 1033 /// 1034 /// We store just their SourceLocations instead of 1035 /// something like MacroInfo*. The benefit of this is that when we are 1036 /// deserializing from PCH, we don't need to deserialize identifier & macros 1037 /// just so that we can report that they are unused, we just warn using 1038 /// the SourceLocations of this set (that will be filled by the ASTReader). 1039 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>; 1040 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 1041 1042 /// This is a pair of an optional message and source location used for pragmas 1043 /// that annotate macros like pragma clang restrict_expansion and pragma clang 1044 /// deprecated. This pair stores the optional message and the location of the 1045 /// annotation pragma for use producing diagnostics and notes. 1046 using MsgLocationPair = std::pair<std::string, SourceLocation>; 1047 1048 struct MacroAnnotationInfo { 1049 SourceLocation Location; 1050 std::string Message; 1051 }; 1052 1053 struct MacroAnnotations { 1054 std::optional<MacroAnnotationInfo> DeprecationInfo; 1055 std::optional<MacroAnnotationInfo> RestrictExpansionInfo; 1056 std::optional<SourceLocation> FinalAnnotationLoc; 1057 }; 1058 1059 /// Warning information for macro annotations. 
1060 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos; 1061 1062 /// A "freelist" of MacroArg objects that can be 1063 /// reused for quick allocation. 1064 MacroArgs *MacroArgCache = nullptr; 1065 1066 /// For each IdentifierInfo used in a \#pragma push_macro directive, 1067 /// we keep a MacroInfo stack used to restore the previous macro value. 1068 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>> 1069 PragmaPushMacroInfo; 1070 1071 // Various statistics we track for performance analysis. 1072 unsigned NumDirectives = 0; 1073 unsigned NumDefined = 0; 1074 unsigned NumUndefined = 0; 1075 unsigned NumPragma = 0; 1076 unsigned NumIf = 0; 1077 unsigned NumElse = 0; 1078 unsigned NumEndif = 0; 1079 unsigned NumEnteredSourceFiles = 0; 1080 unsigned MaxIncludeStackDepth = 0; 1081 unsigned NumMacroExpanded = 0; 1082 unsigned NumFnMacroExpanded = 0; 1083 unsigned NumBuiltinMacroExpanded = 0; 1084 unsigned NumFastMacroExpanded = 0; 1085 unsigned NumTokenPaste = 0; 1086 unsigned NumFastTokenPaste = 0; 1087 unsigned NumSkipped = 0; 1088 1089 /// The predefined macros that preprocessor should use from the 1090 /// command line etc. 1091 std::string Predefines; 1092 1093 /// The file ID for the preprocessor predefines. 1094 FileID PredefinesFileID; 1095 1096 /// The file ID for the PCH through header. 1097 FileID PCHThroughHeaderFileID; 1098 1099 /// Whether tokens are being skipped until a #pragma hdrstop is seen. 1100 bool SkippingUntilPragmaHdrStop = false; 1101 1102 /// Whether tokens are being skipped until the through header is seen. 1103 bool SkippingUntilPCHThroughHeader = false; 1104 1105 /// \{ 1106 /// Cache of macro expanders to reduce malloc traffic. 1107 enum { TokenLexerCacheSize = 8 }; 1108 unsigned NumCachedTokenLexers; 1109 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 1110 /// \} 1111 1112 /// Keeps macro expanded tokens for TokenLexers. 
1113 // 1114 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1115 /// going to lex in the cache and when it finishes the tokens are removed 1116 /// from the end of the cache. 1117 SmallVector<Token, 16> MacroExpandedTokens; 1118 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack; 1119 1120 /// A record of the macro definitions and expansions that 1121 /// occurred during preprocessing. 1122 /// 1123 /// This is an optional side structure that can be enabled with 1124 /// \c createPreprocessingRecord() prior to preprocessing. 1125 PreprocessingRecord *Record = nullptr; 1126 1127 /// Cached tokens state. 1128 using CachedTokensTy = SmallVector<Token, 1>; 1129 1130 /// Cached tokens are stored here when we do backtracking or 1131 /// lookahead. They are "lexed" by the CachingLex() method. 1132 CachedTokensTy CachedTokens; 1133 1134 /// The position of the cached token that CachingLex() should 1135 /// "lex" next. 1136 /// 1137 /// If it points beyond the CachedTokens vector, it means that a normal 1138 /// Lex() should be invoked. 1139 CachedTokensTy::size_type CachedLexPos = 0; 1140 1141 /// Stack of backtrack positions, allowing nested backtracks. 1142 /// 1143 /// The EnableBacktrackAtThisPos() method pushes a position to 1144 /// indicate where CachedLexPos should be set when the BackTrack() method is 1145 /// invoked (at which point the last position is popped). 1146 std::vector<CachedTokensTy::size_type> BacktrackPositions; 1147 1148 /// Stack of cached tokens/initial number of cached tokens pairs, allowing 1149 /// nested unannotated backtracks. 1150 std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>> 1151 UnannotatedBacktrackTokens; 1152 1153 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running. 1154 /// This is used to guard against calling this function recursively. 1155 /// 1156 /// See comments at the use-site for more context about why it is needed. 
1157 bool SkippingExcludedConditionalBlock = false; 1158 1159 /// Keeps track of skipped range mappings that were recorded while skipping 1160 /// excluded conditional directives. It maps the source buffer pointer at 1161 /// the beginning of a skipped block, to the number of bytes that should be 1162 /// skipped. 1163 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges; 1164 1165 void updateOutOfDateIdentifier(const IdentifierInfo &II) const; 1166 1167 public: 1168 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 1169 DiagnosticsEngine &diags, const LangOptions &LangOpts, 1170 SourceManager &SM, HeaderSearch &Headers, 1171 ModuleLoader &TheModuleLoader, 1172 IdentifierInfoLookup *IILookup = nullptr, 1173 bool OwnsHeaderSearch = false, 1174 TranslationUnitKind TUKind = TU_Complete); 1175 1176 ~Preprocessor(); 1177 1178 /// Initialize the preprocessor using information about the target. 1179 /// 1180 /// \param Target is owned by the caller and must remain valid for the 1181 /// lifetime of the preprocessor. 1182 /// \param AuxTarget is owned by the caller and must remain valid for 1183 /// the lifetime of the preprocessor. 1184 void Initialize(const TargetInfo &Target, 1185 const TargetInfo *AuxTarget = nullptr); 1186 1187 /// Initialize the preprocessor to parse a model file 1188 /// 1189 /// To parse model files the preprocessor of the original source is reused to 1190 /// preserver the identifier table. However to avoid some duplicate 1191 /// information in the preprocessor some cleanup is needed before it is used 1192 /// to parse model files. This method does that cleanup. 1193 void InitializeForModelFile(); 1194 1195 /// Cleanup after model file parsing 1196 void FinalizeForModelFile(); 1197 1198 /// Retrieve the preprocessor options used to initialize this 1199 /// preprocessor. 
1200 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 1201 1202 DiagnosticsEngine &getDiagnostics() const { return *Diags; } 1203 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 1204 1205 const LangOptions &getLangOpts() const { return LangOpts; } 1206 const TargetInfo &getTargetInfo() const { return *Target; } 1207 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } 1208 FileManager &getFileManager() const { return FileMgr; } 1209 SourceManager &getSourceManager() const { return SourceMgr; } 1210 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 1211 1212 IdentifierTable &getIdentifierTable() { return Identifiers; } 1213 const IdentifierTable &getIdentifierTable() const { return Identifiers; } 1214 SelectorTable &getSelectorTable() { return Selectors; } 1215 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } 1216 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 1217 1218 void setExternalSource(ExternalPreprocessorSource *Source) { 1219 ExternalSource = Source; 1220 } 1221 1222 ExternalPreprocessorSource *getExternalSource() const { 1223 return ExternalSource; 1224 } 1225 1226 /// Retrieve the module loader associated with this preprocessor. 1227 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 1228 1229 bool hadModuleLoaderFatalFailure() const { 1230 return TheModuleLoader.HadFatalFailure; 1231 } 1232 1233 /// Retrieve the number of Directives that have been processed by the 1234 /// Preprocessor. 1235 unsigned getNumDirectives() const { 1236 return NumDirectives; 1237 } 1238 1239 /// True if we are currently preprocessing a #if or #elif directive 1240 bool isParsingIfOrElifDirective() const { 1241 return ParsingIfOrElifDirective; 1242 } 1243 1244 /// Control whether the preprocessor retains comments in output. 
1245 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 1246 this->KeepComments = KeepComments | KeepMacroComments; 1247 this->KeepMacroComments = KeepMacroComments; 1248 } 1249 1250 bool getCommentRetentionState() const { return KeepComments; } 1251 1252 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } 1253 bool getPragmasEnabled() const { return PragmasEnabled; } 1254 1255 void SetSuppressIncludeNotFoundError(bool Suppress) { 1256 SuppressIncludeNotFoundError = Suppress; 1257 } 1258 1259 bool GetSuppressIncludeNotFoundError() { 1260 return SuppressIncludeNotFoundError; 1261 } 1262 1263 /// Sets whether the preprocessor is responsible for producing output or if 1264 /// it is producing tokens to be consumed by Parse and Sema. 1265 void setPreprocessedOutput(bool IsPreprocessedOutput) { 1266 PreprocessedOutput = IsPreprocessedOutput; 1267 } 1268 1269 /// Returns true if the preprocessor is responsible for generating output, 1270 /// false if it is producing tokens to be consumed by Parse and Sema. 1271 bool isPreprocessedOutput() const { return PreprocessedOutput; } 1272 1273 /// Return true if we are lexing directly from the specified lexer. 1274 bool isCurrentLexer(const PreprocessorLexer *L) const { 1275 return CurPPLexer == L; 1276 } 1277 1278 /// Return the current lexer being lexed from. 1279 /// 1280 /// Note that this ignores any potentially active macro expansions and _Pragma 1281 /// expansions going on at the time. 1282 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 1283 1284 /// Return the current file lexer being lexed from. 1285 /// 1286 /// Note that this ignores any potentially active macro expansions and _Pragma 1287 /// expansions going on at the time. 1288 PreprocessorLexer *getCurrentFileLexer() const; 1289 1290 /// Return the submodule owning the file being lexed. This may not be 1291 /// the current module if we have changed modules since entering the file. 
1292 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1293 1294 /// Returns the FileID for the preprocessor predefines. 1295 FileID getPredefinesFileID() const { return PredefinesFileID; } 1296 1297 /// \{ 1298 /// Accessors for preprocessor callbacks. 1299 /// 1300 /// Note that this class takes ownership of any PPCallbacks object given to 1301 /// it. 1302 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } 1303 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1304 if (Callbacks) 1305 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1306 std::move(Callbacks)); 1307 Callbacks = std::move(C); 1308 } 1309 /// \} 1310 1311 /// Get the number of tokens processed so far. 1312 unsigned getTokenCount() const { return TokenCount; } 1313 1314 /// Get the max number of tokens before issuing a -Wmax-tokens warning. 1315 unsigned getMaxTokens() const { return MaxTokens; } 1316 1317 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1318 MaxTokens = Value; 1319 MaxTokensOverrideLoc = Loc; 1320 }; 1321 1322 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1323 1324 /// Register a function that would be called on each token in the final 1325 /// expanded token stream. 1326 /// This also reports annotation tokens produced by the parser. 1327 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1328 OnToken = std::move(F); 1329 } 1330 1331 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } 1332 1333 bool isMacroDefined(StringRef Id) { 1334 return isMacroDefined(&Identifiers.get(Id)); 1335 } 1336 bool isMacroDefined(const IdentifierInfo *II) { 1337 return II->hasMacroDefinition() && 1338 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1339 } 1340 1341 /// Determine whether II is defined as a macro within the module M, 1342 /// if that is a module that we've already preprocessed. Does not check for 1343 /// macros imported into M. 
1344 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1345 if (!II->hasMacroDefinition()) 1346 return false; 1347 auto I = Submodules.find(M); 1348 if (I == Submodules.end()) 1349 return false; 1350 auto J = I->second.Macros.find(II); 1351 if (J == I->second.Macros.end()) 1352 return false; 1353 auto *MD = J->second.getLatest(); 1354 return MD && MD->isDefined(); 1355 } 1356 1357 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1358 if (!II->hasMacroDefinition()) 1359 return {}; 1360 1361 MacroState &S = CurSubmoduleState->Macros[II]; 1362 auto *MD = S.getLatest(); 1363 while (isa_and_nonnull<VisibilityMacroDirective>(MD)) 1364 MD = MD->getPrevious(); 1365 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1366 S.getActiveModuleMacros(*this, II), 1367 S.isAmbiguous(*this, II)); 1368 } 1369 1370 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1371 SourceLocation Loc) { 1372 if (!II->hadMacroDefinition()) 1373 return {}; 1374 1375 MacroState &S = CurSubmoduleState->Macros[II]; 1376 MacroDirective::DefInfo DI; 1377 if (auto *MD = S.getLatest()) 1378 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1379 // FIXME: Compute the set of active module macros at the specified location. 1380 return MacroDefinition(DI.getDirective(), 1381 S.getActiveModuleMacros(*this, II), 1382 S.isAmbiguous(*this, II)); 1383 } 1384 1385 /// Given an identifier, return its latest non-imported MacroDirective 1386 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
1387 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1388 if (!II->hasMacroDefinition()) 1389 return nullptr; 1390 1391 auto *MD = getLocalMacroDirectiveHistory(II); 1392 if (!MD || MD->getDefinition().isUndefined()) 1393 return nullptr; 1394 1395 return MD; 1396 } 1397 1398 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1399 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1400 } 1401 1402 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1403 if (!II->hasMacroDefinition()) 1404 return nullptr; 1405 if (auto MD = getMacroDefinition(II)) 1406 return MD.getMacroInfo(); 1407 return nullptr; 1408 } 1409 1410 /// Given an identifier, return the latest non-imported macro 1411 /// directive for that identifier. 1412 /// 1413 /// One can iterate over all previous macro directives from the most recent 1414 /// one. 1415 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1416 1417 /// Add a directive to the macro directive history for this identifier. 1418 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); 1419 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1420 SourceLocation Loc) { 1421 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1422 appendMacroDirective(II, MD); 1423 return MD; 1424 } 1425 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1426 MacroInfo *MI) { 1427 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1428 } 1429 1430 /// Set a MacroDirective that was loaded from a PCH file. 1431 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1432 MacroDirective *MD); 1433 1434 /// Register an exported macro for a module and identifier. 
1435 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, 1436 MacroInfo *Macro, 1437 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1438 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); 1439 1440 /// Get the list of leaf (non-overridden) module macros for a name. 1441 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1442 if (II->isOutOfDate()) 1443 updateOutOfDateIdentifier(*II); 1444 auto I = LeafModuleMacros.find(II); 1445 if (I != LeafModuleMacros.end()) 1446 return I->second; 1447 return {}; 1448 } 1449 1450 /// Get the list of submodules that we're currently building. 1451 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const { 1452 return BuildingSubmoduleStack; 1453 } 1454 1455 /// \{ 1456 /// Iterators for the macro history table. Currently defined macros have 1457 /// IdentifierInfo::hasMacroDefinition() set and an empty 1458 /// MacroInfo::getUndefLoc() at the head of the list. 1459 using macro_iterator = MacroMap::const_iterator; 1460 1461 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1462 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1463 1464 llvm::iterator_range<macro_iterator> 1465 macros(bool IncludeExternalMacros = true) const { 1466 macro_iterator begin = macro_begin(IncludeExternalMacros); 1467 macro_iterator end = macro_end(IncludeExternalMacros); 1468 return llvm::make_range(begin, end); 1469 } 1470 1471 /// \} 1472 1473 /// Mark the given clang module as affecting the current clang module or translation unit. 1474 void markClangModuleAsAffecting(Module *M) { 1475 assert(M->isModuleMapModule()); 1476 if (!BuildingSubmoduleStack.empty()) { 1477 if (M != BuildingSubmoduleStack.back().M) 1478 BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M); 1479 } else { 1480 AffectingClangModules.insert(M); 1481 } 1482 } 1483 1484 /// Get the set of top-level clang modules that affected preprocessing, but were not 1485 /// imported. 
1486 const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const { 1487 return AffectingClangModules; 1488 } 1489 1490 /// Mark the file as included. 1491 /// Returns true if this is the first time the file was included. 1492 bool markIncluded(FileEntryRef File) { 1493 HeaderInfo.getFileInfo(File).IsLocallyIncluded = true; 1494 return IncludedFiles.insert(File).second; 1495 } 1496 1497 /// Return true if this header has already been included. 1498 bool alreadyIncluded(FileEntryRef File) const { 1499 HeaderInfo.getFileInfo(File); 1500 return IncludedFiles.count(File); 1501 } 1502 1503 /// Get the set of included files. 1504 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } 1505 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } 1506 1507 /// Return the name of the macro defined before \p Loc that has 1508 /// spelling \p Tokens. If there are multiple macros with same spelling, 1509 /// return the last one defined. 1510 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1511 ArrayRef<TokenValue> Tokens) const; 1512 1513 /// Get the predefines for this processor. 1514 /// Used by some third-party tools to inspect and add predefines (see 1515 /// https://github.com/llvm/llvm-project/issues/57483). 1516 const std::string &getPredefines() const { return Predefines; } 1517 1518 /// Set the predefines for this Preprocessor. 1519 /// 1520 /// These predefines are automatically injected when parsing the main file. 1521 void setPredefines(std::string P) { Predefines = std::move(P); } 1522 1523 /// Return information about the specified preprocessor 1524 /// identifier token. 1525 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1526 return &Identifiers.get(Name); 1527 } 1528 1529 /// Add the specified pragma handler to this preprocessor. 1530 /// 1531 /// If \p Namespace is non-null, then it is a token required to exist on the 1532 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 
1533 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); 1534 void AddPragmaHandler(PragmaHandler *Handler) { 1535 AddPragmaHandler(StringRef(), Handler); 1536 } 1537 1538 /// Remove the specific pragma handler from this preprocessor. 1539 /// 1540 /// If \p Namespace is non-null, then it should be the namespace that 1541 /// \p Handler was added to. It is an error to remove a handler that 1542 /// has not been registered. 1543 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); 1544 void RemovePragmaHandler(PragmaHandler *Handler) { 1545 RemovePragmaHandler(StringRef(), Handler); 1546 } 1547 1548 /// Install empty handlers for all pragmas (making them ignored). 1549 void IgnorePragmas(); 1550 1551 /// Set empty line handler. 1552 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } 1553 1554 EmptylineHandler *getEmptylineHandler() const { return Emptyline; } 1555 1556 /// Add the specified comment handler to the preprocessor. 1557 void addCommentHandler(CommentHandler *Handler); 1558 1559 /// Remove the specified comment handler. 1560 /// 1561 /// It is an error to remove a handler that has not been registered. 1562 void removeCommentHandler(CommentHandler *Handler); 1563 1564 /// Set the code completion handler to the given object. 1565 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1566 CodeComplete = &Handler; 1567 } 1568 1569 /// Retrieve the current code-completion handler. 1570 CodeCompletionHandler *getCodeCompletionHandler() const { 1571 return CodeComplete; 1572 } 1573 1574 /// Clear out the code completion handler. 1575 void clearCodeCompletionHandler() { 1576 CodeComplete = nullptr; 1577 } 1578 1579 /// Hook used by the lexer to invoke the "included file" code 1580 /// completion point. 1581 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1582 1583 /// Hook used by the lexer to invoke the "natural language" code 1584 /// completion point. 
1585 void CodeCompleteNaturalLanguage(); 1586 1587 /// Set the code completion token for filtering purposes. 1588 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1589 CodeCompletionII = Filter; 1590 } 1591 1592 /// Set the code completion token range for detecting replacement range later 1593 /// on. 1594 void setCodeCompletionTokenRange(const SourceLocation Start, 1595 const SourceLocation End) { 1596 CodeCompletionTokenRange = {Start, End}; 1597 } 1598 SourceRange getCodeCompletionTokenRange() const { 1599 return CodeCompletionTokenRange; 1600 } 1601 1602 /// Get the code completion token for filtering purposes. 1603 StringRef getCodeCompletionFilter() { 1604 if (CodeCompletionII) 1605 return CodeCompletionII->getName(); 1606 return {}; 1607 } 1608 1609 /// Retrieve the preprocessing record, or NULL if there is no 1610 /// preprocessing record. 1611 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1612 1613 /// Create a new preprocessing record, which will keep track of 1614 /// all macro expansions, macro definitions, etc. 1615 void createPreprocessingRecord(); 1616 1617 /// Returns true if the FileEntry is the PCH through header. 1618 bool isPCHThroughHeader(const FileEntry *FE); 1619 1620 /// True if creating a PCH with a through header. 1621 bool creatingPCHWithThroughHeader(); 1622 1623 /// True if using a PCH with a through header. 1624 bool usingPCHWithThroughHeader(); 1625 1626 /// True if creating a PCH with a #pragma hdrstop. 1627 bool creatingPCHWithPragmaHdrStop(); 1628 1629 /// True if using a PCH with a #pragma hdrstop. 1630 bool usingPCHWithPragmaHdrStop(); 1631 1632 /// Skip tokens until after the #include of the through header or 1633 /// until after a #pragma hdrstop. 1634 void SkipTokensWhileUsingPCH(); 1635 1636 /// Process directives while skipping until the through header or 1637 /// #pragma hdrstop is found. 
1638 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1639 SourceLocation HashLoc); 1640 1641 /// Enter the specified FileID as the main source file, 1642 /// which implicitly adds the builtin defines etc. 1643 void EnterMainSourceFile(); 1644 1645 /// Inform the preprocessor callbacks that processing is complete. 1646 void EndSourceFile(); 1647 1648 /// Add a source file to the top of the include stack and 1649 /// start lexing tokens from it instead of the current buffer. 1650 /// 1651 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1652 bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, 1653 SourceLocation Loc, bool IsFirstIncludeOfFile = true); 1654 1655 /// Add a Macro to the top of the include stack and start lexing 1656 /// tokens from it instead of the current buffer. 1657 /// 1658 /// \param Args specifies the tokens input to a function-like macro. 1659 /// \param ILEnd specifies the location of the ')' for a function-like macro 1660 /// or the identifier for an object-like macro. 1661 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, 1662 MacroArgs *Args); 1663 1664 private: 1665 /// Add a "macro" context to the top of the include stack, 1666 /// which will cause the lexer to start returning the specified tokens. 1667 /// 1668 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1669 /// will not be subject to further macro expansion. Otherwise, these tokens 1670 /// will be re-macro-expanded when/if expansion is enabled. 1671 /// 1672 /// If \p OwnsTokens is false, this method assumes that the specified stream 1673 /// of tokens has a permanent owner somewhere, so they do not need to be 1674 /// copied. If it is true, it assumes the array of tokens is allocated with 1675 /// \c new[] and the Preprocessor will delete[] it. 1676 /// 1677 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag 1678 /// set, see the flag documentation for details. 
1679 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1680 bool DisableMacroExpansion, bool OwnsTokens, 1681 bool IsReinject); 1682 1683 public: 1684 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1685 bool DisableMacroExpansion, bool IsReinject) { 1686 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1687 IsReinject); 1688 } 1689 1690 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1691 bool IsReinject) { 1692 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1693 IsReinject); 1694 } 1695 1696 /// Pop the current lexer/macro exp off the top of the lexer stack. 1697 /// 1698 /// This should only be used in situations where the current state of the 1699 /// top-of-stack lexer is known. 1700 void RemoveTopOfLexerStack(); 1701 1702 /// From the point that this method is called, and until 1703 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1704 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1705 /// make the Preprocessor re-lex the same tokens. 1706 /// 1707 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1708 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1709 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1710 /// 1711 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1712 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1713 /// tokens will continue indefinitely. 1714 /// 1715 /// \param Unannotated Whether token annotations are reverted upon calling 1716 /// Backtrack(). 1717 void EnableBacktrackAtThisPos(bool Unannotated = false); 1718 1719 private: 1720 std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos(); 1721 1722 CachedTokensTy PopUnannotatedBacktrackTokens(); 1723 1724 public: 1725 /// Disable the last EnableBacktrackAtThisPos call. 
1726 void CommitBacktrackedTokens(); 1727 1728 /// Make Preprocessor re-lex the tokens that were lexed since 1729 /// EnableBacktrackAtThisPos() was previously called. 1730 void Backtrack(); 1731 1732 /// True if EnableBacktrackAtThisPos() was called and 1733 /// caching of tokens is on. 1734 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1735 1736 /// True if EnableBacktrackAtThisPos() was called and 1737 /// caching of unannotated tokens is on. 1738 bool isUnannotatedBacktrackEnabled() const { 1739 return !UnannotatedBacktrackTokens.empty(); 1740 } 1741 1742 /// Lex the next token for this preprocessor. 1743 void Lex(Token &Result); 1744 1745 /// Lex all tokens for this preprocessor until (and excluding) end of file. 1746 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr); 1747 1748 /// Lex a token, forming a header-name token if possible. 1749 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1750 1751 /// Lex the parameters for an #embed directive, returns nullopt on error. 1752 std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, 1753 bool ForHasEmbed); 1754 1755 bool LexAfterModuleImport(Token &Result); 1756 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1757 1758 void makeModuleVisible(Module *M, SourceLocation Loc); 1759 1760 SourceLocation getModuleImportLoc(Module *M) const { 1761 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1762 } 1763 1764 /// Lex a string literal, which may be the concatenation of multiple 1765 /// string literals and may even come from macro expansion. 1766 /// \returns true on success, false if a error diagnostic has been generated. 
1767 bool LexStringLiteral(Token &Result, std::string &String, 1768 const char *DiagnosticTag, bool AllowMacroExpansion) { 1769 if (AllowMacroExpansion) 1770 Lex(Result); 1771 else 1772 LexUnexpandedToken(Result); 1773 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1774 AllowMacroExpansion); 1775 } 1776 1777 /// Complete the lexing of a string literal where the first token has 1778 /// already been lexed (see LexStringLiteral). 1779 bool FinishLexStringLiteral(Token &Result, std::string &String, 1780 const char *DiagnosticTag, 1781 bool AllowMacroExpansion); 1782 1783 /// Lex a token. If it's a comment, keep lexing until we get 1784 /// something not a comment. 1785 /// 1786 /// This is useful in -E -C mode where comments would foul up preprocessor 1787 /// directive handling. 1788 void LexNonComment(Token &Result) { 1789 do 1790 Lex(Result); 1791 while (Result.getKind() == tok::comment); 1792 } 1793 1794 /// Just like Lex, but disables macro expansion of identifier tokens. 1795 void LexUnexpandedToken(Token &Result) { 1796 // Disable macro expansion. 1797 bool OldVal = DisableMacroExpansion; 1798 DisableMacroExpansion = true; 1799 // Lex the token. 1800 Lex(Result); 1801 1802 // Reenable it. 1803 DisableMacroExpansion = OldVal; 1804 } 1805 1806 /// Like LexNonComment, but this disables macro expansion of 1807 /// identifier tokens. 1808 void LexUnexpandedNonComment(Token &Result) { 1809 do 1810 LexUnexpandedToken(Result); 1811 while (Result.getKind() == tok::comment); 1812 } 1813 1814 /// Parses a simple integer literal to get its numeric value. Floating 1815 /// point literals and user defined literals are rejected. Used primarily to 1816 /// handle pragmas that accept integer arguments. 1817 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1818 1819 /// Disables macro expansion everywhere except for preprocessor directives. 
1820 void SetMacroExpansionOnlyInDirectives() { 1821 DisableMacroExpansion = true; 1822 MacroExpansionInDirectivesOverride = true; 1823 } 1824 1825 /// Peeks ahead N tokens and returns that token without consuming any 1826 /// tokens. 1827 /// 1828 /// LookAhead(0) returns the next token that would be returned by Lex(), 1829 /// LookAhead(1) returns the token after it, etc. This returns normal 1830 /// tokens after phase 5. As such, it is equivalent to using 1831 /// 'Lex', not 'LexUnexpandedToken'. 1832 const Token &LookAhead(unsigned N) { 1833 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1834 if (CachedLexPos + N < CachedTokens.size()) 1835 return CachedTokens[CachedLexPos+N]; 1836 else 1837 return PeekAhead(N+1); 1838 } 1839 1840 /// When backtracking is enabled and tokens are cached, 1841 /// this allows to revert a specific number of tokens. 1842 /// 1843 /// Note that the number of tokens being reverted should be up to the last 1844 /// backtrack position, not more. 1845 void RevertCachedTokens(unsigned N) { 1846 assert(isBacktrackEnabled() && 1847 "Should only be called when tokens are cached for backtracking"); 1848 assert(signed(CachedLexPos) - signed(N) >= 1849 signed(LastBacktrackPos().first) && 1850 "Should revert tokens up to the last backtrack position, not more"); 1851 assert(signed(CachedLexPos) - signed(N) >= 0 && 1852 "Corrupted backtrack positions ?"); 1853 CachedLexPos -= N; 1854 } 1855 1856 /// Enters a token in the token stream to be lexed next. 1857 /// 1858 /// If BackTrack() is called afterwards, the token will remain at the 1859 /// insertion point. 1860 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1861 /// flag set. See the flag documentation for details. 1862 void EnterToken(const Token &Tok, bool IsReinject) { 1863 if (LexLevel) { 1864 // It's not correct in general to enter caching lex mode while in the 1865 // middle of a nested lexing action. 
1866 auto TokCopy = std::make_unique<Token[]>(1); 1867 TokCopy[0] = Tok; 1868 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1869 } else { 1870 EnterCachingLexMode(); 1871 assert(IsReinject && "new tokens in the middle of cached stream"); 1872 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1873 } 1874 } 1875 1876 /// We notify the Preprocessor that if it is caching tokens (because 1877 /// backtrack is enabled) it should replace the most recent cached tokens 1878 /// with the given annotation token. This function has no effect if 1879 /// backtracking is not enabled. 1880 /// 1881 /// Note that the use of this function is just for optimization, so that the 1882 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1883 /// invoked. 1884 void AnnotateCachedTokens(const Token &Tok) { 1885 assert(Tok.isAnnotation() && "Expected annotation token"); 1886 if (CachedLexPos != 0 && isBacktrackEnabled()) 1887 AnnotatePreviousCachedTokens(Tok); 1888 } 1889 1890 /// Get the location of the last cached token, suitable for setting the end 1891 /// location of an annotation token. 1892 SourceLocation getLastCachedTokenLocation() const { 1893 assert(CachedLexPos != 0); 1894 return CachedTokens[CachedLexPos-1].getLastLoc(); 1895 } 1896 1897 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1898 /// CachedTokens. 1899 bool IsPreviousCachedToken(const Token &Tok) const; 1900 1901 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1902 /// in \p NewToks. 1903 /// 1904 /// Useful when a token needs to be split in smaller ones and CachedTokens 1905 /// most recent token must to be updated to reflect that. 1906 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1907 1908 /// Replace the last token with an annotation token. 1909 /// 1910 /// Like AnnotateCachedTokens(), this routine replaces an 1911 /// already-parsed (and resolved) token with an annotation 1912 /// token. 
However, this routine only replaces the last token with 1913 /// the annotation token; it does not affect any other cached 1914 /// tokens. This function has no effect if backtracking is not 1915 /// enabled. 1916 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1917 assert(Tok.isAnnotation() && "Expected annotation token"); 1918 if (CachedLexPos != 0 && isBacktrackEnabled()) 1919 CachedTokens[CachedLexPos-1] = Tok; 1920 } 1921 1922 /// Enter an annotation token into the token stream. 1923 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1924 void *AnnotationVal); 1925 1926 /// Determine whether it's possible for a future call to Lex to produce an 1927 /// annotation token created by a previous call to EnterAnnotationToken. 1928 bool mightHavePendingAnnotationTokens() { 1929 return CurLexerCallback != CLK_Lexer; 1930 } 1931 1932 /// Update the current token to represent the provided 1933 /// identifier, in order to cache an action performed by typo correction. 1934 void TypoCorrectToken(const Token &Tok) { 1935 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1936 if (CachedLexPos != 0 && isBacktrackEnabled()) 1937 CachedTokens[CachedLexPos-1] = Tok; 1938 } 1939 1940 /// Recompute the current lexer kind based on the CurLexer/ 1941 /// CurTokenLexer pointers. 1942 void recomputeCurLexerKind(); 1943 1944 /// Returns true if incremental processing is enabled 1945 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1946 1947 /// Enables the incremental processing 1948 void enableIncrementalProcessing(bool value = true) { 1949 IncrementalProcessing = value; 1950 } 1951 1952 /// Specify the point at which code-completion will be performed. 1953 /// 1954 /// \param File the file in which code completion should occur. If 1955 /// this file is included multiple times, code-completion will 1956 /// perform completion the first time it is included. 
If NULL, this 1957 /// function clears out the code-completion point. 1958 /// 1959 /// \param Line the line at which code completion should occur 1960 /// (1-based). 1961 /// 1962 /// \param Column the column at which code completion should occur 1963 /// (1-based). 1964 /// 1965 /// \returns true if an error occurred, false otherwise. 1966 bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, 1967 unsigned Column); 1968 1969 /// Determine if we are performing code completion. 1970 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1971 1972 /// Returns the location of the code-completion point. 1973 /// 1974 /// Returns an invalid location if code-completion is not enabled or the file 1975 /// containing the code-completion point has not been lexed yet. 1976 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1977 1978 /// Returns the start location of the file of code-completion point. 1979 /// 1980 /// Returns an invalid location if code-completion is not enabled or the file 1981 /// containing the code-completion point has not been lexed yet. 1982 SourceLocation getCodeCompletionFileLoc() const { 1983 return CodeCompletionFileLoc; 1984 } 1985 1986 /// Returns true if code-completion is enabled and we have hit the 1987 /// code-completion point. 1988 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1989 1990 /// Note that we hit the code-completion point. 1991 void setCodeCompletionReached() { 1992 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1993 CodeCompletionReached = true; 1994 // Silence any diagnostics that occur after we hit the code-completion. 1995 getDiagnostics().setSuppressAllDiagnostics(true); 1996 } 1997 1998 /// The location of the currently-active \#pragma clang 1999 /// arc_cf_code_audited begin. 2000 /// 2001 /// Returns an invalid location if there is no such pragma active. 
2002 std::pair<IdentifierInfo *, SourceLocation> 2003 getPragmaARCCFCodeAuditedInfo() const { 2004 return PragmaARCCFCodeAuditedInfo; 2005 } 2006 2007 /// Set the location of the currently-active \#pragma clang 2008 /// arc_cf_code_audited begin. An invalid location ends the pragma. 2009 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, 2010 SourceLocation Loc) { 2011 PragmaARCCFCodeAuditedInfo = {Ident, Loc}; 2012 } 2013 2014 /// The location of the currently-active \#pragma clang 2015 /// assume_nonnull begin. 2016 /// 2017 /// Returns an invalid location if there is no such pragma active. 2018 SourceLocation getPragmaAssumeNonNullLoc() const { 2019 return PragmaAssumeNonNullLoc; 2020 } 2021 2022 /// Set the location of the currently-active \#pragma clang 2023 /// assume_nonnull begin. An invalid location ends the pragma. 2024 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 2025 PragmaAssumeNonNullLoc = Loc; 2026 } 2027 2028 /// Get the location of the recorded unterminated \#pragma clang 2029 /// assume_nonnull begin in the preamble, if one exists. 2030 /// 2031 /// Returns an invalid location if the premable did not end with 2032 /// such a pragma active or if there is no recorded preamble. 2033 SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const { 2034 return PreambleRecordedPragmaAssumeNonNullLoc; 2035 } 2036 2037 /// Record the location of the unterminated \#pragma clang 2038 /// assume_nonnull begin in the preamble. 2039 void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) { 2040 PreambleRecordedPragmaAssumeNonNullLoc = Loc; 2041 } 2042 2043 /// Set the directory in which the main file should be considered 2044 /// to have been found, if it is not a real file. 2045 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; } 2046 2047 /// Instruct the preprocessor to skip part of the main source file. 2048 /// 2049 /// \param Bytes The number of bytes in the preamble to skip. 
2050 /// 2051 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 2052 /// start of a line. 2053 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 2054 SkipMainFilePreamble.first = Bytes; 2055 SkipMainFilePreamble.second = StartOfLine; 2056 } 2057 2058 /// Forwarding function for diagnostics. This emits a diagnostic at 2059 /// the specified Token's location, translating the token's start 2060 /// position in the current buffer into a SourcePosition object for rendering. 2061 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 2062 return Diags->Report(Loc, DiagID); 2063 } 2064 2065 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 2066 return Diags->Report(Tok.getLocation(), DiagID); 2067 } 2068 2069 /// Return the 'spelling' of the token at the given 2070 /// location; does not go up to the spelling location or down to the 2071 /// expansion location. 2072 /// 2073 /// \param buffer A buffer which will be used only if the token requires 2074 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 2075 /// \param invalid If non-null, will be set \c true if an error occurs. 2076 StringRef getSpelling(SourceLocation loc, 2077 SmallVectorImpl<char> &buffer, 2078 bool *invalid = nullptr) const { 2079 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 2080 } 2081 2082 /// Return the 'spelling' of the Tok token. 2083 /// 2084 /// The spelling of a token is the characters used to represent the token in 2085 /// the source file after trigraph expansion and escaped-newline folding. In 2086 /// particular, this wants to get the true, uncanonicalized, spelling of 2087 /// things like digraphs, UCNs, etc. 2088 /// 2089 /// \param Invalid If non-null, will be set \c true if an error occurs. 
2090 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 2091 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 2092 } 2093 2094 /// Get the spelling of a token into a preallocated buffer, instead 2095 /// of as an std::string. 2096 /// 2097 /// The caller is required to allocate enough space for the token, which is 2098 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 2099 /// actual result is returned. 2100 /// 2101 /// Note that this method may do two possible things: it may either fill in 2102 /// the buffer specified with characters, or it may *change the input pointer* 2103 /// to point to a constant buffer with the data already in it (avoiding a 2104 /// copy). The caller is not allowed to modify the returned buffer pointer 2105 /// if an internal buffer is returned. 2106 unsigned getSpelling(const Token &Tok, const char *&Buffer, 2107 bool *Invalid = nullptr) const { 2108 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 2109 } 2110 2111 /// Get the spelling of a token into a SmallVector. 2112 /// 2113 /// Note that the returned StringRef may not point to the 2114 /// supplied buffer if a copy can be avoided. 2115 StringRef getSpelling(const Token &Tok, 2116 SmallVectorImpl<char> &Buffer, 2117 bool *Invalid = nullptr) const; 2118 2119 /// Relex the token at the specified location. 2120 /// \returns true if there was a failure, false on success. 2121 bool getRawToken(SourceLocation Loc, Token &Result, 2122 bool IgnoreWhiteSpace = false) { 2123 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 2124 } 2125 2126 /// Given a Token \p Tok that is a numeric constant with length 1, 2127 /// return the value of constant as an unsigned 8-bit integer. 
2128 uint8_t 2129 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 2130 bool *Invalid = nullptr) const { 2131 assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) && 2132 Tok.getLength() == 1 && "Called on unsupported token"); 2133 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 2134 2135 // If the token is carrying a literal data pointer, just use it. 2136 if (const char *D = Tok.getLiteralData()) 2137 return (Tok.getKind() == tok::binary_data) ? *D : *D - '0'; 2138 2139 assert(Tok.is(tok::numeric_constant) && "binary data with no data"); 2140 // Otherwise, fall back on getCharacterData, which is slower, but always 2141 // works. 2142 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0'; 2143 } 2144 2145 /// Retrieve the name of the immediate macro expansion. 2146 /// 2147 /// This routine starts from a source location, and finds the name of the 2148 /// macro responsible for its immediate expansion. It looks through any 2149 /// intervening macro argument expansions to compute this. It returns a 2150 /// StringRef that refers to the SourceManager-owned buffer of the source 2151 /// where that macro name is spelled. Thus, the result shouldn't out-live 2152 /// the SourceManager. 2153 StringRef getImmediateMacroName(SourceLocation Loc) { 2154 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 2155 } 2156 2157 /// Plop the specified string into a scratch buffer and set the 2158 /// specified token's location and length to it. 2159 /// 2160 /// If specified, the source location provides a location of the expansion 2161 /// point of the token. 2162 void CreateString(StringRef Str, Token &Tok, 2163 SourceLocation ExpansionLocStart = SourceLocation(), 2164 SourceLocation ExpansionLocEnd = SourceLocation()); 2165 2166 /// Split the first Length characters out of the token starting at TokLoc 2167 /// and return a location pointing to the split token. 
Re-lexing from the 2168 /// split token will return the split token rather than the original. 2169 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length); 2170 2171 /// Computes the source location just past the end of the 2172 /// token at this source location. 2173 /// 2174 /// This routine can be used to produce a source location that 2175 /// points just past the end of the token referenced by \p Loc, and 2176 /// is generally used when a diagnostic needs to point just after a 2177 /// token where it expected something different that it received. If 2178 /// the returned source location would not be meaningful (e.g., if 2179 /// it points into a macro), this routine returns an invalid 2180 /// source location. 2181 /// 2182 /// \param Offset an offset from the end of the token, where the source 2183 /// location should refer to. The default offset (0) produces a source 2184 /// location pointing just past the end of the token; an offset of 1 produces 2185 /// a source location pointing to the last character in the token, etc. 2186 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 2187 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 2188 } 2189 2190 /// Returns true if the given MacroID location points at the first 2191 /// token of the macro expansion. 2192 /// 2193 /// \param MacroBegin If non-null and function returns true, it is set to 2194 /// begin location of the macro. 2195 bool isAtStartOfMacroExpansion(SourceLocation loc, 2196 SourceLocation *MacroBegin = nullptr) const { 2197 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 2198 MacroBegin); 2199 } 2200 2201 /// Returns true if the given MacroID location points at the last 2202 /// token of the macro expansion. 2203 /// 2204 /// \param MacroEnd If non-null and function returns true, it is set to 2205 /// end location of the macro. 
2206 bool isAtEndOfMacroExpansion(SourceLocation loc, 2207 SourceLocation *MacroEnd = nullptr) const { 2208 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 2209 } 2210 2211 /// Print the token to stderr, used for debugging. 2212 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 2213 void DumpLocation(SourceLocation Loc) const; 2214 void DumpMacro(const MacroInfo &MI) const; 2215 void dumpMacroInfo(const IdentifierInfo *II); 2216 2217 /// Given a location that specifies the start of a 2218 /// token, return a new location that specifies a character within the token. 2219 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 2220 unsigned Char) const { 2221 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 2222 } 2223 2224 /// Increment the counters for the number of token paste operations 2225 /// performed. 2226 /// 2227 /// If fast was specified, this is a 'fast paste' case we handled. 2228 void IncrementPasteCounter(bool isFast) { 2229 if (isFast) 2230 ++NumFastTokenPaste; 2231 else 2232 ++NumTokenPaste; 2233 } 2234 2235 void PrintStats(); 2236 2237 size_t getTotalMemory() const; 2238 2239 /// When the macro expander pastes together a comment (/##/) in Microsoft 2240 /// mode, this method handles updating the current state, returning the 2241 /// token on the next source line. 2242 void HandleMicrosoftCommentPaste(Token &Tok); 2243 2244 //===--------------------------------------------------------------------===// 2245 // Preprocessor callback methods. These are invoked by a lexer as various 2246 // directives and events are found. 2247 2248 /// Given a tok::raw_identifier token, look up the 2249 /// identifier information for the token and install it into the token, 2250 /// updating the token kind accordingly. 
2251 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 2252 2253 private: 2254 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 2255 2256 public: 2257 /// Specifies the reason for poisoning an identifier. 2258 /// 2259 /// If that identifier is accessed while poisoned, then this reason will be 2260 /// used instead of the default "poisoned" diagnostic. 2261 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 2262 2263 /// Display reason for poisoned identifier. 2264 void HandlePoisonedIdentifier(Token & Identifier); 2265 2266 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 2267 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 2268 if(II->isPoisoned()) { 2269 HandlePoisonedIdentifier(Identifier); 2270 } 2271 } 2272 } 2273 2274 /// Determine whether the next preprocessor token to be 2275 /// lexed is a '('. If so, consume the token and return true, if not, this 2276 /// method should have no observable side-effect on the lexed tokens. 2277 bool isNextPPTokenLParen(); 2278 2279 private: 2280 /// Identifiers used for SEH handling in Borland. These are only 2281 /// allowed in particular circumstances 2282 // __except block 2283 IdentifierInfo *Ident__exception_code, 2284 *Ident___exception_code, 2285 *Ident_GetExceptionCode; 2286 // __except filter expression 2287 IdentifierInfo *Ident__exception_info, 2288 *Ident___exception_info, 2289 *Ident_GetExceptionInfo; 2290 // __finally 2291 IdentifierInfo *Ident__abnormal_termination, 2292 *Ident___abnormal_termination, 2293 *Ident_AbnormalTermination; 2294 2295 const char *getCurLexerEndPos(); 2296 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 2297 2298 public: 2299 void PoisonSEHIdentifiers(bool Poison = true); // Borland 2300 2301 /// Callback invoked when the lexer reads an identifier and has 2302 /// filled in the tokens IdentifierInfo member. 2303 /// 2304 /// This callback potentially macro expands it or turns it into a named 2305 /// token (like 'for'). 
2306 /// 2307 /// \returns true if we actually computed a token, false if we need to 2308 /// lex again. 2309 bool HandleIdentifier(Token &Identifier); 2310 2311 /// Callback invoked when the lexer hits the end of the current file. 2312 /// 2313 /// This either returns the EOF token and returns true, or 2314 /// pops a level off the include stack and returns false, at which point the 2315 /// client should call lex again. 2316 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 2317 2318 /// Callback invoked when the current TokenLexer hits the end of its 2319 /// token stream. 2320 bool HandleEndOfTokenLexer(Token &Result); 2321 2322 /// Callback invoked when the lexer sees a # token at the start of a 2323 /// line. 2324 /// 2325 /// This consumes the directive, modifies the lexer/preprocessor state, and 2326 /// advances the lexer(s) so that the next token read is the correct one. 2327 void HandleDirective(Token &Result); 2328 2329 /// Ensure that the next token is a tok::eod token. 2330 /// 2331 /// If not, emit a diagnostic and consume up until the eod. 2332 /// If \p EnableMacros is true, then we consider macros that expand to zero 2333 /// tokens as being ok. 2334 /// 2335 /// \return The location of the end of the directive (the terminating 2336 /// newline). 2337 SourceLocation CheckEndOfDirective(const char *DirType, 2338 bool EnableMacros = false); 2339 2340 /// Read and discard all tokens remaining on the current line until 2341 /// the tok::eod token is found. Returns the range of the skipped tokens. 2342 SourceRange DiscardUntilEndOfDirective() { 2343 Token Tmp; 2344 return DiscardUntilEndOfDirective(Tmp); 2345 } 2346 2347 /// Same as above except retains the token that was found. 2348 SourceRange DiscardUntilEndOfDirective(Token &Tok); 2349 2350 /// Returns true if the preprocessor has seen a use of 2351 /// __DATE__ or __TIME__ in the file so far. 
2352 bool SawDateOrTime() const { 2353 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 2354 } 2355 unsigned getCounterValue() const { return CounterValue; } 2356 void setCounterValue(unsigned V) { CounterValue = V; } 2357 2358 LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const { 2359 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine && 2360 "FPEvalMethod should be set either from command line or from the " 2361 "target info"); 2362 return CurrentFPEvalMethod; 2363 } 2364 2365 LangOptions::FPEvalMethodKind getTUFPEvalMethod() const { 2366 return TUFPEvalMethod; 2367 } 2368 2369 SourceLocation getLastFPEvalPragmaLocation() const { 2370 return LastFPEvalPragmaLocation; 2371 } 2372 2373 void setCurrentFPEvalMethod(SourceLocation PragmaLoc, 2374 LangOptions::FPEvalMethodKind Val) { 2375 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2376 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2377 // This is the location of the '#pragma float_control" where the 2378 // execution state is modifed. 2379 LastFPEvalPragmaLocation = PragmaLoc; 2380 CurrentFPEvalMethod = Val; 2381 TUFPEvalMethod = Val; 2382 } 2383 2384 void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2385 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2386 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2387 TUFPEvalMethod = Val; 2388 } 2389 2390 /// Retrieves the module that we're currently building, if any. 2391 Module *getCurrentModule(); 2392 2393 /// Retrieves the module whose implementation we're current compiling, if any. 2394 Module *getCurrentModuleImplementation(); 2395 2396 /// If we are preprocessing a named module. 2397 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); } 2398 2399 /// If we are proprocessing a named interface unit. 
2400 /// Note that a module implementation partition is not considered as an 2401 /// named interface unit here although it is importable 2402 /// to ease the parsing. 2403 bool isInNamedInterfaceUnit() const { 2404 return ModuleDeclState.isNamedInterface(); 2405 } 2406 2407 /// Get the named module name we're preprocessing. 2408 /// Requires we're preprocessing a named module. 2409 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); } 2410 2411 /// If we are implementing an implementation module unit. 2412 /// Note that the module implementation partition is not considered as an 2413 /// implementation unit. 2414 bool isInImplementationUnit() const { 2415 return ModuleDeclState.isImplementationUnit(); 2416 } 2417 2418 /// If we're importing a standard C++20 Named Modules. 2419 bool isInImportingCXXNamedModules() const { 2420 // NamedModuleImportPath will be non-empty only if we're importing 2421 // Standard C++ named modules. 2422 return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules && 2423 !IsAtImport; 2424 } 2425 2426 /// Allocate a new MacroInfo object with the provided SourceLocation. 2427 MacroInfo *AllocateMacroInfo(SourceLocation L); 2428 2429 /// Turn the specified lexer token into a fully checked and spelled 2430 /// filename, e.g. as an operand of \#include. 2431 /// 2432 /// The caller is expected to provide a buffer that is large enough to hold 2433 /// the spelling of the filename, but is also expected to handle the case 2434 /// when this method decides to use a different buffer. 2435 /// 2436 /// \returns true if the input filename was in <>'s or false if it was 2437 /// in ""'s. 2438 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 2439 2440 /// Given a "foo" or \<foo> reference, look up the indicated file. 2441 /// 2442 /// Returns std::nullopt on failure. \p isAngled indicates whether the file 2443 /// reference is for system \#include's or not (i.e. using <> instead of ""). 
2444 OptionalFileEntryRef 2445 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 2446 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 2447 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath, 2448 SmallVectorImpl<char> *RelativePath, 2449 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 2450 bool *IsFrameworkFound, bool SkipCache = false, 2451 bool OpenFile = true, bool CacheFailures = true); 2452 2453 /// Given a "Filename" or \<Filename> reference, look up the indicated embed 2454 /// resource. \p isAngled indicates whether the file reference is for 2455 /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile 2456 /// is true, the file looked up is opened for reading, otherwise it only 2457 /// validates that the file exists. Quoted filenames are looked up relative 2458 /// to \p LookupFromFile if it is nonnull. 2459 /// 2460 /// Returns std::nullopt on failure. 2461 OptionalFileEntryRef 2462 LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, 2463 const FileEntry *LookupFromFile = nullptr); 2464 2465 /// Return true if we're in the top-level file, not in a \#include. 2466 bool isInPrimaryFile() const; 2467 2468 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2469 /// followed by EOD. Return true if the token is not a valid on-off-switch. 
2470 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2471 2472 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2473 bool *ShadowFlag = nullptr); 2474 2475 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2476 Module *LeaveSubmodule(bool ForPragma); 2477 2478 private: 2479 friend void TokenLexer::ExpandFunctionArguments(); 2480 2481 void PushIncludeMacroStack() { 2482 assert(CurLexerCallback != CLK_CachingLexer && 2483 "cannot push a caching lexer"); 2484 IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule, 2485 std::move(CurLexer), CurPPLexer, 2486 std::move(CurTokenLexer), CurDirLookup); 2487 CurPPLexer = nullptr; 2488 } 2489 2490 void PopIncludeMacroStack() { 2491 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2492 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2493 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2494 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2495 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2496 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback; 2497 IncludeMacroStack.pop_back(); 2498 } 2499 2500 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2501 2502 /// Determine whether we need to create module macros for #defines in the 2503 /// current context. 2504 bool needModuleMacros() const; 2505 2506 /// Update the set of active module macros and ambiguity flag for a module 2507 /// macro name. 2508 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2509 2510 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2511 SourceLocation Loc); 2512 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2513 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2514 bool isPublic); 2515 2516 /// Lex and validate a macro name, which occurs after a 2517 /// \#define or \#undef. 
2518 /// 2519 /// \param MacroNameTok Token that represents the name defined or undefined. 2520 /// \param IsDefineUndef Kind if preprocessor directive. 2521 /// \param ShadowFlag Points to flag that is set if macro name shadows 2522 /// a keyword. 2523 /// 2524 /// This emits a diagnostic, sets the token kind to eod, 2525 /// and discards the rest of the macro line if the macro name is invalid. 2526 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 2527 bool *ShadowFlag = nullptr); 2528 2529 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the 2530 /// entire line) of the macro's tokens and adds them to MacroInfo, and while 2531 /// doing so performs certain validity checks including (but not limited to): 2532 /// - # (stringization) is followed by a macro parameter 2533 /// \param MacroNameTok - Token that represents the macro name 2534 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard 2535 /// 2536 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and 2537 /// returns a nullptr if an invalid sequence of tokens is encountered. 2538 MacroInfo *ReadOptionalMacroParameterListAndBody( 2539 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard); 2540 2541 /// The ( starting an argument list of a macro definition has just been read. 2542 /// Lex the rest of the parameters and the closing ), updating \p MI with 2543 /// what we learn and saving in \p LastTok the last token read. 2544 /// Return true if an error occurs parsing the arg list. 2545 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); 2546 2547 /// Provide a suggestion for a typoed directive. If there is no typo, then 2548 /// just skip suggesting. 
///
/// \param Tok - Token that represents the directive
/// \param Directive - String reference for the directive name
void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;

/// We just read a \#if or related directive and decided that the
/// subsequent tokens are in the \#if'd out portion of the
/// file.  Lex the rest of the file, until we see an \#endif.
///
/// If \p FoundNonSkipPortion is true, then we have already emitted code for
/// part of this \#if directive, so \#else/\#elif blocks should never be
/// entered. If \p FoundElse is false, then \#else directives are ok;
/// otherwise we have already seen one, so a further \#else directive is a
/// duplicate. When this returns, the caller can lex the first valid token.
void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                  SourceLocation IfTokenLoc,
                                  bool FoundNonSkipPortion, bool FoundElse,
                                  SourceLocation ElseLoc = SourceLocation());

/// Information about the result for evaluating an expression for a
/// preprocessor directive.
///
/// Note that \c Conditional and \c IncludedUndefinedIds carry no default
/// member initializer, so whoever creates an instance must set them.
struct DirectiveEvalResult {
  /// The integral value of the expression. Optional, so it may be absent.
  std::optional<llvm::APSInt> Value;

  /// Whether the expression was evaluated as true or not.
  bool Conditional;

  /// True if the expression contained identifiers that were undefined.
  bool IncludedUndefinedIds;

  /// The source range for the expression.
  SourceRange ExprRange;
};

/// Evaluate an integer constant expression that may occur after a
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2587 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, 2588 bool CheckForEoD = true); 2589 2590 /// Evaluate an integer constant expression that may occur after a 2591 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2592 /// 2593 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 2594 /// \p EvaluatedDefined will contain the result of whether "defined" appeared 2595 /// in the evaluated expression or not. 2596 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, 2597 Token &Tok, 2598 bool &EvaluatedDefined, 2599 bool CheckForEoD = true); 2600 2601 /// Process a '__has_embed("path" [, ...])' expression. 2602 /// 2603 /// Returns predefined `__STDC_EMBED_*` macro values if 2604 /// successful. 2605 EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II); 2606 2607 /// Process a '__has_include("path")' expression. 2608 /// 2609 /// Returns true if successful. 2610 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II); 2611 2612 /// Process '__has_include_next("path")' expression. 2613 /// 2614 /// Returns true if successful. 2615 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II); 2616 2617 /// Get the directory and file from which to start \#include_next lookup. 2618 std::pair<ConstSearchDirIterator, const FileEntry *> 2619 getIncludeNextStart(const Token &IncludeNextTok) const; 2620 2621 /// Install the standard preprocessor pragmas: 2622 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 2623 void RegisterBuiltinPragmas(); 2624 2625 /// RegisterBuiltinMacro - Register the specified identifier in the identifier 2626 /// table and mark it as a builtin macro to be expanded. 2627 IdentifierInfo *RegisterBuiltinMacro(const char *Name) { 2628 // Get the identifier. 2629 IdentifierInfo *Id = getIdentifierInfo(Name); 2630 2631 // Mark it as being a macro that is builtin. 
2632 MacroInfo *MI = AllocateMacroInfo(SourceLocation()); 2633 MI->setIsBuiltinMacro(); 2634 appendDefMacroDirective(Id, MI); 2635 return Id; 2636 } 2637 2638 /// Register builtin macros such as __LINE__ with the identifier table. 2639 void RegisterBuiltinMacros(); 2640 2641 /// If an identifier token is read that is to be expanded as a macro, handle 2642 /// it and return the next token as 'Tok'. If we lexed a token, return true; 2643 /// otherwise the caller should lex again. 2644 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD); 2645 2646 /// Cache macro expanded tokens for TokenLexers. 2647 // 2648 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 2649 /// going to lex in the cache and when it finishes the tokens are removed 2650 /// from the end of the cache. 2651 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 2652 ArrayRef<Token> tokens); 2653 2654 void removeCachedMacroExpandedTokensOfLastLexer(); 2655 2656 /// After reading "MACRO(", this method is invoked to read all of the formal 2657 /// arguments specified for the macro invocation. Returns null on error. 2658 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, 2659 SourceLocation &MacroEnd); 2660 2661 /// If an identifier token is read that is to be expanded 2662 /// as a builtin macro, handle it and return the next token as 'Tok'. 2663 void ExpandBuiltinMacro(Token &Tok); 2664 2665 /// Read a \c _Pragma directive, slice it up, process it, then 2666 /// return the first token after the directive. 2667 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2668 void Handle_Pragma(Token &Tok); 2669 2670 /// Like Handle_Pragma except the pragma text is not enclosed within 2671 /// a string literal. 2672 void HandleMicrosoft__pragma(Token &Tok); 2673 2674 /// Add a lexer to the top of the include stack and 2675 /// start lexing tokens from it instead of the current buffer. 
2676 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir); 2677 2678 /// Set the FileID for the preprocessor predefines. 2679 void setPredefinesFileID(FileID FID) { 2680 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2681 PredefinesFileID = FID; 2682 } 2683 2684 /// Set the FileID for the PCH through header. 2685 void setPCHThroughHeaderFileID(FileID FID); 2686 2687 /// Returns true if we are lexing from a file and not a 2688 /// pragma or a macro. 2689 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2690 return L ? !L->isPragmaLexer() : P != nullptr; 2691 } 2692 2693 static bool IsFileLexer(const IncludeStackInfo& I) { 2694 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2695 } 2696 2697 bool IsFileLexer() const { 2698 return IsFileLexer(CurLexer.get(), CurPPLexer); 2699 } 2700 2701 //===--------------------------------------------------------------------===// 2702 // Caching stuff. 2703 void CachingLex(Token &Result); 2704 2705 bool InCachingLexMode() const { 2706 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2707 // that we are past EOF, not that we are in CachingLex mode. 2708 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2709 } 2710 2711 void EnterCachingLexMode(); 2712 void EnterCachingLexModeUnchecked(); 2713 2714 void ExitCachingLexMode() { 2715 if (InCachingLexMode()) 2716 RemoveTopOfLexerStack(); 2717 } 2718 2719 const Token &PeekAhead(unsigned N); 2720 void AnnotatePreviousCachedTokens(const Token &Tok); 2721 2722 //===--------------------------------------------------------------------===// 2723 /// Handle*Directive - implement the various preprocessor directives. These 2724 /// should side-effect the current preprocessor object so that the next call 2725 /// to Lex() will return the appropriate token next. 
void HandleLineDirective();
void HandleDigitDirective(Token &Tok);
void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
void HandleIdentSCCSDirective(Token &Tok);
void HandleMacroPublicDirective(Token &Tok);
void HandleMacroPrivateDirective();

/// An additional notification that can be produced by a header inclusion or
/// import to tell the parser what happened.
struct ImportAction {
  /// What happened. Every kind other than \c None and \c Failure must come
  /// with a module (enforced by the constructor's assertion).
  enum ActionKind {
    None,
    ModuleBegin,
    ModuleImport,
    HeaderUnitImport,
    SkippedModuleImport,
    Failure,
  } Kind;
  /// The module associated with the action; null when none was supplied.
  Module *ModuleForHeader = nullptr;

  /// Intentionally implicit, so a bare ActionKind converts to an
  /// ImportAction.
  ImportAction(ActionKind AK, Module *Mod = nullptr)
      : Kind(AK), ModuleForHeader(Mod) {
    // A missing module is only acceptable for None and Failure.
    assert((AK == None || Mod || AK == Failure) &&
           "no module for module action");
  }
};

// NOTE(review): declaration only. The many reference parameters
// (\p Filename, \p IsFrameworkFound, \p IsMapped, \p LookupFilename,
// \p RelativePath, \p SearchPath, \p SuggestedModule) look like outputs of
// the header lookup -- confirm against the definition before relying on it.
OptionalFileEntryRef LookupHeaderIncludeOrImport(
    ConstSearchDirIterator *CurDir, StringRef &Filename,
    SourceLocation FilenameLoc, CharSourceRange FilenameRange,
    const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
    bool &IsMapped, ConstSearchDirIterator LookupFrom,
    const FileEntry *LookupFromFile, StringRef &LookupFilename,
    SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
    ModuleMap::KnownHeader &SuggestedModule, bool isAngled);

// Binary data inclusion.
void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
                          const FileEntry *LookupFromFile = nullptr);
void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
                              const LexEmbedParametersResult &Params,
                              StringRef BinaryContents);

// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
                            ConstSearchDirIterator LookupFrom = nullptr,
                            const FileEntry *LookupFromFile = nullptr);
ImportAction
HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
                            Token &FilenameTok, SourceLocation EndLoc,
                            ConstSearchDirIterator LookupFrom = nullptr,
                            const FileEntry *LookupFromFile = nullptr);
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
void HandleMicrosoftImportDirective(Token &Tok);

public:
/// Check that the given module is available, producing a diagnostic if not.
///
/// \return \c true if the check failed (because the module is not available).
///         \c false if the module appears to be usable.
static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                   const TargetInfo &TargetInfo,
                                   const Module &M, DiagnosticsEngine &Diags);

// Module inclusion testing.
/// Find the module that owns the source or header file that
/// \p Loc points to. If the location is in a file that was included
/// into a module, or is outside any module, returns nullptr.
Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);

/// We want to produce a diagnostic at location IncLoc concerning an
/// unreachable effect at location MLoc (e.g., where a desired entity was
/// declared or defined). Determine whether the right way to make MLoc
/// reachable is by #include, and if so, what header should be included.
///
/// This is not necessarily fast, and might load unexpected module maps, so
/// should only be called by code that intends to produce an error.
///
/// \param IncLoc The location at which the missing effect was detected.
/// \param MLoc A location within an unimported module at which the desired
///        effect occurred.
/// \return A file that can be #included to provide the desired effect. Null
///         if no such file could be determined or if a #include is not
///         appropriate (e.g., if a module should be imported instead).
OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                      SourceLocation MLoc);

/// Forwards to PreambleConditionalStack.isRecording().
bool isRecordingPreamble() const {
  return PreambleConditionalStack.isRecording();
}

/// Forwards to PreambleConditionalStack.hasRecordedPreamble().
bool hasRecordedPreamble() const {
  return PreambleConditionalStack.hasRecordedPreamble();
}

/// Returns the conditional stack currently held by PreambleConditionalStack.
ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
  return PreambleConditionalStack.getStack();
}

/// Replaces the conditional stack held by PreambleConditionalStack with
/// \p s.
void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
  PreambleConditionalStack.setStack(s);
}

/// Switches PreambleConditionalStack to replaying, then installs \p s as its
/// stack and \p SkipInfo as its skip information.
void setReplayablePreambleConditionalStack(
    ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
  PreambleConditionalStack.startReplaying();
  PreambleConditionalStack.setStack(s);
  PreambleConditionalStack.SkipInfo = SkipInfo;
}

/// Returns a copy of the skip information stored in
/// PreambleConditionalStack; the optional may be empty.
std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
  return PreambleConditionalStack.SkipInfo;
}

private:
/// After processing predefined file, initialize the conditional stack from
/// the preamble.
void replayPreambleConditionalStack();

// Macro handling.
void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
void HandleUndefDirective();

// Conditional Inclusion.
void HandleIfdefDirective(Token &Result, const Token &HashToken,
                          bool isIfndef, bool ReadAnyTokensBeforeDirective);
void HandleIfDirective(Token &IfToken, const Token &HashToken,
                       bool ReadAnyTokensBeforeDirective);
void HandleEndifDirective(Token &EndifToken);
void HandleElseDirective(Token &Result, const Token &HashToken);
// NOTE(review): \p Kind presumably selects which member of the \#elif
// family is being handled -- confirm against the definition.
void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
                               tok::PPKeywordKind Kind);

// Pragmas.
void HandlePragmaDirective(PragmaIntroducer Introducer);

public:
// Handlers for the individual pragmas. They are public, unlike the directive
// handlers above.
void HandlePragmaOnce(Token &OnceTok);
void HandlePragmaMark(Token &MarkTok);
void HandlePragmaPoison();
void HandlePragmaSystemHeader(Token &SysHeaderTok);
void HandlePragmaDependency(Token &DependencyTok);
void HandlePragmaPushMacro(Token &Tok);
void HandlePragmaPopMacro(Token &Tok);
void HandlePragmaIncludeAlias(Token &Tok);
void HandlePragmaModuleBuild(Token &Tok);
void HandlePragmaHdrstop(Token &Tok);
IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);

/// Return true and store the first token in \p result only if any
/// CommentHandler has inserted some tokens and getCommentRetentionState()
/// is false.
bool HandleComment(Token &result, SourceRange Comment);

/// A macro is used, update information about macros that need unused
/// warnings.
2881 void markMacroAsUsed(MacroInfo *MI); 2882 2883 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, 2884 SourceLocation AnnotationLoc) { 2885 AnnotationInfos[II].DeprecationInfo = 2886 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2887 } 2888 2889 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, 2890 SourceLocation AnnotationLoc) { 2891 AnnotationInfos[II].RestrictExpansionInfo = 2892 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2893 } 2894 2895 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) { 2896 AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc; 2897 } 2898 2899 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const { 2900 return AnnotationInfos.find(II)->second; 2901 } 2902 2903 void emitMacroExpansionWarnings(const Token &Identifier, 2904 bool IsIfnDef = false) const { 2905 IdentifierInfo *Info = Identifier.getIdentifierInfo(); 2906 if (Info->isDeprecatedMacro()) 2907 emitMacroDeprecationWarning(Identifier); 2908 2909 if (Info->isRestrictExpansion() && 2910 !SourceMgr.isInMainFile(Identifier.getLocation())) 2911 emitRestrictExpansionWarning(Identifier); 2912 2913 if (!IsIfnDef) { 2914 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs) 2915 emitRestrictInfNaNWarning(Identifier, 0); 2916 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs) 2917 emitRestrictInfNaNWarning(Identifier, 1); 2918 } 2919 } 2920 2921 static void processPathForFileMacro(SmallVectorImpl<char> &Path, 2922 const LangOptions &LangOpts, 2923 const TargetInfo &TI); 2924 2925 static void processPathToFileName(SmallVectorImpl<char> &FileName, 2926 const PresumedLoc &PLoc, 2927 const LangOptions &LangOpts, 2928 const TargetInfo &TI); 2929 2930 private: 2931 void emitMacroDeprecationWarning(const Token &Identifier) const; 2932 void emitRestrictExpansionWarning(const Token &Identifier) const; 2933 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; 
2934 void emitRestrictInfNaNWarning(const Token &Identifier, 2935 unsigned DiagSelection) const; 2936 2937 /// This boolean state keeps track if the current scanned token (by this PP) 2938 /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a 2939 /// translation unit in a linear order. 2940 bool InSafeBufferOptOutRegion = false; 2941 2942 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out 2943 /// region if PP is currently in such a region. Hold undefined value 2944 /// otherwise. 2945 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. 2946 2947 using SafeBufferOptOutRegionsTy = 2948 SmallVector<std::pair<SourceLocation, SourceLocation>, 16>; 2949 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this 2950 // translation unit. Each region is represented by a pair of start and 2951 // end locations. 2952 SafeBufferOptOutRegionsTy SafeBufferOptOutMap; 2953 2954 // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the 2955 // following structure to manage them by their ASTs. 2956 struct { 2957 // A map from unique IDs to region maps of loaded ASTs. The ID identifies a 2958 // loaded AST. See `SourceManager::getUniqueLoadedASTID`. 2959 llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions; 2960 2961 // Returns a reference to the safe buffer opt-out regions of the loaded 2962 // AST where `Loc` belongs to. (Construct if absent) 2963 SafeBufferOptOutRegionsTy & 2964 findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) { 2965 return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)]; 2966 } 2967 2968 // Returns a reference to the safe buffer opt-out regions of the loaded 2969 // AST where `Loc` belongs to. (This const function returns nullptr if 2970 // absent.) 
2971 const SafeBufferOptOutRegionsTy * 2972 lookupLoadedOptOutMap(SourceLocation Loc, 2973 const SourceManager &SrcMgr) const { 2974 FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc); 2975 auto Iter = LoadedRegions.find(FID); 2976 2977 if (Iter == LoadedRegions.end()) 2978 return nullptr; 2979 return &Iter->getSecond(); 2980 } 2981 } LoadedSafeBufferOptOutMap; 2982 2983 public: 2984 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out 2985 /// region. This `Loc` must be a source location that has been pre-processed. 2986 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const; 2987 2988 /// Alter the state of whether this PP currently is in a 2989 /// "-Wunsafe-buffer-usage" opt-out region. 2990 /// 2991 /// \param isEnter true if this PP is entering a region; otherwise, this PP 2992 /// is exiting a region 2993 /// \param Loc the location of the entry or exit of a 2994 /// region 2995 /// \return true iff it is INVALID to enter or exit a region, i.e., 2996 /// attempt to enter a region before exiting a previous region, or exiting a 2997 /// region that PP is not currently in. 2998 bool enterOrExitSafeBufferOptOutRegion(bool isEnter, 2999 const SourceLocation &Loc); 3000 3001 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 3002 /// opt-out region 3003 bool isPPInSafeBufferOptOutRegion(); 3004 3005 /// \param StartLoc output argument. It will be set to the start location of 3006 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function 3007 /// returns true. 3008 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 3009 /// opt-out region 3010 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); 3011 3012 /// \return a sequence of SourceLocations representing ordered opt-out regions 3013 /// specified by 3014 /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. 
SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;

/// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
/// record of code `PP_UNSAFE_BUFFER_USAGE`.
/// \return true iff the `Preprocessor` has been updated; false if the
/// `Preprocessor` is the same as it was before the call.
bool setDeserializedSafeBufferOptOutMap(
    const SmallVectorImpl<SourceLocation> &SrcLocSeqs);

private:
/// Helper functions to forward lexing to the actual lexer. They all share the
/// same signature.

/// Forwards to the current Lexer's Lex().
static bool CLK_Lexer(Preprocessor &P, Token &Result) {
  return P.CurLexer->Lex(Result);
}
/// Forwards to the current TokenLexer's Lex().
static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
  return P.CurTokenLexer->Lex(Result);
}
/// Forwards to CachingLex(), which returns nothing, so this one always
/// reports true.
static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
  P.CachingLex(Result);
  return true;
}
/// Forwards to the current Lexer's LexDependencyDirectiveToken().
static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
  return P.CurLexer->LexDependencyDirectiveToken(Result);
}
/// Forwards to the preprocessor's LexAfterModuleImport().
static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
  return P.LexAfterModuleImport(Result);
}
};

/// Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
  virtual ~CommentHandler();

  // The handler shall return true if it has pushed any tokens
  // to be read using e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};

/// Abstract base class that describes a handler that will receive
/// source ranges for empty lines encountered in the source file.
class EmptylineHandler {
public:
  virtual ~EmptylineHandler();

  // Called with the source range of the empty line(s); the handler decides
  // what to do with them.
  virtual void HandleEmptyline(SourceRange Range) = 0;
};

/// Helper class to shuttle information about #embed directives from the
/// preprocessor to the parser through an annotation token.
struct EmbedAnnotationData {
  // A StringRef does not own its characters, so the referenced buffer has to
  // outlive this object.
  StringRef BinaryData;
};

/// Registry of pragma handlers added by plugins.
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;

} // namespace clang

namespace llvm {
// Explicit instantiation declaration for the registry aliased as
// clang::PragmaHandlerRegistry.
extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>;
} // namespace llvm

#endif // LLVM_CLANG_LEX_PREPROCESSOR_H