xref: /llvm-project/clang/include/clang/Lex/Preprocessor.h (revision 563c7c5539f05e7f8cbb42565c1f24466019f38b)
1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/IdentifierTable.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/Module.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/HeaderSearch.h"
27 #include "clang/Lex/Lexer.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/PPEmbedParameters.h"
33 #include "clang/Lex/Token.h"
34 #include "clang/Lex/TokenLexer.h"
35 #include "clang/Support/Compiler.h"
36 #include "llvm/ADT/APSInt.h"
37 #include "llvm/ADT/ArrayRef.h"
38 #include "llvm/ADT/DenseMap.h"
39 #include "llvm/ADT/FoldingSet.h"
40 #include "llvm/ADT/FunctionExtras.h"
41 #include "llvm/ADT/PointerUnion.h"
42 #include "llvm/ADT/STLExtras.h"
43 #include "llvm/ADT/SmallPtrSet.h"
44 #include "llvm/ADT/SmallVector.h"
45 #include "llvm/ADT/StringRef.h"
46 #include "llvm/ADT/TinyPtrVector.h"
47 #include "llvm/ADT/iterator_range.h"
48 #include "llvm/Support/Allocator.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/Registry.h"
51 #include <cassert>
52 #include <cstddef>
53 #include <cstdint>
54 #include <map>
55 #include <memory>
56 #include <optional>
57 #include <string>
58 #include <utility>
59 #include <vector>
60 
61 namespace llvm {
62 
63 template<unsigned InternalLen> class SmallString;
64 
65 } // namespace llvm
66 
67 namespace clang {
68 
69 class CodeCompletionHandler;
70 class CommentHandler;
71 class DirectoryEntry;
72 class EmptylineHandler;
73 class ExternalPreprocessorSource;
74 class FileEntry;
75 class FileManager;
76 class HeaderSearch;
77 class MacroArgs;
78 class PragmaHandler;
79 class PragmaNamespace;
80 class PreprocessingRecord;
81 class PreprocessorLexer;
82 class PreprocessorOptions;
83 class ScratchBuffer;
84 class TargetInfo;
85 
86 namespace Builtin {
87 class Context;
88 }
89 
90 /// Stores token information for comparing actual tokens with
91 /// predefined values.  Only handles simple tokens and identifiers.
92 class TokenValue {
93   tok::TokenKind Kind;
94   IdentifierInfo *II;
95 
96 public:
97   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
98     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
99     assert(Kind != tok::identifier &&
100            "Identifiers should be created by TokenValue(IdentifierInfo *)");
101     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
102     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
103   }
104 
105   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
106 
107   bool operator==(const Token &Tok) const {
108     return Tok.getKind() == Kind &&
109         (!II || II == Tok.getIdentifierInfo());
110   }
111 };
112 
/// Context in which a macro name is used.
enum MacroUse {
  /// Any use other than in a \#define or \#undef.
  MU_Other = 0,

  /// The macro name specified in a \#define.
  MU_Define = 1,

  /// The macro name specified in an \#undef.
  MU_Undef = 2
};
124 
/// Result values; the non-negative enumerators mirror the __STDC_EMBED_*
/// macros named on each of them.
enum class EmbedResult {
  /// Parsing error occurred.
  Invalid = -1,
  /// Corresponds to __STDC_EMBED_NOT_FOUND__
  NotFound = 0,
  /// Corresponds to __STDC_EMBED_FOUND__
  Found = 1,
  /// Corresponds to __STDC_EMBED_EMPTY__
  Empty = 2,
};
131 
132 /// Engages in a tight little dance with the lexer to efficiently
133 /// preprocess tokens.
134 ///
135 /// Lexers know only about tokens within a single source file, and don't
136 /// know anything about preprocessor-level issues like the \#include stack,
137 /// token expansion, etc.
138 class Preprocessor {
139   friend class VAOptDefinitionContext;
140   friend class VariadicMacroScopeGuard;
141 
142   llvm::unique_function<void(const clang::Token &)> OnToken;
143   std::shared_ptr<PreprocessorOptions> PPOpts;
144   DiagnosticsEngine        *Diags;
145   const LangOptions &LangOpts;
146   const TargetInfo *Target = nullptr;
147   const TargetInfo *AuxTarget = nullptr;
148   FileManager       &FileMgr;
149   SourceManager     &SourceMgr;
150   std::unique_ptr<ScratchBuffer> ScratchBuf;
151   HeaderSearch      &HeaderInfo;
152   ModuleLoader      &TheModuleLoader;
153 
154   /// External source of macros.
155   ExternalPreprocessorSource *ExternalSource;
156 
157   /// A BumpPtrAllocator object used to quickly allocate and release
158   /// objects internal to the Preprocessor.
159   llvm::BumpPtrAllocator BP;
160 
161   /// Identifiers for builtin macros and other builtins.
162   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
163   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
164   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
165   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
166   IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
167   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
168   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
169   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
170   IdentifierInfo *Ident__identifier;               // __identifier
171   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
172   IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
173   IdentifierInfo *Ident__has_feature;              // __has_feature
174   IdentifierInfo *Ident__has_extension;            // __has_extension
175   IdentifierInfo *Ident__has_builtin;              // __has_builtin
176   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
177   IdentifierInfo *Ident__has_attribute;            // __has_attribute
178   IdentifierInfo *Ident__has_embed;                // __has_embed
179   IdentifierInfo *Ident__has_include;              // __has_include
180   IdentifierInfo *Ident__has_include_next;         // __has_include_next
181   IdentifierInfo *Ident__has_warning;              // __has_warning
182   IdentifierInfo *Ident__is_identifier;            // __is_identifier
183   IdentifierInfo *Ident__building_module;          // __building_module
184   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
185   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
186   IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
187   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
188   IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
189   IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
190   IdentifierInfo *Ident__is_target_os;             // __is_target_os
191   IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
192   IdentifierInfo *Ident__is_target_variant_os;
193   IdentifierInfo *Ident__is_target_variant_environment;
194   IdentifierInfo *Ident__FLT_EVAL_METHOD__;        // __FLT_EVAL_METHOD
195 
196   // Weak, only valid (and set) while InMacroArgs is true.
197   Token* ArgMacro;
198 
199   SourceLocation DATELoc, TIMELoc;
200 
201   // FEM_UnsetOnCommandLine means that an explicit evaluation method was
202   // not specified on the command line. The target is queried to set the
203   // default evaluation method.
204   LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
205       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
206 
207   // The most recent pragma location where the floating point evaluation
208   // method was modified. This is used to determine whether the
209   // 'pragma clang fp eval_method' was used within the current scope.
210   SourceLocation LastFPEvalPragmaLocation;
211 
212   LangOptions::FPEvalMethodKind TUFPEvalMethod =
213       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
214 
215   // Next __COUNTER__ value, starts at 0.
216   unsigned CounterValue = 0;
217 
218   enum {
219     /// Maximum depth of \#includes.
220     MaxAllowedIncludeStackDepth = 200
221   };
222 
223   // State that is set before the preprocessor begins.
224   bool KeepComments : 1;
225   bool KeepMacroComments : 1;
226   bool SuppressIncludeNotFoundError : 1;
227 
228   // State that changes while the preprocessor runs:
229   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
230 
231   /// Whether the preprocessor owns the header search object.
232   bool OwnsHeaderSearch : 1;
233 
234   /// True if macro expansion is disabled.
235   bool DisableMacroExpansion : 1;
236 
237   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
238   /// when parsing preprocessor directives.
239   bool MacroExpansionInDirectivesOverride : 1;
240 
241   class ResetMacroExpansionHelper;
242 
243   /// Whether we have already loaded macros from the external source.
244   mutable bool ReadMacrosFromExternalSource : 1;
245 
246   /// True if pragmas are enabled.
247   bool PragmasEnabled : 1;
248 
249   /// True if the current build action is a preprocessing action.
250   bool PreprocessedOutput : 1;
251 
252   /// True if we are currently preprocessing a #if or #elif directive
253   bool ParsingIfOrElifDirective;
254 
255   /// True if we are pre-expanding macro arguments.
256   bool InMacroArgPreExpansion;
257 
258   /// Mapping/lookup information for all identifiers in
259   /// the program, including program keywords.
260   mutable IdentifierTable Identifiers;
261 
262   /// This table contains all the selectors in the program.
263   ///
264   /// Unlike IdentifierTable above, this table *isn't* populated by the
265   /// preprocessor. It is declared/expanded here because its role/lifetime is
266   /// conceptually similar to the IdentifierTable. In addition, the current
267   /// control flow (in clang::ParseAST()) makes it convenient to put it here.
268   ///
269   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
270   /// the lifetime of the preprocessor.
271   SelectorTable Selectors;
272 
273   /// Information about builtins.
274   std::unique_ptr<Builtin::Context> BuiltinInfo;
275 
276   /// Tracks all of the pragmas that the client registered
277   /// with this preprocessor.
278   std::unique_ptr<PragmaNamespace> PragmaHandlers;
279 
280   /// Pragma handlers of the original source are stored here during the
281   /// parsing of a model file.
282   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
283 
284   /// Tracks all of the comment handlers that the client registered
285   /// with this preprocessor.
286   std::vector<CommentHandler *> CommentHandlers;
287 
288   /// Empty line handler.
289   EmptylineHandler *Emptyline = nullptr;
290 
291   /// True to avoid tearing down the lexer etc on EOF
292   bool IncrementalProcessing = false;
293 
294 public:
295   /// The kind of translation unit we are processing.
296   const TranslationUnitKind TUKind;
297 
298   /// Returns a pointer into the given file's buffer that's guaranteed
299   /// to be between tokens. The returned pointer is always before \p Start.
300   /// The maximum distance between the returned pointer and \p Start is
301   /// limited by a constant value, but also an implementation detail.
302   /// If no such check point exists, \c nullptr is returned.
303   const char *getCheckPoint(FileID FID, const char *Start) const;
304 
305 private:
306   /// The code-completion handler.
307   CodeCompletionHandler *CodeComplete = nullptr;
308 
309   /// The file that we're performing code-completion for, if any.
310   const FileEntry *CodeCompletionFile = nullptr;
311 
312   /// The offset in file for the code-completion point.
313   unsigned CodeCompletionOffset = 0;
314 
315   /// The location for the code-completion point. This gets instantiated
316   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
317   SourceLocation CodeCompletionLoc;
318 
319   /// The start location for the file of the code-completion point.
320   ///
321   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
322   /// for preprocessing.
323   SourceLocation CodeCompletionFileLoc;
324 
325   /// The source location of the \c import contextual keyword we just
326   /// lexed, if any.
327   SourceLocation ModuleImportLoc;
328 
329   /// The import path for named module that we're currently processing.
330   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
331 
332   llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
333   unsigned CheckPointCounter = 0;
334 
335   /// Whether the import is an `@import` or a standard c++ modules import.
336   bool IsAtImport = false;
337 
338   /// Whether the last token we lexed was an '@'.
339   bool LastTokenWasAt = false;
340 
341   /// A position within a C++20 import-seq.
342   class StdCXXImportSeq {
343   public:
344     enum State : int {
345       // Positive values represent a number of unclosed brackets.
346       AtTopLevel = 0,
347       AfterTopLevelTokenSeq = -1,
348       AfterExport = -2,
349       AfterImportSeq = -3,
350     };
351 
352     StdCXXImportSeq(State S) : S(S) {}
353 
354     /// Saw any kind of open bracket.
355     void handleOpenBracket() {
356       S = static_cast<State>(std::max<int>(S, 0) + 1);
357     }
358     /// Saw any kind of close bracket other than '}'.
359     void handleCloseBracket() {
360       S = static_cast<State>(std::max<int>(S, 1) - 1);
361     }
362     /// Saw a close brace.
363     void handleCloseBrace() {
364       handleCloseBracket();
365       if (S == AtTopLevel && !AfterHeaderName)
366         S = AfterTopLevelTokenSeq;
367     }
368     /// Saw a semicolon.
369     void handleSemi() {
370       if (atTopLevel()) {
371         S = AfterTopLevelTokenSeq;
372         AfterHeaderName = false;
373       }
374     }
375 
376     /// Saw an 'export' identifier.
377     void handleExport() {
378       if (S == AfterTopLevelTokenSeq)
379         S = AfterExport;
380       else if (S <= 0)
381         S = AtTopLevel;
382     }
383     /// Saw an 'import' identifier.
384     void handleImport() {
385       if (S == AfterTopLevelTokenSeq || S == AfterExport)
386         S = AfterImportSeq;
387       else if (S <= 0)
388         S = AtTopLevel;
389     }
390 
391     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
392     /// until we reach a top-level semicolon.
393     void handleHeaderName() {
394       if (S == AfterImportSeq)
395         AfterHeaderName = true;
396       handleMisc();
397     }
398 
399     /// Saw any other token.
400     void handleMisc() {
401       if (S <= 0)
402         S = AtTopLevel;
403     }
404 
405     bool atTopLevel() { return S <= 0; }
406     bool afterImportSeq() { return S == AfterImportSeq; }
407     bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
408 
409   private:
410     State S;
411     /// Whether we're in the pp-import-suffix following the header-name in a
412     /// pp-import. If so, a close-brace is not sufficient to end the
413     /// top-level-token-seq of an import-seq.
414     bool AfterHeaderName = false;
415   };
416 
417   /// Our current position within a C++20 import-seq.
418   StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
419 
420   /// Track whether we are in a Global Module Fragment
421   class TrackGMF {
422   public:
423     enum GMFState : int {
424       GMFActive = 1,
425       MaybeGMF = 0,
426       BeforeGMFIntroducer = -1,
427       GMFAbsentOrEnded = -2,
428     };
429 
430     TrackGMF(GMFState S) : S(S) {}
431 
432     /// Saw a semicolon.
433     void handleSemi() {
434       // If it is immediately after the first instance of the module keyword,
435       // then that introduces the GMF.
436       if (S == MaybeGMF)
437         S = GMFActive;
438     }
439 
440     /// Saw an 'export' identifier.
441     void handleExport() {
442       // The presence of an 'export' keyword always ends or excludes a GMF.
443       S = GMFAbsentOrEnded;
444     }
445 
446     /// Saw an 'import' identifier.
447     void handleImport(bool AfterTopLevelTokenSeq) {
448       // If we see this before any 'module' kw, then we have no GMF.
449       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
450         S = GMFAbsentOrEnded;
451     }
452 
453     /// Saw a 'module' identifier.
454     void handleModule(bool AfterTopLevelTokenSeq) {
455       // This was the first module identifier and not preceded by any token
456       // that would exclude a GMF.  It could begin a GMF, but only if directly
457       // followed by a semicolon.
458       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
459         S = MaybeGMF;
460       else
461         S = GMFAbsentOrEnded;
462     }
463 
464     /// Saw any other token.
465     void handleMisc() {
466       // We saw something other than ; after the 'module' kw, so not a GMF.
467       if (S == MaybeGMF)
468         S = GMFAbsentOrEnded;
469     }
470 
471     bool inGMF() { return S == GMFActive; }
472 
473   private:
474     /// Track the transitions into and out of a Global Module Fragment,
475     /// if one is present.
476     GMFState S;
477   };
478 
479   TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
480 
481   /// Track the status of the c++20 module decl.
482   ///
483   ///   module-declaration:
484   ///     'export'[opt] 'module' module-name module-partition[opt]
485   ///     attribute-specifier-seq[opt] ';'
486   ///
487   ///   module-name:
488   ///     module-name-qualifier[opt] identifier
489   ///
490   ///   module-partition:
491   ///     ':' module-name-qualifier[opt] identifier
492   ///
493   ///   module-name-qualifier:
494   ///     identifier '.'
495   ///     module-name-qualifier identifier '.'
496   ///
497   /// Transition state:
498   ///
499   ///   NotAModuleDecl --- export ---> FoundExport
500   ///   NotAModuleDecl --- module ---> ImplementationCandidate
501   ///   FoundExport --- module ---> InterfaceCandidate
502   ///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
503   ///   ImplementationCandidate --- period ---> ImplementationCandidate
504   ///   ImplementationCandidate --- colon ---> ImplementationCandidate
505   ///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
506   ///   InterfaceCandidate --- period ---> InterfaceCandidate
507   ///   InterfaceCandidate --- colon ---> InterfaceCandidate
508   ///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
509   ///   InterfaceCandidate --- Semi ---> NamedModuleInterface
510   ///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
511   ///   NamedModuleInterface --- Anything ---> NamedModuleInterface
512   ///
513   /// FIXME: We don't handle attribute-specifier-seq here yet. That should not
514   /// matter for now since we don't support any module attributes yet.
515   class ModuleDeclSeq {
516     enum ModuleDeclState : int {
517       NotAModuleDecl,
518       FoundExport,
519       InterfaceCandidate,
520       ImplementationCandidate,
521       NamedModuleInterface,
522       NamedModuleImplementation,
523     };
524 
525   public:
526     ModuleDeclSeq() = default;
527 
528     void handleExport() {
529       if (State == NotAModuleDecl)
530         State = FoundExport;
531       else if (!isNamedModule())
532         reset();
533     }
534 
535     void handleModule() {
536       if (State == FoundExport)
537         State = InterfaceCandidate;
538       else if (State == NotAModuleDecl)
539         State = ImplementationCandidate;
540       else if (!isNamedModule())
541         reset();
542     }
543 
544     void handleIdentifier(IdentifierInfo *Identifier) {
545       if (isModuleCandidate() && Identifier)
546         Name += Identifier->getName().str();
547       else if (!isNamedModule())
548         reset();
549     }
550 
551     void handleColon() {
552       if (isModuleCandidate())
553         Name += ":";
554       else if (!isNamedModule())
555         reset();
556     }
557 
558     void handlePeriod() {
559       if (isModuleCandidate())
560         Name += ".";
561       else if (!isNamedModule())
562         reset();
563     }
564 
565     void handleSemi() {
566       if (!Name.empty() && isModuleCandidate()) {
567         if (State == InterfaceCandidate)
568           State = NamedModuleInterface;
569         else if (State == ImplementationCandidate)
570           State = NamedModuleImplementation;
571         else
572           llvm_unreachable("Unimaged ModuleDeclState.");
573       } else if (!isNamedModule())
574         reset();
575     }
576 
577     void handleMisc() {
578       if (!isNamedModule())
579         reset();
580     }
581 
582     bool isModuleCandidate() const {
583       return State == InterfaceCandidate || State == ImplementationCandidate;
584     }
585 
586     bool isNamedModule() const {
587       return State == NamedModuleInterface ||
588              State == NamedModuleImplementation;
589     }
590 
591     bool isNamedInterface() const { return State == NamedModuleInterface; }
592 
593     bool isImplementationUnit() const {
594       return State == NamedModuleImplementation && !getName().contains(':');
595     }
596 
597     StringRef getName() const {
598       assert(isNamedModule() && "Can't get name from a non named module");
599       return Name;
600     }
601 
602     StringRef getPrimaryName() const {
603       assert(isNamedModule() && "Can't get name from a non named module");
604       return getName().split(':').first;
605     }
606 
607     void reset() {
608       Name.clear();
609       State = NotAModuleDecl;
610     }
611 
612   private:
613     ModuleDeclState State = NotAModuleDecl;
614     std::string Name;
615   };
616 
617   ModuleDeclSeq ModuleDeclState;
618 
619   /// Whether the module import expects an identifier next. Otherwise,
620   /// it expects a '.' or ';'.
621   bool ModuleImportExpectsIdentifier = false;
622 
623   /// The identifier and source location of the currently-active
624   /// \#pragma clang arc_cf_code_audited begin.
625   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
626 
627   /// The source location of the currently-active
628   /// \#pragma clang assume_nonnull begin.
629   SourceLocation PragmaAssumeNonNullLoc;
630 
631   /// Set only for preambles which end with an active
632   /// \#pragma clang assume_nonnull begin.
633   ///
634   /// When the preamble is loaded into the main file,
635   /// `PragmaAssumeNonNullLoc` will be set to this to
636   /// replay the unterminated assume_nonnull.
637   SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
638 
639   /// True if we hit the code-completion point.
640   bool CodeCompletionReached = false;
641 
642   /// The code completion token containing the information
643   /// on the stem that is to be code completed.
644   IdentifierInfo *CodeCompletionII = nullptr;
645 
646   /// Range for the code completion token.
647   SourceRange CodeCompletionTokenRange;
648 
649   /// The directory that the main file should be considered to occupy,
650   /// if it does not correspond to a real file (as happens when building a
651   /// module).
652   OptionalDirectoryEntryRef MainFileDir;
653 
654   /// The number of bytes that we will initially skip when entering the
655   /// main file, along with a flag that indicates whether skipping this number
656   /// of bytes will place the lexer at the start of a line.
657   ///
658   /// This is used when loading a precompiled preamble.
659   std::pair<int, bool> SkipMainFilePreamble;
660 
661   /// Whether we hit an error due to reaching max allowed include depth. Allows
662   /// to avoid hitting the same error over and over again.
663   bool HasReachedMaxIncludeDepth = false;
664 
665   /// The number of currently-active calls to Lex.
666   ///
667   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
668   /// require asking for multiple additional tokens. This counter makes it
669   /// possible for Lex to detect whether it's producing a token for the end
670   /// of phase 4 of translation or for some other situation.
671   unsigned LexLevel = 0;
672 
673   /// The number of (LexLevel 0) preprocessor tokens.
674   unsigned TokenCount = 0;
675 
676   /// Preprocess every token regardless of LexLevel.
677   bool PreprocessToken = false;
678 
679   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
680   /// warning, or zero for unlimited.
681   unsigned MaxTokens = 0;
682   SourceLocation MaxTokensOverrideLoc;
683 
684 public:
685   struct PreambleSkipInfo {
686     SourceLocation HashTokenLoc;
687     SourceLocation IfTokenLoc;
688     bool FoundNonSkipPortion;
689     bool FoundElse;
690     SourceLocation ElseLoc;
691 
692     PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
693                      bool FoundNonSkipPortion, bool FoundElse,
694                      SourceLocation ElseLoc)
695         : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
696           FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
697           ElseLoc(ElseLoc) {}
698   };
699 
700   using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
701 
702 private:
703   friend class ASTReader;
704   friend class MacroArgs;
705 
706   class PreambleConditionalStackStore {
707     enum State {
708       Off = 0,
709       Recording = 1,
710       Replaying = 2,
711     };
712 
713   public:
714     PreambleConditionalStackStore() = default;
715 
716     void startRecording() { ConditionalStackState = Recording; }
717     void startReplaying() { ConditionalStackState = Replaying; }
718     bool isRecording() const { return ConditionalStackState == Recording; }
719     bool isReplaying() const { return ConditionalStackState == Replaying; }
720 
721     ArrayRef<PPConditionalInfo> getStack() const {
722       return ConditionalStack;
723     }
724 
725     void doneReplaying() {
726       ConditionalStack.clear();
727       ConditionalStackState = Off;
728     }
729 
730     void setStack(ArrayRef<PPConditionalInfo> s) {
731       if (!isRecording() && !isReplaying())
732         return;
733       ConditionalStack.clear();
734       ConditionalStack.append(s.begin(), s.end());
735     }
736 
737     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
738 
739     bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
740 
741     void clearSkipInfo() { SkipInfo.reset(); }
742 
743     std::optional<PreambleSkipInfo> SkipInfo;
744 
745   private:
746     SmallVector<PPConditionalInfo, 4> ConditionalStack;
747     State ConditionalStackState = Off;
748   } PreambleConditionalStack;
749 
750   /// The current top of the stack that we're lexing from if
751   /// not expanding a macro and we are lexing directly from source code.
752   ///
753   /// Only one of CurLexer or CurTokenLexer will be non-null.
754   std::unique_ptr<Lexer> CurLexer;
755 
756   /// The current top of the stack that we're lexing from
757   /// if not expanding a macro.
758   ///
759   /// This is an alias for CurLexer.
760   PreprocessorLexer *CurPPLexer = nullptr;
761 
762   /// Used to find the current FileEntry, if CurLexer is non-null
763   /// and if applicable.
764   ///
765   /// This allows us to implement \#include_next and find directory-specific
766   /// properties.
767   ConstSearchDirIterator CurDirLookup = nullptr;
768 
769   /// The current macro we are expanding, if we are expanding a macro.
770   ///
771   /// One of CurLexer and CurTokenLexer must be null.
772   std::unique_ptr<TokenLexer> CurTokenLexer;
773 
774   /// The kind of lexer we're currently working with.
775   typedef bool (*LexerCallback)(Preprocessor &, Token &);
776   LexerCallback CurLexerCallback = &CLK_Lexer;
777 
778   /// If the current lexer is for a submodule that is being built, this
779   /// is that submodule.
780   Module *CurLexerSubmodule = nullptr;
781 
782   /// Keeps track of the stack of files currently
783   /// \#included, and macros currently being expanded from, not counting
784   /// CurLexer/CurTokenLexer.
785   struct IncludeStackInfo {
786     LexerCallback               CurLexerCallback;
787     Module                     *TheSubmodule;
788     std::unique_ptr<Lexer>      TheLexer;
789     PreprocessorLexer          *ThePPLexer;
790     std::unique_ptr<TokenLexer> TheTokenLexer;
791     ConstSearchDirIterator      TheDirLookup;
792 
793     // The following constructors are completely useless copies of the default
794     // versions, only needed to pacify MSVC.
795     IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
796                      std::unique_ptr<Lexer> &&TheLexer,
797                      PreprocessorLexer *ThePPLexer,
798                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
799                      ConstSearchDirIterator TheDirLookup)
800         : CurLexerCallback(std::move(CurLexerCallback)),
801           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
802           ThePPLexer(std::move(ThePPLexer)),
803           TheTokenLexer(std::move(TheTokenLexer)),
804           TheDirLookup(std::move(TheDirLookup)) {}
805   };
806   std::vector<IncludeStackInfo> IncludeMacroStack;
807 
808   /// Actions invoked when some preprocessor activity is
809   /// encountered (e.g. a file is \#included, etc).
810   std::unique_ptr<PPCallbacks> Callbacks;
811 
812   struct MacroExpandsInfo {
813     Token Tok;
814     MacroDefinition MD;
815     SourceRange Range;
816 
817     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
818         : Tok(Tok), MD(MD), Range(Range) {}
819   };
820   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
821 
822   /// Information about a name that has been used to define a module macro.
823   struct ModuleMacroInfo {
824     /// The most recent macro directive for this identifier.
825     MacroDirective *MD;
826 
827     /// The active module macros for this identifier.
828     llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
829 
830     /// The generation number at which we last updated ActiveModuleMacros.
831     /// \see Preprocessor::VisibleModules.
832     unsigned ActiveModuleMacrosGeneration = 0;
833 
834     /// Whether this macro name is ambiguous.
835     bool IsAmbiguous = false;
836 
837     /// The module macros that are overridden by this macro.
838     llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
839 
840     ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
841   };
842 
843   /// The state of a macro for an identifier.
844   class MacroState {
845     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
846 
847     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
848                                    const IdentifierInfo *II) const {
849       if (II->isOutOfDate())
850         PP.updateOutOfDateIdentifier(*II);
851       // FIXME: Find a spare bit on IdentifierInfo and store a
852       //        HasModuleMacros flag.
853       if (!II->hasMacroDefinition() ||
854           (!PP.getLangOpts().Modules &&
855            !PP.getLangOpts().ModulesLocalVisibility) ||
856           !PP.CurSubmoduleState->VisibleModules.getGeneration())
857         return nullptr;
858 
859       auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
860       if (!Info) {
861         Info = new (PP.getPreprocessorAllocator())
862             ModuleMacroInfo(cast<MacroDirective *>(State));
863         State = Info;
864       }
865 
866       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
867           Info->ActiveModuleMacrosGeneration)
868         PP.updateModuleMacroInfo(II, *Info);
869       return Info;
870     }
871 
872   public:
873     MacroState() : MacroState(nullptr) {}
874     MacroState(MacroDirective *MD) : State(MD) {}
875 
876     MacroState(MacroState &&O) noexcept : State(O.State) {
877       O.State = (MacroDirective *)nullptr;
878     }
879 
880     MacroState &operator=(MacroState &&O) noexcept {
881       auto S = O.State;
882       O.State = (MacroDirective *)nullptr;
883       State = S;
884       return *this;
885     }
886 
887     ~MacroState() {
888       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
889         Info->~ModuleMacroInfo();
890     }
891 
892     MacroDirective *getLatest() const {
893       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
894         return Info->MD;
895       return cast<MacroDirective *>(State);
896     }
897 
898     void setLatest(MacroDirective *MD) {
899       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
900         Info->MD = MD;
901       else
902         State = MD;
903     }
904 
905     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
906       auto *Info = getModuleInfo(PP, II);
907       return Info ? Info->IsAmbiguous : false;
908     }
909 
910     ArrayRef<ModuleMacro *>
911     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
912       if (auto *Info = getModuleInfo(PP, II))
913         return Info->ActiveModuleMacros;
914       return {};
915     }
916 
917     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
918                                                SourceManager &SourceMgr) const {
919       // FIXME: Incorporate module macros into the result of this.
920       if (auto *Latest = getLatest())
921         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
922       return {};
923     }
924 
925     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
926       if (auto *Info = getModuleInfo(PP, II)) {
927         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
928                                       Info->ActiveModuleMacros.begin(),
929                                       Info->ActiveModuleMacros.end());
930         Info->ActiveModuleMacros.clear();
931         Info->IsAmbiguous = false;
932       }
933     }
934 
935     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
936       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
937         return Info->OverriddenMacros;
938       return {};
939     }
940 
941     void setOverriddenMacros(Preprocessor &PP,
942                              ArrayRef<ModuleMacro *> Overrides) {
943       auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
944       if (!Info) {
945         if (Overrides.empty())
946           return;
947         Info = new (PP.getPreprocessorAllocator())
948             ModuleMacroInfo(cast<MacroDirective *>(State));
949         State = Info;
950       }
951       Info->OverriddenMacros.clear();
952       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
953                                     Overrides.begin(), Overrides.end());
954       Info->ActiveModuleMacrosGeneration = 0;
955     }
956   };
957 
958   /// For each IdentifierInfo that was associated with a macro, we
959   /// keep a mapping to the history of all macro definitions and #undefs in
960   /// the reverse order (the latest one is in the head of the list).
961   ///
962   /// This mapping lives within the \p CurSubmoduleState.
963   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
964 
965   struct SubmoduleState;
966 
967   /// Information about a submodule that we're currently building.
968   struct BuildingSubmoduleInfo {
969     /// The module that we are building.
970     Module *M;
971 
972     /// The location at which the module was included.
973     SourceLocation ImportLoc;
974 
975     /// Whether we entered this submodule via a pragma.
976     bool IsPragma;
977 
978     /// The previous SubmoduleState.
979     SubmoduleState *OuterSubmoduleState;
980 
981     /// The number of pending module macro names when we started building this.
982     unsigned OuterPendingModuleMacroNames;
983 
984     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
985                           SubmoduleState *OuterSubmoduleState,
986                           unsigned OuterPendingModuleMacroNames)
987         : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
988           OuterSubmoduleState(OuterSubmoduleState),
989           OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
990   };
991   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
992 
993   /// Information about a submodule's preprocessor state.
994   struct SubmoduleState {
995     /// The macros for the submodule.
996     MacroMap Macros;
997 
998     /// The set of modules that are visible within the submodule.
999     VisibleModuleSet VisibleModules;
1000 
1001     // FIXME: CounterValue?
1002     // FIXME: PragmaPushMacroInfo?
1003   };
1004   std::map<Module *, SubmoduleState> Submodules;
1005 
1006   /// The preprocessor state for preprocessing outside of any submodule.
1007   SubmoduleState NullSubmoduleState;
1008 
1009   /// The current submodule state. Will be \p NullSubmoduleState if we're not
1010   /// in a submodule.
1011   SubmoduleState *CurSubmoduleState;
1012 
1013   /// The files that have been included.
1014   IncludedFilesSet IncludedFiles;
1015 
1016   /// The set of top-level modules that affected preprocessing, but were not
1017   /// imported.
1018   llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1019 
1020   /// The set of known macros exported from modules.
1021   llvm::FoldingSet<ModuleMacro> ModuleMacros;
1022 
1023   /// The names of potential module macros that we've not yet processed.
1024   llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1025 
1026   /// The list of module macros, for each identifier, that are not overridden by
1027   /// any other module macro.
1028   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1029       LeafModuleMacros;
1030 
1031   /// Macros that we want to warn about because they are not used by the end
1032   /// of the translation unit.
1033   ///
1034   /// We store just their SourceLocations instead of
1035   /// something like MacroInfo*. The benefit of this is that when we are
1036   /// deserializing from PCH, we don't need to deserialize identifier & macros
1037   /// just so that we can report that they are unused, we just warn using
1038   /// the SourceLocations of this set (that will be filled by the ASTReader).
1039   using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1040   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1041 
1042   /// This is a pair of an optional message and source location used for pragmas
1043   /// that annotate macros like pragma clang restrict_expansion and pragma clang
1044   /// deprecated. This pair stores the optional message and the location of the
1045   /// annotation pragma for use producing diagnostics and notes.
1046   using MsgLocationPair = std::pair<std::string, SourceLocation>;
1047 
1048   struct MacroAnnotationInfo {
1049     SourceLocation Location;
1050     std::string Message;
1051   };
1052 
1053   struct MacroAnnotations {
1054     std::optional<MacroAnnotationInfo> DeprecationInfo;
1055     std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1056     std::optional<SourceLocation> FinalAnnotationLoc;
1057   };
1058 
1059   /// Warning information for macro annotations.
1060   llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1061 
1062   /// A "freelist" of MacroArg objects that can be
1063   /// reused for quick allocation.
1064   MacroArgs *MacroArgCache = nullptr;
1065 
1066   /// For each IdentifierInfo used in a \#pragma push_macro directive,
1067   /// we keep a MacroInfo stack used to restore the previous macro value.
1068   llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1069       PragmaPushMacroInfo;
1070 
1071   // Various statistics we track for performance analysis.
1072   unsigned NumDirectives = 0;
1073   unsigned NumDefined = 0;
1074   unsigned NumUndefined = 0;
1075   unsigned NumPragma = 0;
1076   unsigned NumIf = 0;
1077   unsigned NumElse = 0;
1078   unsigned NumEndif = 0;
1079   unsigned NumEnteredSourceFiles = 0;
1080   unsigned MaxIncludeStackDepth = 0;
1081   unsigned NumMacroExpanded = 0;
1082   unsigned NumFnMacroExpanded = 0;
1083   unsigned NumBuiltinMacroExpanded = 0;
1084   unsigned NumFastMacroExpanded = 0;
1085   unsigned NumTokenPaste = 0;
1086   unsigned NumFastTokenPaste = 0;
1087   unsigned NumSkipped = 0;
1088 
1089   /// The predefined macros that preprocessor should use from the
1090   /// command line etc.
1091   std::string Predefines;
1092 
1093   /// The file ID for the preprocessor predefines.
1094   FileID PredefinesFileID;
1095 
1096   /// The file ID for the PCH through header.
1097   FileID PCHThroughHeaderFileID;
1098 
1099   /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1100   bool SkippingUntilPragmaHdrStop = false;
1101 
1102   /// Whether tokens are being skipped until the through header is seen.
1103   bool SkippingUntilPCHThroughHeader = false;
1104 
1105   /// \{
1106   /// Cache of macro expanders to reduce malloc traffic.
1107   enum { TokenLexerCacheSize = 8 };
1108   unsigned NumCachedTokenLexers;
1109   std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1110   /// \}
1111 
1112   /// Keeps macro expanded tokens for TokenLexers.
1113   ///
1114   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
1115   /// going to lex to the cache, and when it finishes the tokens are removed
1116   /// from the end of the cache.
1117   SmallVector<Token, 16> MacroExpandedTokens;
1118   std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1119 
1120   /// A record of the macro definitions and expansions that
1121   /// occurred during preprocessing.
1122   ///
1123   /// This is an optional side structure that can be enabled with
1124   /// \c createPreprocessingRecord() prior to preprocessing.
1125   PreprocessingRecord *Record = nullptr;
1126 
1127   /// Cached tokens state.
1128   using CachedTokensTy = SmallVector<Token, 1>;
1129 
1130   /// Cached tokens are stored here when we do backtracking or
1131   /// lookahead. They are "lexed" by the CachingLex() method.
1132   CachedTokensTy CachedTokens;
1133 
1134   /// The position of the cached token that CachingLex() should
1135   /// "lex" next.
1136   ///
1137   /// If it points beyond the CachedTokens vector, it means that a normal
1138   /// Lex() should be invoked.
1139   CachedTokensTy::size_type CachedLexPos = 0;
1140 
1141   /// Stack of backtrack positions, allowing nested backtracks.
1142   ///
1143   /// The EnableBacktrackAtThisPos() method pushes a position to
1144   /// indicate where CachedLexPos should be set when the BackTrack() method is
1145   /// invoked (at which point the last position is popped).
1146   std::vector<CachedTokensTy::size_type> BacktrackPositions;
1147 
1148   /// Stack of cached tokens/initial number of cached tokens pairs, allowing
1149   /// nested unannotated backtracks.
1150   std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
1151       UnannotatedBacktrackTokens;
1152 
1153   /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1154   /// This is used to guard against calling this function recursively.
1155   ///
1156   /// See comments at the use-site for more context about why it is needed.
1157   bool SkippingExcludedConditionalBlock = false;
1158 
1159   /// Keeps track of skipped range mappings that were recorded while skipping
1160   /// excluded conditional directives. It maps the source buffer pointer at
1161   /// the beginning of a skipped block, to the number of bytes that should be
1162   /// skipped.
1163   llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1164 
1165   void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1166 
1167 public:
1168   Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
1169                DiagnosticsEngine &diags, const LangOptions &LangOpts,
1170                SourceManager &SM, HeaderSearch &Headers,
1171                ModuleLoader &TheModuleLoader,
1172                IdentifierInfoLookup *IILookup = nullptr,
1173                bool OwnsHeaderSearch = false,
1174                TranslationUnitKind TUKind = TU_Complete);
1175 
1176   ~Preprocessor();
1177 
1178   /// Initialize the preprocessor using information about the target.
1179   ///
1180   /// \param Target is owned by the caller and must remain valid for the
1181   /// lifetime of the preprocessor.
1182   /// \param AuxTarget is owned by the caller and must remain valid for
1183   /// the lifetime of the preprocessor.
1184   void Initialize(const TargetInfo &Target,
1185                   const TargetInfo *AuxTarget = nullptr);
1186 
1187   /// Initialize the preprocessor to parse a model file.
1188   ///
1189   /// To parse model files the preprocessor of the original source is reused to
1190   /// preserve the identifier table. However, to avoid some duplicate
1191   /// information in the preprocessor, some cleanup is needed before it is used
1192   /// to parse model files. This method does that cleanup.
1193   void InitializeForModelFile();
1194 
1195   /// Clean up after model file parsing.
1196   void FinalizeForModelFile();
1197 
1198   /// Retrieve the preprocessor options used to initialize this
1199   /// preprocessor.
1200   PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
1201 
1202   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
1203   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1204 
1205   const LangOptions &getLangOpts() const { return LangOpts; }
1206   const TargetInfo &getTargetInfo() const { return *Target; }
1207   const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
1208   FileManager &getFileManager() const { return FileMgr; }
1209   SourceManager &getSourceManager() const { return SourceMgr; }
1210   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1211 
1212   IdentifierTable &getIdentifierTable() { return Identifiers; }
1213   const IdentifierTable &getIdentifierTable() const { return Identifiers; }
1214   SelectorTable &getSelectorTable() { return Selectors; }
1215   Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
1216   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1217 
1218   void setExternalSource(ExternalPreprocessorSource *Source) {
1219     ExternalSource = Source;
1220   }
1221 
1222   ExternalPreprocessorSource *getExternalSource() const {
1223     return ExternalSource;
1224   }
1225 
1226   /// Retrieve the module loader associated with this preprocessor.
1227   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1228 
1229   bool hadModuleLoaderFatalFailure() const {
1230     return TheModuleLoader.HadFatalFailure;
1231   }
1232 
1233   /// Retrieve the number of Directives that have been processed by the
1234   /// Preprocessor.
1235   unsigned getNumDirectives() const {
1236     return NumDirectives;
1237   }
1238 
1239   /// True if we are currently preprocessing a #if or #elif directive
1240   bool isParsingIfOrElifDirective() const {
1241     return ParsingIfOrElifDirective;
1242   }
1243 
1244   /// Control whether the preprocessor retains comments in output.
1245   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1246     this->KeepComments = KeepComments | KeepMacroComments;
1247     this->KeepMacroComments = KeepMacroComments;
1248   }
1249 
1250   bool getCommentRetentionState() const { return KeepComments; }
1251 
1252   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
1253   bool getPragmasEnabled() const { return PragmasEnabled; }
1254 
1255   void SetSuppressIncludeNotFoundError(bool Suppress) {
1256     SuppressIncludeNotFoundError = Suppress;
1257   }
1258 
1259   bool GetSuppressIncludeNotFoundError() {
1260     return SuppressIncludeNotFoundError;
1261   }
1262 
1263   /// Sets whether the preprocessor is responsible for producing output or if
1264   /// it is producing tokens to be consumed by Parse and Sema.
1265   void setPreprocessedOutput(bool IsPreprocessedOutput) {
1266     PreprocessedOutput = IsPreprocessedOutput;
1267   }
1268 
1269   /// Returns true if the preprocessor is responsible for generating output,
1270   /// false if it is producing tokens to be consumed by Parse and Sema.
1271   bool isPreprocessedOutput() const { return PreprocessedOutput; }
1272 
1273   /// Return true if we are lexing directly from the specified lexer.
1274   bool isCurrentLexer(const PreprocessorLexer *L) const {
1275     return CurPPLexer == L;
1276   }
1277 
1278   /// Return the current lexer being lexed from.
1279   ///
1280   /// Note that this ignores any potentially active macro expansions and _Pragma
1281   /// expansions going on at the time.
1282   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1283 
1284   /// Return the current file lexer being lexed from.
1285   ///
1286   /// Note that this ignores any potentially active macro expansions and _Pragma
1287   /// expansions going on at the time.
1288   PreprocessorLexer *getCurrentFileLexer() const;
1289 
1290   /// Return the submodule owning the file being lexed. This may not be
1291   /// the current module if we have changed modules since entering the file.
1292   Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1293 
1294   /// Returns the FileID for the preprocessor predefines.
1295   FileID getPredefinesFileID() const { return PredefinesFileID; }
1296 
1297   /// \{
1298   /// Accessors for preprocessor callbacks.
1299   ///
1300   /// Note that this class takes ownership of any PPCallbacks object given to
1301   /// it.
1302   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1303   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1304     if (Callbacks)
1305       C = std::make_unique<PPChainedCallbacks>(std::move(C),
1306                                                 std::move(Callbacks));
1307     Callbacks = std::move(C);
1308   }
1309   /// \}
1310 
1311   /// Get the number of tokens processed so far.
1312   unsigned getTokenCount() const { return TokenCount; }
1313 
1314   /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1315   unsigned getMaxTokens() const { return MaxTokens; }
1316 
1317   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1318     MaxTokens = Value;
1319     MaxTokensOverrideLoc = Loc;
1320   };
1321 
1322   SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1323 
1324   /// Register a function that would be called on each token in the final
1325   /// expanded token stream.
1326   /// This also reports annotation tokens produced by the parser.
1327   void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1328     OnToken = std::move(F);
1329   }
1330 
1331   void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1332 
1333   bool isMacroDefined(StringRef Id) {
1334     return isMacroDefined(&Identifiers.get(Id));
1335   }
1336   bool isMacroDefined(const IdentifierInfo *II) {
1337     return II->hasMacroDefinition() &&
1338            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1339   }
1340 
1341   /// Determine whether II is defined as a macro within the module M,
1342   /// if that is a module that we've already preprocessed. Does not check for
1343   /// macros imported into M.
1344   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1345     if (!II->hasMacroDefinition())
1346       return false;
1347     auto I = Submodules.find(M);
1348     if (I == Submodules.end())
1349       return false;
1350     auto J = I->second.Macros.find(II);
1351     if (J == I->second.Macros.end())
1352       return false;
1353     auto *MD = J->second.getLatest();
1354     return MD && MD->isDefined();
1355   }
1356 
1357   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1358     if (!II->hasMacroDefinition())
1359       return {};
1360 
1361     MacroState &S = CurSubmoduleState->Macros[II];
1362     auto *MD = S.getLatest();
1363     while (isa_and_nonnull<VisibilityMacroDirective>(MD))
1364       MD = MD->getPrevious();
1365     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1366                            S.getActiveModuleMacros(*this, II),
1367                            S.isAmbiguous(*this, II));
1368   }
1369 
1370   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1371                                           SourceLocation Loc) {
1372     if (!II->hadMacroDefinition())
1373       return {};
1374 
1375     MacroState &S = CurSubmoduleState->Macros[II];
1376     MacroDirective::DefInfo DI;
1377     if (auto *MD = S.getLatest())
1378       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1379     // FIXME: Compute the set of active module macros at the specified location.
1380     return MacroDefinition(DI.getDirective(),
1381                            S.getActiveModuleMacros(*this, II),
1382                            S.isAmbiguous(*this, II));
1383   }
1384 
1385   /// Given an identifier, return its latest non-imported MacroDirective
1386   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1387   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1388     if (!II->hasMacroDefinition())
1389       return nullptr;
1390 
1391     auto *MD = getLocalMacroDirectiveHistory(II);
1392     if (!MD || MD->getDefinition().isUndefined())
1393       return nullptr;
1394 
1395     return MD;
1396   }
1397 
1398   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1399     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1400   }
1401 
1402   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1403     if (!II->hasMacroDefinition())
1404       return nullptr;
1405     if (auto MD = getMacroDefinition(II))
1406       return MD.getMacroInfo();
1407     return nullptr;
1408   }
1409 
1410   /// Given an identifier, return the latest non-imported macro
1411   /// directive for that identifier.
1412   ///
1413   /// One can iterate over all previous macro directives from the most recent
1414   /// one.
1415   MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1416 
1417   /// Add a directive to the macro directive history for this identifier.
1418   void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
1419   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1420                                              SourceLocation Loc) {
1421     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1422     appendMacroDirective(II, MD);
1423     return MD;
1424   }
1425   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1426                                              MacroInfo *MI) {
1427     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1428   }
1429 
1430   /// Set a MacroDirective that was loaded from a PCH file.
1431   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1432                                MacroDirective *MD);
1433 
1434   /// Register an exported macro for a module and identifier.
1435   ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II,
1436                               MacroInfo *Macro,
1437                               ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1438   ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1439 
1440   /// Get the list of leaf (non-overridden) module macros for a name.
1441   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1442     if (II->isOutOfDate())
1443       updateOutOfDateIdentifier(*II);
1444     auto I = LeafModuleMacros.find(II);
1445     if (I != LeafModuleMacros.end())
1446       return I->second;
1447     return {};
1448   }
1449 
1450   /// Get the list of submodules that we're currently building.
1451   ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
1452     return BuildingSubmoduleStack;
1453   }
1454 
1455   /// \{
1456   /// Iterators for the macro history table. Currently defined macros have
1457   /// IdentifierInfo::hasMacroDefinition() set and an empty
1458   /// MacroInfo::getUndefLoc() at the head of the list.
1459   using macro_iterator = MacroMap::const_iterator;
1460 
1461   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1462   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1463 
1464   llvm::iterator_range<macro_iterator>
1465   macros(bool IncludeExternalMacros = true) const {
1466     macro_iterator begin = macro_begin(IncludeExternalMacros);
1467     macro_iterator end = macro_end(IncludeExternalMacros);
1468     return llvm::make_range(begin, end);
1469   }
1470 
1471   /// \}
1472 
1473   /// Mark the given clang module as affecting the current clang module or translation unit.
1474   void markClangModuleAsAffecting(Module *M) {
1475     assert(M->isModuleMapModule());
1476     if (!BuildingSubmoduleStack.empty()) {
1477       if (M != BuildingSubmoduleStack.back().M)
1478         BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
1479     } else {
1480       AffectingClangModules.insert(M);
1481     }
1482   }
1483 
1484   /// Get the set of top-level clang modules that affected preprocessing, but were not
1485   /// imported.
1486   const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const {
1487     return AffectingClangModules;
1488   }
1489 
1490   /// Mark the file as included.
1491   /// Returns true if this is the first time the file was included.
1492   bool markIncluded(FileEntryRef File) {
1493     HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
1494     return IncludedFiles.insert(File).second;
1495   }
1496 
1497   /// Return true if this header has already been included.
1498   bool alreadyIncluded(FileEntryRef File) const {
1499     HeaderInfo.getFileInfo(File);
1500     return IncludedFiles.count(File);
1501   }
1502 
1503   /// Get the set of included files.
1504   IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
1505   const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1506 
1507   /// Return the name of the macro defined before \p Loc that has
1508   /// spelling \p Tokens.  If there are multiple macros with same spelling,
1509   /// return the last one defined.
1510   StringRef getLastMacroWithSpelling(SourceLocation Loc,
1511                                      ArrayRef<TokenValue> Tokens) const;
1512 
1513   /// Get the predefines for this processor.
1514   /// Used by some third-party tools to inspect and add predefines (see
1515   /// https://github.com/llvm/llvm-project/issues/57483).
1516   const std::string &getPredefines() const { return Predefines; }
1517 
1518   /// Set the predefines for this Preprocessor.
1519   ///
1520   /// These predefines are automatically injected when parsing the main file.
1521   void setPredefines(std::string P) { Predefines = std::move(P); }
1522 
1523   /// Return information about the specified preprocessor
1524   /// identifier token.
1525   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1526     return &Identifiers.get(Name);
1527   }
1528 
1529   /// Add the specified pragma handler to this preprocessor.
1530   ///
1531   /// If \p Namespace is non-null, then it is a token required to exist on the
1532   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1533   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1534   void AddPragmaHandler(PragmaHandler *Handler) {
1535     AddPragmaHandler(StringRef(), Handler);
1536   }
1537 
1538   /// Remove the specific pragma handler from this preprocessor.
1539   ///
1540   /// If \p Namespace is non-null, then it should be the namespace that
1541   /// \p Handler was added to. It is an error to remove a handler that
1542   /// has not been registered.
1543   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1544   void RemovePragmaHandler(PragmaHandler *Handler) {
1545     RemovePragmaHandler(StringRef(), Handler);
1546   }
1547 
1548   /// Install empty handlers for all pragmas (making them ignored).
1549   void IgnorePragmas();
1550 
1551   /// Set empty line handler.
1552   void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1553 
1554   EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1555 
1556   /// Add the specified comment handler to the preprocessor.
1557   void addCommentHandler(CommentHandler *Handler);
1558 
1559   /// Remove the specified comment handler.
1560   ///
1561   /// It is an error to remove a handler that has not been registered.
1562   void removeCommentHandler(CommentHandler *Handler);
1563 
1564   /// Set the code completion handler to the given object.
1565   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1566     CodeComplete = &Handler;
1567   }
1568 
1569   /// Retrieve the current code-completion handler.
1570   CodeCompletionHandler *getCodeCompletionHandler() const {
1571     return CodeComplete;
1572   }
1573 
1574   /// Clear out the code completion handler.
1575   void clearCodeCompletionHandler() {
1576     CodeComplete = nullptr;
1577   }
1578 
1579   /// Hook used by the lexer to invoke the "included file" code
1580   /// completion point.
1581   void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1582 
1583   /// Hook used by the lexer to invoke the "natural language" code
1584   /// completion point.
1585   void CodeCompleteNaturalLanguage();
1586 
1587   /// Set the code completion token for filtering purposes.
1588   void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1589     CodeCompletionII = Filter;
1590   }
1591 
1592   /// Set the code completion token range for detecting replacement range later
1593   /// on.
1594   void setCodeCompletionTokenRange(const SourceLocation Start,
1595                                    const SourceLocation End) {
1596     CodeCompletionTokenRange = {Start, End};
1597   }
1598   SourceRange getCodeCompletionTokenRange() const {
1599     return CodeCompletionTokenRange;
1600   }
1601 
1602   /// Get the code completion token for filtering purposes.
1603   StringRef getCodeCompletionFilter() {
1604     if (CodeCompletionII)
1605       return CodeCompletionII->getName();
1606     return {};
1607   }
1608 
1609   /// Retrieve the preprocessing record, or NULL if there is no
1610   /// preprocessing record.
1611   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1612 
1613   /// Create a new preprocessing record, which will keep track of
1614   /// all macro expansions, macro definitions, etc.
1615   void createPreprocessingRecord();
1616 
1617   /// Returns true if the FileEntry is the PCH through header.
1618   bool isPCHThroughHeader(const FileEntry *FE);
1619 
1620   /// True if creating a PCH with a through header.
1621   bool creatingPCHWithThroughHeader();
1622 
1623   /// True if using a PCH with a through header.
1624   bool usingPCHWithThroughHeader();
1625 
1626   /// True if creating a PCH with a #pragma hdrstop.
1627   bool creatingPCHWithPragmaHdrStop();
1628 
1629   /// True if using a PCH with a #pragma hdrstop.
1630   bool usingPCHWithPragmaHdrStop();
1631 
1632   /// Skip tokens until after the #include of the through header or
1633   /// until after a #pragma hdrstop.
1634   void SkipTokensWhileUsingPCH();
1635 
1636   /// Process directives while skipping until the through header or
1637   /// #pragma hdrstop is found.
1638   void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1639                                            SourceLocation HashLoc);
1640 
1641   /// Enter the specified FileID as the main source file,
1642   /// which implicitly adds the builtin defines etc.
1643   void EnterMainSourceFile();
1644 
1645   /// Inform the preprocessor callbacks that processing is complete.
1646   void EndSourceFile();
1647 
1648   /// Add a source file to the top of the include stack and
1649   /// start lexing tokens from it instead of the current buffer.
1650   ///
1651   /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1652   bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
1653                        SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1654 
1655   /// Add a Macro to the top of the include stack and start lexing
1656   /// tokens from it instead of the current buffer.
1657   ///
1658   /// \param Args specifies the tokens input to a function-like macro.
1659   /// \param ILEnd specifies the location of the ')' for a function-like macro
1660   /// or the identifier for an object-like macro.
1661   void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1662                   MacroArgs *Args);
1663 
1664 private:
1665   /// Add a "macro" context to the top of the include stack,
1666   /// which will cause the lexer to start returning the specified tokens.
1667   ///
1668   /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1669   /// will not be subject to further macro expansion. Otherwise, these tokens
1670   /// will be re-macro-expanded when/if expansion is enabled.
1671   ///
1672   /// If \p OwnsTokens is false, this method assumes that the specified stream
1673   /// of tokens has a permanent owner somewhere, so they do not need to be
1674   /// copied. If it is true, it assumes the array of tokens is allocated with
1675   /// \c new[] and the Preprocessor will delete[] it.
1676   ///
1677   /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1678   /// set, see the flag documentation for details.
1679   void EnterTokenStream(const Token *Toks, unsigned NumToks,
1680                         bool DisableMacroExpansion, bool OwnsTokens,
1681                         bool IsReinject);
1682 
1683 public:
1684   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1685                         bool DisableMacroExpansion, bool IsReinject) {
1686     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1687                      IsReinject);
1688   }
1689 
1690   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1691                         bool IsReinject) {
1692     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1693                      IsReinject);
1694   }
1695 
1696   /// Pop the current lexer/macro exp off the top of the lexer stack.
1697   ///
1698   /// This should only be used in situations where the current state of the
1699   /// top-of-stack lexer is known.
1700   void RemoveTopOfLexerStack();
1701 
1702   /// From the point that this method is called, and until
1703   /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1704   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1705   /// make the Preprocessor re-lex the same tokens.
1706   ///
1707   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1708   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1709   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1710   ///
1711   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1712   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1713   /// tokens will continue indefinitely.
1714   ///
1715   /// \param Unannotated Whether token annotations are reverted upon calling
1716   /// Backtrack().
1717   void EnableBacktrackAtThisPos(bool Unannotated = false);
1718 
1719 private:
1720   std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos();
1721 
1722   CachedTokensTy PopUnannotatedBacktrackTokens();
1723 
1724 public:
1725   /// Disable the last EnableBacktrackAtThisPos call.
1726   void CommitBacktrackedTokens();
1727 
1728   /// Make Preprocessor re-lex the tokens that were lexed since
1729   /// EnableBacktrackAtThisPos() was previously called.
1730   void Backtrack();
1731 
1732   /// True if EnableBacktrackAtThisPos() was called and
1733   /// caching of tokens is on.
1734   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1735 
1736   /// True if EnableBacktrackAtThisPos() was called and
1737   /// caching of unannotated tokens is on.
1738   bool isUnannotatedBacktrackEnabled() const {
1739     return !UnannotatedBacktrackTokens.empty();
1740   }
1741 
1742   /// Lex the next token for this preprocessor.
1743   void Lex(Token &Result);
1744 
1745   /// Lex all tokens for this preprocessor until (and excluding) end of file.
1746   void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1747 
1748   /// Lex a token, forming a header-name token if possible.
1749   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1750 
1751   /// Lex the parameters for an #embed directive, returns nullopt on error.
1752   std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
1753                                                              bool ForHasEmbed);
1754 
1755   bool LexAfterModuleImport(Token &Result);
1756   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1757 
1758   void makeModuleVisible(Module *M, SourceLocation Loc);
1759 
1760   SourceLocation getModuleImportLoc(Module *M) const {
1761     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1762   }
1763 
1764   /// Lex a string literal, which may be the concatenation of multiple
1765   /// string literals and may even come from macro expansion.
  /// \returns true on success, false if an error diagnostic has been generated.
1767   bool LexStringLiteral(Token &Result, std::string &String,
1768                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1769     if (AllowMacroExpansion)
1770       Lex(Result);
1771     else
1772       LexUnexpandedToken(Result);
1773     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1774                                   AllowMacroExpansion);
1775   }
1776 
1777   /// Complete the lexing of a string literal where the first token has
1778   /// already been lexed (see LexStringLiteral).
1779   bool FinishLexStringLiteral(Token &Result, std::string &String,
1780                               const char *DiagnosticTag,
1781                               bool AllowMacroExpansion);
1782 
1783   /// Lex a token.  If it's a comment, keep lexing until we get
1784   /// something not a comment.
1785   ///
1786   /// This is useful in -E -C mode where comments would foul up preprocessor
1787   /// directive handling.
1788   void LexNonComment(Token &Result) {
1789     do
1790       Lex(Result);
1791     while (Result.getKind() == tok::comment);
1792   }
1793 
1794   /// Just like Lex, but disables macro expansion of identifier tokens.
1795   void LexUnexpandedToken(Token &Result) {
1796     // Disable macro expansion.
1797     bool OldVal = DisableMacroExpansion;
1798     DisableMacroExpansion = true;
1799     // Lex the token.
1800     Lex(Result);
1801 
1802     // Reenable it.
1803     DisableMacroExpansion = OldVal;
1804   }
1805 
1806   /// Like LexNonComment, but this disables macro expansion of
1807   /// identifier tokens.
1808   void LexUnexpandedNonComment(Token &Result) {
1809     do
1810       LexUnexpandedToken(Result);
1811     while (Result.getKind() == tok::comment);
1812   }
1813 
1814   /// Parses a simple integer literal to get its numeric value.  Floating
1815   /// point literals and user defined literals are rejected.  Used primarily to
1816   /// handle pragmas that accept integer arguments.
1817   bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1818 
1819   /// Disables macro expansion everywhere except for preprocessor directives.
1820   void SetMacroExpansionOnlyInDirectives() {
1821     DisableMacroExpansion = true;
1822     MacroExpansionInDirectivesOverride = true;
1823   }
1824 
1825   /// Peeks ahead N tokens and returns that token without consuming any
1826   /// tokens.
1827   ///
1828   /// LookAhead(0) returns the next token that would be returned by Lex(),
1829   /// LookAhead(1) returns the token after it, etc.  This returns normal
1830   /// tokens after phase 5.  As such, it is equivalent to using
1831   /// 'Lex', not 'LexUnexpandedToken'.
1832   const Token &LookAhead(unsigned N) {
1833     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1834     if (CachedLexPos + N < CachedTokens.size())
1835       return CachedTokens[CachedLexPos+N];
1836     else
1837       return PeekAhead(N+1);
1838   }
1839 
1840   /// When backtracking is enabled and tokens are cached,
  /// this allows reverting a specific number of tokens.
1842   ///
1843   /// Note that the number of tokens being reverted should be up to the last
1844   /// backtrack position, not more.
1845   void RevertCachedTokens(unsigned N) {
1846     assert(isBacktrackEnabled() &&
1847            "Should only be called when tokens are cached for backtracking");
1848     assert(signed(CachedLexPos) - signed(N) >=
1849                signed(LastBacktrackPos().first) &&
1850            "Should revert tokens up to the last backtrack position, not more");
1851     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1852            "Corrupted backtrack positions ?");
1853     CachedLexPos -= N;
1854   }
1855 
1856   /// Enters a token in the token stream to be lexed next.
1857   ///
  /// If Backtrack() is called afterwards, the token will remain at the
1859   /// insertion point.
1860   /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1861   /// flag set. See the flag documentation for details.
1862   void EnterToken(const Token &Tok, bool IsReinject) {
1863     if (LexLevel) {
1864       // It's not correct in general to enter caching lex mode while in the
1865       // middle of a nested lexing action.
1866       auto TokCopy = std::make_unique<Token[]>(1);
1867       TokCopy[0] = Tok;
1868       EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1869     } else {
1870       EnterCachingLexMode();
1871       assert(IsReinject && "new tokens in the middle of cached stream");
1872       CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1873     }
1874   }
1875 
1876   /// We notify the Preprocessor that if it is caching tokens (because
1877   /// backtrack is enabled) it should replace the most recent cached tokens
1878   /// with the given annotation token. This function has no effect if
1879   /// backtracking is not enabled.
1880   ///
1881   /// Note that the use of this function is just for optimization, so that the
  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1883   /// invoked.
1884   void AnnotateCachedTokens(const Token &Tok) {
1885     assert(Tok.isAnnotation() && "Expected annotation token");
1886     if (CachedLexPos != 0 && isBacktrackEnabled())
1887       AnnotatePreviousCachedTokens(Tok);
1888   }
1889 
1890   /// Get the location of the last cached token, suitable for setting the end
1891   /// location of an annotation token.
1892   SourceLocation getLastCachedTokenLocation() const {
1893     assert(CachedLexPos != 0);
1894     return CachedTokens[CachedLexPos-1].getLastLoc();
1895   }
1896 
1897   /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1898   /// CachedTokens.
1899   bool IsPreviousCachedToken(const Token &Tok) const;
1900 
1901   /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1902   /// in \p NewToks.
1903   ///
1904   /// Useful when a token needs to be split in smaller ones and CachedTokens
  /// most recent token must be updated to reflect that.
1906   void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1907 
1908   /// Replace the last token with an annotation token.
1909   ///
1910   /// Like AnnotateCachedTokens(), this routine replaces an
1911   /// already-parsed (and resolved) token with an annotation
1912   /// token. However, this routine only replaces the last token with
1913   /// the annotation token; it does not affect any other cached
1914   /// tokens. This function has no effect if backtracking is not
1915   /// enabled.
1916   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1917     assert(Tok.isAnnotation() && "Expected annotation token");
1918     if (CachedLexPos != 0 && isBacktrackEnabled())
1919       CachedTokens[CachedLexPos-1] = Tok;
1920   }
1921 
1922   /// Enter an annotation token into the token stream.
1923   void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1924                             void *AnnotationVal);
1925 
1926   /// Determine whether it's possible for a future call to Lex to produce an
1927   /// annotation token created by a previous call to EnterAnnotationToken.
1928   bool mightHavePendingAnnotationTokens() {
1929     return CurLexerCallback != CLK_Lexer;
1930   }
1931 
1932   /// Update the current token to represent the provided
1933   /// identifier, in order to cache an action performed by typo correction.
1934   void TypoCorrectToken(const Token &Tok) {
1935     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1936     if (CachedLexPos != 0 && isBacktrackEnabled())
1937       CachedTokens[CachedLexPos-1] = Tok;
1938   }
1939 
1940   /// Recompute the current lexer kind based on the CurLexer/
1941   /// CurTokenLexer pointers.
1942   void recomputeCurLexerKind();
1943 
1944   /// Returns true if incremental processing is enabled
1945   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1946 
1947   /// Enables the incremental processing
1948   void enableIncrementalProcessing(bool value = true) {
1949     IncrementalProcessing = value;
1950   }
1951 
1952   /// Specify the point at which code-completion will be performed.
1953   ///
1954   /// \param File the file in which code completion should occur. If
1955   /// this file is included multiple times, code-completion will
1956   /// perform completion the first time it is included. If NULL, this
1957   /// function clears out the code-completion point.
1958   ///
1959   /// \param Line the line at which code completion should occur
1960   /// (1-based).
1961   ///
1962   /// \param Column the column at which code completion should occur
1963   /// (1-based).
1964   ///
1965   /// \returns true if an error occurred, false otherwise.
1966   bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line,
1967                               unsigned Column);
1968 
1969   /// Determine if we are performing code completion.
1970   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1971 
1972   /// Returns the location of the code-completion point.
1973   ///
1974   /// Returns an invalid location if code-completion is not enabled or the file
1975   /// containing the code-completion point has not been lexed yet.
1976   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1977 
1978   /// Returns the start location of the file of code-completion point.
1979   ///
1980   /// Returns an invalid location if code-completion is not enabled or the file
1981   /// containing the code-completion point has not been lexed yet.
1982   SourceLocation getCodeCompletionFileLoc() const {
1983     return CodeCompletionFileLoc;
1984   }
1985 
1986   /// Returns true if code-completion is enabled and we have hit the
1987   /// code-completion point.
1988   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1989 
1990   /// Note that we hit the code-completion point.
1991   void setCodeCompletionReached() {
1992     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1993     CodeCompletionReached = true;
1994     // Silence any diagnostics that occur after we hit the code-completion.
1995     getDiagnostics().setSuppressAllDiagnostics(true);
1996   }
1997 
1998   /// The location of the currently-active \#pragma clang
1999   /// arc_cf_code_audited begin.
2000   ///
2001   /// Returns an invalid location if there is no such pragma active.
2002   std::pair<IdentifierInfo *, SourceLocation>
2003   getPragmaARCCFCodeAuditedInfo() const {
2004     return PragmaARCCFCodeAuditedInfo;
2005   }
2006 
2007   /// Set the location of the currently-active \#pragma clang
2008   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
2009   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
2010                                      SourceLocation Loc) {
2011     PragmaARCCFCodeAuditedInfo = {Ident, Loc};
2012   }
2013 
2014   /// The location of the currently-active \#pragma clang
2015   /// assume_nonnull begin.
2016   ///
2017   /// Returns an invalid location if there is no such pragma active.
2018   SourceLocation getPragmaAssumeNonNullLoc() const {
2019     return PragmaAssumeNonNullLoc;
2020   }
2021 
2022   /// Set the location of the currently-active \#pragma clang
2023   /// assume_nonnull begin.  An invalid location ends the pragma.
2024   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
2025     PragmaAssumeNonNullLoc = Loc;
2026   }
2027 
2028   /// Get the location of the recorded unterminated \#pragma clang
2029   /// assume_nonnull begin in the preamble, if one exists.
2030   ///
  /// Returns an invalid location if the preamble did not end with
2032   /// such a pragma active or if there is no recorded preamble.
2033   SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
2034     return PreambleRecordedPragmaAssumeNonNullLoc;
2035   }
2036 
2037   /// Record the location of the unterminated \#pragma clang
2038   /// assume_nonnull begin in the preamble.
2039   void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
2040     PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2041   }
2042 
2043   /// Set the directory in which the main file should be considered
2044   /// to have been found, if it is not a real file.
2045   void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2046 
2047   /// Instruct the preprocessor to skip part of the main source file.
2048   ///
2049   /// \param Bytes The number of bytes in the preamble to skip.
2050   ///
2051   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2052   /// start of a line.
2053   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2054     SkipMainFilePreamble.first = Bytes;
2055     SkipMainFilePreamble.second = StartOfLine;
2056   }
2057 
2058   /// Forwarding function for diagnostics.  This emits a diagnostic at
2059   /// the specified Token's location, translating the token's start
2060   /// position in the current buffer into a SourcePosition object for rendering.
2061   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2062     return Diags->Report(Loc, DiagID);
2063   }
2064 
2065   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2066     return Diags->Report(Tok.getLocation(), DiagID);
2067   }
2068 
2069   /// Return the 'spelling' of the token at the given
2070   /// location; does not go up to the spelling location or down to the
2071   /// expansion location.
2072   ///
2073   /// \param buffer A buffer which will be used only if the token requires
2074   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
2075   /// \param invalid If non-null, will be set \c true if an error occurs.
2076   StringRef getSpelling(SourceLocation loc,
2077                         SmallVectorImpl<char> &buffer,
2078                         bool *invalid = nullptr) const {
2079     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2080   }
2081 
2082   /// Return the 'spelling' of the Tok token.
2083   ///
2084   /// The spelling of a token is the characters used to represent the token in
2085   /// the source file after trigraph expansion and escaped-newline folding.  In
2086   /// particular, this wants to get the true, uncanonicalized, spelling of
2087   /// things like digraphs, UCNs, etc.
2088   ///
2089   /// \param Invalid If non-null, will be set \c true if an error occurs.
2090   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2091     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2092   }
2093 
2094   /// Get the spelling of a token into a preallocated buffer, instead
2095   /// of as an std::string.
2096   ///
2097   /// The caller is required to allocate enough space for the token, which is
2098   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2099   /// actual result is returned.
2100   ///
2101   /// Note that this method may do two possible things: it may either fill in
2102   /// the buffer specified with characters, or it may *change the input pointer*
2103   /// to point to a constant buffer with the data already in it (avoiding a
2104   /// copy).  The caller is not allowed to modify the returned buffer pointer
2105   /// if an internal buffer is returned.
2106   unsigned getSpelling(const Token &Tok, const char *&Buffer,
2107                        bool *Invalid = nullptr) const {
2108     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2109   }
2110 
2111   /// Get the spelling of a token into a SmallVector.
2112   ///
2113   /// Note that the returned StringRef may not point to the
2114   /// supplied buffer if a copy can be avoided.
2115   StringRef getSpelling(const Token &Tok,
2116                         SmallVectorImpl<char> &Buffer,
2117                         bool *Invalid = nullptr) const;
2118 
2119   /// Relex the token at the specified location.
2120   /// \returns true if there was a failure, false on success.
2121   bool getRawToken(SourceLocation Loc, Token &Result,
2122                    bool IgnoreWhiteSpace = false) {
2123     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2124   }
2125 
2126   /// Given a Token \p Tok that is a numeric constant with length 1,
2127   /// return the value of constant as an unsigned 8-bit integer.
2128   uint8_t
2129   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
2130                                               bool *Invalid = nullptr) const {
2131     assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2132            Tok.getLength() == 1 && "Called on unsupported token");
2133     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2134 
2135     // If the token is carrying a literal data pointer, just use it.
2136     if (const char *D = Tok.getLiteralData())
2137       return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';
2138 
2139     assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2140     // Otherwise, fall back on getCharacterData, which is slower, but always
2141     // works.
2142     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
2143   }
2144 
2145   /// Retrieve the name of the immediate macro expansion.
2146   ///
2147   /// This routine starts from a source location, and finds the name of the
2148   /// macro responsible for its immediate expansion. It looks through any
2149   /// intervening macro argument expansions to compute this. It returns a
2150   /// StringRef that refers to the SourceManager-owned buffer of the source
2151   /// where that macro name is spelled. Thus, the result shouldn't out-live
2152   /// the SourceManager.
2153   StringRef getImmediateMacroName(SourceLocation Loc) {
2154     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
2155   }
2156 
2157   /// Plop the specified string into a scratch buffer and set the
2158   /// specified token's location and length to it.
2159   ///
2160   /// If specified, the source location provides a location of the expansion
2161   /// point of the token.
2162   void CreateString(StringRef Str, Token &Tok,
2163                     SourceLocation ExpansionLocStart = SourceLocation(),
2164                     SourceLocation ExpansionLocEnd = SourceLocation());
2165 
2166   /// Split the first Length characters out of the token starting at TokLoc
2167   /// and return a location pointing to the split token. Re-lexing from the
2168   /// split token will return the split token rather than the original.
2169   SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2170 
2171   /// Computes the source location just past the end of the
2172   /// token at this source location.
2173   ///
2174   /// This routine can be used to produce a source location that
2175   /// points just past the end of the token referenced by \p Loc, and
2176   /// is generally used when a diagnostic needs to point just after a
  /// token where it expected something different than it received. If
2178   /// the returned source location would not be meaningful (e.g., if
2179   /// it points into a macro), this routine returns an invalid
2180   /// source location.
2181   ///
2182   /// \param Offset an offset from the end of the token, where the source
2183   /// location should refer to. The default offset (0) produces a source
2184   /// location pointing just past the end of the token; an offset of 1 produces
2185   /// a source location pointing to the last character in the token, etc.
2186   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
2187     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2188   }
2189 
2190   /// Returns true if the given MacroID location points at the first
2191   /// token of the macro expansion.
2192   ///
2193   /// \param MacroBegin If non-null and function returns true, it is set to
2194   /// begin location of the macro.
2195   bool isAtStartOfMacroExpansion(SourceLocation loc,
2196                                  SourceLocation *MacroBegin = nullptr) const {
2197     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2198                                             MacroBegin);
2199   }
2200 
2201   /// Returns true if the given MacroID location points at the last
2202   /// token of the macro expansion.
2203   ///
2204   /// \param MacroEnd If non-null and function returns true, it is set to
2205   /// end location of the macro.
2206   bool isAtEndOfMacroExpansion(SourceLocation loc,
2207                                SourceLocation *MacroEnd = nullptr) const {
2208     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2209   }
2210 
2211   /// Print the token to stderr, used for debugging.
2212   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
2213   void DumpLocation(SourceLocation Loc) const;
2214   void DumpMacro(const MacroInfo &MI) const;
2215   void dumpMacroInfo(const IdentifierInfo *II);
2216 
2217   /// Given a location that specifies the start of a
2218   /// token, return a new location that specifies a character within the token.
2219   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
2220                                          unsigned Char) const {
2221     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2222   }
2223 
2224   /// Increment the counters for the number of token paste operations
2225   /// performed.
2226   ///
2227   /// If fast was specified, this is a 'fast paste' case we handled.
2228   void IncrementPasteCounter(bool isFast) {
2229     if (isFast)
2230       ++NumFastTokenPaste;
2231     else
2232       ++NumTokenPaste;
2233   }
2234 
2235   void PrintStats();
2236 
2237   size_t getTotalMemory() const;
2238 
2239   /// When the macro expander pastes together a comment (/##/) in Microsoft
2240   /// mode, this method handles updating the current state, returning the
2241   /// token on the next source line.
2242   void HandleMicrosoftCommentPaste(Token &Tok);
2243 
2244   //===--------------------------------------------------------------------===//
2245   // Preprocessor callback methods.  These are invoked by a lexer as various
2246   // directives and events are found.
2247 
2248   /// Given a tok::raw_identifier token, look up the
2249   /// identifier information for the token and install it into the token,
2250   /// updating the token kind accordingly.
2251   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2252 
2253 private:
2254   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2255 
2256 public:
2257   /// Specifies the reason for poisoning an identifier.
2258   ///
2259   /// If that identifier is accessed while poisoned, then this reason will be
2260   /// used instead of the default "poisoned" diagnostic.
2261   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2262 
2263   /// Display reason for poisoned identifier.
2264   void HandlePoisonedIdentifier(Token & Identifier);
2265 
2266   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
2267     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2268       if(II->isPoisoned()) {
2269         HandlePoisonedIdentifier(Identifier);
2270       }
2271     }
2272   }
2273 
2274   /// Determine whether the next preprocessor token to be
2275   /// lexed is a '('.  If so, consume the token and return true, if not, this
2276   /// method should have no observable side-effect on the lexed tokens.
2277   bool isNextPPTokenLParen();
2278 
2279 private:
2280   /// Identifiers used for SEH handling in Borland. These are only
2281   /// allowed in particular circumstances
2282   // __except block
2283   IdentifierInfo *Ident__exception_code,
2284                  *Ident___exception_code,
2285                  *Ident_GetExceptionCode;
2286   // __except filter expression
2287   IdentifierInfo *Ident__exception_info,
2288                  *Ident___exception_info,
2289                  *Ident_GetExceptionInfo;
2290   // __finally
2291   IdentifierInfo *Ident__abnormal_termination,
2292                  *Ident___abnormal_termination,
2293                  *Ident_AbnormalTermination;
2294 
2295   const char *getCurLexerEndPos();
2296   void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2297 
2298 public:
2299   void PoisonSEHIdentifiers(bool Poison = true); // Borland
2300 
  /// Callback invoked when the lexer reads an identifier and has
  /// filled in the token's IdentifierInfo member.
  ///
  /// This callback potentially macro expands it or turns it into a named
  /// token (like 'for').
  ///
  /// \returns true if we actually computed a token, false if we need to
  /// lex again.
  bool HandleIdentifier(Token &Identifier);

  /// Callback invoked when the lexer hits the end of the current file.
  ///
  /// This either returns the EOF token and returns true, or
  /// pops a level off the include stack and returns false, at which point the
  /// client should call lex again.
  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);

  /// Callback invoked when the current TokenLexer hits the end of its
  /// token stream.
  bool HandleEndOfTokenLexer(Token &Result);

  /// Callback invoked when the lexer sees a # token at the start of a
  /// line.
  ///
  /// This consumes the directive, modifies the lexer/preprocessor state, and
  /// advances the lexer(s) so that the next token read is the correct one.
  void HandleDirective(Token &Result);

  /// Ensure that the next token is a tok::eod token.
  ///
  /// If not, emit a diagnostic and consume up until the eod.
  /// If \p EnableMacros is true, then we consider macros that expand to zero
  /// tokens as being ok.
  ///
  /// \return The location of the end of the directive (the terminating
  /// newline).
  SourceLocation CheckEndOfDirective(const char *DirType,
                                     bool EnableMacros = false);
2339 
2340   /// Read and discard all tokens remaining on the current line until
2341   /// the tok::eod token is found. Returns the range of the skipped tokens.
2342   SourceRange DiscardUntilEndOfDirective() {
2343     Token Tmp;
2344     return DiscardUntilEndOfDirective(Tmp);
2345   }
2346 
  /// Read and discard all tokens remaining on the current line until the
  /// tok::eod token is found, like the overload without arguments, except
  /// that the token that was found is retained (in \p Tok).
  SourceRange DiscardUntilEndOfDirective(Token &Tok);
2349 
2350   /// Returns true if the preprocessor has seen a use of
2351   /// __DATE__ or __TIME__ in the file so far.
2352   bool SawDateOrTime() const {
2353     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2354   }
  /// Return the current value of the CounterValue member.
  ///
  /// NOTE(review): presumably the value backing the __COUNTER__ builtin
  /// macro -- confirm against its uses.
  unsigned getCounterValue() const { return CounterValue; }

  /// Overwrite the CounterValue member with \p V.
  void setCounterValue(unsigned V) { CounterValue = V; }
2357 
  /// Return the value of CurrentFPEvalMethod.
  ///
  /// Asserts that the value is no longer LangOptions::FEM_UnsetOnCommandLine,
  /// i.e. that it has been set from the command line or from the target info
  /// before being queried.
  LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const {
    assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
           "FPEvalMethod should be set either from command line or from the "
           "target info");
    return CurrentFPEvalMethod;
  }
2364 
  /// Return the value of TUFPEvalMethod.
  ///
  /// Unlike getCurrentFPEvalMethod(), this getter performs no assertion on
  /// the stored value.
  LangOptions::FPEvalMethodKind getTUFPEvalMethod() const {
    return TUFPEvalMethod;
  }
2368 
  /// Return the pragma location most recently recorded by
  /// setCurrentFPEvalMethod().
  SourceLocation getLastFPEvalPragmaLocation() const {
    return LastFPEvalPragmaLocation;
  }
2372 
  /// Store \p Val into both CurrentFPEvalMethod and TUFPEvalMethod, and
  /// remember \p PragmaLoc as the location of the pragma that caused the
  /// change.
  ///
  /// \p Val must not be LangOptions::FEM_UnsetOnCommandLine (asserted).
  void setCurrentFPEvalMethod(SourceLocation PragmaLoc,
                              LangOptions::FPEvalMethodKind Val) {
    assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
           "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
    // This is the location of the '#pragma float_control' where the
    // execution state is modified.
    LastFPEvalPragmaLocation = PragmaLoc;
    CurrentFPEvalMethod = Val;
    TUFPEvalMethod = Val;
  }
2383 
2384   void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2385     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2386            "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2387     TUFPEvalMethod = Val;
2388   }
2389 
  /// Retrieves the module that we're currently building, if any.
  Module *getCurrentModule();

  /// Retrieves the module whose implementation we're currently compiling, if
  /// any.
  Module *getCurrentModuleImplementation();
2395 
  /// Whether we are preprocessing a named module, as reported by
  /// ModuleDeclState.
  bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2398 
  /// Whether we are preprocessing a named interface unit.
  ///
  /// Note that a module implementation partition is not considered a named
  /// interface unit here, although it is importable; this eases the parsing.
  bool isInNamedInterfaceUnit() const {
    return ModuleDeclState.isNamedInterface();
  }
2406 
  /// Get the name of the named module we're preprocessing.
  ///
  /// Requires that we're preprocessing a named module.
  StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2410 
  /// Whether we are preprocessing a module implementation unit.
  ///
  /// Note that a module implementation partition is not considered an
  /// implementation unit.
  bool isInImplementationUnit() const {
    return ModuleDeclState.isImplementationUnit();
  }
2417 
2418   /// If we're importing a standard C++20 Named Modules.
2419   bool isInImportingCXXNamedModules() const {
2420     // NamedModuleImportPath will be non-empty only if we're importing
2421     // Standard C++ named modules.
2422     return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules &&
2423            !IsAtImport;
2424   }
2425 
  /// Allocate a new MacroInfo object with the provided SourceLocation.
  MacroInfo *AllocateMacroInfo(SourceLocation L);

  /// Turn the specified lexer token into a fully checked and spelled
  /// filename, e.g. as an operand of \#include.
  ///
  /// The caller is expected to provide a buffer that is large enough to hold
  /// the spelling of the filename, but is also expected to handle the case
  /// when this method decides to use a different buffer.
  ///
  /// NOTE(review): the signature takes a location and a buffer rather than a
  /// token; presumably \p Buffer holds the spelling on entry and \p Loc is
  /// used for diagnostics -- confirm against the definition.
  ///
  /// \returns true if the input filename was in <>'s or false if it was
  /// in ""'s.
  bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2439 
  /// Given a "foo" or \<foo> reference, look up the indicated file.
  ///
  /// \p isAngled indicates whether the file reference is for system
  /// \#include's or not (i.e. using <> instead of "").
  ///
  /// \returns std::nullopt on failure.
  OptionalFileEntryRef
  LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
             ConstSearchDirIterator FromDir, const FileEntry *FromFile,
             ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
             SmallVectorImpl<char> *RelativePath,
             ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
             bool *IsFrameworkFound, bool SkipCache = false,
             bool OpenFile = true, bool CacheFailures = true);

  /// Given a "Filename" or \<Filename> reference, look up the indicated embed
  /// resource.
  ///
  /// \p isAngled indicates whether the file reference is for
  /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile
  /// is true, the file looked up is opened for reading, otherwise it only
  /// validates that the file exists. Quoted filenames are looked up relative
  /// to \p LookupFromFile if it is nonnull.
  ///
  /// \returns std::nullopt on failure.
  OptionalFileEntryRef
  LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
                  const FileEntry *LookupFromFile = nullptr);
2464 
  /// Return true if we're in the top-level file, not in a \#include.
  bool isInPrimaryFile() const;

  /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
  /// followed by EOD.
  ///
  /// \returns true if the token is not a valid on-off-switch.
  bool LexOnOffSwitch(tok::OnOffSwitch &Result);

  /// NOTE(review): undocumented in this header. Judging by the name and
  /// parameters, validates \p MacroNameTok as a macro name for the use
  /// described by \p isDefineUndef, optionally reporting through
  /// \p ShadowFlag -- confirm the return convention against the definition.
  bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
                      bool *ShadowFlag = nullptr);

  /// NOTE(review): undocumented in this header. Presumably these enter and
  /// leave the submodule \p M while lexing, with \p ForPragma distinguishing
  /// pragma-driven entry from include-driven entry -- confirm against the
  /// definitions.
  void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
  Module *LeaveSubmodule(bool ForPragma);
2477 
2478 private:
  // NOTE(review): grants TokenLexer::ExpandFunctionArguments access to the
  // private members declared below; which ones it uses is not visible here.
  friend void TokenLexer::ExpandFunctionArguments();
2480 
  /// Save the current lexer state as a new entry at the end of
  /// IncludeMacroStack.
  ///
  /// Ownership of CurLexer and CurTokenLexer is moved into the new entry, and
  /// CurPPLexer is reset to nullptr afterwards. Must not be called while the
  /// current lexer callback is the caching lexer (asserted).
  void PushIncludeMacroStack() {
    assert(CurLexerCallback != CLK_CachingLexer &&
           "cannot push a caching lexer");
    IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
                                   std::move(CurLexer), CurPPLexer,
                                   std::move(CurTokenLexer), CurDirLookup);
    CurPPLexer = nullptr;
  }
2489 
2490   void PopIncludeMacroStack() {
2491     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2492     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2493     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2494     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
2495     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2496     CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2497     IncludeMacroStack.pop_back();
2498   }
2499 
  /// NOTE(review): undocumented in this header. Judging by the name, carries
  /// the saved "at start of line" / "has leading space" information over to
  /// \p Result -- confirm against the definition.
  void PropagateLineStartLeadingSpaceInfo(Token &Result);

  /// Determine whether we need to create module macros for #defines in the
  /// current context.
  bool needModuleMacros() const;

  /// Update the set of active module macros and ambiguity flag for a module
  /// macro name.
  void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);

  // Allocation helpers for the individual macro directive kinds.
  // NOTE(review): where the memory comes from and who owns the result is not
  // visible in this header -- confirm against the definitions.
  DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
                                               SourceLocation Loc);
  UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
  VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
                                                             bool isPublic);
2515 
  /// Lex and validate a macro name, which occurs after a
  /// \#define or \#undef.
  ///
  /// \param MacroNameTok Token that represents the name defined or undefined.
  /// \param IsDefineUndef Kind of preprocessor directive.
  /// \param ShadowFlag Points to flag that is set if macro name shadows
  ///                   a keyword.
  ///
  /// This emits a diagnostic, sets the token kind to eod,
  /// and discards the rest of the macro line if the macro name is invalid.
  void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
                     bool *ShadowFlag = nullptr);

  /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
  /// entire line) of the macro's tokens and adds them to MacroInfo, and while
  /// doing so performs certain validity checks including (but not limited to):
  ///   - # (stringization) is followed by a macro parameter
  /// \param MacroNameTok - Token that represents the macro name
  /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
  ///
  /// \returns a pointer to a MacroInfo object, or emits a diagnostic and
  /// returns nullptr if an invalid sequence of tokens is encountered.
  MacroInfo *ReadOptionalMacroParameterListAndBody(
      const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);

  /// The ( starting an argument list of a macro definition has just been read.
  /// Lex the rest of the parameters and the closing ), updating \p MI with
  /// what we learn and saving in \p LastTok the last token read.
  ///
  /// \returns true if an error occurs parsing the arg list.
  bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2546 
  /// Provide a suggestion for a typoed directive. If there is no typo, then
  /// just skip suggesting.
  ///
  /// \param Tok - Token that represents the directive
  /// \param Directive - String reference for the directive name
  void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;

  /// We just read a \#if or related directive and decided that the
  /// subsequent tokens are in the \#if'd out portion of the
  /// file.  Lex the rest of the file, until we see an \#endif.
  ///
  /// If \p FoundNonSkipPortion is true, then we have already emitted code for
  /// part of this \#if directive, so \#else/\#elif blocks should never be
  /// entered. If \p FoundElse is false, then \#else directives are ok; if
  /// not, then we have already seen one, so a \#else directive is a
  /// duplicate.  When this returns, the caller can lex the first valid token.
  void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                    SourceLocation IfTokenLoc,
                                    bool FoundNonSkipPortion, bool FoundElse,
                                    SourceLocation ElseLoc = SourceLocation());
2566 
  /// Information about the result for evaluating an expression for a
  /// preprocessor directive.
  ///
  /// Conditional and IncludedUndefinedIds have no default member
  /// initializers, so whoever creates an object must supply them.
  struct DirectiveEvalResult {
    /// The integral value of the expression.
    /// NOTE(review): presumably std::nullopt when no value could be
    /// computed -- confirm against the producers.
    std::optional<llvm::APSInt> Value;

    /// Whether the expression was evaluated as true or not.
    bool Conditional;

    /// True if the expression contained identifiers that were undefined.
    bool IncludedUndefinedIds;

    /// The source range for the expression.
    SourceRange ExprRange;
  };
2582 
  /// Evaluate an integer constant expression that may occur after a
  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
  ///
  /// If the expression is equivalent to "!defined(X)" return X in
  /// \p IfNDefMacro.
  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
                                                  bool CheckForEoD = true);

  /// Evaluate an integer constant expression that may occur after a
  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
  ///
  /// If the expression is equivalent to "!defined(X)" return X in
  /// \p IfNDefMacro.
  /// \p EvaluatedDefined will contain the result of whether "defined" appeared
  /// in the evaluated expression or not.
  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
                                                  Token &Tok,
                                                  bool &EvaluatedDefined,
                                                  bool CheckForEoD = true);
2600 
  /// Process a '__has_embed("path" [, ...])' expression.
  ///
  /// \returns predefined `__STDC_EMBED_*` macro values if successful.
  EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);

  /// Process a '__has_include("path")' expression.
  ///
  /// \returns true if successful.
  bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);

  /// Process a '__has_include_next("path")' expression.
  ///
  /// \returns true if successful.
  bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);

  /// Get the directory and file from which to start \#include_next lookup.
  std::pair<ConstSearchDirIterator, const FileEntry *>
  getIncludeNextStart(const Token &IncludeNextTok) const;

  /// Install the standard preprocessor pragmas:
  /// \#pragma GCC poison/system_header/dependency and \#pragma once.
  void RegisterBuiltinPragmas();
2624 
2625   /// RegisterBuiltinMacro - Register the specified identifier in the identifier
2626   /// table and mark it as a builtin macro to be expanded.
2627   IdentifierInfo *RegisterBuiltinMacro(const char *Name) {
2628     // Get the identifier.
2629     IdentifierInfo *Id = getIdentifierInfo(Name);
2630 
2631     // Mark it as being a macro that is builtin.
2632     MacroInfo *MI = AllocateMacroInfo(SourceLocation());
2633     MI->setIsBuiltinMacro();
2634     appendDefMacroDirective(Id, MI);
2635     return Id;
2636   }
2637 
  /// Register builtin macros such as __LINE__ with the identifier table.
  void RegisterBuiltinMacros();

  /// If an identifier token is read that is to be expanded as a macro, handle
  /// it and return the next token as 'Tok'.  If we lexed a token, return true;
  /// otherwise the caller should lex again.
  bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);

  /// Cache macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
  Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
                                  ArrayRef<Token> tokens);

  /// NOTE(review): presumably removes from the end of the cache the tokens
  /// added by the most recent cacheMacroExpandedTokens() call -- confirm
  /// against the definition.
  void removeCachedMacroExpandedTokensOfLastLexer();

  /// After reading "MACRO(", this method is invoked to read all of the formal
  /// arguments specified for the macro invocation.  Returns null on error.
  MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                       SourceLocation &MacroEnd);

  /// If an identifier token is read that is to be expanded
  /// as a builtin macro, handle it and return the next token as 'Tok'.
  void ExpandBuiltinMacro(Token &Tok);

  /// Read a \c _Pragma directive, slice it up, process it, then
  /// return the first token after the directive.
  /// This assumes that the \c _Pragma token has just been read into \p Tok.
  void Handle_Pragma(Token &Tok);

  /// Like Handle_Pragma except the pragma text is not enclosed within
  /// a string literal.
  void HandleMicrosoft__pragma(Token &Tok);

  /// Add a lexer to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2677 
  /// Set the FileID for the preprocessor predefines.
  ///
  /// May only be called while PredefinesFileID is still invalid, i.e. it must
  /// not already have been set (asserted).
  void setPredefinesFileID(FileID FID) {
    assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
    PredefinesFileID = FID;
  }
2683 
  /// Set the FileID for the PCH through header to \p FID.
  void setPCHThroughHeaderFileID(FileID FID);
2686 
2687   /// Returns true if we are lexing from a file and not a
2688   /// pragma or a macro.
2689   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2690     return L ? !L->isPragmaLexer() : P != nullptr;
2691   }
2692 
  /// Same check as the two-pointer overload, applied to the lexers stored in
  /// the include stack entry \p I.
  static bool IsFileLexer(const IncludeStackInfo& I) {
    return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
  }

  /// Same check as the two-pointer overload, applied to the current lexers
  /// (CurLexer and CurPPLexer).
  bool IsFileLexer() const {
    return IsFileLexer(CurLexer.get(), CurPPLexer);
  }
2700 
  //===--------------------------------------------------------------------===//
  // Caching stuff.

  /// NOTE(review): undocumented in this header. Presumably the lexing entry
  /// point used while in caching-lex mode, producing the next token in
  /// \p Result -- confirm against the definition.
  void CachingLex(Token &Result);
2704 
2705   bool InCachingLexMode() const {
2706     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2707     // that we are past EOF, not that we are in CachingLex mode.
2708     return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2709   }
2710 
  /// NOTE(review): undocumented in this header. Presumably switches the
  /// preprocessor into caching-lex mode; the "Unchecked" variant presumably
  /// skips the precondition checks the first one performs -- confirm against
  /// the definitions.
  void EnterCachingLexMode();
  void EnterCachingLexModeUnchecked();
2713 
2714   void ExitCachingLexMode() {
2715     if (InCachingLexMode())
2716       RemoveTopOfLexerStack();
2717   }
2718 
  /// NOTE(review): undocumented in this header. Presumably returns the token
  /// \p N positions ahead without consuming it -- confirm the indexing base
  /// against the definition.
  const Token &PeekAhead(unsigned N);

  /// NOTE(review): undocumented in this header. Judging by the name, replaces
  /// previously cached tokens with the annotation token \p Tok -- confirm
  /// against the definition.
  void AnnotatePreviousCachedTokens(const Token &Tok);
2721 
  //===--------------------------------------------------------------------===//
  /// Handle*Directive - implement the various preprocessor directives.  These
  /// should side-effect the current preprocessor object so that the next call
  /// to Lex() will return the appropriate token next.
  // NOTE(review): which directive each handler serves is inferred from its
  // name only; the definitions are not visible in this header.
  void HandleLineDirective();
  void HandleDigitDirective(Token &Tok);
  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
  void HandleIdentSCCSDirective(Token &Tok);
  void HandleMacroPublicDirective(Token &Tok);
  void HandleMacroPrivateDirective();
2732 
  /// An additional notification that can be produced by a header inclusion or
  /// import to tell the parser what happened.
  struct ImportAction {
    /// The kind of notification.
    /// NOTE(review): the precise meaning of each enumerator is not documented
    /// here -- see the code that produces and consumes ImportAction.
    enum ActionKind {
      None,
      ModuleBegin,
      ModuleImport,
      HeaderUnitImport,
      SkippedModuleImport,
      Failure,
    } Kind;

    /// The module associated with the action; null unless one was passed to
    /// the constructor.
    Module *ModuleForHeader = nullptr;

    /// Build an action of kind \p AK for module \p Mod.
    ///
    /// A module is mandatory for every kind other than None and Failure
    /// (asserted).
    ImportAction(ActionKind AK, Module *Mod = nullptr)
        : Kind(AK), ModuleForHeader(Mod) {
      assert((AK == None || Mod || AK == Failure) &&
             "no module for module action");
    }
  };
2752 
  /// NOTE(review): undocumented in this header. Presumably performs the file
  /// lookup for an \#include / import directive; the reference and pointer
  /// parameters look like in/out arguments -- confirm against the definition.
  OptionalFileEntryRef LookupHeaderIncludeOrImport(
      ConstSearchDirIterator *CurDir, StringRef &Filename,
      SourceLocation FilenameLoc, CharSourceRange FilenameRange,
      const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
      bool &IsMapped, ConstSearchDirIterator LookupFrom,
      const FileEntry *LookupFromFile, StringRef &LookupFilename,
      SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
      ModuleMap::KnownHeader &SuggestedModule, bool isAngled);

  // Binary data inclusion.
  // NOTE(review): presumably HandleEmbedDirective parses the directive and
  // HandleEmbedDirectiveImpl acts on the already-loaded \p BinaryContents --
  // confirm against the definitions.
  void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
                            const FileEntry *LookupFromFile = nullptr);
  void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
                                const LexEmbedParametersResult &Params,
                                StringRef BinaryContents);
2767 
  // File inclusion.
  // NOTE(review): the handlers below are undocumented in this header; which
  // directive each one serves is inferred from its name only.
  void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
                              ConstSearchDirIterator LookupFrom = nullptr,
                              const FileEntry *LookupFromFile = nullptr);
  // Returns an ImportAction describing what happened, for the parser.
  ImportAction
  HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
                              Token &FilenameTok, SourceLocation EndLoc,
                              ConstSearchDirIterator LookupFrom = nullptr,
                              const FileEntry *LookupFromFile = nullptr);
  void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
  void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
  void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
  void HandleMicrosoftImportDirective(Token &Tok);
2781 
2782 public:
  /// Check that the given module is available, producing a diagnostic if not.
  /// \return \c true if the check failed (because the module is not available).
  ///         \c false if the module appears to be usable.
  static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                     const TargetInfo &TargetInfo,
                                     const Module &M, DiagnosticsEngine &Diags);

  // Module inclusion testing.
  /// Find the module that owns the source or header file that
  /// \p Loc points to. If the location is in a file that was included
  /// into a module, or is outside any module, returns nullptr.
  Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);

  /// We want to produce a diagnostic at location IncLoc concerning an
  /// unreachable effect at location MLoc (e.g., where a desired entity was
  /// declared or defined). Determine whether the right way to make MLoc
  /// reachable is by #include, and if so, what header should be included.
  ///
  /// This is not necessarily fast, and might load unexpected module maps, so
  /// should only be called by code that intends to produce an error.
  ///
  /// \param IncLoc The location at which the missing effect was detected.
  /// \param MLoc A location within an unimported module at which the desired
  ///        effect occurred.
  /// \return A file that can be #included to provide the desired effect. Null
  ///         if no such file could be determined or if a #include is not
  ///         appropriate (e.g., if a module should be imported instead).
  OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                        SourceLocation MLoc);
2812 
  /// Whether PreambleConditionalStack reports that it is recording.
  bool isRecordingPreamble() const {
    return PreambleConditionalStack.isRecording();
  }

  /// Whether PreambleConditionalStack reports that a preamble has been
  /// recorded.
  bool hasRecordedPreamble() const {
    return PreambleConditionalStack.hasRecordedPreamble();
  }
2820 
2821   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2822       return PreambleConditionalStack.getStack();
2823   }
2824 
  /// Store \p s as the conditional stack held by PreambleConditionalStack.
  void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
    PreambleConditionalStack.setStack(s);
  }

  /// Put PreambleConditionalStack into replaying state, then store \p s as
  /// its stack and \p SkipInfo as its skip info.
  void setReplayablePreambleConditionalStack(
      ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
    PreambleConditionalStack.startReplaying();
    PreambleConditionalStack.setStack(s);
    PreambleConditionalStack.SkipInfo = SkipInfo;
  }

  /// Return a copy of the skip info stored in PreambleConditionalStack; it may
  /// be std::nullopt.
  std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
    return PreambleConditionalStack.SkipInfo;
  }
2839 
2840 private:
  /// After processing the predefines file, initialize the conditional stack
  /// from the preamble.
  void replayPreambleConditionalStack();

  // Macro handling.
  // NOTE(review): the handlers in the rest of this section are undocumented
  // in this header; which directive each one serves is inferred from its name
  // only.
  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
  void HandleUndefDirective();

  // Conditional Inclusion.
  void HandleIfdefDirective(Token &Result, const Token &HashToken,
                            bool isIfndef, bool ReadAnyTokensBeforeDirective);
  void HandleIfDirective(Token &IfToken, const Token &HashToken,
                         bool ReadAnyTokensBeforeDirective);
  void HandleEndifDirective(Token &EndifToken);
  void HandleElseDirective(Token &Result, const Token &HashToken);
  void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
                                 tok::PPKeywordKind Kind);

  // Pragmas.
  void HandlePragmaDirective(PragmaIntroducer Introducer);
2861 
2862 public:
  // Handlers for individual pragmas.
  // NOTE(review): undocumented in this header; the pragma each one serves is
  // inferred from its name only.
  void HandlePragmaOnce(Token &OnceTok);
  void HandlePragmaMark(Token &MarkTok);
  void HandlePragmaPoison();
  void HandlePragmaSystemHeader(Token &SysHeaderTok);
  void HandlePragmaDependency(Token &DependencyTok);
  void HandlePragmaPushMacro(Token &Tok);
  void HandlePragmaPopMacro(Token &Tok);
  void HandlePragmaIncludeAlias(Token &Tok);
  void HandlePragmaModuleBuild(Token &Tok);
  void HandlePragmaHdrstop(Token &Tok);
  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);

  /// Return true and store the first token in \p result only if any
  /// CommentHandler has inserted some tokens and getCommentRetentionState()
  /// is false.
  bool HandleComment(Token &result, SourceRange Comment);

  /// A macro is used, update information about macros that need unused
  /// warnings.
  void markMacroAsUsed(MacroInfo *MI);
2882 
  /// Record \p Msg and \p AnnotationLoc as the deprecation info of the macro
  /// named by \p II, creating the annotation entry for \p II if there is none
  /// yet.
  void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
                              SourceLocation AnnotationLoc) {
    AnnotationInfos[II].DeprecationInfo =
        MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
  }

  /// Record \p Msg and \p AnnotationLoc as the restrict-expansion info of the
  /// macro named by \p II, creating the annotation entry for \p II if there is
  /// none yet.
  void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
                               SourceLocation AnnotationLoc) {
    AnnotationInfos[II].RestrictExpansionInfo =
        MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
  }

  /// Record \p AnnotationLoc as the final-annotation location of the macro
  /// named by \p II, creating the annotation entry for \p II if there is none
  /// yet.
  void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
    AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc;
  }
2898 
2899   const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
2900     return AnnotationInfos.find(II)->second;
2901   }
2902 
2903   void emitMacroExpansionWarnings(const Token &Identifier,
2904                                   bool IsIfnDef = false) const {
2905     IdentifierInfo *Info = Identifier.getIdentifierInfo();
2906     if (Info->isDeprecatedMacro())
2907       emitMacroDeprecationWarning(Identifier);
2908 
2909     if (Info->isRestrictExpansion() &&
2910         !SourceMgr.isInMainFile(Identifier.getLocation()))
2911       emitRestrictExpansionWarning(Identifier);
2912 
2913     if (!IsIfnDef) {
2914       if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
2915         emitRestrictInfNaNWarning(Identifier, 0);
2916       if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
2917         emitRestrictInfNaNWarning(Identifier, 1);
2918     }
2919   }
2920 
  /// NOTE(review): undocumented in this header. Judging by the name, rewrites
  /// \p Path in place into the form used when expanding the __FILE__ macro,
  /// taking \p LangOpts and \p TI into account -- confirm against the
  /// definition.
  static void processPathForFileMacro(SmallVectorImpl<char> &Path,
                                      const LangOptions &LangOpts,
                                      const TargetInfo &TI);

  /// NOTE(review): undocumented in this header. Judging by the name, fills
  /// \p FileName with the file-name component derived from \p PLoc -- confirm
  /// against the definition.
  static void processPathToFileName(SmallVectorImpl<char> &FileName,
                                    const PresumedLoc &PLoc,
                                    const LangOptions &LangOpts,
                                    const TargetInfo &TI);
2929 
2930 private:
  // Helpers that emit the individual macro-annotation warnings for
  // \p Identifier. The first, second and fourth are called from
  // emitMacroExpansionWarnings(); which call sites use emitFinalMacroWarning()
  // is not visible in this header.
  void emitMacroDeprecationWarning(const Token &Identifier) const;
  void emitRestrictExpansionWarning(const Token &Identifier) const;
  void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
  // \p DiagSelection is passed as 0 for INFINITY and 1 for NAN by
  // emitMacroExpansionWarnings().
  void emitRestrictInfNaNWarning(const Token &Identifier,
                                 unsigned DiagSelection) const;
2936 
  /// This boolean state keeps track of whether the token currently being
  /// scanned (by this PP) is in a "-Wunsafe-buffer-usage" opt-out region,
  /// assuming PP scans a translation unit in a linear order.
  bool InSafeBufferOptOutRegion = false;

  /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out
  /// region if PP is currently in such a region.  Hold undefined value
  /// otherwise.
  ///
  /// It is used to report the start location of a never-closed region.
  SourceLocation CurrentSafeBufferOptOutStart;

  using SafeBufferOptOutRegionsTy =
      SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
  // translation unit. Each region is represented by a pair of start and
  // end locations.
  SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
2953 
2954   // "-Wunsafe-buffer-usage" opt-out regions that come from loaded ASTs.  They
2955   // are bucketed per AST by the unnamed structure below.
2956   struct {
2957     // Region lists keyed by the FileID that
2958     // `SourceManager::getUniqueLoadedASTFileID` yields for a location.
2959     llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
2960 
2961     // Yields the mutable region list of the loaded AST that `Loc` belongs to;
2962     // an empty list is inserted first when that AST has no entry yet.
2963     SafeBufferOptOutRegionsTy &
2964     findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
2965       const FileID ASTKey = SrcMgr.getUniqueLoadedASTFileID(Loc);
2966       return LoadedRegions[ASTKey];
2967     }
2968 
2969     // Read-only lookup: yields a pointer to the region list of the loaded AST
2970     // that `Loc` belongs to, or nullptr when no list exists for that AST.
2971     // Unlike findAndConsLoadedOptOutMap, it never inserts.
2972     const SafeBufferOptOutRegionsTy *
2973     lookupLoadedOptOutMap(SourceLocation Loc,
2974                           const SourceManager &SrcMgr) const {
2975       const FileID ASTKey = SrcMgr.getUniqueLoadedASTFileID(Loc);
2976       auto Entry = LoadedRegions.find(ASTKey);
2977       if (Entry != LoadedRegions.end())
2978         return &Entry->getSecond();
2979       return nullptr;
2980     }
2981   } LoadedSafeBufferOptOutMap;
2982 
2983 public:
2984   /// \param Loc a source location that has already been pre-processed.
2985   /// \return true iff `Loc` lies in a "-Wunsafe-buffer-usage" opt-out region.
2986   bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;
2987 
2988   /// Updates whether this PP is currently inside a "-Wunsafe-buffer-usage"
2989   /// opt-out region.
2990   ///
2991   /// \param isEnter true if this PP is entering a region; false if it is
2992   /// exiting one.
2993   /// \param Loc the location at which the region is entered or exited.
2994   ///
2995   /// \return true iff the transition is INVALID, i.e., an attempt to enter a
2996   /// region before the previous one was exited, or an attempt to exit a
2997   /// region that PP is not currently in.  Note that true signals the error.
2998   bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
2999                                          const SourceLocation &Loc);
3000 
3001   /// \return true iff this PP is currently inside a "-Wunsafe-buffer-usage"
3002   /// opt-out region.
3003   bool isPPInSafeBufferOptOutRegion();
3004 
3005   /// Overload that also reports where the current region began.
3006   /// \param[out] StartLoc set to the start location of the current
3007   /// "-Wunsafe-buffer-usage" opt-out region iff this function returns true.
3008   /// \return true iff this PP is currently inside a "-Wunsafe-buffer-usage"
3009   /// opt-out region.
3010   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
3011 
3012   /// \return a sequence of SourceLocations representing, in order, the opt-out
3013   /// regions of this translation unit that are specified by
3014   /// `\#pragma clang unsafe_buffer_usage begin/end` pairs.
3015   SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
3016 
3017   /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
3018   /// record of code `PP_UNSAFE_BUFFER_USAGE`.
3019   /// \return true iff the `Preprocessor` has been updated; false means the
3020   /// `Preprocessor` is the same as it was before the call.
3021   bool setDeserializedSafeBufferOptOutMap(
3022       const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3023 
3024 private:
3025   /// Helper functions that forward lexing to the actual lexer.  They all share
3026   /// the same signature: `bool(Preprocessor &P, Token &Result)`.
3027   static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3028     return P.CurLexer->Lex(Result); // Forwards to CurLexer.
3029   }
3030   static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3031     return P.CurTokenLexer->Lex(Result); // Forwards to CurTokenLexer.
3032   }
3033   static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3034     P.CachingLex(Result);
3035     return true; // Unlike its siblings, this helper always reports true.
3036   }
3037   static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3038     return P.CurLexer->LexDependencyDirectiveToken(Result); // Also CurLexer.
3039   }
3040   static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
3041     return P.LexAfterModuleImport(Result); // Forwards to the PP itself.
3042   }
3043 };
3044 
3045 /// Abstract base class for handlers that receive the source range of each
3046 /// comment encountered in the source file.
3047 class CommentHandler {
3048 public:
3049   virtual ~CommentHandler(); // Declared only; not defined in this header chunk.
3050 
3051   // Receives the range `Comment` of one comment.  The handler shall return
3052   // true if it has pushed any tokens to be read, using e.g. EnterToken or
3053   // EnterTokenStream.
3053   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
3054 };
3055 
3056 /// Abstract base class for handlers that receive the source ranges of empty
3057 /// lines encountered in the source file.
3058 class EmptylineHandler {
3059 public:
3060   virtual ~EmptylineHandler(); // Declared only; not defined in this header chunk.
3061 
3062   // Handles the empty line(s) covered by `Range`.  Nothing is returned.
3063   virtual void HandleEmptyline(SourceRange Range) = 0;
3064 };
3065 
3066 /// Helper class that shuttles information about #embed directives from the
3067 /// preprocessor to the parser through an annotation token.
3068 struct EmbedAnnotationData {
3069   StringRef BinaryData; // NOTE(review): presumably the embedded bytes; StringRef does not own them — confirm what does.
3070 };
3071 
3072 /// Registry of pragma handlers added by plugins (`llvm::Registry` over `PragmaHandler`).
3073 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3074 
3075 } // namespace clang
3076 
3077 namespace llvm { // Declares, but does not perform, the explicit instantiation below.
3078 extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>;
3079 } // namespace llvm
3080 
3081 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
3082