xref: /netbsd-src/external/apache2/llvm/dist/clang/lib/Lex/Preprocessor.cpp (revision e038c9c4676b0f19b1b7dd08a940c6ed64a6d5ae)
17330f729Sjoerg //===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg //  This file implements the Preprocessor interface.
107330f729Sjoerg //
117330f729Sjoerg //===----------------------------------------------------------------------===//
127330f729Sjoerg //
137330f729Sjoerg // Options to support:
147330f729Sjoerg //   -H       - Print the name of each header file used.
157330f729Sjoerg //   -d[DNI] - Dump various things.
167330f729Sjoerg //   -fworking-directory - #line's with preprocessor's working dir.
177330f729Sjoerg //   -fpreprocessed
187330f729Sjoerg //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
197330f729Sjoerg //   -W*
207330f729Sjoerg //   -w
217330f729Sjoerg //
227330f729Sjoerg // Messages to emit:
237330f729Sjoerg //   "Multiple include guards may be useful for:\n"
247330f729Sjoerg //
257330f729Sjoerg //===----------------------------------------------------------------------===//
267330f729Sjoerg 
277330f729Sjoerg #include "clang/Lex/Preprocessor.h"
28*e038c9c4Sjoerg #include "clang/Basic/Builtins.h"
297330f729Sjoerg #include "clang/Basic/FileManager.h"
307330f729Sjoerg #include "clang/Basic/FileSystemStatCache.h"
317330f729Sjoerg #include "clang/Basic/IdentifierTable.h"
327330f729Sjoerg #include "clang/Basic/LLVM.h"
337330f729Sjoerg #include "clang/Basic/LangOptions.h"
347330f729Sjoerg #include "clang/Basic/Module.h"
357330f729Sjoerg #include "clang/Basic/SourceLocation.h"
367330f729Sjoerg #include "clang/Basic/SourceManager.h"
377330f729Sjoerg #include "clang/Basic/TargetInfo.h"
387330f729Sjoerg #include "clang/Lex/CodeCompletionHandler.h"
397330f729Sjoerg #include "clang/Lex/ExternalPreprocessorSource.h"
407330f729Sjoerg #include "clang/Lex/HeaderSearch.h"
417330f729Sjoerg #include "clang/Lex/LexDiagnostic.h"
427330f729Sjoerg #include "clang/Lex/Lexer.h"
437330f729Sjoerg #include "clang/Lex/LiteralSupport.h"
447330f729Sjoerg #include "clang/Lex/MacroArgs.h"
457330f729Sjoerg #include "clang/Lex/MacroInfo.h"
467330f729Sjoerg #include "clang/Lex/ModuleLoader.h"
477330f729Sjoerg #include "clang/Lex/Pragma.h"
487330f729Sjoerg #include "clang/Lex/PreprocessingRecord.h"
497330f729Sjoerg #include "clang/Lex/PreprocessorLexer.h"
507330f729Sjoerg #include "clang/Lex/PreprocessorOptions.h"
517330f729Sjoerg #include "clang/Lex/ScratchBuffer.h"
527330f729Sjoerg #include "clang/Lex/Token.h"
537330f729Sjoerg #include "clang/Lex/TokenLexer.h"
547330f729Sjoerg #include "llvm/ADT/APInt.h"
557330f729Sjoerg #include "llvm/ADT/ArrayRef.h"
567330f729Sjoerg #include "llvm/ADT/DenseMap.h"
57*e038c9c4Sjoerg #include "llvm/ADT/STLExtras.h"
587330f729Sjoerg #include "llvm/ADT/SmallString.h"
597330f729Sjoerg #include "llvm/ADT/SmallVector.h"
607330f729Sjoerg #include "llvm/ADT/StringRef.h"
617330f729Sjoerg #include "llvm/ADT/StringSwitch.h"
627330f729Sjoerg #include "llvm/Support/Capacity.h"
637330f729Sjoerg #include "llvm/Support/ErrorHandling.h"
647330f729Sjoerg #include "llvm/Support/MemoryBuffer.h"
657330f729Sjoerg #include "llvm/Support/raw_ostream.h"
667330f729Sjoerg #include <algorithm>
677330f729Sjoerg #include <cassert>
687330f729Sjoerg #include <memory>
697330f729Sjoerg #include <string>
707330f729Sjoerg #include <utility>
717330f729Sjoerg #include <vector>
727330f729Sjoerg 
737330f729Sjoerg using namespace clang;
747330f729Sjoerg 
757330f729Sjoerg LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
767330f729Sjoerg 
777330f729Sjoerg ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
787330f729Sjoerg 
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)797330f729Sjoerg Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
807330f729Sjoerg                            DiagnosticsEngine &diags, LangOptions &opts,
817330f729Sjoerg                            SourceManager &SM, HeaderSearch &Headers,
827330f729Sjoerg                            ModuleLoader &TheModuleLoader,
837330f729Sjoerg                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
847330f729Sjoerg                            TranslationUnitKind TUKind)
857330f729Sjoerg     : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
867330f729Sjoerg       FileMgr(Headers.getFileMgr()), SourceMgr(SM),
877330f729Sjoerg       ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
887330f729Sjoerg       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
897330f729Sjoerg       // As the language options may have not been loaded yet (when
907330f729Sjoerg       // deserializing an ASTUnit), adding keywords to the identifier table is
917330f729Sjoerg       // deferred to Preprocessor::Initialize().
927330f729Sjoerg       Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
937330f729Sjoerg       TUKind(TUKind), SkipMainFilePreamble(0, true),
947330f729Sjoerg       CurSubmoduleState(&NullSubmoduleState) {
957330f729Sjoerg   OwnsHeaderSearch = OwnsHeaders;
967330f729Sjoerg 
977330f729Sjoerg   // Default to discarding comments.
987330f729Sjoerg   KeepComments = false;
997330f729Sjoerg   KeepMacroComments = false;
1007330f729Sjoerg   SuppressIncludeNotFoundError = false;
1017330f729Sjoerg 
1027330f729Sjoerg   // Macro expansion is enabled.
1037330f729Sjoerg   DisableMacroExpansion = false;
1047330f729Sjoerg   MacroExpansionInDirectivesOverride = false;
1057330f729Sjoerg   InMacroArgs = false;
1067330f729Sjoerg   ArgMacro = nullptr;
1077330f729Sjoerg   InMacroArgPreExpansion = false;
1087330f729Sjoerg   NumCachedTokenLexers = 0;
1097330f729Sjoerg   PragmasEnabled = true;
1107330f729Sjoerg   ParsingIfOrElifDirective = false;
1117330f729Sjoerg   PreprocessedOutput = false;
1127330f729Sjoerg 
1137330f729Sjoerg   // We haven't read anything from the external source.
1147330f729Sjoerg   ReadMacrosFromExternalSource = false;
1157330f729Sjoerg 
116*e038c9c4Sjoerg   BuiltinInfo = std::make_unique<Builtin::Context>();
117*e038c9c4Sjoerg 
1187330f729Sjoerg   // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
1197330f729Sjoerg   // a macro. They get unpoisoned where it is allowed.
1207330f729Sjoerg   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
1217330f729Sjoerg   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
1227330f729Sjoerg   (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
1237330f729Sjoerg   SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
1247330f729Sjoerg 
1257330f729Sjoerg   // Initialize the pragma handlers.
1267330f729Sjoerg   RegisterBuiltinPragmas();
1277330f729Sjoerg 
1287330f729Sjoerg   // Initialize builtin macros like __LINE__ and friends.
1297330f729Sjoerg   RegisterBuiltinMacros();
1307330f729Sjoerg 
1317330f729Sjoerg   if(LangOpts.Borland) {
1327330f729Sjoerg     Ident__exception_info        = getIdentifierInfo("_exception_info");
1337330f729Sjoerg     Ident___exception_info       = getIdentifierInfo("__exception_info");
1347330f729Sjoerg     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
1357330f729Sjoerg     Ident__exception_code        = getIdentifierInfo("_exception_code");
1367330f729Sjoerg     Ident___exception_code       = getIdentifierInfo("__exception_code");
1377330f729Sjoerg     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
1387330f729Sjoerg     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
1397330f729Sjoerg     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
1407330f729Sjoerg     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
1417330f729Sjoerg   } else {
1427330f729Sjoerg     Ident__exception_info = Ident__exception_code = nullptr;
1437330f729Sjoerg     Ident__abnormal_termination = Ident___exception_info = nullptr;
1447330f729Sjoerg     Ident___exception_code = Ident___abnormal_termination = nullptr;
1457330f729Sjoerg     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
1467330f729Sjoerg     Ident_AbnormalTermination = nullptr;
1477330f729Sjoerg   }
1487330f729Sjoerg 
1497330f729Sjoerg   // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
1507330f729Sjoerg   if (usingPCHWithPragmaHdrStop())
1517330f729Sjoerg     SkippingUntilPragmaHdrStop = true;
1527330f729Sjoerg 
1537330f729Sjoerg   // If using a PCH with a through header, start skipping tokens.
1547330f729Sjoerg   if (!this->PPOpts->PCHThroughHeader.empty() &&
1557330f729Sjoerg       !this->PPOpts->ImplicitPCHInclude.empty())
1567330f729Sjoerg     SkippingUntilPCHThroughHeader = true;
1577330f729Sjoerg 
1587330f729Sjoerg   if (this->PPOpts->GeneratePreamble)
1597330f729Sjoerg     PreambleConditionalStack.startRecording();
1607330f729Sjoerg 
1617330f729Sjoerg   ExcludedConditionalDirectiveSkipMappings =
1627330f729Sjoerg       this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
1637330f729Sjoerg   if (ExcludedConditionalDirectiveSkipMappings)
1647330f729Sjoerg     ExcludedConditionalDirectiveSkipMappings->clear();
165*e038c9c4Sjoerg 
166*e038c9c4Sjoerg   MaxTokens = LangOpts.MaxTokens;
1677330f729Sjoerg }
1687330f729Sjoerg 
~Preprocessor()1697330f729Sjoerg Preprocessor::~Preprocessor() {
1707330f729Sjoerg   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
1717330f729Sjoerg 
1727330f729Sjoerg   IncludeMacroStack.clear();
1737330f729Sjoerg 
1747330f729Sjoerg   // Destroy any macro definitions.
1757330f729Sjoerg   while (MacroInfoChain *I = MIChainHead) {
1767330f729Sjoerg     MIChainHead = I->Next;
1777330f729Sjoerg     I->~MacroInfoChain();
1787330f729Sjoerg   }
1797330f729Sjoerg 
1807330f729Sjoerg   // Free any cached macro expanders.
1817330f729Sjoerg   // This populates MacroArgCache, so all TokenLexers need to be destroyed
1827330f729Sjoerg   // before the code below that frees up the MacroArgCache list.
1837330f729Sjoerg   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
1847330f729Sjoerg   CurTokenLexer.reset();
1857330f729Sjoerg 
1867330f729Sjoerg   // Free any cached MacroArgs.
1877330f729Sjoerg   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
1887330f729Sjoerg     ArgList = ArgList->deallocate();
1897330f729Sjoerg 
1907330f729Sjoerg   // Delete the header search info, if we own it.
1917330f729Sjoerg   if (OwnsHeaderSearch)
1927330f729Sjoerg     delete &HeaderInfo;
1937330f729Sjoerg }
1947330f729Sjoerg 
Initialize(const TargetInfo & Target,const TargetInfo * AuxTarget)1957330f729Sjoerg void Preprocessor::Initialize(const TargetInfo &Target,
1967330f729Sjoerg                               const TargetInfo *AuxTarget) {
1977330f729Sjoerg   assert((!this->Target || this->Target == &Target) &&
1987330f729Sjoerg          "Invalid override of target information");
1997330f729Sjoerg   this->Target = &Target;
2007330f729Sjoerg 
2017330f729Sjoerg   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
2027330f729Sjoerg          "Invalid override of aux target information.");
2037330f729Sjoerg   this->AuxTarget = AuxTarget;
2047330f729Sjoerg 
2057330f729Sjoerg   // Initialize information about built-ins.
206*e038c9c4Sjoerg   BuiltinInfo->InitializeTarget(Target, AuxTarget);
2077330f729Sjoerg   HeaderInfo.setTarget(Target);
2087330f729Sjoerg 
2097330f729Sjoerg   // Populate the identifier table with info about keywords for the current language.
2107330f729Sjoerg   Identifiers.AddKeywords(LangOpts);
2117330f729Sjoerg }
2127330f729Sjoerg 
InitializeForModelFile()2137330f729Sjoerg void Preprocessor::InitializeForModelFile() {
2147330f729Sjoerg   NumEnteredSourceFiles = 0;
2157330f729Sjoerg 
2167330f729Sjoerg   // Reset pragmas
2177330f729Sjoerg   PragmaHandlersBackup = std::move(PragmaHandlers);
2187330f729Sjoerg   PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
2197330f729Sjoerg   RegisterBuiltinPragmas();
2207330f729Sjoerg 
2217330f729Sjoerg   // Reset PredefinesFileID
2227330f729Sjoerg   PredefinesFileID = FileID();
2237330f729Sjoerg }
2247330f729Sjoerg 
FinalizeForModelFile()2257330f729Sjoerg void Preprocessor::FinalizeForModelFile() {
2267330f729Sjoerg   NumEnteredSourceFiles = 1;
2277330f729Sjoerg 
2287330f729Sjoerg   PragmaHandlers = std::move(PragmaHandlersBackup);
2297330f729Sjoerg }
2307330f729Sjoerg 
DumpToken(const Token & Tok,bool DumpFlags) const2317330f729Sjoerg void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
2327330f729Sjoerg   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
2337330f729Sjoerg                << getSpelling(Tok) << "'";
2347330f729Sjoerg 
2357330f729Sjoerg   if (!DumpFlags) return;
2367330f729Sjoerg 
2377330f729Sjoerg   llvm::errs() << "\t";
2387330f729Sjoerg   if (Tok.isAtStartOfLine())
2397330f729Sjoerg     llvm::errs() << " [StartOfLine]";
2407330f729Sjoerg   if (Tok.hasLeadingSpace())
2417330f729Sjoerg     llvm::errs() << " [LeadingSpace]";
2427330f729Sjoerg   if (Tok.isExpandDisabled())
2437330f729Sjoerg     llvm::errs() << " [ExpandDisabled]";
2447330f729Sjoerg   if (Tok.needsCleaning()) {
2457330f729Sjoerg     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
2467330f729Sjoerg     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
2477330f729Sjoerg                  << "']";
2487330f729Sjoerg   }
2497330f729Sjoerg 
2507330f729Sjoerg   llvm::errs() << "\tLoc=<";
2517330f729Sjoerg   DumpLocation(Tok.getLocation());
2527330f729Sjoerg   llvm::errs() << ">";
2537330f729Sjoerg }
2547330f729Sjoerg 
DumpLocation(SourceLocation Loc) const2557330f729Sjoerg void Preprocessor::DumpLocation(SourceLocation Loc) const {
2567330f729Sjoerg   Loc.print(llvm::errs(), SourceMgr);
2577330f729Sjoerg }
2587330f729Sjoerg 
DumpMacro(const MacroInfo & MI) const2597330f729Sjoerg void Preprocessor::DumpMacro(const MacroInfo &MI) const {
2607330f729Sjoerg   llvm::errs() << "MACRO: ";
2617330f729Sjoerg   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
2627330f729Sjoerg     DumpToken(MI.getReplacementToken(i));
2637330f729Sjoerg     llvm::errs() << "  ";
2647330f729Sjoerg   }
2657330f729Sjoerg   llvm::errs() << "\n";
2667330f729Sjoerg }
2677330f729Sjoerg 
PrintStats()2687330f729Sjoerg void Preprocessor::PrintStats() {
2697330f729Sjoerg   llvm::errs() << "\n*** Preprocessor Stats:\n";
2707330f729Sjoerg   llvm::errs() << NumDirectives << " directives found:\n";
2717330f729Sjoerg   llvm::errs() << "  " << NumDefined << " #define.\n";
2727330f729Sjoerg   llvm::errs() << "  " << NumUndefined << " #undef.\n";
2737330f729Sjoerg   llvm::errs() << "  #include/#include_next/#import:\n";
2747330f729Sjoerg   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
2757330f729Sjoerg   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
2767330f729Sjoerg   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
2777330f729Sjoerg   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
2787330f729Sjoerg   llvm::errs() << "  " << NumEndif << " #endif.\n";
2797330f729Sjoerg   llvm::errs() << "  " << NumPragma << " #pragma.\n";
2807330f729Sjoerg   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
2817330f729Sjoerg 
2827330f729Sjoerg   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
2837330f729Sjoerg              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
2847330f729Sjoerg              << NumFastMacroExpanded << " on the fast path.\n";
2857330f729Sjoerg   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
2867330f729Sjoerg              << " token paste (##) operations performed, "
2877330f729Sjoerg              << NumFastTokenPaste << " on the fast path.\n";
2887330f729Sjoerg 
2897330f729Sjoerg   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
2907330f729Sjoerg 
2917330f729Sjoerg   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
2927330f729Sjoerg   llvm::errs() << "\n  Macro Expanded Tokens: "
2937330f729Sjoerg                << llvm::capacity_in_bytes(MacroExpandedTokens);
2947330f729Sjoerg   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
2957330f729Sjoerg   // FIXME: List information for all submodules.
2967330f729Sjoerg   llvm::errs() << "\n  Macros: "
2977330f729Sjoerg                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
2987330f729Sjoerg   llvm::errs() << "\n  #pragma push_macro Info: "
2997330f729Sjoerg                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
3007330f729Sjoerg   llvm::errs() << "\n  Poison Reasons: "
3017330f729Sjoerg                << llvm::capacity_in_bytes(PoisonReasons);
3027330f729Sjoerg   llvm::errs() << "\n  Comment Handlers: "
3037330f729Sjoerg                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
3047330f729Sjoerg }
3057330f729Sjoerg 
3067330f729Sjoerg Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const3077330f729Sjoerg Preprocessor::macro_begin(bool IncludeExternalMacros) const {
3087330f729Sjoerg   if (IncludeExternalMacros && ExternalSource &&
3097330f729Sjoerg       !ReadMacrosFromExternalSource) {
3107330f729Sjoerg     ReadMacrosFromExternalSource = true;
3117330f729Sjoerg     ExternalSource->ReadDefinedMacros();
3127330f729Sjoerg   }
3137330f729Sjoerg 
3147330f729Sjoerg   // Make sure we cover all macros in visible modules.
3157330f729Sjoerg   for (const ModuleMacro &Macro : ModuleMacros)
3167330f729Sjoerg     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
3177330f729Sjoerg 
3187330f729Sjoerg   return CurSubmoduleState->Macros.begin();
3197330f729Sjoerg }
3207330f729Sjoerg 
getTotalMemory() const3217330f729Sjoerg size_t Preprocessor::getTotalMemory() const {
3227330f729Sjoerg   return BP.getTotalMemory()
3237330f729Sjoerg     + llvm::capacity_in_bytes(MacroExpandedTokens)
3247330f729Sjoerg     + Predefines.capacity() /* Predefines buffer. */
3257330f729Sjoerg     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
3267330f729Sjoerg     // and ModuleMacros.
3277330f729Sjoerg     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
3287330f729Sjoerg     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
3297330f729Sjoerg     + llvm::capacity_in_bytes(PoisonReasons)
3307330f729Sjoerg     + llvm::capacity_in_bytes(CommentHandlers);
3317330f729Sjoerg }
3327330f729Sjoerg 
3337330f729Sjoerg Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const3347330f729Sjoerg Preprocessor::macro_end(bool IncludeExternalMacros) const {
3357330f729Sjoerg   if (IncludeExternalMacros && ExternalSource &&
3367330f729Sjoerg       !ReadMacrosFromExternalSource) {
3377330f729Sjoerg     ReadMacrosFromExternalSource = true;
3387330f729Sjoerg     ExternalSource->ReadDefinedMacros();
3397330f729Sjoerg   }
3407330f729Sjoerg 
3417330f729Sjoerg   return CurSubmoduleState->Macros.end();
3427330f729Sjoerg }
3437330f729Sjoerg 
3447330f729Sjoerg /// Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)3457330f729Sjoerg static bool MacroDefinitionEquals(const MacroInfo *MI,
3467330f729Sjoerg                                   ArrayRef<TokenValue> Tokens) {
3477330f729Sjoerg   return Tokens.size() == MI->getNumTokens() &&
3487330f729Sjoerg       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
3497330f729Sjoerg }
3507330f729Sjoerg 
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const3517330f729Sjoerg StringRef Preprocessor::getLastMacroWithSpelling(
3527330f729Sjoerg                                     SourceLocation Loc,
3537330f729Sjoerg                                     ArrayRef<TokenValue> Tokens) const {
3547330f729Sjoerg   SourceLocation BestLocation;
3557330f729Sjoerg   StringRef BestSpelling;
3567330f729Sjoerg   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
3577330f729Sjoerg        I != E; ++I) {
3587330f729Sjoerg     const MacroDirective::DefInfo
3597330f729Sjoerg       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
3607330f729Sjoerg     if (!Def || !Def.getMacroInfo())
3617330f729Sjoerg       continue;
3627330f729Sjoerg     if (!Def.getMacroInfo()->isObjectLike())
3637330f729Sjoerg       continue;
3647330f729Sjoerg     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
3657330f729Sjoerg       continue;
3667330f729Sjoerg     SourceLocation Location = Def.getLocation();
3677330f729Sjoerg     // Choose the macro defined latest.
3687330f729Sjoerg     if (BestLocation.isInvalid() ||
3697330f729Sjoerg         (Location.isValid() &&
3707330f729Sjoerg          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
3717330f729Sjoerg       BestLocation = Location;
3727330f729Sjoerg       BestSpelling = I->first->getName();
3737330f729Sjoerg     }
3747330f729Sjoerg   }
3757330f729Sjoerg   return BestSpelling;
3767330f729Sjoerg }
3777330f729Sjoerg 
recomputeCurLexerKind()3787330f729Sjoerg void Preprocessor::recomputeCurLexerKind() {
3797330f729Sjoerg   if (CurLexer)
3807330f729Sjoerg     CurLexerKind = CLK_Lexer;
3817330f729Sjoerg   else if (CurTokenLexer)
3827330f729Sjoerg     CurLexerKind = CLK_TokenLexer;
3837330f729Sjoerg   else
3847330f729Sjoerg     CurLexerKind = CLK_CachingLexer;
3857330f729Sjoerg }
3867330f729Sjoerg 
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)3877330f729Sjoerg bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
3887330f729Sjoerg                                           unsigned CompleteLine,
3897330f729Sjoerg                                           unsigned CompleteColumn) {
3907330f729Sjoerg   assert(File);
3917330f729Sjoerg   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
3927330f729Sjoerg   assert(!CodeCompletionFile && "Already set");
3937330f729Sjoerg 
3947330f729Sjoerg   // Load the actual file's contents.
395*e038c9c4Sjoerg   Optional<llvm::MemoryBufferRef> Buffer =
396*e038c9c4Sjoerg       SourceMgr.getMemoryBufferForFileOrNone(File);
397*e038c9c4Sjoerg   if (!Buffer)
3987330f729Sjoerg     return true;
3997330f729Sjoerg 
4007330f729Sjoerg   // Find the byte position of the truncation point.
4017330f729Sjoerg   const char *Position = Buffer->getBufferStart();
4027330f729Sjoerg   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
4037330f729Sjoerg     for (; *Position; ++Position) {
4047330f729Sjoerg       if (*Position != '\r' && *Position != '\n')
4057330f729Sjoerg         continue;
4067330f729Sjoerg 
4077330f729Sjoerg       // Eat \r\n or \n\r as a single line.
4087330f729Sjoerg       if ((Position[1] == '\r' || Position[1] == '\n') &&
4097330f729Sjoerg           Position[0] != Position[1])
4107330f729Sjoerg         ++Position;
4117330f729Sjoerg       ++Position;
4127330f729Sjoerg       break;
4137330f729Sjoerg     }
4147330f729Sjoerg   }
4157330f729Sjoerg 
4167330f729Sjoerg   Position += CompleteColumn - 1;
4177330f729Sjoerg 
4187330f729Sjoerg   // If pointing inside the preamble, adjust the position at the beginning of
4197330f729Sjoerg   // the file after the preamble.
4207330f729Sjoerg   if (SkipMainFilePreamble.first &&
4217330f729Sjoerg       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
4227330f729Sjoerg     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
4237330f729Sjoerg       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
4247330f729Sjoerg   }
4257330f729Sjoerg 
4267330f729Sjoerg   if (Position > Buffer->getBufferEnd())
4277330f729Sjoerg     Position = Buffer->getBufferEnd();
4287330f729Sjoerg 
4297330f729Sjoerg   CodeCompletionFile = File;
4307330f729Sjoerg   CodeCompletionOffset = Position - Buffer->getBufferStart();
4317330f729Sjoerg 
4327330f729Sjoerg   auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
4337330f729Sjoerg       Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
4347330f729Sjoerg   char *NewBuf = NewBuffer->getBufferStart();
4357330f729Sjoerg   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
4367330f729Sjoerg   *NewPos = '\0';
4377330f729Sjoerg   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
4387330f729Sjoerg   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
4397330f729Sjoerg 
4407330f729Sjoerg   return false;
4417330f729Sjoerg }
4427330f729Sjoerg 
CodeCompleteIncludedFile(llvm::StringRef Dir,bool IsAngled)4437330f729Sjoerg void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
4447330f729Sjoerg                                             bool IsAngled) {
445*e038c9c4Sjoerg   setCodeCompletionReached();
4467330f729Sjoerg   if (CodeComplete)
4477330f729Sjoerg     CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
4487330f729Sjoerg }
4497330f729Sjoerg 
CodeCompleteNaturalLanguage()4507330f729Sjoerg void Preprocessor::CodeCompleteNaturalLanguage() {
451*e038c9c4Sjoerg   setCodeCompletionReached();
4527330f729Sjoerg   if (CodeComplete)
4537330f729Sjoerg     CodeComplete->CodeCompleteNaturalLanguage();
4547330f729Sjoerg }
4557330f729Sjoerg 
4567330f729Sjoerg /// getSpelling - This method is used to get the spelling of a token into a
4577330f729Sjoerg /// SmallVector. Note that the returned StringRef may not point to the
4587330f729Sjoerg /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const4597330f729Sjoerg StringRef Preprocessor::getSpelling(const Token &Tok,
4607330f729Sjoerg                                           SmallVectorImpl<char> &Buffer,
4617330f729Sjoerg                                           bool *Invalid) const {
4627330f729Sjoerg   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
4637330f729Sjoerg   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
4647330f729Sjoerg     // Try the fast path.
4657330f729Sjoerg     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
4667330f729Sjoerg       return II->getName();
4677330f729Sjoerg   }
4687330f729Sjoerg 
4697330f729Sjoerg   // Resize the buffer if we need to copy into it.
4707330f729Sjoerg   if (Tok.needsCleaning())
4717330f729Sjoerg     Buffer.resize(Tok.getLength());
4727330f729Sjoerg 
4737330f729Sjoerg   const char *Ptr = Buffer.data();
4747330f729Sjoerg   unsigned Len = getSpelling(Tok, Ptr, Invalid);
4757330f729Sjoerg   return StringRef(Ptr, Len);
4767330f729Sjoerg }
4777330f729Sjoerg 
4787330f729Sjoerg /// CreateString - Plop the specified string into a scratch buffer and return a
4797330f729Sjoerg /// location for it.  If specified, the source location provides a source
4807330f729Sjoerg /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)4817330f729Sjoerg void Preprocessor::CreateString(StringRef Str, Token &Tok,
4827330f729Sjoerg                                 SourceLocation ExpansionLocStart,
4837330f729Sjoerg                                 SourceLocation ExpansionLocEnd) {
4847330f729Sjoerg   Tok.setLength(Str.size());
4857330f729Sjoerg 
4867330f729Sjoerg   const char *DestPtr;
4877330f729Sjoerg   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
4887330f729Sjoerg 
4897330f729Sjoerg   if (ExpansionLocStart.isValid())
4907330f729Sjoerg     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
4917330f729Sjoerg                                        ExpansionLocEnd, Str.size());
4927330f729Sjoerg   Tok.setLocation(Loc);
4937330f729Sjoerg 
4947330f729Sjoerg   // If this is a raw identifier or a literal token, set the pointer data.
4957330f729Sjoerg   if (Tok.is(tok::raw_identifier))
4967330f729Sjoerg     Tok.setRawIdentifierData(DestPtr);
4977330f729Sjoerg   else if (Tok.isLiteral())
4987330f729Sjoerg     Tok.setLiteralData(DestPtr);
4997330f729Sjoerg }
5007330f729Sjoerg 
SplitToken(SourceLocation Loc,unsigned Length)5017330f729Sjoerg SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
5027330f729Sjoerg   auto &SM = getSourceManager();
5037330f729Sjoerg   SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
5047330f729Sjoerg   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
5057330f729Sjoerg   bool Invalid = false;
5067330f729Sjoerg   StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
5077330f729Sjoerg   if (Invalid)
5087330f729Sjoerg     return SourceLocation();
5097330f729Sjoerg 
5107330f729Sjoerg   // FIXME: We could consider re-using spelling for tokens we see repeatedly.
5117330f729Sjoerg   const char *DestPtr;
5127330f729Sjoerg   SourceLocation Spelling =
5137330f729Sjoerg       ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
5147330f729Sjoerg   return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
5157330f729Sjoerg }
5167330f729Sjoerg 
getCurrentModule()5177330f729Sjoerg Module *Preprocessor::getCurrentModule() {
5187330f729Sjoerg   if (!getLangOpts().isCompilingModule())
5197330f729Sjoerg     return nullptr;
5207330f729Sjoerg 
5217330f729Sjoerg   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
5227330f729Sjoerg }
5237330f729Sjoerg 
5247330f729Sjoerg //===----------------------------------------------------------------------===//
5257330f729Sjoerg // Preprocessor Initialization Methods
5267330f729Sjoerg //===----------------------------------------------------------------------===//
5277330f729Sjoerg 
5287330f729Sjoerg /// EnterMainSourceFile - Enter the specified FileID as the main source file,
5297330f729Sjoerg /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()5307330f729Sjoerg void Preprocessor::EnterMainSourceFile() {
5317330f729Sjoerg   // We do not allow the preprocessor to reenter the main file.  Doing so will
5327330f729Sjoerg   // cause FileID's to accumulate information from both runs (e.g. #line
5337330f729Sjoerg   // information) and predefined macros aren't guaranteed to be set properly.
5347330f729Sjoerg   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
5357330f729Sjoerg   FileID MainFileID = SourceMgr.getMainFileID();
5367330f729Sjoerg 
5377330f729Sjoerg   // If MainFileID is loaded it means we loaded an AST file, no need to enter
5387330f729Sjoerg   // a main file.
5397330f729Sjoerg   if (!SourceMgr.isLoadedFileID(MainFileID)) {
5407330f729Sjoerg     // Enter the main file source buffer.
5417330f729Sjoerg     EnterSourceFile(MainFileID, nullptr, SourceLocation());
5427330f729Sjoerg 
5437330f729Sjoerg     // If we've been asked to skip bytes in the main file (e.g., as part of a
5447330f729Sjoerg     // precompiled preamble), do so now.
5457330f729Sjoerg     if (SkipMainFilePreamble.first > 0)
5467330f729Sjoerg       CurLexer->SetByteOffset(SkipMainFilePreamble.first,
5477330f729Sjoerg                               SkipMainFilePreamble.second);
5487330f729Sjoerg 
5497330f729Sjoerg     // Tell the header info that the main file was entered.  If the file is later
5507330f729Sjoerg     // #imported, it won't be re-entered.
5517330f729Sjoerg     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
5527330f729Sjoerg       HeaderInfo.IncrementIncludeCount(FE);
5537330f729Sjoerg   }
5547330f729Sjoerg 
5557330f729Sjoerg   // Preprocess Predefines to populate the initial preprocessor state.
5567330f729Sjoerg   std::unique_ptr<llvm::MemoryBuffer> SB =
5577330f729Sjoerg     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
5587330f729Sjoerg   assert(SB && "Cannot create predefined source buffer");
5597330f729Sjoerg   FileID FID = SourceMgr.createFileID(std::move(SB));
5607330f729Sjoerg   assert(FID.isValid() && "Could not create FileID for predefines?");
5617330f729Sjoerg   setPredefinesFileID(FID);
5627330f729Sjoerg 
5637330f729Sjoerg   // Start parsing the predefines.
5647330f729Sjoerg   EnterSourceFile(FID, nullptr, SourceLocation());
5657330f729Sjoerg 
5667330f729Sjoerg   if (!PPOpts->PCHThroughHeader.empty()) {
5677330f729Sjoerg     // Lookup and save the FileID for the through header. If it isn't found
5687330f729Sjoerg     // in the search path, it's a fatal error.
5697330f729Sjoerg     const DirectoryLookup *CurDir;
5707330f729Sjoerg     Optional<FileEntryRef> File = LookupFile(
5717330f729Sjoerg         SourceLocation(), PPOpts->PCHThroughHeader,
5727330f729Sjoerg         /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
5737330f729Sjoerg         /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
5747330f729Sjoerg         /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
5757330f729Sjoerg         /*IsFrameworkFound=*/nullptr);
5767330f729Sjoerg     if (!File) {
5777330f729Sjoerg       Diag(SourceLocation(), diag::err_pp_through_header_not_found)
5787330f729Sjoerg           << PPOpts->PCHThroughHeader;
5797330f729Sjoerg       return;
5807330f729Sjoerg     }
5817330f729Sjoerg     setPCHThroughHeaderFileID(
5827330f729Sjoerg         SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
5837330f729Sjoerg   }
5847330f729Sjoerg 
5857330f729Sjoerg   // Skip tokens from the Predefines and if needed the main file.
5867330f729Sjoerg   if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
5877330f729Sjoerg       (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
5887330f729Sjoerg     SkipTokensWhileUsingPCH();
5897330f729Sjoerg }
5907330f729Sjoerg 
setPCHThroughHeaderFileID(FileID FID)5917330f729Sjoerg void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
5927330f729Sjoerg   assert(PCHThroughHeaderFileID.isInvalid() &&
5937330f729Sjoerg          "PCHThroughHeaderFileID already set!");
5947330f729Sjoerg   PCHThroughHeaderFileID = FID;
5957330f729Sjoerg }
5967330f729Sjoerg 
isPCHThroughHeader(const FileEntry * FE)5977330f729Sjoerg bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
5987330f729Sjoerg   assert(PCHThroughHeaderFileID.isValid() &&
5997330f729Sjoerg          "Invalid PCH through header FileID");
6007330f729Sjoerg   return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
6017330f729Sjoerg }
6027330f729Sjoerg 
creatingPCHWithThroughHeader()6037330f729Sjoerg bool Preprocessor::creatingPCHWithThroughHeader() {
6047330f729Sjoerg   return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
6057330f729Sjoerg          PCHThroughHeaderFileID.isValid();
6067330f729Sjoerg }
6077330f729Sjoerg 
usingPCHWithThroughHeader()6087330f729Sjoerg bool Preprocessor::usingPCHWithThroughHeader() {
6097330f729Sjoerg   return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
6107330f729Sjoerg          PCHThroughHeaderFileID.isValid();
6117330f729Sjoerg }
6127330f729Sjoerg 
creatingPCHWithPragmaHdrStop()6137330f729Sjoerg bool Preprocessor::creatingPCHWithPragmaHdrStop() {
6147330f729Sjoerg   return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
6157330f729Sjoerg }
6167330f729Sjoerg 
usingPCHWithPragmaHdrStop()6177330f729Sjoerg bool Preprocessor::usingPCHWithPragmaHdrStop() {
6187330f729Sjoerg   return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
6197330f729Sjoerg }
6207330f729Sjoerg 
6217330f729Sjoerg /// Skip tokens until after the #include of the through header or
6227330f729Sjoerg /// until after a #pragma hdrstop is seen. Tokens in the predefines file
6237330f729Sjoerg /// and the main file may be skipped. If the end of the predefines file
6247330f729Sjoerg /// is reached, skipping continues into the main file. If the end of the
6257330f729Sjoerg /// main file is reached, it's a fatal error.
SkipTokensWhileUsingPCH()6267330f729Sjoerg void Preprocessor::SkipTokensWhileUsingPCH() {
6277330f729Sjoerg   bool ReachedMainFileEOF = false;
6287330f729Sjoerg   bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
6297330f729Sjoerg   bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
6307330f729Sjoerg   Token Tok;
6317330f729Sjoerg   while (true) {
6327330f729Sjoerg     bool InPredefines =
6337330f729Sjoerg         (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
6347330f729Sjoerg     switch (CurLexerKind) {
6357330f729Sjoerg     case CLK_Lexer:
6367330f729Sjoerg       CurLexer->Lex(Tok);
6377330f729Sjoerg      break;
6387330f729Sjoerg     case CLK_TokenLexer:
6397330f729Sjoerg       CurTokenLexer->Lex(Tok);
6407330f729Sjoerg       break;
6417330f729Sjoerg     case CLK_CachingLexer:
6427330f729Sjoerg       CachingLex(Tok);
6437330f729Sjoerg       break;
6447330f729Sjoerg     case CLK_LexAfterModuleImport:
6457330f729Sjoerg       LexAfterModuleImport(Tok);
6467330f729Sjoerg       break;
6477330f729Sjoerg     }
6487330f729Sjoerg     if (Tok.is(tok::eof) && !InPredefines) {
6497330f729Sjoerg       ReachedMainFileEOF = true;
6507330f729Sjoerg       break;
6517330f729Sjoerg     }
6527330f729Sjoerg     if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
6537330f729Sjoerg       break;
6547330f729Sjoerg     if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
6557330f729Sjoerg       break;
6567330f729Sjoerg   }
6577330f729Sjoerg   if (ReachedMainFileEOF) {
6587330f729Sjoerg     if (UsingPCHThroughHeader)
6597330f729Sjoerg       Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
6607330f729Sjoerg           << PPOpts->PCHThroughHeader << 1;
6617330f729Sjoerg     else if (!PPOpts->PCHWithHdrStopCreate)
6627330f729Sjoerg       Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
6637330f729Sjoerg   }
6647330f729Sjoerg }
6657330f729Sjoerg 
replayPreambleConditionalStack()6667330f729Sjoerg void Preprocessor::replayPreambleConditionalStack() {
6677330f729Sjoerg   // Restore the conditional stack from the preamble, if there is one.
6687330f729Sjoerg   if (PreambleConditionalStack.isReplaying()) {
6697330f729Sjoerg     assert(CurPPLexer &&
6707330f729Sjoerg            "CurPPLexer is null when calling replayPreambleConditionalStack.");
6717330f729Sjoerg     CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
6727330f729Sjoerg     PreambleConditionalStack.doneReplaying();
6737330f729Sjoerg     if (PreambleConditionalStack.reachedEOFWhileSkipping())
6747330f729Sjoerg       SkipExcludedConditionalBlock(
6757330f729Sjoerg           PreambleConditionalStack.SkipInfo->HashTokenLoc,
6767330f729Sjoerg           PreambleConditionalStack.SkipInfo->IfTokenLoc,
6777330f729Sjoerg           PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
6787330f729Sjoerg           PreambleConditionalStack.SkipInfo->FoundElse,
6797330f729Sjoerg           PreambleConditionalStack.SkipInfo->ElseLoc);
6807330f729Sjoerg   }
6817330f729Sjoerg }
6827330f729Sjoerg 
EndSourceFile()6837330f729Sjoerg void Preprocessor::EndSourceFile() {
6847330f729Sjoerg   // Notify the client that we reached the end of the source file.
6857330f729Sjoerg   if (Callbacks)
6867330f729Sjoerg     Callbacks->EndOfMainFile();
6877330f729Sjoerg }
6887330f729Sjoerg 
6897330f729Sjoerg //===----------------------------------------------------------------------===//
6907330f729Sjoerg // Lexer Event Handling.
6917330f729Sjoerg //===----------------------------------------------------------------------===//
6927330f729Sjoerg 
6937330f729Sjoerg /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
6947330f729Sjoerg /// identifier information for the token and install it into the token,
6957330f729Sjoerg /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const6967330f729Sjoerg IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
6977330f729Sjoerg   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
6987330f729Sjoerg 
6997330f729Sjoerg   // Look up this token, see if it is a macro, or if it is a language keyword.
7007330f729Sjoerg   IdentifierInfo *II;
7017330f729Sjoerg   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
7027330f729Sjoerg     // No cleaning needed, just use the characters from the lexed buffer.
7037330f729Sjoerg     II = getIdentifierInfo(Identifier.getRawIdentifier());
7047330f729Sjoerg   } else {
7057330f729Sjoerg     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
7067330f729Sjoerg     SmallString<64> IdentifierBuffer;
7077330f729Sjoerg     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
7087330f729Sjoerg 
7097330f729Sjoerg     if (Identifier.hasUCN()) {
7107330f729Sjoerg       SmallString<64> UCNIdentifierBuffer;
7117330f729Sjoerg       expandUCNs(UCNIdentifierBuffer, CleanedStr);
7127330f729Sjoerg       II = getIdentifierInfo(UCNIdentifierBuffer);
7137330f729Sjoerg     } else {
7147330f729Sjoerg       II = getIdentifierInfo(CleanedStr);
7157330f729Sjoerg     }
7167330f729Sjoerg   }
7177330f729Sjoerg 
7187330f729Sjoerg   // Update the token info (identifier info and appropriate token kind).
7197330f729Sjoerg   Identifier.setIdentifierInfo(II);
7207330f729Sjoerg   if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
7217330f729Sjoerg       getSourceManager().isInSystemHeader(Identifier.getLocation()))
7227330f729Sjoerg     Identifier.setKind(tok::identifier);
7237330f729Sjoerg   else
7247330f729Sjoerg     Identifier.setKind(II->getTokenID());
7257330f729Sjoerg 
7267330f729Sjoerg   return II;
7277330f729Sjoerg }
7287330f729Sjoerg 
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)7297330f729Sjoerg void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
7307330f729Sjoerg   PoisonReasons[II] = DiagID;
7317330f729Sjoerg }
7327330f729Sjoerg 
PoisonSEHIdentifiers(bool Poison)7337330f729Sjoerg void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
7347330f729Sjoerg   assert(Ident__exception_code && Ident__exception_info);
7357330f729Sjoerg   assert(Ident___exception_code && Ident___exception_info);
7367330f729Sjoerg   Ident__exception_code->setIsPoisoned(Poison);
7377330f729Sjoerg   Ident___exception_code->setIsPoisoned(Poison);
7387330f729Sjoerg   Ident_GetExceptionCode->setIsPoisoned(Poison);
7397330f729Sjoerg   Ident__exception_info->setIsPoisoned(Poison);
7407330f729Sjoerg   Ident___exception_info->setIsPoisoned(Poison);
7417330f729Sjoerg   Ident_GetExceptionInfo->setIsPoisoned(Poison);
7427330f729Sjoerg   Ident__abnormal_termination->setIsPoisoned(Poison);
7437330f729Sjoerg   Ident___abnormal_termination->setIsPoisoned(Poison);
7447330f729Sjoerg   Ident_AbnormalTermination->setIsPoisoned(Poison);
7457330f729Sjoerg }
7467330f729Sjoerg 
HandlePoisonedIdentifier(Token & Identifier)7477330f729Sjoerg void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
7487330f729Sjoerg   assert(Identifier.getIdentifierInfo() &&
7497330f729Sjoerg          "Can't handle identifiers without identifier info!");
7507330f729Sjoerg   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
7517330f729Sjoerg     PoisonReasons.find(Identifier.getIdentifierInfo());
7527330f729Sjoerg   if(it == PoisonReasons.end())
7537330f729Sjoerg     Diag(Identifier, diag::err_pp_used_poisoned_id);
7547330f729Sjoerg   else
7557330f729Sjoerg     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
7567330f729Sjoerg }
7577330f729Sjoerg 
7587330f729Sjoerg /// Returns a diagnostic message kind for reporting a future keyword as
7597330f729Sjoerg /// appropriate for the identifier and specified language.
getFutureCompatDiagKind(const IdentifierInfo & II,const LangOptions & LangOpts)7607330f729Sjoerg static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
7617330f729Sjoerg                                           const LangOptions &LangOpts) {
7627330f729Sjoerg   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
7637330f729Sjoerg 
7647330f729Sjoerg   if (LangOpts.CPlusPlus)
7657330f729Sjoerg     return llvm::StringSwitch<diag::kind>(II.getName())
7667330f729Sjoerg #define CXX11_KEYWORD(NAME, FLAGS)                                             \
7677330f729Sjoerg         .Case(#NAME, diag::warn_cxx11_keyword)
768*e038c9c4Sjoerg #define CXX20_KEYWORD(NAME, FLAGS)                                             \
769*e038c9c4Sjoerg         .Case(#NAME, diag::warn_cxx20_keyword)
7707330f729Sjoerg #include "clang/Basic/TokenKinds.def"
771*e038c9c4Sjoerg         // char8_t is not modeled as a CXX20_KEYWORD because it's not
772*e038c9c4Sjoerg         // unconditionally enabled in C++20 mode. (It can be disabled
773*e038c9c4Sjoerg         // by -fno-char8_t.)
774*e038c9c4Sjoerg         .Case("char8_t", diag::warn_cxx20_keyword)
7757330f729Sjoerg         ;
7767330f729Sjoerg 
7777330f729Sjoerg   llvm_unreachable(
7787330f729Sjoerg       "Keyword not known to come from a newer Standard or proposed Standard");
7797330f729Sjoerg }
7807330f729Sjoerg 
updateOutOfDateIdentifier(IdentifierInfo & II) const7817330f729Sjoerg void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
7827330f729Sjoerg   assert(II.isOutOfDate() && "not out of date");
7837330f729Sjoerg   getExternalSource()->updateOutOfDateIdentifier(II);
7847330f729Sjoerg }
7857330f729Sjoerg 
7867330f729Sjoerg /// HandleIdentifier - This callback is invoked when the lexer reads an
7877330f729Sjoerg /// identifier.  This callback looks up the identifier in the map and/or
7887330f729Sjoerg /// potentially macro expands it or turns it into a named token (like 'for').
7897330f729Sjoerg ///
7907330f729Sjoerg /// Note that callers of this method are guarded by checking the
7917330f729Sjoerg /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
7927330f729Sjoerg /// IdentifierInfo methods that compute these properties will need to change to
7937330f729Sjoerg /// match.
HandleIdentifier(Token & Identifier)7947330f729Sjoerg bool Preprocessor::HandleIdentifier(Token &Identifier) {
7957330f729Sjoerg   assert(Identifier.getIdentifierInfo() &&
7967330f729Sjoerg          "Can't handle identifiers without identifier info!");
7977330f729Sjoerg 
7987330f729Sjoerg   IdentifierInfo &II = *Identifier.getIdentifierInfo();
7997330f729Sjoerg 
8007330f729Sjoerg   // If the information about this identifier is out of date, update it from
8017330f729Sjoerg   // the external source.
8027330f729Sjoerg   // We have to treat __VA_ARGS__ in a special way, since it gets
8037330f729Sjoerg   // serialized with isPoisoned = true, but our preprocessor may have
8047330f729Sjoerg   // unpoisoned it if we're defining a C99 macro.
8057330f729Sjoerg   if (II.isOutOfDate()) {
8067330f729Sjoerg     bool CurrentIsPoisoned = false;
8077330f729Sjoerg     const bool IsSpecialVariadicMacro =
8087330f729Sjoerg         &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
8097330f729Sjoerg     if (IsSpecialVariadicMacro)
8107330f729Sjoerg       CurrentIsPoisoned = II.isPoisoned();
8117330f729Sjoerg 
8127330f729Sjoerg     updateOutOfDateIdentifier(II);
8137330f729Sjoerg     Identifier.setKind(II.getTokenID());
8147330f729Sjoerg 
8157330f729Sjoerg     if (IsSpecialVariadicMacro)
8167330f729Sjoerg       II.setIsPoisoned(CurrentIsPoisoned);
8177330f729Sjoerg   }
8187330f729Sjoerg 
8197330f729Sjoerg   // If this identifier was poisoned, and if it was not produced from a macro
8207330f729Sjoerg   // expansion, emit an error.
8217330f729Sjoerg   if (II.isPoisoned() && CurPPLexer) {
8227330f729Sjoerg     HandlePoisonedIdentifier(Identifier);
8237330f729Sjoerg   }
8247330f729Sjoerg 
8257330f729Sjoerg   // If this is a macro to be expanded, do it.
8267330f729Sjoerg   if (MacroDefinition MD = getMacroDefinition(&II)) {
8277330f729Sjoerg     auto *MI = MD.getMacroInfo();
8287330f729Sjoerg     assert(MI && "macro definition with no macro info?");
8297330f729Sjoerg     if (!DisableMacroExpansion) {
8307330f729Sjoerg       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
8317330f729Sjoerg         // C99 6.10.3p10: If the preprocessing token immediately after the
8327330f729Sjoerg         // macro name isn't a '(', this macro should not be expanded.
8337330f729Sjoerg         if (!MI->isFunctionLike() || isNextPPTokenLParen())
8347330f729Sjoerg           return HandleMacroExpandedIdentifier(Identifier, MD);
8357330f729Sjoerg       } else {
8367330f729Sjoerg         // C99 6.10.3.4p2 says that a disabled macro may never again be
8377330f729Sjoerg         // expanded, even if it's in a context where it could be expanded in the
8387330f729Sjoerg         // future.
8397330f729Sjoerg         Identifier.setFlag(Token::DisableExpand);
8407330f729Sjoerg         if (MI->isObjectLike() || isNextPPTokenLParen())
8417330f729Sjoerg           Diag(Identifier, diag::pp_disabled_macro_expansion);
8427330f729Sjoerg       }
8437330f729Sjoerg     }
8447330f729Sjoerg   }
8457330f729Sjoerg 
8467330f729Sjoerg   // If this identifier is a keyword in a newer Standard or proposed Standard,
8477330f729Sjoerg   // produce a warning. Don't warn if we're not considering macro expansion,
8487330f729Sjoerg   // since this identifier might be the name of a macro.
8497330f729Sjoerg   // FIXME: This warning is disabled in cases where it shouldn't be, like
8507330f729Sjoerg   //   "#define constexpr constexpr", "int constexpr;"
8517330f729Sjoerg   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
8527330f729Sjoerg     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
8537330f729Sjoerg         << II.getName();
8547330f729Sjoerg     // Don't diagnose this keyword again in this translation unit.
8557330f729Sjoerg     II.setIsFutureCompatKeyword(false);
8567330f729Sjoerg   }
8577330f729Sjoerg 
8587330f729Sjoerg   // If this is an extension token, diagnose its use.
8597330f729Sjoerg   // We avoid diagnosing tokens that originate from macro definitions.
8607330f729Sjoerg   // FIXME: This warning is disabled in cases where it shouldn't be,
8617330f729Sjoerg   // like "#define TY typeof", "TY(1) x".
8627330f729Sjoerg   if (II.isExtensionToken() && !DisableMacroExpansion)
8637330f729Sjoerg     Diag(Identifier, diag::ext_token_used);
8647330f729Sjoerg 
8657330f729Sjoerg   // If this is the 'import' contextual keyword following an '@', note
8667330f729Sjoerg   // that the next token indicates a module name.
8677330f729Sjoerg   //
8687330f729Sjoerg   // Note that we do not treat 'import' as a contextual
8697330f729Sjoerg   // keyword when we're in a caching lexer, because caching lexers only get
8707330f729Sjoerg   // used in contexts where import declarations are disallowed.
8717330f729Sjoerg   //
8727330f729Sjoerg   // Likewise if this is the C++ Modules TS import keyword.
8737330f729Sjoerg   if (((LastTokenWasAt && II.isModulesImport()) ||
8747330f729Sjoerg        Identifier.is(tok::kw_import)) &&
8757330f729Sjoerg       !InMacroArgs && !DisableMacroExpansion &&
8767330f729Sjoerg       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
8777330f729Sjoerg       CurLexerKind != CLK_CachingLexer) {
8787330f729Sjoerg     ModuleImportLoc = Identifier.getLocation();
8797330f729Sjoerg     ModuleImportPath.clear();
8807330f729Sjoerg     ModuleImportExpectsIdentifier = true;
8817330f729Sjoerg     CurLexerKind = CLK_LexAfterModuleImport;
8827330f729Sjoerg   }
8837330f729Sjoerg   return true;
8847330f729Sjoerg }
8857330f729Sjoerg 
Lex(Token & Result)8867330f729Sjoerg void Preprocessor::Lex(Token &Result) {
8877330f729Sjoerg   ++LexLevel;
8887330f729Sjoerg 
8897330f729Sjoerg   // We loop here until a lex function returns a token; this avoids recursion.
8907330f729Sjoerg   bool ReturnedToken;
8917330f729Sjoerg   do {
8927330f729Sjoerg     switch (CurLexerKind) {
8937330f729Sjoerg     case CLK_Lexer:
8947330f729Sjoerg       ReturnedToken = CurLexer->Lex(Result);
8957330f729Sjoerg       break;
8967330f729Sjoerg     case CLK_TokenLexer:
8977330f729Sjoerg       ReturnedToken = CurTokenLexer->Lex(Result);
8987330f729Sjoerg       break;
8997330f729Sjoerg     case CLK_CachingLexer:
9007330f729Sjoerg       CachingLex(Result);
9017330f729Sjoerg       ReturnedToken = true;
9027330f729Sjoerg       break;
9037330f729Sjoerg     case CLK_LexAfterModuleImport:
9047330f729Sjoerg       ReturnedToken = LexAfterModuleImport(Result);
9057330f729Sjoerg       break;
9067330f729Sjoerg     }
9077330f729Sjoerg   } while (!ReturnedToken);
9087330f729Sjoerg 
909*e038c9c4Sjoerg   if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
910*e038c9c4Sjoerg     return;
911*e038c9c4Sjoerg 
9127330f729Sjoerg   if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
9137330f729Sjoerg     // Remember the identifier before code completion token.
9147330f729Sjoerg     setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
9157330f729Sjoerg     setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
9167330f729Sjoerg     // Set IdenfitierInfo to null to avoid confusing code that handles both
9177330f729Sjoerg     // identifiers and completion tokens.
9187330f729Sjoerg     Result.setIdentifierInfo(nullptr);
9197330f729Sjoerg   }
9207330f729Sjoerg 
9217330f729Sjoerg   // Update ImportSeqState to track our position within a C++20 import-seq
9227330f729Sjoerg   // if this token is being produced as a result of phase 4 of translation.
9237330f729Sjoerg   if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
9247330f729Sjoerg       !Result.getFlag(Token::IsReinjected)) {
9257330f729Sjoerg     switch (Result.getKind()) {
9267330f729Sjoerg     case tok::l_paren: case tok::l_square: case tok::l_brace:
9277330f729Sjoerg       ImportSeqState.handleOpenBracket();
9287330f729Sjoerg       break;
9297330f729Sjoerg     case tok::r_paren: case tok::r_square:
9307330f729Sjoerg       ImportSeqState.handleCloseBracket();
9317330f729Sjoerg       break;
9327330f729Sjoerg     case tok::r_brace:
9337330f729Sjoerg       ImportSeqState.handleCloseBrace();
9347330f729Sjoerg       break;
9357330f729Sjoerg     case tok::semi:
9367330f729Sjoerg       ImportSeqState.handleSemi();
9377330f729Sjoerg       break;
9387330f729Sjoerg     case tok::header_name:
9397330f729Sjoerg     case tok::annot_header_unit:
9407330f729Sjoerg       ImportSeqState.handleHeaderName();
9417330f729Sjoerg       break;
9427330f729Sjoerg     case tok::kw_export:
9437330f729Sjoerg       ImportSeqState.handleExport();
9447330f729Sjoerg       break;
9457330f729Sjoerg     case tok::identifier:
9467330f729Sjoerg       if (Result.getIdentifierInfo()->isModulesImport()) {
9477330f729Sjoerg         ImportSeqState.handleImport();
9487330f729Sjoerg         if (ImportSeqState.afterImportSeq()) {
9497330f729Sjoerg           ModuleImportLoc = Result.getLocation();
9507330f729Sjoerg           ModuleImportPath.clear();
9517330f729Sjoerg           ModuleImportExpectsIdentifier = true;
9527330f729Sjoerg           CurLexerKind = CLK_LexAfterModuleImport;
9537330f729Sjoerg         }
9547330f729Sjoerg         break;
9557330f729Sjoerg       }
9567330f729Sjoerg       LLVM_FALLTHROUGH;
9577330f729Sjoerg     default:
9587330f729Sjoerg       ImportSeqState.handleMisc();
9597330f729Sjoerg       break;
9607330f729Sjoerg     }
9617330f729Sjoerg   }
9627330f729Sjoerg 
9637330f729Sjoerg   LastTokenWasAt = Result.is(tok::at);
9647330f729Sjoerg   --LexLevel;
965*e038c9c4Sjoerg 
966*e038c9c4Sjoerg   if ((LexLevel == 0 || PreprocessToken) &&
967*e038c9c4Sjoerg       !Result.getFlag(Token::IsReinjected)) {
968*e038c9c4Sjoerg     if (LexLevel == 0)
969*e038c9c4Sjoerg       ++TokenCount;
970*e038c9c4Sjoerg     if (OnToken)
9717330f729Sjoerg       OnToken(Result);
9727330f729Sjoerg   }
973*e038c9c4Sjoerg }
9747330f729Sjoerg 
9757330f729Sjoerg /// Lex a header-name token (including one formed from header-name-tokens if
9767330f729Sjoerg /// \p AllowConcatenation is \c true).
9777330f729Sjoerg ///
9787330f729Sjoerg /// \param FilenameTok Filled in with the next token. On success, this will
9797330f729Sjoerg ///        be either a header_name token. On failure, it will be whatever other
9807330f729Sjoerg ///        token was found instead.
9817330f729Sjoerg /// \param AllowMacroExpansion If \c true, allow the header name to be formed
9827330f729Sjoerg ///        by macro expansion (concatenating tokens as necessary if the first
9837330f729Sjoerg ///        token is a '<').
9847330f729Sjoerg /// \return \c true if we reached EOD or EOF while looking for a > token in
9857330f729Sjoerg ///         a concatenated header name and diagnosed it. \c false otherwise.
LexHeaderName(Token & FilenameTok,bool AllowMacroExpansion)9867330f729Sjoerg bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
9877330f729Sjoerg   // Lex using header-name tokenization rules if tokens are being lexed from
9887330f729Sjoerg   // a file. Just grab a token normally if we're in a macro expansion.
9897330f729Sjoerg   if (CurPPLexer)
9907330f729Sjoerg     CurPPLexer->LexIncludeFilename(FilenameTok);
9917330f729Sjoerg   else
9927330f729Sjoerg     Lex(FilenameTok);
9937330f729Sjoerg 
9947330f729Sjoerg   // This could be a <foo/bar.h> file coming from a macro expansion.  In this
9957330f729Sjoerg   // case, glue the tokens together into an angle_string_literal token.
9967330f729Sjoerg   SmallString<128> FilenameBuffer;
9977330f729Sjoerg   if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
9987330f729Sjoerg     bool StartOfLine = FilenameTok.isAtStartOfLine();
9997330f729Sjoerg     bool LeadingSpace = FilenameTok.hasLeadingSpace();
10007330f729Sjoerg     bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
10017330f729Sjoerg 
10027330f729Sjoerg     SourceLocation Start = FilenameTok.getLocation();
10037330f729Sjoerg     SourceLocation End;
10047330f729Sjoerg     FilenameBuffer.push_back('<');
10057330f729Sjoerg 
10067330f729Sjoerg     // Consume tokens until we find a '>'.
10077330f729Sjoerg     // FIXME: A header-name could be formed starting or ending with an
10087330f729Sjoerg     // alternative token. It's not clear whether that's ill-formed in all
10097330f729Sjoerg     // cases.
10107330f729Sjoerg     while (FilenameTok.isNot(tok::greater)) {
10117330f729Sjoerg       Lex(FilenameTok);
10127330f729Sjoerg       if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
10137330f729Sjoerg         Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
10147330f729Sjoerg         Diag(Start, diag::note_matching) << tok::less;
10157330f729Sjoerg         return true;
10167330f729Sjoerg       }
10177330f729Sjoerg 
10187330f729Sjoerg       End = FilenameTok.getLocation();
10197330f729Sjoerg 
10207330f729Sjoerg       // FIXME: Provide code completion for #includes.
10217330f729Sjoerg       if (FilenameTok.is(tok::code_completion)) {
10227330f729Sjoerg         setCodeCompletionReached();
10237330f729Sjoerg         Lex(FilenameTok);
10247330f729Sjoerg         continue;
10257330f729Sjoerg       }
10267330f729Sjoerg 
10277330f729Sjoerg       // Append the spelling of this token to the buffer. If there was a space
10287330f729Sjoerg       // before it, add it now.
10297330f729Sjoerg       if (FilenameTok.hasLeadingSpace())
10307330f729Sjoerg         FilenameBuffer.push_back(' ');
10317330f729Sjoerg 
10327330f729Sjoerg       // Get the spelling of the token, directly into FilenameBuffer if
10337330f729Sjoerg       // possible.
10347330f729Sjoerg       size_t PreAppendSize = FilenameBuffer.size();
10357330f729Sjoerg       FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
10367330f729Sjoerg 
10377330f729Sjoerg       const char *BufPtr = &FilenameBuffer[PreAppendSize];
10387330f729Sjoerg       unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
10397330f729Sjoerg 
10407330f729Sjoerg       // If the token was spelled somewhere else, copy it into FilenameBuffer.
10417330f729Sjoerg       if (BufPtr != &FilenameBuffer[PreAppendSize])
10427330f729Sjoerg         memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
10437330f729Sjoerg 
10447330f729Sjoerg       // Resize FilenameBuffer to the correct size.
10457330f729Sjoerg       if (FilenameTok.getLength() != ActualLen)
10467330f729Sjoerg         FilenameBuffer.resize(PreAppendSize + ActualLen);
10477330f729Sjoerg     }
10487330f729Sjoerg 
10497330f729Sjoerg     FilenameTok.startToken();
10507330f729Sjoerg     FilenameTok.setKind(tok::header_name);
10517330f729Sjoerg     FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
10527330f729Sjoerg     FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
10537330f729Sjoerg     FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
10547330f729Sjoerg     CreateString(FilenameBuffer, FilenameTok, Start, End);
10557330f729Sjoerg   } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
10567330f729Sjoerg     // Convert a string-literal token of the form " h-char-sequence "
10577330f729Sjoerg     // (produced by macro expansion) into a header-name token.
10587330f729Sjoerg     //
10597330f729Sjoerg     // The rules for header-names don't quite match the rules for
10607330f729Sjoerg     // string-literals, but all the places where they differ result in
10617330f729Sjoerg     // undefined behavior, so we can and do treat them the same.
10627330f729Sjoerg     //
10637330f729Sjoerg     // A string-literal with a prefix or suffix is not translated into a
10647330f729Sjoerg     // header-name. This could theoretically be observable via the C++20
10657330f729Sjoerg     // context-sensitive header-name formation rules.
10667330f729Sjoerg     StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
10677330f729Sjoerg     if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
10687330f729Sjoerg       FilenameTok.setKind(tok::header_name);
10697330f729Sjoerg   }
10707330f729Sjoerg 
10717330f729Sjoerg   return false;
10727330f729Sjoerg }
10737330f729Sjoerg 
10747330f729Sjoerg /// Collect the tokens of a C++20 pp-import-suffix.
CollectPpImportSuffix(SmallVectorImpl<Token> & Toks)10757330f729Sjoerg void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
10767330f729Sjoerg   // FIXME: For error recovery, consider recognizing attribute syntax here
10777330f729Sjoerg   // and terminating / diagnosing a missing semicolon if we find anything
10787330f729Sjoerg   // else? (Can we leave that to the parser?)
10797330f729Sjoerg   unsigned BracketDepth = 0;
10807330f729Sjoerg   while (true) {
10817330f729Sjoerg     Toks.emplace_back();
10827330f729Sjoerg     Lex(Toks.back());
10837330f729Sjoerg 
10847330f729Sjoerg     switch (Toks.back().getKind()) {
10857330f729Sjoerg     case tok::l_paren: case tok::l_square: case tok::l_brace:
10867330f729Sjoerg       ++BracketDepth;
10877330f729Sjoerg       break;
10887330f729Sjoerg 
10897330f729Sjoerg     case tok::r_paren: case tok::r_square: case tok::r_brace:
10907330f729Sjoerg       if (BracketDepth == 0)
10917330f729Sjoerg         return;
10927330f729Sjoerg       --BracketDepth;
10937330f729Sjoerg       break;
10947330f729Sjoerg 
10957330f729Sjoerg     case tok::semi:
10967330f729Sjoerg       if (BracketDepth == 0)
10977330f729Sjoerg         return;
10987330f729Sjoerg     break;
10997330f729Sjoerg 
11007330f729Sjoerg     case tok::eof:
11017330f729Sjoerg       return;
11027330f729Sjoerg 
11037330f729Sjoerg     default:
11047330f729Sjoerg       break;
11057330f729Sjoerg     }
11067330f729Sjoerg   }
11077330f729Sjoerg }
11087330f729Sjoerg 
11097330f729Sjoerg 
11107330f729Sjoerg /// Lex a token following the 'import' contextual keyword.
11117330f729Sjoerg ///
11127330f729Sjoerg ///     pp-import: [C++20]
11137330f729Sjoerg ///           import header-name pp-import-suffix[opt] ;
11147330f729Sjoerg ///           import header-name-tokens pp-import-suffix[opt] ;
11157330f729Sjoerg /// [ObjC]    @ import module-name ;
11167330f729Sjoerg /// [Clang]   import module-name ;
11177330f729Sjoerg ///
11187330f729Sjoerg ///     header-name-tokens:
11197330f729Sjoerg ///           string-literal
11207330f729Sjoerg ///           < [any sequence of preprocessing-tokens other than >] >
11217330f729Sjoerg ///
11227330f729Sjoerg ///     module-name:
11237330f729Sjoerg ///           module-name-qualifier[opt] identifier
11247330f729Sjoerg ///
11257330f729Sjoerg ///     module-name-qualifier
11267330f729Sjoerg ///           module-name-qualifier[opt] identifier .
11277330f729Sjoerg ///
11287330f729Sjoerg /// We respond to a pp-import by importing macros from the named module.
LexAfterModuleImport(Token & Result)11297330f729Sjoerg bool Preprocessor::LexAfterModuleImport(Token &Result) {
11307330f729Sjoerg   // Figure out what kind of lexer we actually have.
11317330f729Sjoerg   recomputeCurLexerKind();
11327330f729Sjoerg 
11337330f729Sjoerg   // Lex the next token. The header-name lexing rules are used at the start of
11347330f729Sjoerg   // a pp-import.
11357330f729Sjoerg   //
11367330f729Sjoerg   // For now, we only support header-name imports in C++20 mode.
11377330f729Sjoerg   // FIXME: Should we allow this in all language modes that support an import
11387330f729Sjoerg   // declaration as an extension?
11397330f729Sjoerg   if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
11407330f729Sjoerg     if (LexHeaderName(Result))
11417330f729Sjoerg       return true;
11427330f729Sjoerg   } else {
11437330f729Sjoerg     Lex(Result);
11447330f729Sjoerg   }
11457330f729Sjoerg 
11467330f729Sjoerg   // Allocate a holding buffer for a sequence of tokens and introduce it into
11477330f729Sjoerg   // the token stream.
11487330f729Sjoerg   auto EnterTokens = [this](ArrayRef<Token> Toks) {
11497330f729Sjoerg     auto ToksCopy = std::make_unique<Token[]>(Toks.size());
11507330f729Sjoerg     std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
11517330f729Sjoerg     EnterTokenStream(std::move(ToksCopy), Toks.size(),
11527330f729Sjoerg                      /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
11537330f729Sjoerg   };
11547330f729Sjoerg 
11557330f729Sjoerg   // Check for a header-name.
11567330f729Sjoerg   SmallVector<Token, 32> Suffix;
11577330f729Sjoerg   if (Result.is(tok::header_name)) {
11587330f729Sjoerg     // Enter the header-name token into the token stream; a Lex action cannot
11597330f729Sjoerg     // both return a token and cache tokens (doing so would corrupt the token
11607330f729Sjoerg     // cache if the call to Lex comes from CachingLex / PeekAhead).
11617330f729Sjoerg     Suffix.push_back(Result);
11627330f729Sjoerg 
11637330f729Sjoerg     // Consume the pp-import-suffix and expand any macros in it now. We'll add
11647330f729Sjoerg     // it back into the token stream later.
11657330f729Sjoerg     CollectPpImportSuffix(Suffix);
11667330f729Sjoerg     if (Suffix.back().isNot(tok::semi)) {
11677330f729Sjoerg       // This is not a pp-import after all.
11687330f729Sjoerg       EnterTokens(Suffix);
11697330f729Sjoerg       return false;
11707330f729Sjoerg     }
11717330f729Sjoerg 
11727330f729Sjoerg     // C++2a [cpp.module]p1:
11737330f729Sjoerg     //   The ';' preprocessing-token terminating a pp-import shall not have
11747330f729Sjoerg     //   been produced by macro replacement.
11757330f729Sjoerg     SourceLocation SemiLoc = Suffix.back().getLocation();
11767330f729Sjoerg     if (SemiLoc.isMacroID())
11777330f729Sjoerg       Diag(SemiLoc, diag::err_header_import_semi_in_macro);
11787330f729Sjoerg 
11797330f729Sjoerg     // Reconstitute the import token.
11807330f729Sjoerg     Token ImportTok;
11817330f729Sjoerg     ImportTok.startToken();
11827330f729Sjoerg     ImportTok.setKind(tok::kw_import);
11837330f729Sjoerg     ImportTok.setLocation(ModuleImportLoc);
11847330f729Sjoerg     ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
11857330f729Sjoerg     ImportTok.setLength(6);
11867330f729Sjoerg 
11877330f729Sjoerg     auto Action = HandleHeaderIncludeOrImport(
11887330f729Sjoerg         /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
11897330f729Sjoerg     switch (Action.Kind) {
11907330f729Sjoerg     case ImportAction::None:
11917330f729Sjoerg       break;
11927330f729Sjoerg 
11937330f729Sjoerg     case ImportAction::ModuleBegin:
11947330f729Sjoerg       // Let the parser know we're textually entering the module.
11957330f729Sjoerg       Suffix.emplace_back();
11967330f729Sjoerg       Suffix.back().startToken();
11977330f729Sjoerg       Suffix.back().setKind(tok::annot_module_begin);
11987330f729Sjoerg       Suffix.back().setLocation(SemiLoc);
11997330f729Sjoerg       Suffix.back().setAnnotationEndLoc(SemiLoc);
12007330f729Sjoerg       Suffix.back().setAnnotationValue(Action.ModuleForHeader);
12017330f729Sjoerg       LLVM_FALLTHROUGH;
12027330f729Sjoerg 
12037330f729Sjoerg     case ImportAction::ModuleImport:
12047330f729Sjoerg     case ImportAction::SkippedModuleImport:
12057330f729Sjoerg       // We chose to import (or textually enter) the file. Convert the
12067330f729Sjoerg       // header-name token into a header unit annotation token.
12077330f729Sjoerg       Suffix[0].setKind(tok::annot_header_unit);
12087330f729Sjoerg       Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
12097330f729Sjoerg       Suffix[0].setAnnotationValue(Action.ModuleForHeader);
12107330f729Sjoerg       // FIXME: Call the moduleImport callback?
12117330f729Sjoerg       break;
1212*e038c9c4Sjoerg     case ImportAction::Failure:
1213*e038c9c4Sjoerg       assert(TheModuleLoader.HadFatalFailure &&
1214*e038c9c4Sjoerg              "This should be an early exit only to a fatal error");
1215*e038c9c4Sjoerg       Result.setKind(tok::eof);
1216*e038c9c4Sjoerg       CurLexer->cutOffLexing();
1217*e038c9c4Sjoerg       EnterTokens(Suffix);
1218*e038c9c4Sjoerg       return true;
12197330f729Sjoerg     }
12207330f729Sjoerg 
12217330f729Sjoerg     EnterTokens(Suffix);
12227330f729Sjoerg     return false;
12237330f729Sjoerg   }
12247330f729Sjoerg 
12257330f729Sjoerg   // The token sequence
12267330f729Sjoerg   //
12277330f729Sjoerg   //   import identifier (. identifier)*
12287330f729Sjoerg   //
12297330f729Sjoerg   // indicates a module import directive. We already saw the 'import'
12307330f729Sjoerg   // contextual keyword, so now we're looking for the identifiers.
12317330f729Sjoerg   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
12327330f729Sjoerg     // We expected to see an identifier here, and we did; continue handling
12337330f729Sjoerg     // identifiers.
12347330f729Sjoerg     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
12357330f729Sjoerg                                               Result.getLocation()));
12367330f729Sjoerg     ModuleImportExpectsIdentifier = false;
12377330f729Sjoerg     CurLexerKind = CLK_LexAfterModuleImport;
12387330f729Sjoerg     return true;
12397330f729Sjoerg   }
12407330f729Sjoerg 
12417330f729Sjoerg   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
12427330f729Sjoerg   // see the next identifier. (We can also see a '[[' that begins an
12437330f729Sjoerg   // attribute-specifier-seq here under the C++ Modules TS.)
12447330f729Sjoerg   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
12457330f729Sjoerg     ModuleImportExpectsIdentifier = true;
12467330f729Sjoerg     CurLexerKind = CLK_LexAfterModuleImport;
12477330f729Sjoerg     return true;
12487330f729Sjoerg   }
12497330f729Sjoerg 
12507330f729Sjoerg   // If we didn't recognize a module name at all, this is not a (valid) import.
12517330f729Sjoerg   if (ModuleImportPath.empty() || Result.is(tok::eof))
12527330f729Sjoerg     return true;
12537330f729Sjoerg 
12547330f729Sjoerg   // Consume the pp-import-suffix and expand any macros in it now, if we're not
12557330f729Sjoerg   // at the semicolon already.
12567330f729Sjoerg   SourceLocation SemiLoc = Result.getLocation();
12577330f729Sjoerg   if (Result.isNot(tok::semi)) {
12587330f729Sjoerg     Suffix.push_back(Result);
12597330f729Sjoerg     CollectPpImportSuffix(Suffix);
12607330f729Sjoerg     if (Suffix.back().isNot(tok::semi)) {
12617330f729Sjoerg       // This is not an import after all.
12627330f729Sjoerg       EnterTokens(Suffix);
12637330f729Sjoerg       return false;
12647330f729Sjoerg     }
12657330f729Sjoerg     SemiLoc = Suffix.back().getLocation();
12667330f729Sjoerg   }
12677330f729Sjoerg 
12687330f729Sjoerg   // Under the Modules TS, the dot is just part of the module name, and not
12697330f729Sjoerg   // a real hierarchy separator. Flatten such module names now.
12707330f729Sjoerg   //
12717330f729Sjoerg   // FIXME: Is this the right level to be performing this transformation?
12727330f729Sjoerg   std::string FlatModuleName;
12737330f729Sjoerg   if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
12747330f729Sjoerg     for (auto &Piece : ModuleImportPath) {
12757330f729Sjoerg       if (!FlatModuleName.empty())
12767330f729Sjoerg         FlatModuleName += ".";
12777330f729Sjoerg       FlatModuleName += Piece.first->getName();
12787330f729Sjoerg     }
12797330f729Sjoerg     SourceLocation FirstPathLoc = ModuleImportPath[0].second;
12807330f729Sjoerg     ModuleImportPath.clear();
12817330f729Sjoerg     ModuleImportPath.push_back(
12827330f729Sjoerg         std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
12837330f729Sjoerg   }
12847330f729Sjoerg 
12857330f729Sjoerg   Module *Imported = nullptr;
12867330f729Sjoerg   if (getLangOpts().Modules) {
12877330f729Sjoerg     Imported = TheModuleLoader.loadModule(ModuleImportLoc,
12887330f729Sjoerg                                           ModuleImportPath,
12897330f729Sjoerg                                           Module::Hidden,
12907330f729Sjoerg                                           /*IsInclusionDirective=*/false);
12917330f729Sjoerg     if (Imported)
12927330f729Sjoerg       makeModuleVisible(Imported, SemiLoc);
12937330f729Sjoerg   }
12947330f729Sjoerg   if (Callbacks)
12957330f729Sjoerg     Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
12967330f729Sjoerg 
12977330f729Sjoerg   if (!Suffix.empty()) {
12987330f729Sjoerg     EnterTokens(Suffix);
12997330f729Sjoerg     return false;
13007330f729Sjoerg   }
13017330f729Sjoerg   return true;
13027330f729Sjoerg }
13037330f729Sjoerg 
makeModuleVisible(Module * M,SourceLocation Loc)13047330f729Sjoerg void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
13057330f729Sjoerg   CurSubmoduleState->VisibleModules.setVisible(
13067330f729Sjoerg       M, Loc, [](Module *) {},
13077330f729Sjoerg       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
13087330f729Sjoerg         // FIXME: Include the path in the diagnostic.
13097330f729Sjoerg         // FIXME: Include the import location for the conflicting module.
13107330f729Sjoerg         Diag(ModuleImportLoc, diag::warn_module_conflict)
13117330f729Sjoerg             << Path[0]->getFullModuleName()
13127330f729Sjoerg             << Conflict->getFullModuleName()
13137330f729Sjoerg             << Message;
13147330f729Sjoerg       });
13157330f729Sjoerg 
13167330f729Sjoerg   // Add this module to the imports list of the currently-built submodule.
13177330f729Sjoerg   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
13187330f729Sjoerg     BuildingSubmoduleStack.back().M->Imports.insert(M);
13197330f729Sjoerg }
13207330f729Sjoerg 
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)13217330f729Sjoerg bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
13227330f729Sjoerg                                           const char *DiagnosticTag,
13237330f729Sjoerg                                           bool AllowMacroExpansion) {
13247330f729Sjoerg   // We need at least one string literal.
13257330f729Sjoerg   if (Result.isNot(tok::string_literal)) {
13267330f729Sjoerg     Diag(Result, diag::err_expected_string_literal)
13277330f729Sjoerg       << /*Source='in...'*/0 << DiagnosticTag;
13287330f729Sjoerg     return false;
13297330f729Sjoerg   }
13307330f729Sjoerg 
13317330f729Sjoerg   // Lex string literal tokens, optionally with macro expansion.
13327330f729Sjoerg   SmallVector<Token, 4> StrToks;
13337330f729Sjoerg   do {
13347330f729Sjoerg     StrToks.push_back(Result);
13357330f729Sjoerg 
13367330f729Sjoerg     if (Result.hasUDSuffix())
13377330f729Sjoerg       Diag(Result, diag::err_invalid_string_udl);
13387330f729Sjoerg 
13397330f729Sjoerg     if (AllowMacroExpansion)
13407330f729Sjoerg       Lex(Result);
13417330f729Sjoerg     else
13427330f729Sjoerg       LexUnexpandedToken(Result);
13437330f729Sjoerg   } while (Result.is(tok::string_literal));
13447330f729Sjoerg 
13457330f729Sjoerg   // Concatenate and parse the strings.
13467330f729Sjoerg   StringLiteralParser Literal(StrToks, *this);
13477330f729Sjoerg   assert(Literal.isAscii() && "Didn't allow wide strings in");
13487330f729Sjoerg 
13497330f729Sjoerg   if (Literal.hadError)
13507330f729Sjoerg     return false;
13517330f729Sjoerg 
13527330f729Sjoerg   if (Literal.Pascal) {
13537330f729Sjoerg     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
13547330f729Sjoerg       << /*Source='in...'*/0 << DiagnosticTag;
13557330f729Sjoerg     return false;
13567330f729Sjoerg   }
13577330f729Sjoerg 
1358*e038c9c4Sjoerg   String = std::string(Literal.GetString());
13597330f729Sjoerg   return true;
13607330f729Sjoerg }
13617330f729Sjoerg 
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)13627330f729Sjoerg bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
13637330f729Sjoerg   assert(Tok.is(tok::numeric_constant));
13647330f729Sjoerg   SmallString<8> IntegerBuffer;
13657330f729Sjoerg   bool NumberInvalid = false;
13667330f729Sjoerg   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
13677330f729Sjoerg   if (NumberInvalid)
13687330f729Sjoerg     return false;
1369*e038c9c4Sjoerg   NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1370*e038c9c4Sjoerg                                getLangOpts(), getTargetInfo(),
1371*e038c9c4Sjoerg                                getDiagnostics());
13727330f729Sjoerg   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
13737330f729Sjoerg     return false;
13747330f729Sjoerg   llvm::APInt APVal(64, 0);
13757330f729Sjoerg   if (Literal.GetIntegerValue(APVal))
13767330f729Sjoerg     return false;
13777330f729Sjoerg   Lex(Tok);
13787330f729Sjoerg   Value = APVal.getLimitedValue();
13797330f729Sjoerg   return true;
13807330f729Sjoerg }
13817330f729Sjoerg 
addCommentHandler(CommentHandler * Handler)13827330f729Sjoerg void Preprocessor::addCommentHandler(CommentHandler *Handler) {
13837330f729Sjoerg   assert(Handler && "NULL comment handler");
13847330f729Sjoerg   assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
13857330f729Sjoerg          "Comment handler already registered");
13867330f729Sjoerg   CommentHandlers.push_back(Handler);
13877330f729Sjoerg }
13887330f729Sjoerg 
removeCommentHandler(CommentHandler * Handler)13897330f729Sjoerg void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
13907330f729Sjoerg   std::vector<CommentHandler *>::iterator Pos =
13917330f729Sjoerg       llvm::find(CommentHandlers, Handler);
13927330f729Sjoerg   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
13937330f729Sjoerg   CommentHandlers.erase(Pos);
13947330f729Sjoerg }
13957330f729Sjoerg 
HandleComment(Token & result,SourceRange Comment)13967330f729Sjoerg bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
13977330f729Sjoerg   bool AnyPendingTokens = false;
13987330f729Sjoerg   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
13997330f729Sjoerg        HEnd = CommentHandlers.end();
14007330f729Sjoerg        H != HEnd; ++H) {
14017330f729Sjoerg     if ((*H)->HandleComment(*this, Comment))
14027330f729Sjoerg       AnyPendingTokens = true;
14037330f729Sjoerg   }
14047330f729Sjoerg   if (!AnyPendingTokens || getCommentRetentionState())
14057330f729Sjoerg     return false;
14067330f729Sjoerg   Lex(result);
14077330f729Sjoerg   return true;
14087330f729Sjoerg }
14097330f729Sjoerg 
14107330f729Sjoerg ModuleLoader::~ModuleLoader() = default;
14117330f729Sjoerg 
14127330f729Sjoerg CommentHandler::~CommentHandler() = default;
14137330f729Sjoerg 
1414*e038c9c4Sjoerg EmptylineHandler::~EmptylineHandler() = default;
1415*e038c9c4Sjoerg 
14167330f729Sjoerg CodeCompletionHandler::~CodeCompletionHandler() = default;
14177330f729Sjoerg 
createPreprocessingRecord()14187330f729Sjoerg void Preprocessor::createPreprocessingRecord() {
14197330f729Sjoerg   if (Record)
14207330f729Sjoerg     return;
14217330f729Sjoerg 
14227330f729Sjoerg   Record = new PreprocessingRecord(getSourceManager());
14237330f729Sjoerg   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
14247330f729Sjoerg }
1425