17330f729Sjoerg //===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg // This file implements the Preprocessor interface.
107330f729Sjoerg //
117330f729Sjoerg //===----------------------------------------------------------------------===//
127330f729Sjoerg //
137330f729Sjoerg // Options to support:
147330f729Sjoerg // -H - Print the name of each header file used.
157330f729Sjoerg // -d[DNI] - Dump various things.
167330f729Sjoerg // -fworking-directory - #line's with preprocessor's working dir.
177330f729Sjoerg // -fpreprocessed
187330f729Sjoerg // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
197330f729Sjoerg // -W*
207330f729Sjoerg // -w
217330f729Sjoerg //
227330f729Sjoerg // Messages to emit:
237330f729Sjoerg // "Multiple include guards may be useful for:\n"
247330f729Sjoerg //
257330f729Sjoerg //===----------------------------------------------------------------------===//
267330f729Sjoerg
277330f729Sjoerg #include "clang/Lex/Preprocessor.h"
28*e038c9c4Sjoerg #include "clang/Basic/Builtins.h"
297330f729Sjoerg #include "clang/Basic/FileManager.h"
307330f729Sjoerg #include "clang/Basic/FileSystemStatCache.h"
317330f729Sjoerg #include "clang/Basic/IdentifierTable.h"
327330f729Sjoerg #include "clang/Basic/LLVM.h"
337330f729Sjoerg #include "clang/Basic/LangOptions.h"
347330f729Sjoerg #include "clang/Basic/Module.h"
357330f729Sjoerg #include "clang/Basic/SourceLocation.h"
367330f729Sjoerg #include "clang/Basic/SourceManager.h"
377330f729Sjoerg #include "clang/Basic/TargetInfo.h"
387330f729Sjoerg #include "clang/Lex/CodeCompletionHandler.h"
397330f729Sjoerg #include "clang/Lex/ExternalPreprocessorSource.h"
407330f729Sjoerg #include "clang/Lex/HeaderSearch.h"
417330f729Sjoerg #include "clang/Lex/LexDiagnostic.h"
427330f729Sjoerg #include "clang/Lex/Lexer.h"
437330f729Sjoerg #include "clang/Lex/LiteralSupport.h"
447330f729Sjoerg #include "clang/Lex/MacroArgs.h"
457330f729Sjoerg #include "clang/Lex/MacroInfo.h"
467330f729Sjoerg #include "clang/Lex/ModuleLoader.h"
477330f729Sjoerg #include "clang/Lex/Pragma.h"
487330f729Sjoerg #include "clang/Lex/PreprocessingRecord.h"
497330f729Sjoerg #include "clang/Lex/PreprocessorLexer.h"
507330f729Sjoerg #include "clang/Lex/PreprocessorOptions.h"
517330f729Sjoerg #include "clang/Lex/ScratchBuffer.h"
527330f729Sjoerg #include "clang/Lex/Token.h"
537330f729Sjoerg #include "clang/Lex/TokenLexer.h"
547330f729Sjoerg #include "llvm/ADT/APInt.h"
557330f729Sjoerg #include "llvm/ADT/ArrayRef.h"
567330f729Sjoerg #include "llvm/ADT/DenseMap.h"
57*e038c9c4Sjoerg #include "llvm/ADT/STLExtras.h"
587330f729Sjoerg #include "llvm/ADT/SmallString.h"
597330f729Sjoerg #include "llvm/ADT/SmallVector.h"
607330f729Sjoerg #include "llvm/ADT/StringRef.h"
617330f729Sjoerg #include "llvm/ADT/StringSwitch.h"
627330f729Sjoerg #include "llvm/Support/Capacity.h"
637330f729Sjoerg #include "llvm/Support/ErrorHandling.h"
647330f729Sjoerg #include "llvm/Support/MemoryBuffer.h"
657330f729Sjoerg #include "llvm/Support/raw_ostream.h"
667330f729Sjoerg #include <algorithm>
677330f729Sjoerg #include <cassert>
687330f729Sjoerg #include <memory>
697330f729Sjoerg #include <string>
707330f729Sjoerg #include <utility>
717330f729Sjoerg #include <vector>
727330f729Sjoerg
737330f729Sjoerg using namespace clang;
747330f729Sjoerg
757330f729Sjoerg LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
767330f729Sjoerg
777330f729Sjoerg ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
787330f729Sjoerg
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)797330f729Sjoerg Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
807330f729Sjoerg DiagnosticsEngine &diags, LangOptions &opts,
817330f729Sjoerg SourceManager &SM, HeaderSearch &Headers,
827330f729Sjoerg ModuleLoader &TheModuleLoader,
837330f729Sjoerg IdentifierInfoLookup *IILookup, bool OwnsHeaders,
847330f729Sjoerg TranslationUnitKind TUKind)
857330f729Sjoerg : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
867330f729Sjoerg FileMgr(Headers.getFileMgr()), SourceMgr(SM),
877330f729Sjoerg ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
887330f729Sjoerg TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
897330f729Sjoerg // As the language options may have not been loaded yet (when
907330f729Sjoerg // deserializing an ASTUnit), adding keywords to the identifier table is
917330f729Sjoerg // deferred to Preprocessor::Initialize().
927330f729Sjoerg Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
937330f729Sjoerg TUKind(TUKind), SkipMainFilePreamble(0, true),
947330f729Sjoerg CurSubmoduleState(&NullSubmoduleState) {
957330f729Sjoerg OwnsHeaderSearch = OwnsHeaders;
967330f729Sjoerg
977330f729Sjoerg // Default to discarding comments.
987330f729Sjoerg KeepComments = false;
997330f729Sjoerg KeepMacroComments = false;
1007330f729Sjoerg SuppressIncludeNotFoundError = false;
1017330f729Sjoerg
1027330f729Sjoerg // Macro expansion is enabled.
1037330f729Sjoerg DisableMacroExpansion = false;
1047330f729Sjoerg MacroExpansionInDirectivesOverride = false;
1057330f729Sjoerg InMacroArgs = false;
1067330f729Sjoerg ArgMacro = nullptr;
1077330f729Sjoerg InMacroArgPreExpansion = false;
1087330f729Sjoerg NumCachedTokenLexers = 0;
1097330f729Sjoerg PragmasEnabled = true;
1107330f729Sjoerg ParsingIfOrElifDirective = false;
1117330f729Sjoerg PreprocessedOutput = false;
1127330f729Sjoerg
1137330f729Sjoerg // We haven't read anything from the external source.
1147330f729Sjoerg ReadMacrosFromExternalSource = false;
1157330f729Sjoerg
116*e038c9c4Sjoerg BuiltinInfo = std::make_unique<Builtin::Context>();
117*e038c9c4Sjoerg
1187330f729Sjoerg // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
1197330f729Sjoerg // a macro. They get unpoisoned where it is allowed.
1207330f729Sjoerg (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
1217330f729Sjoerg SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
1227330f729Sjoerg (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
1237330f729Sjoerg SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
1247330f729Sjoerg
1257330f729Sjoerg // Initialize the pragma handlers.
1267330f729Sjoerg RegisterBuiltinPragmas();
1277330f729Sjoerg
1287330f729Sjoerg // Initialize builtin macros like __LINE__ and friends.
1297330f729Sjoerg RegisterBuiltinMacros();
1307330f729Sjoerg
1317330f729Sjoerg if(LangOpts.Borland) {
1327330f729Sjoerg Ident__exception_info = getIdentifierInfo("_exception_info");
1337330f729Sjoerg Ident___exception_info = getIdentifierInfo("__exception_info");
1347330f729Sjoerg Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
1357330f729Sjoerg Ident__exception_code = getIdentifierInfo("_exception_code");
1367330f729Sjoerg Ident___exception_code = getIdentifierInfo("__exception_code");
1377330f729Sjoerg Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
1387330f729Sjoerg Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
1397330f729Sjoerg Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
1407330f729Sjoerg Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
1417330f729Sjoerg } else {
1427330f729Sjoerg Ident__exception_info = Ident__exception_code = nullptr;
1437330f729Sjoerg Ident__abnormal_termination = Ident___exception_info = nullptr;
1447330f729Sjoerg Ident___exception_code = Ident___abnormal_termination = nullptr;
1457330f729Sjoerg Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
1467330f729Sjoerg Ident_AbnormalTermination = nullptr;
1477330f729Sjoerg }
1487330f729Sjoerg
1497330f729Sjoerg // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
1507330f729Sjoerg if (usingPCHWithPragmaHdrStop())
1517330f729Sjoerg SkippingUntilPragmaHdrStop = true;
1527330f729Sjoerg
1537330f729Sjoerg // If using a PCH with a through header, start skipping tokens.
1547330f729Sjoerg if (!this->PPOpts->PCHThroughHeader.empty() &&
1557330f729Sjoerg !this->PPOpts->ImplicitPCHInclude.empty())
1567330f729Sjoerg SkippingUntilPCHThroughHeader = true;
1577330f729Sjoerg
1587330f729Sjoerg if (this->PPOpts->GeneratePreamble)
1597330f729Sjoerg PreambleConditionalStack.startRecording();
1607330f729Sjoerg
1617330f729Sjoerg ExcludedConditionalDirectiveSkipMappings =
1627330f729Sjoerg this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
1637330f729Sjoerg if (ExcludedConditionalDirectiveSkipMappings)
1647330f729Sjoerg ExcludedConditionalDirectiveSkipMappings->clear();
165*e038c9c4Sjoerg
166*e038c9c4Sjoerg MaxTokens = LangOpts.MaxTokens;
1677330f729Sjoerg }
1687330f729Sjoerg
~Preprocessor()1697330f729Sjoerg Preprocessor::~Preprocessor() {
1707330f729Sjoerg assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
1717330f729Sjoerg
1727330f729Sjoerg IncludeMacroStack.clear();
1737330f729Sjoerg
1747330f729Sjoerg // Destroy any macro definitions.
1757330f729Sjoerg while (MacroInfoChain *I = MIChainHead) {
1767330f729Sjoerg MIChainHead = I->Next;
1777330f729Sjoerg I->~MacroInfoChain();
1787330f729Sjoerg }
1797330f729Sjoerg
1807330f729Sjoerg // Free any cached macro expanders.
1817330f729Sjoerg // This populates MacroArgCache, so all TokenLexers need to be destroyed
1827330f729Sjoerg // before the code below that frees up the MacroArgCache list.
1837330f729Sjoerg std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
1847330f729Sjoerg CurTokenLexer.reset();
1857330f729Sjoerg
1867330f729Sjoerg // Free any cached MacroArgs.
1877330f729Sjoerg for (MacroArgs *ArgList = MacroArgCache; ArgList;)
1887330f729Sjoerg ArgList = ArgList->deallocate();
1897330f729Sjoerg
1907330f729Sjoerg // Delete the header search info, if we own it.
1917330f729Sjoerg if (OwnsHeaderSearch)
1927330f729Sjoerg delete &HeaderInfo;
1937330f729Sjoerg }
1947330f729Sjoerg
Initialize(const TargetInfo & Target,const TargetInfo * AuxTarget)1957330f729Sjoerg void Preprocessor::Initialize(const TargetInfo &Target,
1967330f729Sjoerg const TargetInfo *AuxTarget) {
1977330f729Sjoerg assert((!this->Target || this->Target == &Target) &&
1987330f729Sjoerg "Invalid override of target information");
1997330f729Sjoerg this->Target = &Target;
2007330f729Sjoerg
2017330f729Sjoerg assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
2027330f729Sjoerg "Invalid override of aux target information.");
2037330f729Sjoerg this->AuxTarget = AuxTarget;
2047330f729Sjoerg
2057330f729Sjoerg // Initialize information about built-ins.
206*e038c9c4Sjoerg BuiltinInfo->InitializeTarget(Target, AuxTarget);
2077330f729Sjoerg HeaderInfo.setTarget(Target);
2087330f729Sjoerg
2097330f729Sjoerg // Populate the identifier table with info about keywords for the current language.
2107330f729Sjoerg Identifiers.AddKeywords(LangOpts);
2117330f729Sjoerg }
2127330f729Sjoerg
InitializeForModelFile()2137330f729Sjoerg void Preprocessor::InitializeForModelFile() {
2147330f729Sjoerg NumEnteredSourceFiles = 0;
2157330f729Sjoerg
2167330f729Sjoerg // Reset pragmas
2177330f729Sjoerg PragmaHandlersBackup = std::move(PragmaHandlers);
2187330f729Sjoerg PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
2197330f729Sjoerg RegisterBuiltinPragmas();
2207330f729Sjoerg
2217330f729Sjoerg // Reset PredefinesFileID
2227330f729Sjoerg PredefinesFileID = FileID();
2237330f729Sjoerg }
2247330f729Sjoerg
FinalizeForModelFile()2257330f729Sjoerg void Preprocessor::FinalizeForModelFile() {
2267330f729Sjoerg NumEnteredSourceFiles = 1;
2277330f729Sjoerg
2287330f729Sjoerg PragmaHandlers = std::move(PragmaHandlersBackup);
2297330f729Sjoerg }
2307330f729Sjoerg
DumpToken(const Token & Tok,bool DumpFlags) const2317330f729Sjoerg void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
2327330f729Sjoerg llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
2337330f729Sjoerg << getSpelling(Tok) << "'";
2347330f729Sjoerg
2357330f729Sjoerg if (!DumpFlags) return;
2367330f729Sjoerg
2377330f729Sjoerg llvm::errs() << "\t";
2387330f729Sjoerg if (Tok.isAtStartOfLine())
2397330f729Sjoerg llvm::errs() << " [StartOfLine]";
2407330f729Sjoerg if (Tok.hasLeadingSpace())
2417330f729Sjoerg llvm::errs() << " [LeadingSpace]";
2427330f729Sjoerg if (Tok.isExpandDisabled())
2437330f729Sjoerg llvm::errs() << " [ExpandDisabled]";
2447330f729Sjoerg if (Tok.needsCleaning()) {
2457330f729Sjoerg const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
2467330f729Sjoerg llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
2477330f729Sjoerg << "']";
2487330f729Sjoerg }
2497330f729Sjoerg
2507330f729Sjoerg llvm::errs() << "\tLoc=<";
2517330f729Sjoerg DumpLocation(Tok.getLocation());
2527330f729Sjoerg llvm::errs() << ">";
2537330f729Sjoerg }
2547330f729Sjoerg
DumpLocation(SourceLocation Loc) const2557330f729Sjoerg void Preprocessor::DumpLocation(SourceLocation Loc) const {
2567330f729Sjoerg Loc.print(llvm::errs(), SourceMgr);
2577330f729Sjoerg }
2587330f729Sjoerg
DumpMacro(const MacroInfo & MI) const2597330f729Sjoerg void Preprocessor::DumpMacro(const MacroInfo &MI) const {
2607330f729Sjoerg llvm::errs() << "MACRO: ";
2617330f729Sjoerg for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
2627330f729Sjoerg DumpToken(MI.getReplacementToken(i));
2637330f729Sjoerg llvm::errs() << " ";
2647330f729Sjoerg }
2657330f729Sjoerg llvm::errs() << "\n";
2667330f729Sjoerg }
2677330f729Sjoerg
PrintStats()2687330f729Sjoerg void Preprocessor::PrintStats() {
2697330f729Sjoerg llvm::errs() << "\n*** Preprocessor Stats:\n";
2707330f729Sjoerg llvm::errs() << NumDirectives << " directives found:\n";
2717330f729Sjoerg llvm::errs() << " " << NumDefined << " #define.\n";
2727330f729Sjoerg llvm::errs() << " " << NumUndefined << " #undef.\n";
2737330f729Sjoerg llvm::errs() << " #include/#include_next/#import:\n";
2747330f729Sjoerg llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
2757330f729Sjoerg llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
2767330f729Sjoerg llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
2777330f729Sjoerg llvm::errs() << " " << NumElse << " #else/#elif.\n";
2787330f729Sjoerg llvm::errs() << " " << NumEndif << " #endif.\n";
2797330f729Sjoerg llvm::errs() << " " << NumPragma << " #pragma.\n";
2807330f729Sjoerg llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
2817330f729Sjoerg
2827330f729Sjoerg llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
2837330f729Sjoerg << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
2847330f729Sjoerg << NumFastMacroExpanded << " on the fast path.\n";
2857330f729Sjoerg llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
2867330f729Sjoerg << " token paste (##) operations performed, "
2877330f729Sjoerg << NumFastTokenPaste << " on the fast path.\n";
2887330f729Sjoerg
2897330f729Sjoerg llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
2907330f729Sjoerg
2917330f729Sjoerg llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
2927330f729Sjoerg llvm::errs() << "\n Macro Expanded Tokens: "
2937330f729Sjoerg << llvm::capacity_in_bytes(MacroExpandedTokens);
2947330f729Sjoerg llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
2957330f729Sjoerg // FIXME: List information for all submodules.
2967330f729Sjoerg llvm::errs() << "\n Macros: "
2977330f729Sjoerg << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
2987330f729Sjoerg llvm::errs() << "\n #pragma push_macro Info: "
2997330f729Sjoerg << llvm::capacity_in_bytes(PragmaPushMacroInfo);
3007330f729Sjoerg llvm::errs() << "\n Poison Reasons: "
3017330f729Sjoerg << llvm::capacity_in_bytes(PoisonReasons);
3027330f729Sjoerg llvm::errs() << "\n Comment Handlers: "
3037330f729Sjoerg << llvm::capacity_in_bytes(CommentHandlers) << "\n";
3047330f729Sjoerg }
3057330f729Sjoerg
3067330f729Sjoerg Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const3077330f729Sjoerg Preprocessor::macro_begin(bool IncludeExternalMacros) const {
3087330f729Sjoerg if (IncludeExternalMacros && ExternalSource &&
3097330f729Sjoerg !ReadMacrosFromExternalSource) {
3107330f729Sjoerg ReadMacrosFromExternalSource = true;
3117330f729Sjoerg ExternalSource->ReadDefinedMacros();
3127330f729Sjoerg }
3137330f729Sjoerg
3147330f729Sjoerg // Make sure we cover all macros in visible modules.
3157330f729Sjoerg for (const ModuleMacro &Macro : ModuleMacros)
3167330f729Sjoerg CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
3177330f729Sjoerg
3187330f729Sjoerg return CurSubmoduleState->Macros.begin();
3197330f729Sjoerg }
3207330f729Sjoerg
getTotalMemory() const3217330f729Sjoerg size_t Preprocessor::getTotalMemory() const {
3227330f729Sjoerg return BP.getTotalMemory()
3237330f729Sjoerg + llvm::capacity_in_bytes(MacroExpandedTokens)
3247330f729Sjoerg + Predefines.capacity() /* Predefines buffer. */
3257330f729Sjoerg // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
3267330f729Sjoerg // and ModuleMacros.
3277330f729Sjoerg + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
3287330f729Sjoerg + llvm::capacity_in_bytes(PragmaPushMacroInfo)
3297330f729Sjoerg + llvm::capacity_in_bytes(PoisonReasons)
3307330f729Sjoerg + llvm::capacity_in_bytes(CommentHandlers);
3317330f729Sjoerg }
3327330f729Sjoerg
3337330f729Sjoerg Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const3347330f729Sjoerg Preprocessor::macro_end(bool IncludeExternalMacros) const {
3357330f729Sjoerg if (IncludeExternalMacros && ExternalSource &&
3367330f729Sjoerg !ReadMacrosFromExternalSource) {
3377330f729Sjoerg ReadMacrosFromExternalSource = true;
3387330f729Sjoerg ExternalSource->ReadDefinedMacros();
3397330f729Sjoerg }
3407330f729Sjoerg
3417330f729Sjoerg return CurSubmoduleState->Macros.end();
3427330f729Sjoerg }
3437330f729Sjoerg
3447330f729Sjoerg /// Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)3457330f729Sjoerg static bool MacroDefinitionEquals(const MacroInfo *MI,
3467330f729Sjoerg ArrayRef<TokenValue> Tokens) {
3477330f729Sjoerg return Tokens.size() == MI->getNumTokens() &&
3487330f729Sjoerg std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
3497330f729Sjoerg }
3507330f729Sjoerg
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const3517330f729Sjoerg StringRef Preprocessor::getLastMacroWithSpelling(
3527330f729Sjoerg SourceLocation Loc,
3537330f729Sjoerg ArrayRef<TokenValue> Tokens) const {
3547330f729Sjoerg SourceLocation BestLocation;
3557330f729Sjoerg StringRef BestSpelling;
3567330f729Sjoerg for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
3577330f729Sjoerg I != E; ++I) {
3587330f729Sjoerg const MacroDirective::DefInfo
3597330f729Sjoerg Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
3607330f729Sjoerg if (!Def || !Def.getMacroInfo())
3617330f729Sjoerg continue;
3627330f729Sjoerg if (!Def.getMacroInfo()->isObjectLike())
3637330f729Sjoerg continue;
3647330f729Sjoerg if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
3657330f729Sjoerg continue;
3667330f729Sjoerg SourceLocation Location = Def.getLocation();
3677330f729Sjoerg // Choose the macro defined latest.
3687330f729Sjoerg if (BestLocation.isInvalid() ||
3697330f729Sjoerg (Location.isValid() &&
3707330f729Sjoerg SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
3717330f729Sjoerg BestLocation = Location;
3727330f729Sjoerg BestSpelling = I->first->getName();
3737330f729Sjoerg }
3747330f729Sjoerg }
3757330f729Sjoerg return BestSpelling;
3767330f729Sjoerg }
3777330f729Sjoerg
recomputeCurLexerKind()3787330f729Sjoerg void Preprocessor::recomputeCurLexerKind() {
3797330f729Sjoerg if (CurLexer)
3807330f729Sjoerg CurLexerKind = CLK_Lexer;
3817330f729Sjoerg else if (CurTokenLexer)
3827330f729Sjoerg CurLexerKind = CLK_TokenLexer;
3837330f729Sjoerg else
3847330f729Sjoerg CurLexerKind = CLK_CachingLexer;
3857330f729Sjoerg }
3867330f729Sjoerg
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)3877330f729Sjoerg bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
3887330f729Sjoerg unsigned CompleteLine,
3897330f729Sjoerg unsigned CompleteColumn) {
3907330f729Sjoerg assert(File);
3917330f729Sjoerg assert(CompleteLine && CompleteColumn && "Starts from 1:1");
3927330f729Sjoerg assert(!CodeCompletionFile && "Already set");
3937330f729Sjoerg
3947330f729Sjoerg // Load the actual file's contents.
395*e038c9c4Sjoerg Optional<llvm::MemoryBufferRef> Buffer =
396*e038c9c4Sjoerg SourceMgr.getMemoryBufferForFileOrNone(File);
397*e038c9c4Sjoerg if (!Buffer)
3987330f729Sjoerg return true;
3997330f729Sjoerg
4007330f729Sjoerg // Find the byte position of the truncation point.
4017330f729Sjoerg const char *Position = Buffer->getBufferStart();
4027330f729Sjoerg for (unsigned Line = 1; Line < CompleteLine; ++Line) {
4037330f729Sjoerg for (; *Position; ++Position) {
4047330f729Sjoerg if (*Position != '\r' && *Position != '\n')
4057330f729Sjoerg continue;
4067330f729Sjoerg
4077330f729Sjoerg // Eat \r\n or \n\r as a single line.
4087330f729Sjoerg if ((Position[1] == '\r' || Position[1] == '\n') &&
4097330f729Sjoerg Position[0] != Position[1])
4107330f729Sjoerg ++Position;
4117330f729Sjoerg ++Position;
4127330f729Sjoerg break;
4137330f729Sjoerg }
4147330f729Sjoerg }
4157330f729Sjoerg
4167330f729Sjoerg Position += CompleteColumn - 1;
4177330f729Sjoerg
4187330f729Sjoerg // If pointing inside the preamble, adjust the position at the beginning of
4197330f729Sjoerg // the file after the preamble.
4207330f729Sjoerg if (SkipMainFilePreamble.first &&
4217330f729Sjoerg SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
4227330f729Sjoerg if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
4237330f729Sjoerg Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
4247330f729Sjoerg }
4257330f729Sjoerg
4267330f729Sjoerg if (Position > Buffer->getBufferEnd())
4277330f729Sjoerg Position = Buffer->getBufferEnd();
4287330f729Sjoerg
4297330f729Sjoerg CodeCompletionFile = File;
4307330f729Sjoerg CodeCompletionOffset = Position - Buffer->getBufferStart();
4317330f729Sjoerg
4327330f729Sjoerg auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
4337330f729Sjoerg Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
4347330f729Sjoerg char *NewBuf = NewBuffer->getBufferStart();
4357330f729Sjoerg char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
4367330f729Sjoerg *NewPos = '\0';
4377330f729Sjoerg std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
4387330f729Sjoerg SourceMgr.overrideFileContents(File, std::move(NewBuffer));
4397330f729Sjoerg
4407330f729Sjoerg return false;
4417330f729Sjoerg }
4427330f729Sjoerg
CodeCompleteIncludedFile(llvm::StringRef Dir,bool IsAngled)4437330f729Sjoerg void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
4447330f729Sjoerg bool IsAngled) {
445*e038c9c4Sjoerg setCodeCompletionReached();
4467330f729Sjoerg if (CodeComplete)
4477330f729Sjoerg CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
4487330f729Sjoerg }
4497330f729Sjoerg
CodeCompleteNaturalLanguage()4507330f729Sjoerg void Preprocessor::CodeCompleteNaturalLanguage() {
451*e038c9c4Sjoerg setCodeCompletionReached();
4527330f729Sjoerg if (CodeComplete)
4537330f729Sjoerg CodeComplete->CodeCompleteNaturalLanguage();
4547330f729Sjoerg }
4557330f729Sjoerg
4567330f729Sjoerg /// getSpelling - This method is used to get the spelling of a token into a
4577330f729Sjoerg /// SmallVector. Note that the returned StringRef may not point to the
4587330f729Sjoerg /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const4597330f729Sjoerg StringRef Preprocessor::getSpelling(const Token &Tok,
4607330f729Sjoerg SmallVectorImpl<char> &Buffer,
4617330f729Sjoerg bool *Invalid) const {
4627330f729Sjoerg // NOTE: this has to be checked *before* testing for an IdentifierInfo.
4637330f729Sjoerg if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
4647330f729Sjoerg // Try the fast path.
4657330f729Sjoerg if (const IdentifierInfo *II = Tok.getIdentifierInfo())
4667330f729Sjoerg return II->getName();
4677330f729Sjoerg }
4687330f729Sjoerg
4697330f729Sjoerg // Resize the buffer if we need to copy into it.
4707330f729Sjoerg if (Tok.needsCleaning())
4717330f729Sjoerg Buffer.resize(Tok.getLength());
4727330f729Sjoerg
4737330f729Sjoerg const char *Ptr = Buffer.data();
4747330f729Sjoerg unsigned Len = getSpelling(Tok, Ptr, Invalid);
4757330f729Sjoerg return StringRef(Ptr, Len);
4767330f729Sjoerg }
4777330f729Sjoerg
4787330f729Sjoerg /// CreateString - Plop the specified string into a scratch buffer and return a
4797330f729Sjoerg /// location for it. If specified, the source location provides a source
4807330f729Sjoerg /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)4817330f729Sjoerg void Preprocessor::CreateString(StringRef Str, Token &Tok,
4827330f729Sjoerg SourceLocation ExpansionLocStart,
4837330f729Sjoerg SourceLocation ExpansionLocEnd) {
4847330f729Sjoerg Tok.setLength(Str.size());
4857330f729Sjoerg
4867330f729Sjoerg const char *DestPtr;
4877330f729Sjoerg SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
4887330f729Sjoerg
4897330f729Sjoerg if (ExpansionLocStart.isValid())
4907330f729Sjoerg Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
4917330f729Sjoerg ExpansionLocEnd, Str.size());
4927330f729Sjoerg Tok.setLocation(Loc);
4937330f729Sjoerg
4947330f729Sjoerg // If this is a raw identifier or a literal token, set the pointer data.
4957330f729Sjoerg if (Tok.is(tok::raw_identifier))
4967330f729Sjoerg Tok.setRawIdentifierData(DestPtr);
4977330f729Sjoerg else if (Tok.isLiteral())
4987330f729Sjoerg Tok.setLiteralData(DestPtr);
4997330f729Sjoerg }
5007330f729Sjoerg
SplitToken(SourceLocation Loc,unsigned Length)5017330f729Sjoerg SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
5027330f729Sjoerg auto &SM = getSourceManager();
5037330f729Sjoerg SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
5047330f729Sjoerg std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
5057330f729Sjoerg bool Invalid = false;
5067330f729Sjoerg StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
5077330f729Sjoerg if (Invalid)
5087330f729Sjoerg return SourceLocation();
5097330f729Sjoerg
5107330f729Sjoerg // FIXME: We could consider re-using spelling for tokens we see repeatedly.
5117330f729Sjoerg const char *DestPtr;
5127330f729Sjoerg SourceLocation Spelling =
5137330f729Sjoerg ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
5147330f729Sjoerg return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
5157330f729Sjoerg }
5167330f729Sjoerg
getCurrentModule()5177330f729Sjoerg Module *Preprocessor::getCurrentModule() {
5187330f729Sjoerg if (!getLangOpts().isCompilingModule())
5197330f729Sjoerg return nullptr;
5207330f729Sjoerg
5217330f729Sjoerg return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
5227330f729Sjoerg }
5237330f729Sjoerg
5247330f729Sjoerg //===----------------------------------------------------------------------===//
5257330f729Sjoerg // Preprocessor Initialization Methods
5267330f729Sjoerg //===----------------------------------------------------------------------===//
5277330f729Sjoerg
5287330f729Sjoerg /// EnterMainSourceFile - Enter the specified FileID as the main source file,
5297330f729Sjoerg /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()5307330f729Sjoerg void Preprocessor::EnterMainSourceFile() {
5317330f729Sjoerg // We do not allow the preprocessor to reenter the main file. Doing so will
5327330f729Sjoerg // cause FileID's to accumulate information from both runs (e.g. #line
5337330f729Sjoerg // information) and predefined macros aren't guaranteed to be set properly.
5347330f729Sjoerg assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
5357330f729Sjoerg FileID MainFileID = SourceMgr.getMainFileID();
5367330f729Sjoerg
5377330f729Sjoerg // If MainFileID is loaded it means we loaded an AST file, no need to enter
5387330f729Sjoerg // a main file.
5397330f729Sjoerg if (!SourceMgr.isLoadedFileID(MainFileID)) {
5407330f729Sjoerg // Enter the main file source buffer.
5417330f729Sjoerg EnterSourceFile(MainFileID, nullptr, SourceLocation());
5427330f729Sjoerg
5437330f729Sjoerg // If we've been asked to skip bytes in the main file (e.g., as part of a
5447330f729Sjoerg // precompiled preamble), do so now.
5457330f729Sjoerg if (SkipMainFilePreamble.first > 0)
5467330f729Sjoerg CurLexer->SetByteOffset(SkipMainFilePreamble.first,
5477330f729Sjoerg SkipMainFilePreamble.second);
5487330f729Sjoerg
5497330f729Sjoerg // Tell the header info that the main file was entered. If the file is later
5507330f729Sjoerg // #imported, it won't be re-entered.
5517330f729Sjoerg if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
5527330f729Sjoerg HeaderInfo.IncrementIncludeCount(FE);
5537330f729Sjoerg }
5547330f729Sjoerg
5557330f729Sjoerg // Preprocess Predefines to populate the initial preprocessor state.
5567330f729Sjoerg std::unique_ptr<llvm::MemoryBuffer> SB =
5577330f729Sjoerg llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
5587330f729Sjoerg assert(SB && "Cannot create predefined source buffer");
5597330f729Sjoerg FileID FID = SourceMgr.createFileID(std::move(SB));
5607330f729Sjoerg assert(FID.isValid() && "Could not create FileID for predefines?");
5617330f729Sjoerg setPredefinesFileID(FID);
5627330f729Sjoerg
5637330f729Sjoerg // Start parsing the predefines.
5647330f729Sjoerg EnterSourceFile(FID, nullptr, SourceLocation());
5657330f729Sjoerg
5667330f729Sjoerg if (!PPOpts->PCHThroughHeader.empty()) {
5677330f729Sjoerg // Lookup and save the FileID for the through header. If it isn't found
5687330f729Sjoerg // in the search path, it's a fatal error.
5697330f729Sjoerg const DirectoryLookup *CurDir;
5707330f729Sjoerg Optional<FileEntryRef> File = LookupFile(
5717330f729Sjoerg SourceLocation(), PPOpts->PCHThroughHeader,
5727330f729Sjoerg /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
5737330f729Sjoerg /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
5747330f729Sjoerg /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
5757330f729Sjoerg /*IsFrameworkFound=*/nullptr);
5767330f729Sjoerg if (!File) {
5777330f729Sjoerg Diag(SourceLocation(), diag::err_pp_through_header_not_found)
5787330f729Sjoerg << PPOpts->PCHThroughHeader;
5797330f729Sjoerg return;
5807330f729Sjoerg }
5817330f729Sjoerg setPCHThroughHeaderFileID(
5827330f729Sjoerg SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
5837330f729Sjoerg }
5847330f729Sjoerg
5857330f729Sjoerg // Skip tokens from the Predefines and if needed the main file.
5867330f729Sjoerg if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
5877330f729Sjoerg (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
5887330f729Sjoerg SkipTokensWhileUsingPCH();
5897330f729Sjoerg }
5907330f729Sjoerg
setPCHThroughHeaderFileID(FileID FID)5917330f729Sjoerg void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
5927330f729Sjoerg assert(PCHThroughHeaderFileID.isInvalid() &&
5937330f729Sjoerg "PCHThroughHeaderFileID already set!");
5947330f729Sjoerg PCHThroughHeaderFileID = FID;
5957330f729Sjoerg }
5967330f729Sjoerg
isPCHThroughHeader(const FileEntry * FE)5977330f729Sjoerg bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
5987330f729Sjoerg assert(PCHThroughHeaderFileID.isValid() &&
5997330f729Sjoerg "Invalid PCH through header FileID");
6007330f729Sjoerg return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
6017330f729Sjoerg }
6027330f729Sjoerg
creatingPCHWithThroughHeader()6037330f729Sjoerg bool Preprocessor::creatingPCHWithThroughHeader() {
6047330f729Sjoerg return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
6057330f729Sjoerg PCHThroughHeaderFileID.isValid();
6067330f729Sjoerg }
6077330f729Sjoerg
usingPCHWithThroughHeader()6087330f729Sjoerg bool Preprocessor::usingPCHWithThroughHeader() {
6097330f729Sjoerg return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
6107330f729Sjoerg PCHThroughHeaderFileID.isValid();
6117330f729Sjoerg }
6127330f729Sjoerg
creatingPCHWithPragmaHdrStop()6137330f729Sjoerg bool Preprocessor::creatingPCHWithPragmaHdrStop() {
6147330f729Sjoerg return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
6157330f729Sjoerg }
6167330f729Sjoerg
usingPCHWithPragmaHdrStop()6177330f729Sjoerg bool Preprocessor::usingPCHWithPragmaHdrStop() {
6187330f729Sjoerg return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
6197330f729Sjoerg }
6207330f729Sjoerg
6217330f729Sjoerg /// Skip tokens until after the #include of the through header or
6227330f729Sjoerg /// until after a #pragma hdrstop is seen. Tokens in the predefines file
6237330f729Sjoerg /// and the main file may be skipped. If the end of the predefines file
6247330f729Sjoerg /// is reached, skipping continues into the main file. If the end of the
6257330f729Sjoerg /// main file is reached, it's a fatal error.
SkipTokensWhileUsingPCH()6267330f729Sjoerg void Preprocessor::SkipTokensWhileUsingPCH() {
6277330f729Sjoerg bool ReachedMainFileEOF = false;
6287330f729Sjoerg bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
6297330f729Sjoerg bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
6307330f729Sjoerg Token Tok;
6317330f729Sjoerg while (true) {
6327330f729Sjoerg bool InPredefines =
6337330f729Sjoerg (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
6347330f729Sjoerg switch (CurLexerKind) {
6357330f729Sjoerg case CLK_Lexer:
6367330f729Sjoerg CurLexer->Lex(Tok);
6377330f729Sjoerg break;
6387330f729Sjoerg case CLK_TokenLexer:
6397330f729Sjoerg CurTokenLexer->Lex(Tok);
6407330f729Sjoerg break;
6417330f729Sjoerg case CLK_CachingLexer:
6427330f729Sjoerg CachingLex(Tok);
6437330f729Sjoerg break;
6447330f729Sjoerg case CLK_LexAfterModuleImport:
6457330f729Sjoerg LexAfterModuleImport(Tok);
6467330f729Sjoerg break;
6477330f729Sjoerg }
6487330f729Sjoerg if (Tok.is(tok::eof) && !InPredefines) {
6497330f729Sjoerg ReachedMainFileEOF = true;
6507330f729Sjoerg break;
6517330f729Sjoerg }
6527330f729Sjoerg if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
6537330f729Sjoerg break;
6547330f729Sjoerg if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
6557330f729Sjoerg break;
6567330f729Sjoerg }
6577330f729Sjoerg if (ReachedMainFileEOF) {
6587330f729Sjoerg if (UsingPCHThroughHeader)
6597330f729Sjoerg Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
6607330f729Sjoerg << PPOpts->PCHThroughHeader << 1;
6617330f729Sjoerg else if (!PPOpts->PCHWithHdrStopCreate)
6627330f729Sjoerg Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
6637330f729Sjoerg }
6647330f729Sjoerg }
6657330f729Sjoerg
replayPreambleConditionalStack()6667330f729Sjoerg void Preprocessor::replayPreambleConditionalStack() {
6677330f729Sjoerg // Restore the conditional stack from the preamble, if there is one.
6687330f729Sjoerg if (PreambleConditionalStack.isReplaying()) {
6697330f729Sjoerg assert(CurPPLexer &&
6707330f729Sjoerg "CurPPLexer is null when calling replayPreambleConditionalStack.");
6717330f729Sjoerg CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
6727330f729Sjoerg PreambleConditionalStack.doneReplaying();
6737330f729Sjoerg if (PreambleConditionalStack.reachedEOFWhileSkipping())
6747330f729Sjoerg SkipExcludedConditionalBlock(
6757330f729Sjoerg PreambleConditionalStack.SkipInfo->HashTokenLoc,
6767330f729Sjoerg PreambleConditionalStack.SkipInfo->IfTokenLoc,
6777330f729Sjoerg PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
6787330f729Sjoerg PreambleConditionalStack.SkipInfo->FoundElse,
6797330f729Sjoerg PreambleConditionalStack.SkipInfo->ElseLoc);
6807330f729Sjoerg }
6817330f729Sjoerg }
6827330f729Sjoerg
EndSourceFile()6837330f729Sjoerg void Preprocessor::EndSourceFile() {
6847330f729Sjoerg // Notify the client that we reached the end of the source file.
6857330f729Sjoerg if (Callbacks)
6867330f729Sjoerg Callbacks->EndOfMainFile();
6877330f729Sjoerg }
6887330f729Sjoerg
6897330f729Sjoerg //===----------------------------------------------------------------------===//
6907330f729Sjoerg // Lexer Event Handling.
6917330f729Sjoerg //===----------------------------------------------------------------------===//
6927330f729Sjoerg
6937330f729Sjoerg /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
6947330f729Sjoerg /// identifier information for the token and install it into the token,
6957330f729Sjoerg /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const6967330f729Sjoerg IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
6977330f729Sjoerg assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
6987330f729Sjoerg
6997330f729Sjoerg // Look up this token, see if it is a macro, or if it is a language keyword.
7007330f729Sjoerg IdentifierInfo *II;
7017330f729Sjoerg if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
7027330f729Sjoerg // No cleaning needed, just use the characters from the lexed buffer.
7037330f729Sjoerg II = getIdentifierInfo(Identifier.getRawIdentifier());
7047330f729Sjoerg } else {
7057330f729Sjoerg // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
7067330f729Sjoerg SmallString<64> IdentifierBuffer;
7077330f729Sjoerg StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
7087330f729Sjoerg
7097330f729Sjoerg if (Identifier.hasUCN()) {
7107330f729Sjoerg SmallString<64> UCNIdentifierBuffer;
7117330f729Sjoerg expandUCNs(UCNIdentifierBuffer, CleanedStr);
7127330f729Sjoerg II = getIdentifierInfo(UCNIdentifierBuffer);
7137330f729Sjoerg } else {
7147330f729Sjoerg II = getIdentifierInfo(CleanedStr);
7157330f729Sjoerg }
7167330f729Sjoerg }
7177330f729Sjoerg
7187330f729Sjoerg // Update the token info (identifier info and appropriate token kind).
7197330f729Sjoerg Identifier.setIdentifierInfo(II);
7207330f729Sjoerg if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
7217330f729Sjoerg getSourceManager().isInSystemHeader(Identifier.getLocation()))
7227330f729Sjoerg Identifier.setKind(tok::identifier);
7237330f729Sjoerg else
7247330f729Sjoerg Identifier.setKind(II->getTokenID());
7257330f729Sjoerg
7267330f729Sjoerg return II;
7277330f729Sjoerg }
7287330f729Sjoerg
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)7297330f729Sjoerg void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
7307330f729Sjoerg PoisonReasons[II] = DiagID;
7317330f729Sjoerg }
7327330f729Sjoerg
PoisonSEHIdentifiers(bool Poison)7337330f729Sjoerg void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
7347330f729Sjoerg assert(Ident__exception_code && Ident__exception_info);
7357330f729Sjoerg assert(Ident___exception_code && Ident___exception_info);
7367330f729Sjoerg Ident__exception_code->setIsPoisoned(Poison);
7377330f729Sjoerg Ident___exception_code->setIsPoisoned(Poison);
7387330f729Sjoerg Ident_GetExceptionCode->setIsPoisoned(Poison);
7397330f729Sjoerg Ident__exception_info->setIsPoisoned(Poison);
7407330f729Sjoerg Ident___exception_info->setIsPoisoned(Poison);
7417330f729Sjoerg Ident_GetExceptionInfo->setIsPoisoned(Poison);
7427330f729Sjoerg Ident__abnormal_termination->setIsPoisoned(Poison);
7437330f729Sjoerg Ident___abnormal_termination->setIsPoisoned(Poison);
7447330f729Sjoerg Ident_AbnormalTermination->setIsPoisoned(Poison);
7457330f729Sjoerg }
7467330f729Sjoerg
HandlePoisonedIdentifier(Token & Identifier)7477330f729Sjoerg void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
7487330f729Sjoerg assert(Identifier.getIdentifierInfo() &&
7497330f729Sjoerg "Can't handle identifiers without identifier info!");
7507330f729Sjoerg llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
7517330f729Sjoerg PoisonReasons.find(Identifier.getIdentifierInfo());
7527330f729Sjoerg if(it == PoisonReasons.end())
7537330f729Sjoerg Diag(Identifier, diag::err_pp_used_poisoned_id);
7547330f729Sjoerg else
7557330f729Sjoerg Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
7567330f729Sjoerg }
7577330f729Sjoerg
/// Returns a diagnostic message kind for reporting a future keyword as
/// appropriate for the identifier and specified language.
///
/// \param II the identifier to diagnose; it must be flagged as a
/// future-compat keyword (checked by assertion).
/// \param LangOpts the language options; only the CPlusPlus flag is
/// consulted here.
/// \return the warning kind selected by the identifier's name.
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // The .Case list is generated from TokenKinds.def: each CXX11_KEYWORD and
  // CXX20_KEYWORD entry expands to one .Case mapping the keyword's spelling
  // to the matching warning.
  if (LangOpts.CPlusPlus)
    return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#define CXX20_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx20_keyword)
#include "clang/Basic/TokenKinds.def"
        // char8_t is not modeled as a CXX20_KEYWORD because it's not
        // unconditionally enabled in C++20 mode. (It can be disabled
        // by -fno-char8_t.)
        .Case("char8_t", diag::warn_cxx20_keyword)
        ;

  // Reached only when not compiling C++; no mapping is provided for that
  // case.
  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
7807330f729Sjoerg
updateOutOfDateIdentifier(IdentifierInfo & II) const7817330f729Sjoerg void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
7827330f729Sjoerg assert(II.isOutOfDate() && "not out of date");
7837330f729Sjoerg getExternalSource()->updateOutOfDateIdentifier(II);
7847330f729Sjoerg }
7857330f729Sjoerg
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \param Identifier the identifier token; it must already carry identifier
/// info (checked by assertion). Its kind and flags may be updated.
/// \return the result of HandleMacroExpandedIdentifier when the identifier is
/// expanded as a macro; \c true in every other case.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    bool CurrentIsPoisoned = false;
    const bool IsSpecialVariadicMacro =
        &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
    // Save the current poisoned state so the update below cannot change it.
    if (IsSpecialVariadicMacro)
      CurrentIsPoisoned = II.isPoisoned();

    updateOutOfDateIdentifier(II);
    Identifier.setKind(II.getTokenID());

    // Restore the poisoned state that was in effect before the update.
    if (IsSpecialVariadicMacro)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  //
  // Likewise if this is the C++ Modules TS import keyword.
  if (((LastTokenWasAt && II.isModulesImport()) ||
       Identifier.is(tok::kw_import)) &&
      !InMacroArgs && !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Record where the import started, reset the collected module path, and
    // switch the lexer kind so that Lex() dispatches the following tokens to
    // LexAfterModuleImport.
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}
8857330f729Sjoerg
/// Lex the next token into \p Result.
///
/// The token is requested from whichever lexer kind is current, repeating
/// until one of them reports that it produced a token. Afterwards this
/// records a code-completion identifier if present, updates the C++20
/// import-seq tracking state, remembers whether the token was an '@', and
/// finally counts the token and forwards it to the OnToken callback when the
/// conditions at the end of the function hold.
void Preprocessor::Lex(Token &Result) {
  ++LexLevel;

  // We loop here until a lex function returns a token; this avoids recursion.
  bool ReturnedToken;
  do {
    switch (CurLexerKind) {
    case CLK_Lexer:
      ReturnedToken = CurLexer->Lex(Result);
      break;
    case CLK_TokenLexer:
      ReturnedToken = CurTokenLexer->Lex(Result);
      break;
    case CLK_CachingLexer:
      CachingLex(Result);
      ReturnedToken = true;
      break;
    case CLK_LexAfterModuleImport:
      ReturnedToken = LexAfterModuleImport(Result);
      break;
    }
  } while (!ReturnedToken);

  // Bail out immediately on an unknown token once the module loader has
  // reported a fatal failure.
  // NOTE(review): this early return skips the --LexLevel below, leaving
  // LexLevel incremented; confirm that this is intentional.
  if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
    return;

  if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
    // Remember the identifier before code completion token.
    setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
    setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
    // Set IdentifierInfo to null to avoid confusing code that handles both
    // identifiers and completion tokens.
    Result.setIdentifierInfo(nullptr);
  }

  // Update ImportSeqState to track our position within a C++20 import-seq
  // if this token is being produced as a result of phase 4 of translation.
  if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
      !Result.getFlag(Token::IsReinjected)) {
    switch (Result.getKind()) {
    case tok::l_paren: case tok::l_square: case tok::l_brace:
      ImportSeqState.handleOpenBracket();
      break;
    case tok::r_paren: case tok::r_square:
      ImportSeqState.handleCloseBracket();
      break;
    case tok::r_brace:
      ImportSeqState.handleCloseBrace();
      break;
    case tok::semi:
      ImportSeqState.handleSemi();
      break;
    case tok::header_name:
    case tok::annot_header_unit:
      ImportSeqState.handleHeaderName();
      break;
    case tok::kw_export:
      ImportSeqState.handleExport();
      break;
    case tok::identifier:
      if (Result.getIdentifierInfo()->isModulesImport()) {
        ImportSeqState.handleImport();
        if (ImportSeqState.afterImportSeq()) {
          // Arrange for the following tokens to be handled by
          // LexAfterModuleImport.
          ModuleImportLoc = Result.getLocation();
          ModuleImportPath.clear();
          ModuleImportExpectsIdentifier = true;
          CurLexerKind = CLK_LexAfterModuleImport;
        }
        break;
      }
      // Any other identifier is handled like a miscellaneous token.
      LLVM_FALLTHROUGH;
    default:
      ImportSeqState.handleMisc();
      break;
    }
  }

  LastTokenWasAt = Result.is(tok::at);
  --LexLevel;

  // Tokens flagged as reinjected are neither counted nor reported. Counting
  // happens only at the outermost lex level; the callback also fires at
  // nested levels when PreprocessToken is set.
  if ((LexLevel == 0 || PreprocessToken) &&
      !Result.getFlag(Token::IsReinjected)) {
    if (LexLevel == 0)
      ++TokenCount;
    if (OnToken)
      OnToken(Result);
  }
}
9747330f729Sjoerg
/// Lex a header-name token (including one formed from header-name-tokens if
/// \p AllowMacroExpansion is \c true).
///
/// \param FilenameTok Filled in with the next token. On success, this will
///        be a header_name token. On failure, it will be whatever other
///        token was found instead.
/// \param AllowMacroExpansion If \c true, allow the header name to be formed
///        by macro expansion (concatenating tokens as necessary if the first
///        token is a '<').
/// \return \c true if we reached EOD or EOF while looking for a > token in
///         a concatenated header name and diagnosed it. \c false otherwise.
bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
  // Lex using header-name tokenization rules if tokens are being lexed from
  // a file. Just grab a token normally if we're in a macro expansion.
  if (CurPPLexer)
    CurPPLexer->LexIncludeFilename(FilenameTok);
  else
    Lex(FilenameTok);

  // This could be a <foo/bar.h> file coming from a macro expansion.  In this
  // case, glue the tokens together into an angle_string_literal token.
  SmallString<128> FilenameBuffer;
  if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
    // Remember the flags of the '<' token; they are re-applied to the
    // synthesized header_name token below.
    bool StartOfLine = FilenameTok.isAtStartOfLine();
    bool LeadingSpace = FilenameTok.hasLeadingSpace();
    bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();

    SourceLocation Start = FilenameTok.getLocation();
    SourceLocation End;
    FilenameBuffer.push_back('<');

    // Consume tokens until we find a '>'.
    // FIXME: A header-name could be formed starting or ending with an
    // alternative token. It's not clear whether that's ill-formed in all
    // cases.
    while (FilenameTok.isNot(tok::greater)) {
      Lex(FilenameTok);
      if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
        Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
        Diag(Start, diag::note_matching) << tok::less;
        return true;
      }

      End = FilenameTok.getLocation();

      // FIXME: Provide code completion for #includes.
      if (FilenameTok.is(tok::code_completion)) {
        setCodeCompletionReached();
        Lex(FilenameTok);
        continue;
      }

      // Append the spelling of this token to the buffer. If there was a space
      // before it, add it now.
      if (FilenameTok.hasLeadingSpace())
        FilenameBuffer.push_back(' ');

      // Get the spelling of the token, directly into FilenameBuffer if
      // possible.
      size_t PreAppendSize = FilenameBuffer.size();
      FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());

      // getSpelling may redirect BufPtr to storage other than the space just
      // reserved; that case is detected by the pointer comparison below.
      const char *BufPtr = &FilenameBuffer[PreAppendSize];
      unsigned ActualLen = getSpelling(FilenameTok, BufPtr);

      // If the token was spelled somewhere else, copy it into FilenameBuffer.
      if (BufPtr != &FilenameBuffer[PreAppendSize])
        memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);

      // Resize FilenameBuffer to the correct size.
      if (FilenameTok.getLength() != ActualLen)
        FilenameBuffer.resize(PreAppendSize + ActualLen);
    }

    // Replace the final '>' token with a fresh header_name token that covers
    // the accumulated spelling and carries the flags saved from the '<'.
    FilenameTok.startToken();
    FilenameTok.setKind(tok::header_name);
    FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
    FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
    FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
    CreateString(FilenameBuffer, FilenameTok, Start, End);
  } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
    // Convert a string-literal token of the form " h-char-sequence "
    // (produced by macro expansion) into a header-name token.
    //
    // The rules for header-names don't quite match the rules for
    // string-literals, but all the places where they differ result in
    // undefined behavior, so we can and do treat them the same.
    //
    // A string-literal with a prefix or suffix is not translated into a
    // header-name. This could theoretically be observable via the C++20
    // context-sensitive header-name formation rules.
    StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
    if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
      FilenameTok.setKind(tok::header_name);
  }

  return false;
}
10737330f729Sjoerg
10747330f729Sjoerg /// Collect the tokens of a C++20 pp-import-suffix.
CollectPpImportSuffix(SmallVectorImpl<Token> & Toks)10757330f729Sjoerg void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
10767330f729Sjoerg // FIXME: For error recovery, consider recognizing attribute syntax here
10777330f729Sjoerg // and terminating / diagnosing a missing semicolon if we find anything
10787330f729Sjoerg // else? (Can we leave that to the parser?)
10797330f729Sjoerg unsigned BracketDepth = 0;
10807330f729Sjoerg while (true) {
10817330f729Sjoerg Toks.emplace_back();
10827330f729Sjoerg Lex(Toks.back());
10837330f729Sjoerg
10847330f729Sjoerg switch (Toks.back().getKind()) {
10857330f729Sjoerg case tok::l_paren: case tok::l_square: case tok::l_brace:
10867330f729Sjoerg ++BracketDepth;
10877330f729Sjoerg break;
10887330f729Sjoerg
10897330f729Sjoerg case tok::r_paren: case tok::r_square: case tok::r_brace:
10907330f729Sjoerg if (BracketDepth == 0)
10917330f729Sjoerg return;
10927330f729Sjoerg --BracketDepth;
10937330f729Sjoerg break;
10947330f729Sjoerg
10957330f729Sjoerg case tok::semi:
10967330f729Sjoerg if (BracketDepth == 0)
10977330f729Sjoerg return;
10987330f729Sjoerg break;
10997330f729Sjoerg
11007330f729Sjoerg case tok::eof:
11017330f729Sjoerg return;
11027330f729Sjoerg
11037330f729Sjoerg default:
11047330f729Sjoerg break;
11057330f729Sjoerg }
11067330f729Sjoerg }
11077330f729Sjoerg }
11087330f729Sjoerg
11097330f729Sjoerg
11107330f729Sjoerg /// Lex a token following the 'import' contextual keyword.
11117330f729Sjoerg ///
11127330f729Sjoerg /// pp-import: [C++20]
11137330f729Sjoerg /// import header-name pp-import-suffix[opt] ;
11147330f729Sjoerg /// import header-name-tokens pp-import-suffix[opt] ;
11157330f729Sjoerg /// [ObjC] @ import module-name ;
11167330f729Sjoerg /// [Clang] import module-name ;
11177330f729Sjoerg ///
11187330f729Sjoerg /// header-name-tokens:
11197330f729Sjoerg /// string-literal
11207330f729Sjoerg /// < [any sequence of preprocessing-tokens other than >] >
11217330f729Sjoerg ///
11227330f729Sjoerg /// module-name:
11237330f729Sjoerg /// module-name-qualifier[opt] identifier
11247330f729Sjoerg ///
11257330f729Sjoerg /// module-name-qualifier
11267330f729Sjoerg /// module-name-qualifier[opt] identifier .
11277330f729Sjoerg ///
11287330f729Sjoerg /// We respond to a pp-import by importing macros from the named module.
LexAfterModuleImport(Token & Result)11297330f729Sjoerg bool Preprocessor::LexAfterModuleImport(Token &Result) {
11307330f729Sjoerg // Figure out what kind of lexer we actually have.
11317330f729Sjoerg recomputeCurLexerKind();
11327330f729Sjoerg
11337330f729Sjoerg // Lex the next token. The header-name lexing rules are used at the start of
11347330f729Sjoerg // a pp-import.
11357330f729Sjoerg //
11367330f729Sjoerg // For now, we only support header-name imports in C++20 mode.
11377330f729Sjoerg // FIXME: Should we allow this in all language modes that support an import
11387330f729Sjoerg // declaration as an extension?
11397330f729Sjoerg if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
11407330f729Sjoerg if (LexHeaderName(Result))
11417330f729Sjoerg return true;
11427330f729Sjoerg } else {
11437330f729Sjoerg Lex(Result);
11447330f729Sjoerg }
11457330f729Sjoerg
11467330f729Sjoerg // Allocate a holding buffer for a sequence of tokens and introduce it into
11477330f729Sjoerg // the token stream.
11487330f729Sjoerg auto EnterTokens = [this](ArrayRef<Token> Toks) {
11497330f729Sjoerg auto ToksCopy = std::make_unique<Token[]>(Toks.size());
11507330f729Sjoerg std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
11517330f729Sjoerg EnterTokenStream(std::move(ToksCopy), Toks.size(),
11527330f729Sjoerg /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
11537330f729Sjoerg };
11547330f729Sjoerg
11557330f729Sjoerg // Check for a header-name.
11567330f729Sjoerg SmallVector<Token, 32> Suffix;
11577330f729Sjoerg if (Result.is(tok::header_name)) {
11587330f729Sjoerg // Enter the header-name token into the token stream; a Lex action cannot
11597330f729Sjoerg // both return a token and cache tokens (doing so would corrupt the token
11607330f729Sjoerg // cache if the call to Lex comes from CachingLex / PeekAhead).
11617330f729Sjoerg Suffix.push_back(Result);
11627330f729Sjoerg
11637330f729Sjoerg // Consume the pp-import-suffix and expand any macros in it now. We'll add
11647330f729Sjoerg // it back into the token stream later.
11657330f729Sjoerg CollectPpImportSuffix(Suffix);
11667330f729Sjoerg if (Suffix.back().isNot(tok::semi)) {
11677330f729Sjoerg // This is not a pp-import after all.
11687330f729Sjoerg EnterTokens(Suffix);
11697330f729Sjoerg return false;
11707330f729Sjoerg }
11717330f729Sjoerg
11727330f729Sjoerg // C++2a [cpp.module]p1:
11737330f729Sjoerg // The ';' preprocessing-token terminating a pp-import shall not have
11747330f729Sjoerg // been produced by macro replacement.
11757330f729Sjoerg SourceLocation SemiLoc = Suffix.back().getLocation();
11767330f729Sjoerg if (SemiLoc.isMacroID())
11777330f729Sjoerg Diag(SemiLoc, diag::err_header_import_semi_in_macro);
11787330f729Sjoerg
11797330f729Sjoerg // Reconstitute the import token.
11807330f729Sjoerg Token ImportTok;
11817330f729Sjoerg ImportTok.startToken();
11827330f729Sjoerg ImportTok.setKind(tok::kw_import);
11837330f729Sjoerg ImportTok.setLocation(ModuleImportLoc);
11847330f729Sjoerg ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
11857330f729Sjoerg ImportTok.setLength(6);
11867330f729Sjoerg
11877330f729Sjoerg auto Action = HandleHeaderIncludeOrImport(
11887330f729Sjoerg /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
11897330f729Sjoerg switch (Action.Kind) {
11907330f729Sjoerg case ImportAction::None:
11917330f729Sjoerg break;
11927330f729Sjoerg
11937330f729Sjoerg case ImportAction::ModuleBegin:
11947330f729Sjoerg // Let the parser know we're textually entering the module.
11957330f729Sjoerg Suffix.emplace_back();
11967330f729Sjoerg Suffix.back().startToken();
11977330f729Sjoerg Suffix.back().setKind(tok::annot_module_begin);
11987330f729Sjoerg Suffix.back().setLocation(SemiLoc);
11997330f729Sjoerg Suffix.back().setAnnotationEndLoc(SemiLoc);
12007330f729Sjoerg Suffix.back().setAnnotationValue(Action.ModuleForHeader);
12017330f729Sjoerg LLVM_FALLTHROUGH;
12027330f729Sjoerg
12037330f729Sjoerg case ImportAction::ModuleImport:
12047330f729Sjoerg case ImportAction::SkippedModuleImport:
12057330f729Sjoerg // We chose to import (or textually enter) the file. Convert the
12067330f729Sjoerg // header-name token into a header unit annotation token.
12077330f729Sjoerg Suffix[0].setKind(tok::annot_header_unit);
12087330f729Sjoerg Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
12097330f729Sjoerg Suffix[0].setAnnotationValue(Action.ModuleForHeader);
12107330f729Sjoerg // FIXME: Call the moduleImport callback?
12117330f729Sjoerg break;
1212*e038c9c4Sjoerg case ImportAction::Failure:
1213*e038c9c4Sjoerg assert(TheModuleLoader.HadFatalFailure &&
1214*e038c9c4Sjoerg "This should be an early exit only to a fatal error");
1215*e038c9c4Sjoerg Result.setKind(tok::eof);
1216*e038c9c4Sjoerg CurLexer->cutOffLexing();
1217*e038c9c4Sjoerg EnterTokens(Suffix);
1218*e038c9c4Sjoerg return true;
12197330f729Sjoerg }
12207330f729Sjoerg
12217330f729Sjoerg EnterTokens(Suffix);
12227330f729Sjoerg return false;
12237330f729Sjoerg }
12247330f729Sjoerg
12257330f729Sjoerg // The token sequence
12267330f729Sjoerg //
12277330f729Sjoerg // import identifier (. identifier)*
12287330f729Sjoerg //
12297330f729Sjoerg // indicates a module import directive. We already saw the 'import'
12307330f729Sjoerg // contextual keyword, so now we're looking for the identifiers.
12317330f729Sjoerg if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
12327330f729Sjoerg // We expected to see an identifier here, and we did; continue handling
12337330f729Sjoerg // identifiers.
12347330f729Sjoerg ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
12357330f729Sjoerg Result.getLocation()));
12367330f729Sjoerg ModuleImportExpectsIdentifier = false;
12377330f729Sjoerg CurLexerKind = CLK_LexAfterModuleImport;
12387330f729Sjoerg return true;
12397330f729Sjoerg }
12407330f729Sjoerg
12417330f729Sjoerg // If we're expecting a '.' or a ';', and we got a '.', then wait until we
12427330f729Sjoerg // see the next identifier. (We can also see a '[[' that begins an
12437330f729Sjoerg // attribute-specifier-seq here under the C++ Modules TS.)
12447330f729Sjoerg if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
12457330f729Sjoerg ModuleImportExpectsIdentifier = true;
12467330f729Sjoerg CurLexerKind = CLK_LexAfterModuleImport;
12477330f729Sjoerg return true;
12487330f729Sjoerg }
12497330f729Sjoerg
12507330f729Sjoerg // If we didn't recognize a module name at all, this is not a (valid) import.
12517330f729Sjoerg if (ModuleImportPath.empty() || Result.is(tok::eof))
12527330f729Sjoerg return true;
12537330f729Sjoerg
12547330f729Sjoerg // Consume the pp-import-suffix and expand any macros in it now, if we're not
12557330f729Sjoerg // at the semicolon already.
12567330f729Sjoerg SourceLocation SemiLoc = Result.getLocation();
12577330f729Sjoerg if (Result.isNot(tok::semi)) {
12587330f729Sjoerg Suffix.push_back(Result);
12597330f729Sjoerg CollectPpImportSuffix(Suffix);
12607330f729Sjoerg if (Suffix.back().isNot(tok::semi)) {
12617330f729Sjoerg // This is not an import after all.
12627330f729Sjoerg EnterTokens(Suffix);
12637330f729Sjoerg return false;
12647330f729Sjoerg }
12657330f729Sjoerg SemiLoc = Suffix.back().getLocation();
12667330f729Sjoerg }
12677330f729Sjoerg
12687330f729Sjoerg // Under the Modules TS, the dot is just part of the module name, and not
12697330f729Sjoerg // a real hierarchy separator. Flatten such module names now.
12707330f729Sjoerg //
12717330f729Sjoerg // FIXME: Is this the right level to be performing this transformation?
12727330f729Sjoerg std::string FlatModuleName;
12737330f729Sjoerg if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
12747330f729Sjoerg for (auto &Piece : ModuleImportPath) {
12757330f729Sjoerg if (!FlatModuleName.empty())
12767330f729Sjoerg FlatModuleName += ".";
12777330f729Sjoerg FlatModuleName += Piece.first->getName();
12787330f729Sjoerg }
12797330f729Sjoerg SourceLocation FirstPathLoc = ModuleImportPath[0].second;
12807330f729Sjoerg ModuleImportPath.clear();
12817330f729Sjoerg ModuleImportPath.push_back(
12827330f729Sjoerg std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
12837330f729Sjoerg }
12847330f729Sjoerg
12857330f729Sjoerg Module *Imported = nullptr;
12867330f729Sjoerg if (getLangOpts().Modules) {
12877330f729Sjoerg Imported = TheModuleLoader.loadModule(ModuleImportLoc,
12887330f729Sjoerg ModuleImportPath,
12897330f729Sjoerg Module::Hidden,
12907330f729Sjoerg /*IsInclusionDirective=*/false);
12917330f729Sjoerg if (Imported)
12927330f729Sjoerg makeModuleVisible(Imported, SemiLoc);
12937330f729Sjoerg }
12947330f729Sjoerg if (Callbacks)
12957330f729Sjoerg Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
12967330f729Sjoerg
12977330f729Sjoerg if (!Suffix.empty()) {
12987330f729Sjoerg EnterTokens(Suffix);
12997330f729Sjoerg return false;
13007330f729Sjoerg }
13017330f729Sjoerg return true;
13027330f729Sjoerg }
13037330f729Sjoerg
makeModuleVisible(Module * M,SourceLocation Loc)13047330f729Sjoerg void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
13057330f729Sjoerg CurSubmoduleState->VisibleModules.setVisible(
13067330f729Sjoerg M, Loc, [](Module *) {},
13077330f729Sjoerg [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
13087330f729Sjoerg // FIXME: Include the path in the diagnostic.
13097330f729Sjoerg // FIXME: Include the import location for the conflicting module.
13107330f729Sjoerg Diag(ModuleImportLoc, diag::warn_module_conflict)
13117330f729Sjoerg << Path[0]->getFullModuleName()
13127330f729Sjoerg << Conflict->getFullModuleName()
13137330f729Sjoerg << Message;
13147330f729Sjoerg });
13157330f729Sjoerg
13167330f729Sjoerg // Add this module to the imports list of the currently-built submodule.
13177330f729Sjoerg if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
13187330f729Sjoerg BuildingSubmoduleStack.back().M->Imports.insert(M);
13197330f729Sjoerg }
13207330f729Sjoerg
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)13217330f729Sjoerg bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
13227330f729Sjoerg const char *DiagnosticTag,
13237330f729Sjoerg bool AllowMacroExpansion) {
13247330f729Sjoerg // We need at least one string literal.
13257330f729Sjoerg if (Result.isNot(tok::string_literal)) {
13267330f729Sjoerg Diag(Result, diag::err_expected_string_literal)
13277330f729Sjoerg << /*Source='in...'*/0 << DiagnosticTag;
13287330f729Sjoerg return false;
13297330f729Sjoerg }
13307330f729Sjoerg
13317330f729Sjoerg // Lex string literal tokens, optionally with macro expansion.
13327330f729Sjoerg SmallVector<Token, 4> StrToks;
13337330f729Sjoerg do {
13347330f729Sjoerg StrToks.push_back(Result);
13357330f729Sjoerg
13367330f729Sjoerg if (Result.hasUDSuffix())
13377330f729Sjoerg Diag(Result, diag::err_invalid_string_udl);
13387330f729Sjoerg
13397330f729Sjoerg if (AllowMacroExpansion)
13407330f729Sjoerg Lex(Result);
13417330f729Sjoerg else
13427330f729Sjoerg LexUnexpandedToken(Result);
13437330f729Sjoerg } while (Result.is(tok::string_literal));
13447330f729Sjoerg
13457330f729Sjoerg // Concatenate and parse the strings.
13467330f729Sjoerg StringLiteralParser Literal(StrToks, *this);
13477330f729Sjoerg assert(Literal.isAscii() && "Didn't allow wide strings in");
13487330f729Sjoerg
13497330f729Sjoerg if (Literal.hadError)
13507330f729Sjoerg return false;
13517330f729Sjoerg
13527330f729Sjoerg if (Literal.Pascal) {
13537330f729Sjoerg Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
13547330f729Sjoerg << /*Source='in...'*/0 << DiagnosticTag;
13557330f729Sjoerg return false;
13567330f729Sjoerg }
13577330f729Sjoerg
1358*e038c9c4Sjoerg String = std::string(Literal.GetString());
13597330f729Sjoerg return true;
13607330f729Sjoerg }
13617330f729Sjoerg
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)13627330f729Sjoerg bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
13637330f729Sjoerg assert(Tok.is(tok::numeric_constant));
13647330f729Sjoerg SmallString<8> IntegerBuffer;
13657330f729Sjoerg bool NumberInvalid = false;
13667330f729Sjoerg StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
13677330f729Sjoerg if (NumberInvalid)
13687330f729Sjoerg return false;
1369*e038c9c4Sjoerg NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1370*e038c9c4Sjoerg getLangOpts(), getTargetInfo(),
1371*e038c9c4Sjoerg getDiagnostics());
13727330f729Sjoerg if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
13737330f729Sjoerg return false;
13747330f729Sjoerg llvm::APInt APVal(64, 0);
13757330f729Sjoerg if (Literal.GetIntegerValue(APVal))
13767330f729Sjoerg return false;
13777330f729Sjoerg Lex(Tok);
13787330f729Sjoerg Value = APVal.getLimitedValue();
13797330f729Sjoerg return true;
13807330f729Sjoerg }
13817330f729Sjoerg
addCommentHandler(CommentHandler * Handler)13827330f729Sjoerg void Preprocessor::addCommentHandler(CommentHandler *Handler) {
13837330f729Sjoerg assert(Handler && "NULL comment handler");
13847330f729Sjoerg assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
13857330f729Sjoerg "Comment handler already registered");
13867330f729Sjoerg CommentHandlers.push_back(Handler);
13877330f729Sjoerg }
13887330f729Sjoerg
removeCommentHandler(CommentHandler * Handler)13897330f729Sjoerg void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
13907330f729Sjoerg std::vector<CommentHandler *>::iterator Pos =
13917330f729Sjoerg llvm::find(CommentHandlers, Handler);
13927330f729Sjoerg assert(Pos != CommentHandlers.end() && "Comment handler not registered");
13937330f729Sjoerg CommentHandlers.erase(Pos);
13947330f729Sjoerg }
13957330f729Sjoerg
HandleComment(Token & result,SourceRange Comment)13967330f729Sjoerg bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
13977330f729Sjoerg bool AnyPendingTokens = false;
13987330f729Sjoerg for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
13997330f729Sjoerg HEnd = CommentHandlers.end();
14007330f729Sjoerg H != HEnd; ++H) {
14017330f729Sjoerg if ((*H)->HandleComment(*this, Comment))
14027330f729Sjoerg AnyPendingTokens = true;
14037330f729Sjoerg }
14047330f729Sjoerg if (!AnyPendingTokens || getCommentRetentionState())
14057330f729Sjoerg return false;
14067330f729Sjoerg Lex(result);
14077330f729Sjoerg return true;
14087330f729Sjoerg }
14097330f729Sjoerg
14107330f729Sjoerg ModuleLoader::~ModuleLoader() = default;
14117330f729Sjoerg
14127330f729Sjoerg CommentHandler::~CommentHandler() = default;
14137330f729Sjoerg
1414*e038c9c4Sjoerg EmptylineHandler::~EmptylineHandler() = default;
1415*e038c9c4Sjoerg
14167330f729Sjoerg CodeCompletionHandler::~CodeCompletionHandler() = default;
14177330f729Sjoerg
createPreprocessingRecord()14187330f729Sjoerg void Preprocessor::createPreprocessingRecord() {
14197330f729Sjoerg if (Record)
14207330f729Sjoerg return;
14217330f729Sjoerg
14227330f729Sjoerg Record = new PreprocessingRecord(getSourceManager());
14237330f729Sjoerg addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
14247330f729Sjoerg }
1425