1fe6060f1SDimitry Andric //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric 9fe6060f1SDimitry Andric #include "clang/Analysis/MacroExpansionContext.h" 10fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 11bdd1243dSDimitry Andric #include <optional> 12fe6060f1SDimitry Andric 13fe6060f1SDimitry Andric #define DEBUG_TYPE "macro-expansion-context" 14fe6060f1SDimitry Andric 15*0fca6ea1SDimitry Andric static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, 16fe6060f1SDimitry Andric clang::Token Tok); 17fe6060f1SDimitry Andric 18fe6060f1SDimitry Andric namespace clang { 19fe6060f1SDimitry Andric namespace detail { 20fe6060f1SDimitry Andric class MacroExpansionRangeRecorder : public PPCallbacks { 21fe6060f1SDimitry Andric const Preprocessor &PP; 22fe6060f1SDimitry Andric SourceManager &SM; 23fe6060f1SDimitry Andric MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; 24fe6060f1SDimitry Andric 25fe6060f1SDimitry Andric public: 26fe6060f1SDimitry Andric explicit MacroExpansionRangeRecorder( 27fe6060f1SDimitry Andric const Preprocessor &PP, SourceManager &SM, 28fe6060f1SDimitry Andric MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) 29fe6060f1SDimitry Andric : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} 30fe6060f1SDimitry Andric 31fe6060f1SDimitry Andric void MacroExpands(const Token &MacroName, const MacroDefinition &MD, 32fe6060f1SDimitry Andric SourceRange Range, const MacroArgs *Args) override { 33fe6060f1SDimitry Andric // Ignore annotation tokens like: _Pragma("pack(push, 1)") 34fe6060f1SDimitry Andric if (MacroName.getIdentifierInfo()->getName() == "_Pragma") 35fe6060f1SDimitry Andric return; 36fe6060f1SDimitry Andric 37fe6060f1SDimitry Andric SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation()); 38fe6060f1SDimitry Andric assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); 39fe6060f1SDimitry Andric 40fe6060f1SDimitry Andric const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { 41fe6060f1SDimitry Andric // If the range is empty, use the length of the macro. 42fe6060f1SDimitry Andric if (Range.getBegin() == Range.getEnd()) 43fe6060f1SDimitry Andric return SM.getExpansionLoc( 44fe6060f1SDimitry Andric MacroName.getLocation().getLocWithOffset(MacroName.getLength())); 45fe6060f1SDimitry Andric 46fe6060f1SDimitry Andric // Include the last character. 47fe6060f1SDimitry Andric return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1); 48fe6060f1SDimitry Andric }(); 49fe6060f1SDimitry Andric 50fe6060f1SDimitry Andric (void)PP; 51fe6060f1SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '"; 52fe6060f1SDimitry Andric dumpTokenInto(PP, llvm::dbgs(), MacroName); 53fe6060f1SDimitry Andric llvm::dbgs() 54fe6060f1SDimitry Andric << "' with length " << MacroName.getLength() << " at "; 55fe6060f1SDimitry Andric MacroNameBegin.print(llvm::dbgs(), SM); 56fe6060f1SDimitry Andric llvm::dbgs() << ", expansion end at "; 57fe6060f1SDimitry Andric ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); 58fe6060f1SDimitry Andric 59fe6060f1SDimitry Andric // If the expansion range is empty, use the identifier of the macro as a 60fe6060f1SDimitry Andric // range. 61fe6060f1SDimitry Andric MacroExpansionContext::ExpansionRangeMap::iterator It; 62fe6060f1SDimitry Andric bool Inserted; 63fe6060f1SDimitry Andric std::tie(It, Inserted) = 64fe6060f1SDimitry Andric ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd); 65fe6060f1SDimitry Andric if (Inserted) { 66fe6060f1SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "maps "; 67fe6060f1SDimitry Andric It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to "; 68fe6060f1SDimitry Andric It->getSecond().print(llvm::dbgs(), SM); 69fe6060f1SDimitry Andric llvm::dbgs() << '\n';); 70fe6060f1SDimitry Andric } else { 71fe6060f1SDimitry Andric if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) { 72fe6060f1SDimitry Andric It->getSecond() = ExpansionEnd; 73fe6060f1SDimitry Andric LLVM_DEBUG( 74fe6060f1SDimitry Andric llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM); 75fe6060f1SDimitry Andric llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM); 76fe6060f1SDimitry Andric llvm::dbgs() << '\n';); 77fe6060f1SDimitry Andric } 78fe6060f1SDimitry Andric } 79fe6060f1SDimitry Andric } 80fe6060f1SDimitry Andric }; 81fe6060f1SDimitry Andric } // namespace detail 82fe6060f1SDimitry Andric } // namespace clang 83fe6060f1SDimitry Andric 84fe6060f1SDimitry Andric using namespace clang; 85fe6060f1SDimitry Andric 86fe6060f1SDimitry Andric MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts) 87fe6060f1SDimitry Andric : LangOpts(LangOpts) {} 88fe6060f1SDimitry Andric 89fe6060f1SDimitry Andric void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { 90fe6060f1SDimitry Andric PP = &NewPP; 91fe6060f1SDimitry Andric SM = &NewPP.getSourceManager(); 92fe6060f1SDimitry Andric 93fe6060f1SDimitry Andric // Make sure that the Preprocessor does not outlive the MacroExpansionContext. 94fe6060f1SDimitry Andric PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>( 95fe6060f1SDimitry Andric *PP, *SM, ExpansionRanges)); 96fe6060f1SDimitry Andric // Same applies here. 97fe6060f1SDimitry Andric PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); 98fe6060f1SDimitry Andric } 99fe6060f1SDimitry Andric 100bdd1243dSDimitry Andric std::optional<StringRef> 101fe6060f1SDimitry Andric MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { 102fe6060f1SDimitry Andric if (MacroExpansionLoc.isMacroID()) 103bdd1243dSDimitry Andric return std::nullopt; 104fe6060f1SDimitry Andric 105bdd1243dSDimitry Andric // If there was no macro expansion at that location, return std::nullopt. 106fe6060f1SDimitry Andric if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end()) 107bdd1243dSDimitry Andric return std::nullopt; 108fe6060f1SDimitry Andric 109fe6060f1SDimitry Andric // There was macro expansion, but resulted in no tokens, return empty string. 110fe6060f1SDimitry Andric const auto It = ExpandedTokens.find_as(MacroExpansionLoc); 111fe6060f1SDimitry Andric if (It == ExpandedTokens.end()) 112fe6060f1SDimitry Andric return StringRef{""}; 113fe6060f1SDimitry Andric 114fe6060f1SDimitry Andric // Otherwise we have the actual token sequence as string. 115fe6060f1SDimitry Andric return It->getSecond().str(); 116fe6060f1SDimitry Andric } 117fe6060f1SDimitry Andric 118bdd1243dSDimitry Andric std::optional<StringRef> 119fe6060f1SDimitry Andric MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { 120fe6060f1SDimitry Andric if (MacroExpansionLoc.isMacroID()) 121bdd1243dSDimitry Andric return std::nullopt; 122fe6060f1SDimitry Andric 123fe6060f1SDimitry Andric const auto It = ExpansionRanges.find_as(MacroExpansionLoc); 124fe6060f1SDimitry Andric if (It == ExpansionRanges.end()) 125bdd1243dSDimitry Andric return std::nullopt; 126fe6060f1SDimitry Andric 127fe6060f1SDimitry Andric assert(It->getFirst() != It->getSecond() && 128fe6060f1SDimitry Andric "Every macro expansion must cover a non-empty range."); 129fe6060f1SDimitry Andric 130fe6060f1SDimitry Andric return Lexer::getSourceText( 131fe6060f1SDimitry Andric CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM, 132fe6060f1SDimitry Andric LangOpts); 133fe6060f1SDimitry Andric } 134fe6060f1SDimitry Andric 135fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpansionRanges() const { 136fe6060f1SDimitry Andric dumpExpansionRangesToStream(llvm::dbgs()); 137fe6060f1SDimitry Andric } 138fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpandedTexts() const { 139fe6060f1SDimitry Andric dumpExpandedTextsToStream(llvm::dbgs()); 140fe6060f1SDimitry Andric } 141fe6060f1SDimitry Andric 142fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const { 143fe6060f1SDimitry Andric std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges; 144fe6060f1SDimitry Andric LocalExpansionRanges.reserve(ExpansionRanges.size()); 145fe6060f1SDimitry Andric for (const auto &Record : ExpansionRanges) 146fe6060f1SDimitry Andric LocalExpansionRanges.emplace_back( 147fe6060f1SDimitry Andric std::make_pair(Record.getFirst(), Record.getSecond())); 148fe6060f1SDimitry Andric llvm::sort(LocalExpansionRanges); 149fe6060f1SDimitry Andric 150fe6060f1SDimitry Andric OS << "\n=============== ExpansionRanges ===============\n"; 151fe6060f1SDimitry Andric for (const auto &Record : LocalExpansionRanges) { 152fe6060f1SDimitry Andric OS << "> "; 153fe6060f1SDimitry Andric Record.first.print(OS, *SM); 154fe6060f1SDimitry Andric OS << ", "; 155fe6060f1SDimitry Andric Record.second.print(OS, *SM); 156fe6060f1SDimitry Andric OS << '\n'; 157fe6060f1SDimitry Andric } 158fe6060f1SDimitry Andric } 159fe6060f1SDimitry Andric 160fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const { 161fe6060f1SDimitry Andric std::vector<std::pair<SourceLocation, MacroExpansionText>> 162fe6060f1SDimitry Andric LocalExpandedTokens; 163fe6060f1SDimitry Andric LocalExpandedTokens.reserve(ExpandedTokens.size()); 164fe6060f1SDimitry Andric for (const auto &Record : ExpandedTokens) 165fe6060f1SDimitry Andric LocalExpandedTokens.emplace_back( 166fe6060f1SDimitry Andric std::make_pair(Record.getFirst(), Record.getSecond())); 167fe6060f1SDimitry Andric llvm::sort(LocalExpandedTokens); 168fe6060f1SDimitry Andric 169fe6060f1SDimitry Andric OS << "\n=============== ExpandedTokens ===============\n"; 170fe6060f1SDimitry Andric for (const auto &Record : LocalExpandedTokens) { 171fe6060f1SDimitry Andric OS << "> "; 172fe6060f1SDimitry Andric Record.first.print(OS, *SM); 173fe6060f1SDimitry Andric OS << " -> '" << Record.second << "'\n"; 174fe6060f1SDimitry Andric } 175fe6060f1SDimitry Andric } 176fe6060f1SDimitry Andric 177fe6060f1SDimitry Andric static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) { 178fe6060f1SDimitry Andric assert(Tok.isNot(tok::raw_identifier)); 179fe6060f1SDimitry Andric 180fe6060f1SDimitry Andric // Ignore annotation tokens like: _Pragma("pack(push, 1)") 181fe6060f1SDimitry Andric if (Tok.isAnnotation()) 182fe6060f1SDimitry Andric return; 183fe6060f1SDimitry Andric 184fe6060f1SDimitry Andric if (IdentifierInfo *II = Tok.getIdentifierInfo()) { 185fe6060f1SDimitry Andric // FIXME: For now, we don't respect whitespaces between macro expanded 186fe6060f1SDimitry Andric // tokens. We just emit a space after every identifier to produce a valid 187fe6060f1SDimitry Andric // code for `int a ;` like expansions. 188fe6060f1SDimitry Andric // ^-^-- Space after the 'int' and 'a' identifiers. 189fe6060f1SDimitry Andric OS << II->getName() << ' '; 190fe6060f1SDimitry Andric } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { 191fe6060f1SDimitry Andric OS << StringRef(Tok.getLiteralData(), Tok.getLength()); 192fe6060f1SDimitry Andric } else { 193fe6060f1SDimitry Andric char Tmp[256]; 194fe6060f1SDimitry Andric if (Tok.getLength() < sizeof(Tmp)) { 195fe6060f1SDimitry Andric const char *TokPtr = Tmp; 196fe6060f1SDimitry Andric // FIXME: Might use a different overload for cleaner callsite. 197fe6060f1SDimitry Andric unsigned Len = PP.getSpelling(Tok, TokPtr); 198fe6060f1SDimitry Andric OS.write(TokPtr, Len); 199fe6060f1SDimitry Andric } else { 200fe6060f1SDimitry Andric OS << "<too long token>"; 201fe6060f1SDimitry Andric } 202fe6060f1SDimitry Andric } 203fe6060f1SDimitry Andric } 204fe6060f1SDimitry Andric 205fe6060f1SDimitry Andric void MacroExpansionContext::onTokenLexed(const Token &Tok) { 206fe6060f1SDimitry Andric SourceLocation SLoc = Tok.getLocation(); 207fe6060f1SDimitry Andric if (SLoc.isFileID()) 208fe6060f1SDimitry Andric return; 209fe6060f1SDimitry Andric 210fe6060f1SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '"; 211fe6060f1SDimitry Andric dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at "; 212fe6060f1SDimitry Andric SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); 213fe6060f1SDimitry Andric 214fe6060f1SDimitry Andric // Remove spelling location. 215fe6060f1SDimitry Andric SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc); 216fe6060f1SDimitry Andric 217fe6060f1SDimitry Andric MacroExpansionText TokenAsString; 218fe6060f1SDimitry Andric llvm::raw_svector_ostream OS(TokenAsString); 219fe6060f1SDimitry Andric 220fe6060f1SDimitry Andric // FIXME: Prepend newlines and space to produce the exact same output as the 221fe6060f1SDimitry Andric // preprocessor would for this token. 222fe6060f1SDimitry Andric 223fe6060f1SDimitry Andric dumpTokenInto(*PP, OS, Tok); 224fe6060f1SDimitry Andric 225fe6060f1SDimitry Andric ExpansionMap::iterator It; 226fe6060f1SDimitry Andric bool Inserted; 227fe6060f1SDimitry Andric std::tie(It, Inserted) = 228fe6060f1SDimitry Andric ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString)); 229fe6060f1SDimitry Andric if (!Inserted) 230fe6060f1SDimitry Andric It->getSecond().append(TokenAsString); 231fe6060f1SDimitry Andric } 232fe6060f1SDimitry Andric 233