1*fe6060f1SDimitry Andric //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*fe6060f1SDimitry Andric // 7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8*fe6060f1SDimitry Andric 9*fe6060f1SDimitry Andric #include "clang/Analysis/MacroExpansionContext.h" 10*fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 11*fe6060f1SDimitry Andric 12*fe6060f1SDimitry Andric #define DEBUG_TYPE "macro-expansion-context" 13*fe6060f1SDimitry Andric 14*fe6060f1SDimitry Andric static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, 15*fe6060f1SDimitry Andric clang::Token Tok); 16*fe6060f1SDimitry Andric 17*fe6060f1SDimitry Andric namespace clang { 18*fe6060f1SDimitry Andric namespace detail { 19*fe6060f1SDimitry Andric class MacroExpansionRangeRecorder : public PPCallbacks { 20*fe6060f1SDimitry Andric const Preprocessor &PP; 21*fe6060f1SDimitry Andric SourceManager &SM; 22*fe6060f1SDimitry Andric MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; 23*fe6060f1SDimitry Andric 24*fe6060f1SDimitry Andric public: 25*fe6060f1SDimitry Andric explicit MacroExpansionRangeRecorder( 26*fe6060f1SDimitry Andric const Preprocessor &PP, SourceManager &SM, 27*fe6060f1SDimitry Andric MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) 28*fe6060f1SDimitry Andric : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} 29*fe6060f1SDimitry Andric 30*fe6060f1SDimitry Andric void MacroExpands(const Token &MacroName, const MacroDefinition &MD, 31*fe6060f1SDimitry Andric SourceRange Range, const MacroArgs *Args) override { 32*fe6060f1SDimitry Andric // Ignore annotation tokens like: _Pragma("pack(push, 1)") 33*fe6060f1SDimitry Andric if (MacroName.getIdentifierInfo()->getName() == "_Pragma") 34*fe6060f1SDimitry Andric return; 35*fe6060f1SDimitry Andric 36*fe6060f1SDimitry Andric SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation()); 37*fe6060f1SDimitry Andric assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); 38*fe6060f1SDimitry Andric 39*fe6060f1SDimitry Andric const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { 40*fe6060f1SDimitry Andric // If the range is empty, use the length of the macro. 41*fe6060f1SDimitry Andric if (Range.getBegin() == Range.getEnd()) 42*fe6060f1SDimitry Andric return SM.getExpansionLoc( 43*fe6060f1SDimitry Andric MacroName.getLocation().getLocWithOffset(MacroName.getLength())); 44*fe6060f1SDimitry Andric 45*fe6060f1SDimitry Andric // Include the last character. 46*fe6060f1SDimitry Andric return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1); 47*fe6060f1SDimitry Andric }(); 48*fe6060f1SDimitry Andric 49*fe6060f1SDimitry Andric (void)PP; 50*fe6060f1SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '"; 51*fe6060f1SDimitry Andric dumpTokenInto(PP, llvm::dbgs(), MacroName); 52*fe6060f1SDimitry Andric llvm::dbgs() 53*fe6060f1SDimitry Andric << "' with length " << MacroName.getLength() << " at "; 54*fe6060f1SDimitry Andric MacroNameBegin.print(llvm::dbgs(), SM); 55*fe6060f1SDimitry Andric llvm::dbgs() << ", expansion end at "; 56*fe6060f1SDimitry Andric ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); 57*fe6060f1SDimitry Andric 58*fe6060f1SDimitry Andric // If the expansion range is empty, use the identifier of the macro as a 59*fe6060f1SDimitry Andric // range. 60*fe6060f1SDimitry Andric MacroExpansionContext::ExpansionRangeMap::iterator It; 61*fe6060f1SDimitry Andric bool Inserted; 62*fe6060f1SDimitry Andric std::tie(It, Inserted) = 63*fe6060f1SDimitry Andric ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd); 64*fe6060f1SDimitry Andric if (Inserted) { 65*fe6060f1SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "maps "; 66*fe6060f1SDimitry Andric It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to "; 67*fe6060f1SDimitry Andric It->getSecond().print(llvm::dbgs(), SM); 68*fe6060f1SDimitry Andric llvm::dbgs() << '\n';); 69*fe6060f1SDimitry Andric } else { 70*fe6060f1SDimitry Andric if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) { 71*fe6060f1SDimitry Andric It->getSecond() = ExpansionEnd; 72*fe6060f1SDimitry Andric LLVM_DEBUG( 73*fe6060f1SDimitry Andric llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM); 74*fe6060f1SDimitry Andric llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM); 75*fe6060f1SDimitry Andric llvm::dbgs() << '\n';); 76*fe6060f1SDimitry Andric } 77*fe6060f1SDimitry Andric } 78*fe6060f1SDimitry Andric } 79*fe6060f1SDimitry Andric }; 80*fe6060f1SDimitry Andric } // namespace detail 81*fe6060f1SDimitry Andric } // namespace clang 82*fe6060f1SDimitry Andric 83*fe6060f1SDimitry Andric using namespace clang; 84*fe6060f1SDimitry Andric 85*fe6060f1SDimitry Andric MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts) 86*fe6060f1SDimitry Andric : LangOpts(LangOpts) {} 87*fe6060f1SDimitry Andric 88*fe6060f1SDimitry Andric void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { 89*fe6060f1SDimitry Andric PP = &NewPP; 90*fe6060f1SDimitry Andric SM = &NewPP.getSourceManager(); 91*fe6060f1SDimitry Andric 92*fe6060f1SDimitry Andric // Make sure that the Preprocessor does not outlive the MacroExpansionContext. 93*fe6060f1SDimitry Andric PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>( 94*fe6060f1SDimitry Andric *PP, *SM, ExpansionRanges)); 95*fe6060f1SDimitry Andric // Same applies here. 96*fe6060f1SDimitry Andric PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); 97*fe6060f1SDimitry Andric } 98*fe6060f1SDimitry Andric 99*fe6060f1SDimitry Andric Optional<StringRef> 100*fe6060f1SDimitry Andric MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { 101*fe6060f1SDimitry Andric if (MacroExpansionLoc.isMacroID()) 102*fe6060f1SDimitry Andric return llvm::None; 103*fe6060f1SDimitry Andric 104*fe6060f1SDimitry Andric // If there was no macro expansion at that location, return None. 105*fe6060f1SDimitry Andric if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end()) 106*fe6060f1SDimitry Andric return llvm::None; 107*fe6060f1SDimitry Andric 108*fe6060f1SDimitry Andric // There was macro expansion, but resulted in no tokens, return empty string. 109*fe6060f1SDimitry Andric const auto It = ExpandedTokens.find_as(MacroExpansionLoc); 110*fe6060f1SDimitry Andric if (It == ExpandedTokens.end()) 111*fe6060f1SDimitry Andric return StringRef{""}; 112*fe6060f1SDimitry Andric 113*fe6060f1SDimitry Andric // Otherwise we have the actual token sequence as string. 114*fe6060f1SDimitry Andric return It->getSecond().str(); 115*fe6060f1SDimitry Andric } 116*fe6060f1SDimitry Andric 117*fe6060f1SDimitry Andric Optional<StringRef> 118*fe6060f1SDimitry Andric MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { 119*fe6060f1SDimitry Andric if (MacroExpansionLoc.isMacroID()) 120*fe6060f1SDimitry Andric return llvm::None; 121*fe6060f1SDimitry Andric 122*fe6060f1SDimitry Andric const auto It = ExpansionRanges.find_as(MacroExpansionLoc); 123*fe6060f1SDimitry Andric if (It == ExpansionRanges.end()) 124*fe6060f1SDimitry Andric return llvm::None; 125*fe6060f1SDimitry Andric 126*fe6060f1SDimitry Andric assert(It->getFirst() != It->getSecond() && 127*fe6060f1SDimitry Andric "Every macro expansion must cover a non-empty range."); 128*fe6060f1SDimitry Andric 129*fe6060f1SDimitry Andric return Lexer::getSourceText( 130*fe6060f1SDimitry Andric CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM, 131*fe6060f1SDimitry Andric LangOpts); 132*fe6060f1SDimitry Andric } 133*fe6060f1SDimitry Andric 134*fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpansionRanges() const { 135*fe6060f1SDimitry Andric dumpExpansionRangesToStream(llvm::dbgs()); 136*fe6060f1SDimitry Andric } 137*fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpandedTexts() const { 138*fe6060f1SDimitry Andric dumpExpandedTextsToStream(llvm::dbgs()); 139*fe6060f1SDimitry Andric } 140*fe6060f1SDimitry Andric 141*fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const { 142*fe6060f1SDimitry Andric std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges; 143*fe6060f1SDimitry Andric LocalExpansionRanges.reserve(ExpansionRanges.size()); 144*fe6060f1SDimitry Andric for (const auto &Record : ExpansionRanges) 145*fe6060f1SDimitry Andric LocalExpansionRanges.emplace_back( 146*fe6060f1SDimitry Andric std::make_pair(Record.getFirst(), Record.getSecond())); 147*fe6060f1SDimitry Andric llvm::sort(LocalExpansionRanges); 148*fe6060f1SDimitry Andric 149*fe6060f1SDimitry Andric OS << "\n=============== ExpansionRanges ===============\n"; 150*fe6060f1SDimitry Andric for (const auto &Record : LocalExpansionRanges) { 151*fe6060f1SDimitry Andric OS << "> "; 152*fe6060f1SDimitry Andric Record.first.print(OS, *SM); 153*fe6060f1SDimitry Andric OS << ", "; 154*fe6060f1SDimitry Andric Record.second.print(OS, *SM); 155*fe6060f1SDimitry Andric OS << '\n'; 156*fe6060f1SDimitry Andric } 157*fe6060f1SDimitry Andric } 158*fe6060f1SDimitry Andric 159*fe6060f1SDimitry Andric void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const { 160*fe6060f1SDimitry Andric std::vector<std::pair<SourceLocation, MacroExpansionText>> 161*fe6060f1SDimitry Andric LocalExpandedTokens; 162*fe6060f1SDimitry Andric LocalExpandedTokens.reserve(ExpandedTokens.size()); 163*fe6060f1SDimitry Andric for (const auto &Record : ExpandedTokens) 164*fe6060f1SDimitry Andric LocalExpandedTokens.emplace_back( 165*fe6060f1SDimitry Andric std::make_pair(Record.getFirst(), Record.getSecond())); 166*fe6060f1SDimitry Andric llvm::sort(LocalExpandedTokens); 167*fe6060f1SDimitry Andric 168*fe6060f1SDimitry Andric OS << "\n=============== ExpandedTokens ===============\n"; 169*fe6060f1SDimitry Andric for (const auto &Record : LocalExpandedTokens) { 170*fe6060f1SDimitry Andric OS << "> "; 171*fe6060f1SDimitry Andric Record.first.print(OS, *SM); 172*fe6060f1SDimitry Andric OS << " -> '" << Record.second << "'\n"; 173*fe6060f1SDimitry Andric } 174*fe6060f1SDimitry Andric } 175*fe6060f1SDimitry Andric 176*fe6060f1SDimitry Andric static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) { 177*fe6060f1SDimitry Andric assert(Tok.isNot(tok::raw_identifier)); 178*fe6060f1SDimitry Andric 179*fe6060f1SDimitry Andric // Ignore annotation tokens like: _Pragma("pack(push, 1)") 180*fe6060f1SDimitry Andric if (Tok.isAnnotation()) 181*fe6060f1SDimitry Andric return; 182*fe6060f1SDimitry Andric 183*fe6060f1SDimitry Andric if (IdentifierInfo *II = Tok.getIdentifierInfo()) { 184*fe6060f1SDimitry Andric // FIXME: For now, we don't respect whitespaces between macro expanded 185*fe6060f1SDimitry Andric // tokens. We just emit a space after every identifier to produce a valid 186*fe6060f1SDimitry Andric // code for `int a ;` like expansions. 187*fe6060f1SDimitry Andric // ^-^-- Space after the 'int' and 'a' identifiers. 188*fe6060f1SDimitry Andric OS << II->getName() << ' '; 189*fe6060f1SDimitry Andric } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { 190*fe6060f1SDimitry Andric OS << StringRef(Tok.getLiteralData(), Tok.getLength()); 191*fe6060f1SDimitry Andric } else { 192*fe6060f1SDimitry Andric char Tmp[256]; 193*fe6060f1SDimitry Andric if (Tok.getLength() < sizeof(Tmp)) { 194*fe6060f1SDimitry Andric const char *TokPtr = Tmp; 195*fe6060f1SDimitry Andric // FIXME: Might use a different overload for cleaner callsite. 196*fe6060f1SDimitry Andric unsigned Len = PP.getSpelling(Tok, TokPtr); 197*fe6060f1SDimitry Andric OS.write(TokPtr, Len); 198*fe6060f1SDimitry Andric } else { 199*fe6060f1SDimitry Andric OS << "<too long token>"; 200*fe6060f1SDimitry Andric } 201*fe6060f1SDimitry Andric } 202*fe6060f1SDimitry Andric } 203*fe6060f1SDimitry Andric 204*fe6060f1SDimitry Andric void MacroExpansionContext::onTokenLexed(const Token &Tok) { 205*fe6060f1SDimitry Andric SourceLocation SLoc = Tok.getLocation(); 206*fe6060f1SDimitry Andric if (SLoc.isFileID()) 207*fe6060f1SDimitry Andric return; 208*fe6060f1SDimitry Andric 209*fe6060f1SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '"; 210*fe6060f1SDimitry Andric dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at "; 211*fe6060f1SDimitry Andric SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); 212*fe6060f1SDimitry Andric 213*fe6060f1SDimitry Andric // Remove spelling location. 214*fe6060f1SDimitry Andric SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc); 215*fe6060f1SDimitry Andric 216*fe6060f1SDimitry Andric MacroExpansionText TokenAsString; 217*fe6060f1SDimitry Andric llvm::raw_svector_ostream OS(TokenAsString); 218*fe6060f1SDimitry Andric 219*fe6060f1SDimitry Andric // FIXME: Prepend newlines and space to produce the exact same output as the 220*fe6060f1SDimitry Andric // preprocessor would for this token. 221*fe6060f1SDimitry Andric 222*fe6060f1SDimitry Andric dumpTokenInto(*PP, OS, Tok); 223*fe6060f1SDimitry Andric 224*fe6060f1SDimitry Andric ExpansionMap::iterator It; 225*fe6060f1SDimitry Andric bool Inserted; 226*fe6060f1SDimitry Andric std::tie(It, Inserted) = 227*fe6060f1SDimitry Andric ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString)); 228*fe6060f1SDimitry Andric if (!Inserted) 229*fe6060f1SDimitry Andric It->getSecond().append(TokenAsString); 230*fe6060f1SDimitry Andric } 231*fe6060f1SDimitry Andric 232