xref: /llvm-project/clang/lib/Analysis/MacroExpansionContext.cpp (revision 273777ead296c9ab2c157d16b750e3ee1ace08ec)
//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
86e307100SBalazs Benics 
96e307100SBalazs Benics #include "clang/Analysis/MacroExpansionContext.h"
106e307100SBalazs Benics #include "llvm/Support/Debug.h"
11a1580d7bSKazu Hirata #include <optional>
126e307100SBalazs Benics 
136e307100SBalazs Benics #define DEBUG_TYPE "macro-expansion-context"
146e307100SBalazs Benics 
15*273777eaSAaron Ballman static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
166e307100SBalazs Benics                           clang::Token Tok);
176e307100SBalazs Benics 
186e307100SBalazs Benics namespace clang {
196e307100SBalazs Benics namespace detail {
206e307100SBalazs Benics class MacroExpansionRangeRecorder : public PPCallbacks {
216e307100SBalazs Benics   const Preprocessor &PP;
226e307100SBalazs Benics   SourceManager &SM;
236e307100SBalazs Benics   MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
246e307100SBalazs Benics 
256e307100SBalazs Benics public:
MacroExpansionRangeRecorder(const Preprocessor & PP,SourceManager & SM,MacroExpansionContext::ExpansionRangeMap & ExpansionRanges)266e307100SBalazs Benics   explicit MacroExpansionRangeRecorder(
276e307100SBalazs Benics       const Preprocessor &PP, SourceManager &SM,
286e307100SBalazs Benics       MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
296e307100SBalazs Benics       : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
306e307100SBalazs Benics 
MacroExpands(const Token & MacroName,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)316e307100SBalazs Benics   void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
326e307100SBalazs Benics                     SourceRange Range, const MacroArgs *Args) override {
336e307100SBalazs Benics     // Ignore annotation tokens like: _Pragma("pack(push, 1)")
346e307100SBalazs Benics     if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
356e307100SBalazs Benics       return;
366e307100SBalazs Benics 
376e307100SBalazs Benics     SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
386e307100SBalazs Benics     assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
396e307100SBalazs Benics 
406e307100SBalazs Benics     const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
416e307100SBalazs Benics       // If the range is empty, use the length of the macro.
426e307100SBalazs Benics       if (Range.getBegin() == Range.getEnd())
436e307100SBalazs Benics         return SM.getExpansionLoc(
446e307100SBalazs Benics             MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
456e307100SBalazs Benics 
466e307100SBalazs Benics       // Include the last character.
476e307100SBalazs Benics       return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
486e307100SBalazs Benics     }();
496e307100SBalazs Benics 
507c83799fSKazu Hirata     (void)PP;
516e307100SBalazs Benics     LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
526e307100SBalazs Benics                dumpTokenInto(PP, llvm::dbgs(), MacroName);
536e307100SBalazs Benics                llvm::dbgs()
546e307100SBalazs Benics                << "' with length " << MacroName.getLength() << " at ";
556e307100SBalazs Benics                MacroNameBegin.print(llvm::dbgs(), SM);
566e307100SBalazs Benics                llvm::dbgs() << ", expansion end at ";
576e307100SBalazs Benics                ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
586e307100SBalazs Benics 
596e307100SBalazs Benics     // If the expansion range is empty, use the identifier of the macro as a
606e307100SBalazs Benics     // range.
616e307100SBalazs Benics     MacroExpansionContext::ExpansionRangeMap::iterator It;
626e307100SBalazs Benics     bool Inserted;
636e307100SBalazs Benics     std::tie(It, Inserted) =
646e307100SBalazs Benics         ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
656e307100SBalazs Benics     if (Inserted) {
666e307100SBalazs Benics       LLVM_DEBUG(llvm::dbgs() << "maps ";
676e307100SBalazs Benics                  It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
686e307100SBalazs Benics                  It->getSecond().print(llvm::dbgs(), SM);
696e307100SBalazs Benics                  llvm::dbgs() << '\n';);
706e307100SBalazs Benics     } else {
716e307100SBalazs Benics       if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
726e307100SBalazs Benics         It->getSecond() = ExpansionEnd;
736e307100SBalazs Benics         LLVM_DEBUG(
746e307100SBalazs Benics             llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
756e307100SBalazs Benics             llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
766e307100SBalazs Benics             llvm::dbgs() << '\n';);
776e307100SBalazs Benics       }
786e307100SBalazs Benics     }
796e307100SBalazs Benics   }
806e307100SBalazs Benics };
816e307100SBalazs Benics } // namespace detail
826e307100SBalazs Benics } // namespace clang
836e307100SBalazs Benics 
846e307100SBalazs Benics using namespace clang;
856e307100SBalazs Benics 
MacroExpansionContext(const LangOptions & LangOpts)866e307100SBalazs Benics MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
876e307100SBalazs Benics     : LangOpts(LangOpts) {}
886e307100SBalazs Benics 
registerForPreprocessor(Preprocessor & NewPP)896e307100SBalazs Benics void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
906e307100SBalazs Benics   PP = &NewPP;
916e307100SBalazs Benics   SM = &NewPP.getSourceManager();
926e307100SBalazs Benics 
936e307100SBalazs Benics   // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
946e307100SBalazs Benics   PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
956e307100SBalazs Benics       *PP, *SM, ExpansionRanges));
966e307100SBalazs Benics   // Same applies here.
976e307100SBalazs Benics   PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
986e307100SBalazs Benics }
996e307100SBalazs Benics 
1006ad0788cSKazu Hirata std::optional<StringRef>
getExpandedText(SourceLocation MacroExpansionLoc) const1016e307100SBalazs Benics MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
1026e307100SBalazs Benics   if (MacroExpansionLoc.isMacroID())
10334e0d057SKazu Hirata     return std::nullopt;
1046e307100SBalazs Benics 
10535b4fbb5SKazu Hirata   // If there was no macro expansion at that location, return std::nullopt.
1066e307100SBalazs Benics   if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
10734e0d057SKazu Hirata     return std::nullopt;
1086e307100SBalazs Benics 
1096e307100SBalazs Benics   // There was macro expansion, but resulted in no tokens, return empty string.
1106e307100SBalazs Benics   const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
1116e307100SBalazs Benics   if (It == ExpandedTokens.end())
1126e307100SBalazs Benics     return StringRef{""};
1136e307100SBalazs Benics 
1146e307100SBalazs Benics   // Otherwise we have the actual token sequence as string.
1151def2579SDavid Blaikie   return It->getSecond().str();
1166e307100SBalazs Benics }
1176e307100SBalazs Benics 
1186ad0788cSKazu Hirata std::optional<StringRef>
getOriginalText(SourceLocation MacroExpansionLoc) const1196e307100SBalazs Benics MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
1206e307100SBalazs Benics   if (MacroExpansionLoc.isMacroID())
12134e0d057SKazu Hirata     return std::nullopt;
1226e307100SBalazs Benics 
1236e307100SBalazs Benics   const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
1246e307100SBalazs Benics   if (It == ExpansionRanges.end())
12534e0d057SKazu Hirata     return std::nullopt;
1266e307100SBalazs Benics 
1276e307100SBalazs Benics   assert(It->getFirst() != It->getSecond() &&
1286e307100SBalazs Benics          "Every macro expansion must cover a non-empty range.");
1296e307100SBalazs Benics 
1306e307100SBalazs Benics   return Lexer::getSourceText(
1316e307100SBalazs Benics       CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
1326e307100SBalazs Benics       LangOpts);
1336e307100SBalazs Benics }
1346e307100SBalazs Benics 
dumpExpansionRanges() const1356e307100SBalazs Benics void MacroExpansionContext::dumpExpansionRanges() const {
1366e307100SBalazs Benics   dumpExpansionRangesToStream(llvm::dbgs());
1376e307100SBalazs Benics }
dumpExpandedTexts() const1386e307100SBalazs Benics void MacroExpansionContext::dumpExpandedTexts() const {
1396e307100SBalazs Benics   dumpExpandedTextsToStream(llvm::dbgs());
1406e307100SBalazs Benics }
1416e307100SBalazs Benics 
dumpExpansionRangesToStream(raw_ostream & OS) const1426e307100SBalazs Benics void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
1436e307100SBalazs Benics   std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
1446e307100SBalazs Benics   LocalExpansionRanges.reserve(ExpansionRanges.size());
1456e307100SBalazs Benics   for (const auto &Record : ExpansionRanges)
1466e307100SBalazs Benics     LocalExpansionRanges.emplace_back(
1476e307100SBalazs Benics         std::make_pair(Record.getFirst(), Record.getSecond()));
1486e307100SBalazs Benics   llvm::sort(LocalExpansionRanges);
1496e307100SBalazs Benics 
1506e307100SBalazs Benics   OS << "\n=============== ExpansionRanges ===============\n";
1516e307100SBalazs Benics   for (const auto &Record : LocalExpansionRanges) {
1526e307100SBalazs Benics     OS << "> ";
1536e307100SBalazs Benics     Record.first.print(OS, *SM);
1546e307100SBalazs Benics     OS << ", ";
1556e307100SBalazs Benics     Record.second.print(OS, *SM);
1566e307100SBalazs Benics     OS << '\n';
1576e307100SBalazs Benics   }
1586e307100SBalazs Benics }
1596e307100SBalazs Benics 
dumpExpandedTextsToStream(raw_ostream & OS) const1606e307100SBalazs Benics void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
1616e307100SBalazs Benics   std::vector<std::pair<SourceLocation, MacroExpansionText>>
1626e307100SBalazs Benics       LocalExpandedTokens;
1636e307100SBalazs Benics   LocalExpandedTokens.reserve(ExpandedTokens.size());
1646e307100SBalazs Benics   for (const auto &Record : ExpandedTokens)
1656e307100SBalazs Benics     LocalExpandedTokens.emplace_back(
1666e307100SBalazs Benics         std::make_pair(Record.getFirst(), Record.getSecond()));
1676e307100SBalazs Benics   llvm::sort(LocalExpandedTokens);
1686e307100SBalazs Benics 
1696e307100SBalazs Benics   OS << "\n=============== ExpandedTokens ===============\n";
1706e307100SBalazs Benics   for (const auto &Record : LocalExpandedTokens) {
1716e307100SBalazs Benics     OS << "> ";
1726e307100SBalazs Benics     Record.first.print(OS, *SM);
1736e307100SBalazs Benics     OS << " -> '" << Record.second << "'\n";
1746e307100SBalazs Benics   }
1756e307100SBalazs Benics }
1766e307100SBalazs Benics 
dumpTokenInto(const Preprocessor & PP,raw_ostream & OS,Token Tok)1776e307100SBalazs Benics static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
1786e307100SBalazs Benics   assert(Tok.isNot(tok::raw_identifier));
1796e307100SBalazs Benics 
1806e307100SBalazs Benics   // Ignore annotation tokens like: _Pragma("pack(push, 1)")
1816e307100SBalazs Benics   if (Tok.isAnnotation())
1826e307100SBalazs Benics     return;
1836e307100SBalazs Benics 
1846e307100SBalazs Benics   if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
1856e307100SBalazs Benics     // FIXME: For now, we don't respect whitespaces between macro expanded
1866e307100SBalazs Benics     // tokens. We just emit a space after every identifier to produce a valid
1876e307100SBalazs Benics     // code for `int a ;` like expansions.
1886e307100SBalazs Benics     //              ^-^-- Space after the 'int' and 'a' identifiers.
1896e307100SBalazs Benics     OS << II->getName() << ' ';
1906e307100SBalazs Benics   } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
1916e307100SBalazs Benics     OS << StringRef(Tok.getLiteralData(), Tok.getLength());
1926e307100SBalazs Benics   } else {
1936e307100SBalazs Benics     char Tmp[256];
1946e307100SBalazs Benics     if (Tok.getLength() < sizeof(Tmp)) {
1956e307100SBalazs Benics       const char *TokPtr = Tmp;
1966e307100SBalazs Benics       // FIXME: Might use a different overload for cleaner callsite.
1976e307100SBalazs Benics       unsigned Len = PP.getSpelling(Tok, TokPtr);
1986e307100SBalazs Benics       OS.write(TokPtr, Len);
1996e307100SBalazs Benics     } else {
2006e307100SBalazs Benics       OS << "<too long token>";
2016e307100SBalazs Benics     }
2026e307100SBalazs Benics   }
2036e307100SBalazs Benics }
2046e307100SBalazs Benics 
onTokenLexed(const Token & Tok)2056e307100SBalazs Benics void MacroExpansionContext::onTokenLexed(const Token &Tok) {
2066e307100SBalazs Benics   SourceLocation SLoc = Tok.getLocation();
2076e307100SBalazs Benics   if (SLoc.isFileID())
2086e307100SBalazs Benics     return;
2096e307100SBalazs Benics 
2106e307100SBalazs Benics   LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
2116e307100SBalazs Benics              dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
2126e307100SBalazs Benics              SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
2136e307100SBalazs Benics 
2146e307100SBalazs Benics   // Remove spelling location.
2156e307100SBalazs Benics   SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
2166e307100SBalazs Benics 
2176e307100SBalazs Benics   MacroExpansionText TokenAsString;
2186e307100SBalazs Benics   llvm::raw_svector_ostream OS(TokenAsString);
2196e307100SBalazs Benics 
2206e307100SBalazs Benics   // FIXME: Prepend newlines and space to produce the exact same output as the
2216e307100SBalazs Benics   // preprocessor would for this token.
2226e307100SBalazs Benics 
2236e307100SBalazs Benics   dumpTokenInto(*PP, OS, Tok);
2246e307100SBalazs Benics 
2256e307100SBalazs Benics   ExpansionMap::iterator It;
2266e307100SBalazs Benics   bool Inserted;
2276e307100SBalazs Benics   std::tie(It, Inserted) =
2286e307100SBalazs Benics       ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
2296e307100SBalazs Benics   if (!Inserted)
2306e307100SBalazs Benics     It->getSecond().append(TokenAsString);
2316e307100SBalazs Benics }
2326e307100SBalazs Benics 
233