//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
86e307100SBalazs Benics
96e307100SBalazs Benics #include "clang/Analysis/MacroExpansionContext.h"
106e307100SBalazs Benics #include "llvm/Support/Debug.h"
11a1580d7bSKazu Hirata #include <optional>
126e307100SBalazs Benics
136e307100SBalazs Benics #define DEBUG_TYPE "macro-expansion-context"
146e307100SBalazs Benics
15*273777eaSAaron Ballman static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
166e307100SBalazs Benics clang::Token Tok);
176e307100SBalazs Benics
186e307100SBalazs Benics namespace clang {
196e307100SBalazs Benics namespace detail {
206e307100SBalazs Benics class MacroExpansionRangeRecorder : public PPCallbacks {
216e307100SBalazs Benics const Preprocessor &PP;
226e307100SBalazs Benics SourceManager &SM;
236e307100SBalazs Benics MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
246e307100SBalazs Benics
256e307100SBalazs Benics public:
MacroExpansionRangeRecorder(const Preprocessor & PP,SourceManager & SM,MacroExpansionContext::ExpansionRangeMap & ExpansionRanges)266e307100SBalazs Benics explicit MacroExpansionRangeRecorder(
276e307100SBalazs Benics const Preprocessor &PP, SourceManager &SM,
286e307100SBalazs Benics MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
296e307100SBalazs Benics : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
306e307100SBalazs Benics
MacroExpands(const Token & MacroName,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)316e307100SBalazs Benics void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
326e307100SBalazs Benics SourceRange Range, const MacroArgs *Args) override {
336e307100SBalazs Benics // Ignore annotation tokens like: _Pragma("pack(push, 1)")
346e307100SBalazs Benics if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
356e307100SBalazs Benics return;
366e307100SBalazs Benics
376e307100SBalazs Benics SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
386e307100SBalazs Benics assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
396e307100SBalazs Benics
406e307100SBalazs Benics const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
416e307100SBalazs Benics // If the range is empty, use the length of the macro.
426e307100SBalazs Benics if (Range.getBegin() == Range.getEnd())
436e307100SBalazs Benics return SM.getExpansionLoc(
446e307100SBalazs Benics MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
456e307100SBalazs Benics
466e307100SBalazs Benics // Include the last character.
476e307100SBalazs Benics return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
486e307100SBalazs Benics }();
496e307100SBalazs Benics
507c83799fSKazu Hirata (void)PP;
516e307100SBalazs Benics LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
526e307100SBalazs Benics dumpTokenInto(PP, llvm::dbgs(), MacroName);
536e307100SBalazs Benics llvm::dbgs()
546e307100SBalazs Benics << "' with length " << MacroName.getLength() << " at ";
556e307100SBalazs Benics MacroNameBegin.print(llvm::dbgs(), SM);
566e307100SBalazs Benics llvm::dbgs() << ", expansion end at ";
576e307100SBalazs Benics ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
586e307100SBalazs Benics
596e307100SBalazs Benics // If the expansion range is empty, use the identifier of the macro as a
606e307100SBalazs Benics // range.
616e307100SBalazs Benics MacroExpansionContext::ExpansionRangeMap::iterator It;
626e307100SBalazs Benics bool Inserted;
636e307100SBalazs Benics std::tie(It, Inserted) =
646e307100SBalazs Benics ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
656e307100SBalazs Benics if (Inserted) {
666e307100SBalazs Benics LLVM_DEBUG(llvm::dbgs() << "maps ";
676e307100SBalazs Benics It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
686e307100SBalazs Benics It->getSecond().print(llvm::dbgs(), SM);
696e307100SBalazs Benics llvm::dbgs() << '\n';);
706e307100SBalazs Benics } else {
716e307100SBalazs Benics if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
726e307100SBalazs Benics It->getSecond() = ExpansionEnd;
736e307100SBalazs Benics LLVM_DEBUG(
746e307100SBalazs Benics llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
756e307100SBalazs Benics llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
766e307100SBalazs Benics llvm::dbgs() << '\n';);
776e307100SBalazs Benics }
786e307100SBalazs Benics }
796e307100SBalazs Benics }
806e307100SBalazs Benics };
816e307100SBalazs Benics } // namespace detail
826e307100SBalazs Benics } // namespace clang
836e307100SBalazs Benics
846e307100SBalazs Benics using namespace clang;
856e307100SBalazs Benics
MacroExpansionContext(const LangOptions & LangOpts)866e307100SBalazs Benics MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
876e307100SBalazs Benics : LangOpts(LangOpts) {}
886e307100SBalazs Benics
registerForPreprocessor(Preprocessor & NewPP)896e307100SBalazs Benics void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
906e307100SBalazs Benics PP = &NewPP;
916e307100SBalazs Benics SM = &NewPP.getSourceManager();
926e307100SBalazs Benics
936e307100SBalazs Benics // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
946e307100SBalazs Benics PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
956e307100SBalazs Benics *PP, *SM, ExpansionRanges));
966e307100SBalazs Benics // Same applies here.
976e307100SBalazs Benics PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
986e307100SBalazs Benics }
996e307100SBalazs Benics
1006ad0788cSKazu Hirata std::optional<StringRef>
getExpandedText(SourceLocation MacroExpansionLoc) const1016e307100SBalazs Benics MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
1026e307100SBalazs Benics if (MacroExpansionLoc.isMacroID())
10334e0d057SKazu Hirata return std::nullopt;
1046e307100SBalazs Benics
10535b4fbb5SKazu Hirata // If there was no macro expansion at that location, return std::nullopt.
1066e307100SBalazs Benics if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
10734e0d057SKazu Hirata return std::nullopt;
1086e307100SBalazs Benics
1096e307100SBalazs Benics // There was macro expansion, but resulted in no tokens, return empty string.
1106e307100SBalazs Benics const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
1116e307100SBalazs Benics if (It == ExpandedTokens.end())
1126e307100SBalazs Benics return StringRef{""};
1136e307100SBalazs Benics
1146e307100SBalazs Benics // Otherwise we have the actual token sequence as string.
1151def2579SDavid Blaikie return It->getSecond().str();
1166e307100SBalazs Benics }
1176e307100SBalazs Benics
1186ad0788cSKazu Hirata std::optional<StringRef>
getOriginalText(SourceLocation MacroExpansionLoc) const1196e307100SBalazs Benics MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
1206e307100SBalazs Benics if (MacroExpansionLoc.isMacroID())
12134e0d057SKazu Hirata return std::nullopt;
1226e307100SBalazs Benics
1236e307100SBalazs Benics const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
1246e307100SBalazs Benics if (It == ExpansionRanges.end())
12534e0d057SKazu Hirata return std::nullopt;
1266e307100SBalazs Benics
1276e307100SBalazs Benics assert(It->getFirst() != It->getSecond() &&
1286e307100SBalazs Benics "Every macro expansion must cover a non-empty range.");
1296e307100SBalazs Benics
1306e307100SBalazs Benics return Lexer::getSourceText(
1316e307100SBalazs Benics CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
1326e307100SBalazs Benics LangOpts);
1336e307100SBalazs Benics }
1346e307100SBalazs Benics
dumpExpansionRanges() const1356e307100SBalazs Benics void MacroExpansionContext::dumpExpansionRanges() const {
1366e307100SBalazs Benics dumpExpansionRangesToStream(llvm::dbgs());
1376e307100SBalazs Benics }
dumpExpandedTexts() const1386e307100SBalazs Benics void MacroExpansionContext::dumpExpandedTexts() const {
1396e307100SBalazs Benics dumpExpandedTextsToStream(llvm::dbgs());
1406e307100SBalazs Benics }
1416e307100SBalazs Benics
dumpExpansionRangesToStream(raw_ostream & OS) const1426e307100SBalazs Benics void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
1436e307100SBalazs Benics std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
1446e307100SBalazs Benics LocalExpansionRanges.reserve(ExpansionRanges.size());
1456e307100SBalazs Benics for (const auto &Record : ExpansionRanges)
1466e307100SBalazs Benics LocalExpansionRanges.emplace_back(
1476e307100SBalazs Benics std::make_pair(Record.getFirst(), Record.getSecond()));
1486e307100SBalazs Benics llvm::sort(LocalExpansionRanges);
1496e307100SBalazs Benics
1506e307100SBalazs Benics OS << "\n=============== ExpansionRanges ===============\n";
1516e307100SBalazs Benics for (const auto &Record : LocalExpansionRanges) {
1526e307100SBalazs Benics OS << "> ";
1536e307100SBalazs Benics Record.first.print(OS, *SM);
1546e307100SBalazs Benics OS << ", ";
1556e307100SBalazs Benics Record.second.print(OS, *SM);
1566e307100SBalazs Benics OS << '\n';
1576e307100SBalazs Benics }
1586e307100SBalazs Benics }
1596e307100SBalazs Benics
dumpExpandedTextsToStream(raw_ostream & OS) const1606e307100SBalazs Benics void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
1616e307100SBalazs Benics std::vector<std::pair<SourceLocation, MacroExpansionText>>
1626e307100SBalazs Benics LocalExpandedTokens;
1636e307100SBalazs Benics LocalExpandedTokens.reserve(ExpandedTokens.size());
1646e307100SBalazs Benics for (const auto &Record : ExpandedTokens)
1656e307100SBalazs Benics LocalExpandedTokens.emplace_back(
1666e307100SBalazs Benics std::make_pair(Record.getFirst(), Record.getSecond()));
1676e307100SBalazs Benics llvm::sort(LocalExpandedTokens);
1686e307100SBalazs Benics
1696e307100SBalazs Benics OS << "\n=============== ExpandedTokens ===============\n";
1706e307100SBalazs Benics for (const auto &Record : LocalExpandedTokens) {
1716e307100SBalazs Benics OS << "> ";
1726e307100SBalazs Benics Record.first.print(OS, *SM);
1736e307100SBalazs Benics OS << " -> '" << Record.second << "'\n";
1746e307100SBalazs Benics }
1756e307100SBalazs Benics }
1766e307100SBalazs Benics
dumpTokenInto(const Preprocessor & PP,raw_ostream & OS,Token Tok)1776e307100SBalazs Benics static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
1786e307100SBalazs Benics assert(Tok.isNot(tok::raw_identifier));
1796e307100SBalazs Benics
1806e307100SBalazs Benics // Ignore annotation tokens like: _Pragma("pack(push, 1)")
1816e307100SBalazs Benics if (Tok.isAnnotation())
1826e307100SBalazs Benics return;
1836e307100SBalazs Benics
1846e307100SBalazs Benics if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
1856e307100SBalazs Benics // FIXME: For now, we don't respect whitespaces between macro expanded
1866e307100SBalazs Benics // tokens. We just emit a space after every identifier to produce a valid
1876e307100SBalazs Benics // code for `int a ;` like expansions.
1886e307100SBalazs Benics // ^-^-- Space after the 'int' and 'a' identifiers.
1896e307100SBalazs Benics OS << II->getName() << ' ';
1906e307100SBalazs Benics } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
1916e307100SBalazs Benics OS << StringRef(Tok.getLiteralData(), Tok.getLength());
1926e307100SBalazs Benics } else {
1936e307100SBalazs Benics char Tmp[256];
1946e307100SBalazs Benics if (Tok.getLength() < sizeof(Tmp)) {
1956e307100SBalazs Benics const char *TokPtr = Tmp;
1966e307100SBalazs Benics // FIXME: Might use a different overload for cleaner callsite.
1976e307100SBalazs Benics unsigned Len = PP.getSpelling(Tok, TokPtr);
1986e307100SBalazs Benics OS.write(TokPtr, Len);
1996e307100SBalazs Benics } else {
2006e307100SBalazs Benics OS << "<too long token>";
2016e307100SBalazs Benics }
2026e307100SBalazs Benics }
2036e307100SBalazs Benics }
2046e307100SBalazs Benics
onTokenLexed(const Token & Tok)2056e307100SBalazs Benics void MacroExpansionContext::onTokenLexed(const Token &Tok) {
2066e307100SBalazs Benics SourceLocation SLoc = Tok.getLocation();
2076e307100SBalazs Benics if (SLoc.isFileID())
2086e307100SBalazs Benics return;
2096e307100SBalazs Benics
2106e307100SBalazs Benics LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
2116e307100SBalazs Benics dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
2126e307100SBalazs Benics SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
2136e307100SBalazs Benics
2146e307100SBalazs Benics // Remove spelling location.
2156e307100SBalazs Benics SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
2166e307100SBalazs Benics
2176e307100SBalazs Benics MacroExpansionText TokenAsString;
2186e307100SBalazs Benics llvm::raw_svector_ostream OS(TokenAsString);
2196e307100SBalazs Benics
2206e307100SBalazs Benics // FIXME: Prepend newlines and space to produce the exact same output as the
2216e307100SBalazs Benics // preprocessor would for this token.
2226e307100SBalazs Benics
2236e307100SBalazs Benics dumpTokenInto(*PP, OS, Tok);
2246e307100SBalazs Benics
2256e307100SBalazs Benics ExpansionMap::iterator It;
2266e307100SBalazs Benics bool Inserted;
2276e307100SBalazs Benics std::tie(It, Inserted) =
2286e307100SBalazs Benics ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
2296e307100SBalazs Benics if (!Inserted)
2306e307100SBalazs Benics It->getSecond().append(TokenAsString);
2316e307100SBalazs Benics }
2326e307100SBalazs Benics
233