//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/Tokens.h"

#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using namespace clang;
using namespace clang::syntax;
namespace {
// Finds the smallest consecutive subsequence of Toks that covers R.
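// E.g., for tokens [a, b, c, d] and a range R covering only b and c, the
// result is [b, c]. Toks must be sorted by location; the partition_point
// calls below rely on that.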
llvm::ArrayRef<syntax::Token>
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
                  const SourceManager &SM) {
  if (R.isInvalid())
    return {};
  const syntax::Token *Begin =
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
        return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
      });
  const syntax::Token *End =
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
        return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
      });
  if (Begin > End)
    return {};
  return {Begin, End};
}

// Finds the smallest expansion range that contains expanded tokens First and
// Last, e.g.:
// #define ID(x) x
// ID(ID(ID(a1) a2))
//          ~~         -> a1
//              ~~     -> a2
//       ~~~~~~~~~     -> a1 a2
SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
                                        const syntax::Token &Last,
                                        const SourceManager &SM) {
  SourceRange Res;
  auto FirstLoc = First.location(), LastLoc = Last.location();
  // Keep traversing up the spelling chain as long as tokens are part of the
  // same expansion.
  while (!FirstLoc.isFileID() && !LastLoc.isFileID()) {
    auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
    auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
    // Stop if expansions have diverged.
    if (ExpInfoFirst.getExpansionLocStart() !=
        ExpInfoLast.getExpansionLocStart())
      break;
    // Do not continue into macro bodies.
    if (!ExpInfoFirst.isMacroArgExpansion() ||
        !ExpInfoLast.isMacroArgExpansion())
      break;
    FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
    LastLoc = SM.getImmediateSpellingLoc(LastLoc);
    // Update the result afterwards, as we want the tokens that triggered the
    // expansion.
    Res = {FirstLoc, LastLoc};
  }
  // Normally, mapping back to the expansion location here only changes the
  // FileID, as we've already found some tokens expanded from the same macro
  // argument, and they should map to a consecutive subset of spelled tokens.
  // Unfortunately, SourceManager::isBeforeInTranslationUnit discriminates
  // source locations based on their FileID in addition to offsets. So even
  // though we are referring to the same tokens, SourceManager might tell us
  // that one is before the other if they've got different FileIDs.
  return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
}

} // namespace

syntax::Token::Token(SourceLocation Location, unsigned Length,
                     tok::TokenKind Kind)
    : Location(Location), Length(Length), Kind(Kind) {
  assert(Location.isValid());
}

syntax::Token::Token(const clang::Token &T)
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
  assert(!T.isAnnotation());
}

llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
  bool Invalid = false;
  const char *Start = SM.getCharacterData(location(), &Invalid);
  assert(!Invalid);
  return llvm::StringRef(Start, length());
}

FileRange syntax::Token::range(const SourceManager &SM) const {
  assert(location().isFileID() && "must be a spelled token");
  FileID File;
  unsigned StartOffset;
  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
  return FileRange(File, StartOffset, StartOffset + length());
}

FileRange syntax::Token::range(const SourceManager &SM,
                               const syntax::Token &First,
                               const syntax::Token &Last) {
  auto F = First.range(SM);
  auto L = Last.range(SM);
  assert(F.file() == L.file() && "tokens from different files");
  assert((F == L || F.endOffset() <= L.beginOffset()) &&
         "wrong order of tokens");
  return FileRange(F.file(), F.beginOffset(), L.endOffset());
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
  return OS << T.str();
}

FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
    : File(File), Begin(BeginOffset), End(EndOffset) {
  assert(File.isValid());
  assert(BeginOffset <= EndOffset);
}

FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     unsigned Length) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = Begin + Length;
}
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     SourceLocation EndLoc) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());
  assert(EndLoc.isValid());
  assert(EndLoc.isFileID());
  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = SM.getFileOffset(EndLoc);
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
                                      const FileRange &R) {
  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
                             R.file().getHashValue(), R.beginOffset(),
                             R.endOffset());
}

llvm::StringRef FileRange::text(const SourceManager &SM) const {
  bool Invalid = false;
  StringRef Text = SM.getBufferData(File, &Invalid);
  if (Invalid)
    return "";
  assert(Begin <= Text.size());
  assert(End <= Text.size());
  return Text.substr(Begin, length());
}

void TokenBuffer::indexExpandedTokens() {
  // No-op if the index is already created.
  if (!ExpandedTokIndex.empty())
    return;
  ExpandedTokIndex.reserve(ExpandedTokens.size());
  // Index ExpandedTokens for faster lookups by SourceLocation.
  for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) {
    SourceLocation Loc = ExpandedTokens[I].location();
    if (Loc.isValid())
      ExpandedTokIndex[Loc] = I;
  }
}

llvm::ArrayRef<syntax::Token>
TokenBuffer::expandedTokens(SourceRange R) const {
  if (R.isInvalid())
    return {};
  if (!ExpandedTokIndex.empty()) {
    // Quick lookup if `R` is a token range.
    // This is a huge win, since the majority of users pass ranges provided by
    // the AST, and ranges in the AST are token ranges over the expanded token
    // stream.
    const auto B = ExpandedTokIndex.find(R.getBegin());
    const auto E = ExpandedTokIndex.find(R.getEnd());
    if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
      const Token *L = ExpandedTokens.data() + B->getSecond();
      // Add 1 to End to make a half-open range.
      const Token *R = ExpandedTokens.data() + E->getSecond() + 1;
      if (L > R)
        return {};
      return {L, R};
    }
  }
  // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
  // required range.
  return getTokensCovering(expandedTokens(), R, *SourceMgr);
}

CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
  return CharSourceRange(
      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
      /*IsTokenRange=*/false);
}

std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
  assert(Expanded);
  assert(ExpandedTokens.data() <= Expanded &&
         Expanded < ExpandedTokens.data() + ExpandedTokens.size());

  auto FileIt = Files.find(
      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
  assert(FileIt != Files.end() && "no file for an expanded token");

  const MarkedFile &File = FileIt->second;

  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
  // Find the first mapping that produced tokens after \p Expanded.
  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginExpanded <= ExpandedIndex;
  });
  // Our token could only be produced by the previous mapping.
  if (It == File.Mappings.begin()) {
    // No previous mapping, no need to modify offsets.
    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
            /*Mapping=*/nullptr};
  }
  --It; // 'It' now points to the last mapping that started before our token.

  // Check if the token is part of the mapping.
  if (ExpandedIndex < It->EndExpanded)
    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};

  // Not part of the mapping; use the index from the previous mapping to
  // compute the corresponding spelled token.
  return {
      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
      /*Mapping=*/nullptr};
}

const TokenBuffer::Mapping *
TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
                                          const syntax::Token *Spelled) {
  assert(F.SpelledTokens.data() <= Spelled);
  unsigned SpelledI = Spelled - F.SpelledTokens.data();
  assert(SpelledI < F.SpelledTokens.size());

  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
    return M.BeginSpelled <= SpelledI;
  });
  if (It == F.Mappings.begin())
    return nullptr;
  --It;
  return &*It;
}

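// An illustrative example for the mapping below: given "#define FOO a b" and
// the source "FOO c", the spelled token "FOO" expands to the run "a b", while
// the spelled "c" lies outside any mapping and corresponds one-to-one to the
// expanded token "c".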
llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  if (Spelled.empty())
    return {};
  const auto &File = fileForSpelled(Spelled);

  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
  assert(SpelledFrontI < File.SpelledTokens.size());
  unsigned ExpandedBegin;
  if (!FrontMapping) {
    // No mapping starts before the first token of Spelled, so we don't have
    // to modify offsets.
    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledFrontI != FrontMapping->BeginSpelled) {
      // Spelled tokens don't cover the entire mapping, return an empty result.
      return {}; // FIXME: support macro arguments.
    }
    // Spelled tokens start at the beginning of this mapping.
    ExpandedBegin = FrontMapping->BeginExpanded;
  } else {
    // Spelled tokens start after the mapping ends (they start in the hole
    // between 2 mappings, or between a mapping and the end of the file).
    ExpandedBegin =
        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
  }

  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
  unsigned ExpandedEnd;
  if (!BackMapping) {
    // No mapping starts before the last token of Spelled, so we don't have to
    // modify offsets.
    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
  } else if (SpelledBackI < BackMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledBackI + 1 != BackMapping->EndSpelled) {
      // Spelled tokens don't cover the entire mapping, return an empty result.
      return {}; // FIXME: support macro arguments.
    }
    ExpandedEnd = BackMapping->EndExpanded;
  } else {
    // Spelled tokens end after the mapping ends.
    ExpandedEnd =
        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
  }

  assert(ExpandedBegin < ExpandedTokens.size());
  assert(ExpandedEnd < ExpandedTokens.size());
  // Avoid returning empty ranges.
  if (ExpandedBegin == ExpandedEnd)
    return {};
  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
                             ExpandedTokens.data() + ExpandedEnd)};
}

llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
  auto It = Files.find(FID);
  assert(It != Files.end());
  return It->second.SpelledTokens;
}

const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
  assert(Loc.isFileID());
  auto Tokens = spelledTokens(SourceMgr->getFileID(Loc));
  // Tokens are sorted by location; binary search for one starting at Loc.
  const auto *Tok = llvm::partition_point(
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
  // Guard against dereferencing past the end when every token starts before
  // Loc.
  if (Tok == Tokens.end() || Tok->location() != Loc)
    return nullptr;
  return Tok;
}

std::string TokenBuffer::Mapping::str() const {
  return std::string(
      llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
                    BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
}

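// An illustrative example for the inverse mapping below: given
// "#define ID(X) X" and the source "ID(a) b", the expanded tokens "a b" map
// back to the spelled tokens "ID ( a ) b", while the single expanded token
// "a" maps to the spelled "a" inside the macro argument.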
llvm::Optional<llvm::ArrayRef<syntax::Token>>
TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
  // Mapping an empty range is ambiguous in case of empty mappings at either
  // end of the range; bail out in that case.
  if (Expanded.empty())
    return llvm::None;

  const syntax::Token *BeginSpelled;
  const Mapping *BeginMapping;
  std::tie(BeginSpelled, BeginMapping) =
      spelledForExpandedToken(&Expanded.front());

  const syntax::Token *LastSpelled;
  const Mapping *LastMapping;
  std::tie(LastSpelled, LastMapping) =
      spelledForExpandedToken(&Expanded.back());

  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
  // FIXME: Handle multi-file changes by trying to map onto a common root.
  if (FID != SourceMgr->getFileID(LastSpelled->location()))
    return llvm::None;

  const MarkedFile &File = Files.find(FID)->second;

  // If both tokens are coming from a macro argument expansion, try to map to
  // the smallest part of the macro argument. The BeginMapping && LastMapping
  // check is only for performance; non-null mappings are a prerequisite for
  // Expanded.front() and Expanded.back() being part of a macro arg expansion.
  if (BeginMapping && LastMapping &&
      SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
      SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
    auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
                                                   Expanded.back(), *SourceMgr);
    // It might be the case that the tokens are arguments of different macro
    // calls, in which case we should continue with the logic below instead of
    // returning an empty range.
    if (CommonRange.isValid())
      return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
  }

  // Do not allow changes that don't cover a full expansion.
  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
  if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded)
    return llvm::None;
  if (LastMapping && LastMapping->EndExpanded != EndExpanded)
    return llvm::None;
  // All is good, return the result.
  return llvm::makeArrayRef(
      BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
                   : BeginSpelled,
      LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
                  : LastSpelled + 1);
}

TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
                                                  const Mapping &M) const {
  Expansion E;
  E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
                                 F.SpelledTokens.data() + M.EndSpelled);
  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded,
                                  ExpandedTokens.data() + M.EndExpanded);
  return E;
}

const TokenBuffer::MarkedFile &
TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  assert(!Spelled.empty());
  assert(Spelled.front().location().isFileID() && "not a spelled token");
  auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
  assert(FileIt != Files.end() && "file not tracked by token buffer");
  const auto &File = FileIt->second;
  assert(File.SpelledTokens.data() <= Spelled.data() &&
         Spelled.end() <=
             (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
         "Tokens not in spelled range");
#ifndef NDEBUG
  auto T1 = Spelled.back().location();
  auto T2 = File.SpelledTokens.back().location();
  assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
#endif
  return File;
}

llvm::Optional<TokenBuffer::Expansion>
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
  assert(Spelled);
  const auto &File = fileForSpelled(*Spelled);

  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginSpelled < SpelledIndex;
  });
  if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
    return llvm::None;
  return makeExpansion(File, *M);
}

std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
    llvm::ArrayRef<syntax::Token> Spelled) const {
  if (Spelled.empty())
    return {};
  const auto &File = fileForSpelled(Spelled);

  // Find the first overlapping range, and then copy until we stop overlapping.
  unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
  unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.EndSpelled <= SpelledBeginIndex;
  });
  std::vector<TokenBuffer::Expansion> Expansions;
  for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M)
    Expansions.push_back(makeExpansion(File, *M));
  return Expansions;
}

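// E.g., for a location Loc that falls exactly between the tokens "a" and
// "bc", both tokens touch Loc and are returned; a location strictly inside a
// single token yields just that token.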
llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              llvm::ArrayRef<syntax::Token> Tokens) {
  assert(Loc.isFileID());

  auto *Right = llvm::partition_point(
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
  bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
  bool AcceptLeft =
      Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
  return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
                            Right + (AcceptRight ? 1 : 0));
}

llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              const syntax::TokenBuffer &Tokens) {
  return spelledTokensTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  llvm::ArrayRef<syntax::Token> Tokens) {
  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
    if (Tok.kind() == tok::identifier)
      return &Tok;
  }
  return nullptr;
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  const syntax::TokenBuffer &Tokens) {
  return spelledIdentifierTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

std::vector<const syntax::Token *>
TokenBuffer::macroExpansions(FileID FID) const {
  auto FileIt = Files.find(FID);
  assert(FileIt != Files.end() && "file not tracked by token buffer");
  auto &File = FileIt->second;
  std::vector<const syntax::Token *> Expansions;
  auto &Spelled = File.SpelledTokens;
  for (auto Mapping : File.Mappings) {
    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
    if (Token->kind() == tok::TokenKind::identifier)
      Expansions.push_back(Token);
  }
  return Expansions;
}

std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
                                            const SourceManager &SM,
                                            const LangOptions &LO) {
  std::vector<syntax::Token> Tokens;
  IdentifierTable Identifiers(LO);
  auto AddToken = [&](clang::Token T) {
    // Fill the proper token kind for keywords, etc.
    if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
        !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
      T.setIdentifierInfo(&II);
      T.setKind(II.getTokenID());
    }
    Tokens.push_back(syntax::Token(T));
  };

  auto SrcBuffer = SM.getBufferData(FR.file());
  Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
          SrcBuffer.data() + FR.beginOffset(),
          // We can't make BufEnd point to FR.endOffset, as Lexer requires a
          // null-terminated buffer.
          SrcBuffer.data() + SrcBuffer.size());

  clang::Token T;
  while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
    AddToken(T);
  // LexFromRawLexer returns true when it parses the last token of the file;
  // add it iff it starts within the range we are interested in.
  if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
    AddToken(T);
  return Tokens;
}

std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
                                            const LangOptions &LO) {
  return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
}
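
// Illustrative usage: raw-lex an entire file without running the
// preprocessor. No macro expansion is performed, but keywords still receive
// their proper token kinds:
//   std::vector<syntax::Token> Toks = syntax::tokenize(FID, SM, LangOpts);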

/// Records information required to construct mappings for the token buffer
/// that we are collecting.
class TokenCollector::CollectPPExpansions : public PPCallbacks {
public:
  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}

  /// A disabled instance stops reporting anything to the TokenCollector.
  /// This ensures that uses of the preprocessor after
  /// TokenCollector::consume() is called do not access the (possibly invalid)
  /// collector instance.
  void disable() { Collector = nullptr; }

  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
                    SourceRange Range, const MacroArgs *Args) override {
    if (!Collector)
      return;
    const auto &SM = Collector->PP.getSourceManager();
    // Only record top-level expansions that directly produce expanded tokens.
    // This excludes those where:
    //   - the macro use is inside a macro body,
    //   - the macro appears in an argument to another macro.
    // However, macro expansion isn't really a tree, it's token rewrite rules,
    // so there are other cases, e.g.
    //   #define B(X) X
    //   #define A 1 + B
    //   A(2)
    // Both A and B produce expanded tokens, though the macro name 'B' comes
    // from an expansion. The best we can do is merge the mappings for both.

    // The *last* token of any top-level macro expansion must be in a file.
    // (In the example above, see the closing paren of the expansion of B.)
    if (!Range.getEnd().isFileID())
      return;
    // If there's a current expansion that encloses this one, this one can't
    // be top-level.
    if (LastExpansionEnd.isValid() &&
        !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
      return;

    // If the macro invocation (B) starts in a macro (A) but ends in a file,
    // we'll create a merged mapping for A + B by overwriting the endpoint for
    // A's startpoint.
    if (!Range.getBegin().isFileID()) {
      Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
      assert(Collector->Expansions.count(Range.getBegin()) &&
             "Overlapping macros should have same expansion location");
    }

    Collector->Expansions[Range.getBegin()] = Range.getEnd();
    LastExpansionEnd = Range.getEnd();
  }
  // FIXME: handle directives like #pragma, #include, etc.
private:
  TokenCollector *Collector;
  /// Used to detect recursive macro expansions.
  SourceLocation LastExpansionEnd;
};

/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
/// implementation tracks the tokens, macro expansions and directives coming
/// from the preprocessor and:
/// - for each token, figures out if it is a part of an expanded token stream,
///   spelled token stream or both. Stores the tokens appropriately.
/// - records mappings from the spelled to expanded token ranges, e.g. for
///   macro expansions; an example of the result is sketched below.
/// FIXME: also properly record:
/// - #include directives,
/// - #pragma, #line and other PP directives,
/// - skipped pp regions,
/// - ...
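///
/// For example (an illustrative sketch), for the input
///   #define FOO 1 2
///   int a = FOO;
/// the expanded stream is "int a = 1 2 ;", the spelled stream of the main
/// file additionally contains the directive tokens "# define FOO 1 2", and a
/// mapping links the spelled token "FOO" to the expanded tokens "1 2".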

TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
  // Collect the expanded token stream during preprocessing.
  PP.setTokenWatcher([this](const clang::Token &T) {
    if (T.isAnnotation())
      return;
    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
                                          << "Token: "
                                          << syntax::Token(T).dumpForTests(
                                                 this->PP.getSourceManager())
                                          << "\n");
    Expanded.push_back(syntax::Token(T));
  });
  // And locations of macro calls, to properly recover boundaries of those in
  // case of empty expansions.
  auto CB = std::make_unique<CollectPPExpansions>(*this);
  this->Collector = CB.get();
  PP.addPPCallbacks(std::move(CB));
}

/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
/// token stream.
class TokenCollector::Builder {
public:
  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
          const SourceManager &SM, const LangOptions &LangOpts)
      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
        LangOpts(LangOpts) {
    Result.ExpandedTokens = std::move(Expanded);
  }

  TokenBuffer build() && {
    assert(!Result.ExpandedTokens.empty());
    assert(Result.ExpandedTokens.back().kind() == tok::eof);

    // Tokenize every file that contributed tokens to the expanded stream.
    buildSpelledTokens();

    // The expanded token stream consists of runs of tokens that came from
    // the same source (a macro expansion, part of a file etc).
    // Between these runs are the logical positions of spelled tokens that
    // didn't expand to anything.
    while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
      // Create empty mappings for spelled tokens that expanded to nothing
      // here. May advance NextSpelled, but NextExpanded is unchanged.
      discard();
      // Create a mapping for a contiguous run of expanded tokens.
      // Advances NextExpanded past the run, and NextSpelled accordingly.
      unsigned OldPosition = NextExpanded;
      advance();
      if (NextExpanded == OldPosition)
        diagnoseAdvanceFailure();
    }
    // If any tokens remain in any of the files, they didn't expand to
    // anything. Create empty mappings up until the end of the file.
    for (const auto &File : Result.Files)
      discard(File.first);

#ifndef NDEBUG
    for (auto &pair : Result.Files) {
      auto &mappings = pair.second.Mappings;
      assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
                                          const TokenBuffer::Mapping &M2) {
        return M1.BeginSpelled < M2.BeginSpelled &&
               M1.EndSpelled < M2.EndSpelled &&
               M1.BeginExpanded < M2.BeginExpanded &&
               M1.EndExpanded < M2.EndExpanded;
      }));
    }
#endif

    return std::move(Result);
  }

private:
  // Consume a sequence of spelled tokens that didn't expand to anything.
  // In the simplest case, skips spelled tokens until finding one that produced
  // the NextExpanded token, and creates an empty mapping for them.
  // If Drain is provided, skips remaining tokens from that file instead.
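  // E.g., a directive line like "#pragma once" produces no expanded tokens,
  // so its spelled tokens end up covered by an empty mapping (one with
  // BeginExpanded == EndExpanded).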
  void discard(llvm::Optional<FileID> Drain = llvm::None) {
    SourceLocation Target =
        Drain ? SM.getLocForEndOfFile(*Drain)
              : SM.getExpansionLoc(
                    Result.ExpandedTokens[NextExpanded].location());
    FileID File = SM.getFileID(Target);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    TokenBuffer::Mapping Mapping;
    Mapping.BeginSpelled = NextSpelled;
    // When dropping trailing tokens from a file, the empty mapping should
    // be positioned within the file's expanded-token range (at the end).
    Mapping.BeginExpanded = Mapping.EndExpanded =
        Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
    // We may want to split into several adjacent empty mappings.
    // FlushMapping() emits the current mapping and starts a new one.
    auto FlushMapping = [&, this] {
      Mapping.EndSpelled = NextSpelled;
      if (Mapping.BeginSpelled != Mapping.EndSpelled)
        Result.Files[File].Mappings.push_back(Mapping);
      Mapping.BeginSpelled = NextSpelled;
    };

    while (NextSpelled < SpelledTokens.size() &&
           SpelledTokens[NextSpelled].location() < Target) {
      // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
      // then we want to partition our (empty) mapping.
      //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
      SourceLocation KnownEnd =
          CollectedExpansions.lookup(SpelledTokens[NextSpelled].location());
      if (KnownEnd.isValid()) {
        FlushMapping(); // Emits [Start, NextSpelled)
        while (NextSpelled < SpelledTokens.size() &&
               SpelledTokens[NextSpelled].location() <= KnownEnd)
          ++NextSpelled;
        FlushMapping(); // Emits [NextSpelled, KnownEnd]
        // Now the loop continues and will emit (KnownEnd, Target).
      } else {
        ++NextSpelled;
      }
    }
    FlushMapping();
  }

  // Consumes the NextExpanded token and others that are part of the same run.
  // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
  // (unless this is a run of file tokens, which we represent with no mapping).
  void advance() {
    const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
    SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
    FileID File = SM.getFileID(Expansion);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    if (Tok.location().isFileID()) {
      // A run of file tokens continues while the expanded/spelled tokens
      // match.
      while (NextSpelled < SpelledTokens.size() &&
             NextExpanded < Result.ExpandedTokens.size() &&
             SpelledTokens[NextSpelled].location() ==
                 Result.ExpandedTokens[NextExpanded].location()) {
        ++NextSpelled;
        ++NextExpanded;
      }
      // We need no mapping for file tokens copied to the expanded stream.
    } else {
      // We found a new macro expansion. We should have its spelling bounds.
      auto End = CollectedExpansions.lookup(Expansion);
      assert(End.isValid() && "Macro expansion wasn't captured?");

      // Mapping starts here...
      TokenBuffer::Mapping Mapping;
      Mapping.BeginExpanded = NextExpanded;
      Mapping.BeginSpelled = NextSpelled;
      // ... consumes spelled tokens within bounds we captured ...
      while (NextSpelled < SpelledTokens.size() &&
             SpelledTokens[NextSpelled].location() <= End)
        ++NextSpelled;
      // ... consumes expanded tokens rooted at the same expansion ...
      while (NextExpanded < Result.ExpandedTokens.size() &&
             SM.getExpansionLoc(
                 Result.ExpandedTokens[NextExpanded].location()) == Expansion)
        ++NextExpanded;
      // ... and ends here.
      Mapping.EndExpanded = NextExpanded;
      Mapping.EndSpelled = NextSpelled;
      Result.Files[File].Mappings.push_back(Mapping);
    }
  }

  // advance() is supposed to consume at least one token - if not, we crash.
  void diagnoseAdvanceFailure() {
#ifndef NDEBUG
    // Show the failed-to-map token in context.
    for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
         I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
      const char *L =
          (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
      llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
    }
#endif
    llvm_unreachable("Couldn't map expanded token to spelled tokens!");
  }

  /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
  /// ranges for each of the files.
  void buildSpelledTokens() {
    for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
      const auto &Tok = Result.ExpandedTokens[I];
      auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
      auto It = Result.Files.try_emplace(FID);
      TokenBuffer::MarkedFile &File = It.first->second;

      // The eof token should not be considered part of the main file's range.
      File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;

      if (!It.second)
        continue; // we have seen this file before.
      // This is the first time we see this file.
      File.BeginExpanded = I;
      File.SpelledTokens = tokenize(FID, SM, LangOpts);
    }
  }

  TokenBuffer Result;
  unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
  llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
  PPExpansions CollectedExpansions;
  const SourceManager &SM;
  const LangOptions &LangOpts;
};


TokenBuffer TokenCollector::consume() && {
  PP.setTokenWatcher(nullptr);
  Collector->disable();
  return Builder(std::move(Expanded), std::move(Expansions),
                 PP.getSourceManager(), PP.getLangOpts())
      .build();
}

std::string syntax::Token::str() const {
  return std::string(llvm::formatv("Token({0}, length = {1})",
                                   tok::getTokenName(kind()), length()));
}

std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
  return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
                                   tok::getTokenName(kind()), length()));
}

std::string TokenBuffer::dumpForTests() const {
  auto PrintToken = [this](const syntax::Token &T) -> std::string {
    if (T.kind() == tok::eof)
      return "<eof>";
    return std::string(T.text(*SourceMgr));
  };

  auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
                                        llvm::ArrayRef<syntax::Token> Tokens) {
    if (Tokens.empty()) {
      OS << "<empty>";
      return;
    }
    OS << Tokens[0].text(*SourceMgr);
    for (unsigned I = 1; I < Tokens.size(); ++I) {
      if (Tokens[I].kind() == tok::eof)
        continue;
      OS << " " << PrintToken(Tokens[I]);
    }
  };

  std::string Dump;
  llvm::raw_string_ostream OS(Dump);

  OS << "expanded tokens:\n"
     << "  ";
  // (!) we do not show '<eof>'.
  DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
  OS << "\n";

  std::vector<FileID> Keys;
  for (auto F : Files)
    Keys.push_back(F.first);
  llvm::sort(Keys);

  for (FileID ID : Keys) {
    const MarkedFile &File = Files.find(ID)->second;
    auto *Entry = SourceMgr->getFileEntryForID(ID);
    if (!Entry)
      continue; // Skip builtin files.
    OS << llvm::formatv("file '{0}'\n", Entry->getName())
       << "  spelled tokens:\n"
       << "    ";
    DumpTokens(OS, File.SpelledTokens);
    OS << "\n";

    if (File.Mappings.empty()) {
      OS << "  no mappings.\n";
      continue;
    }
    OS << "  mappings:\n";
    for (auto &M : File.Mappings) {
      OS << llvm::formatv(
          "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
          PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
          M.EndSpelled == File.SpelledTokens.size()
              ? "<eof>"
              : PrintToken(File.SpelledTokens[M.EndSpelled]),
          M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
          M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
          M.EndExpanded);
    }
  }
  return OS.str();
}