10b57cec5SDimitry Andric //===- Tokens.cpp - collect tokens from preprocessing ---------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric #include "clang/Tooling/Syntax/Tokens.h" 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #include "clang/Basic/Diagnostic.h" 110b57cec5SDimitry Andric #include "clang/Basic/IdentifierTable.h" 120b57cec5SDimitry Andric #include "clang/Basic/LLVM.h" 130b57cec5SDimitry Andric #include "clang/Basic/LangOptions.h" 140b57cec5SDimitry Andric #include "clang/Basic/SourceLocation.h" 150b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h" 160b57cec5SDimitry Andric #include "clang/Basic/TokenKinds.h" 170b57cec5SDimitry Andric #include "clang/Lex/PPCallbacks.h" 180b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h" 190b57cec5SDimitry Andric #include "clang/Lex/Token.h" 200b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 210b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 220b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 230b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 240b57cec5SDimitry Andric #include "llvm/Support/FormatVariadic.h" 250b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 260b57cec5SDimitry Andric #include <algorithm> 270b57cec5SDimitry Andric #include <cassert> 280b57cec5SDimitry Andric #include <iterator> 29bdd1243dSDimitry Andric #include <optional> 300b57cec5SDimitry Andric #include <string> 310b57cec5SDimitry Andric #include <utility> 320b57cec5SDimitry Andric #include <vector> 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric using namespace clang; 350b57cec5SDimitry Andric using namespace clang::syntax; 360b57cec5SDimitry Andric 375ffd83dbSDimitry Andric namespace { 385ffd83dbSDimitry Andric // Finds the smallest consecutive subsuquence of Toks that covers R. 395ffd83dbSDimitry Andric llvm::ArrayRef<syntax::Token> 405ffd83dbSDimitry Andric getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R, 415ffd83dbSDimitry Andric const SourceManager &SM) { 425ffd83dbSDimitry Andric if (R.isInvalid()) 435ffd83dbSDimitry Andric return {}; 445ffd83dbSDimitry Andric const syntax::Token *Begin = 455ffd83dbSDimitry Andric llvm::partition_point(Toks, [&](const syntax::Token &T) { 465ffd83dbSDimitry Andric return SM.isBeforeInTranslationUnit(T.location(), R.getBegin()); 475ffd83dbSDimitry Andric }); 485ffd83dbSDimitry Andric const syntax::Token *End = 495ffd83dbSDimitry Andric llvm::partition_point(Toks, [&](const syntax::Token &T) { 505ffd83dbSDimitry Andric return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location()); 515ffd83dbSDimitry Andric }); 525ffd83dbSDimitry Andric if (Begin > End) 535ffd83dbSDimitry Andric return {}; 545ffd83dbSDimitry Andric return {Begin, End}; 555ffd83dbSDimitry Andric } 565ffd83dbSDimitry Andric 576246ae0bSDimitry Andric // Finds the range within FID corresponding to expanded tokens [First, Last]. 586246ae0bSDimitry Andric // Prev precedes First and Next follows Last, these must *not* be included. 596246ae0bSDimitry Andric // If no range satisfies the criteria, returns an invalid range. 606246ae0bSDimitry Andric // 615ffd83dbSDimitry Andric // #define ID(x) x 625ffd83dbSDimitry Andric // ID(ID(ID(a1) a2)) 635ffd83dbSDimitry Andric // ~~ -> a1 645ffd83dbSDimitry Andric // ~~ -> a2 655ffd83dbSDimitry Andric // ~~~~~~~~~ -> a1 a2 666246ae0bSDimitry Andric SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last, 676246ae0bSDimitry Andric SourceLocation Prev, SourceLocation Next, 686246ae0bSDimitry Andric FileID TargetFile, 695ffd83dbSDimitry Andric const SourceManager &SM) { 706246ae0bSDimitry Andric // There are two main parts to this algorithm: 716246ae0bSDimitry Andric // - identifying which spelled range covers the expanded tokens 726246ae0bSDimitry Andric // - validating that this range doesn't cover any extra tokens (First/Last) 736246ae0bSDimitry Andric // 746246ae0bSDimitry Andric // We do these in order. However as we transform the expanded range into the 756246ae0bSDimitry Andric // spelled one, we adjust First/Last so the validation remains simple. 766246ae0bSDimitry Andric 776246ae0bSDimitry Andric assert(SM.getSLocEntry(TargetFile).isFile()); 786246ae0bSDimitry Andric // In most cases, to select First and Last we must return their expansion 796246ae0bSDimitry Andric // range, i.e. the whole of any macros they are included in. 806246ae0bSDimitry Andric // 816246ae0bSDimitry Andric // When First and Last are part of the *same macro arg* of a macro written 826246ae0bSDimitry Andric // in TargetFile, we that slice of the arg, i.e. their spelling range. 836246ae0bSDimitry Andric // 846246ae0bSDimitry Andric // Unwrap such macro calls. If the target file has A(B(C)), the 856246ae0bSDimitry Andric // SourceLocation stack of a token inside C shows us the expansion of A first, 866246ae0bSDimitry Andric // then B, then any macros inside C's body, then C itself. 876246ae0bSDimitry Andric // (This is the reverse of the order the PP applies the expansions in). 886246ae0bSDimitry Andric while (First.isMacroID() && Last.isMacroID()) { 896246ae0bSDimitry Andric auto DecFirst = SM.getDecomposedLoc(First); 906246ae0bSDimitry Andric auto DecLast = SM.getDecomposedLoc(Last); 916246ae0bSDimitry Andric auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion(); 926246ae0bSDimitry Andric auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion(); 936246ae0bSDimitry Andric 946246ae0bSDimitry Andric if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion()) 955ffd83dbSDimitry Andric break; 966246ae0bSDimitry Andric // Locations are in the same macro arg if they expand to the same place. 976246ae0bSDimitry Andric // (They may still have different FileIDs - an arg can have >1 chunks!) 986246ae0bSDimitry Andric if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart()) 995ffd83dbSDimitry Andric break; 1006246ae0bSDimitry Andric // Careful, given: 1016246ae0bSDimitry Andric // #define HIDE ID(ID(a)) 1026246ae0bSDimitry Andric // ID(ID(HIDE)) 1036246ae0bSDimitry Andric // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2. 1046246ae0bSDimitry Andric // We distinguish them by whether the macro expands into the target file. 1056246ae0bSDimitry Andric // Fortunately, the target file ones will always appear first. 10606c3fb27SDimitry Andric auto ExpFileID = SM.getFileID(ExpFirst.getExpansionLocStart()); 10706c3fb27SDimitry Andric if (ExpFileID == TargetFile) 1086246ae0bSDimitry Andric break; 1096246ae0bSDimitry Andric // Replace each endpoint with its spelling inside the macro arg. 1106246ae0bSDimitry Andric // (This is getImmediateSpellingLoc without repeating lookups). 1116246ae0bSDimitry Andric First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second); 1126246ae0bSDimitry Andric Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second); 1136246ae0bSDimitry Andric } 1146246ae0bSDimitry Andric 1156246ae0bSDimitry Andric // In all remaining cases we need the full containing macros. 1166246ae0bSDimitry Andric // If this overlaps Prev or Next, then no range is possible. 1176246ae0bSDimitry Andric SourceRange Candidate = 1186246ae0bSDimitry Andric SM.getExpansionRange(SourceRange(First, Last)).getAsRange(); 1196246ae0bSDimitry Andric auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin()); 12006c3fb27SDimitry Andric auto DecLast = SM.getDecomposedExpansionLoc(Candidate.getEnd()); 1216246ae0bSDimitry Andric // Can end up in the wrong file due to bad input or token-pasting shenanigans. 12206c3fb27SDimitry Andric if (Candidate.isInvalid() || DecFirst.first != TargetFile || 12306c3fb27SDimitry Andric DecLast.first != TargetFile) 1246246ae0bSDimitry Andric return SourceRange(); 1256246ae0bSDimitry Andric // Check bounds, which may still be inside macros. 1266246ae0bSDimitry Andric if (Prev.isValid()) { 1276246ae0bSDimitry Andric auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin()); 1286246ae0bSDimitry Andric if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second) 1296246ae0bSDimitry Andric return SourceRange(); 1306246ae0bSDimitry Andric } 1316246ae0bSDimitry Andric if (Next.isValid()) { 1326246ae0bSDimitry Andric auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd()); 1336246ae0bSDimitry Andric if (Dec.first != DecLast.first || Dec.second <= DecLast.second) 1346246ae0bSDimitry Andric return SourceRange(); 1356246ae0bSDimitry Andric } 1366246ae0bSDimitry Andric // Now we know that Candidate is a file range that covers [First, Last] 1376246ae0bSDimitry Andric // without encroaching on {Prev, Next}. Ship it! 1386246ae0bSDimitry Andric return Candidate; 1395ffd83dbSDimitry Andric } 1405ffd83dbSDimitry Andric 1415ffd83dbSDimitry Andric } // namespace 1425ffd83dbSDimitry Andric 1430b57cec5SDimitry Andric syntax::Token::Token(SourceLocation Location, unsigned Length, 1440b57cec5SDimitry Andric tok::TokenKind Kind) 1450b57cec5SDimitry Andric : Location(Location), Length(Length), Kind(Kind) { 1460b57cec5SDimitry Andric assert(Location.isValid()); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric syntax::Token::Token(const clang::Token &T) 1500b57cec5SDimitry Andric : Token(T.getLocation(), T.getLength(), T.getKind()) { 1510b57cec5SDimitry Andric assert(!T.isAnnotation()); 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric llvm::StringRef syntax::Token::text(const SourceManager &SM) const { 1550b57cec5SDimitry Andric bool Invalid = false; 1560b57cec5SDimitry Andric const char *Start = SM.getCharacterData(location(), &Invalid); 1570b57cec5SDimitry Andric assert(!Invalid); 1580b57cec5SDimitry Andric return llvm::StringRef(Start, length()); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric FileRange syntax::Token::range(const SourceManager &SM) const { 1620b57cec5SDimitry Andric assert(location().isFileID() && "must be a spelled token"); 1630b57cec5SDimitry Andric FileID File; 1640b57cec5SDimitry Andric unsigned StartOffset; 1650b57cec5SDimitry Andric std::tie(File, StartOffset) = SM.getDecomposedLoc(location()); 1660b57cec5SDimitry Andric return FileRange(File, StartOffset, StartOffset + length()); 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric FileRange syntax::Token::range(const SourceManager &SM, 1700b57cec5SDimitry Andric const syntax::Token &First, 1710b57cec5SDimitry Andric const syntax::Token &Last) { 1720b57cec5SDimitry Andric auto F = First.range(SM); 1730b57cec5SDimitry Andric auto L = Last.range(SM); 1740b57cec5SDimitry Andric assert(F.file() == L.file() && "tokens from different files"); 1755ffd83dbSDimitry Andric assert((F == L || F.endOffset() <= L.beginOffset()) && 1765ffd83dbSDimitry Andric "wrong order of tokens"); 1770b57cec5SDimitry Andric return FileRange(F.file(), F.beginOffset(), L.endOffset()); 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) { 1810b57cec5SDimitry Andric return OS << T.str(); 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset) 1850b57cec5SDimitry Andric : File(File), Begin(BeginOffset), End(EndOffset) { 1860b57cec5SDimitry Andric assert(File.isValid()); 1870b57cec5SDimitry Andric assert(BeginOffset <= EndOffset); 1880b57cec5SDimitry Andric } 1890b57cec5SDimitry Andric 1900b57cec5SDimitry Andric FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc, 1910b57cec5SDimitry Andric unsigned Length) { 1920b57cec5SDimitry Andric assert(BeginLoc.isValid()); 1930b57cec5SDimitry Andric assert(BeginLoc.isFileID()); 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc); 1960b57cec5SDimitry Andric End = Begin + Length; 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc, 1990b57cec5SDimitry Andric SourceLocation EndLoc) { 2000b57cec5SDimitry Andric assert(BeginLoc.isValid()); 2010b57cec5SDimitry Andric assert(BeginLoc.isFileID()); 2020b57cec5SDimitry Andric assert(EndLoc.isValid()); 2030b57cec5SDimitry Andric assert(EndLoc.isFileID()); 2040b57cec5SDimitry Andric assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc)); 2050b57cec5SDimitry Andric assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc)); 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc); 2080b57cec5SDimitry Andric End = SM.getFileOffset(EndLoc); 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, 2120b57cec5SDimitry Andric const FileRange &R) { 2130b57cec5SDimitry Andric return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})", 2140b57cec5SDimitry Andric R.file().getHashValue(), R.beginOffset(), 2150b57cec5SDimitry Andric R.endOffset()); 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric llvm::StringRef FileRange::text(const SourceManager &SM) const { 2190b57cec5SDimitry Andric bool Invalid = false; 2200b57cec5SDimitry Andric StringRef Text = SM.getBufferData(File, &Invalid); 2210b57cec5SDimitry Andric if (Invalid) 2220b57cec5SDimitry Andric return ""; 2230b57cec5SDimitry Andric assert(Begin <= Text.size()); 2240b57cec5SDimitry Andric assert(End <= Text.size()); 2250b57cec5SDimitry Andric return Text.substr(Begin, length()); 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric 228fe6060f1SDimitry Andric void TokenBuffer::indexExpandedTokens() { 229fe6060f1SDimitry Andric // No-op if the index is already created. 230fe6060f1SDimitry Andric if (!ExpandedTokIndex.empty()) 231fe6060f1SDimitry Andric return; 232fe6060f1SDimitry Andric ExpandedTokIndex.reserve(ExpandedTokens.size()); 233fe6060f1SDimitry Andric // Index ExpandedTokens for faster lookups by SourceLocation. 234fe6060f1SDimitry Andric for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) { 235fe6060f1SDimitry Andric SourceLocation Loc = ExpandedTokens[I].location(); 236fe6060f1SDimitry Andric if (Loc.isValid()) 237fe6060f1SDimitry Andric ExpandedTokIndex[Loc] = I; 238fe6060f1SDimitry Andric } 239fe6060f1SDimitry Andric } 240fe6060f1SDimitry Andric 241480093f4SDimitry Andric llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { 242fe6060f1SDimitry Andric if (R.isInvalid()) 243fe6060f1SDimitry Andric return {}; 244fe6060f1SDimitry Andric if (!ExpandedTokIndex.empty()) { 245fe6060f1SDimitry Andric // Quick lookup if `R` is a token range. 246fe6060f1SDimitry Andric // This is a huge win since majority of the users use ranges provided by an 247fe6060f1SDimitry Andric // AST. Ranges in AST are token ranges from expanded token stream. 248fe6060f1SDimitry Andric const auto B = ExpandedTokIndex.find(R.getBegin()); 249fe6060f1SDimitry Andric const auto E = ExpandedTokIndex.find(R.getEnd()); 250fe6060f1SDimitry Andric if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) { 251fe6060f1SDimitry Andric const Token *L = ExpandedTokens.data() + B->getSecond(); 252fe6060f1SDimitry Andric // Add 1 to End to make a half-open range. 253fe6060f1SDimitry Andric const Token *R = ExpandedTokens.data() + E->getSecond() + 1; 254fe6060f1SDimitry Andric if (L > R) 255fe6060f1SDimitry Andric return {}; 256fe6060f1SDimitry Andric return {L, R}; 257fe6060f1SDimitry Andric } 258fe6060f1SDimitry Andric } 259fe6060f1SDimitry Andric // Slow case. Use `isBeforeInTranslationUnit` to binary search for the 260fe6060f1SDimitry Andric // required range. 2615ffd83dbSDimitry Andric return getTokensCovering(expandedTokens(), R, *SourceMgr); 262480093f4SDimitry Andric } 263480093f4SDimitry Andric 264480093f4SDimitry Andric CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { 265480093f4SDimitry Andric return CharSourceRange( 266480093f4SDimitry Andric SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)), 267480093f4SDimitry Andric /*IsTokenRange=*/false); 268480093f4SDimitry Andric } 269480093f4SDimitry Andric 2700b57cec5SDimitry Andric std::pair<const syntax::Token *, const TokenBuffer::Mapping *> 2710b57cec5SDimitry Andric TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { 2720b57cec5SDimitry Andric assert(Expanded); 2730b57cec5SDimitry Andric assert(ExpandedTokens.data() <= Expanded && 2740b57cec5SDimitry Andric Expanded < ExpandedTokens.data() + ExpandedTokens.size()); 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric auto FileIt = Files.find( 2770b57cec5SDimitry Andric SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location()))); 2780b57cec5SDimitry Andric assert(FileIt != Files.end() && "no file for an expanded token"); 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric const MarkedFile &File = FileIt->second; 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric unsigned ExpandedIndex = Expanded - ExpandedTokens.data(); 2830b57cec5SDimitry Andric // Find the first mapping that produced tokens after \p Expanded. 2840b57cec5SDimitry Andric auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) { 2850b57cec5SDimitry Andric return M.BeginExpanded <= ExpandedIndex; 2860b57cec5SDimitry Andric }); 2870b57cec5SDimitry Andric // Our token could only be produced by the previous mapping. 2880b57cec5SDimitry Andric if (It == File.Mappings.begin()) { 2890b57cec5SDimitry Andric // No previous mapping, no need to modify offsets. 2905ffd83dbSDimitry Andric return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], 2915ffd83dbSDimitry Andric /*Mapping=*/nullptr}; 2920b57cec5SDimitry Andric } 2930b57cec5SDimitry Andric --It; // 'It' now points to last mapping that started before our token. 2940b57cec5SDimitry Andric 2950b57cec5SDimitry Andric // Check if the token is part of the mapping. 2960b57cec5SDimitry Andric if (ExpandedIndex < It->EndExpanded) 2975ffd83dbSDimitry Andric return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It}; 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric // Not part of the mapping, use the index from previous mapping to compute the 3000b57cec5SDimitry Andric // corresponding spelled token. 3010b57cec5SDimitry Andric return { 3020b57cec5SDimitry Andric &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], 3035ffd83dbSDimitry Andric /*Mapping=*/nullptr}; 3045ffd83dbSDimitry Andric } 3055ffd83dbSDimitry Andric 3065ffd83dbSDimitry Andric const TokenBuffer::Mapping * 3075ffd83dbSDimitry Andric TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, 3085ffd83dbSDimitry Andric const syntax::Token *Spelled) { 3095ffd83dbSDimitry Andric assert(F.SpelledTokens.data() <= Spelled); 3105ffd83dbSDimitry Andric unsigned SpelledI = Spelled - F.SpelledTokens.data(); 3115ffd83dbSDimitry Andric assert(SpelledI < F.SpelledTokens.size()); 3125ffd83dbSDimitry Andric 3135ffd83dbSDimitry Andric auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { 3145ffd83dbSDimitry Andric return M.BeginSpelled <= SpelledI; 3155ffd83dbSDimitry Andric }); 3165ffd83dbSDimitry Andric if (It == F.Mappings.begin()) 3175ffd83dbSDimitry Andric return nullptr; 3185ffd83dbSDimitry Andric --It; 3195ffd83dbSDimitry Andric return &*It; 3205ffd83dbSDimitry Andric } 3215ffd83dbSDimitry Andric 3225ffd83dbSDimitry Andric llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1> 3235ffd83dbSDimitry Andric TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { 3245ffd83dbSDimitry Andric if (Spelled.empty()) 3255ffd83dbSDimitry Andric return {}; 326e8d8bef9SDimitry Andric const auto &File = fileForSpelled(Spelled); 3275ffd83dbSDimitry Andric 3285ffd83dbSDimitry Andric auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); 3295ffd83dbSDimitry Andric unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); 3305ffd83dbSDimitry Andric assert(SpelledFrontI < File.SpelledTokens.size()); 3315ffd83dbSDimitry Andric unsigned ExpandedBegin; 3325ffd83dbSDimitry Andric if (!FrontMapping) { 3335ffd83dbSDimitry Andric // No mapping that starts before the first token of Spelled, we don't have 3345ffd83dbSDimitry Andric // to modify offsets. 3355ffd83dbSDimitry Andric ExpandedBegin = File.BeginExpanded + SpelledFrontI; 3365ffd83dbSDimitry Andric } else if (SpelledFrontI < FrontMapping->EndSpelled) { 3375ffd83dbSDimitry Andric // This mapping applies to Spelled tokens. 3385ffd83dbSDimitry Andric if (SpelledFrontI != FrontMapping->BeginSpelled) { 3395ffd83dbSDimitry Andric // Spelled tokens don't cover the entire mapping, returning empty result. 3405ffd83dbSDimitry Andric return {}; // FIXME: support macro arguments. 3415ffd83dbSDimitry Andric } 3425ffd83dbSDimitry Andric // Spelled tokens start at the beginning of this mapping. 3435ffd83dbSDimitry Andric ExpandedBegin = FrontMapping->BeginExpanded; 3445ffd83dbSDimitry Andric } else { 3455ffd83dbSDimitry Andric // Spelled tokens start after the mapping ends (they start in the hole 3465ffd83dbSDimitry Andric // between 2 mappings, or between a mapping and end of the file). 3475ffd83dbSDimitry Andric ExpandedBegin = 3485ffd83dbSDimitry Andric FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); 3495ffd83dbSDimitry Andric } 3505ffd83dbSDimitry Andric 3515ffd83dbSDimitry Andric auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); 3525ffd83dbSDimitry Andric unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); 3535ffd83dbSDimitry Andric unsigned ExpandedEnd; 3545ffd83dbSDimitry Andric if (!BackMapping) { 3555ffd83dbSDimitry Andric // No mapping that starts before the last token of Spelled, we don't have to 3565ffd83dbSDimitry Andric // modify offsets. 3575ffd83dbSDimitry Andric ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; 3585ffd83dbSDimitry Andric } else if (SpelledBackI < BackMapping->EndSpelled) { 3595ffd83dbSDimitry Andric // This mapping applies to Spelled tokens. 3605ffd83dbSDimitry Andric if (SpelledBackI + 1 != BackMapping->EndSpelled) { 3615ffd83dbSDimitry Andric // Spelled tokens don't cover the entire mapping, returning empty result. 3625ffd83dbSDimitry Andric return {}; // FIXME: support macro arguments. 3635ffd83dbSDimitry Andric } 3645ffd83dbSDimitry Andric ExpandedEnd = BackMapping->EndExpanded; 3655ffd83dbSDimitry Andric } else { 3665ffd83dbSDimitry Andric // Spelled tokens end after the mapping ends. 3675ffd83dbSDimitry Andric ExpandedEnd = 3685ffd83dbSDimitry Andric BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; 3695ffd83dbSDimitry Andric } 3705ffd83dbSDimitry Andric 3715ffd83dbSDimitry Andric assert(ExpandedBegin < ExpandedTokens.size()); 3725ffd83dbSDimitry Andric assert(ExpandedEnd < ExpandedTokens.size()); 3735ffd83dbSDimitry Andric // Avoid returning empty ranges. 3745ffd83dbSDimitry Andric if (ExpandedBegin == ExpandedEnd) 3755ffd83dbSDimitry Andric return {}; 376bdd1243dSDimitry Andric return {llvm::ArrayRef(ExpandedTokens.data() + ExpandedBegin, 3775ffd83dbSDimitry Andric ExpandedTokens.data() + ExpandedEnd)}; 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { 3810b57cec5SDimitry Andric auto It = Files.find(FID); 3820b57cec5SDimitry Andric assert(It != Files.end()); 3830b57cec5SDimitry Andric return It->second.SpelledTokens; 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric 386*0fca6ea1SDimitry Andric const syntax::Token * 387*0fca6ea1SDimitry Andric TokenBuffer::spelledTokenContaining(SourceLocation Loc) const { 3885ffd83dbSDimitry Andric assert(Loc.isFileID()); 3895ffd83dbSDimitry Andric const auto *Tok = llvm::partition_point( 3905ffd83dbSDimitry Andric spelledTokens(SourceMgr->getFileID(Loc)), 391*0fca6ea1SDimitry Andric [&](const syntax::Token &Tok) { return Tok.endLocation() <= Loc; }); 392*0fca6ea1SDimitry Andric if (!Tok || Loc < Tok->location()) 3935ffd83dbSDimitry Andric return nullptr; 3945ffd83dbSDimitry Andric return Tok; 3955ffd83dbSDimitry Andric } 3965ffd83dbSDimitry Andric 3970b57cec5SDimitry Andric std::string TokenBuffer::Mapping::str() const { 3985ffd83dbSDimitry Andric return std::string( 3995ffd83dbSDimitry Andric llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", 4005ffd83dbSDimitry Andric BeginSpelled, EndSpelled, BeginExpanded, EndExpanded)); 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 403bdd1243dSDimitry Andric std::optional<llvm::ArrayRef<syntax::Token>> 4040b57cec5SDimitry Andric TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { 4057a6dacacSDimitry Andric // In cases of invalid code, AST nodes can have source ranges that include 4067a6dacacSDimitry Andric // the `eof` token. As there's no spelling for this token, exclude it from 4077a6dacacSDimitry Andric // the range. 4087a6dacacSDimitry Andric if (!Expanded.empty() && Expanded.back().kind() == tok::eof) { 4097a6dacacSDimitry Andric Expanded = Expanded.drop_back(); 4107a6dacacSDimitry Andric } 4110b57cec5SDimitry Andric // Mapping an empty range is ambiguous in case of empty mappings at either end 4120b57cec5SDimitry Andric // of the range, bail out in that case. 4130b57cec5SDimitry Andric if (Expanded.empty()) 414bdd1243dSDimitry Andric return std::nullopt; 4156246ae0bSDimitry Andric const syntax::Token *First = &Expanded.front(); 4166246ae0bSDimitry Andric const syntax::Token *Last = &Expanded.back(); 417bdd1243dSDimitry Andric auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First); 418bdd1243dSDimitry Andric auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last); 4190b57cec5SDimitry Andric 4206246ae0bSDimitry Andric FileID FID = SourceMgr->getFileID(FirstSpelled->location()); 4210b57cec5SDimitry Andric // FIXME: Handle multi-file changes by trying to map onto a common root. 4220b57cec5SDimitry Andric if (FID != SourceMgr->getFileID(LastSpelled->location())) 423bdd1243dSDimitry Andric return std::nullopt; 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric const MarkedFile &File = Files.find(FID)->second; 4260b57cec5SDimitry Andric 4276246ae0bSDimitry Andric // If the range is within one macro argument, the result may be only part of a 4286246ae0bSDimitry Andric // Mapping. We must use the general (SourceManager-based) algorithm. 4296246ae0bSDimitry Andric if (FirstMapping && FirstMapping == LastMapping && 4306246ae0bSDimitry Andric SourceMgr->isMacroArgExpansion(First->location()) && 4316246ae0bSDimitry Andric SourceMgr->isMacroArgExpansion(Last->location())) { 4326246ae0bSDimitry Andric // We use excluded Prev/Next token for bounds checking. 4336246ae0bSDimitry Andric SourceLocation Prev = (First == &ExpandedTokens.front()) 4346246ae0bSDimitry Andric ? SourceLocation() 4356246ae0bSDimitry Andric : (First - 1)->location(); 4366246ae0bSDimitry Andric SourceLocation Next = (Last == &ExpandedTokens.back()) 4376246ae0bSDimitry Andric ? SourceLocation() 4386246ae0bSDimitry Andric : (Last + 1)->location(); 4396246ae0bSDimitry Andric SourceRange Range = spelledForExpandedSlow( 4406246ae0bSDimitry Andric First->location(), Last->location(), Prev, Next, FID, *SourceMgr); 4416246ae0bSDimitry Andric if (Range.isInvalid()) 442bdd1243dSDimitry Andric return std::nullopt; 4436246ae0bSDimitry Andric return getTokensCovering(File.SpelledTokens, Range, *SourceMgr); 4445ffd83dbSDimitry Andric } 4455ffd83dbSDimitry Andric 4466246ae0bSDimitry Andric // Otherwise, use the fast version based on Mappings. 4475ffd83dbSDimitry Andric // Do not allow changes that doesn't cover full expansion. 4486246ae0bSDimitry Andric unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data(); 4496246ae0bSDimitry Andric unsigned LastExpanded = Expanded.end() - ExpandedTokens.data(); 4506246ae0bSDimitry Andric if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded) 451bdd1243dSDimitry Andric return std::nullopt; 4526246ae0bSDimitry Andric if (LastMapping && LastMapping->EndExpanded != LastExpanded) 453bdd1243dSDimitry Andric return std::nullopt; 454bdd1243dSDimitry Andric return llvm::ArrayRef( 4556246ae0bSDimitry Andric FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled 4566246ae0bSDimitry Andric : FirstSpelled, 4570b57cec5SDimitry Andric LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled 4580b57cec5SDimitry Andric : LastSpelled + 1); 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric 461e8d8bef9SDimitry Andric TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F, 462e8d8bef9SDimitry Andric const Mapping &M) const { 463e8d8bef9SDimitry Andric Expansion E; 464bdd1243dSDimitry Andric E.Spelled = llvm::ArrayRef(F.SpelledTokens.data() + M.BeginSpelled, 465e8d8bef9SDimitry Andric F.SpelledTokens.data() + M.EndSpelled); 466bdd1243dSDimitry Andric E.Expanded = llvm::ArrayRef(ExpandedTokens.data() + M.BeginExpanded, 467e8d8bef9SDimitry Andric ExpandedTokens.data() + M.EndExpanded); 468e8d8bef9SDimitry Andric return E; 469e8d8bef9SDimitry Andric } 470e8d8bef9SDimitry Andric 471e8d8bef9SDimitry Andric const TokenBuffer::MarkedFile & 472e8d8bef9SDimitry Andric TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { 473e8d8bef9SDimitry Andric assert(!Spelled.empty()); 474e8d8bef9SDimitry Andric assert(Spelled.front().location().isFileID() && "not a spelled token"); 475e8d8bef9SDimitry Andric auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location())); 476e8d8bef9SDimitry Andric assert(FileIt != Files.end() && "file not tracked by token buffer"); 477e8d8bef9SDimitry Andric const auto &File = FileIt->second; 478e8d8bef9SDimitry Andric assert(File.SpelledTokens.data() <= Spelled.data() && 479e8d8bef9SDimitry Andric Spelled.end() <= 480e8d8bef9SDimitry Andric (File.SpelledTokens.data() + File.SpelledTokens.size()) && 481e8d8bef9SDimitry Andric "Tokens not in spelled range"); 482e8d8bef9SDimitry Andric #ifndef NDEBUG 483e8d8bef9SDimitry Andric auto T1 = Spelled.back().location(); 484e8d8bef9SDimitry Andric auto T2 = File.SpelledTokens.back().location(); 485e8d8bef9SDimitry Andric assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2)); 486e8d8bef9SDimitry Andric #endif 487e8d8bef9SDimitry Andric return File; 488e8d8bef9SDimitry Andric } 489e8d8bef9SDimitry Andric 490bdd1243dSDimitry Andric std::optional<TokenBuffer::Expansion> 4910b57cec5SDimitry Andric TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { 4920b57cec5SDimitry Andric assert(Spelled); 493e8d8bef9SDimitry Andric const auto &File = fileForSpelled(*Spelled); 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric unsigned SpelledIndex = Spelled - File.SpelledTokens.data(); 4960b57cec5SDimitry Andric auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) { 4970b57cec5SDimitry Andric return M.BeginSpelled < SpelledIndex; 4980b57cec5SDimitry Andric }); 4990b57cec5SDimitry Andric if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex) 500bdd1243dSDimitry Andric return std::nullopt; 501e8d8bef9SDimitry Andric return makeExpansion(File, *M); 5020b57cec5SDimitry Andric } 503e8d8bef9SDimitry Andric 504e8d8bef9SDimitry Andric std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping( 505e8d8bef9SDimitry Andric llvm::ArrayRef<syntax::Token> Spelled) const { 506e8d8bef9SDimitry Andric if (Spelled.empty()) 507e8d8bef9SDimitry Andric return {}; 508e8d8bef9SDimitry Andric const auto &File = fileForSpelled(Spelled); 509e8d8bef9SDimitry Andric 510e8d8bef9SDimitry Andric // Find the first overlapping range, and then copy until we stop overlapping. 511e8d8bef9SDimitry Andric unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data(); 512e8d8bef9SDimitry Andric unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data(); 513e8d8bef9SDimitry Andric auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) { 514e8d8bef9SDimitry Andric return M.EndSpelled <= SpelledBeginIndex; 515e8d8bef9SDimitry Andric }); 516e8d8bef9SDimitry Andric std::vector<TokenBuffer::Expansion> Expansions; 517e8d8bef9SDimitry Andric for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M) 518e8d8bef9SDimitry Andric Expansions.push_back(makeExpansion(File, *M)); 519e8d8bef9SDimitry Andric return Expansions; 520e8d8bef9SDimitry Andric } 521e8d8bef9SDimitry Andric 522480093f4SDimitry Andric llvm::ArrayRef<syntax::Token> 523480093f4SDimitry Andric syntax::spelledTokensTouching(SourceLocation Loc, 5245ffd83dbSDimitry Andric llvm::ArrayRef<syntax::Token> Tokens) { 525480093f4SDimitry Andric assert(Loc.isFileID()); 5265ffd83dbSDimitry Andric 527480093f4SDimitry Andric auto *Right = llvm::partition_point( 5285ffd83dbSDimitry Andric Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); 5295ffd83dbSDimitry Andric bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc; 5305ffd83dbSDimitry Andric bool AcceptLeft = 5315ffd83dbSDimitry Andric Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc; 532bdd1243dSDimitry Andric return llvm::ArrayRef(Right - (AcceptLeft ? 1 : 0), 533480093f4SDimitry Andric Right + (AcceptRight ? 1 : 0)); 534480093f4SDimitry Andric } 535480093f4SDimitry Andric 5365ffd83dbSDimitry Andric llvm::ArrayRef<syntax::Token> 5375ffd83dbSDimitry Andric syntax::spelledTokensTouching(SourceLocation Loc, 5385ffd83dbSDimitry Andric const syntax::TokenBuffer &Tokens) { 5395ffd83dbSDimitry Andric return spelledTokensTouching( 5405ffd83dbSDimitry Andric Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); 5415ffd83dbSDimitry Andric } 5425ffd83dbSDimitry Andric 543480093f4SDimitry Andric const syntax::Token * 544480093f4SDimitry Andric syntax::spelledIdentifierTouching(SourceLocation Loc, 5455ffd83dbSDimitry Andric llvm::ArrayRef<syntax::Token> Tokens) { 546480093f4SDimitry Andric for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) { 547480093f4SDimitry Andric if (Tok.kind() == tok::identifier) 548480093f4SDimitry Andric return &Tok; 549480093f4SDimitry Andric } 550480093f4SDimitry Andric return nullptr; 551480093f4SDimitry Andric } 552480093f4SDimitry Andric 5535ffd83dbSDimitry Andric const syntax::Token * 5545ffd83dbSDimitry Andric syntax::spelledIdentifierTouching(SourceLocation Loc, 5555ffd83dbSDimitry Andric const syntax::TokenBuffer &Tokens) { 5565ffd83dbSDimitry Andric return spelledIdentifierTouching( 5575ffd83dbSDimitry Andric Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); 5585ffd83dbSDimitry Andric } 5595ffd83dbSDimitry Andric 560a7dea167SDimitry Andric std::vector<const syntax::Token *> 561a7dea167SDimitry Andric TokenBuffer::macroExpansions(FileID FID) const { 562a7dea167SDimitry Andric auto FileIt = Files.find(FID); 563a7dea167SDimitry Andric assert(FileIt != Files.end() && "file not tracked by token buffer"); 564a7dea167SDimitry Andric auto &File = FileIt->second; 565a7dea167SDimitry Andric std::vector<const syntax::Token *> Expansions; 566a7dea167SDimitry Andric auto &Spelled = File.SpelledTokens; 567a7dea167SDimitry Andric for (auto Mapping : File.Mappings) { 568a7dea167SDimitry Andric const syntax::Token *Token = &Spelled[Mapping.BeginSpelled]; 569a7dea167SDimitry Andric if (Token->kind() == tok::TokenKind::identifier) 570a7dea167SDimitry Andric Expansions.push_back(Token); 571a7dea167SDimitry Andric } 572a7dea167SDimitry Andric return Expansions; 573a7dea167SDimitry Andric } 574a7dea167SDimitry Andric 5755ffd83dbSDimitry Andric std::vector<syntax::Token> syntax::tokenize(const FileRange &FR, 5765ffd83dbSDimitry Andric const SourceManager &SM, 5770b57cec5SDimitry Andric const LangOptions &LO) { 5780b57cec5SDimitry Andric std::vector<syntax::Token> Tokens; 5790b57cec5SDimitry Andric IdentifierTable Identifiers(LO); 5800b57cec5SDimitry Andric auto AddToken = [&](clang::Token T) { 5810b57cec5SDimitry Andric // Fill the proper token kind for keywords, etc. 5820b57cec5SDimitry Andric if (T.getKind() == tok::raw_identifier && !T.needsCleaning() && 5830b57cec5SDimitry Andric !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases. 5840b57cec5SDimitry Andric clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier()); 5850b57cec5SDimitry Andric T.setIdentifierInfo(&II); 5860b57cec5SDimitry Andric T.setKind(II.getTokenID()); 5870b57cec5SDimitry Andric } 5880b57cec5SDimitry Andric Tokens.push_back(syntax::Token(T)); 5890b57cec5SDimitry Andric }; 5900b57cec5SDimitry Andric 5915ffd83dbSDimitry Andric auto SrcBuffer = SM.getBufferData(FR.file()); 5925ffd83dbSDimitry Andric Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(), 5935ffd83dbSDimitry Andric SrcBuffer.data() + FR.beginOffset(), 5945ffd83dbSDimitry Andric // We can't make BufEnd point to FR.endOffset, as Lexer requires a 5955ffd83dbSDimitry Andric // null terminated buffer. 5965ffd83dbSDimitry Andric SrcBuffer.data() + SrcBuffer.size()); 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric clang::Token T; 5995ffd83dbSDimitry Andric while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset()) 6000b57cec5SDimitry Andric AddToken(T); 6015ffd83dbSDimitry Andric // LexFromRawLexer returns true when it parses the last token of the file, add 6025ffd83dbSDimitry Andric // it iff it starts within the range we are interested in. 6035ffd83dbSDimitry Andric if (SM.getFileOffset(T.getLocation()) < FR.endOffset()) 6040b57cec5SDimitry Andric AddToken(T); 6050b57cec5SDimitry Andric return Tokens; 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric 6085ffd83dbSDimitry Andric std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, 6095ffd83dbSDimitry Andric const LangOptions &LO) { 6105ffd83dbSDimitry Andric return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO); 6115ffd83dbSDimitry Andric } 6125ffd83dbSDimitry Andric 6130b57cec5SDimitry Andric /// Records information reqired to construct mappings for the token buffer that 6140b57cec5SDimitry Andric /// we are collecting. 6150b57cec5SDimitry Andric class TokenCollector::CollectPPExpansions : public PPCallbacks { 6160b57cec5SDimitry Andric public: 6170b57cec5SDimitry Andric CollectPPExpansions(TokenCollector &C) : Collector(&C) {} 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric /// Disabled instance will stop reporting anything to TokenCollector. 6200b57cec5SDimitry Andric /// This ensures that uses of the preprocessor after TokenCollector::consume() 6210b57cec5SDimitry Andric /// is called do not access the (possibly invalid) collector instance. 6220b57cec5SDimitry Andric void disable() { Collector = nullptr; } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD, 6250b57cec5SDimitry Andric SourceRange Range, const MacroArgs *Args) override { 6260b57cec5SDimitry Andric if (!Collector) 6270b57cec5SDimitry Andric return; 628e837bb5cSDimitry Andric const auto &SM = Collector->PP.getSourceManager(); 629e837bb5cSDimitry Andric // Only record top-level expansions that directly produce expanded tokens. 630e837bb5cSDimitry Andric // This excludes those where: 6310b57cec5SDimitry Andric // - the macro use is inside a macro body, 6320b57cec5SDimitry Andric // - the macro appears in an argument to another macro. 633e837bb5cSDimitry Andric // However macro expansion isn't really a tree, it's token rewrite rules, 634e837bb5cSDimitry Andric // so there are other cases, e.g. 635e837bb5cSDimitry Andric // #define B(X) X 636e837bb5cSDimitry Andric // #define A 1 + B 637e837bb5cSDimitry Andric // A(2) 638e837bb5cSDimitry Andric // Both A and B produce expanded tokens, though the macro name 'B' comes 639e837bb5cSDimitry Andric // from an expansion. The best we can do is merge the mappings for both. 640e837bb5cSDimitry Andric 641e837bb5cSDimitry Andric // The *last* token of any top-level macro expansion must be in a file. 642e837bb5cSDimitry Andric // (In the example above, see the closing paren of the expansion of B). 643e837bb5cSDimitry Andric if (!Range.getEnd().isFileID()) 6440b57cec5SDimitry Andric return; 645e837bb5cSDimitry Andric // If there's a current expansion that encloses this one, this one can't be 646e837bb5cSDimitry Andric // top-level. 647e837bb5cSDimitry Andric if (LastExpansionEnd.isValid() && 648e837bb5cSDimitry Andric !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd())) 649e837bb5cSDimitry Andric return; 650e837bb5cSDimitry Andric 651e837bb5cSDimitry Andric // If the macro invocation (B) starts in a macro (A) but ends in a file, 652e837bb5cSDimitry Andric // we'll create a merged mapping for A + B by overwriting the endpoint for 653e837bb5cSDimitry Andric // A's startpoint. 654e837bb5cSDimitry Andric if (!Range.getBegin().isFileID()) { 655e837bb5cSDimitry Andric Range.setBegin(SM.getExpansionLoc(Range.getBegin())); 656e8d8bef9SDimitry Andric assert(Collector->Expansions.count(Range.getBegin()) && 657e837bb5cSDimitry Andric "Overlapping macros should have same expansion location"); 658e837bb5cSDimitry Andric } 659e837bb5cSDimitry Andric 660e8d8bef9SDimitry Andric Collector->Expansions[Range.getBegin()] = Range.getEnd(); 6610b57cec5SDimitry Andric LastExpansionEnd = Range.getEnd(); 6620b57cec5SDimitry Andric } 6630b57cec5SDimitry Andric // FIXME: handle directives like #pragma, #include, etc. 6640b57cec5SDimitry Andric private: 6650b57cec5SDimitry Andric TokenCollector *Collector; 6660b57cec5SDimitry Andric /// Used to detect recursive macro expansions. 6670b57cec5SDimitry Andric SourceLocation LastExpansionEnd; 6680b57cec5SDimitry Andric }; 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric /// Fills in the TokenBuffer by tracing the run of a preprocessor. The 6710b57cec5SDimitry Andric /// implementation tracks the tokens, macro expansions and directives coming 6720b57cec5SDimitry Andric /// from the preprocessor and: 6730b57cec5SDimitry Andric /// - for each token, figures out if it is a part of an expanded token stream, 6740b57cec5SDimitry Andric /// spelled token stream or both. Stores the tokens appropriately. 6750b57cec5SDimitry Andric /// - records mappings from the spelled to expanded token ranges, e.g. for macro 6760b57cec5SDimitry Andric /// expansions. 6770b57cec5SDimitry Andric /// FIXME: also properly record: 6780b57cec5SDimitry Andric /// - #include directives, 6790b57cec5SDimitry Andric /// - #pragma, #line and other PP directives, 6800b57cec5SDimitry Andric /// - skipped pp regions, 6810b57cec5SDimitry Andric /// - ... 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) { 6840b57cec5SDimitry Andric // Collect the expanded token stream during preprocessing. 6850b57cec5SDimitry Andric PP.setTokenWatcher([this](const clang::Token &T) { 6860b57cec5SDimitry Andric if (T.isAnnotation()) 6870b57cec5SDimitry Andric return; 6880b57cec5SDimitry Andric DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs() 6890b57cec5SDimitry Andric << "Token: " 6900b57cec5SDimitry Andric << syntax::Token(T).dumpForTests( 6910b57cec5SDimitry Andric this->PP.getSourceManager()) 6920b57cec5SDimitry Andric << "\n" 6930b57cec5SDimitry Andric 6940b57cec5SDimitry Andric ); 6950b57cec5SDimitry Andric Expanded.push_back(syntax::Token(T)); 6960b57cec5SDimitry Andric }); 6970b57cec5SDimitry Andric // And locations of macro calls, to properly recover boundaries of those in 6980b57cec5SDimitry Andric // case of empty expansions. 699a7dea167SDimitry Andric auto CB = std::make_unique<CollectPPExpansions>(*this); 7000b57cec5SDimitry Andric this->Collector = CB.get(); 7010b57cec5SDimitry Andric PP.addPPCallbacks(std::move(CB)); 7020b57cec5SDimitry Andric } 7030b57cec5SDimitry Andric 7040b57cec5SDimitry Andric /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded 7050b57cec5SDimitry Andric /// token stream. 7060b57cec5SDimitry Andric class TokenCollector::Builder { 7070b57cec5SDimitry Andric public: 7080b57cec5SDimitry Andric Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions, 7090b57cec5SDimitry Andric const SourceManager &SM, const LangOptions &LangOpts) 7100b57cec5SDimitry Andric : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM), 7110b57cec5SDimitry Andric LangOpts(LangOpts) { 7120b57cec5SDimitry Andric Result.ExpandedTokens = std::move(Expanded); 7130b57cec5SDimitry Andric } 7140b57cec5SDimitry Andric 7150b57cec5SDimitry Andric TokenBuffer build() && { 7160b57cec5SDimitry Andric assert(!Result.ExpandedTokens.empty()); 7170b57cec5SDimitry Andric assert(Result.ExpandedTokens.back().kind() == tok::eof); 718e837bb5cSDimitry Andric 719e837bb5cSDimitry Andric // Tokenize every file that contributed tokens to the expanded stream. 720e837bb5cSDimitry Andric buildSpelledTokens(); 721e837bb5cSDimitry Andric 722e837bb5cSDimitry Andric // The expanded token stream consists of runs of tokens that came from 723e837bb5cSDimitry Andric // the same source (a macro expansion, part of a file etc). 724e837bb5cSDimitry Andric // Between these runs are the logical positions of spelled tokens that 725e837bb5cSDimitry Andric // didn't expand to anything. 726e837bb5cSDimitry Andric while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) { 727e837bb5cSDimitry Andric // Create empty mappings for spelled tokens that expanded to nothing here. 728e837bb5cSDimitry Andric // May advance NextSpelled, but NextExpanded is unchanged. 729e837bb5cSDimitry Andric discard(); 730e837bb5cSDimitry Andric // Create mapping for a contiguous run of expanded tokens. 731e837bb5cSDimitry Andric // Advances NextExpanded past the run, and NextSpelled accordingly. 732e837bb5cSDimitry Andric unsigned OldPosition = NextExpanded; 733e837bb5cSDimitry Andric advance(); 734e837bb5cSDimitry Andric if (NextExpanded == OldPosition) 735e837bb5cSDimitry Andric diagnoseAdvanceFailure(); 7360b57cec5SDimitry Andric } 737e837bb5cSDimitry Andric // If any tokens remain in any of the files, they didn't expand to anything. 738e837bb5cSDimitry Andric // Create empty mappings up until the end of the file. 739e837bb5cSDimitry Andric for (const auto &File : Result.Files) 740e837bb5cSDimitry Andric discard(File.first); 7410b57cec5SDimitry Andric 7425ffd83dbSDimitry Andric #ifndef NDEBUG 7435ffd83dbSDimitry Andric for (auto &pair : Result.Files) { 7445ffd83dbSDimitry Andric auto &mappings = pair.second.Mappings; 7455ffd83dbSDimitry Andric assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1, 7465ffd83dbSDimitry Andric const TokenBuffer::Mapping &M2) { 7475ffd83dbSDimitry Andric return M1.BeginSpelled < M2.BeginSpelled && 7485ffd83dbSDimitry Andric M1.EndSpelled < M2.EndSpelled && 7495ffd83dbSDimitry Andric M1.BeginExpanded < M2.BeginExpanded && 7505ffd83dbSDimitry Andric M1.EndExpanded < M2.EndExpanded; 7515ffd83dbSDimitry Andric })); 7525ffd83dbSDimitry Andric } 7535ffd83dbSDimitry Andric #endif 7545ffd83dbSDimitry Andric 7550b57cec5SDimitry Andric return std::move(Result); 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric 7580b57cec5SDimitry Andric private: 759e837bb5cSDimitry Andric // Consume a sequence of spelled tokens that didn't expand to anything. 760e837bb5cSDimitry Andric // In the simplest case, skips spelled tokens until finding one that produced 761e837bb5cSDimitry Andric // the NextExpanded token, and creates an empty mapping for them. 762e837bb5cSDimitry Andric // If Drain is provided, skips remaining tokens from that file instead. 763bdd1243dSDimitry Andric void discard(std::optional<FileID> Drain = std::nullopt) { 764e837bb5cSDimitry Andric SourceLocation Target = 765e837bb5cSDimitry Andric Drain ? SM.getLocForEndOfFile(*Drain) 766e837bb5cSDimitry Andric : SM.getExpansionLoc( 767e837bb5cSDimitry Andric Result.ExpandedTokens[NextExpanded].location()); 768e837bb5cSDimitry Andric FileID File = SM.getFileID(Target); 769e837bb5cSDimitry Andric const auto &SpelledTokens = Result.Files[File].SpelledTokens; 770e837bb5cSDimitry Andric auto &NextSpelled = this->NextSpelled[File]; 771e837bb5cSDimitry Andric 772e837bb5cSDimitry Andric TokenBuffer::Mapping Mapping; 773e837bb5cSDimitry Andric Mapping.BeginSpelled = NextSpelled; 774e837bb5cSDimitry Andric // When dropping trailing tokens from a file, the empty mapping should 775e837bb5cSDimitry Andric // be positioned within the file's expanded-token range (at the end). 776e837bb5cSDimitry Andric Mapping.BeginExpanded = Mapping.EndExpanded = 777e837bb5cSDimitry Andric Drain ? Result.Files[*Drain].EndExpanded : NextExpanded; 778e837bb5cSDimitry Andric // We may want to split into several adjacent empty mappings. 779e837bb5cSDimitry Andric // FlushMapping() emits the current mapping and starts a new one. 780e837bb5cSDimitry Andric auto FlushMapping = [&, this] { 781e837bb5cSDimitry Andric Mapping.EndSpelled = NextSpelled; 782e837bb5cSDimitry Andric if (Mapping.BeginSpelled != Mapping.EndSpelled) 783e837bb5cSDimitry Andric Result.Files[File].Mappings.push_back(Mapping); 784e837bb5cSDimitry Andric Mapping.BeginSpelled = NextSpelled; 785e837bb5cSDimitry Andric }; 786e837bb5cSDimitry Andric 787e837bb5cSDimitry Andric while (NextSpelled < SpelledTokens.size() && 788e837bb5cSDimitry Andric SpelledTokens[NextSpelled].location() < Target) { 789e837bb5cSDimitry Andric // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion) 790e837bb5cSDimitry Andric // then we want to partition our (empty) mapping. 791e837bb5cSDimitry Andric // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target) 792e8d8bef9SDimitry Andric SourceLocation KnownEnd = 793e8d8bef9SDimitry Andric CollectedExpansions.lookup(SpelledTokens[NextSpelled].location()); 794e837bb5cSDimitry Andric if (KnownEnd.isValid()) { 795e837bb5cSDimitry Andric FlushMapping(); // Emits [Start, NextSpelled) 796e837bb5cSDimitry Andric while (NextSpelled < SpelledTokens.size() && 797e837bb5cSDimitry Andric SpelledTokens[NextSpelled].location() <= KnownEnd) 798e837bb5cSDimitry Andric ++NextSpelled; 799e837bb5cSDimitry Andric FlushMapping(); // Emits [NextSpelled, KnownEnd] 800bdd1243dSDimitry Andric // Now the loop continues and will emit (KnownEnd, Target). 801e837bb5cSDimitry Andric } else { 802e837bb5cSDimitry Andric ++NextSpelled; 8030b57cec5SDimitry Andric } 804e837bb5cSDimitry Andric } 805e837bb5cSDimitry Andric FlushMapping(); 806e837bb5cSDimitry Andric } 8070b57cec5SDimitry Andric 808e837bb5cSDimitry Andric // Consumes the NextExpanded token and others that are part of the same run. 809e837bb5cSDimitry Andric // Increases NextExpanded and NextSpelled by at least one, and adds a mapping 810e837bb5cSDimitry Andric // (unless this is a run of file tokens, which we represent with no mapping). 811e837bb5cSDimitry Andric void advance() { 812e837bb5cSDimitry Andric const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded]; 813e837bb5cSDimitry Andric SourceLocation Expansion = SM.getExpansionLoc(Tok.location()); 814e837bb5cSDimitry Andric FileID File = SM.getFileID(Expansion); 815e837bb5cSDimitry Andric const auto &SpelledTokens = Result.Files[File].SpelledTokens; 816e837bb5cSDimitry Andric auto &NextSpelled = this->NextSpelled[File]; 8170b57cec5SDimitry Andric 818e837bb5cSDimitry Andric if (Tok.location().isFileID()) { 819e837bb5cSDimitry Andric // A run of file tokens continues while the expanded/spelled tokens match. 820e837bb5cSDimitry Andric while (NextSpelled < SpelledTokens.size() && 821e837bb5cSDimitry Andric NextExpanded < Result.ExpandedTokens.size() && 822e837bb5cSDimitry Andric SpelledTokens[NextSpelled].location() == 823e837bb5cSDimitry Andric Result.ExpandedTokens[NextExpanded].location()) { 824e837bb5cSDimitry Andric ++NextSpelled; 825e837bb5cSDimitry Andric ++NextExpanded; 826e837bb5cSDimitry Andric } 827e837bb5cSDimitry Andric // We need no mapping for file tokens copied to the expanded stream. 828e837bb5cSDimitry Andric } else { 829e837bb5cSDimitry Andric // We found a new macro expansion. We should have its spelling bounds. 830e8d8bef9SDimitry Andric auto End = CollectedExpansions.lookup(Expansion); 831e837bb5cSDimitry Andric assert(End.isValid() && "Macro expansion wasn't captured?"); 832e837bb5cSDimitry Andric 833e837bb5cSDimitry Andric // Mapping starts here... 834e837bb5cSDimitry Andric TokenBuffer::Mapping Mapping; 835e837bb5cSDimitry Andric Mapping.BeginExpanded = NextExpanded; 836e837bb5cSDimitry Andric Mapping.BeginSpelled = NextSpelled; 837e837bb5cSDimitry Andric // ... consumes spelled tokens within bounds we captured ... 838e837bb5cSDimitry Andric while (NextSpelled < SpelledTokens.size() && 839e837bb5cSDimitry Andric SpelledTokens[NextSpelled].location() <= End) 840e837bb5cSDimitry Andric ++NextSpelled; 841e837bb5cSDimitry Andric // ... consumes expanded tokens rooted at the same expansion ... 842e837bb5cSDimitry Andric while (NextExpanded < Result.ExpandedTokens.size() && 843e837bb5cSDimitry Andric SM.getExpansionLoc( 844e837bb5cSDimitry Andric Result.ExpandedTokens[NextExpanded].location()) == Expansion) 845e837bb5cSDimitry Andric ++NextExpanded; 846e837bb5cSDimitry Andric // ... and ends here. 847e837bb5cSDimitry Andric Mapping.EndExpanded = NextExpanded; 848e837bb5cSDimitry Andric Mapping.EndSpelled = NextSpelled; 849e837bb5cSDimitry Andric Result.Files[File].Mappings.push_back(Mapping); 8500b57cec5SDimitry Andric } 8510b57cec5SDimitry Andric } 8520b57cec5SDimitry Andric 853e837bb5cSDimitry Andric // advance() is supposed to consume at least one token - if not, we crash. 854e837bb5cSDimitry Andric void diagnoseAdvanceFailure() { 855e837bb5cSDimitry Andric #ifndef NDEBUG 856e837bb5cSDimitry Andric // Show the failed-to-map token in context. 857e837bb5cSDimitry Andric for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10; 858e837bb5cSDimitry Andric I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) { 859e837bb5cSDimitry Andric const char *L = 860e837bb5cSDimitry Andric (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " "; 861e837bb5cSDimitry Andric llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n"; 8620b57cec5SDimitry Andric } 863e837bb5cSDimitry Andric #endif 864e837bb5cSDimitry Andric llvm_unreachable("Couldn't map expanded token to spelled tokens!"); 8650b57cec5SDimitry Andric } 8660b57cec5SDimitry Andric 8670b57cec5SDimitry Andric /// Initializes TokenBuffer::Files and fills spelled tokens and expanded 8680b57cec5SDimitry Andric /// ranges for each of the files. 8690b57cec5SDimitry Andric void buildSpelledTokens() { 8700b57cec5SDimitry Andric for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) { 871e837bb5cSDimitry Andric const auto &Tok = Result.ExpandedTokens[I]; 872e837bb5cSDimitry Andric auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location())); 8730b57cec5SDimitry Andric auto It = Result.Files.try_emplace(FID); 8740b57cec5SDimitry Andric TokenBuffer::MarkedFile &File = It.first->second; 8750b57cec5SDimitry Andric 876e837bb5cSDimitry Andric // The eof token should not be considered part of the main-file's range. 877e837bb5cSDimitry Andric File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1; 878e837bb5cSDimitry Andric 8790b57cec5SDimitry Andric if (!It.second) 8800b57cec5SDimitry Andric continue; // we have seen this file before. 8810b57cec5SDimitry Andric // This is the first time we see this file. 8820b57cec5SDimitry Andric File.BeginExpanded = I; 8830b57cec5SDimitry Andric File.SpelledTokens = tokenize(FID, SM, LangOpts); 8840b57cec5SDimitry Andric } 8850b57cec5SDimitry Andric } 8860b57cec5SDimitry Andric 8870b57cec5SDimitry Andric TokenBuffer Result; 888e837bb5cSDimitry Andric unsigned NextExpanded = 0; // cursor in ExpandedTokens 889e837bb5cSDimitry Andric llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens 8900b57cec5SDimitry Andric PPExpansions CollectedExpansions; 8910b57cec5SDimitry Andric const SourceManager &SM; 8920b57cec5SDimitry Andric const LangOptions &LangOpts; 8930b57cec5SDimitry Andric }; 8940b57cec5SDimitry Andric 8950b57cec5SDimitry Andric TokenBuffer TokenCollector::consume() && { 8960b57cec5SDimitry Andric PP.setTokenWatcher(nullptr); 8970b57cec5SDimitry Andric Collector->disable(); 8980b57cec5SDimitry Andric return Builder(std::move(Expanded), std::move(Expansions), 8990b57cec5SDimitry Andric PP.getSourceManager(), PP.getLangOpts()) 9000b57cec5SDimitry Andric .build(); 9010b57cec5SDimitry Andric } 9020b57cec5SDimitry Andric 9030b57cec5SDimitry Andric std::string syntax::Token::str() const { 9045ffd83dbSDimitry Andric return std::string(llvm::formatv("Token({0}, length = {1})", 9055ffd83dbSDimitry Andric tok::getTokenName(kind()), length())); 9060b57cec5SDimitry Andric } 9070b57cec5SDimitry Andric 9080b57cec5SDimitry Andric std::string syntax::Token::dumpForTests(const SourceManager &SM) const { 9095ffd83dbSDimitry Andric return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM), 9105ffd83dbSDimitry Andric tok::getTokenName(kind()), length())); 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric std::string TokenBuffer::dumpForTests() const { 9140b57cec5SDimitry Andric auto PrintToken = [this](const syntax::Token &T) -> std::string { 9150b57cec5SDimitry Andric if (T.kind() == tok::eof) 9160b57cec5SDimitry Andric return "<eof>"; 9175ffd83dbSDimitry Andric return std::string(T.text(*SourceMgr)); 9180b57cec5SDimitry Andric }; 9190b57cec5SDimitry Andric 9200b57cec5SDimitry Andric auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS, 9210b57cec5SDimitry Andric llvm::ArrayRef<syntax::Token> Tokens) { 9220b57cec5SDimitry Andric if (Tokens.empty()) { 9230b57cec5SDimitry Andric OS << "<empty>"; 9240b57cec5SDimitry Andric return; 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric OS << Tokens[0].text(*SourceMgr); 9270b57cec5SDimitry Andric for (unsigned I = 1; I < Tokens.size(); ++I) { 9280b57cec5SDimitry Andric if (Tokens[I].kind() == tok::eof) 9290b57cec5SDimitry Andric continue; 9300b57cec5SDimitry Andric OS << " " << PrintToken(Tokens[I]); 9310b57cec5SDimitry Andric } 9320b57cec5SDimitry Andric }; 9330b57cec5SDimitry Andric 9340b57cec5SDimitry Andric std::string Dump; 9350b57cec5SDimitry Andric llvm::raw_string_ostream OS(Dump); 9360b57cec5SDimitry Andric 9370b57cec5SDimitry Andric OS << "expanded tokens:\n" 9380b57cec5SDimitry Andric << " "; 9390b57cec5SDimitry Andric // (!) we do not show '<eof>'. 940bdd1243dSDimitry Andric DumpTokens(OS, llvm::ArrayRef(ExpandedTokens).drop_back()); 9410b57cec5SDimitry Andric OS << "\n"; 9420b57cec5SDimitry Andric 9430b57cec5SDimitry Andric std::vector<FileID> Keys; 94406c3fb27SDimitry Andric for (const auto &F : Files) 9450b57cec5SDimitry Andric Keys.push_back(F.first); 9460b57cec5SDimitry Andric llvm::sort(Keys); 9470b57cec5SDimitry Andric 9480b57cec5SDimitry Andric for (FileID ID : Keys) { 9490b57cec5SDimitry Andric const MarkedFile &File = Files.find(ID)->second; 9505f757f3fSDimitry Andric auto Entry = SourceMgr->getFileEntryRefForID(ID); 9510b57cec5SDimitry Andric if (!Entry) 9520b57cec5SDimitry Andric continue; // Skip builtin files. 9535f757f3fSDimitry Andric std::string Path = llvm::sys::path::convert_to_slash(Entry->getName()); 9545f757f3fSDimitry Andric OS << llvm::formatv("file '{0}'\n", Path) << " spelled tokens:\n" 9550b57cec5SDimitry Andric << " "; 9560b57cec5SDimitry Andric DumpTokens(OS, File.SpelledTokens); 9570b57cec5SDimitry Andric OS << "\n"; 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric if (File.Mappings.empty()) { 9600b57cec5SDimitry Andric OS << " no mappings.\n"; 9610b57cec5SDimitry Andric continue; 9620b57cec5SDimitry Andric } 9630b57cec5SDimitry Andric OS << " mappings:\n"; 9640b57cec5SDimitry Andric for (auto &M : File.Mappings) { 9650b57cec5SDimitry Andric OS << llvm::formatv( 9660b57cec5SDimitry Andric " ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n", 9670b57cec5SDimitry Andric PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled, 9680b57cec5SDimitry Andric M.EndSpelled == File.SpelledTokens.size() 9690b57cec5SDimitry Andric ? "<eof>" 9700b57cec5SDimitry Andric : PrintToken(File.SpelledTokens[M.EndSpelled]), 9710b57cec5SDimitry Andric M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]), 9720b57cec5SDimitry Andric M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]), 9730b57cec5SDimitry Andric M.EndExpanded); 9740b57cec5SDimitry Andric } 9750b57cec5SDimitry Andric } 9760eae32dcSDimitry Andric return Dump; 9770b57cec5SDimitry Andric } 978