xref: /freebsd-src/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric #include "clang/Tooling/Syntax/Tokens.h"
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric #include "clang/Basic/Diagnostic.h"
110b57cec5SDimitry Andric #include "clang/Basic/IdentifierTable.h"
120b57cec5SDimitry Andric #include "clang/Basic/LLVM.h"
130b57cec5SDimitry Andric #include "clang/Basic/LangOptions.h"
140b57cec5SDimitry Andric #include "clang/Basic/SourceLocation.h"
150b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h"
160b57cec5SDimitry Andric #include "clang/Basic/TokenKinds.h"
170b57cec5SDimitry Andric #include "clang/Lex/PPCallbacks.h"
180b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h"
190b57cec5SDimitry Andric #include "clang/Lex/Token.h"
200b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
210b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
220b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
230b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
240b57cec5SDimitry Andric #include "llvm/Support/FormatVariadic.h"
250b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
260b57cec5SDimitry Andric #include <algorithm>
270b57cec5SDimitry Andric #include <cassert>
280b57cec5SDimitry Andric #include <iterator>
29bdd1243dSDimitry Andric #include <optional>
300b57cec5SDimitry Andric #include <string>
310b57cec5SDimitry Andric #include <utility>
320b57cec5SDimitry Andric #include <vector>
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric using namespace clang;
350b57cec5SDimitry Andric using namespace clang::syntax;
360b57cec5SDimitry Andric 
375ffd83dbSDimitry Andric namespace {
385ffd83dbSDimitry Andric // Finds the smallest consecutive subsuquence of Toks that covers R.
395ffd83dbSDimitry Andric llvm::ArrayRef<syntax::Token>
405ffd83dbSDimitry Andric getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
415ffd83dbSDimitry Andric                   const SourceManager &SM) {
425ffd83dbSDimitry Andric   if (R.isInvalid())
435ffd83dbSDimitry Andric     return {};
445ffd83dbSDimitry Andric   const syntax::Token *Begin =
455ffd83dbSDimitry Andric       llvm::partition_point(Toks, [&](const syntax::Token &T) {
465ffd83dbSDimitry Andric         return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
475ffd83dbSDimitry Andric       });
485ffd83dbSDimitry Andric   const syntax::Token *End =
495ffd83dbSDimitry Andric       llvm::partition_point(Toks, [&](const syntax::Token &T) {
505ffd83dbSDimitry Andric         return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
515ffd83dbSDimitry Andric       });
525ffd83dbSDimitry Andric   if (Begin > End)
535ffd83dbSDimitry Andric     return {};
545ffd83dbSDimitry Andric   return {Begin, End};
555ffd83dbSDimitry Andric }
565ffd83dbSDimitry Andric 
576246ae0bSDimitry Andric // Finds the range within FID corresponding to expanded tokens [First, Last].
586246ae0bSDimitry Andric // Prev precedes First and Next follows Last, these must *not* be included.
596246ae0bSDimitry Andric // If no range satisfies the criteria, returns an invalid range.
606246ae0bSDimitry Andric //
615ffd83dbSDimitry Andric // #define ID(x) x
625ffd83dbSDimitry Andric // ID(ID(ID(a1) a2))
635ffd83dbSDimitry Andric //          ~~       -> a1
645ffd83dbSDimitry Andric //              ~~   -> a2
655ffd83dbSDimitry Andric //       ~~~~~~~~~   -> a1 a2
666246ae0bSDimitry Andric SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last,
676246ae0bSDimitry Andric                                    SourceLocation Prev, SourceLocation Next,
686246ae0bSDimitry Andric                                    FileID TargetFile,
695ffd83dbSDimitry Andric                                    const SourceManager &SM) {
706246ae0bSDimitry Andric   // There are two main parts to this algorithm:
716246ae0bSDimitry Andric   //  - identifying which spelled range covers the expanded tokens
726246ae0bSDimitry Andric   //  - validating that this range doesn't cover any extra tokens (First/Last)
736246ae0bSDimitry Andric   //
746246ae0bSDimitry Andric   // We do these in order. However as we transform the expanded range into the
756246ae0bSDimitry Andric   // spelled one, we adjust First/Last so the validation remains simple.
766246ae0bSDimitry Andric 
776246ae0bSDimitry Andric   assert(SM.getSLocEntry(TargetFile).isFile());
786246ae0bSDimitry Andric   // In most cases, to select First and Last we must return their expansion
796246ae0bSDimitry Andric   // range, i.e. the whole of any macros they are included in.
806246ae0bSDimitry Andric   //
816246ae0bSDimitry Andric   // When First and Last are part of the *same macro arg* of a macro written
826246ae0bSDimitry Andric   // in TargetFile, we that slice of the arg, i.e. their spelling range.
836246ae0bSDimitry Andric   //
846246ae0bSDimitry Andric   // Unwrap such macro calls. If the target file has A(B(C)), the
856246ae0bSDimitry Andric   // SourceLocation stack of a token inside C shows us the expansion of A first,
866246ae0bSDimitry Andric   // then B, then any macros inside C's body, then C itself.
876246ae0bSDimitry Andric   // (This is the reverse of the order the PP applies the expansions in).
886246ae0bSDimitry Andric   while (First.isMacroID() && Last.isMacroID()) {
896246ae0bSDimitry Andric     auto DecFirst = SM.getDecomposedLoc(First);
906246ae0bSDimitry Andric     auto DecLast = SM.getDecomposedLoc(Last);
916246ae0bSDimitry Andric     auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion();
926246ae0bSDimitry Andric     auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion();
936246ae0bSDimitry Andric 
946246ae0bSDimitry Andric     if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion())
955ffd83dbSDimitry Andric       break;
966246ae0bSDimitry Andric     // Locations are in the same macro arg if they expand to the same place.
976246ae0bSDimitry Andric     // (They may still have different FileIDs - an arg can have >1 chunks!)
986246ae0bSDimitry Andric     if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart())
995ffd83dbSDimitry Andric       break;
1006246ae0bSDimitry Andric     // Careful, given:
1016246ae0bSDimitry Andric     //   #define HIDE ID(ID(a))
1026246ae0bSDimitry Andric     //   ID(ID(HIDE))
1036246ae0bSDimitry Andric     // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2.
1046246ae0bSDimitry Andric     // We distinguish them by whether the macro expands into the target file.
1056246ae0bSDimitry Andric     // Fortunately, the target file ones will always appear first.
10606c3fb27SDimitry Andric     auto ExpFileID = SM.getFileID(ExpFirst.getExpansionLocStart());
10706c3fb27SDimitry Andric     if (ExpFileID == TargetFile)
1086246ae0bSDimitry Andric       break;
1096246ae0bSDimitry Andric     // Replace each endpoint with its spelling inside the macro arg.
1106246ae0bSDimitry Andric     // (This is getImmediateSpellingLoc without repeating lookups).
1116246ae0bSDimitry Andric     First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second);
1126246ae0bSDimitry Andric     Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second);
1136246ae0bSDimitry Andric   }
1146246ae0bSDimitry Andric 
1156246ae0bSDimitry Andric   // In all remaining cases we need the full containing macros.
1166246ae0bSDimitry Andric   // If this overlaps Prev or Next, then no range is possible.
1176246ae0bSDimitry Andric   SourceRange Candidate =
1186246ae0bSDimitry Andric       SM.getExpansionRange(SourceRange(First, Last)).getAsRange();
1196246ae0bSDimitry Andric   auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin());
12006c3fb27SDimitry Andric   auto DecLast = SM.getDecomposedExpansionLoc(Candidate.getEnd());
1216246ae0bSDimitry Andric   // Can end up in the wrong file due to bad input or token-pasting shenanigans.
12206c3fb27SDimitry Andric   if (Candidate.isInvalid() || DecFirst.first != TargetFile ||
12306c3fb27SDimitry Andric       DecLast.first != TargetFile)
1246246ae0bSDimitry Andric     return SourceRange();
1256246ae0bSDimitry Andric   // Check bounds, which may still be inside macros.
1266246ae0bSDimitry Andric   if (Prev.isValid()) {
1276246ae0bSDimitry Andric     auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin());
1286246ae0bSDimitry Andric     if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second)
1296246ae0bSDimitry Andric       return SourceRange();
1306246ae0bSDimitry Andric   }
1316246ae0bSDimitry Andric   if (Next.isValid()) {
1326246ae0bSDimitry Andric     auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd());
1336246ae0bSDimitry Andric     if (Dec.first != DecLast.first || Dec.second <= DecLast.second)
1346246ae0bSDimitry Andric       return SourceRange();
1356246ae0bSDimitry Andric   }
1366246ae0bSDimitry Andric   // Now we know that Candidate is a file range that covers [First, Last]
1376246ae0bSDimitry Andric   // without encroaching on {Prev, Next}. Ship it!
1386246ae0bSDimitry Andric   return Candidate;
1395ffd83dbSDimitry Andric }
1405ffd83dbSDimitry Andric 
1415ffd83dbSDimitry Andric } // namespace
1425ffd83dbSDimitry Andric 
1430b57cec5SDimitry Andric syntax::Token::Token(SourceLocation Location, unsigned Length,
1440b57cec5SDimitry Andric                      tok::TokenKind Kind)
1450b57cec5SDimitry Andric     : Location(Location), Length(Length), Kind(Kind) {
1460b57cec5SDimitry Andric   assert(Location.isValid());
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric syntax::Token::Token(const clang::Token &T)
1500b57cec5SDimitry Andric     : Token(T.getLocation(), T.getLength(), T.getKind()) {
1510b57cec5SDimitry Andric   assert(!T.isAnnotation());
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
1550b57cec5SDimitry Andric   bool Invalid = false;
1560b57cec5SDimitry Andric   const char *Start = SM.getCharacterData(location(), &Invalid);
1570b57cec5SDimitry Andric   assert(!Invalid);
1580b57cec5SDimitry Andric   return llvm::StringRef(Start, length());
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric FileRange syntax::Token::range(const SourceManager &SM) const {
1620b57cec5SDimitry Andric   assert(location().isFileID() && "must be a spelled token");
1630b57cec5SDimitry Andric   FileID File;
1640b57cec5SDimitry Andric   unsigned StartOffset;
1650b57cec5SDimitry Andric   std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
1660b57cec5SDimitry Andric   return FileRange(File, StartOffset, StartOffset + length());
1670b57cec5SDimitry Andric }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric FileRange syntax::Token::range(const SourceManager &SM,
1700b57cec5SDimitry Andric                                const syntax::Token &First,
1710b57cec5SDimitry Andric                                const syntax::Token &Last) {
1720b57cec5SDimitry Andric   auto F = First.range(SM);
1730b57cec5SDimitry Andric   auto L = Last.range(SM);
1740b57cec5SDimitry Andric   assert(F.file() == L.file() && "tokens from different files");
1755ffd83dbSDimitry Andric   assert((F == L || F.endOffset() <= L.beginOffset()) &&
1765ffd83dbSDimitry Andric          "wrong order of tokens");
1770b57cec5SDimitry Andric   return FileRange(F.file(), F.beginOffset(), L.endOffset());
1780b57cec5SDimitry Andric }
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
1810b57cec5SDimitry Andric   return OS << T.str();
1820b57cec5SDimitry Andric }
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
1850b57cec5SDimitry Andric     : File(File), Begin(BeginOffset), End(EndOffset) {
1860b57cec5SDimitry Andric   assert(File.isValid());
1870b57cec5SDimitry Andric   assert(BeginOffset <= EndOffset);
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric 
1900b57cec5SDimitry Andric FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
1910b57cec5SDimitry Andric                      unsigned Length) {
1920b57cec5SDimitry Andric   assert(BeginLoc.isValid());
1930b57cec5SDimitry Andric   assert(BeginLoc.isFileID());
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
1960b57cec5SDimitry Andric   End = Begin + Length;
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
1990b57cec5SDimitry Andric                      SourceLocation EndLoc) {
2000b57cec5SDimitry Andric   assert(BeginLoc.isValid());
2010b57cec5SDimitry Andric   assert(BeginLoc.isFileID());
2020b57cec5SDimitry Andric   assert(EndLoc.isValid());
2030b57cec5SDimitry Andric   assert(EndLoc.isFileID());
2040b57cec5SDimitry Andric   assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
2050b57cec5SDimitry Andric   assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric   std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
2080b57cec5SDimitry Andric   End = SM.getFileOffset(EndLoc);
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
2120b57cec5SDimitry Andric                                       const FileRange &R) {
2130b57cec5SDimitry Andric   return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
2140b57cec5SDimitry Andric                              R.file().getHashValue(), R.beginOffset(),
2150b57cec5SDimitry Andric                              R.endOffset());
2160b57cec5SDimitry Andric }
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric llvm::StringRef FileRange::text(const SourceManager &SM) const {
2190b57cec5SDimitry Andric   bool Invalid = false;
2200b57cec5SDimitry Andric   StringRef Text = SM.getBufferData(File, &Invalid);
2210b57cec5SDimitry Andric   if (Invalid)
2220b57cec5SDimitry Andric     return "";
2230b57cec5SDimitry Andric   assert(Begin <= Text.size());
2240b57cec5SDimitry Andric   assert(End <= Text.size());
2250b57cec5SDimitry Andric   return Text.substr(Begin, length());
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric 
228fe6060f1SDimitry Andric void TokenBuffer::indexExpandedTokens() {
229fe6060f1SDimitry Andric   // No-op if the index is already created.
230fe6060f1SDimitry Andric   if (!ExpandedTokIndex.empty())
231fe6060f1SDimitry Andric     return;
232fe6060f1SDimitry Andric   ExpandedTokIndex.reserve(ExpandedTokens.size());
233fe6060f1SDimitry Andric   // Index ExpandedTokens for faster lookups by SourceLocation.
234fe6060f1SDimitry Andric   for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) {
235fe6060f1SDimitry Andric     SourceLocation Loc = ExpandedTokens[I].location();
236fe6060f1SDimitry Andric     if (Loc.isValid())
237fe6060f1SDimitry Andric       ExpandedTokIndex[Loc] = I;
238fe6060f1SDimitry Andric   }
239fe6060f1SDimitry Andric }
240fe6060f1SDimitry Andric 
241480093f4SDimitry Andric llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
242fe6060f1SDimitry Andric   if (R.isInvalid())
243fe6060f1SDimitry Andric     return {};
244fe6060f1SDimitry Andric   if (!ExpandedTokIndex.empty()) {
245fe6060f1SDimitry Andric     // Quick lookup if `R` is a token range.
246fe6060f1SDimitry Andric     // This is a huge win since majority of the users use ranges provided by an
247fe6060f1SDimitry Andric     // AST. Ranges in AST are token ranges from expanded token stream.
248fe6060f1SDimitry Andric     const auto B = ExpandedTokIndex.find(R.getBegin());
249fe6060f1SDimitry Andric     const auto E = ExpandedTokIndex.find(R.getEnd());
250fe6060f1SDimitry Andric     if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
251fe6060f1SDimitry Andric       const Token *L = ExpandedTokens.data() + B->getSecond();
252fe6060f1SDimitry Andric       // Add 1 to End to make a half-open range.
253fe6060f1SDimitry Andric       const Token *R = ExpandedTokens.data() + E->getSecond() + 1;
254fe6060f1SDimitry Andric       if (L > R)
255fe6060f1SDimitry Andric         return {};
256fe6060f1SDimitry Andric       return {L, R};
257fe6060f1SDimitry Andric     }
258fe6060f1SDimitry Andric   }
259fe6060f1SDimitry Andric   // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
260fe6060f1SDimitry Andric   // required range.
2615ffd83dbSDimitry Andric   return getTokensCovering(expandedTokens(), R, *SourceMgr);
262480093f4SDimitry Andric }
263480093f4SDimitry Andric 
264480093f4SDimitry Andric CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
265480093f4SDimitry Andric   return CharSourceRange(
266480093f4SDimitry Andric       SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
267480093f4SDimitry Andric       /*IsTokenRange=*/false);
268480093f4SDimitry Andric }
269480093f4SDimitry Andric 
2700b57cec5SDimitry Andric std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
2710b57cec5SDimitry Andric TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
2720b57cec5SDimitry Andric   assert(Expanded);
2730b57cec5SDimitry Andric   assert(ExpandedTokens.data() <= Expanded &&
2740b57cec5SDimitry Andric          Expanded < ExpandedTokens.data() + ExpandedTokens.size());
2750b57cec5SDimitry Andric 
2760b57cec5SDimitry Andric   auto FileIt = Files.find(
2770b57cec5SDimitry Andric       SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
2780b57cec5SDimitry Andric   assert(FileIt != Files.end() && "no file for an expanded token");
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric   const MarkedFile &File = FileIt->second;
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric   unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
2830b57cec5SDimitry Andric   // Find the first mapping that produced tokens after \p Expanded.
2840b57cec5SDimitry Andric   auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
2850b57cec5SDimitry Andric     return M.BeginExpanded <= ExpandedIndex;
2860b57cec5SDimitry Andric   });
2870b57cec5SDimitry Andric   // Our token could only be produced by the previous mapping.
2880b57cec5SDimitry Andric   if (It == File.Mappings.begin()) {
2890b57cec5SDimitry Andric     // No previous mapping, no need to modify offsets.
2905ffd83dbSDimitry Andric     return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
2915ffd83dbSDimitry Andric             /*Mapping=*/nullptr};
2920b57cec5SDimitry Andric   }
2930b57cec5SDimitry Andric   --It; // 'It' now points to last mapping that started before our token.
2940b57cec5SDimitry Andric 
2950b57cec5SDimitry Andric   // Check if the token is part of the mapping.
2960b57cec5SDimitry Andric   if (ExpandedIndex < It->EndExpanded)
2975ffd83dbSDimitry Andric     return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric   // Not part of the mapping, use the index from previous mapping to compute the
3000b57cec5SDimitry Andric   // corresponding spelled token.
3010b57cec5SDimitry Andric   return {
3020b57cec5SDimitry Andric       &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
3035ffd83dbSDimitry Andric       /*Mapping=*/nullptr};
3045ffd83dbSDimitry Andric }
3055ffd83dbSDimitry Andric 
3065ffd83dbSDimitry Andric const TokenBuffer::Mapping *
3075ffd83dbSDimitry Andric TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
3085ffd83dbSDimitry Andric                                           const syntax::Token *Spelled) {
3095ffd83dbSDimitry Andric   assert(F.SpelledTokens.data() <= Spelled);
3105ffd83dbSDimitry Andric   unsigned SpelledI = Spelled - F.SpelledTokens.data();
3115ffd83dbSDimitry Andric   assert(SpelledI < F.SpelledTokens.size());
3125ffd83dbSDimitry Andric 
3135ffd83dbSDimitry Andric   auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
3145ffd83dbSDimitry Andric     return M.BeginSpelled <= SpelledI;
3155ffd83dbSDimitry Andric   });
3165ffd83dbSDimitry Andric   if (It == F.Mappings.begin())
3175ffd83dbSDimitry Andric     return nullptr;
3185ffd83dbSDimitry Andric   --It;
3195ffd83dbSDimitry Andric   return &*It;
3205ffd83dbSDimitry Andric }
3215ffd83dbSDimitry Andric 
3225ffd83dbSDimitry Andric llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
3235ffd83dbSDimitry Andric TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
3245ffd83dbSDimitry Andric   if (Spelled.empty())
3255ffd83dbSDimitry Andric     return {};
326e8d8bef9SDimitry Andric   const auto &File = fileForSpelled(Spelled);
3275ffd83dbSDimitry Andric 
3285ffd83dbSDimitry Andric   auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
3295ffd83dbSDimitry Andric   unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
3305ffd83dbSDimitry Andric   assert(SpelledFrontI < File.SpelledTokens.size());
3315ffd83dbSDimitry Andric   unsigned ExpandedBegin;
3325ffd83dbSDimitry Andric   if (!FrontMapping) {
3335ffd83dbSDimitry Andric     // No mapping that starts before the first token of Spelled, we don't have
3345ffd83dbSDimitry Andric     // to modify offsets.
3355ffd83dbSDimitry Andric     ExpandedBegin = File.BeginExpanded + SpelledFrontI;
3365ffd83dbSDimitry Andric   } else if (SpelledFrontI < FrontMapping->EndSpelled) {
3375ffd83dbSDimitry Andric     // This mapping applies to Spelled tokens.
3385ffd83dbSDimitry Andric     if (SpelledFrontI != FrontMapping->BeginSpelled) {
3395ffd83dbSDimitry Andric       // Spelled tokens don't cover the entire mapping, returning empty result.
3405ffd83dbSDimitry Andric       return {}; // FIXME: support macro arguments.
3415ffd83dbSDimitry Andric     }
3425ffd83dbSDimitry Andric     // Spelled tokens start at the beginning of this mapping.
3435ffd83dbSDimitry Andric     ExpandedBegin = FrontMapping->BeginExpanded;
3445ffd83dbSDimitry Andric   } else {
3455ffd83dbSDimitry Andric     // Spelled tokens start after the mapping ends (they start in the hole
3465ffd83dbSDimitry Andric     // between 2 mappings, or between a mapping and end of the file).
3475ffd83dbSDimitry Andric     ExpandedBegin =
3485ffd83dbSDimitry Andric         FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
3495ffd83dbSDimitry Andric   }
3505ffd83dbSDimitry Andric 
3515ffd83dbSDimitry Andric   auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
3525ffd83dbSDimitry Andric   unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
3535ffd83dbSDimitry Andric   unsigned ExpandedEnd;
3545ffd83dbSDimitry Andric   if (!BackMapping) {
3555ffd83dbSDimitry Andric     // No mapping that starts before the last token of Spelled, we don't have to
3565ffd83dbSDimitry Andric     // modify offsets.
3575ffd83dbSDimitry Andric     ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
3585ffd83dbSDimitry Andric   } else if (SpelledBackI < BackMapping->EndSpelled) {
3595ffd83dbSDimitry Andric     // This mapping applies to Spelled tokens.
3605ffd83dbSDimitry Andric     if (SpelledBackI + 1 != BackMapping->EndSpelled) {
3615ffd83dbSDimitry Andric       // Spelled tokens don't cover the entire mapping, returning empty result.
3625ffd83dbSDimitry Andric       return {}; // FIXME: support macro arguments.
3635ffd83dbSDimitry Andric     }
3645ffd83dbSDimitry Andric     ExpandedEnd = BackMapping->EndExpanded;
3655ffd83dbSDimitry Andric   } else {
3665ffd83dbSDimitry Andric     // Spelled tokens end after the mapping ends.
3675ffd83dbSDimitry Andric     ExpandedEnd =
3685ffd83dbSDimitry Andric         BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
3695ffd83dbSDimitry Andric   }
3705ffd83dbSDimitry Andric 
3715ffd83dbSDimitry Andric   assert(ExpandedBegin < ExpandedTokens.size());
3725ffd83dbSDimitry Andric   assert(ExpandedEnd < ExpandedTokens.size());
3735ffd83dbSDimitry Andric   // Avoid returning empty ranges.
3745ffd83dbSDimitry Andric   if (ExpandedBegin == ExpandedEnd)
3755ffd83dbSDimitry Andric     return {};
376bdd1243dSDimitry Andric   return {llvm::ArrayRef(ExpandedTokens.data() + ExpandedBegin,
3775ffd83dbSDimitry Andric                          ExpandedTokens.data() + ExpandedEnd)};
3780b57cec5SDimitry Andric }
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
3810b57cec5SDimitry Andric   auto It = Files.find(FID);
3820b57cec5SDimitry Andric   assert(It != Files.end());
3830b57cec5SDimitry Andric   return It->second.SpelledTokens;
3840b57cec5SDimitry Andric }
3850b57cec5SDimitry Andric 
386*0fca6ea1SDimitry Andric const syntax::Token *
387*0fca6ea1SDimitry Andric TokenBuffer::spelledTokenContaining(SourceLocation Loc) const {
3885ffd83dbSDimitry Andric   assert(Loc.isFileID());
3895ffd83dbSDimitry Andric   const auto *Tok = llvm::partition_point(
3905ffd83dbSDimitry Andric       spelledTokens(SourceMgr->getFileID(Loc)),
391*0fca6ea1SDimitry Andric       [&](const syntax::Token &Tok) { return Tok.endLocation() <= Loc; });
392*0fca6ea1SDimitry Andric   if (!Tok || Loc < Tok->location())
3935ffd83dbSDimitry Andric     return nullptr;
3945ffd83dbSDimitry Andric   return Tok;
3955ffd83dbSDimitry Andric }
3965ffd83dbSDimitry Andric 
3970b57cec5SDimitry Andric std::string TokenBuffer::Mapping::str() const {
3985ffd83dbSDimitry Andric   return std::string(
3995ffd83dbSDimitry Andric       llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
4005ffd83dbSDimitry Andric                     BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric 
403bdd1243dSDimitry Andric std::optional<llvm::ArrayRef<syntax::Token>>
4040b57cec5SDimitry Andric TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
4057a6dacacSDimitry Andric   // In cases of invalid code, AST nodes can have source ranges that include
4067a6dacacSDimitry Andric   // the `eof` token. As there's no spelling for this token, exclude it from
4077a6dacacSDimitry Andric   // the range.
4087a6dacacSDimitry Andric   if (!Expanded.empty() && Expanded.back().kind() == tok::eof) {
4097a6dacacSDimitry Andric     Expanded = Expanded.drop_back();
4107a6dacacSDimitry Andric   }
4110b57cec5SDimitry Andric   // Mapping an empty range is ambiguous in case of empty mappings at either end
4120b57cec5SDimitry Andric   // of the range, bail out in that case.
4130b57cec5SDimitry Andric   if (Expanded.empty())
414bdd1243dSDimitry Andric     return std::nullopt;
4156246ae0bSDimitry Andric   const syntax::Token *First = &Expanded.front();
4166246ae0bSDimitry Andric   const syntax::Token *Last = &Expanded.back();
417bdd1243dSDimitry Andric   auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
418bdd1243dSDimitry Andric   auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);
4190b57cec5SDimitry Andric 
4206246ae0bSDimitry Andric   FileID FID = SourceMgr->getFileID(FirstSpelled->location());
4210b57cec5SDimitry Andric   // FIXME: Handle multi-file changes by trying to map onto a common root.
4220b57cec5SDimitry Andric   if (FID != SourceMgr->getFileID(LastSpelled->location()))
423bdd1243dSDimitry Andric     return std::nullopt;
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric   const MarkedFile &File = Files.find(FID)->second;
4260b57cec5SDimitry Andric 
4276246ae0bSDimitry Andric   // If the range is within one macro argument, the result may be only part of a
4286246ae0bSDimitry Andric   // Mapping. We must use the general (SourceManager-based) algorithm.
4296246ae0bSDimitry Andric   if (FirstMapping && FirstMapping == LastMapping &&
4306246ae0bSDimitry Andric       SourceMgr->isMacroArgExpansion(First->location()) &&
4316246ae0bSDimitry Andric       SourceMgr->isMacroArgExpansion(Last->location())) {
4326246ae0bSDimitry Andric     // We use excluded Prev/Next token for bounds checking.
4336246ae0bSDimitry Andric     SourceLocation Prev = (First == &ExpandedTokens.front())
4346246ae0bSDimitry Andric                               ? SourceLocation()
4356246ae0bSDimitry Andric                               : (First - 1)->location();
4366246ae0bSDimitry Andric     SourceLocation Next = (Last == &ExpandedTokens.back())
4376246ae0bSDimitry Andric                               ? SourceLocation()
4386246ae0bSDimitry Andric                               : (Last + 1)->location();
4396246ae0bSDimitry Andric     SourceRange Range = spelledForExpandedSlow(
4406246ae0bSDimitry Andric         First->location(), Last->location(), Prev, Next, FID, *SourceMgr);
4416246ae0bSDimitry Andric     if (Range.isInvalid())
442bdd1243dSDimitry Andric       return std::nullopt;
4436246ae0bSDimitry Andric     return getTokensCovering(File.SpelledTokens, Range, *SourceMgr);
4445ffd83dbSDimitry Andric   }
4455ffd83dbSDimitry Andric 
4466246ae0bSDimitry Andric   // Otherwise, use the fast version based on Mappings.
4475ffd83dbSDimitry Andric   // Do not allow changes that doesn't cover full expansion.
4486246ae0bSDimitry Andric   unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data();
4496246ae0bSDimitry Andric   unsigned LastExpanded = Expanded.end() - ExpandedTokens.data();
4506246ae0bSDimitry Andric   if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded)
451bdd1243dSDimitry Andric     return std::nullopt;
4526246ae0bSDimitry Andric   if (LastMapping && LastMapping->EndExpanded != LastExpanded)
453bdd1243dSDimitry Andric     return std::nullopt;
454bdd1243dSDimitry Andric   return llvm::ArrayRef(
4556246ae0bSDimitry Andric       FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled
4566246ae0bSDimitry Andric                    : FirstSpelled,
4570b57cec5SDimitry Andric       LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
4580b57cec5SDimitry Andric                   : LastSpelled + 1);
4590b57cec5SDimitry Andric }
4600b57cec5SDimitry Andric 
461e8d8bef9SDimitry Andric TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
462e8d8bef9SDimitry Andric                                                   const Mapping &M) const {
463e8d8bef9SDimitry Andric   Expansion E;
464bdd1243dSDimitry Andric   E.Spelled = llvm::ArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
465e8d8bef9SDimitry Andric                              F.SpelledTokens.data() + M.EndSpelled);
466bdd1243dSDimitry Andric   E.Expanded = llvm::ArrayRef(ExpandedTokens.data() + M.BeginExpanded,
467e8d8bef9SDimitry Andric                               ExpandedTokens.data() + M.EndExpanded);
468e8d8bef9SDimitry Andric   return E;
469e8d8bef9SDimitry Andric }
470e8d8bef9SDimitry Andric 
471e8d8bef9SDimitry Andric const TokenBuffer::MarkedFile &
472e8d8bef9SDimitry Andric TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
473e8d8bef9SDimitry Andric   assert(!Spelled.empty());
474e8d8bef9SDimitry Andric   assert(Spelled.front().location().isFileID() && "not a spelled token");
475e8d8bef9SDimitry Andric   auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
476e8d8bef9SDimitry Andric   assert(FileIt != Files.end() && "file not tracked by token buffer");
477e8d8bef9SDimitry Andric   const auto &File = FileIt->second;
478e8d8bef9SDimitry Andric   assert(File.SpelledTokens.data() <= Spelled.data() &&
479e8d8bef9SDimitry Andric          Spelled.end() <=
480e8d8bef9SDimitry Andric              (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
481e8d8bef9SDimitry Andric          "Tokens not in spelled range");
482e8d8bef9SDimitry Andric #ifndef NDEBUG
483e8d8bef9SDimitry Andric   auto T1 = Spelled.back().location();
484e8d8bef9SDimitry Andric   auto T2 = File.SpelledTokens.back().location();
485e8d8bef9SDimitry Andric   assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
486e8d8bef9SDimitry Andric #endif
487e8d8bef9SDimitry Andric   return File;
488e8d8bef9SDimitry Andric }
489e8d8bef9SDimitry Andric 
490bdd1243dSDimitry Andric std::optional<TokenBuffer::Expansion>
4910b57cec5SDimitry Andric TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
4920b57cec5SDimitry Andric   assert(Spelled);
493e8d8bef9SDimitry Andric   const auto &File = fileForSpelled(*Spelled);
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric   unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
4960b57cec5SDimitry Andric   auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
4970b57cec5SDimitry Andric     return M.BeginSpelled < SpelledIndex;
4980b57cec5SDimitry Andric   });
4990b57cec5SDimitry Andric   if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
500bdd1243dSDimitry Andric     return std::nullopt;
501e8d8bef9SDimitry Andric   return makeExpansion(File, *M);
5020b57cec5SDimitry Andric }
503e8d8bef9SDimitry Andric 
504e8d8bef9SDimitry Andric std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
505e8d8bef9SDimitry Andric     llvm::ArrayRef<syntax::Token> Spelled) const {
506e8d8bef9SDimitry Andric   if (Spelled.empty())
507e8d8bef9SDimitry Andric     return {};
508e8d8bef9SDimitry Andric   const auto &File = fileForSpelled(Spelled);
509e8d8bef9SDimitry Andric 
510e8d8bef9SDimitry Andric   // Find the first overlapping range, and then copy until we stop overlapping.
511e8d8bef9SDimitry Andric   unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
512e8d8bef9SDimitry Andric   unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
513e8d8bef9SDimitry Andric   auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
514e8d8bef9SDimitry Andric     return M.EndSpelled <= SpelledBeginIndex;
515e8d8bef9SDimitry Andric   });
516e8d8bef9SDimitry Andric   std::vector<TokenBuffer::Expansion> Expansions;
517e8d8bef9SDimitry Andric   for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M)
518e8d8bef9SDimitry Andric     Expansions.push_back(makeExpansion(File, *M));
519e8d8bef9SDimitry Andric   return Expansions;
520e8d8bef9SDimitry Andric }
521e8d8bef9SDimitry Andric 
522480093f4SDimitry Andric llvm::ArrayRef<syntax::Token>
523480093f4SDimitry Andric syntax::spelledTokensTouching(SourceLocation Loc,
5245ffd83dbSDimitry Andric                               llvm::ArrayRef<syntax::Token> Tokens) {
525480093f4SDimitry Andric   assert(Loc.isFileID());
5265ffd83dbSDimitry Andric 
527480093f4SDimitry Andric   auto *Right = llvm::partition_point(
5285ffd83dbSDimitry Andric       Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
5295ffd83dbSDimitry Andric   bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
5305ffd83dbSDimitry Andric   bool AcceptLeft =
5315ffd83dbSDimitry Andric       Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
532bdd1243dSDimitry Andric   return llvm::ArrayRef(Right - (AcceptLeft ? 1 : 0),
533480093f4SDimitry Andric                         Right + (AcceptRight ? 1 : 0));
534480093f4SDimitry Andric }
535480093f4SDimitry Andric 
5365ffd83dbSDimitry Andric llvm::ArrayRef<syntax::Token>
5375ffd83dbSDimitry Andric syntax::spelledTokensTouching(SourceLocation Loc,
5385ffd83dbSDimitry Andric                               const syntax::TokenBuffer &Tokens) {
5395ffd83dbSDimitry Andric   return spelledTokensTouching(
5405ffd83dbSDimitry Andric       Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
5415ffd83dbSDimitry Andric }
5425ffd83dbSDimitry Andric 
543480093f4SDimitry Andric const syntax::Token *
544480093f4SDimitry Andric syntax::spelledIdentifierTouching(SourceLocation Loc,
5455ffd83dbSDimitry Andric                                   llvm::ArrayRef<syntax::Token> Tokens) {
546480093f4SDimitry Andric   for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
547480093f4SDimitry Andric     if (Tok.kind() == tok::identifier)
548480093f4SDimitry Andric       return &Tok;
549480093f4SDimitry Andric   }
550480093f4SDimitry Andric   return nullptr;
551480093f4SDimitry Andric }
552480093f4SDimitry Andric 
5535ffd83dbSDimitry Andric const syntax::Token *
5545ffd83dbSDimitry Andric syntax::spelledIdentifierTouching(SourceLocation Loc,
5555ffd83dbSDimitry Andric                                   const syntax::TokenBuffer &Tokens) {
5565ffd83dbSDimitry Andric   return spelledIdentifierTouching(
5575ffd83dbSDimitry Andric       Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
5585ffd83dbSDimitry Andric }
5595ffd83dbSDimitry Andric 
560a7dea167SDimitry Andric std::vector<const syntax::Token *>
561a7dea167SDimitry Andric TokenBuffer::macroExpansions(FileID FID) const {
562a7dea167SDimitry Andric   auto FileIt = Files.find(FID);
563a7dea167SDimitry Andric   assert(FileIt != Files.end() && "file not tracked by token buffer");
564a7dea167SDimitry Andric   auto &File = FileIt->second;
565a7dea167SDimitry Andric   std::vector<const syntax::Token *> Expansions;
566a7dea167SDimitry Andric   auto &Spelled = File.SpelledTokens;
567a7dea167SDimitry Andric   for (auto Mapping : File.Mappings) {
568a7dea167SDimitry Andric     const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
569a7dea167SDimitry Andric     if (Token->kind() == tok::TokenKind::identifier)
570a7dea167SDimitry Andric       Expansions.push_back(Token);
571a7dea167SDimitry Andric   }
572a7dea167SDimitry Andric   return Expansions;
573a7dea167SDimitry Andric }
574a7dea167SDimitry Andric 
5755ffd83dbSDimitry Andric std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
5765ffd83dbSDimitry Andric                                             const SourceManager &SM,
5770b57cec5SDimitry Andric                                             const LangOptions &LO) {
5780b57cec5SDimitry Andric   std::vector<syntax::Token> Tokens;
5790b57cec5SDimitry Andric   IdentifierTable Identifiers(LO);
5800b57cec5SDimitry Andric   auto AddToken = [&](clang::Token T) {
5810b57cec5SDimitry Andric     // Fill the proper token kind for keywords, etc.
5820b57cec5SDimitry Andric     if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
5830b57cec5SDimitry Andric         !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
5840b57cec5SDimitry Andric       clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
5850b57cec5SDimitry Andric       T.setIdentifierInfo(&II);
5860b57cec5SDimitry Andric       T.setKind(II.getTokenID());
5870b57cec5SDimitry Andric     }
5880b57cec5SDimitry Andric     Tokens.push_back(syntax::Token(T));
5890b57cec5SDimitry Andric   };
5900b57cec5SDimitry Andric 
5915ffd83dbSDimitry Andric   auto SrcBuffer = SM.getBufferData(FR.file());
5925ffd83dbSDimitry Andric   Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
5935ffd83dbSDimitry Andric           SrcBuffer.data() + FR.beginOffset(),
5945ffd83dbSDimitry Andric           // We can't make BufEnd point to FR.endOffset, as Lexer requires a
5955ffd83dbSDimitry Andric           // null terminated buffer.
5965ffd83dbSDimitry Andric           SrcBuffer.data() + SrcBuffer.size());
5970b57cec5SDimitry Andric 
5980b57cec5SDimitry Andric   clang::Token T;
5995ffd83dbSDimitry Andric   while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
6000b57cec5SDimitry Andric     AddToken(T);
6015ffd83dbSDimitry Andric   // LexFromRawLexer returns true when it parses the last token of the file, add
6025ffd83dbSDimitry Andric   // it iff it starts within the range we are interested in.
6035ffd83dbSDimitry Andric   if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
6040b57cec5SDimitry Andric     AddToken(T);
6050b57cec5SDimitry Andric   return Tokens;
6060b57cec5SDimitry Andric }
6070b57cec5SDimitry Andric 
6085ffd83dbSDimitry Andric std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
6095ffd83dbSDimitry Andric                                             const LangOptions &LO) {
6105ffd83dbSDimitry Andric   return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
6115ffd83dbSDimitry Andric }
6125ffd83dbSDimitry Andric 
6130b57cec5SDimitry Andric /// Records information reqired to construct mappings for the token buffer that
6140b57cec5SDimitry Andric /// we are collecting.
6150b57cec5SDimitry Andric class TokenCollector::CollectPPExpansions : public PPCallbacks {
6160b57cec5SDimitry Andric public:
6170b57cec5SDimitry Andric   CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric   /// Disabled instance will stop reporting anything to TokenCollector.
6200b57cec5SDimitry Andric   /// This ensures that uses of the preprocessor after TokenCollector::consume()
6210b57cec5SDimitry Andric   /// is called do not access the (possibly invalid) collector instance.
6220b57cec5SDimitry Andric   void disable() { Collector = nullptr; }
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric   void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
6250b57cec5SDimitry Andric                     SourceRange Range, const MacroArgs *Args) override {
6260b57cec5SDimitry Andric     if (!Collector)
6270b57cec5SDimitry Andric       return;
628e837bb5cSDimitry Andric     const auto &SM = Collector->PP.getSourceManager();
629e837bb5cSDimitry Andric     // Only record top-level expansions that directly produce expanded tokens.
630e837bb5cSDimitry Andric     // This excludes those where:
6310b57cec5SDimitry Andric     //   - the macro use is inside a macro body,
6320b57cec5SDimitry Andric     //   - the macro appears in an argument to another macro.
633e837bb5cSDimitry Andric     // However macro expansion isn't really a tree, it's token rewrite rules,
634e837bb5cSDimitry Andric     // so there are other cases, e.g.
635e837bb5cSDimitry Andric     //   #define B(X) X
636e837bb5cSDimitry Andric     //   #define A 1 + B
637e837bb5cSDimitry Andric     //   A(2)
638e837bb5cSDimitry Andric     // Both A and B produce expanded tokens, though the macro name 'B' comes
639e837bb5cSDimitry Andric     // from an expansion. The best we can do is merge the mappings for both.
640e837bb5cSDimitry Andric 
641e837bb5cSDimitry Andric     // The *last* token of any top-level macro expansion must be in a file.
642e837bb5cSDimitry Andric     // (In the example above, see the closing paren of the expansion of B).
643e837bb5cSDimitry Andric     if (!Range.getEnd().isFileID())
6440b57cec5SDimitry Andric       return;
645e837bb5cSDimitry Andric     // If there's a current expansion that encloses this one, this one can't be
646e837bb5cSDimitry Andric     // top-level.
647e837bb5cSDimitry Andric     if (LastExpansionEnd.isValid() &&
648e837bb5cSDimitry Andric         !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
649e837bb5cSDimitry Andric       return;
650e837bb5cSDimitry Andric 
651e837bb5cSDimitry Andric     // If the macro invocation (B) starts in a macro (A) but ends in a file,
652e837bb5cSDimitry Andric     // we'll create a merged mapping for A + B by overwriting the endpoint for
653e837bb5cSDimitry Andric     // A's startpoint.
654e837bb5cSDimitry Andric     if (!Range.getBegin().isFileID()) {
655e837bb5cSDimitry Andric       Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
656e8d8bef9SDimitry Andric       assert(Collector->Expansions.count(Range.getBegin()) &&
657e837bb5cSDimitry Andric              "Overlapping macros should have same expansion location");
658e837bb5cSDimitry Andric     }
659e837bb5cSDimitry Andric 
660e8d8bef9SDimitry Andric     Collector->Expansions[Range.getBegin()] = Range.getEnd();
6610b57cec5SDimitry Andric     LastExpansionEnd = Range.getEnd();
6620b57cec5SDimitry Andric   }
6630b57cec5SDimitry Andric   // FIXME: handle directives like #pragma, #include, etc.
6640b57cec5SDimitry Andric private:
6650b57cec5SDimitry Andric   TokenCollector *Collector;
6660b57cec5SDimitry Andric   /// Used to detect recursive macro expansions.
6670b57cec5SDimitry Andric   SourceLocation LastExpansionEnd;
6680b57cec5SDimitry Andric };
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric /// Fills in the TokenBuffer by tracing the run of a preprocessor. The
6710b57cec5SDimitry Andric /// implementation tracks the tokens, macro expansions and directives coming
6720b57cec5SDimitry Andric /// from the preprocessor and:
6730b57cec5SDimitry Andric /// - for each token, figures out if it is a part of an expanded token stream,
6740b57cec5SDimitry Andric ///   spelled token stream or both. Stores the tokens appropriately.
6750b57cec5SDimitry Andric /// - records mappings from the spelled to expanded token ranges, e.g. for macro
6760b57cec5SDimitry Andric ///   expansions.
6770b57cec5SDimitry Andric /// FIXME: also properly record:
6780b57cec5SDimitry Andric ///          - #include directives,
6790b57cec5SDimitry Andric ///          - #pragma, #line and other PP directives,
6800b57cec5SDimitry Andric ///          - skipped pp regions,
6810b57cec5SDimitry Andric ///          - ...
6820b57cec5SDimitry Andric 
6830b57cec5SDimitry Andric TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
6840b57cec5SDimitry Andric   // Collect the expanded token stream during preprocessing.
6850b57cec5SDimitry Andric   PP.setTokenWatcher([this](const clang::Token &T) {
6860b57cec5SDimitry Andric     if (T.isAnnotation())
6870b57cec5SDimitry Andric       return;
6880b57cec5SDimitry Andric     DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
6890b57cec5SDimitry Andric                                           << "Token: "
6900b57cec5SDimitry Andric                                           << syntax::Token(T).dumpForTests(
6910b57cec5SDimitry Andric                                                  this->PP.getSourceManager())
6920b57cec5SDimitry Andric                                           << "\n"
6930b57cec5SDimitry Andric 
6940b57cec5SDimitry Andric     );
6950b57cec5SDimitry Andric     Expanded.push_back(syntax::Token(T));
6960b57cec5SDimitry Andric   });
6970b57cec5SDimitry Andric   // And locations of macro calls, to properly recover boundaries of those in
6980b57cec5SDimitry Andric   // case of empty expansions.
699a7dea167SDimitry Andric   auto CB = std::make_unique<CollectPPExpansions>(*this);
7000b57cec5SDimitry Andric   this->Collector = CB.get();
7010b57cec5SDimitry Andric   PP.addPPCallbacks(std::move(CB));
7020b57cec5SDimitry Andric }
7030b57cec5SDimitry Andric 
7040b57cec5SDimitry Andric /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
7050b57cec5SDimitry Andric /// token stream.
7060b57cec5SDimitry Andric class TokenCollector::Builder {
7070b57cec5SDimitry Andric public:
7080b57cec5SDimitry Andric   Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
7090b57cec5SDimitry Andric           const SourceManager &SM, const LangOptions &LangOpts)
7100b57cec5SDimitry Andric       : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
7110b57cec5SDimitry Andric         LangOpts(LangOpts) {
7120b57cec5SDimitry Andric     Result.ExpandedTokens = std::move(Expanded);
7130b57cec5SDimitry Andric   }
7140b57cec5SDimitry Andric 
7150b57cec5SDimitry Andric   TokenBuffer build() && {
7160b57cec5SDimitry Andric     assert(!Result.ExpandedTokens.empty());
7170b57cec5SDimitry Andric     assert(Result.ExpandedTokens.back().kind() == tok::eof);
718e837bb5cSDimitry Andric 
719e837bb5cSDimitry Andric     // Tokenize every file that contributed tokens to the expanded stream.
720e837bb5cSDimitry Andric     buildSpelledTokens();
721e837bb5cSDimitry Andric 
722e837bb5cSDimitry Andric     // The expanded token stream consists of runs of tokens that came from
723e837bb5cSDimitry Andric     // the same source (a macro expansion, part of a file etc).
724e837bb5cSDimitry Andric     // Between these runs are the logical positions of spelled tokens that
725e837bb5cSDimitry Andric     // didn't expand to anything.
726e837bb5cSDimitry Andric     while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
727e837bb5cSDimitry Andric       // Create empty mappings for spelled tokens that expanded to nothing here.
728e837bb5cSDimitry Andric       // May advance NextSpelled, but NextExpanded is unchanged.
729e837bb5cSDimitry Andric       discard();
730e837bb5cSDimitry Andric       // Create mapping for a contiguous run of expanded tokens.
731e837bb5cSDimitry Andric       // Advances NextExpanded past the run, and NextSpelled accordingly.
732e837bb5cSDimitry Andric       unsigned OldPosition = NextExpanded;
733e837bb5cSDimitry Andric       advance();
734e837bb5cSDimitry Andric       if (NextExpanded == OldPosition)
735e837bb5cSDimitry Andric         diagnoseAdvanceFailure();
7360b57cec5SDimitry Andric     }
737e837bb5cSDimitry Andric     // If any tokens remain in any of the files, they didn't expand to anything.
738e837bb5cSDimitry Andric     // Create empty mappings up until the end of the file.
739e837bb5cSDimitry Andric     for (const auto &File : Result.Files)
740e837bb5cSDimitry Andric       discard(File.first);
7410b57cec5SDimitry Andric 
7425ffd83dbSDimitry Andric #ifndef NDEBUG
7435ffd83dbSDimitry Andric     for (auto &pair : Result.Files) {
7445ffd83dbSDimitry Andric       auto &mappings = pair.second.Mappings;
7455ffd83dbSDimitry Andric       assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
7465ffd83dbSDimitry Andric                                           const TokenBuffer::Mapping &M2) {
7475ffd83dbSDimitry Andric         return M1.BeginSpelled < M2.BeginSpelled &&
7485ffd83dbSDimitry Andric                M1.EndSpelled < M2.EndSpelled &&
7495ffd83dbSDimitry Andric                M1.BeginExpanded < M2.BeginExpanded &&
7505ffd83dbSDimitry Andric                M1.EndExpanded < M2.EndExpanded;
7515ffd83dbSDimitry Andric       }));
7525ffd83dbSDimitry Andric     }
7535ffd83dbSDimitry Andric #endif
7545ffd83dbSDimitry Andric 
7550b57cec5SDimitry Andric     return std::move(Result);
7560b57cec5SDimitry Andric   }
7570b57cec5SDimitry Andric 
7580b57cec5SDimitry Andric private:
759e837bb5cSDimitry Andric   // Consume a sequence of spelled tokens that didn't expand to anything.
760e837bb5cSDimitry Andric   // In the simplest case, skips spelled tokens until finding one that produced
761e837bb5cSDimitry Andric   // the NextExpanded token, and creates an empty mapping for them.
762e837bb5cSDimitry Andric   // If Drain is provided, skips remaining tokens from that file instead.
763bdd1243dSDimitry Andric   void discard(std::optional<FileID> Drain = std::nullopt) {
764e837bb5cSDimitry Andric     SourceLocation Target =
765e837bb5cSDimitry Andric         Drain ? SM.getLocForEndOfFile(*Drain)
766e837bb5cSDimitry Andric               : SM.getExpansionLoc(
767e837bb5cSDimitry Andric                     Result.ExpandedTokens[NextExpanded].location());
768e837bb5cSDimitry Andric     FileID File = SM.getFileID(Target);
769e837bb5cSDimitry Andric     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
770e837bb5cSDimitry Andric     auto &NextSpelled = this->NextSpelled[File];
771e837bb5cSDimitry Andric 
772e837bb5cSDimitry Andric     TokenBuffer::Mapping Mapping;
773e837bb5cSDimitry Andric     Mapping.BeginSpelled = NextSpelled;
774e837bb5cSDimitry Andric     // When dropping trailing tokens from a file, the empty mapping should
775e837bb5cSDimitry Andric     // be positioned within the file's expanded-token range (at the end).
776e837bb5cSDimitry Andric     Mapping.BeginExpanded = Mapping.EndExpanded =
777e837bb5cSDimitry Andric         Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
778e837bb5cSDimitry Andric     // We may want to split into several adjacent empty mappings.
779e837bb5cSDimitry Andric     // FlushMapping() emits the current mapping and starts a new one.
780e837bb5cSDimitry Andric     auto FlushMapping = [&, this] {
781e837bb5cSDimitry Andric       Mapping.EndSpelled = NextSpelled;
782e837bb5cSDimitry Andric       if (Mapping.BeginSpelled != Mapping.EndSpelled)
783e837bb5cSDimitry Andric         Result.Files[File].Mappings.push_back(Mapping);
784e837bb5cSDimitry Andric       Mapping.BeginSpelled = NextSpelled;
785e837bb5cSDimitry Andric     };
786e837bb5cSDimitry Andric 
787e837bb5cSDimitry Andric     while (NextSpelled < SpelledTokens.size() &&
788e837bb5cSDimitry Andric            SpelledTokens[NextSpelled].location() < Target) {
789e837bb5cSDimitry Andric       // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
790e837bb5cSDimitry Andric       // then we want to partition our (empty) mapping.
791e837bb5cSDimitry Andric       //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
792e8d8bef9SDimitry Andric       SourceLocation KnownEnd =
793e8d8bef9SDimitry Andric           CollectedExpansions.lookup(SpelledTokens[NextSpelled].location());
794e837bb5cSDimitry Andric       if (KnownEnd.isValid()) {
795e837bb5cSDimitry Andric         FlushMapping(); // Emits [Start, NextSpelled)
796e837bb5cSDimitry Andric         while (NextSpelled < SpelledTokens.size() &&
797e837bb5cSDimitry Andric                SpelledTokens[NextSpelled].location() <= KnownEnd)
798e837bb5cSDimitry Andric           ++NextSpelled;
799e837bb5cSDimitry Andric         FlushMapping(); // Emits [NextSpelled, KnownEnd]
800bdd1243dSDimitry Andric         // Now the loop continues and will emit (KnownEnd, Target).
801e837bb5cSDimitry Andric       } else {
802e837bb5cSDimitry Andric         ++NextSpelled;
8030b57cec5SDimitry Andric       }
804e837bb5cSDimitry Andric     }
805e837bb5cSDimitry Andric     FlushMapping();
806e837bb5cSDimitry Andric   }
8070b57cec5SDimitry Andric 
808e837bb5cSDimitry Andric   // Consumes the NextExpanded token and others that are part of the same run.
809e837bb5cSDimitry Andric   // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
810e837bb5cSDimitry Andric   // (unless this is a run of file tokens, which we represent with no mapping).
811e837bb5cSDimitry Andric   void advance() {
812e837bb5cSDimitry Andric     const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
813e837bb5cSDimitry Andric     SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
814e837bb5cSDimitry Andric     FileID File = SM.getFileID(Expansion);
815e837bb5cSDimitry Andric     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
816e837bb5cSDimitry Andric     auto &NextSpelled = this->NextSpelled[File];
8170b57cec5SDimitry Andric 
818e837bb5cSDimitry Andric     if (Tok.location().isFileID()) {
819e837bb5cSDimitry Andric       // A run of file tokens continues while the expanded/spelled tokens match.
820e837bb5cSDimitry Andric       while (NextSpelled < SpelledTokens.size() &&
821e837bb5cSDimitry Andric              NextExpanded < Result.ExpandedTokens.size() &&
822e837bb5cSDimitry Andric              SpelledTokens[NextSpelled].location() ==
823e837bb5cSDimitry Andric                  Result.ExpandedTokens[NextExpanded].location()) {
824e837bb5cSDimitry Andric         ++NextSpelled;
825e837bb5cSDimitry Andric         ++NextExpanded;
826e837bb5cSDimitry Andric       }
827e837bb5cSDimitry Andric       // We need no mapping for file tokens copied to the expanded stream.
828e837bb5cSDimitry Andric     } else {
829e837bb5cSDimitry Andric       // We found a new macro expansion. We should have its spelling bounds.
830e8d8bef9SDimitry Andric       auto End = CollectedExpansions.lookup(Expansion);
831e837bb5cSDimitry Andric       assert(End.isValid() && "Macro expansion wasn't captured?");
832e837bb5cSDimitry Andric 
833e837bb5cSDimitry Andric       // Mapping starts here...
834e837bb5cSDimitry Andric       TokenBuffer::Mapping Mapping;
835e837bb5cSDimitry Andric       Mapping.BeginExpanded = NextExpanded;
836e837bb5cSDimitry Andric       Mapping.BeginSpelled = NextSpelled;
837e837bb5cSDimitry Andric       // ... consumes spelled tokens within bounds we captured ...
838e837bb5cSDimitry Andric       while (NextSpelled < SpelledTokens.size() &&
839e837bb5cSDimitry Andric              SpelledTokens[NextSpelled].location() <= End)
840e837bb5cSDimitry Andric         ++NextSpelled;
841e837bb5cSDimitry Andric       // ... consumes expanded tokens rooted at the same expansion ...
842e837bb5cSDimitry Andric       while (NextExpanded < Result.ExpandedTokens.size() &&
843e837bb5cSDimitry Andric              SM.getExpansionLoc(
844e837bb5cSDimitry Andric                  Result.ExpandedTokens[NextExpanded].location()) == Expansion)
845e837bb5cSDimitry Andric         ++NextExpanded;
846e837bb5cSDimitry Andric       // ... and ends here.
847e837bb5cSDimitry Andric       Mapping.EndExpanded = NextExpanded;
848e837bb5cSDimitry Andric       Mapping.EndSpelled = NextSpelled;
849e837bb5cSDimitry Andric       Result.Files[File].Mappings.push_back(Mapping);
8500b57cec5SDimitry Andric     }
8510b57cec5SDimitry Andric   }
8520b57cec5SDimitry Andric 
853e837bb5cSDimitry Andric   // advance() is supposed to consume at least one token - if not, we crash.
854e837bb5cSDimitry Andric   void diagnoseAdvanceFailure() {
855e837bb5cSDimitry Andric #ifndef NDEBUG
856e837bb5cSDimitry Andric     // Show the failed-to-map token in context.
857e837bb5cSDimitry Andric     for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
858e837bb5cSDimitry Andric          I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
859e837bb5cSDimitry Andric       const char *L =
860e837bb5cSDimitry Andric           (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
861e837bb5cSDimitry Andric       llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
8620b57cec5SDimitry Andric     }
863e837bb5cSDimitry Andric #endif
864e837bb5cSDimitry Andric     llvm_unreachable("Couldn't map expanded token to spelled tokens!");
8650b57cec5SDimitry Andric   }
8660b57cec5SDimitry Andric 
8670b57cec5SDimitry Andric   /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
8680b57cec5SDimitry Andric   /// ranges for each of the files.
8690b57cec5SDimitry Andric   void buildSpelledTokens() {
8700b57cec5SDimitry Andric     for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
871e837bb5cSDimitry Andric       const auto &Tok = Result.ExpandedTokens[I];
872e837bb5cSDimitry Andric       auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
8730b57cec5SDimitry Andric       auto It = Result.Files.try_emplace(FID);
8740b57cec5SDimitry Andric       TokenBuffer::MarkedFile &File = It.first->second;
8750b57cec5SDimitry Andric 
876e837bb5cSDimitry Andric       // The eof token should not be considered part of the main-file's range.
877e837bb5cSDimitry Andric       File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
878e837bb5cSDimitry Andric 
8790b57cec5SDimitry Andric       if (!It.second)
8800b57cec5SDimitry Andric         continue; // we have seen this file before.
8810b57cec5SDimitry Andric       // This is the first time we see this file.
8820b57cec5SDimitry Andric       File.BeginExpanded = I;
8830b57cec5SDimitry Andric       File.SpelledTokens = tokenize(FID, SM, LangOpts);
8840b57cec5SDimitry Andric     }
8850b57cec5SDimitry Andric   }
8860b57cec5SDimitry Andric 
8870b57cec5SDimitry Andric   TokenBuffer Result;
888e837bb5cSDimitry Andric   unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
889e837bb5cSDimitry Andric   llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
8900b57cec5SDimitry Andric   PPExpansions CollectedExpansions;
8910b57cec5SDimitry Andric   const SourceManager &SM;
8920b57cec5SDimitry Andric   const LangOptions &LangOpts;
8930b57cec5SDimitry Andric };
8940b57cec5SDimitry Andric 
8950b57cec5SDimitry Andric TokenBuffer TokenCollector::consume() && {
8960b57cec5SDimitry Andric   PP.setTokenWatcher(nullptr);
8970b57cec5SDimitry Andric   Collector->disable();
8980b57cec5SDimitry Andric   return Builder(std::move(Expanded), std::move(Expansions),
8990b57cec5SDimitry Andric                  PP.getSourceManager(), PP.getLangOpts())
9000b57cec5SDimitry Andric       .build();
9010b57cec5SDimitry Andric }
9020b57cec5SDimitry Andric 
9030b57cec5SDimitry Andric std::string syntax::Token::str() const {
9045ffd83dbSDimitry Andric   return std::string(llvm::formatv("Token({0}, length = {1})",
9055ffd83dbSDimitry Andric                                    tok::getTokenName(kind()), length()));
9060b57cec5SDimitry Andric }
9070b57cec5SDimitry Andric 
9080b57cec5SDimitry Andric std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
9095ffd83dbSDimitry Andric   return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
9105ffd83dbSDimitry Andric                                    tok::getTokenName(kind()), length()));
9110b57cec5SDimitry Andric }
9120b57cec5SDimitry Andric 
9130b57cec5SDimitry Andric std::string TokenBuffer::dumpForTests() const {
9140b57cec5SDimitry Andric   auto PrintToken = [this](const syntax::Token &T) -> std::string {
9150b57cec5SDimitry Andric     if (T.kind() == tok::eof)
9160b57cec5SDimitry Andric       return "<eof>";
9175ffd83dbSDimitry Andric     return std::string(T.text(*SourceMgr));
9180b57cec5SDimitry Andric   };
9190b57cec5SDimitry Andric 
9200b57cec5SDimitry Andric   auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
9210b57cec5SDimitry Andric                                         llvm::ArrayRef<syntax::Token> Tokens) {
9220b57cec5SDimitry Andric     if (Tokens.empty()) {
9230b57cec5SDimitry Andric       OS << "<empty>";
9240b57cec5SDimitry Andric       return;
9250b57cec5SDimitry Andric     }
9260b57cec5SDimitry Andric     OS << Tokens[0].text(*SourceMgr);
9270b57cec5SDimitry Andric     for (unsigned I = 1; I < Tokens.size(); ++I) {
9280b57cec5SDimitry Andric       if (Tokens[I].kind() == tok::eof)
9290b57cec5SDimitry Andric         continue;
9300b57cec5SDimitry Andric       OS << " " << PrintToken(Tokens[I]);
9310b57cec5SDimitry Andric     }
9320b57cec5SDimitry Andric   };
9330b57cec5SDimitry Andric 
9340b57cec5SDimitry Andric   std::string Dump;
9350b57cec5SDimitry Andric   llvm::raw_string_ostream OS(Dump);
9360b57cec5SDimitry Andric 
9370b57cec5SDimitry Andric   OS << "expanded tokens:\n"
9380b57cec5SDimitry Andric      << "  ";
9390b57cec5SDimitry Andric   // (!) we do not show '<eof>'.
940bdd1243dSDimitry Andric   DumpTokens(OS, llvm::ArrayRef(ExpandedTokens).drop_back());
9410b57cec5SDimitry Andric   OS << "\n";
9420b57cec5SDimitry Andric 
9430b57cec5SDimitry Andric   std::vector<FileID> Keys;
94406c3fb27SDimitry Andric   for (const auto &F : Files)
9450b57cec5SDimitry Andric     Keys.push_back(F.first);
9460b57cec5SDimitry Andric   llvm::sort(Keys);
9470b57cec5SDimitry Andric 
9480b57cec5SDimitry Andric   for (FileID ID : Keys) {
9490b57cec5SDimitry Andric     const MarkedFile &File = Files.find(ID)->second;
9505f757f3fSDimitry Andric     auto Entry = SourceMgr->getFileEntryRefForID(ID);
9510b57cec5SDimitry Andric     if (!Entry)
9520b57cec5SDimitry Andric       continue; // Skip builtin files.
9535f757f3fSDimitry Andric     std::string Path = llvm::sys::path::convert_to_slash(Entry->getName());
9545f757f3fSDimitry Andric     OS << llvm::formatv("file '{0}'\n", Path) << "  spelled tokens:\n"
9550b57cec5SDimitry Andric        << "    ";
9560b57cec5SDimitry Andric     DumpTokens(OS, File.SpelledTokens);
9570b57cec5SDimitry Andric     OS << "\n";
9580b57cec5SDimitry Andric 
9590b57cec5SDimitry Andric     if (File.Mappings.empty()) {
9600b57cec5SDimitry Andric       OS << "  no mappings.\n";
9610b57cec5SDimitry Andric       continue;
9620b57cec5SDimitry Andric     }
9630b57cec5SDimitry Andric     OS << "  mappings:\n";
9640b57cec5SDimitry Andric     for (auto &M : File.Mappings) {
9650b57cec5SDimitry Andric       OS << llvm::formatv(
9660b57cec5SDimitry Andric           "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
9670b57cec5SDimitry Andric           PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
9680b57cec5SDimitry Andric           M.EndSpelled == File.SpelledTokens.size()
9690b57cec5SDimitry Andric               ? "<eof>"
9700b57cec5SDimitry Andric               : PrintToken(File.SpelledTokens[M.EndSpelled]),
9710b57cec5SDimitry Andric           M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
9720b57cec5SDimitry Andric           M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
9730b57cec5SDimitry Andric           M.EndExpanded);
9740b57cec5SDimitry Andric     }
9750b57cec5SDimitry Andric   }
9760eae32dcSDimitry Andric   return Dump;
9770b57cec5SDimitry Andric }
978