xref: /openbsd-src/gnu/llvm/clang/lib/Tooling/Syntax/Tokens.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1e5dd7070Spatrick //===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick #include "clang/Tooling/Syntax/Tokens.h"
9e5dd7070Spatrick 
10e5dd7070Spatrick #include "clang/Basic/Diagnostic.h"
11e5dd7070Spatrick #include "clang/Basic/IdentifierTable.h"
12e5dd7070Spatrick #include "clang/Basic/LLVM.h"
13e5dd7070Spatrick #include "clang/Basic/LangOptions.h"
14e5dd7070Spatrick #include "clang/Basic/SourceLocation.h"
15e5dd7070Spatrick #include "clang/Basic/SourceManager.h"
16e5dd7070Spatrick #include "clang/Basic/TokenKinds.h"
17e5dd7070Spatrick #include "clang/Lex/PPCallbacks.h"
18e5dd7070Spatrick #include "clang/Lex/Preprocessor.h"
19e5dd7070Spatrick #include "clang/Lex/Token.h"
20e5dd7070Spatrick #include "llvm/ADT/ArrayRef.h"
21e5dd7070Spatrick #include "llvm/ADT/STLExtras.h"
22e5dd7070Spatrick #include "llvm/Support/Debug.h"
23e5dd7070Spatrick #include "llvm/Support/ErrorHandling.h"
24e5dd7070Spatrick #include "llvm/Support/FormatVariadic.h"
25e5dd7070Spatrick #include "llvm/Support/raw_ostream.h"
26e5dd7070Spatrick #include <algorithm>
27e5dd7070Spatrick #include <cassert>
28e5dd7070Spatrick #include <iterator>
29*12c85518Srobert #include <optional>
30e5dd7070Spatrick #include <string>
31e5dd7070Spatrick #include <utility>
32e5dd7070Spatrick #include <vector>
33e5dd7070Spatrick 
34e5dd7070Spatrick using namespace clang;
35e5dd7070Spatrick using namespace clang::syntax;
36e5dd7070Spatrick 
37ec727ea7Spatrick namespace {
38ec727ea7Spatrick // Finds the smallest consecutive subsuquence of Toks that covers R.
39ec727ea7Spatrick llvm::ArrayRef<syntax::Token>
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks,SourceRange R,const SourceManager & SM)40ec727ea7Spatrick getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
41ec727ea7Spatrick                   const SourceManager &SM) {
42ec727ea7Spatrick   if (R.isInvalid())
43ec727ea7Spatrick     return {};
44ec727ea7Spatrick   const syntax::Token *Begin =
45ec727ea7Spatrick       llvm::partition_point(Toks, [&](const syntax::Token &T) {
46ec727ea7Spatrick         return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
47ec727ea7Spatrick       });
48ec727ea7Spatrick   const syntax::Token *End =
49ec727ea7Spatrick       llvm::partition_point(Toks, [&](const syntax::Token &T) {
50ec727ea7Spatrick         return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
51ec727ea7Spatrick       });
52ec727ea7Spatrick   if (Begin > End)
53ec727ea7Spatrick     return {};
54ec727ea7Spatrick   return {Begin, End};
55ec727ea7Spatrick }
56ec727ea7Spatrick 
57*12c85518Srobert // Finds the range within FID corresponding to expanded tokens [First, Last].
58*12c85518Srobert // Prev precedes First and Next follows Last, these must *not* be included.
59*12c85518Srobert // If no range satisfies the criteria, returns an invalid range.
60*12c85518Srobert //
61ec727ea7Spatrick // #define ID(x) x
62ec727ea7Spatrick // ID(ID(ID(a1) a2))
63ec727ea7Spatrick //          ~~       -> a1
64ec727ea7Spatrick //              ~~   -> a2
65ec727ea7Spatrick //       ~~~~~~~~~   -> a1 a2
spelledForExpandedSlow(SourceLocation First,SourceLocation Last,SourceLocation Prev,SourceLocation Next,FileID TargetFile,const SourceManager & SM)66*12c85518Srobert SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last,
67*12c85518Srobert                                    SourceLocation Prev, SourceLocation Next,
68*12c85518Srobert                                    FileID TargetFile,
69ec727ea7Spatrick                                    const SourceManager &SM) {
70*12c85518Srobert   // There are two main parts to this algorithm:
71*12c85518Srobert   //  - identifying which spelled range covers the expanded tokens
72*12c85518Srobert   //  - validating that this range doesn't cover any extra tokens (First/Last)
73*12c85518Srobert   //
74*12c85518Srobert   // We do these in order. However as we transform the expanded range into the
75*12c85518Srobert   // spelled one, we adjust First/Last so the validation remains simple.
76*12c85518Srobert 
77*12c85518Srobert   assert(SM.getSLocEntry(TargetFile).isFile());
78*12c85518Srobert   // In most cases, to select First and Last we must return their expansion
79*12c85518Srobert   // range, i.e. the whole of any macros they are included in.
80*12c85518Srobert   //
81*12c85518Srobert   // When First and Last are part of the *same macro arg* of a macro written
82*12c85518Srobert   // in TargetFile, we that slice of the arg, i.e. their spelling range.
83*12c85518Srobert   //
84*12c85518Srobert   // Unwrap such macro calls. If the target file has A(B(C)), the
85*12c85518Srobert   // SourceLocation stack of a token inside C shows us the expansion of A first,
86*12c85518Srobert   // then B, then any macros inside C's body, then C itself.
87*12c85518Srobert   // (This is the reverse of the order the PP applies the expansions in).
88*12c85518Srobert   while (First.isMacroID() && Last.isMacroID()) {
89*12c85518Srobert     auto DecFirst = SM.getDecomposedLoc(First);
90*12c85518Srobert     auto DecLast = SM.getDecomposedLoc(Last);
91*12c85518Srobert     auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion();
92*12c85518Srobert     auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion();
93*12c85518Srobert 
94*12c85518Srobert     if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion())
95ec727ea7Spatrick       break;
96*12c85518Srobert     // Locations are in the same macro arg if they expand to the same place.
97*12c85518Srobert     // (They may still have different FileIDs - an arg can have >1 chunks!)
98*12c85518Srobert     if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart())
99ec727ea7Spatrick       break;
100*12c85518Srobert     // Careful, given:
101*12c85518Srobert     //   #define HIDE ID(ID(a))
102*12c85518Srobert     //   ID(ID(HIDE))
103*12c85518Srobert     // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2.
104*12c85518Srobert     // We distinguish them by whether the macro expands into the target file.
105*12c85518Srobert     // Fortunately, the target file ones will always appear first.
106*12c85518Srobert     auto &ExpMacro =
107*12c85518Srobert         SM.getSLocEntry(SM.getFileID(ExpFirst.getExpansionLocStart()))
108*12c85518Srobert             .getExpansion();
109*12c85518Srobert     if (ExpMacro.getExpansionLocStart().isMacroID())
110*12c85518Srobert       break;
111*12c85518Srobert     // Replace each endpoint with its spelling inside the macro arg.
112*12c85518Srobert     // (This is getImmediateSpellingLoc without repeating lookups).
113*12c85518Srobert     First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second);
114*12c85518Srobert     Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second);
115*12c85518Srobert 
116*12c85518Srobert     // Now: how do we adjust the previous/next bounds? Three cases:
117*12c85518Srobert     // A) If they are also part of the same macro arg, we translate them too.
118*12c85518Srobert     //   This will ensure that we don't select any macros nested within the
119*12c85518Srobert     //   macro arg that cover extra tokens. Critical case:
120*12c85518Srobert     //      #define ID(X) X
121*12c85518Srobert     //      ID(prev target) // selecting 'target' succeeds
122*12c85518Srobert     //      #define LARGE ID(prev target)
123*12c85518Srobert     //      LARGE // selecting 'target' fails.
124*12c85518Srobert     // B) They are not in the macro at all, then their expansion range is a
125*12c85518Srobert     //    sibling to it, and we can safely substitute that.
126*12c85518Srobert     //      #define PREV prev
127*12c85518Srobert     //      #define ID(X) X
128*12c85518Srobert     //      PREV ID(target) // selecting 'target' succeeds.
129*12c85518Srobert     //      #define LARGE PREV ID(target)
130*12c85518Srobert     //      LARGE // selecting 'target' fails.
131*12c85518Srobert     // C) They are in a different arg of this macro, or the macro body.
132*12c85518Srobert     //    Now selecting the whole macro arg is fine, but the whole macro is not.
133*12c85518Srobert     //    Model this by setting using the edge of the macro call as the bound.
134*12c85518Srobert     //      #define ID2(X, Y) X Y
135*12c85518Srobert     //      ID2(prev, target) // selecting 'target' succeeds
136*12c85518Srobert     //      #define LARGE ID2(prev, target)
137*12c85518Srobert     //      LARGE // selecting 'target' fails
138*12c85518Srobert     auto AdjustBound = [&](SourceLocation &Bound) {
139*12c85518Srobert       if (Bound.isInvalid() || !Bound.isMacroID()) // Non-macro must be case B.
140*12c85518Srobert         return;
141*12c85518Srobert       auto DecBound = SM.getDecomposedLoc(Bound);
142*12c85518Srobert       auto &ExpBound = SM.getSLocEntry(DecBound.first).getExpansion();
143*12c85518Srobert       if (ExpBound.isMacroArgExpansion() &&
144*12c85518Srobert           ExpBound.getExpansionLocStart() == ExpFirst.getExpansionLocStart()) {
145*12c85518Srobert         // Case A: translate to (spelling) loc within the macro arg.
146*12c85518Srobert         Bound = ExpBound.getSpellingLoc().getLocWithOffset(DecBound.second);
147*12c85518Srobert         return;
148ec727ea7Spatrick       }
149*12c85518Srobert       while (Bound.isMacroID()) {
150*12c85518Srobert         SourceRange Exp = SM.getImmediateExpansionRange(Bound).getAsRange();
151*12c85518Srobert         if (Exp.getBegin() == ExpMacro.getExpansionLocStart()) {
152*12c85518Srobert           // Case B: bounds become the macro call itself.
153*12c85518Srobert           Bound = (&Bound == &Prev) ? Exp.getBegin() : Exp.getEnd();
154*12c85518Srobert           return;
155*12c85518Srobert         }
156*12c85518Srobert         // Either case C, or expansion location will later find case B.
157*12c85518Srobert         // We choose the upper bound for Prev and the lower one for Next:
158*12c85518Srobert         //   ID(prev) target ID(next)
159*12c85518Srobert         //          ^        ^
160*12c85518Srobert         //      new-prev  new-next
161*12c85518Srobert         Bound = (&Bound == &Prev) ? Exp.getEnd() : Exp.getBegin();
162*12c85518Srobert       }
163*12c85518Srobert     };
164*12c85518Srobert     AdjustBound(Prev);
165*12c85518Srobert     AdjustBound(Next);
166*12c85518Srobert   }
167*12c85518Srobert 
168*12c85518Srobert   // In all remaining cases we need the full containing macros.
169*12c85518Srobert   // If this overlaps Prev or Next, then no range is possible.
170*12c85518Srobert   SourceRange Candidate =
171*12c85518Srobert       SM.getExpansionRange(SourceRange(First, Last)).getAsRange();
172*12c85518Srobert   auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin());
173*12c85518Srobert   auto DecLast = SM.getDecomposedLoc(Candidate.getEnd());
174*12c85518Srobert   // Can end up in the wrong file due to bad input or token-pasting shenanigans.
175*12c85518Srobert   if (Candidate.isInvalid() || DecFirst.first != TargetFile || DecLast.first != TargetFile)
176*12c85518Srobert     return SourceRange();
177*12c85518Srobert   // Check bounds, which may still be inside macros.
178*12c85518Srobert   if (Prev.isValid()) {
179*12c85518Srobert     auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin());
180*12c85518Srobert     if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second)
181*12c85518Srobert       return SourceRange();
182*12c85518Srobert   }
183*12c85518Srobert   if (Next.isValid()) {
184*12c85518Srobert     auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd());
185*12c85518Srobert     if (Dec.first != DecLast.first || Dec.second <= DecLast.second)
186*12c85518Srobert       return SourceRange();
187*12c85518Srobert   }
188*12c85518Srobert   // Now we know that Candidate is a file range that covers [First, Last]
189*12c85518Srobert   // without encroaching on {Prev, Next}. Ship it!
190*12c85518Srobert   return Candidate;
191ec727ea7Spatrick }
192ec727ea7Spatrick 
193ec727ea7Spatrick } // namespace
194ec727ea7Spatrick 
Token(SourceLocation Location,unsigned Length,tok::TokenKind Kind)195e5dd7070Spatrick syntax::Token::Token(SourceLocation Location, unsigned Length,
196e5dd7070Spatrick                      tok::TokenKind Kind)
197e5dd7070Spatrick     : Location(Location), Length(Length), Kind(Kind) {
198e5dd7070Spatrick   assert(Location.isValid());
199e5dd7070Spatrick }
200e5dd7070Spatrick 
Token(const clang::Token & T)201e5dd7070Spatrick syntax::Token::Token(const clang::Token &T)
202e5dd7070Spatrick     : Token(T.getLocation(), T.getLength(), T.getKind()) {
203e5dd7070Spatrick   assert(!T.isAnnotation());
204e5dd7070Spatrick }
205e5dd7070Spatrick 
text(const SourceManager & SM) const206e5dd7070Spatrick llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
207e5dd7070Spatrick   bool Invalid = false;
208e5dd7070Spatrick   const char *Start = SM.getCharacterData(location(), &Invalid);
209e5dd7070Spatrick   assert(!Invalid);
210e5dd7070Spatrick   return llvm::StringRef(Start, length());
211e5dd7070Spatrick }
212e5dd7070Spatrick 
range(const SourceManager & SM) const213e5dd7070Spatrick FileRange syntax::Token::range(const SourceManager &SM) const {
214e5dd7070Spatrick   assert(location().isFileID() && "must be a spelled token");
215e5dd7070Spatrick   FileID File;
216e5dd7070Spatrick   unsigned StartOffset;
217e5dd7070Spatrick   std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
218e5dd7070Spatrick   return FileRange(File, StartOffset, StartOffset + length());
219e5dd7070Spatrick }
220e5dd7070Spatrick 
range(const SourceManager & SM,const syntax::Token & First,const syntax::Token & Last)221e5dd7070Spatrick FileRange syntax::Token::range(const SourceManager &SM,
222e5dd7070Spatrick                                const syntax::Token &First,
223e5dd7070Spatrick                                const syntax::Token &Last) {
224e5dd7070Spatrick   auto F = First.range(SM);
225e5dd7070Spatrick   auto L = Last.range(SM);
226e5dd7070Spatrick   assert(F.file() == L.file() && "tokens from different files");
227ec727ea7Spatrick   assert((F == L || F.endOffset() <= L.beginOffset()) &&
228ec727ea7Spatrick          "wrong order of tokens");
229e5dd7070Spatrick   return FileRange(F.file(), F.beginOffset(), L.endOffset());
230e5dd7070Spatrick }
231e5dd7070Spatrick 
operator <<(llvm::raw_ostream & OS,const Token & T)232e5dd7070Spatrick llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
233e5dd7070Spatrick   return OS << T.str();
234e5dd7070Spatrick }
235e5dd7070Spatrick 
FileRange(FileID File,unsigned BeginOffset,unsigned EndOffset)236e5dd7070Spatrick FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
237e5dd7070Spatrick     : File(File), Begin(BeginOffset), End(EndOffset) {
238e5dd7070Spatrick   assert(File.isValid());
239e5dd7070Spatrick   assert(BeginOffset <= EndOffset);
240e5dd7070Spatrick }
241e5dd7070Spatrick 
FileRange(const SourceManager & SM,SourceLocation BeginLoc,unsigned Length)242e5dd7070Spatrick FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
243e5dd7070Spatrick                      unsigned Length) {
244e5dd7070Spatrick   assert(BeginLoc.isValid());
245e5dd7070Spatrick   assert(BeginLoc.isFileID());
246e5dd7070Spatrick 
247e5dd7070Spatrick   std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
248e5dd7070Spatrick   End = Begin + Length;
249e5dd7070Spatrick }
FileRange(const SourceManager & SM,SourceLocation BeginLoc,SourceLocation EndLoc)250e5dd7070Spatrick FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
251e5dd7070Spatrick                      SourceLocation EndLoc) {
252e5dd7070Spatrick   assert(BeginLoc.isValid());
253e5dd7070Spatrick   assert(BeginLoc.isFileID());
254e5dd7070Spatrick   assert(EndLoc.isValid());
255e5dd7070Spatrick   assert(EndLoc.isFileID());
256e5dd7070Spatrick   assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
257e5dd7070Spatrick   assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
258e5dd7070Spatrick 
259e5dd7070Spatrick   std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
260e5dd7070Spatrick   End = SM.getFileOffset(EndLoc);
261e5dd7070Spatrick }
262e5dd7070Spatrick 
operator <<(llvm::raw_ostream & OS,const FileRange & R)263e5dd7070Spatrick llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
264e5dd7070Spatrick                                       const FileRange &R) {
265e5dd7070Spatrick   return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
266e5dd7070Spatrick                              R.file().getHashValue(), R.beginOffset(),
267e5dd7070Spatrick                              R.endOffset());
268e5dd7070Spatrick }
269e5dd7070Spatrick 
text(const SourceManager & SM) const270e5dd7070Spatrick llvm::StringRef FileRange::text(const SourceManager &SM) const {
271e5dd7070Spatrick   bool Invalid = false;
272e5dd7070Spatrick   StringRef Text = SM.getBufferData(File, &Invalid);
273e5dd7070Spatrick   if (Invalid)
274e5dd7070Spatrick     return "";
275e5dd7070Spatrick   assert(Begin <= Text.size());
276e5dd7070Spatrick   assert(End <= Text.size());
277e5dd7070Spatrick   return Text.substr(Begin, length());
278e5dd7070Spatrick }
279e5dd7070Spatrick 
indexExpandedTokens()280a9ac8606Spatrick void TokenBuffer::indexExpandedTokens() {
281a9ac8606Spatrick   // No-op if the index is already created.
282a9ac8606Spatrick   if (!ExpandedTokIndex.empty())
283a9ac8606Spatrick     return;
284a9ac8606Spatrick   ExpandedTokIndex.reserve(ExpandedTokens.size());
285a9ac8606Spatrick   // Index ExpandedTokens for faster lookups by SourceLocation.
286a9ac8606Spatrick   for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) {
287a9ac8606Spatrick     SourceLocation Loc = ExpandedTokens[I].location();
288a9ac8606Spatrick     if (Loc.isValid())
289a9ac8606Spatrick       ExpandedTokIndex[Loc] = I;
290a9ac8606Spatrick   }
291a9ac8606Spatrick }
292a9ac8606Spatrick 
expandedTokens(SourceRange R) const293e5dd7070Spatrick llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
294a9ac8606Spatrick   if (R.isInvalid())
295a9ac8606Spatrick     return {};
296a9ac8606Spatrick   if (!ExpandedTokIndex.empty()) {
297a9ac8606Spatrick     // Quick lookup if `R` is a token range.
298a9ac8606Spatrick     // This is a huge win since majority of the users use ranges provided by an
299a9ac8606Spatrick     // AST. Ranges in AST are token ranges from expanded token stream.
300a9ac8606Spatrick     const auto B = ExpandedTokIndex.find(R.getBegin());
301a9ac8606Spatrick     const auto E = ExpandedTokIndex.find(R.getEnd());
302a9ac8606Spatrick     if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
303a9ac8606Spatrick       const Token *L = ExpandedTokens.data() + B->getSecond();
304a9ac8606Spatrick       // Add 1 to End to make a half-open range.
305a9ac8606Spatrick       const Token *R = ExpandedTokens.data() + E->getSecond() + 1;
306a9ac8606Spatrick       if (L > R)
307a9ac8606Spatrick         return {};
308a9ac8606Spatrick       return {L, R};
309a9ac8606Spatrick     }
310a9ac8606Spatrick   }
311a9ac8606Spatrick   // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
312a9ac8606Spatrick   // required range.
313ec727ea7Spatrick   return getTokensCovering(expandedTokens(), R, *SourceMgr);
314e5dd7070Spatrick }
315e5dd7070Spatrick 
toCharRange(const SourceManager & SM) const316e5dd7070Spatrick CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
317e5dd7070Spatrick   return CharSourceRange(
318e5dd7070Spatrick       SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
319e5dd7070Spatrick       /*IsTokenRange=*/false);
320e5dd7070Spatrick }
321e5dd7070Spatrick 
322e5dd7070Spatrick std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
spelledForExpandedToken(const syntax::Token * Expanded) const323e5dd7070Spatrick TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
324e5dd7070Spatrick   assert(Expanded);
325e5dd7070Spatrick   assert(ExpandedTokens.data() <= Expanded &&
326e5dd7070Spatrick          Expanded < ExpandedTokens.data() + ExpandedTokens.size());
327e5dd7070Spatrick 
328e5dd7070Spatrick   auto FileIt = Files.find(
329e5dd7070Spatrick       SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
330e5dd7070Spatrick   assert(FileIt != Files.end() && "no file for an expanded token");
331e5dd7070Spatrick 
332e5dd7070Spatrick   const MarkedFile &File = FileIt->second;
333e5dd7070Spatrick 
334e5dd7070Spatrick   unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
335e5dd7070Spatrick   // Find the first mapping that produced tokens after \p Expanded.
336e5dd7070Spatrick   auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
337e5dd7070Spatrick     return M.BeginExpanded <= ExpandedIndex;
338e5dd7070Spatrick   });
339e5dd7070Spatrick   // Our token could only be produced by the previous mapping.
340e5dd7070Spatrick   if (It == File.Mappings.begin()) {
341e5dd7070Spatrick     // No previous mapping, no need to modify offsets.
342ec727ea7Spatrick     return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
343ec727ea7Spatrick             /*Mapping=*/nullptr};
344e5dd7070Spatrick   }
345e5dd7070Spatrick   --It; // 'It' now points to last mapping that started before our token.
346e5dd7070Spatrick 
347e5dd7070Spatrick   // Check if the token is part of the mapping.
348e5dd7070Spatrick   if (ExpandedIndex < It->EndExpanded)
349ec727ea7Spatrick     return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
350e5dd7070Spatrick 
351e5dd7070Spatrick   // Not part of the mapping, use the index from previous mapping to compute the
352e5dd7070Spatrick   // corresponding spelled token.
353e5dd7070Spatrick   return {
354e5dd7070Spatrick       &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
355ec727ea7Spatrick       /*Mapping=*/nullptr};
356ec727ea7Spatrick }
357ec727ea7Spatrick 
358ec727ea7Spatrick const TokenBuffer::Mapping *
mappingStartingBeforeSpelled(const MarkedFile & F,const syntax::Token * Spelled)359ec727ea7Spatrick TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
360ec727ea7Spatrick                                           const syntax::Token *Spelled) {
361ec727ea7Spatrick   assert(F.SpelledTokens.data() <= Spelled);
362ec727ea7Spatrick   unsigned SpelledI = Spelled - F.SpelledTokens.data();
363ec727ea7Spatrick   assert(SpelledI < F.SpelledTokens.size());
364ec727ea7Spatrick 
365ec727ea7Spatrick   auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
366ec727ea7Spatrick     return M.BeginSpelled <= SpelledI;
367ec727ea7Spatrick   });
368ec727ea7Spatrick   if (It == F.Mappings.begin())
369ec727ea7Spatrick     return nullptr;
370ec727ea7Spatrick   --It;
371ec727ea7Spatrick   return &*It;
372ec727ea7Spatrick }
373ec727ea7Spatrick 
374ec727ea7Spatrick llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const375ec727ea7Spatrick TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
376ec727ea7Spatrick   if (Spelled.empty())
377ec727ea7Spatrick     return {};
378a9ac8606Spatrick   const auto &File = fileForSpelled(Spelled);
379ec727ea7Spatrick 
380ec727ea7Spatrick   auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
381ec727ea7Spatrick   unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
382ec727ea7Spatrick   assert(SpelledFrontI < File.SpelledTokens.size());
383ec727ea7Spatrick   unsigned ExpandedBegin;
384ec727ea7Spatrick   if (!FrontMapping) {
385ec727ea7Spatrick     // No mapping that starts before the first token of Spelled, we don't have
386ec727ea7Spatrick     // to modify offsets.
387ec727ea7Spatrick     ExpandedBegin = File.BeginExpanded + SpelledFrontI;
388ec727ea7Spatrick   } else if (SpelledFrontI < FrontMapping->EndSpelled) {
389ec727ea7Spatrick     // This mapping applies to Spelled tokens.
390ec727ea7Spatrick     if (SpelledFrontI != FrontMapping->BeginSpelled) {
391ec727ea7Spatrick       // Spelled tokens don't cover the entire mapping, returning empty result.
392ec727ea7Spatrick       return {}; // FIXME: support macro arguments.
393ec727ea7Spatrick     }
394ec727ea7Spatrick     // Spelled tokens start at the beginning of this mapping.
395ec727ea7Spatrick     ExpandedBegin = FrontMapping->BeginExpanded;
396ec727ea7Spatrick   } else {
397ec727ea7Spatrick     // Spelled tokens start after the mapping ends (they start in the hole
398ec727ea7Spatrick     // between 2 mappings, or between a mapping and end of the file).
399ec727ea7Spatrick     ExpandedBegin =
400ec727ea7Spatrick         FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
401ec727ea7Spatrick   }
402ec727ea7Spatrick 
403ec727ea7Spatrick   auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
404ec727ea7Spatrick   unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
405ec727ea7Spatrick   unsigned ExpandedEnd;
406ec727ea7Spatrick   if (!BackMapping) {
407ec727ea7Spatrick     // No mapping that starts before the last token of Spelled, we don't have to
408ec727ea7Spatrick     // modify offsets.
409ec727ea7Spatrick     ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
410ec727ea7Spatrick   } else if (SpelledBackI < BackMapping->EndSpelled) {
411ec727ea7Spatrick     // This mapping applies to Spelled tokens.
412ec727ea7Spatrick     if (SpelledBackI + 1 != BackMapping->EndSpelled) {
413ec727ea7Spatrick       // Spelled tokens don't cover the entire mapping, returning empty result.
414ec727ea7Spatrick       return {}; // FIXME: support macro arguments.
415ec727ea7Spatrick     }
416ec727ea7Spatrick     ExpandedEnd = BackMapping->EndExpanded;
417ec727ea7Spatrick   } else {
418ec727ea7Spatrick     // Spelled tokens end after the mapping ends.
419ec727ea7Spatrick     ExpandedEnd =
420ec727ea7Spatrick         BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
421ec727ea7Spatrick   }
422ec727ea7Spatrick 
423ec727ea7Spatrick   assert(ExpandedBegin < ExpandedTokens.size());
424ec727ea7Spatrick   assert(ExpandedEnd < ExpandedTokens.size());
425ec727ea7Spatrick   // Avoid returning empty ranges.
426ec727ea7Spatrick   if (ExpandedBegin == ExpandedEnd)
427ec727ea7Spatrick     return {};
428*12c85518Srobert   return {llvm::ArrayRef(ExpandedTokens.data() + ExpandedBegin,
429ec727ea7Spatrick                          ExpandedTokens.data() + ExpandedEnd)};
430e5dd7070Spatrick }
431e5dd7070Spatrick 
spelledTokens(FileID FID) const432e5dd7070Spatrick llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
433e5dd7070Spatrick   auto It = Files.find(FID);
434e5dd7070Spatrick   assert(It != Files.end());
435e5dd7070Spatrick   return It->second.SpelledTokens;
436e5dd7070Spatrick }
437e5dd7070Spatrick 
spelledTokenAt(SourceLocation Loc) const438ec727ea7Spatrick const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
439ec727ea7Spatrick   assert(Loc.isFileID());
440ec727ea7Spatrick   const auto *Tok = llvm::partition_point(
441ec727ea7Spatrick       spelledTokens(SourceMgr->getFileID(Loc)),
442ec727ea7Spatrick       [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
443ec727ea7Spatrick   if (!Tok || Tok->location() != Loc)
444ec727ea7Spatrick     return nullptr;
445ec727ea7Spatrick   return Tok;
446ec727ea7Spatrick }
447ec727ea7Spatrick 
str() const448e5dd7070Spatrick std::string TokenBuffer::Mapping::str() const {
449ec727ea7Spatrick   return std::string(
450ec727ea7Spatrick       llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
451ec727ea7Spatrick                     BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
452e5dd7070Spatrick }
453e5dd7070Spatrick 
454*12c85518Srobert std::optional<llvm::ArrayRef<syntax::Token>>
spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const455e5dd7070Spatrick TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
456e5dd7070Spatrick   // Mapping an empty range is ambiguous in case of empty mappings at either end
457e5dd7070Spatrick   // of the range, bail out in that case.
458e5dd7070Spatrick   if (Expanded.empty())
459*12c85518Srobert     return std::nullopt;
460*12c85518Srobert   const syntax::Token *First = &Expanded.front();
461*12c85518Srobert   const syntax::Token *Last = &Expanded.back();
462*12c85518Srobert   auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
463*12c85518Srobert   auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);
464e5dd7070Spatrick 
465*12c85518Srobert   FileID FID = SourceMgr->getFileID(FirstSpelled->location());
466e5dd7070Spatrick   // FIXME: Handle multi-file changes by trying to map onto a common root.
467e5dd7070Spatrick   if (FID != SourceMgr->getFileID(LastSpelled->location()))
468*12c85518Srobert     return std::nullopt;
469e5dd7070Spatrick 
470e5dd7070Spatrick   const MarkedFile &File = Files.find(FID)->second;
471e5dd7070Spatrick 
472*12c85518Srobert   // If the range is within one macro argument, the result may be only part of a
473*12c85518Srobert   // Mapping. We must use the general (SourceManager-based) algorithm.
474*12c85518Srobert   if (FirstMapping && FirstMapping == LastMapping &&
475*12c85518Srobert       SourceMgr->isMacroArgExpansion(First->location()) &&
476*12c85518Srobert       SourceMgr->isMacroArgExpansion(Last->location())) {
477*12c85518Srobert     // We use excluded Prev/Next token for bounds checking.
478*12c85518Srobert     SourceLocation Prev = (First == &ExpandedTokens.front())
479*12c85518Srobert                               ? SourceLocation()
480*12c85518Srobert                               : (First - 1)->location();
481*12c85518Srobert     SourceLocation Next = (Last == &ExpandedTokens.back())
482*12c85518Srobert                               ? SourceLocation()
483*12c85518Srobert                               : (Last + 1)->location();
484*12c85518Srobert     SourceRange Range = spelledForExpandedSlow(
485*12c85518Srobert         First->location(), Last->location(), Prev, Next, FID, *SourceMgr);
486*12c85518Srobert     if (Range.isInvalid())
487*12c85518Srobert       return std::nullopt;
488*12c85518Srobert     return getTokensCovering(File.SpelledTokens, Range, *SourceMgr);
489ec727ea7Spatrick   }
490ec727ea7Spatrick 
491*12c85518Srobert   // Otherwise, use the fast version based on Mappings.
492ec727ea7Spatrick   // Do not allow changes that doesn't cover full expansion.
493*12c85518Srobert   unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data();
494*12c85518Srobert   unsigned LastExpanded = Expanded.end() - ExpandedTokens.data();
495*12c85518Srobert   if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded)
496*12c85518Srobert     return std::nullopt;
497*12c85518Srobert   if (LastMapping && LastMapping->EndExpanded != LastExpanded)
498*12c85518Srobert     return std::nullopt;
499*12c85518Srobert   return llvm::ArrayRef(
500*12c85518Srobert       FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled
501*12c85518Srobert                    : FirstSpelled,
502e5dd7070Spatrick       LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
503e5dd7070Spatrick                   : LastSpelled + 1);
504e5dd7070Spatrick }
505e5dd7070Spatrick 
makeExpansion(const MarkedFile & F,const Mapping & M) const506a9ac8606Spatrick TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
507a9ac8606Spatrick                                                   const Mapping &M) const {
508a9ac8606Spatrick   Expansion E;
509*12c85518Srobert   E.Spelled = llvm::ArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
510a9ac8606Spatrick                              F.SpelledTokens.data() + M.EndSpelled);
511*12c85518Srobert   E.Expanded = llvm::ArrayRef(ExpandedTokens.data() + M.BeginExpanded,
512a9ac8606Spatrick                               ExpandedTokens.data() + M.EndExpanded);
513a9ac8606Spatrick   return E;
514a9ac8606Spatrick }
515a9ac8606Spatrick 
516a9ac8606Spatrick const TokenBuffer::MarkedFile &
fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const517a9ac8606Spatrick TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
518a9ac8606Spatrick   assert(!Spelled.empty());
519a9ac8606Spatrick   assert(Spelled.front().location().isFileID() && "not a spelled token");
520a9ac8606Spatrick   auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
521a9ac8606Spatrick   assert(FileIt != Files.end() && "file not tracked by token buffer");
522a9ac8606Spatrick   const auto &File = FileIt->second;
523a9ac8606Spatrick   assert(File.SpelledTokens.data() <= Spelled.data() &&
524a9ac8606Spatrick          Spelled.end() <=
525a9ac8606Spatrick              (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
526a9ac8606Spatrick          "Tokens not in spelled range");
527a9ac8606Spatrick #ifndef NDEBUG
528a9ac8606Spatrick   auto T1 = Spelled.back().location();
529a9ac8606Spatrick   auto T2 = File.SpelledTokens.back().location();
530a9ac8606Spatrick   assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
531a9ac8606Spatrick #endif
532a9ac8606Spatrick   return File;
533a9ac8606Spatrick }
534a9ac8606Spatrick 
535*12c85518Srobert std::optional<TokenBuffer::Expansion>
expansionStartingAt(const syntax::Token * Spelled) const536e5dd7070Spatrick TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
537e5dd7070Spatrick   assert(Spelled);
538a9ac8606Spatrick   const auto &File = fileForSpelled(*Spelled);
539e5dd7070Spatrick 
540e5dd7070Spatrick   unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
541e5dd7070Spatrick   auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
542e5dd7070Spatrick     return M.BeginSpelled < SpelledIndex;
543e5dd7070Spatrick   });
544e5dd7070Spatrick   if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
545*12c85518Srobert     return std::nullopt;
546a9ac8606Spatrick   return makeExpansion(File, *M);
547e5dd7070Spatrick }
548a9ac8606Spatrick 
expansionsOverlapping(llvm::ArrayRef<syntax::Token> Spelled) const549a9ac8606Spatrick std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
550a9ac8606Spatrick     llvm::ArrayRef<syntax::Token> Spelled) const {
551a9ac8606Spatrick   if (Spelled.empty())
552a9ac8606Spatrick     return {};
553a9ac8606Spatrick   const auto &File = fileForSpelled(Spelled);
554a9ac8606Spatrick 
555a9ac8606Spatrick   // Find the first overlapping range, and then copy until we stop overlapping.
556a9ac8606Spatrick   unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
557a9ac8606Spatrick   unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
558a9ac8606Spatrick   auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
559a9ac8606Spatrick     return M.EndSpelled <= SpelledBeginIndex;
560a9ac8606Spatrick   });
561a9ac8606Spatrick   std::vector<TokenBuffer::Expansion> Expansions;
562a9ac8606Spatrick   for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M)
563a9ac8606Spatrick     Expansions.push_back(makeExpansion(File, *M));
564a9ac8606Spatrick   return Expansions;
565a9ac8606Spatrick }
566a9ac8606Spatrick 
567e5dd7070Spatrick llvm::ArrayRef<syntax::Token>
spelledTokensTouching(SourceLocation Loc,llvm::ArrayRef<syntax::Token> Tokens)568e5dd7070Spatrick syntax::spelledTokensTouching(SourceLocation Loc,
569ec727ea7Spatrick                               llvm::ArrayRef<syntax::Token> Tokens) {
570e5dd7070Spatrick   assert(Loc.isFileID());
571ec727ea7Spatrick 
572e5dd7070Spatrick   auto *Right = llvm::partition_point(
573ec727ea7Spatrick       Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
574ec727ea7Spatrick   bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
575ec727ea7Spatrick   bool AcceptLeft =
576ec727ea7Spatrick       Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
577*12c85518Srobert   return llvm::ArrayRef(Right - (AcceptLeft ? 1 : 0),
578e5dd7070Spatrick                         Right + (AcceptRight ? 1 : 0));
579e5dd7070Spatrick }
580e5dd7070Spatrick 
581ec727ea7Spatrick llvm::ArrayRef<syntax::Token>
spelledTokensTouching(SourceLocation Loc,const syntax::TokenBuffer & Tokens)582ec727ea7Spatrick syntax::spelledTokensTouching(SourceLocation Loc,
583ec727ea7Spatrick                               const syntax::TokenBuffer &Tokens) {
584ec727ea7Spatrick   return spelledTokensTouching(
585ec727ea7Spatrick       Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
586ec727ea7Spatrick }
587ec727ea7Spatrick 
588e5dd7070Spatrick const syntax::Token *
spelledIdentifierTouching(SourceLocation Loc,llvm::ArrayRef<syntax::Token> Tokens)589e5dd7070Spatrick syntax::spelledIdentifierTouching(SourceLocation Loc,
590ec727ea7Spatrick                                   llvm::ArrayRef<syntax::Token> Tokens) {
591e5dd7070Spatrick   for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
592e5dd7070Spatrick     if (Tok.kind() == tok::identifier)
593e5dd7070Spatrick       return &Tok;
594e5dd7070Spatrick   }
595e5dd7070Spatrick   return nullptr;
596e5dd7070Spatrick }
597e5dd7070Spatrick 
598ec727ea7Spatrick const syntax::Token *
spelledIdentifierTouching(SourceLocation Loc,const syntax::TokenBuffer & Tokens)599ec727ea7Spatrick syntax::spelledIdentifierTouching(SourceLocation Loc,
600ec727ea7Spatrick                                   const syntax::TokenBuffer &Tokens) {
601ec727ea7Spatrick   return spelledIdentifierTouching(
602ec727ea7Spatrick       Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
603ec727ea7Spatrick }
604ec727ea7Spatrick 
605e5dd7070Spatrick std::vector<const syntax::Token *>
macroExpansions(FileID FID) const606e5dd7070Spatrick TokenBuffer::macroExpansions(FileID FID) const {
607e5dd7070Spatrick   auto FileIt = Files.find(FID);
608e5dd7070Spatrick   assert(FileIt != Files.end() && "file not tracked by token buffer");
609e5dd7070Spatrick   auto &File = FileIt->second;
610e5dd7070Spatrick   std::vector<const syntax::Token *> Expansions;
611e5dd7070Spatrick   auto &Spelled = File.SpelledTokens;
612e5dd7070Spatrick   for (auto Mapping : File.Mappings) {
613e5dd7070Spatrick     const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
614e5dd7070Spatrick     if (Token->kind() == tok::TokenKind::identifier)
615e5dd7070Spatrick       Expansions.push_back(Token);
616e5dd7070Spatrick   }
617e5dd7070Spatrick   return Expansions;
618e5dd7070Spatrick }
619e5dd7070Spatrick 
tokenize(const FileRange & FR,const SourceManager & SM,const LangOptions & LO)620ec727ea7Spatrick std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
621ec727ea7Spatrick                                             const SourceManager &SM,
622e5dd7070Spatrick                                             const LangOptions &LO) {
623e5dd7070Spatrick   std::vector<syntax::Token> Tokens;
624e5dd7070Spatrick   IdentifierTable Identifiers(LO);
625e5dd7070Spatrick   auto AddToken = [&](clang::Token T) {
626e5dd7070Spatrick     // Fill the proper token kind for keywords, etc.
627e5dd7070Spatrick     if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
628e5dd7070Spatrick         !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
629e5dd7070Spatrick       clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
630e5dd7070Spatrick       T.setIdentifierInfo(&II);
631e5dd7070Spatrick       T.setKind(II.getTokenID());
632e5dd7070Spatrick     }
633e5dd7070Spatrick     Tokens.push_back(syntax::Token(T));
634e5dd7070Spatrick   };
635e5dd7070Spatrick 
636ec727ea7Spatrick   auto SrcBuffer = SM.getBufferData(FR.file());
637ec727ea7Spatrick   Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
638ec727ea7Spatrick           SrcBuffer.data() + FR.beginOffset(),
639ec727ea7Spatrick           // We can't make BufEnd point to FR.endOffset, as Lexer requires a
640ec727ea7Spatrick           // null terminated buffer.
641ec727ea7Spatrick           SrcBuffer.data() + SrcBuffer.size());
642e5dd7070Spatrick 
643e5dd7070Spatrick   clang::Token T;
644ec727ea7Spatrick   while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
645e5dd7070Spatrick     AddToken(T);
646ec727ea7Spatrick   // LexFromRawLexer returns true when it parses the last token of the file, add
647ec727ea7Spatrick   // it iff it starts within the range we are interested in.
648ec727ea7Spatrick   if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
649e5dd7070Spatrick     AddToken(T);
650e5dd7070Spatrick   return Tokens;
651e5dd7070Spatrick }
652e5dd7070Spatrick 
tokenize(FileID FID,const SourceManager & SM,const LangOptions & LO)653ec727ea7Spatrick std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
654ec727ea7Spatrick                                             const LangOptions &LO) {
655ec727ea7Spatrick   return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
656ec727ea7Spatrick }
657ec727ea7Spatrick 
658e5dd7070Spatrick /// Records information reqired to construct mappings for the token buffer that
659e5dd7070Spatrick /// we are collecting.
660e5dd7070Spatrick class TokenCollector::CollectPPExpansions : public PPCallbacks {
661e5dd7070Spatrick public:
CollectPPExpansions(TokenCollector & C)662e5dd7070Spatrick   CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
663e5dd7070Spatrick 
664e5dd7070Spatrick   /// Disabled instance will stop reporting anything to TokenCollector.
665e5dd7070Spatrick   /// This ensures that uses of the preprocessor after TokenCollector::consume()
666e5dd7070Spatrick   /// is called do not access the (possibly invalid) collector instance.
disable()667e5dd7070Spatrick   void disable() { Collector = nullptr; }
668e5dd7070Spatrick 
MacroExpands(const clang::Token & MacroNameTok,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)669e5dd7070Spatrick   void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
670e5dd7070Spatrick                     SourceRange Range, const MacroArgs *Args) override {
671e5dd7070Spatrick     if (!Collector)
672e5dd7070Spatrick       return;
673389bb291Spatrick     const auto &SM = Collector->PP.getSourceManager();
674389bb291Spatrick     // Only record top-level expansions that directly produce expanded tokens.
675389bb291Spatrick     // This excludes those where:
676e5dd7070Spatrick     //   - the macro use is inside a macro body,
677e5dd7070Spatrick     //   - the macro appears in an argument to another macro.
678389bb291Spatrick     // However macro expansion isn't really a tree, it's token rewrite rules,
679389bb291Spatrick     // so there are other cases, e.g.
680389bb291Spatrick     //   #define B(X) X
681389bb291Spatrick     //   #define A 1 + B
682389bb291Spatrick     //   A(2)
683389bb291Spatrick     // Both A and B produce expanded tokens, though the macro name 'B' comes
684389bb291Spatrick     // from an expansion. The best we can do is merge the mappings for both.
685389bb291Spatrick 
686389bb291Spatrick     // The *last* token of any top-level macro expansion must be in a file.
687389bb291Spatrick     // (In the example above, see the closing paren of the expansion of B).
688389bb291Spatrick     if (!Range.getEnd().isFileID())
689e5dd7070Spatrick       return;
690389bb291Spatrick     // If there's a current expansion that encloses this one, this one can't be
691389bb291Spatrick     // top-level.
692389bb291Spatrick     if (LastExpansionEnd.isValid() &&
693389bb291Spatrick         !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
694389bb291Spatrick       return;
695389bb291Spatrick 
696389bb291Spatrick     // If the macro invocation (B) starts in a macro (A) but ends in a file,
697389bb291Spatrick     // we'll create a merged mapping for A + B by overwriting the endpoint for
698389bb291Spatrick     // A's startpoint.
699389bb291Spatrick     if (!Range.getBegin().isFileID()) {
700389bb291Spatrick       Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
701a9ac8606Spatrick       assert(Collector->Expansions.count(Range.getBegin()) &&
702389bb291Spatrick              "Overlapping macros should have same expansion location");
703389bb291Spatrick     }
704389bb291Spatrick 
705a9ac8606Spatrick     Collector->Expansions[Range.getBegin()] = Range.getEnd();
706e5dd7070Spatrick     LastExpansionEnd = Range.getEnd();
707e5dd7070Spatrick   }
708e5dd7070Spatrick   // FIXME: handle directives like #pragma, #include, etc.
709e5dd7070Spatrick private:
710e5dd7070Spatrick   TokenCollector *Collector;
711e5dd7070Spatrick   /// Used to detect recursive macro expansions.
712e5dd7070Spatrick   SourceLocation LastExpansionEnd;
713e5dd7070Spatrick };
714e5dd7070Spatrick 
715e5dd7070Spatrick /// Fills in the TokenBuffer by tracing the run of a preprocessor. The
716e5dd7070Spatrick /// implementation tracks the tokens, macro expansions and directives coming
717e5dd7070Spatrick /// from the preprocessor and:
718e5dd7070Spatrick /// - for each token, figures out if it is a part of an expanded token stream,
719e5dd7070Spatrick ///   spelled token stream or both. Stores the tokens appropriately.
720e5dd7070Spatrick /// - records mappings from the spelled to expanded token ranges, e.g. for macro
721e5dd7070Spatrick ///   expansions.
722e5dd7070Spatrick /// FIXME: also properly record:
723e5dd7070Spatrick ///          - #include directives,
724e5dd7070Spatrick ///          - #pragma, #line and other PP directives,
725e5dd7070Spatrick ///          - skipped pp regions,
726e5dd7070Spatrick ///          - ...
727e5dd7070Spatrick 
TokenCollector(Preprocessor & PP)728e5dd7070Spatrick TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
729e5dd7070Spatrick   // Collect the expanded token stream during preprocessing.
730e5dd7070Spatrick   PP.setTokenWatcher([this](const clang::Token &T) {
731e5dd7070Spatrick     if (T.isAnnotation())
732e5dd7070Spatrick       return;
733e5dd7070Spatrick     DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
734e5dd7070Spatrick                                           << "Token: "
735e5dd7070Spatrick                                           << syntax::Token(T).dumpForTests(
736e5dd7070Spatrick                                                  this->PP.getSourceManager())
737e5dd7070Spatrick                                           << "\n"
738e5dd7070Spatrick 
739e5dd7070Spatrick     );
740e5dd7070Spatrick     Expanded.push_back(syntax::Token(T));
741e5dd7070Spatrick   });
742e5dd7070Spatrick   // And locations of macro calls, to properly recover boundaries of those in
743e5dd7070Spatrick   // case of empty expansions.
744e5dd7070Spatrick   auto CB = std::make_unique<CollectPPExpansions>(*this);
745e5dd7070Spatrick   this->Collector = CB.get();
746e5dd7070Spatrick   PP.addPPCallbacks(std::move(CB));
747e5dd7070Spatrick }
748e5dd7070Spatrick 
749e5dd7070Spatrick /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
750e5dd7070Spatrick /// token stream.
751e5dd7070Spatrick class TokenCollector::Builder {
752e5dd7070Spatrick public:
Builder(std::vector<syntax::Token> Expanded,PPExpansions CollectedExpansions,const SourceManager & SM,const LangOptions & LangOpts)753e5dd7070Spatrick   Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
754e5dd7070Spatrick           const SourceManager &SM, const LangOptions &LangOpts)
755e5dd7070Spatrick       : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
756e5dd7070Spatrick         LangOpts(LangOpts) {
757e5dd7070Spatrick     Result.ExpandedTokens = std::move(Expanded);
758e5dd7070Spatrick   }
759e5dd7070Spatrick 
build()760e5dd7070Spatrick   TokenBuffer build() && {
761e5dd7070Spatrick     assert(!Result.ExpandedTokens.empty());
762e5dd7070Spatrick     assert(Result.ExpandedTokens.back().kind() == tok::eof);
763389bb291Spatrick 
764389bb291Spatrick     // Tokenize every file that contributed tokens to the expanded stream.
765389bb291Spatrick     buildSpelledTokens();
766389bb291Spatrick 
767389bb291Spatrick     // The expanded token stream consists of runs of tokens that came from
768389bb291Spatrick     // the same source (a macro expansion, part of a file etc).
769389bb291Spatrick     // Between these runs are the logical positions of spelled tokens that
770389bb291Spatrick     // didn't expand to anything.
771389bb291Spatrick     while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
772389bb291Spatrick       // Create empty mappings for spelled tokens that expanded to nothing here.
773389bb291Spatrick       // May advance NextSpelled, but NextExpanded is unchanged.
774389bb291Spatrick       discard();
775389bb291Spatrick       // Create mapping for a contiguous run of expanded tokens.
776389bb291Spatrick       // Advances NextExpanded past the run, and NextSpelled accordingly.
777389bb291Spatrick       unsigned OldPosition = NextExpanded;
778389bb291Spatrick       advance();
779389bb291Spatrick       if (NextExpanded == OldPosition)
780389bb291Spatrick         diagnoseAdvanceFailure();
781e5dd7070Spatrick     }
782389bb291Spatrick     // If any tokens remain in any of the files, they didn't expand to anything.
783389bb291Spatrick     // Create empty mappings up until the end of the file.
784389bb291Spatrick     for (const auto &File : Result.Files)
785389bb291Spatrick       discard(File.first);
786e5dd7070Spatrick 
787ec727ea7Spatrick #ifndef NDEBUG
788ec727ea7Spatrick     for (auto &pair : Result.Files) {
789ec727ea7Spatrick       auto &mappings = pair.second.Mappings;
790ec727ea7Spatrick       assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
791ec727ea7Spatrick                                           const TokenBuffer::Mapping &M2) {
792ec727ea7Spatrick         return M1.BeginSpelled < M2.BeginSpelled &&
793ec727ea7Spatrick                M1.EndSpelled < M2.EndSpelled &&
794ec727ea7Spatrick                M1.BeginExpanded < M2.BeginExpanded &&
795ec727ea7Spatrick                M1.EndExpanded < M2.EndExpanded;
796ec727ea7Spatrick       }));
797ec727ea7Spatrick     }
798ec727ea7Spatrick #endif
799ec727ea7Spatrick 
800e5dd7070Spatrick     return std::move(Result);
801e5dd7070Spatrick   }
802e5dd7070Spatrick 
803e5dd7070Spatrick private:
804389bb291Spatrick   // Consume a sequence of spelled tokens that didn't expand to anything.
805389bb291Spatrick   // In the simplest case, skips spelled tokens until finding one that produced
806389bb291Spatrick   // the NextExpanded token, and creates an empty mapping for them.
807389bb291Spatrick   // If Drain is provided, skips remaining tokens from that file instead.
discard(std::optional<FileID> Drain=std::nullopt)808*12c85518Srobert   void discard(std::optional<FileID> Drain = std::nullopt) {
809389bb291Spatrick     SourceLocation Target =
810389bb291Spatrick         Drain ? SM.getLocForEndOfFile(*Drain)
811389bb291Spatrick               : SM.getExpansionLoc(
812389bb291Spatrick                     Result.ExpandedTokens[NextExpanded].location());
813389bb291Spatrick     FileID File = SM.getFileID(Target);
814389bb291Spatrick     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
815389bb291Spatrick     auto &NextSpelled = this->NextSpelled[File];
816389bb291Spatrick 
817389bb291Spatrick     TokenBuffer::Mapping Mapping;
818389bb291Spatrick     Mapping.BeginSpelled = NextSpelled;
819389bb291Spatrick     // When dropping trailing tokens from a file, the empty mapping should
820389bb291Spatrick     // be positioned within the file's expanded-token range (at the end).
821389bb291Spatrick     Mapping.BeginExpanded = Mapping.EndExpanded =
822389bb291Spatrick         Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
823389bb291Spatrick     // We may want to split into several adjacent empty mappings.
824389bb291Spatrick     // FlushMapping() emits the current mapping and starts a new one.
825389bb291Spatrick     auto FlushMapping = [&, this] {
826389bb291Spatrick       Mapping.EndSpelled = NextSpelled;
827389bb291Spatrick       if (Mapping.BeginSpelled != Mapping.EndSpelled)
828389bb291Spatrick         Result.Files[File].Mappings.push_back(Mapping);
829389bb291Spatrick       Mapping.BeginSpelled = NextSpelled;
830389bb291Spatrick     };
831389bb291Spatrick 
832389bb291Spatrick     while (NextSpelled < SpelledTokens.size() &&
833389bb291Spatrick            SpelledTokens[NextSpelled].location() < Target) {
834389bb291Spatrick       // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
835389bb291Spatrick       // then we want to partition our (empty) mapping.
836389bb291Spatrick       //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
837a9ac8606Spatrick       SourceLocation KnownEnd =
838a9ac8606Spatrick           CollectedExpansions.lookup(SpelledTokens[NextSpelled].location());
839389bb291Spatrick       if (KnownEnd.isValid()) {
840389bb291Spatrick         FlushMapping(); // Emits [Start, NextSpelled)
841389bb291Spatrick         while (NextSpelled < SpelledTokens.size() &&
842389bb291Spatrick                SpelledTokens[NextSpelled].location() <= KnownEnd)
843389bb291Spatrick           ++NextSpelled;
844389bb291Spatrick         FlushMapping(); // Emits [NextSpelled, KnownEnd]
845*12c85518Srobert         // Now the loop continues and will emit (KnownEnd, Target).
846389bb291Spatrick       } else {
847389bb291Spatrick         ++NextSpelled;
848e5dd7070Spatrick       }
849389bb291Spatrick     }
850389bb291Spatrick     FlushMapping();
851389bb291Spatrick   }
852e5dd7070Spatrick 
853389bb291Spatrick   // Consumes the NextExpanded token and others that are part of the same run.
854389bb291Spatrick   // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
855389bb291Spatrick   // (unless this is a run of file tokens, which we represent with no mapping).
advance()856389bb291Spatrick   void advance() {
857389bb291Spatrick     const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
858389bb291Spatrick     SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
859389bb291Spatrick     FileID File = SM.getFileID(Expansion);
860389bb291Spatrick     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
861389bb291Spatrick     auto &NextSpelled = this->NextSpelled[File];
862e5dd7070Spatrick 
863389bb291Spatrick     if (Tok.location().isFileID()) {
864389bb291Spatrick       // A run of file tokens continues while the expanded/spelled tokens match.
865389bb291Spatrick       while (NextSpelled < SpelledTokens.size() &&
866389bb291Spatrick              NextExpanded < Result.ExpandedTokens.size() &&
867389bb291Spatrick              SpelledTokens[NextSpelled].location() ==
868389bb291Spatrick                  Result.ExpandedTokens[NextExpanded].location()) {
869389bb291Spatrick         ++NextSpelled;
870389bb291Spatrick         ++NextExpanded;
871389bb291Spatrick       }
872389bb291Spatrick       // We need no mapping for file tokens copied to the expanded stream.
873389bb291Spatrick     } else {
874389bb291Spatrick       // We found a new macro expansion. We should have its spelling bounds.
875a9ac8606Spatrick       auto End = CollectedExpansions.lookup(Expansion);
876389bb291Spatrick       assert(End.isValid() && "Macro expansion wasn't captured?");
877389bb291Spatrick 
878389bb291Spatrick       // Mapping starts here...
879389bb291Spatrick       TokenBuffer::Mapping Mapping;
880389bb291Spatrick       Mapping.BeginExpanded = NextExpanded;
881389bb291Spatrick       Mapping.BeginSpelled = NextSpelled;
882389bb291Spatrick       // ... consumes spelled tokens within bounds we captured ...
883389bb291Spatrick       while (NextSpelled < SpelledTokens.size() &&
884389bb291Spatrick              SpelledTokens[NextSpelled].location() <= End)
885389bb291Spatrick         ++NextSpelled;
886389bb291Spatrick       // ... consumes expanded tokens rooted at the same expansion ...
887389bb291Spatrick       while (NextExpanded < Result.ExpandedTokens.size() &&
888389bb291Spatrick              SM.getExpansionLoc(
889389bb291Spatrick                  Result.ExpandedTokens[NextExpanded].location()) == Expansion)
890389bb291Spatrick         ++NextExpanded;
891389bb291Spatrick       // ... and ends here.
892389bb291Spatrick       Mapping.EndExpanded = NextExpanded;
893389bb291Spatrick       Mapping.EndSpelled = NextSpelled;
894389bb291Spatrick       Result.Files[File].Mappings.push_back(Mapping);
895e5dd7070Spatrick     }
896e5dd7070Spatrick   }
897e5dd7070Spatrick 
898389bb291Spatrick   // advance() is supposed to consume at least one token - if not, we crash.
diagnoseAdvanceFailure()899389bb291Spatrick   void diagnoseAdvanceFailure() {
900389bb291Spatrick #ifndef NDEBUG
901389bb291Spatrick     // Show the failed-to-map token in context.
902389bb291Spatrick     for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
903389bb291Spatrick          I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
904389bb291Spatrick       const char *L =
905389bb291Spatrick           (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
906389bb291Spatrick       llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
907e5dd7070Spatrick     }
908389bb291Spatrick #endif
909389bb291Spatrick     llvm_unreachable("Couldn't map expanded token to spelled tokens!");
910e5dd7070Spatrick   }
911e5dd7070Spatrick 
912e5dd7070Spatrick   /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
913e5dd7070Spatrick   /// ranges for each of the files.
buildSpelledTokens()914e5dd7070Spatrick   void buildSpelledTokens() {
915e5dd7070Spatrick     for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
916389bb291Spatrick       const auto &Tok = Result.ExpandedTokens[I];
917389bb291Spatrick       auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
918e5dd7070Spatrick       auto It = Result.Files.try_emplace(FID);
919e5dd7070Spatrick       TokenBuffer::MarkedFile &File = It.first->second;
920e5dd7070Spatrick 
921389bb291Spatrick       // The eof token should not be considered part of the main-file's range.
922389bb291Spatrick       File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
923389bb291Spatrick 
924e5dd7070Spatrick       if (!It.second)
925e5dd7070Spatrick         continue; // we have seen this file before.
926e5dd7070Spatrick       // This is the first time we see this file.
927e5dd7070Spatrick       File.BeginExpanded = I;
928e5dd7070Spatrick       File.SpelledTokens = tokenize(FID, SM, LangOpts);
929e5dd7070Spatrick     }
930e5dd7070Spatrick   }
931e5dd7070Spatrick 
932e5dd7070Spatrick   TokenBuffer Result;
933389bb291Spatrick   unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
934389bb291Spatrick   llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
935e5dd7070Spatrick   PPExpansions CollectedExpansions;
936e5dd7070Spatrick   const SourceManager &SM;
937e5dd7070Spatrick   const LangOptions &LangOpts;
938e5dd7070Spatrick };
939e5dd7070Spatrick 
consume()940e5dd7070Spatrick TokenBuffer TokenCollector::consume() && {
941e5dd7070Spatrick   PP.setTokenWatcher(nullptr);
942e5dd7070Spatrick   Collector->disable();
943e5dd7070Spatrick   return Builder(std::move(Expanded), std::move(Expansions),
944e5dd7070Spatrick                  PP.getSourceManager(), PP.getLangOpts())
945e5dd7070Spatrick       .build();
946e5dd7070Spatrick }
947e5dd7070Spatrick 
str() const948e5dd7070Spatrick std::string syntax::Token::str() const {
949ec727ea7Spatrick   return std::string(llvm::formatv("Token({0}, length = {1})",
950ec727ea7Spatrick                                    tok::getTokenName(kind()), length()));
951e5dd7070Spatrick }
952e5dd7070Spatrick 
dumpForTests(const SourceManager & SM) const953e5dd7070Spatrick std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
954ec727ea7Spatrick   return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
955ec727ea7Spatrick                                    tok::getTokenName(kind()), length()));
956e5dd7070Spatrick }
957e5dd7070Spatrick 
dumpForTests() const958e5dd7070Spatrick std::string TokenBuffer::dumpForTests() const {
959e5dd7070Spatrick   auto PrintToken = [this](const syntax::Token &T) -> std::string {
960e5dd7070Spatrick     if (T.kind() == tok::eof)
961e5dd7070Spatrick       return "<eof>";
962ec727ea7Spatrick     return std::string(T.text(*SourceMgr));
963e5dd7070Spatrick   };
964e5dd7070Spatrick 
965e5dd7070Spatrick   auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
966e5dd7070Spatrick                                         llvm::ArrayRef<syntax::Token> Tokens) {
967e5dd7070Spatrick     if (Tokens.empty()) {
968e5dd7070Spatrick       OS << "<empty>";
969e5dd7070Spatrick       return;
970e5dd7070Spatrick     }
971e5dd7070Spatrick     OS << Tokens[0].text(*SourceMgr);
972e5dd7070Spatrick     for (unsigned I = 1; I < Tokens.size(); ++I) {
973e5dd7070Spatrick       if (Tokens[I].kind() == tok::eof)
974e5dd7070Spatrick         continue;
975e5dd7070Spatrick       OS << " " << PrintToken(Tokens[I]);
976e5dd7070Spatrick     }
977e5dd7070Spatrick   };
978e5dd7070Spatrick 
979e5dd7070Spatrick   std::string Dump;
980e5dd7070Spatrick   llvm::raw_string_ostream OS(Dump);
981e5dd7070Spatrick 
982e5dd7070Spatrick   OS << "expanded tokens:\n"
983e5dd7070Spatrick      << "  ";
984e5dd7070Spatrick   // (!) we do not show '<eof>'.
985*12c85518Srobert   DumpTokens(OS, llvm::ArrayRef(ExpandedTokens).drop_back());
986e5dd7070Spatrick   OS << "\n";
987e5dd7070Spatrick 
988e5dd7070Spatrick   std::vector<FileID> Keys;
989e5dd7070Spatrick   for (auto F : Files)
990e5dd7070Spatrick     Keys.push_back(F.first);
991e5dd7070Spatrick   llvm::sort(Keys);
992e5dd7070Spatrick 
993e5dd7070Spatrick   for (FileID ID : Keys) {
994e5dd7070Spatrick     const MarkedFile &File = Files.find(ID)->second;
995e5dd7070Spatrick     auto *Entry = SourceMgr->getFileEntryForID(ID);
996e5dd7070Spatrick     if (!Entry)
997e5dd7070Spatrick       continue; // Skip builtin files.
998e5dd7070Spatrick     OS << llvm::formatv("file '{0}'\n", Entry->getName())
999e5dd7070Spatrick        << "  spelled tokens:\n"
1000e5dd7070Spatrick        << "    ";
1001e5dd7070Spatrick     DumpTokens(OS, File.SpelledTokens);
1002e5dd7070Spatrick     OS << "\n";
1003e5dd7070Spatrick 
1004e5dd7070Spatrick     if (File.Mappings.empty()) {
1005e5dd7070Spatrick       OS << "  no mappings.\n";
1006e5dd7070Spatrick       continue;
1007e5dd7070Spatrick     }
1008e5dd7070Spatrick     OS << "  mappings:\n";
1009e5dd7070Spatrick     for (auto &M : File.Mappings) {
1010e5dd7070Spatrick       OS << llvm::formatv(
1011e5dd7070Spatrick           "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
1012e5dd7070Spatrick           PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
1013e5dd7070Spatrick           M.EndSpelled == File.SpelledTokens.size()
1014e5dd7070Spatrick               ? "<eof>"
1015e5dd7070Spatrick               : PrintToken(File.SpelledTokens[M.EndSpelled]),
1016e5dd7070Spatrick           M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
1017e5dd7070Spatrick           M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
1018e5dd7070Spatrick           M.EndExpanded);
1019e5dd7070Spatrick     }
1020e5dd7070Spatrick   }
1021*12c85518Srobert   return Dump;
1022e5dd7070Spatrick }
1023