//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/Tokens.h"

#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <optional>
#include <string>
#include <utility>
#include <vector>

using namespace clang;
using namespace clang::syntax;

namespace {
// Finds the smallest consecutive subsequence of Toks that covers R.
llvm::ArrayRef<syntax::Token>
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
                  const SourceManager &SM) {
  if (R.isInvalid())
    return {};
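  // Toks is assumed to be sorted in translation-unit order, so partition_point
  // can binary-search: Begin is the first token not before R.getBegin(), and
  // End is one past the last token not after R.getEnd().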
  const syntax::Token *Begin =
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
        return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
      });
  const syntax::Token *End =
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
        return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
      });
  if (Begin > End)
    return {};
  return {Begin, End};
}

// Finds the range within FID corresponding to expanded tokens [First, Last].
// Prev precedes First and Next follows Last; these must *not* be included.
// If no range satisfies the criteria, returns an invalid range.
//
// #define ID(x) x
// ID(ID(ID(a1) a2))
//          ~~       -> a1
//              ~~   -> a2
//       ~~~~~~~~~   -> a1 a2
SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last,
                                   SourceLocation Prev, SourceLocation Next,
                                   FileID TargetFile,
                                   const SourceManager &SM) {
  // There are two main parts to this algorithm:
  //  - identifying which spelled range covers the expanded tokens
  //  - validating that this range doesn't cover any extra tokens (First/Last)
  //
  // We do these in order. However as we transform the expanded range into the
  // spelled one, we adjust First/Last so the validation remains simple.

  assert(SM.getSLocEntry(TargetFile).isFile());
  // In most cases, to select First and Last we must return their expansion
  // range, i.e. the whole of any macros they are included in.
  //
  // When First and Last are part of the *same macro arg* of a macro written
  // in TargetFile, we want that slice of the arg, i.e. their spelling range.
  //
  // Unwrap such macro calls. If the target file has A(B(C)), the
  // SourceLocation stack of a token inside C shows us the expansion of A first,
  // then B, then any macros inside C's body, then C itself.
  // (This is the reverse of the order the PP applies the expansions in).
  while (First.isMacroID() && Last.isMacroID()) {
    auto DecFirst = SM.getDecomposedLoc(First);
    auto DecLast = SM.getDecomposedLoc(Last);
    auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion();
    auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion();

    if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion())
      break;
    // Locations are in the same macro arg if they expand to the same place.
    // (They may still have different FileIDs - an arg can have >1 chunks!)
    if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart())
      break;
    // Careful, given:
    //   #define HIDE ID(ID(a))
    //   ID(ID(HIDE))
    // The token `a` is wrapped in 4 arg-expansions; we only want to unwrap 2.
    // We distinguish them by whether the macro expands into the target file.
    // Fortunately, the target file ones will always appear first.
    auto ExpFileID = SM.getFileID(ExpFirst.getExpansionLocStart());
    if (ExpFileID == TargetFile)
      break;
    // Replace each endpoint with its spelling inside the macro arg.
    // (This is getImmediateSpellingLoc without repeating lookups).
    First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second);
    Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second);
  }
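  // At this point First/Last either point directly into TargetFile or into
  // macros whose full expansion range is taken below; the same-argument case
  // has already been unwrapped to spelling locations inside the argument.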

  // In all remaining cases we need the full containing macros.
  // If this overlaps Prev or Next, then no range is possible.
  SourceRange Candidate =
      SM.getExpansionRange(SourceRange(First, Last)).getAsRange();
  auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin());
  auto DecLast = SM.getDecomposedExpansionLoc(Candidate.getEnd());
  // Can end up in the wrong file due to bad input or token-pasting shenanigans.
  if (Candidate.isInvalid() || DecFirst.first != TargetFile ||
      DecLast.first != TargetFile)
    return SourceRange();
  // Check bounds, which may still be inside macros.
  if (Prev.isValid()) {
    auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin());
    if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second)
      return SourceRange();
  }
  if (Next.isValid()) {
    auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd());
    if (Dec.first != DecLast.first || Dec.second <= DecLast.second)
      return SourceRange();
  }
  // Now we know that Candidate is a file range that covers [First, Last]
  // without encroaching on {Prev, Next}. Ship it!
  return Candidate;
}

} // namespace

syntax::Token::Token(SourceLocation Location, unsigned Length,
                     tok::TokenKind Kind)
    : Location(Location), Length(Length), Kind(Kind) {
  assert(Location.isValid());
}

syntax::Token::Token(const clang::Token &T)
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
  assert(!T.isAnnotation());
}

llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
  bool Invalid = false;
  const char *Start = SM.getCharacterData(location(), &Invalid);
  assert(!Invalid);
  return llvm::StringRef(Start, length());
}

FileRange syntax::Token::range(const SourceManager &SM) const {
  assert(location().isFileID() && "must be a spelled token");
  FileID File;
  unsigned StartOffset;
  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
  return FileRange(File, StartOffset, StartOffset + length());
}

FileRange syntax::Token::range(const SourceManager &SM,
                               const syntax::Token &First,
                               const syntax::Token &Last) {
  auto F = First.range(SM);
  auto L = Last.range(SM);
  assert(F.file() == L.file() && "tokens from different files");
  assert((F == L || F.endOffset() <= L.beginOffset()) &&
         "wrong order of tokens");
  return FileRange(F.file(), F.beginOffset(), L.endOffset());
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
  return OS << T.str();
}

FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
    : File(File), Begin(BeginOffset), End(EndOffset) {
  assert(File.isValid());
  assert(BeginOffset <= EndOffset);
}

FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     unsigned Length) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = Begin + Length;
}
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     SourceLocation EndLoc) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());
  assert(EndLoc.isValid());
  assert(EndLoc.isFileID());
  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = SM.getFileOffset(EndLoc);
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
                                      const FileRange &R) {
  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
                             R.file().getHashValue(), R.beginOffset(),
                             R.endOffset());
}

llvm::StringRef FileRange::text(const SourceManager &SM) const {
  bool Invalid = false;
  StringRef Text = SM.getBufferData(File, &Invalid);
  if (Invalid)
    return "";
  assert(Begin <= Text.size());
  assert(End <= Text.size());
  return Text.substr(Begin, length());
}

void TokenBuffer::indexExpandedTokens() {
  // No-op if the index is already created.
  if (!ExpandedTokIndex.empty())
    return;
  ExpandedTokIndex.reserve(ExpandedTokens.size());
  // Index ExpandedTokens for faster lookups by SourceLocation.
  for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) {
    SourceLocation Loc = ExpandedTokens[I].location();
    if (Loc.isValid())
      ExpandedTokIndex[Loc] = I;
  }
}

llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
  if (R.isInvalid())
    return {};
  if (!ExpandedTokIndex.empty()) {
    // Quick lookup if `R` is a token range.
    // This is a huge win since the majority of users use ranges provided by
    // the AST. Ranges in the AST are token ranges from the expanded token
    // stream.
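    // E.g. a binary-expression node for `a + b` has getBegin() at `a` and
    // getEnd() at `b` (the last token); both are expanded-token locations and
    // therefore keys in ExpandedTokIndex.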
    const auto B = ExpandedTokIndex.find(R.getBegin());
    const auto E = ExpandedTokIndex.find(R.getEnd());
    if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
      const Token *L = ExpandedTokens.data() + B->getSecond();
      // Add 1 to End to make a half-open range.
      const Token *R = ExpandedTokens.data() + E->getSecond() + 1;
      if (L > R)
        return {};
      return {L, R};
    }
  }
  // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
  // required range.
  return getTokensCovering(expandedTokens(), R, *SourceMgr);
}

CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
  return CharSourceRange(
      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
      /*IsTokenRange=*/false);
}

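// Illustrative example (not from the original source): given
//   #define FOO a b
//   int FOO;
// the expanded stream is `int a b ;`, and the spelled stream of the file also
// contains the directive tokens `# define FOO a b`. One Mapping links the
// spelled `FOO` of the second line to the expanded `a b`: both `a` and `b` map
// back to `FOO`, while `int` and `;` map one-to-one via index arithmetic
// around that mapping.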
std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
  assert(Expanded);
  assert(ExpandedTokens.data() <= Expanded &&
         Expanded < ExpandedTokens.data() + ExpandedTokens.size());

  auto FileIt = Files.find(
      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
  assert(FileIt != Files.end() && "no file for an expanded token");

  const MarkedFile &File = FileIt->second;

  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
  // Find the first mapping that produced tokens after \p Expanded.
  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginExpanded <= ExpandedIndex;
  });
  // Our token could only be produced by the previous mapping.
  if (It == File.Mappings.begin()) {
    // No previous mapping, no need to modify offsets.
    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
            /*Mapping=*/nullptr};
  }
  --It; // 'It' now points to last mapping that started before our token.

  // Check if the token is part of the mapping.
  if (ExpandedIndex < It->EndExpanded)
    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};

  // Not part of the mapping, use the index from the previous mapping to
  // compute the corresponding spelled token.
  return {
      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
      /*Mapping=*/nullptr};
}

const TokenBuffer::Mapping *
TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
                                          const syntax::Token *Spelled) {
  assert(F.SpelledTokens.data() <= Spelled);
  unsigned SpelledI = Spelled - F.SpelledTokens.data();
  assert(SpelledI < F.SpelledTokens.size());

  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
    return M.BeginSpelled <= SpelledI;
  });
  if (It == F.Mappings.begin())
    return nullptr;
  --It;
  return &*It;
}

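// Illustrative behaviour (assuming the FOO setup above): expandedForSpelled of
// the spelled `FOO` token returns the expanded `a b`, expandedForSpelled of
// `int` returns `int`, and a spelled range covering only part of a macro
// expansion's spelled side yields an empty result (see the FIXMEs below).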
llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  if (Spelled.empty())
    return {};
  const auto &File = fileForSpelled(Spelled);

  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
  assert(SpelledFrontI < File.SpelledTokens.size());
  unsigned ExpandedBegin;
  if (!FrontMapping) {
    // No mapping that starts before the first token of Spelled, we don't have
    // to modify offsets.
    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledFrontI != FrontMapping->BeginSpelled) {
      // Spelled tokens don't cover the entire mapping, returning empty result.
      return {}; // FIXME: support macro arguments.
    }
    // Spelled tokens start at the beginning of this mapping.
    ExpandedBegin = FrontMapping->BeginExpanded;
  } else {
    // Spelled tokens start after the mapping ends (they start in the hole
    // between 2 mappings, or between a mapping and end of the file).
    ExpandedBegin =
        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
  }

  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
  unsigned ExpandedEnd;
  if (!BackMapping) {
    // No mapping that starts before the last token of Spelled, we don't have to
    // modify offsets.
    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
  } else if (SpelledBackI < BackMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledBackI + 1 != BackMapping->EndSpelled) {
      // Spelled tokens don't cover the entire mapping, returning empty result.
      return {}; // FIXME: support macro arguments.
    }
    ExpandedEnd = BackMapping->EndExpanded;
  } else {
    // Spelled tokens end after the mapping ends.
    ExpandedEnd =
        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
  }

  assert(ExpandedBegin < ExpandedTokens.size());
  assert(ExpandedEnd < ExpandedTokens.size());
  // Avoid returning empty ranges.
  if (ExpandedBegin == ExpandedEnd)
    return {};
  return {llvm::ArrayRef(ExpandedTokens.data() + ExpandedBegin,
                         ExpandedTokens.data() + ExpandedEnd)};
}

llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
  auto It = Files.find(FID);
  assert(It != Files.end());
  return It->second.SpelledTokens;
}

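// E.g. (illustrative) with the file text `int abc;`, a location pointing at
// the `b` returns the `abc` token, while a location at the whitespace after
// `int` falls between tokens and returns nullptr.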
const syntax::Token *
TokenBuffer::spelledTokenContaining(SourceLocation Loc) const {
  assert(Loc.isFileID());
  const auto *Tok = llvm::partition_point(
      spelledTokens(SourceMgr->getFileID(Loc)),
      [&](const syntax::Token &Tok) { return Tok.endLocation() <= Loc; });
  if (!Tok || Loc < Tok->location())
    return nullptr;
  return Tok;
}

std::string TokenBuffer::Mapping::str() const {
  return std::string(
      llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
                    BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
}

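// Illustrative behaviour (not exhaustive): with `#define ID(X) X` written in
// the target file, the expanded tokens of `ID(1 + 2)` map back to the spelled
// `1 + 2` inside the parentheses (the macro-argument slice, via
// spelledForExpandedSlow), whereas a range covering only part of an ordinary
// macro expansion cannot be mapped and yields std::nullopt.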
std::optional<llvm::ArrayRef<syntax::Token>>
TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
  // In cases of invalid code, AST nodes can have source ranges that include
  // the `eof` token. As there's no spelling for this token, exclude it from
  // the range.
  if (!Expanded.empty() && Expanded.back().kind() == tok::eof) {
    Expanded = Expanded.drop_back();
  }
  // Mapping an empty range is ambiguous in case of empty mappings at either
  // end of the range; bail out in that case.
  if (Expanded.empty())
    return std::nullopt;
  const syntax::Token *First = &Expanded.front();
  const syntax::Token *Last = &Expanded.back();
  auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
  auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);

  FileID FID = SourceMgr->getFileID(FirstSpelled->location());
  // FIXME: Handle multi-file changes by trying to map onto a common root.
  if (FID != SourceMgr->getFileID(LastSpelled->location()))
    return std::nullopt;

  const MarkedFile &File = Files.find(FID)->second;

  // If the range is within one macro argument, the result may be only part of a
  // Mapping. We must use the general (SourceManager-based) algorithm.
  if (FirstMapping && FirstMapping == LastMapping &&
      SourceMgr->isMacroArgExpansion(First->location()) &&
      SourceMgr->isMacroArgExpansion(Last->location())) {
    // We use excluded Prev/Next token for bounds checking.
    SourceLocation Prev = (First == &ExpandedTokens.front())
                              ? SourceLocation()
                              : (First - 1)->location();
    SourceLocation Next = (Last == &ExpandedTokens.back())
                              ? SourceLocation()
                              : (Last + 1)->location();
    SourceRange Range = spelledForExpandedSlow(
        First->location(), Last->location(), Prev, Next, FID, *SourceMgr);
    if (Range.isInvalid())
      return std::nullopt;
    return getTokensCovering(File.SpelledTokens, Range, *SourceMgr);
  }

  // Otherwise, use the fast version based on Mappings.
  // Do not allow changes that don't cover a full expansion.
  unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data();
  unsigned LastExpanded = Expanded.end() - ExpandedTokens.data();
  if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded)
    return std::nullopt;
  if (LastMapping && LastMapping->EndExpanded != LastExpanded)
    return std::nullopt;
  return llvm::ArrayRef(
      FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled
                   : FirstSpelled,
      LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
                  : LastSpelled + 1);
}

TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
                                                  const Mapping &M) const {
  Expansion E;
  E.Spelled = llvm::ArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
                             F.SpelledTokens.data() + M.EndSpelled);
  E.Expanded = llvm::ArrayRef(ExpandedTokens.data() + M.BeginExpanded,
                              ExpandedTokens.data() + M.EndExpanded);
  return E;
}

const TokenBuffer::MarkedFile &
TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  assert(!Spelled.empty());
  assert(Spelled.front().location().isFileID() && "not a spelled token");
  auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
  assert(FileIt != Files.end() && "file not tracked by token buffer");
  const auto &File = FileIt->second;
  assert(File.SpelledTokens.data() <= Spelled.data() &&
         Spelled.end() <=
             (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
         "Tokens not in spelled range");
#ifndef NDEBUG
  auto T1 = Spelled.back().location();
  auto T2 = File.SpelledTokens.back().location();
  assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
#endif
  return File;
}

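// E.g. (illustrative) with `#define FOO a b` and a use `int FOO;`,
// expansionStartingAt() on the spelled `FOO` of the use returns an Expansion
// with Spelled == `FOO` and Expanded == `a b`; calling it on `int` returns
// std::nullopt because no mapping begins at that token.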
std::optional<TokenBuffer::Expansion>
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
  assert(Spelled);
  const auto &File = fileForSpelled(*Spelled);

  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginSpelled < SpelledIndex;
  });
  if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
    return std::nullopt;
  return makeExpansion(File, *M);
}

std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
    llvm::ArrayRef<syntax::Token> Spelled) const {
  if (Spelled.empty())
    return {};
  const auto &File = fileForSpelled(Spelled);

  // Find the first overlapping range, and then copy until we stop overlapping.
  unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
  unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.EndSpelled <= SpelledBeginIndex;
  });
  std::vector<TokenBuffer::Expansion> Expansions;
  for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M)
    Expansions.push_back(makeExpansion(File, *M));
  return Expansions;
}

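// E.g. (illustrative) in `foo+bar`, a location right after `foo` (i.e. at the
// `+`) touches both `foo` and `+`, so both tokens are returned; a location in
// the middle of `foo` returns just `foo`.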
llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              llvm::ArrayRef<syntax::Token> Tokens) {
  assert(Loc.isFileID());

  auto *Right = llvm::partition_point(
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
  bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
  bool AcceptLeft =
      Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
  return llvm::ArrayRef(Right - (AcceptLeft ? 1 : 0),
                        Right + (AcceptRight ? 1 : 0));
}

llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              const syntax::TokenBuffer &Tokens) {
  return spelledTokensTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  llvm::ArrayRef<syntax::Token> Tokens) {
  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
    if (Tok.kind() == tok::identifier)
      return &Tok;
  }
  return nullptr;
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  const syntax::TokenBuffer &Tokens) {
  return spelledIdentifierTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

std::vector<const syntax::Token *>
TokenBuffer::macroExpansions(FileID FID) const {
  auto FileIt = Files.find(FID);
  assert(FileIt != Files.end() && "file not tracked by token buffer");
  auto &File = FileIt->second;
  std::vector<const syntax::Token *> Expansions;
  auto &Spelled = File.SpelledTokens;
  for (auto Mapping : File.Mappings) {
    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
    if (Token->kind() == tok::TokenKind::identifier)
      Expansions.push_back(Token);
  }
  return Expansions;
}

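// Illustrative note (hypothetical input): tokenizing a range that covers
// `int main() {}` yields the raw tokens `int main ( ) { }`, with keyword kinds
// resolved via the IdentifierTable lookup below; comments and whitespace are
// not included, and no preprocessing is performed.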
tokenize(const FileRange & FR,const SourceManager & SM,const LangOptions & LO)5758c2cf499SKadir Cetinkaya std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
5768c2cf499SKadir Cetinkaya                                             const SourceManager &SM,
577e7230ea7SIlya Biryukov                                             const LangOptions &LO) {
578e7230ea7SIlya Biryukov   std::vector<syntax::Token> Tokens;
579e7230ea7SIlya Biryukov   IdentifierTable Identifiers(LO);
580e7230ea7SIlya Biryukov   auto AddToken = [&](clang::Token T) {
581e7230ea7SIlya Biryukov     // Fill the proper token kind for keywords, etc.
582e7230ea7SIlya Biryukov     if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
583e7230ea7SIlya Biryukov         !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
584e7230ea7SIlya Biryukov       clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
585e7230ea7SIlya Biryukov       T.setIdentifierInfo(&II);
586e7230ea7SIlya Biryukov       T.setKind(II.getTokenID());
587e7230ea7SIlya Biryukov     }
588e7230ea7SIlya Biryukov     Tokens.push_back(syntax::Token(T));
589e7230ea7SIlya Biryukov   };
590e7230ea7SIlya Biryukov 
5918c2cf499SKadir Cetinkaya   auto SrcBuffer = SM.getBufferData(FR.file());
5928c2cf499SKadir Cetinkaya   Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
5938c2cf499SKadir Cetinkaya           SrcBuffer.data() + FR.beginOffset(),
5948c2cf499SKadir Cetinkaya           // We can't make BufEnd point to FR.endOffset, as Lexer requires a
5958c2cf499SKadir Cetinkaya           // null terminated buffer.
5968c2cf499SKadir Cetinkaya           SrcBuffer.data() + SrcBuffer.size());
597e7230ea7SIlya Biryukov 
598e7230ea7SIlya Biryukov   clang::Token T;
5998c2cf499SKadir Cetinkaya   while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
600e7230ea7SIlya Biryukov     AddToken(T);
6018c2cf499SKadir Cetinkaya   // LexFromRawLexer returns true when it parses the last token of the file, add
6028c2cf499SKadir Cetinkaya   // it iff it starts within the range we are interested in.
6038c2cf499SKadir Cetinkaya   if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
604e7230ea7SIlya Biryukov     AddToken(T);
605e7230ea7SIlya Biryukov   return Tokens;
606e7230ea7SIlya Biryukov }
607e7230ea7SIlya Biryukov 
tokenize(FileID FID,const SourceManager & SM,const LangOptions & LO)6088c2cf499SKadir Cetinkaya std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
6098c2cf499SKadir Cetinkaya                                             const LangOptions &LO) {
6108c2cf499SKadir Cetinkaya   return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
6118c2cf499SKadir Cetinkaya }

/// Records information required to construct mappings for the token buffer
/// that we are collecting.
class TokenCollector::CollectPPExpansions : public PPCallbacks {
public:
  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}

  /// A disabled instance stops reporting anything to TokenCollector.
  /// This ensures that uses of the preprocessor after TokenCollector::consume()
  /// is called do not access the (possibly invalid) collector instance.
  void disable() { Collector = nullptr; }

  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
                    SourceRange Range, const MacroArgs *Args) override {
    if (!Collector)
      return;
    const auto &SM = Collector->PP.getSourceManager();
    // Only record top-level expansions that directly produce expanded tokens.
    // This excludes those where:
    //   - the macro use is inside a macro body,
    //   - the macro appears in an argument to another macro.
    // However macro expansion isn't really a tree, it's token rewrite rules,
    // so there are other cases, e.g.
    //   #define B(X) X
    //   #define A 1 + B
    //   A(2)
    // Both A and B produce expanded tokens, though the macro name 'B' comes
    // from an expansion. The best we can do is merge the mappings for both.

    // The *last* token of any top-level macro expansion must be in a file.
    // (In the example above, see the closing paren of the expansion of B).
    if (!Range.getEnd().isFileID())
      return;
    // If there's a current expansion that encloses this one, this one can't be
    // top-level.
    if (LastExpansionEnd.isValid() &&
        !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
      return;

    // If the macro invocation (B) starts in a macro (A) but ends in a file,
    // we'll create a merged mapping for A + B by overwriting the endpoint for
    // A's startpoint.
    if (!Range.getBegin().isFileID()) {
      Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
      assert(Collector->Expansions.count(Range.getBegin()) &&
             "Overlapping macros should have same expansion location");
    }

    Collector->Expansions[Range.getBegin()] = Range.getEnd();
    LastExpansionEnd = Range.getEnd();
  }
  // FIXME: handle directives like #pragma, #include, etc.
private:
  TokenCollector *Collector;
  /// Used to detect recursive macro expansions.
  SourceLocation LastExpansionEnd;
};

/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
/// implementation tracks the tokens, macro expansions and directives coming
/// from the preprocessor and:
/// - for each token, figures out if it is part of the expanded token stream,
///   the spelled token stream, or both. Stores the tokens appropriately.
/// - records mappings from the spelled to expanded token ranges, e.g. for macro
///   expansions.
/// FIXME: also properly record:
///          - #include directives,
///          - #pragma, #line and other PP directives,
///          - skipped pp regions,
///          - ...

TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
  // Collect the expanded token stream during preprocessing.
  PP.setTokenWatcher([this](const clang::Token &T) {
    if (T.isAnnotation())
      return;
    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
                                          << "Token: "
                                          << syntax::Token(T).dumpForTests(
                                                 this->PP.getSourceManager())
                                          << "\n"

    );
    Expanded.push_back(syntax::Token(T));
  });
  // And locations of macro calls, to properly recover boundaries of those in
  // case of empty expansions.
  auto CB = std::make_unique<CollectPPExpansions>(*this);
  this->Collector = CB.get();
  PP.addPPCallbacks(std::move(CB));
}

/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
/// token stream.
class TokenCollector::Builder {
public:
  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
          const SourceManager &SM, const LangOptions &LangOpts)
      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
        LangOpts(LangOpts) {
    Result.ExpandedTokens = std::move(Expanded);
  }

  TokenBuffer build() && {
    assert(!Result.ExpandedTokens.empty());
    assert(Result.ExpandedTokens.back().kind() == tok::eof);

    // Tokenize every file that contributed tokens to the expanded stream.
    buildSpelledTokens();

    // The expanded token stream consists of runs of tokens that came from
    // the same source (a macro expansion, part of a file etc).
    // Between these runs are the logical positions of spelled tokens that
    // didn't expand to anything.
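    // E.g. (illustrative) for:
    //   #define EMPTY
    //   EMPTY int a;
    // the `#define` directive tokens and the spelled `EMPTY` use produce no
    // expanded tokens, so discard() records empty mappings for them; advance()
    // then consumes the matching `int a ;` run, which needs no mapping.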
726ec0b9908SSam McCall     while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
727ec0b9908SSam McCall       // Create empty mappings for spelled tokens that expanded to nothing here.
728ec0b9908SSam McCall       // May advance NextSpelled, but NextExpanded is unchanged.
729ec0b9908SSam McCall       discard();
730ec0b9908SSam McCall       // Create mapping for a contiguous run of expanded tokens.
731ec0b9908SSam McCall       // Advances NextExpanded past the run, and NextSpelled accordingly.
732ec0b9908SSam McCall       unsigned OldPosition = NextExpanded;
733ec0b9908SSam McCall       advance();
734ec0b9908SSam McCall       if (NextExpanded == OldPosition)
735ec0b9908SSam McCall         diagnoseAdvanceFailure();
736e7230ea7SIlya Biryukov     }
737ec0b9908SSam McCall     // If any tokens remain in any of the files, they didn't expand to anything.
738ec0b9908SSam McCall     // Create empty mappings up until the end of the file.
739ec0b9908SSam McCall     for (const auto &File : Result.Files)
740ec0b9908SSam McCall       discard(File.first);
741e7230ea7SIlya Biryukov 
7421bf055c9SMarcel Hlopko #ifndef NDEBUG
7431bf055c9SMarcel Hlopko     for (auto &pair : Result.Files) {
7441bf055c9SMarcel Hlopko       auto &mappings = pair.second.Mappings;
7451647ff6eSGeorgii Rymar       assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
7461647ff6eSGeorgii Rymar                                           const TokenBuffer::Mapping &M2) {
7471bf055c9SMarcel Hlopko         return M1.BeginSpelled < M2.BeginSpelled &&
7481bf055c9SMarcel Hlopko                M1.EndSpelled < M2.EndSpelled &&
7491bf055c9SMarcel Hlopko                M1.BeginExpanded < M2.BeginExpanded &&
7501bf055c9SMarcel Hlopko                M1.EndExpanded < M2.EndExpanded;
7511bf055c9SMarcel Hlopko       }));
7521bf055c9SMarcel Hlopko     }
7531bf055c9SMarcel Hlopko #endif
7541bf055c9SMarcel Hlopko 
755e7230ea7SIlya Biryukov     return std::move(Result);
756e7230ea7SIlya Biryukov   }
757e7230ea7SIlya Biryukov 
758e7230ea7SIlya Biryukov private:
759ec0b9908SSam McCall   // Consume a sequence of spelled tokens that didn't expand to anything.
760ec0b9908SSam McCall   // In the simplest case, skips spelled tokens until finding one that produced
761ec0b9908SSam McCall   // the NextExpanded token, and creates an empty mapping for them.
762ec0b9908SSam McCall   // If Drain is provided, skips remaining tokens from that file instead.
discard(std::optional<FileID> Drain=std::nullopt)7636ad0788cSKazu Hirata   void discard(std::optional<FileID> Drain = std::nullopt) {
764ec0b9908SSam McCall     SourceLocation Target =
765ec0b9908SSam McCall         Drain ? SM.getLocForEndOfFile(*Drain)
766ec0b9908SSam McCall               : SM.getExpansionLoc(
767ec0b9908SSam McCall                     Result.ExpandedTokens[NextExpanded].location());
768ec0b9908SSam McCall     FileID File = SM.getFileID(Target);
769ec0b9908SSam McCall     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
770ec0b9908SSam McCall     auto &NextSpelled = this->NextSpelled[File];
771ec0b9908SSam McCall 
772ec0b9908SSam McCall     TokenBuffer::Mapping Mapping;
773ec0b9908SSam McCall     Mapping.BeginSpelled = NextSpelled;
774ec0b9908SSam McCall     // When dropping trailing tokens from a file, the empty mapping should
775ec0b9908SSam McCall     // be positioned within the file's expanded-token range (at the end).
776ec0b9908SSam McCall     Mapping.BeginExpanded = Mapping.EndExpanded =
777ec0b9908SSam McCall         Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
778ec0b9908SSam McCall     // We may want to split into several adjacent empty mappings.
779ec0b9908SSam McCall     // FlushMapping() emits the current mapping and starts a new one.
780ec0b9908SSam McCall     auto FlushMapping = [&, this] {
781ec0b9908SSam McCall       Mapping.EndSpelled = NextSpelled;
782ec0b9908SSam McCall       if (Mapping.BeginSpelled != Mapping.EndSpelled)
783ec0b9908SSam McCall         Result.Files[File].Mappings.push_back(Mapping);
784ec0b9908SSam McCall       Mapping.BeginSpelled = NextSpelled;
785ec0b9908SSam McCall     };
786ec0b9908SSam McCall 
787ec0b9908SSam McCall     while (NextSpelled < SpelledTokens.size() &&
788ec0b9908SSam McCall            SpelledTokens[NextSpelled].location() < Target) {
789ec0b9908SSam McCall       // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
790ec0b9908SSam McCall       // then we want to partition our (empty) mapping.
791ec0b9908SSam McCall       //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
79278194118SMikhail Maltsev       SourceLocation KnownEnd =
79378194118SMikhail Maltsev           CollectedExpansions.lookup(SpelledTokens[NextSpelled].location());
794ec0b9908SSam McCall       if (KnownEnd.isValid()) {
795ec0b9908SSam McCall         FlushMapping(); // Emits [Start, NextSpelled)
796ec0b9908SSam McCall         while (NextSpelled < SpelledTokens.size() &&
797ec0b9908SSam McCall                SpelledTokens[NextSpelled].location() <= KnownEnd)
798ec0b9908SSam McCall           ++NextSpelled;
799ec0b9908SSam McCall         FlushMapping(); // Emits [NextSpelled, KnownEnd]
8005674a3c8SGabriel Ravier         // Now the loop continues and will emit (KnownEnd, Target).
801ec0b9908SSam McCall       } else {
802ec0b9908SSam McCall         ++NextSpelled;
803e7230ea7SIlya Biryukov       }
804ec0b9908SSam McCall     }
805ec0b9908SSam McCall     FlushMapping();
806ec0b9908SSam McCall   }
807e7230ea7SIlya Biryukov 
808ec0b9908SSam McCall   // Consumes the NextExpanded token and others that are part of the same run.
809ec0b9908SSam McCall   // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
810ec0b9908SSam McCall   // (unless this is a run of file tokens, which we represent with no mapping).
advance()811ec0b9908SSam McCall   void advance() {
812ec0b9908SSam McCall     const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
813ec0b9908SSam McCall     SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
814ec0b9908SSam McCall     FileID File = SM.getFileID(Expansion);
815ec0b9908SSam McCall     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
816ec0b9908SSam McCall     auto &NextSpelled = this->NextSpelled[File];
817e7230ea7SIlya Biryukov 
818ec0b9908SSam McCall     if (Tok.location().isFileID()) {
819ec0b9908SSam McCall       // A run of file tokens continues while the expanded/spelled tokens match.
820ec0b9908SSam McCall       while (NextSpelled < SpelledTokens.size() &&
821ec0b9908SSam McCall              NextExpanded < Result.ExpandedTokens.size() &&
822ec0b9908SSam McCall              SpelledTokens[NextSpelled].location() ==
823ec0b9908SSam McCall                  Result.ExpandedTokens[NextExpanded].location()) {
824ec0b9908SSam McCall         ++NextSpelled;
825ec0b9908SSam McCall         ++NextExpanded;
826ec0b9908SSam McCall       }
827ec0b9908SSam McCall       // We need no mapping for file tokens copied to the expanded stream.
828ec0b9908SSam McCall     } else {
829ec0b9908SSam McCall       // We found a new macro expansion. We should have its spelling bounds.
83078194118SMikhail Maltsev       auto End = CollectedExpansions.lookup(Expansion);
831ec0b9908SSam McCall       assert(End.isValid() && "Macro expansion wasn't captured?");
832ec0b9908SSam McCall 
833ec0b9908SSam McCall       // Mapping starts here...
834ec0b9908SSam McCall       TokenBuffer::Mapping Mapping;
835ec0b9908SSam McCall       Mapping.BeginExpanded = NextExpanded;
836ec0b9908SSam McCall       Mapping.BeginSpelled = NextSpelled;
837ec0b9908SSam McCall       // ... consumes spelled tokens within bounds we captured ...
838ec0b9908SSam McCall       while (NextSpelled < SpelledTokens.size() &&
839ec0b9908SSam McCall              SpelledTokens[NextSpelled].location() <= End)
840ec0b9908SSam McCall         ++NextSpelled;
841ec0b9908SSam McCall       // ... consumes expanded tokens rooted at the same expansion ...
842ec0b9908SSam McCall       while (NextExpanded < Result.ExpandedTokens.size() &&
843ec0b9908SSam McCall              SM.getExpansionLoc(
844ec0b9908SSam McCall                  Result.ExpandedTokens[NextExpanded].location()) == Expansion)
845ec0b9908SSam McCall         ++NextExpanded;
846ec0b9908SSam McCall       // ... and ends here.
847ec0b9908SSam McCall       Mapping.EndExpanded = NextExpanded;
848ec0b9908SSam McCall       Mapping.EndSpelled = NextSpelled;
849ec0b9908SSam McCall       Result.Files[File].Mappings.push_back(Mapping);
850e7230ea7SIlya Biryukov     }
851e7230ea7SIlya Biryukov   }
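  // Illustrative sketch (added commentary, not part of the upstream source),
  // assuming an input like:
  //   #define TIMES2(X) X * 2
  //   int a = TIMES2(3);
  // One advance() call at the expansion maps the spelled tokens
  //   TIMES2 ( 3 )   (bounded by the CollectedExpansions entry)
  // to the expanded tokens
  //   3 * 2          (all rooted at the same expansion location),
  // while the surrounding `int a =` and `;` are handled by the file-token
  // branch above, which records no mapping.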
852e7230ea7SIlya Biryukov 
853ec0b9908SSam McCall   // advance() is supposed to consume at least one token - if not, we crash.
854ec0b9908SSam McCall   void diagnoseAdvanceFailure() {
855ec0b9908SSam McCall #ifndef NDEBUG
856ec0b9908SSam McCall     // Show the failed-to-map token in context.
857ec0b9908SSam McCall     for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
858ec0b9908SSam McCall          I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
859ec0b9908SSam McCall       const char *L =
860ec0b9908SSam McCall           (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
861ec0b9908SSam McCall       llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
862e7230ea7SIlya Biryukov     }
863ec0b9908SSam McCall #endif
864ec0b9908SSam McCall     llvm_unreachable("Couldn't map expanded token to spelled tokens!");
865e7230ea7SIlya Biryukov   }
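  // Note (added commentary, not part of the upstream source): the dump above
  // prefixes already-mapped tokens with "ok ", the token that could not be
  // mapped with "!! ", and the tokens after it with spaces, showing a window
  // of roughly ten tokens before and a few after the failure point.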
866e7230ea7SIlya Biryukov 
867e7230ea7SIlya Biryukov   /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
868e7230ea7SIlya Biryukov   /// ranges for each of the files.
869e7230ea7SIlya Biryukov   void buildSpelledTokens() {
870e7230ea7SIlya Biryukov     for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
871ec0b9908SSam McCall       const auto &Tok = Result.ExpandedTokens[I];
872ec0b9908SSam McCall       auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
873e7230ea7SIlya Biryukov       auto It = Result.Files.try_emplace(FID);
874e7230ea7SIlya Biryukov       TokenBuffer::MarkedFile &File = It.first->second;
875e7230ea7SIlya Biryukov 
876ec0b9908SSam McCall       // The eof token should not be considered part of the main-file's range.
877ec0b9908SSam McCall       File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
878ec0b9908SSam McCall 
879e7230ea7SIlya Biryukov       if (!It.second)
880e7230ea7SIlya Biryukov         continue; // we have seen this file before.
881e7230ea7SIlya Biryukov       // This is the first time we see this file.
882e7230ea7SIlya Biryukov       File.BeginExpanded = I;
883e7230ea7SIlya Biryukov       File.SpelledTokens = tokenize(FID, SM, LangOpts);
884e7230ea7SIlya Biryukov     }
885e7230ea7SIlya Biryukov   }
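  // Illustrative sketch (added commentary, not part of the upstream source):
  // for a main file that #includes a header, the walk above creates one
  // MarkedFile per FileID it encounters. Each file's spelled tokens are
  // lexed once via tokenize(FID, SM, LangOpts), BeginExpanded records the
  // first expanded token attributed to that file, and EndExpanded keeps
  // growing as later tokens from the same file appear (the trailing eof is
  // deliberately excluded from the main file's expanded range).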
886e7230ea7SIlya Biryukov 
887e7230ea7SIlya Biryukov   TokenBuffer Result;
888ec0b9908SSam McCall   unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
889ec0b9908SSam McCall   llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
8905e69f27eSIlya Biryukov   PPExpansions CollectedExpansions;
891e7230ea7SIlya Biryukov   const SourceManager &SM;
892e7230ea7SIlya Biryukov   const LangOptions &LangOpts;
893e7230ea7SIlya Biryukov };
894e7230ea7SIlya Biryukov 
895e7230ea7SIlya Biryukov TokenBuffer TokenCollector::consume() && {
896e7230ea7SIlya Biryukov   PP.setTokenWatcher(nullptr);
8975e69f27eSIlya Biryukov   Collector->disable();
8985e69f27eSIlya Biryukov   return Builder(std::move(Expanded), std::move(Expansions),
8995e69f27eSIlya Biryukov                  PP.getSourceManager(), PP.getLangOpts())
900e7230ea7SIlya Biryukov       .build();
901e7230ea7SIlya Biryukov }
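// Illustrative usage (added commentary, not part of the upstream source):
//   syntax::TokenCollector Collector(PP); // created before preprocessing
//   /* ...run the parse... */
//   syntax::TokenBuffer Tokens = std::move(Collector).consume();
// consume() is &&-qualified, so it must be called on an rvalue; it detaches
// the token watcher, disables the PPCallbacks collector, and hands the
// gathered expanded tokens and expansion bounds to Builder::build().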
902e7230ea7SIlya Biryukov 
903e7230ea7SIlya Biryukov std::string syntax::Token::str() const {
904adcd0268SBenjamin Kramer   return std::string(llvm::formatv("Token({0}, length = {1})",
905adcd0268SBenjamin Kramer                                    tok::getTokenName(kind()), length()));
906e7230ea7SIlya Biryukov }
907e7230ea7SIlya Biryukov 
908e7230ea7SIlya Biryukov std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
909cdce2fe5SMarcel Hlopko   return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
910cdce2fe5SMarcel Hlopko                                    tok::getTokenName(kind()), length()));
911e7230ea7SIlya Biryukov }
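// Illustrative output (added commentary, not part of the upstream source):
// per the format string above, an identifier token spelled `foo` would dump
// as something like: Token(`foo`, identifier, length = 3)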
912e7230ea7SIlya Biryukov 
913e7230ea7SIlya Biryukov std::string TokenBuffer::dumpForTests() const {
914e7230ea7SIlya Biryukov   auto PrintToken = [this](const syntax::Token &T) -> std::string {
915e7230ea7SIlya Biryukov     if (T.kind() == tok::eof)
916e7230ea7SIlya Biryukov       return "<eof>";
917adcd0268SBenjamin Kramer     return std::string(T.text(*SourceMgr));
918e7230ea7SIlya Biryukov   };
919e7230ea7SIlya Biryukov 
920e7230ea7SIlya Biryukov   auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
921e7230ea7SIlya Biryukov                                         llvm::ArrayRef<syntax::Token> Tokens) {
92226c066d6SIlya Biryukov     if (Tokens.empty()) {
923e7230ea7SIlya Biryukov       OS << "<empty>";
924e7230ea7SIlya Biryukov       return;
925e7230ea7SIlya Biryukov     }
926e7230ea7SIlya Biryukov     OS << Tokens[0].text(*SourceMgr);
927e7230ea7SIlya Biryukov     for (unsigned I = 1; I < Tokens.size(); ++I) {
928e7230ea7SIlya Biryukov       if (Tokens[I].kind() == tok::eof)
929e7230ea7SIlya Biryukov         continue;
930e7230ea7SIlya Biryukov       OS << " " << PrintToken(Tokens[I]);
931e7230ea7SIlya Biryukov     }
932e7230ea7SIlya Biryukov   };
933e7230ea7SIlya Biryukov 
934e7230ea7SIlya Biryukov   std::string Dump;
935e7230ea7SIlya Biryukov   llvm::raw_string_ostream OS(Dump);
936e7230ea7SIlya Biryukov 
937e7230ea7SIlya Biryukov   OS << "expanded tokens:\n"
938e7230ea7SIlya Biryukov      << "  ";
93926c066d6SIlya Biryukov   // (!) we do not show '<eof>'.
940a3c248dbSserge-sans-paille   DumpTokens(OS, llvm::ArrayRef(ExpandedTokens).drop_back());
941e7230ea7SIlya Biryukov   OS << "\n";
942e7230ea7SIlya Biryukov 
943e7230ea7SIlya Biryukov   std::vector<FileID> Keys;
944e52a8b89SManna, Soumi   for (const auto &F : Files)
945e7230ea7SIlya Biryukov     Keys.push_back(F.first);
946e7230ea7SIlya Biryukov   llvm::sort(Keys);
947e7230ea7SIlya Biryukov 
948e7230ea7SIlya Biryukov   for (FileID ID : Keys) {
949e7230ea7SIlya Biryukov     const MarkedFile &File = Files.find(ID)->second;
950523c4712SJan Svoboda     auto Entry = SourceMgr->getFileEntryRefForID(ID);
951e7230ea7SIlya Biryukov     if (!Entry)
952e7230ea7SIlya Biryukov       continue; // Skip builtin files.
9535523fefbSJan Svoboda     std::string Path = llvm::sys::path::convert_to_slash(Entry->getName());
9545523fefbSJan Svoboda     OS << llvm::formatv("file '{0}'\n", Path) << "  spelled tokens:\n"
955e7230ea7SIlya Biryukov        << "    ";
956e7230ea7SIlya Biryukov     DumpTokens(OS, File.SpelledTokens);
957e7230ea7SIlya Biryukov     OS << "\n";
958e7230ea7SIlya Biryukov 
959e7230ea7SIlya Biryukov     if (File.Mappings.empty()) {
960e7230ea7SIlya Biryukov       OS << "  no mappings.\n";
961e7230ea7SIlya Biryukov       continue;
962e7230ea7SIlya Biryukov     }
963e7230ea7SIlya Biryukov     OS << "  mappings:\n";
964e7230ea7SIlya Biryukov     for (auto &M : File.Mappings) {
965e7230ea7SIlya Biryukov       OS << llvm::formatv(
966e7230ea7SIlya Biryukov           "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
967e7230ea7SIlya Biryukov           PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
968e7230ea7SIlya Biryukov           M.EndSpelled == File.SpelledTokens.size()
969e7230ea7SIlya Biryukov               ? "<eof>"
970e7230ea7SIlya Biryukov               : PrintToken(File.SpelledTokens[M.EndSpelled]),
971e7230ea7SIlya Biryukov           M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
972e7230ea7SIlya Biryukov           M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
973e7230ea7SIlya Biryukov           M.EndExpanded);
974e7230ea7SIlya Biryukov     }
975e7230ea7SIlya Biryukov   }
9765336befeSLogan Smith   return Dump;
977e7230ea7SIlya Biryukov }
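// Illustrative output shape (added commentary, not part of the upstream
// source), assuming a file named input.cpp containing:
//   #define TIMES2(X) X * 2
//   int a = TIMES2(3);
// the dump would look roughly like:
//   expanded tokens:
//     int a = 3 * 2 ;
//   file 'input.cpp'
//     spelled tokens:
//       # define TIMES2 ( X ) X * 2 int a = TIMES2 ( 3 ) ;
//     mappings:
//       ['#'_0, 'int'_9) => ['int'_0, 'int'_0)
//       ['TIMES2'_12, ';'_16) => ['3'_3, ';'_6)
// where the subscripts are indices into the spelled (left) and expanded
// (right) token streams.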
978