xref: /llvm-project/clang/unittests/Tooling/Syntax/TokensTest.cpp (revision 9d1dada57741d204f8a95aa2b0c89a7242e101f1)
1 //===- TokensTest.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.def"
#include "clang/Basic/TokenKinds.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Annotations/Annotations.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>
#include <optional>
#include <ostream>
#include <string>
49 
50 using namespace clang;
51 using namespace clang::syntax;
52 
53 using llvm::ValueIs;
54 using ::testing::_;
55 using ::testing::AllOf;
56 using ::testing::Contains;
57 using ::testing::ElementsAre;
58 using ::testing::Field;
59 using ::testing::IsEmpty;
60 using ::testing::Matcher;
61 using ::testing::Not;
62 using ::testing::Pointee;
63 using ::testing::StartsWith;
64 
65 namespace {
66 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
67 // argument.
68 MATCHER_P(SameRange, A, "") {
69   return A.begin() == arg.begin() && A.end() == arg.end();
70 }
71 
72 Matcher<TokenBuffer::Expansion>
73 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
74             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
75   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
76                Field(&TokenBuffer::Expansion::Expanded, Expanded));
77 }
78 // Matchers for syntax::Token.
79 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
80 MATCHER_P2(HasText, Text, SourceMgr, "") {
81   return arg.text(*SourceMgr) == Text;
82 }
83 /// Checks the start and end location of a token are equal to SourceRng.
84 MATCHER_P(RangeIs, SourceRng, "") {
85   return arg.location() == SourceRng.first &&
86          arg.endLocation() == SourceRng.second;
87 }
88 
89 class TokenCollectorTest : public ::testing::Test {
90 public:
91   /// Run the clang frontend, collect the preprocessed tokens from the frontend
92   /// invocation and store them in this->Buffer.
93   /// This also clears SourceManager before running the compiler.
94   void recordTokens(llvm::StringRef Code) {
95     class RecordTokens : public ASTFrontendAction {
96     public:
97       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
98 
99       bool BeginSourceFileAction(CompilerInstance &CI) override {
100         assert(!Collector && "expected only a single call to BeginSourceFile");
101         Collector.emplace(CI.getPreprocessor());
102         return true;
103       }
104       void EndSourceFileAction() override {
105         assert(Collector && "BeginSourceFileAction was never called");
106         Result = std::move(*Collector).consume();
107         Result.indexExpandedTokens();
108       }
109 
110       std::unique_ptr<ASTConsumer>
111       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
112         return std::make_unique<ASTConsumer>();
113       }
114 
115     private:
116       TokenBuffer &Result;
117       std::optional<TokenCollector> Collector;
118     };
119 
120     constexpr const char *FileName = "./input.cpp";
121     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
122     // Prepare to run a compiler.
123     if (!Diags->getClient())
124       Diags->setClient(new IgnoringDiagConsumer);
125     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
126                                       FileName};
127     CreateInvocationOptions CIOpts;
128     CIOpts.Diags = Diags;
129     CIOpts.VFS = FS;
130     auto CI = createInvocation(Args, std::move(CIOpts));
131     assert(CI);
132     CI->getFrontendOpts().DisableFree = false;
133     CI->getPreprocessorOpts().addRemappedFile(
134         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
135     CompilerInstance Compiler;
136     Compiler.setInvocation(std::move(CI));
137     Compiler.setDiagnostics(Diags.get());
138     Compiler.setFileManager(FileMgr.get());
139     Compiler.setSourceManager(SourceMgr.get());
140 
141     this->Buffer = TokenBuffer(*SourceMgr);
142     RecordTokens Recorder(this->Buffer);
143     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
144         << "failed to run the frontend";
145   }
146 
147   /// Record the tokens and return a test dump of the resulting buffer.
148   std::string collectAndDump(llvm::StringRef Code) {
149     recordTokens(Code);
150     return Buffer.dumpForTests();
151   }
152 
153   // Adds a file to the test VFS.
154   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
155     if (!FS->addFile(Path, time_t(),
156                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
157       ADD_FAILURE() << "could not add a file to VFS: " << Path;
158     }
159   }
160 
161   /// Add a new file, run syntax::tokenize() on the range if any, run it on the
162   /// whole file otherwise and return the results.
163   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
164     llvm::Annotations Annot(Text);
165     auto FID = SourceMgr->createFileID(
166         llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
167     // FIXME: pass proper LangOptions.
168     if (Annot.ranges().empty())
169       return syntax::tokenize(FID, *SourceMgr, LangOptions());
170     return syntax::tokenize(
171         syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
172         *SourceMgr, LangOptions());
173   }
174 
175   // Specialized versions of matchers that hide the SourceManager from clients.
176   Matcher<syntax::Token> HasText(std::string Text) const {
177     return ::HasText(Text, SourceMgr.get());
178   }
179   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
180     std::pair<SourceLocation, SourceLocation> Ls;
181     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
182                    .getLocWithOffset(R.Begin);
183     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
184                     .getLocWithOffset(R.End);
185     return ::RangeIs(Ls);
186   }
187 
188   /// Finds a subrange in O(n * m).
189   template <class T, class U, class Eq>
190   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
191                                  llvm::ArrayRef<T> Range, Eq F) {
192     assert(Subrange.size() >= 1);
193     if (Range.size() < Subrange.size())
194       return llvm::ArrayRef(Range.end(), Range.end());
195     for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
196          Begin <= Last; ++Begin) {
197       auto It = Begin;
198       for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
199            ++ItSub, ++It) {
200         if (!F(*ItSub, *It))
201           goto continue_outer;
202       }
203       return llvm::ArrayRef(Begin, It);
204     continue_outer:;
205     }
206     return llvm::ArrayRef(Range.end(), Range.end());
207   }
208 
209   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
210   /// The match should be unique. \p Query is a whitespace-separated list of
211   /// tokens to search for.
212   llvm::ArrayRef<syntax::Token>
213   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
214     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
215     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
216     if (QueryTokens.empty()) {
217       ADD_FAILURE() << "will not look for an empty list of tokens";
218       std::abort();
219     }
220     // An equality test for search.
221     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
222       return Q == T.text(*SourceMgr);
223     };
224     // Find a match.
225     auto Found = findSubrange(llvm::ArrayRef(QueryTokens), Tokens, TextMatches);
226     if (Found.begin() == Tokens.end()) {
227       ADD_FAILURE() << "could not find the subrange for " << Query;
228       std::abort();
229     }
230     // Check that the match is unique.
231     if (findSubrange(llvm::ArrayRef(QueryTokens),
232                      llvm::ArrayRef(Found.end(), Tokens.end()), TextMatches)
233             .begin() != Tokens.end()) {
234       ADD_FAILURE() << "match is not unique for " << Query;
235       std::abort();
236     }
237     return Found;
238   };
239 
240   // Specialized versions of findTokenRange for expanded and spelled tokens.
241   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
242     return findTokenRange(Query, Buffer.expandedTokens());
243   }
244   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
245                                             FileID File = FileID()) {
246     if (!File.isValid())
247       File = SourceMgr->getMainFileID();
248     return findTokenRange(Query, Buffer.spelledTokens(File));
249   }
250 
251   // Data fields.
252   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
253       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
254   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
255       new llvm::vfs::InMemoryFileSystem;
256   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
257       new FileManager(FileSystemOptions(), FS);
258   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
259       new SourceManager(*Diags, *FileMgr);
260   /// Contains last result of calling recordTokens().
261   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
262 };
263 
264 TEST_F(TokenCollectorTest, RawMode) {
265   EXPECT_THAT(tokenize("int main() {}"),
266               ElementsAre(Kind(tok::kw_int),
267                           AllOf(HasText("main"), Kind(tok::identifier)),
268                           Kind(tok::l_paren), Kind(tok::r_paren),
269                           Kind(tok::l_brace), Kind(tok::r_brace)));
270   // Comments are ignored for now.
271   EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
272               ElementsAre(Kind(tok::kw_int),
273                           AllOf(HasText("a"), Kind(tok::identifier)),
274                           Kind(tok::semi)));
275   EXPECT_THAT(tokenize("int [[main() {]]}"),
276               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
277                           Kind(tok::l_paren), Kind(tok::r_paren),
278                           Kind(tok::l_brace)));
279   EXPECT_THAT(tokenize("int [[main() {   ]]}"),
280               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
281                           Kind(tok::l_paren), Kind(tok::r_paren),
282                           Kind(tok::l_brace)));
283   // First token is partially parsed, last token is fully included even though
284   // only a part of it is contained in the range.
285   EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
286               ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)),
287                           Kind(tok::l_paren), Kind(tok::r_paren),
288                           Kind(tok::l_brace), Kind(tok::kw_return)));
289 }
290 
291 TEST_F(TokenCollectorTest, Basic) {
292   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
293       {"int main() {}",
294        R"(expanded tokens:
295   int main ( ) { }
296 file './input.cpp'
297   spelled tokens:
298     int main ( ) { }
299   no mappings.
300 )"},
301       // All kinds of whitespace are ignored.
302       {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
303        R"(expanded tokens:
304   int main ( ) { }
305 file './input.cpp'
306   spelled tokens:
307     int main ( ) { }
308   no mappings.
309 )"},
310       // Annotation tokens are ignored.
311       {R"cpp(
312         #pragma GCC visibility push (public)
313         #pragma GCC visibility pop
314       )cpp",
315        R"(expanded tokens:
316   <empty>
317 file './input.cpp'
318   spelled tokens:
319     # pragma GCC visibility push ( public ) # pragma GCC visibility pop
320   mappings:
321     ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
322 )"},
323       // Empty files should not crash.
324       {R"cpp()cpp", R"(expanded tokens:
325   <empty>
326 file './input.cpp'
327   spelled tokens:
328     <empty>
329   no mappings.
330 )"},
331       // Should not crash on errors inside '#define' directives. Error is that
332       // stringification (#B) does not refer to a macro parameter.
333       {
334           R"cpp(
335 a
336 #define MACRO() A #B
337 )cpp",
338           R"(expanded tokens:
339   a
340 file './input.cpp'
341   spelled tokens:
342     a # define MACRO ( ) A # B
343   mappings:
344     ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
345 )"}};
346   for (auto &Test : TestCases)
347     EXPECT_EQ(collectAndDump(Test.first), Test.second)
348         << collectAndDump(Test.first);
349 }
350 
351 TEST_F(TokenCollectorTest, Locations) {
352   // Check locations of the tokens.
353   llvm::Annotations Code(R"cpp(
354     $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
355   )cpp");
356   recordTokens(Code.code());
357   // Check expanded tokens.
358   EXPECT_THAT(
359       Buffer.expandedTokens(),
360       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
361                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
362                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
363                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
364                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
365                   Kind(tok::eof)));
366   // Check spelled tokens.
367   EXPECT_THAT(
368       Buffer.spelledTokens(SourceMgr->getMainFileID()),
369       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
370                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
371                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
372                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
373                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
374 
375   auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
376   for (auto &R : Code.ranges()) {
377     EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)),
378                 Pointee(RangeIs(R)));
379   }
380 }
381 
382 TEST_F(TokenCollectorTest, MacroDirectives) {
383   // Macro directives are not stored anywhere at the moment.
384   std::string Code = R"cpp(
385     #define FOO a
386     #include "unresolved_file.h"
387     #undef FOO
388     #ifdef X
389     #else
390     #endif
391     #ifndef Y
392     #endif
393     #if 1
394     #elif 2
395     #else
396     #endif
397     #pragma once
398     #pragma something lalala
399 
400     int a;
401   )cpp";
402   std::string Expected =
403       "expanded tokens:\n"
404       "  int a ;\n"
405       "file './input.cpp'\n"
406       "  spelled tokens:\n"
407       "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
408       "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
409       "# endif # pragma once # pragma something lalala int a ;\n"
410       "  mappings:\n"
411       "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
412   EXPECT_EQ(collectAndDump(Code), Expected);
413 }
414 
415 TEST_F(TokenCollectorTest, MacroReplacements) {
416   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
417       // A simple object-like macro.
418       {R"cpp(
419     #define INT int const
420     INT a;
421   )cpp",
422        R"(expanded tokens:
423   int const a ;
424 file './input.cpp'
425   spelled tokens:
426     # define INT int const INT a ;
427   mappings:
428     ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
429     ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
430 )"},
431       // A simple function-like macro.
432       {R"cpp(
433     #define INT(a) const int
434     INT(10+10) a;
435   )cpp",
436        R"(expanded tokens:
437   const int a ;
438 file './input.cpp'
439   spelled tokens:
440     # define INT ( a ) const int INT ( 10 + 10 ) a ;
441   mappings:
442     ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
443     ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
444 )"},
445       // Recursive macro replacements.
446       {R"cpp(
447     #define ID(X) X
448     #define INT int const
449     ID(ID(INT)) a;
450   )cpp",
451        R"(expanded tokens:
452   int const a ;
453 file './input.cpp'
454   spelled tokens:
455     # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
456   mappings:
457     ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
458     ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
459 )"},
460       // A little more complicated recursive macro replacements.
461       {R"cpp(
462     #define ADD(X, Y) X+Y
463     #define MULT(X, Y) X*Y
464 
465     int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
466   )cpp",
467        "expanded tokens:\n"
468        "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
469        "file './input.cpp'\n"
470        "  spelled tokens:\n"
471        "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
472        "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
473        "  mappings:\n"
474        "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
475        "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
476       // Empty macro replacement.
477       // FIXME: the #define directives should not be glued together.
478       {R"cpp(
479     #define EMPTY
480     #define EMPTY_FUNC(X)
481     EMPTY
482     EMPTY_FUNC(1+2+3)
483     )cpp",
484        R"(expanded tokens:
485   <empty>
486 file './input.cpp'
487   spelled tokens:
488     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
489   mappings:
490     ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
491     ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
492     ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
493 )"},
494       // File ends with a macro replacement.
495       {R"cpp(
496     #define FOO 10+10;
497     int a = FOO
498     )cpp",
499        R"(expanded tokens:
500   int a = 10 + 10 ;
501 file './input.cpp'
502   spelled tokens:
503     # define FOO 10 + 10 ; int a = FOO
504   mappings:
505     ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
506     ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
507 )"},
508       {R"cpp(
509          #define NUM 42
510          #define ID(a) a
511          #define M 1 + ID
512          M(NUM)
513        )cpp",
514        R"(expanded tokens:
515   1 + 42
516 file './input.cpp'
517   spelled tokens:
518     # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
519   mappings:
520     ['#'_0, 'M'_17) => ['1'_0, '1'_0)
521     ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
522 )"},
523   };
524 
525   for (auto &Test : TestCases) {
526     std::string Dump = collectAndDump(Test.first);
527     EXPECT_EQ(Test.second, Dump) << Dump;
528   }
529 }
530 
531 TEST_F(TokenCollectorTest, SpecialTokens) {
532   // Tokens coming from concatenations.
533   recordTokens(R"cpp(
534     #define CONCAT(a, b) a ## b
535     int a = CONCAT(1, 2);
536   )cpp");
537   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
538               Contains(HasText("12")));
539   // Multi-line tokens with slashes at the end.
540   recordTokens("i\\\nn\\\nt");
541   EXPECT_THAT(Buffer.expandedTokens(),
542               ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
543                           Kind(tok::eof)));
544   // FIXME: test tokens with digraphs and UCN identifiers.
545 }
546 
547 TEST_F(TokenCollectorTest, LateBoundTokens) {
548   // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
549   // but we choose to record them as a single token (for now).
550   llvm::Annotations Code(R"cpp(
551     template <class T>
552     struct foo { int a; };
553     int bar = foo<foo<int$br[[>>]]().a;
554     int baz = 10 $op[[>>]] 2;
555   )cpp");
556   recordTokens(Code.code());
557   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
558               AllOf(Contains(AllOf(Kind(tok::greatergreater),
559                                    RangeIs(Code.range("br")))),
560                     Contains(AllOf(Kind(tok::greatergreater),
561                                    RangeIs(Code.range("op"))))));
562 }
563 
564 TEST_F(TokenCollectorTest, DelayedParsing) {
565   llvm::StringLiteral Code = R"cpp(
566     struct Foo {
567       int method() {
568         // Parser will visit method bodies and initializers multiple times, but
569         // TokenBuffer should only record the first walk over the tokens;
570         return 100;
571       }
572       int a = 10;
573 
574       struct Subclass {
575         void foo() {
576           Foo().method();
577         }
578       };
579     };
580   )cpp";
581   std::string ExpectedTokens =
582       "expanded tokens:\n"
583       "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
584       "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
585   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
586 }
587 
588 TEST_F(TokenCollectorTest, MultiFile) {
589   addFile("./foo.h", R"cpp(
590     #define ADD(X, Y) X+Y
591     int a = 100;
592     #include "bar.h"
593   )cpp");
594   addFile("./bar.h", R"cpp(
595     int b = ADD(1, 2);
596     #define MULT(X, Y) X*Y
597   )cpp");
598   llvm::StringLiteral Code = R"cpp(
599     #include "foo.h"
600     int c = ADD(1, MULT(2,3));
601   )cpp";
602 
603   std::string Expected = R"(expanded tokens:
604   int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
605 file './input.cpp'
606   spelled tokens:
607     # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
608   mappings:
609     ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
610     ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
611 file './foo.h'
612   spelled tokens:
613     # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
614   mappings:
615     ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
616     ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
617 file './bar.h'
618   spelled tokens:
619     int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
620   mappings:
621     ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
622     ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
623 )";
624 
625   EXPECT_EQ(Expected, collectAndDump(Code))
626       << "input: " << Code << "\nresults: " << collectAndDump(Code);
627 }
628 
// Fixture for the TokenBuffer tests. Adds nothing of its own; it only gives
// these tests a separate suite name while reusing TokenCollectorTest's helpers.
class TokenBufferTest : public TokenCollectorTest {};
630 
// Checks TokenBuffer::spelledForExpanded(): mapping ranges of expanded tokens
// back to the spelled tokens they come from. Every section below records a new
// buffer, so expectations only refer to the code recorded right before them.
TEST_F(TokenBufferTest, SpelledByExpanded) {
  // No macros involved.
  recordTokens(R"cpp(
    a1 a2 a3 b1 b2
  )cpp");

  // Expanded and spelled tokens are stored separately.
  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
  // Searching for subranges of expanded tokens should give the corresponding
  // spelled ones.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
              ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
              ValueIs(SameRange(findSpelled("a1 a2 a3"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
              ValueIs(SameRange(findSpelled("b1 b2"))));

  // Test search on simple macro expansions.
  recordTokens(R"cpp(
    #define A a1 a2 a3
    #define B b1 b2

    A split B
  )cpp");
  // Ranges going across expansion boundaries.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
              ValueIs(SameRange(findSpelled("A split B"))));
  // The queries "A split" and "split B" make the match unique; the extra
  // token is then dropped to leave only the macro name at the use site.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
              ValueIs(SameRange(findSpelled("A split").drop_back())));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
              ValueIs(SameRange(findSpelled("split B").drop_front())));
  // Ranges not fully covering macro invocations should fail.
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), std::nullopt);
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
            std::nullopt);

  // Recursive macro invocations.
  recordTokens(R"cpp(
    #define ID(x) x
    #define B b1 b2

    ID(ID(ID(a1) a2 a3)) split ID(B)
  )cpp");

  // "( B" selects the B written as the argument of ID; dropping the
  // parenthesis leaves just that token.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
              ValueIs(SameRange(findSpelled("( B").drop_front())));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
              ValueIs(SameRange(findSpelled(
                  "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
  // Mixed ranges with expanded and spelled tokens.
  EXPECT_THAT(
      Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
      ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
              ValueIs(SameRange(findSpelled("split ID ( B )"))));
  // Macro arguments
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
              ValueIs(SameRange(findSpelled("a1"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
              ValueIs(SameRange(findSpelled("a2"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
              ValueIs(SameRange(findSpelled("a3"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
              ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
              ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));

  // Empty macro expansions.
  recordTokens(R"cpp(
    #define EMPTY
    #define ID(X) X

    EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
    EMPTY EMPTY ID(4 5 6) split2
    ID(7 8 9) EMPTY EMPTY
  )cpp");
  // The neighbouring EMPTY invocations are not part of the results.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
              ValueIs(SameRange(findSpelled("1 2 3"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
              ValueIs(SameRange(findSpelled("4 5 6"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
              ValueIs(SameRange(findSpelled("7 8 9"))));

  // Empty mappings coming from various directives.
  recordTokens(R"cpp(
    #define ID(X) X
    ID(1)
    #pragma lalala
    not_mapped
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
              ValueIs(SameRange(findSpelled("not_mapped"))));

  // Multiple macro arguments
  recordTokens(R"cpp(
    #define ID(X) X
    #define ID2(X, Y) X Y

    ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
  )cpp");
  // Should fail, spans multiple arguments.
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
              ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
  EXPECT_THAT(
      Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
      ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
              ValueIs(SameRange(findSpelled("a5 a6"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
              ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
  // Should fail, spans multiple invocations.
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")),
            std::nullopt);

  // https://github.com/clangd/clangd/issues/1289
  // `y` is written in the body of INDIRECT, not at the place where INDIRECT is
  // invoked, and covers only a part of that expansion.
  recordTokens(R"cpp(
    #define FOO(X) foo(X)
    #define INDIRECT FOO(y)
    INDIRECT // expands to foo(y)
  )cpp");
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), std::nullopt);

  // An argument that is surrounded by tokens from the macro body.
  recordTokens(R"cpp(
    #define FOO(X) a X b
    FOO(y)
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y")),
              ValueIs(SameRange(findSpelled("y"))));

  // `1` is the whole expansion of BAR, so the result is the BAR token at the
  // use site: ") BAR" matches the end of the #define followed by that use.
  recordTokens(R"cpp(
    #define ID(X) X
    #define BAR ID(1)
    BAR
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1")),
              ValueIs(SameRange(findSpelled(") BAR").drop_front())));

  // Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
  recordTokens(R"cpp(
    #define ID(X) X
    ID(prev good)
    ID(prev ID(good2))
    #define LARGE ID(prev ID(bad))
    LARGE
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
              ValueIs(SameRange(findSpelled("good"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
              ValueIs(SameRange(findSpelled("good2"))));
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);

  // The preceding token comes from a separate object-like macro.
  recordTokens(R"cpp(
    #define PREV prev
    #define ID(X) X
    PREV ID(good)
    #define LARGE PREV ID(bad)
    LARGE
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
              ValueIs(SameRange(findSpelled("good"))));
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);

  // The preceding token is a different argument of the same macro.
  recordTokens(R"cpp(
    #define ID(X) X
    #define ID2(X, Y) X Y
    ID2(prev, good)
    ID2(prev, ID(good2))
    #define LARGE ID2(prev, bad)
    LARGE
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
              ValueIs(SameRange(findSpelled("good"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
              ValueIs(SameRange(findSpelled("good2"))));
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);

  // Prev from macro body.
  recordTokens(R"cpp(
    #define ID(X) X
    #define ID2(X, Y) X prev ID(Y)
    ID2(not_prev, good)
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
              ValueIs(SameRange(findSpelled("good"))));
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good")), std::nullopt);
}
818 
819 TEST_F(TokenBufferTest, NoCrashForEofToken) {
820   recordTokens(R"cpp(
821     int main() {
822   )cpp");
823   ASSERT_TRUE(!Buffer.expandedTokens().empty());
824   ASSERT_EQ(Buffer.expandedTokens().back().kind(), tok::eof);
825   // Expanded range including `eof` is handled gracefully (`eof` is ignored).
826   EXPECT_THAT(
827       Buffer.spelledForExpanded(Buffer.expandedTokens()),
828       ValueIs(SameRange(Buffer.spelledTokens(SourceMgr->getMainFileID()))));
829 }
830 
831 TEST_F(TokenBufferTest, ExpandedTokensForRange) {
832   recordTokens(R"cpp(
833     #define SIGN(X) X##_washere
834     A SIGN(B) C SIGN(D) E SIGN(F) G
835   )cpp");
836 
837   SourceRange R(findExpanded("C").front().location(),
838                 findExpanded("F_washere").front().location());
839   // Expanded and spelled tokens are stored separately.
840   EXPECT_THAT(Buffer.expandedTokens(R),
841               SameRange(findExpanded("C D_washere E F_washere")));
842   EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
843 }
844 
845 TEST_F(TokenBufferTest, ExpansionsOverlapping) {
846   // Object-like macro expansions.
847   recordTokens(R"cpp(
848     #define FOO 3+4
849     int a = FOO 1;
850     int b = FOO 2;
851   )cpp");
852 
853   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1");
854   EXPECT_THAT(
855       Buffer.expansionStartingAt(Foo1.data()),
856       ValueIs(IsExpansion(SameRange(Foo1.drop_back()),
857                           SameRange(findExpanded("3 + 4 1").drop_back()))));
858   EXPECT_THAT(
859       Buffer.expansionsOverlapping(Foo1),
860       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()),
861                               SameRange(findExpanded("3 + 4 1").drop_back()))));
862 
863   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2");
864   EXPECT_THAT(
865       Buffer.expansionStartingAt(Foo2.data()),
866       ValueIs(IsExpansion(SameRange(Foo2.drop_back()),
867                           SameRange(findExpanded("3 + 4 2").drop_back()))));
868   EXPECT_THAT(
869       Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())),
870       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _),
871                   IsExpansion(SameRange(Foo2.drop_back()), _)));
872 
873   // Function-like macro expansions.
874   recordTokens(R"cpp(
875     #define ID(X) X
876     int a = ID(1+2+3);
877     int b = ID(ID(2+3+4));
878   )cpp");
879 
880   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
881   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
882               ValueIs(IsExpansion(SameRange(ID1),
883                                   SameRange(findExpanded("1 + 2 + 3")))));
884   // Only the first spelled token should be found.
885   for (const auto &T : ID1.drop_front())
886     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
887 
888   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
889   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
890               ValueIs(IsExpansion(SameRange(ID2),
891                                   SameRange(findExpanded("2 + 3 + 4")))));
892   // Only the first spelled token should be found.
893   for (const auto &T : ID2.drop_front())
894     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
895 
896   EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef(
897                   findSpelled("1 + 2").data(), findSpelled("4").data())),
898               ElementsAre(IsExpansion(SameRange(ID1), _),
899                           IsExpansion(SameRange(ID2), _)));
900 
901   // PP directives.
902   recordTokens(R"cpp(
903 #define FOO 1
904 int a = FOO;
905 #pragma once
906 int b = 1;
907   )cpp");
908 
909   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
910   EXPECT_THAT(
911       Buffer.expansionStartingAt(&DefineFoo.front()),
912       ValueIs(IsExpansion(SameRange(DefineFoo),
913                           SameRange(findExpanded("int a").take_front(0)))));
914   // Only the first spelled token should be found.
915   for (const auto &T : DefineFoo.drop_front())
916     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
917 
918   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
919   EXPECT_THAT(
920       Buffer.expansionStartingAt(&PragmaOnce.front()),
921       ValueIs(IsExpansion(SameRange(PragmaOnce),
922                           SameRange(findExpanded("int b").take_front(0)))));
923   // Only the first spelled token should be found.
924   for (const auto &T : PragmaOnce.drop_front())
925     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
926 
927   EXPECT_THAT(
928       Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")),
929       ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _),
930                   IsExpansion(SameRange(PragmaOnce), _)));
931 }
932 
933 TEST_F(TokenBufferTest, TokensToFileRange) {
934   addFile("./foo.h", "token_from_header");
935   llvm::Annotations Code(R"cpp(
936     #define FOO token_from_expansion
937     #include "./foo.h"
938     $all[[$i[[int]] a = FOO;]]
939   )cpp");
940   recordTokens(Code.code());
941 
942   auto &SM = *SourceMgr;
943 
944   // Two simple examples.
945   auto Int = findExpanded("int").front();
946   auto Semi = findExpanded(";").front();
947   EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
948                                      Code.range("i").End));
949   EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
950             FileRange(SM.getMainFileID(), Code.range("all").Begin,
951                       Code.range("all").End));
952   // We don't test assertion failures because death tests are slow.
953 }
954 
955 TEST_F(TokenBufferTest, MacroExpansions) {
956   llvm::Annotations Code(R"cpp(
957     #define FOO B
958     #define FOO2 BA
959     #define CALL(X) int X
960     #define G CALL(FOO2)
961     int B;
962     $macro[[FOO]];
963     $macro[[CALL]](A);
964     $macro[[G]];
965   )cpp");
966   recordTokens(Code.code());
967   auto &SM = *SourceMgr;
968   auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
969   std::vector<FileRange> ExpectedMacroRanges;
970   for (auto Range : Code.ranges("macro"))
971     ExpectedMacroRanges.push_back(
972         FileRange(SM.getMainFileID(), Range.Begin, Range.End));
973   std::vector<FileRange> ActualMacroRanges;
974   for (auto Expansion : Expansions)
975     ActualMacroRanges.push_back(Expansion->range(SM));
976   EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
977 }
978 
979 TEST_F(TokenBufferTest, Touching) {
980   llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
981   recordTokens(Code.code());
982 
983   auto Touching = [&](int Index) {
984     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
985                                                    Code.points()[Index]);
986     return spelledTokensTouching(Loc, Buffer);
987   };
988   auto Identifier = [&](int Index) {
989     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
990                                                    Code.points()[Index]);
991     const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer);
992     return Tok ? Tok->text(*SourceMgr) : "";
993   };
994 
995   EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
996   EXPECT_EQ(Identifier(0), "");
997   EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
998   EXPECT_EQ(Identifier(1), "");
999   EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
1000   EXPECT_EQ(Identifier(2), "");
1001 
1002   EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
1003   EXPECT_EQ(Identifier(3), "ab");
1004   EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
1005   EXPECT_EQ(Identifier(4), "ab");
1006 
1007   EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
1008   EXPECT_EQ(Identifier(5), "ab");
1009 
1010   EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
1011   EXPECT_EQ(Identifier(6), "");
1012 
1013   EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
1014   EXPECT_EQ(Identifier(7), "");
1015 
1016   ASSERT_EQ(Code.points().size(), 8u);
1017 }
1018 
1019 TEST_F(TokenBufferTest, ExpandedBySpelled) {
1020   recordTokens(R"cpp(
1021     a1 a2 a3 b1 b2
1022   )cpp");
1023   // Expanded and spelled tokens are stored separately.
1024   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
1025   // Searching for subranges of expanded tokens should give the corresponding
1026   // spelled ones.
1027   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
1028               ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
1029   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
1030               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1031   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
1032               ElementsAre(SameRange(findExpanded("b1 b2"))));
1033 
1034   // Test search on simple macro expansions.
1035   recordTokens(R"cpp(
1036     #define A a1 a2 a3
1037     #define B b1 b2
1038 
1039     A split B
1040   )cpp");
1041   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
1042               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1043   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
1044               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1045   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
1046               ElementsAre(SameRange(findExpanded("b1 b2"))));
1047 
1048   // Ranges not fully covering macro expansions should fail.
1049   recordTokens(R"cpp(
1050     #define ID(x) x
1051 
1052     ID(a)
1053   )cpp");
1054   // Spelled don't cover entire mapping (missing ID token) -> empty result
1055   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
1056   // Spelled don't cover entire mapping (missing ) token) -> empty result
1057   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
1058 
1059   // Recursive macro invocations.
1060   recordTokens(R"cpp(
1061     #define ID(x) x
1062     #define B b1 b2
1063 
1064     ID(ID(ID(a1) a2 a3)) split ID(B)
1065   )cpp");
1066 
1067   EXPECT_THAT(
1068       Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
1069       ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1070   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
1071               ElementsAre(SameRange(findExpanded("b1 b2"))));
1072   EXPECT_THAT(Buffer.expandedForSpelled(
1073                   findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
1074               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1075   // FIXME: these should succeed, but we do not support macro arguments yet.
1076   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
1077   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
1078               IsEmpty());
1079 
1080   // Empty macro expansions.
1081   recordTokens(R"cpp(
1082     #define EMPTY
1083     #define ID(X) X
1084 
1085     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
1086     EMPTY EMPTY ID(4 5 6) split2
1087     ID(7 8 9) EMPTY EMPTY
1088   )cpp");
1089   // Covered by empty expansions on one of both of the sides.
1090   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
1091               ElementsAre(SameRange(findExpanded("1 2 3"))));
1092   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
1093               ElementsAre(SameRange(findExpanded("4 5 6"))));
1094   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
1095               ElementsAre(SameRange(findExpanded("7 8 9"))));
1096   // Including the empty macro expansions on the side.
1097   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
1098               ElementsAre(SameRange(findExpanded("1 2 3"))));
1099   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
1100               ElementsAre(SameRange(findExpanded("1 2 3"))));
1101   EXPECT_THAT(
1102       Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
1103       ElementsAre(SameRange(findExpanded("1 2 3"))));
1104 
1105   // Empty mappings coming from various directives.
1106   recordTokens(R"cpp(
1107     #define ID(X) X
1108     ID(1)
1109     #pragma lalala
1110     not_mapped
1111   )cpp");
1112   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
1113               IsEmpty());
1114   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
1115               IsEmpty());
1116 
1117   // Empty macro expansion.
1118   recordTokens(R"cpp(
1119     #define EMPTY
1120     EMPTY int a = 100;
1121   )cpp");
1122   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
1123               IsEmpty());
1124 }
1125 
1126 TEST_F(TokenCollectorTest, Pragmas) {
1127   // Tokens coming from concatenations.
1128   recordTokens(R"cpp(
1129     void foo() {
1130       #pragma unroll 4
1131       for(int i=0;i<4;++i);
1132     }
1133   )cpp");
1134 }
1135 } // namespace
1136