xref: /llvm-project/clang/unittests/Tooling/Syntax/TokensTest.cpp (revision 5f1adf0433c6007f8be885b832c852da67e8524c)
1 //===- TokensTest.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Syntax/Tokens.h"
10 #include "clang/AST/ASTConsumer.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticIDs.h"
14 #include "clang/Basic/DiagnosticOptions.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.def"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Frontend/CompilerInstance.h"
24 #include "clang/Frontend/FrontendAction.h"
25 #include "clang/Frontend/Utils.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "clang/Lex/Token.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/Support/FormatVariadic.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/VirtualFileSystem.h"
37 #include "llvm/Support/raw_os_ostream.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Testing/Annotations/Annotations.h"
40 #include "llvm/Testing/Support/SupportHelpers.h"
41 #include <cassert>
42 #include <cstdlib>
43 #include <gmock/gmock.h>
44 #include <gtest/gtest.h>
45 #include <memory>
46 #include <optional>
47 #include <ostream>
48 #include <string>
49 
50 using namespace clang;
51 using namespace clang::syntax;
52 
53 using llvm::ValueIs;
54 using ::testing::_;
55 using ::testing::AllOf;
56 using ::testing::Contains;
57 using ::testing::ElementsAre;
58 using ::testing::Field;
59 using ::testing::IsEmpty;
60 using ::testing::Matcher;
61 using ::testing::Not;
62 using ::testing::Pointee;
63 using ::testing::StartsWith;
64 
65 namespace {
66 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
67 // argument.
68 MATCHER_P(SameRange, A, "") {
69   return A.begin() == arg.begin() && A.end() == arg.end();
70 }
71 
72 Matcher<TokenBuffer::Expansion>
IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,Matcher<llvm::ArrayRef<syntax::Token>> Expanded)73 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
74             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
75   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
76                Field(&TokenBuffer::Expansion::Expanded, Expanded));
77 }
78 // Matchers for syntax::Token.
79 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
80 MATCHER_P2(HasText, Text, SourceMgr, "") {
81   return arg.text(*SourceMgr) == Text;
82 }
83 /// Checks the start and end location of a token are equal to SourceRng.
84 MATCHER_P(RangeIs, SourceRng, "") {
85   return arg.location() == SourceRng.first &&
86          arg.endLocation() == SourceRng.second;
87 }
88 
89 class TokenCollectorTest : public ::testing::Test {
90 public:
91   /// Run the clang frontend, collect the preprocessed tokens from the frontend
92   /// invocation and store them in this->Buffer.
93   /// This also clears SourceManager before running the compiler.
recordTokens(llvm::StringRef Code)94   void recordTokens(llvm::StringRef Code) {
95     class RecordTokens : public ASTFrontendAction {
96     public:
97       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
98 
99       bool BeginSourceFileAction(CompilerInstance &CI) override {
100         assert(!Collector && "expected only a single call to BeginSourceFile");
101         Collector.emplace(CI.getPreprocessor());
102         return true;
103       }
104       void EndSourceFileAction() override {
105         assert(Collector && "BeginSourceFileAction was never called");
106         Result = std::move(*Collector).consume();
107         Result.indexExpandedTokens();
108       }
109 
110       std::unique_ptr<ASTConsumer>
111       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
112         return std::make_unique<ASTConsumer>();
113       }
114 
115     private:
116       TokenBuffer &Result;
117       std::optional<TokenCollector> Collector;
118     };
119 
120     constexpr const char *FileName = "./input.cpp";
121     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
122     // Prepare to run a compiler.
123     if (!Diags->getClient())
124       Diags->setClient(new IgnoringDiagConsumer);
125     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
126                                       FileName};
127     CreateInvocationOptions CIOpts;
128     CIOpts.Diags = Diags;
129     CIOpts.VFS = FS;
130     auto CI = createInvocation(Args, std::move(CIOpts));
131     assert(CI);
132     CI->getFrontendOpts().DisableFree = false;
133     CI->getPreprocessorOpts().addRemappedFile(
134         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
135     CompilerInstance Compiler;
136     Compiler.setInvocation(std::move(CI));
137     Compiler.setDiagnostics(Diags.get());
138     Compiler.setFileManager(FileMgr.get());
139     Compiler.setSourceManager(SourceMgr.get());
140 
141     this->Buffer = TokenBuffer(*SourceMgr);
142     RecordTokens Recorder(this->Buffer);
143     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
144         << "failed to run the frontend";
145   }
146 
147   /// Record the tokens and return a test dump of the resulting buffer.
collectAndDump(llvm::StringRef Code)148   std::string collectAndDump(llvm::StringRef Code) {
149     recordTokens(Code);
150     return Buffer.dumpForTests();
151   }
152 
153   // Adds a file to the test VFS.
addFile(llvm::StringRef Path,llvm::StringRef Contents)154   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
155     if (!FS->addFile(Path, time_t(),
156                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
157       ADD_FAILURE() << "could not add a file to VFS: " << Path;
158     }
159   }
160 
161   /// Add a new file, run syntax::tokenize() on the range if any, run it on the
162   /// whole file otherwise and return the results.
tokenize(llvm::StringRef Text)163   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
164     llvm::Annotations Annot(Text);
165     auto FID = SourceMgr->createFileID(
166         llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
167     // FIXME: pass proper LangOptions.
168     if (Annot.ranges().empty())
169       return syntax::tokenize(FID, *SourceMgr, LangOptions());
170     return syntax::tokenize(
171         syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
172         *SourceMgr, LangOptions());
173   }
174 
175   // Specialized versions of matchers that hide the SourceManager from clients.
HasText(std::string Text) const176   Matcher<syntax::Token> HasText(std::string Text) const {
177     return ::HasText(Text, SourceMgr.get());
178   }
RangeIs(llvm::Annotations::Range R) const179   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
180     std::pair<SourceLocation, SourceLocation> Ls;
181     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
182                    .getLocWithOffset(R.Begin);
183     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
184                     .getLocWithOffset(R.End);
185     return ::RangeIs(Ls);
186   }
187 
188   /// Finds a subrange in O(n * m).
189   template <class T, class U, class Eq>
findSubrange(llvm::ArrayRef<U> Subrange,llvm::ArrayRef<T> Range,Eq F)190   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
191                                  llvm::ArrayRef<T> Range, Eq F) {
192     assert(Subrange.size() >= 1);
193     if (Range.size() < Subrange.size())
194       return llvm::ArrayRef(Range.end(), Range.end());
195     for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
196          Begin <= Last; ++Begin) {
197       auto It = Begin;
198       for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
199            ++ItSub, ++It) {
200         if (!F(*ItSub, *It))
201           goto continue_outer;
202       }
203       return llvm::ArrayRef(Begin, It);
204     continue_outer:;
205     }
206     return llvm::ArrayRef(Range.end(), Range.end());
207   }
208 
209   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
210   /// The match should be unique. \p Query is a whitespace-separated list of
211   /// tokens to search for.
212   llvm::ArrayRef<syntax::Token>
findTokenRange(llvm::StringRef Query,llvm::ArrayRef<syntax::Token> Tokens)213   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
214     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
215     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
216     if (QueryTokens.empty()) {
217       ADD_FAILURE() << "will not look for an empty list of tokens";
218       std::abort();
219     }
220     // An equality test for search.
221     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
222       return Q == T.text(*SourceMgr);
223     };
224     // Find a match.
225     auto Found = findSubrange(llvm::ArrayRef(QueryTokens), Tokens, TextMatches);
226     if (Found.begin() == Tokens.end()) {
227       ADD_FAILURE() << "could not find the subrange for " << Query;
228       std::abort();
229     }
230     // Check that the match is unique.
231     if (findSubrange(llvm::ArrayRef(QueryTokens),
232                      llvm::ArrayRef(Found.end(), Tokens.end()), TextMatches)
233             .begin() != Tokens.end()) {
234       ADD_FAILURE() << "match is not unique for " << Query;
235       std::abort();
236     }
237     return Found;
238   };
239 
240   // Specialized versions of findTokenRange for expanded and spelled tokens.
findExpanded(llvm::StringRef Query)241   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
242     return findTokenRange(Query, Buffer.expandedTokens());
243   }
findSpelled(llvm::StringRef Query,FileID File=FileID ())244   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
245                                             FileID File = FileID()) {
246     if (!File.isValid())
247       File = SourceMgr->getMainFileID();
248     return findTokenRange(Query, Buffer.spelledTokens(File));
249   }
250 
251   // Data fields.
252   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
253       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
254   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
255       new llvm::vfs::InMemoryFileSystem;
256   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
257       new FileManager(FileSystemOptions(), FS);
258   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
259       new SourceManager(*Diags, *FileMgr);
260   /// Contains last result of calling recordTokens().
261   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
262 };
263 
// Exercises syntax::tokenize() on whole files and on annotated sub-ranges.
TEST_F(TokenCollectorTest, RawMode) {
  // An identifier token with the given spelling.
  auto Ident = [this](const std::string &Name) {
    return AllOf(HasText(Name), Kind(tok::identifier));
  };

  EXPECT_THAT(tokenize("int main() {}"),
              ElementsAre(Kind(tok::kw_int), Ident("main"), Kind(tok::l_paren),
                          Kind(tok::r_paren), Kind(tok::l_brace),
                          Kind(tok::r_brace)));
  // Comments are ignored for now.
  EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
              ElementsAre(Kind(tok::kw_int), Ident("a"), Kind(tok::semi)));
  // Only the tokens inside the annotated range are produced.
  EXPECT_THAT(tokenize("int [[main() {]]}"),
              ElementsAre(Ident("main"), Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace)));
  // Same range, this time ending in trailing whitespace.
  EXPECT_THAT(tokenize("int [[main() {   ]]}"),
              ElementsAre(Ident("main"), Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace)));
  // First token is partially parsed, last token is fully included even though
  // only a part of it is contained in the range.
  EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
              ElementsAre(Ident("ain"), Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace), Kind(tok::kw_return)));
}
290 
// Table-driven check of the token dump for a handful of small inputs.
TEST_F(TokenCollectorTest, Basic) {
  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
      {"int main() {}",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // All kinds of whitespace are ignored.
      {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // Annotation tokens are ignored.
      {R"cpp(
        #pragma GCC visibility push (public)
        #pragma GCC visibility pop
      )cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # pragma GCC visibility push ( public ) # pragma GCC visibility pop
  mappings:
    ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
)"},
      // Empty files should not crash.
      {R"cpp()cpp", R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    <empty>
  no mappings.
)"},
      // Should not crash on errors inside '#define' directives. Error is that
      // stringification (#B) does not refer to a macro parameter.
      {
          R"cpp(
a
#define MACRO() A #B
)cpp",
          R"(expanded tokens:
  a
file './input.cpp'
  spelled tokens:
    a # define MACRO ( ) A # B
  mappings:
    ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
)"}};
  for (const auto &[Input, Expected] : TestCases) {
    // Compute the dump once: it is both the value under test and the text
    // shown on failure, so the frontend need not be run a second time.
    const std::string Dump = collectAndDump(Input);
    EXPECT_EQ(Dump, Expected) << Dump;
  }
}
350 
// Verifies the source ranges of expanded and spelled tokens, and that
// spelledTokenContaining() finds each token from its start location.
TEST_F(TokenCollectorTest, Locations) {
  // Check locations of the tokens.
  llvm::Annotations Code(R"cpp(
    $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
  )cpp");
  recordTokens(Code.code());

  // A token of kind K that covers exactly the annotated range called Name.
  auto TokenAt = [&](tok::TokenKind K, llvm::StringRef Name) {
    return AllOf(Kind(K), RangeIs(Code.range(Name)));
  };

  // Check expanded tokens.
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(TokenAt(tok::kw_int, "r1"),
                          TokenAt(tok::identifier, "r2"),
                          TokenAt(tok::equal, "r3"),
                          TokenAt(tok::string_literal, "r4"),
                          TokenAt(tok::semi, "r5"), Kind(tok::eof)));
  // Check spelled tokens.
  EXPECT_THAT(Buffer.spelledTokens(SourceMgr->getMainFileID()),
              ElementsAre(TokenAt(tok::kw_int, "r1"),
                          TokenAt(tok::identifier, "r2"),
                          TokenAt(tok::equal, "r3"),
                          TokenAt(tok::string_literal, "r4"),
                          TokenAt(tok::semi, "r5")));

  // Check that looking up the start of each range yields the token covering
  // it. Every range above is named, and Annotations::ranges() without a name
  // only returns the unnamed ones, so the names are listed explicitly to make
  // sure the loop actually runs.
  const SourceLocation FileStart =
      SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
  const llvm::StringRef RangeNames[] = {"r1", "r2", "r3", "r4", "r5"};
  for (llvm::StringRef Name : RangeNames) {
    const llvm::Annotations::Range R = Code.range(Name);
    EXPECT_THAT(
        Buffer.spelledTokenContaining(FileStart.getLocWithOffset(R.Begin)),
        Pointee(RangeIs(R)))
        << "range: " << Name.str();
  }
}
382 
// A location strictly inside a spelled token must resolve to that token.
TEST_F(TokenCollectorTest, LocationInMiddleOfSpelledToken) {
  llvm::Annotations Code(R"cpp(
    int foo = [[baa^aar]];
  )cpp");
  recordTokens(Code.code());
  // Check spelled tokens.
  const auto FileStart =
      SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
  const auto InsideToken = FileStart.getLocWithOffset(Code.point());
  EXPECT_THAT(Buffer.spelledTokenContaining(InsideToken),
              Pointee(RangeIs(Code.range())));
}
394 
// Preprocessor directives appear among the spelled tokens only; a single
// mapping ties them to an empty expanded range.
TEST_F(TokenCollectorTest, MacroDirectives) {
  // Macro directives are not stored anywhere at the moment.
  const std::string Input = R"cpp(
    #define FOO a
    #include "unresolved_file.h"
    #undef FOO
    #ifdef X
    #else
    #endif
    #ifndef Y
    #endif
    #if 1
    #elif 2
    #else
    #endif
    #pragma once
    #pragma something lalala

    int a;
  )cpp";
  const std::string ExpectedDump =
      "expanded tokens:\n"
      "  int a ;\n"
      "file './input.cpp'\n"
      "  spelled tokens:\n"
      "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
      "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
      "# endif # pragma once # pragma something lalala int a ;\n"
      "  mappings:\n"
      "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
  EXPECT_EQ(collectAndDump(Input), ExpectedDump);
}
427 
// Table-driven check of the token dump (and in particular the recorded
// mappings) for a range of macro expansion scenarios.
TEST_F(TokenCollectorTest, MacroReplacements) {
  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
      // A simple object-like macro.
      {R"cpp(
    #define INT int const
    INT a;
  )cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define INT int const INT a ;
  mappings:
    ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
    ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
)"},
      // A simple function-like macro.
      {R"cpp(
    #define INT(a) const int
    INT(10+10) a;
  )cpp",
       R"(expanded tokens:
  const int a ;
file './input.cpp'
  spelled tokens:
    # define INT ( a ) const int INT ( 10 + 10 ) a ;
  mappings:
    ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
    ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
)"},
      // Recursive macro replacements.
      {R"cpp(
    #define ID(X) X
    #define INT int const
    ID(ID(INT)) a;
  )cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
  mappings:
    ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
    ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
)"},
      // A little more complicated recursive macro replacements.
      {R"cpp(
    #define ADD(X, Y) X+Y
    #define MULT(X, Y) X*Y

    int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
  )cpp",
       "expanded tokens:\n"
       "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
       "file './input.cpp'\n"
       "  spelled tokens:\n"
       "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
       "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
       "  mappings:\n"
       "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
       "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
      // Empty macro replacement.
      // FIXME: the #define directives should not be glued together.
      {R"cpp(
    #define EMPTY
    #define EMPTY_FUNC(X)
    EMPTY
    EMPTY_FUNC(1+2+3)
    )cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
  mappings:
    ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
    ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
    ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
)"},
      // File ends with a macro replacement.
      {R"cpp(
    #define FOO 10+10;
    int a = FOO
    )cpp",
       R"(expanded tokens:
  int a = 10 + 10 ;
file './input.cpp'
  spelled tokens:
    # define FOO 10 + 10 ; int a = FOO
  mappings:
    ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
    ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
)"},
      // An object-like macro whose body ends in the name of a function-like
      // macro that is invoked with arguments following the expansion.
      {R"cpp(
         #define NUM 42
         #define ID(a) a
         #define M 1 + ID
         M(NUM)
       )cpp",
       R"(expanded tokens:
  1 + 42
file './input.cpp'
  spelled tokens:
    # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
  mappings:
    ['#'_0, 'M'_17) => ['1'_0, '1'_0)
    ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
)"},
  };

  // Dump each input once and show the raw dump if it differs.
  for (const auto &[Input, Expected] : TestCases) {
    const std::string Actual = collectAndDump(Input);
    EXPECT_EQ(Expected, Actual) << Actual;
  }
}
543 
// Tokens whose text does not map one-to-one to a contiguous piece of source.
TEST_F(TokenCollectorTest, SpecialTokens) {
  // Tokens coming from concatenations.
  recordTokens(R"cpp(
    #define CONCAT(a, b) a ## b
    int a = CONCAT(1, 2);
  )cpp");
  const std::vector<syntax::Token> AfterConcat(Buffer.expandedTokens());
  EXPECT_THAT(AfterConcat, Contains(HasText("12")));

  // Multi-line tokens with slashes at the end.
  const char *const SplicedInt = "i\\\nn\\\nt";
  recordTokens(SplicedInt);
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(AllOf(Kind(tok::kw_int), HasText(SplicedInt)),
                          Kind(tok::eof)));
  // FIXME: test tokens with digraphs and UCN identifiers.
}
559 
// '>>' must be recorded as one token both when it closes two template
// argument lists and when it is a shift operator.
TEST_F(TokenCollectorTest, LateBoundTokens) {
  // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
  // but we choose to record them as a single token (for now).
  llvm::Annotations Code(R"cpp(
    template <class T>
    struct foo { int a; };
    int bar = foo<foo<int$br[[>>]]().a;
    int baz = 10 $op[[>>]] 2;
  )cpp");
  recordTokens(Code.code());

  // A '>>' token covering exactly the annotated range called Name.
  auto ShiftTokenAt = [&](llvm::StringRef Name) {
    return AllOf(Kind(tok::greatergreater), RangeIs(Code.range(Name)));
  };
  const std::vector<syntax::Token> Expanded(Buffer.expandedTokens());
  EXPECT_THAT(Expanded, AllOf(Contains(ShiftTokenAt("br")),
                              Contains(ShiftTokenAt("op"))));
}
576 
// Tokens of inline member bodies and initializers must show up exactly once
// among the expanded tokens.
TEST_F(TokenCollectorTest, DelayedParsing) {
  llvm::StringLiteral Code = R"cpp(
    struct Foo {
      int method() {
        // Parser will visit method bodies and initializers multiple times, but
        // TokenBuffer should only record the first walk over the tokens;
        return 100;
      }
      int a = 10;

      struct Subclass {
        void foo() {
          Foo().method();
        }
      };
    };
  )cpp";
  // Only the leading "expanded tokens" section of the dump is checked.
  const std::string ExpectedPrefix =
      "expanded tokens:\n"
      "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
      "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
  const std::string Dump = collectAndDump(Code);
  EXPECT_THAT(Dump, StartsWith(ExpectedPrefix));
}
600 
// Tokens and mappings are recorded per file when the main file pulls in
// nested #includes.
TEST_F(TokenCollectorTest, MultiFile) {
  addFile("./foo.h", R"cpp(
    #define ADD(X, Y) X+Y
    int a = 100;
    #include "bar.h"
  )cpp");
  addFile("./bar.h", R"cpp(
    int b = ADD(1, 2);
    #define MULT(X, Y) X*Y
  )cpp");
  llvm::StringLiteral Code = R"cpp(
    #include "foo.h"
    int c = ADD(1, MULT(2,3));
  )cpp";

  std::string Expected = R"(expanded tokens:
  int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
file './input.cpp'
  spelled tokens:
    # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
  mappings:
    ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
    ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
file './foo.h'
  spelled tokens:
    # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
  mappings:
    ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
    ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
file './bar.h'
  spelled tokens:
    int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
  mappings:
    ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
    ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
)";

  // Compute the dump once and reuse it in the failure message, so the
  // frontend is not run a second time just to print the result.
  const std::string Dump = collectAndDump(Code);
  EXPECT_EQ(Expected, Dump) << "input: " << Code << "\nresults: " << Dump;
}
641 
642 class TokenBufferTest : public TokenCollectorTest {};
643 
TEST_F(TokenBufferTest,SpelledByExpanded)644 TEST_F(TokenBufferTest, SpelledByExpanded) {
645   recordTokens(R"cpp(
646     a1 a2 a3 b1 b2
647   )cpp");
648 
649   // Expanded and spelled tokens are stored separately.
650   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
651   // Searching for subranges of expanded tokens should give the corresponding
652   // spelled ones.
653   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
654               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
655   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
656               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
657   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
658               ValueIs(SameRange(findSpelled("b1 b2"))));
659 
660   // Test search on simple macro expansions.
661   recordTokens(R"cpp(
662     #define A a1 a2 a3
663     #define B b1 b2
664 
665     A split B
666   )cpp");
667   // Ranges going across expansion boundaries.
668   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
669               ValueIs(SameRange(findSpelled("A split B"))));
670   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
671               ValueIs(SameRange(findSpelled("A split").drop_back())));
672   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
673               ValueIs(SameRange(findSpelled("split B").drop_front())));
674   // Ranges not fully covering macro invocations should fail.
675   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
676   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), std::nullopt);
677   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
678             std::nullopt);
679 
680   // Recursive macro invocations.
681   recordTokens(R"cpp(
682     #define ID(x) x
683     #define B b1 b2
684 
685     ID(ID(ID(a1) a2 a3)) split ID(B)
686   )cpp");
687 
688   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
689               ValueIs(SameRange(findSpelled("( B").drop_front())));
690   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
691               ValueIs(SameRange(findSpelled(
692                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
693   // Mixed ranges with expanded and spelled tokens.
694   EXPECT_THAT(
695       Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
696       ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
697   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
698               ValueIs(SameRange(findSpelled("split ID ( B )"))));
699   // Macro arguments
700   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
701               ValueIs(SameRange(findSpelled("a1"))));
702   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
703               ValueIs(SameRange(findSpelled("a2"))));
704   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
705               ValueIs(SameRange(findSpelled("a3"))));
706   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
707               ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
708   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
709               ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));
710 
711   // Empty macro expansions.
712   recordTokens(R"cpp(
713     #define EMPTY
714     #define ID(X) X
715 
716     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
717     EMPTY EMPTY ID(4 5 6) split2
718     ID(7 8 9) EMPTY EMPTY
719   )cpp");
720   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
721               ValueIs(SameRange(findSpelled("1 2 3"))));
722   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
723               ValueIs(SameRange(findSpelled("4 5 6"))));
724   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
725               ValueIs(SameRange(findSpelled("7 8 9"))));
726 
727   // Empty mappings coming from various directives.
728   recordTokens(R"cpp(
729     #define ID(X) X
730     ID(1)
731     #pragma lalala
732     not_mapped
733   )cpp");
734   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
735               ValueIs(SameRange(findSpelled("not_mapped"))));
736 
737   // Multiple macro arguments
738   recordTokens(R"cpp(
739     #define ID(X) X
740     #define ID2(X, Y) X Y
741 
742     ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
743   )cpp");
744   // Should fail, spans multiple arguments.
745   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
746   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
747               ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
748   EXPECT_THAT(
749       Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
750       ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
751   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
752               ValueIs(SameRange(findSpelled("a5 a6"))));
753   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
754               ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
755   // Should fail, spans multiple invocations.
756   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")),
757             std::nullopt);
758 
759   // https://github.com/clangd/clangd/issues/1289
760   recordTokens(R"cpp(
761     #define FOO(X) foo(X)
762     #define INDIRECT FOO(y)
763     INDIRECT // expands to foo(y)
764   )cpp");
765   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), std::nullopt);
766 
767   recordTokens(R"cpp(
768     #define FOO(X) a X b
769     FOO(y)
770   )cpp");
771   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y")),
772               ValueIs(SameRange(findSpelled("y"))));
773 
774   recordTokens(R"cpp(
775     #define ID(X) X
776     #define BAR ID(1)
777     BAR
778   )cpp");
779   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1")),
780               ValueIs(SameRange(findSpelled(") BAR").drop_front())));
781 
782   // Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
783   recordTokens(R"cpp(
784     #define ID(X) X
785     ID(prev good)
786     ID(prev ID(good2))
787     #define LARGE ID(prev ID(bad))
788     LARGE
789   )cpp");
790   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
791               ValueIs(SameRange(findSpelled("good"))));
792   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
793               ValueIs(SameRange(findSpelled("good2"))));
794   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
795 
796   recordTokens(R"cpp(
797     #define PREV prev
798     #define ID(X) X
799     PREV ID(good)
800     #define LARGE PREV ID(bad)
801     LARGE
802   )cpp");
803   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
804               ValueIs(SameRange(findSpelled("good"))));
805   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
806 
807   recordTokens(R"cpp(
808     #define ID(X) X
809     #define ID2(X, Y) X Y
810     ID2(prev, good)
811     ID2(prev, ID(good2))
812     #define LARGE ID2(prev, bad)
813     LARGE
814   )cpp");
815   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
816               ValueIs(SameRange(findSpelled("good"))));
817   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
818               ValueIs(SameRange(findSpelled("good2"))));
819   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
820 
821   // Prev from macro body.
822   recordTokens(R"cpp(
823     #define ID(X) X
824     #define ID2(X, Y) X prev ID(Y)
825     ID2(not_prev, good)
826   )cpp");
827   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
828               ValueIs(SameRange(findSpelled("good"))));
829   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good")), std::nullopt);
830 }
831 
// Mapping the complete expanded token stream, trailing `eof` included, back to
// spelled tokens must be handled gracefully.
TEST_F(TokenBufferTest, NoCrashForEofToken) {
  recordTokens(R"cpp(
    int main() {
  )cpp");

  // Precondition: the expanded stream is non-empty and terminated by `eof`.
  const llvm::ArrayRef<syntax::Token> AllExpanded = Buffer.expandedTokens();
  ASSERT_FALSE(AllExpanded.empty());
  ASSERT_EQ(AllExpanded.back().kind(), tok::eof);

  // `eof` is ignored: the result is expected to be every spelled token of the
  // main file.
  const llvm::ArrayRef<syntax::Token> MainFileSpelled =
      Buffer.spelledTokens(SourceMgr->getMainFileID());
  EXPECT_THAT(Buffer.spelledForExpanded(AllExpanded),
              ValueIs(SameRange(MainFileSpelled)));
}
843 
// expandedTokens(SourceRange) selects expanded tokens by source location.
TEST_F(TokenBufferTest, ExpandedTokensForRange) {
  recordTokens(R"cpp(
    #define SIGN(X) X##_washere
    A SIGN(B) C SIGN(D) E SIGN(F) G
  )cpp");

  // The range runs from the location of `C` to the location of `F_washere`.
  const SourceLocation RangeBegin = findExpanded("C").front().location();
  const SourceLocation RangeEnd = findExpanded("F_washere").front().location();
  const SourceRange Selected(RangeBegin, RangeEnd);

  // The token located at the end of the range is part of the result.
  EXPECT_THAT(Buffer.expandedTokens(Selected),
              SameRange(findExpanded("C D_washere E F_washere")));
  // A default-constructed range selects nothing.
  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
}
857 
TEST_F(TokenBufferTest,ExpansionsOverlapping)858 TEST_F(TokenBufferTest, ExpansionsOverlapping) {
859   // Object-like macro expansions.
860   recordTokens(R"cpp(
861     #define FOO 3+4
862     int a = FOO 1;
863     int b = FOO 2;
864   )cpp");
865 
866   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1");
867   EXPECT_THAT(
868       Buffer.expansionStartingAt(Foo1.data()),
869       ValueIs(IsExpansion(SameRange(Foo1.drop_back()),
870                           SameRange(findExpanded("3 + 4 1").drop_back()))));
871   EXPECT_THAT(
872       Buffer.expansionsOverlapping(Foo1),
873       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()),
874                               SameRange(findExpanded("3 + 4 1").drop_back()))));
875 
876   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2");
877   EXPECT_THAT(
878       Buffer.expansionStartingAt(Foo2.data()),
879       ValueIs(IsExpansion(SameRange(Foo2.drop_back()),
880                           SameRange(findExpanded("3 + 4 2").drop_back()))));
881   EXPECT_THAT(
882       Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())),
883       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _),
884                   IsExpansion(SameRange(Foo2.drop_back()), _)));
885 
886   // Function-like macro expansions.
887   recordTokens(R"cpp(
888     #define ID(X) X
889     int a = ID(1+2+3);
890     int b = ID(ID(2+3+4));
891   )cpp");
892 
893   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
894   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
895               ValueIs(IsExpansion(SameRange(ID1),
896                                   SameRange(findExpanded("1 + 2 + 3")))));
897   // Only the first spelled token should be found.
898   for (const auto &T : ID1.drop_front())
899     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
900 
901   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
902   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
903               ValueIs(IsExpansion(SameRange(ID2),
904                                   SameRange(findExpanded("2 + 3 + 4")))));
905   // Only the first spelled token should be found.
906   for (const auto &T : ID2.drop_front())
907     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
908 
909   EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef(
910                   findSpelled("1 + 2").data(), findSpelled("4").data())),
911               ElementsAre(IsExpansion(SameRange(ID1), _),
912                           IsExpansion(SameRange(ID2), _)));
913 
914   // PP directives.
915   recordTokens(R"cpp(
916 #define FOO 1
917 int a = FOO;
918 #pragma once
919 int b = 1;
920   )cpp");
921 
922   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
923   EXPECT_THAT(
924       Buffer.expansionStartingAt(&DefineFoo.front()),
925       ValueIs(IsExpansion(SameRange(DefineFoo),
926                           SameRange(findExpanded("int a").take_front(0)))));
927   // Only the first spelled token should be found.
928   for (const auto &T : DefineFoo.drop_front())
929     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
930 
931   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
932   EXPECT_THAT(
933       Buffer.expansionStartingAt(&PragmaOnce.front()),
934       ValueIs(IsExpansion(SameRange(PragmaOnce),
935                           SameRange(findExpanded("int b").take_front(0)))));
936   // Only the first spelled token should be found.
937   for (const auto &T : PragmaOnce.drop_front())
938     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
939 
940   EXPECT_THAT(
941       Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")),
942       ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _),
943                   IsExpansion(SameRange(PragmaOnce), _)));
944 }
945 
// Token ranges are converted to FileRanges expressed as offsets into the main
// file; the expected offsets come from the `$i` and `$all` annotations.
TEST_F(TokenBufferTest, TokensToFileRange) {
  addFile("./foo.h", "token_from_header");
  llvm::Annotations Code(R"cpp(
    #define FOO token_from_expansion
    #include "./foo.h"
    $all[[$i[[int]] a = FOO;]]
  )cpp");
  recordTokens(Code.code());

  auto &SM = *SourceMgr;
  const auto MainFile = SM.getMainFileID();
  const auto IntOffsets = Code.range("i");
  const auto StatementOffsets = Code.range("all");

  // Two simple examples: a single token, and a span between two tokens.
  syntax::Token IntTok = findExpanded("int").front();
  syntax::Token SemiTok = findExpanded(";").front();
  EXPECT_EQ(IntTok.range(SM),
            FileRange(MainFile, IntOffsets.Begin, IntOffsets.End));
  EXPECT_EQ(syntax::Token::range(SM, IntTok, SemiTok),
            FileRange(MainFile, StatementOffsets.Begin, StatementOffsets.End));
  // We don't test assertion failures because death tests are slow.
}
967 
// macroExpansions() for the main file is compared against the `$macro`
// annotations: only the macro names written directly in the file are marked
// (`FOO`, `CALL`, `G`), not the macros used inside the body of `G`.
TEST_F(TokenBufferTest, MacroExpansions) {
  llvm::Annotations Code(R"cpp(
    #define FOO B
    #define FOO2 BA
    #define CALL(X) int X
    #define G CALL(FOO2)
    int B;
    $macro[[FOO]];
    $macro[[CALL]](A);
    $macro[[G]];
  )cpp");
  recordTokens(Code.code());
  auto &SM = *SourceMgr;
  const auto MainFile = SM.getMainFileID();

  // What the buffer reports, converted to file ranges.
  std::vector<FileRange> ActualMacroRanges;
  for (const auto &MacroName : Buffer.macroExpansions(MainFile))
    ActualMacroRanges.push_back(MacroName->range(SM));

  // What the annotations say, as file ranges in the main file.
  std::vector<FileRange> ExpectedMacroRanges;
  for (const auto &Annotated : Code.ranges("macro"))
    ExpectedMacroRanges.push_back(
        FileRange(MainFile, Annotated.Begin, Annotated.End));

  EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
}
991 
// Checks spelledTokensTouching() and spelledIdentifierTouching() at each of
// the eight offsets marked with `^` in `int ab=1;`.
TEST_F(TokenBufferTest, Touching) {
  llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
  recordTokens(Code.code());

  // Validate the number of marked points before anything indexes into them:
  // with fewer points than expected, the lookups below would read out of
  // bounds, so the test must stop here instead.
  const auto Points = Code.points();
  ASSERT_EQ(Points.size(), 8u);

  // Source location of the Index-th marked point in the main file.
  auto LocAt = [&](int Index) {
    return SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
                                     Points[Index]);
  };
  // Spelled tokens touching the Index-th point.
  auto Touching = [&](int Index) {
    return spelledTokensTouching(LocAt(Index), Buffer);
  };
  // Text of the identifier touching the Index-th point, or "" if there is
  // none.
  auto Identifier = [&](int Index) {
    const syntax::Token *Tok = spelledIdentifierTouching(LocAt(Index), Buffer);
    return Tok ? Tok->text(*SourceMgr) : "";
  };

  // Points 0-2: before, inside and right after `int`; no identifier is
  // reported for it.
  EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(0), "");
  EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(1), "");
  EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(2), "");

  // Points 3-4: before and inside `ab`.
  EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
  EXPECT_EQ(Identifier(3), "ab");
  EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
  EXPECT_EQ(Identifier(4), "ab");

  // Point 5: between `ab` and `=`; both tokens touch it.
  EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
  EXPECT_EQ(Identifier(5), "ab");

  // Point 6: between `=` and `1`; neither is an identifier.
  EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
  EXPECT_EQ(Identifier(6), "");

  // Point 7: right after `;`.
  EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
  EXPECT_EQ(Identifier(7), "");
}
1031 
TEST_F(TokenBufferTest,ExpandedBySpelled)1032 TEST_F(TokenBufferTest, ExpandedBySpelled) {
1033   recordTokens(R"cpp(
1034     a1 a2 a3 b1 b2
1035   )cpp");
1036   // Expanded and spelled tokens are stored separately.
1037   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
1038   // Searching for subranges of expanded tokens should give the corresponding
1039   // spelled ones.
1040   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
1041               ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
1042   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
1043               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1044   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
1045               ElementsAre(SameRange(findExpanded("b1 b2"))));
1046 
1047   // Test search on simple macro expansions.
1048   recordTokens(R"cpp(
1049     #define A a1 a2 a3
1050     #define B b1 b2
1051 
1052     A split B
1053   )cpp");
1054   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
1055               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1056   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
1057               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1058   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
1059               ElementsAre(SameRange(findExpanded("b1 b2"))));
1060 
1061   // Ranges not fully covering macro expansions should fail.
1062   recordTokens(R"cpp(
1063     #define ID(x) x
1064 
1065     ID(a)
1066   )cpp");
1067   // Spelled don't cover entire mapping (missing ID token) -> empty result
1068   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
1069   // Spelled don't cover entire mapping (missing ) token) -> empty result
1070   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
1071 
1072   // Recursive macro invocations.
1073   recordTokens(R"cpp(
1074     #define ID(x) x
1075     #define B b1 b2
1076 
1077     ID(ID(ID(a1) a2 a3)) split ID(B)
1078   )cpp");
1079 
1080   EXPECT_THAT(
1081       Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
1082       ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1083   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
1084               ElementsAre(SameRange(findExpanded("b1 b2"))));
1085   EXPECT_THAT(Buffer.expandedForSpelled(
1086                   findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
1087               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1088   // FIXME: these should succeed, but we do not support macro arguments yet.
1089   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
1090   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
1091               IsEmpty());
1092 
1093   // Empty macro expansions.
1094   recordTokens(R"cpp(
1095     #define EMPTY
1096     #define ID(X) X
1097 
1098     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
1099     EMPTY EMPTY ID(4 5 6) split2
1100     ID(7 8 9) EMPTY EMPTY
1101   )cpp");
1102   // Covered by empty expansions on one of both of the sides.
1103   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
1104               ElementsAre(SameRange(findExpanded("1 2 3"))));
1105   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
1106               ElementsAre(SameRange(findExpanded("4 5 6"))));
1107   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
1108               ElementsAre(SameRange(findExpanded("7 8 9"))));
1109   // Including the empty macro expansions on the side.
1110   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
1111               ElementsAre(SameRange(findExpanded("1 2 3"))));
1112   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
1113               ElementsAre(SameRange(findExpanded("1 2 3"))));
1114   EXPECT_THAT(
1115       Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
1116       ElementsAre(SameRange(findExpanded("1 2 3"))));
1117 
1118   // Empty mappings coming from various directives.
1119   recordTokens(R"cpp(
1120     #define ID(X) X
1121     ID(1)
1122     #pragma lalala
1123     not_mapped
1124   )cpp");
1125   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
1126               IsEmpty());
1127   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
1128               IsEmpty());
1129 
1130   // Empty macro expansion.
1131   recordTokens(R"cpp(
1132     #define EMPTY
1133     EMPTY int a = 100;
1134   )cpp");
1135   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
1136               IsEmpty());
1137 }
1138 
// Records tokens for a function body containing `#pragma unroll`. There are no
// expectations on the result: the test only requires that recording runs to
// completion (presumably a regression guard for pragma handling).
TEST_F(TokenCollectorTest, Pragmas) {
  const char *const PragmaInFunctionBody = R"cpp(
    void foo() {
      #pragma unroll 4
      for(int i=0;i<4;++i);
    }
  )cpp";
  recordTokens(PragmaInFunctionBody);
}
1148 } // namespace
1149