xref: /llvm-project/clang/unittests/Tooling/Syntax/TokensTest.cpp (revision d66afd6dde542dc373f87e07fe764c071fe20d76)
1 //===- TokensTest.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Syntax/Tokens.h"
10 #include "clang/AST/ASTConsumer.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticIDs.h"
14 #include "clang/Basic/DiagnosticOptions.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.def"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Frontend/CompilerInstance.h"
24 #include "clang/Frontend/FrontendAction.h"
25 #include "clang/Frontend/Utils.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "clang/Lex/Token.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/VirtualFileSystem.h"
39 #include "llvm/Support/raw_os_ostream.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Testing/Support/Annotations.h"
42 #include "llvm/Testing/Support/SupportHelpers.h"
43 #include "gmock/gmock.h"
44 #include <cassert>
45 #include <cstdlib>
46 #include <gmock/gmock.h>
47 #include <gtest/gtest.h>
48 #include <memory>
49 #include <ostream>
50 #include <string>
51 
52 using namespace clang;
53 using namespace clang::syntax;
54 
55 using llvm::ValueIs;
56 using ::testing::AllOf;
57 using ::testing::Contains;
58 using ::testing::ElementsAre;
59 using ::testing::Field;
60 using ::testing::Matcher;
61 using ::testing::Not;
62 using ::testing::Pointee;
63 using ::testing::StartsWith;
64 
65 namespace {
66 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
67 // argument.
68 MATCHER_P(SameRange, A, "") {
69   return A.begin() == arg.begin() && A.end() == arg.end();
70 }
71 
72 Matcher<TokenBuffer::Expansion>
73 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
74             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
75   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
76                Field(&TokenBuffer::Expansion::Expanded, Expanded));
77 }
78 // Matchers for syntax::Token.
79 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
80 MATCHER_P2(HasText, Text, SourceMgr, "") {
81   return arg.text(*SourceMgr) == Text;
82 }
83 /// Checks the start and end location of a token are equal to SourceRng.
84 MATCHER_P(RangeIs, SourceRng, "") {
85   return arg.location() == SourceRng.first &&
86          arg.endLocation() == SourceRng.second;
87 }
88 
89 class TokenCollectorTest : public ::testing::Test {
90 public:
91   /// Run the clang frontend, collect the preprocessed tokens from the frontend
92   /// invocation and store them in this->Buffer.
93   /// This also clears SourceManager before running the compiler.
94   void recordTokens(llvm::StringRef Code) {
95     class RecordTokens : public ASTFrontendAction {
96     public:
97       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
98 
99       bool BeginSourceFileAction(CompilerInstance &CI) override {
100         assert(!Collector && "expected only a single call to BeginSourceFile");
101         Collector.emplace(CI.getPreprocessor());
102         return true;
103       }
104       void EndSourceFileAction() override {
105         assert(Collector && "BeginSourceFileAction was never called");
106         Result = std::move(*Collector).consume();
107       }
108 
109       std::unique_ptr<ASTConsumer>
110       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
111         return std::make_unique<ASTConsumer>();
112       }
113 
114     private:
115       TokenBuffer &Result;
116       llvm::Optional<TokenCollector> Collector;
117     };
118 
119     constexpr const char *FileName = "./input.cpp";
120     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
121     // Prepare to run a compiler.
122     if (!Diags->getClient())
123       Diags->setClient(new IgnoringDiagConsumer);
124     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
125                                       FileName};
126     auto CI = createInvocationFromCommandLine(Args, Diags, FS);
127     assert(CI);
128     CI->getFrontendOpts().DisableFree = false;
129     CI->getPreprocessorOpts().addRemappedFile(
130         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
131     CompilerInstance Compiler;
132     Compiler.setInvocation(std::move(CI));
133     Compiler.setDiagnostics(Diags.get());
134     Compiler.setFileManager(FileMgr.get());
135     Compiler.setSourceManager(SourceMgr.get());
136 
137     this->Buffer = TokenBuffer(*SourceMgr);
138     RecordTokens Recorder(this->Buffer);
139     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
140         << "failed to run the frontend";
141   }
142 
143   /// Record the tokens and return a test dump of the resulting buffer.
144   std::string collectAndDump(llvm::StringRef Code) {
145     recordTokens(Code);
146     return Buffer.dumpForTests();
147   }
148 
149   // Adds a file to the test VFS.
150   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
151     if (!FS->addFile(Path, time_t(),
152                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
153       ADD_FAILURE() << "could not add a file to VFS: " << Path;
154     }
155   }
156 
157   /// Add a new file, run syntax::tokenize() on the range if any, run it on the
158   /// whole file otherwise and return the results.
159   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
160     llvm::Annotations Annot(Text);
161     auto FID = SourceMgr->createFileID(
162         llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
163     // FIXME: pass proper LangOptions.
164     if (Annot.ranges().empty())
165       return syntax::tokenize(FID, *SourceMgr, LangOptions());
166     return syntax::tokenize(
167         syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
168         *SourceMgr, LangOptions());
169   }
170 
171   // Specialized versions of matchers that hide the SourceManager from clients.
172   Matcher<syntax::Token> HasText(std::string Text) const {
173     return ::HasText(Text, SourceMgr.get());
174   }
175   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
176     std::pair<SourceLocation, SourceLocation> Ls;
177     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
178                    .getLocWithOffset(R.Begin);
179     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
180                     .getLocWithOffset(R.End);
181     return ::RangeIs(Ls);
182   }
183 
184   /// Finds a subrange in O(n * m).
185   template <class T, class U, class Eq>
186   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
187                                  llvm::ArrayRef<T> Range, Eq F) {
188     for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
189       auto It = Begin;
190       for (auto ItSub = Subrange.begin();
191            ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
192         if (!F(*ItSub, *It))
193           goto continue_outer;
194       }
195       return llvm::makeArrayRef(Begin, It);
196     continue_outer:;
197     }
198     return llvm::makeArrayRef(Range.end(), Range.end());
199   }
200 
201   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
202   /// The match should be unique. \p Query is a whitespace-separated list of
203   /// tokens to search for.
204   llvm::ArrayRef<syntax::Token>
205   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
206     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
207     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
208     if (QueryTokens.empty()) {
209       ADD_FAILURE() << "will not look for an empty list of tokens";
210       std::abort();
211     }
212     // An equality test for search.
213     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
214       return Q == T.text(*SourceMgr);
215     };
216     // Find a match.
217     auto Found =
218         findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
219     if (Found.begin() == Tokens.end()) {
220       ADD_FAILURE() << "could not find the subrange for " << Query;
221       std::abort();
222     }
223     // Check that the match is unique.
224     if (findSubrange(llvm::makeArrayRef(QueryTokens),
225                      llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
226             .begin() != Tokens.end()) {
227       ADD_FAILURE() << "match is not unique for " << Query;
228       std::abort();
229     }
230     return Found;
231   };
232 
233   // Specialized versions of findTokenRange for expanded and spelled tokens.
234   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
235     return findTokenRange(Query, Buffer.expandedTokens());
236   }
237   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
238                                             FileID File = FileID()) {
239     if (!File.isValid())
240       File = SourceMgr->getMainFileID();
241     return findTokenRange(Query, Buffer.spelledTokens(File));
242   }
243 
244   // Data fields.
245   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
246       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
247   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
248       new llvm::vfs::InMemoryFileSystem;
249   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
250       new FileManager(FileSystemOptions(), FS);
251   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
252       new SourceManager(*Diags, *FileMgr);
253   /// Contains last result of calling recordTokens().
254   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
255 };
256 
257 TEST_F(TokenCollectorTest, RawMode) {
258   EXPECT_THAT(tokenize("int main() {}"),
259               ElementsAre(Kind(tok::kw_int),
260                           AllOf(HasText("main"), Kind(tok::identifier)),
261                           Kind(tok::l_paren), Kind(tok::r_paren),
262                           Kind(tok::l_brace), Kind(tok::r_brace)));
263   // Comments are ignored for now.
264   EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
265               ElementsAre(Kind(tok::kw_int),
266                           AllOf(HasText("a"), Kind(tok::identifier)),
267                           Kind(tok::semi)));
268   EXPECT_THAT(tokenize("int [[main() {]]}"),
269               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
270                           Kind(tok::l_paren), Kind(tok::r_paren),
271                           Kind(tok::l_brace)));
272   EXPECT_THAT(tokenize("int [[main() {   ]]}"),
273               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
274                           Kind(tok::l_paren), Kind(tok::r_paren),
275                           Kind(tok::l_brace)));
276   // First token is partially parsed, last token is fully included even though
277   // only a part of it is contained in the range.
278   EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
279               ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)),
280                           Kind(tok::l_paren), Kind(tok::r_paren),
281                           Kind(tok::l_brace), Kind(tok::kw_return)));
282 }
283 
284 TEST_F(TokenCollectorTest, Basic) {
285   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
286       {"int main() {}",
287        R"(expanded tokens:
288   int main ( ) { }
289 file './input.cpp'
290   spelled tokens:
291     int main ( ) { }
292   no mappings.
293 )"},
294       // All kinds of whitespace are ignored.
295       {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
296        R"(expanded tokens:
297   int main ( ) { }
298 file './input.cpp'
299   spelled tokens:
300     int main ( ) { }
301   no mappings.
302 )"},
303       // Annotation tokens are ignored.
304       {R"cpp(
305         #pragma GCC visibility push (public)
306         #pragma GCC visibility pop
307       )cpp",
308        R"(expanded tokens:
309   <empty>
310 file './input.cpp'
311   spelled tokens:
312     # pragma GCC visibility push ( public ) # pragma GCC visibility pop
313   mappings:
314     ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
315 )"},
316       // Empty files should not crash.
317       {R"cpp()cpp", R"(expanded tokens:
318   <empty>
319 file './input.cpp'
320   spelled tokens:
321     <empty>
322   no mappings.
323 )"},
324       // Should not crash on errors inside '#define' directives. Error is that
325       // stringification (#B) does not refer to a macro parameter.
326       {
327           R"cpp(
328 a
329 #define MACRO() A #B
330 )cpp",
331           R"(expanded tokens:
332   a
333 file './input.cpp'
334   spelled tokens:
335     a # define MACRO ( ) A # B
336   mappings:
337     ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
338 )"}};
339   for (auto &Test : TestCases)
340     EXPECT_EQ(collectAndDump(Test.first), Test.second)
341         << collectAndDump(Test.first);
342 }
343 
344 TEST_F(TokenCollectorTest, Locations) {
345   // Check locations of the tokens.
346   llvm::Annotations Code(R"cpp(
347     $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
348   )cpp");
349   recordTokens(Code.code());
350   // Check expanded tokens.
351   EXPECT_THAT(
352       Buffer.expandedTokens(),
353       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
354                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
355                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
356                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
357                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
358                   Kind(tok::eof)));
359   // Check spelled tokens.
360   EXPECT_THAT(
361       Buffer.spelledTokens(SourceMgr->getMainFileID()),
362       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
363                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
364                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
365                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
366                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
367 
368   auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
369   for (auto &R : Code.ranges()) {
370     EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)),
371                 Pointee(RangeIs(R)));
372   }
373 }
374 
375 TEST_F(TokenCollectorTest, MacroDirectives) {
376   // Macro directives are not stored anywhere at the moment.
377   std::string Code = R"cpp(
378     #define FOO a
379     #include "unresolved_file.h"
380     #undef FOO
381     #ifdef X
382     #else
383     #endif
384     #ifndef Y
385     #endif
386     #if 1
387     #elif 2
388     #else
389     #endif
390     #pragma once
391     #pragma something lalala
392 
393     int a;
394   )cpp";
395   std::string Expected =
396       "expanded tokens:\n"
397       "  int a ;\n"
398       "file './input.cpp'\n"
399       "  spelled tokens:\n"
400       "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
401       "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
402       "# endif # pragma once # pragma something lalala int a ;\n"
403       "  mappings:\n"
404       "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
405   EXPECT_EQ(collectAndDump(Code), Expected);
406 }
407 
408 TEST_F(TokenCollectorTest, MacroReplacements) {
409   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
410       // A simple object-like macro.
411       {R"cpp(
412     #define INT int const
413     INT a;
414   )cpp",
415        R"(expanded tokens:
416   int const a ;
417 file './input.cpp'
418   spelled tokens:
419     # define INT int const INT a ;
420   mappings:
421     ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
422     ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
423 )"},
424       // A simple function-like macro.
425       {R"cpp(
426     #define INT(a) const int
427     INT(10+10) a;
428   )cpp",
429        R"(expanded tokens:
430   const int a ;
431 file './input.cpp'
432   spelled tokens:
433     # define INT ( a ) const int INT ( 10 + 10 ) a ;
434   mappings:
435     ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
436     ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
437 )"},
438       // Recursive macro replacements.
439       {R"cpp(
440     #define ID(X) X
441     #define INT int const
442     ID(ID(INT)) a;
443   )cpp",
444        R"(expanded tokens:
445   int const a ;
446 file './input.cpp'
447   spelled tokens:
448     # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
449   mappings:
450     ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
451     ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
452 )"},
453       // A little more complicated recursive macro replacements.
454       {R"cpp(
455     #define ADD(X, Y) X+Y
456     #define MULT(X, Y) X*Y
457 
458     int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
459   )cpp",
460        "expanded tokens:\n"
461        "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
462        "file './input.cpp'\n"
463        "  spelled tokens:\n"
464        "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
465        "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
466        "  mappings:\n"
467        "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
468        "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
469       // Empty macro replacement.
470       // FIXME: the #define directives should not be glued together.
471       {R"cpp(
472     #define EMPTY
473     #define EMPTY_FUNC(X)
474     EMPTY
475     EMPTY_FUNC(1+2+3)
476     )cpp",
477        R"(expanded tokens:
478   <empty>
479 file './input.cpp'
480   spelled tokens:
481     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
482   mappings:
483     ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
484     ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
485     ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
486 )"},
487       // File ends with a macro replacement.
488       {R"cpp(
489     #define FOO 10+10;
490     int a = FOO
491     )cpp",
492        R"(expanded tokens:
493   int a = 10 + 10 ;
494 file './input.cpp'
495   spelled tokens:
496     # define FOO 10 + 10 ; int a = FOO
497   mappings:
498     ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
499     ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
500 )"},
501       {R"cpp(
502          #define NUM 42
503          #define ID(a) a
504          #define M 1 + ID
505          M(NUM)
506        )cpp",
507        R"(expanded tokens:
508   1 + 42
509 file './input.cpp'
510   spelled tokens:
511     # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
512   mappings:
513     ['#'_0, 'M'_17) => ['1'_0, '1'_0)
514     ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
515 )"},
516   };
517 
518   for (auto &Test : TestCases) {
519     std::string Dump = collectAndDump(Test.first);
520     EXPECT_EQ(Test.second, Dump) << Dump;
521   }
522 }
523 
524 TEST_F(TokenCollectorTest, SpecialTokens) {
525   // Tokens coming from concatenations.
526   recordTokens(R"cpp(
527     #define CONCAT(a, b) a ## b
528     int a = CONCAT(1, 2);
529   )cpp");
530   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
531               Contains(HasText("12")));
532   // Multi-line tokens with slashes at the end.
533   recordTokens("i\\\nn\\\nt");
534   EXPECT_THAT(Buffer.expandedTokens(),
535               ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
536                           Kind(tok::eof)));
537   // FIXME: test tokens with digraphs and UCN identifiers.
538 }
539 
540 TEST_F(TokenCollectorTest, LateBoundTokens) {
541   // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
542   // but we choose to record them as a single token (for now).
543   llvm::Annotations Code(R"cpp(
544     template <class T>
545     struct foo { int a; };
546     int bar = foo<foo<int$br[[>>]]().a;
547     int baz = 10 $op[[>>]] 2;
548   )cpp");
549   recordTokens(Code.code());
550   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
551               AllOf(Contains(AllOf(Kind(tok::greatergreater),
552                                    RangeIs(Code.range("br")))),
553                     Contains(AllOf(Kind(tok::greatergreater),
554                                    RangeIs(Code.range("op"))))));
555 }
556 
557 TEST_F(TokenCollectorTest, DelayedParsing) {
558   llvm::StringLiteral Code = R"cpp(
559     struct Foo {
560       int method() {
561         // Parser will visit method bodies and initializers multiple times, but
562         // TokenBuffer should only record the first walk over the tokens;
563         return 100;
564       }
565       int a = 10;
566 
567       struct Subclass {
568         void foo() {
569           Foo().method();
570         }
571       };
572     };
573   )cpp";
574   std::string ExpectedTokens =
575       "expanded tokens:\n"
576       "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
577       "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
578   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
579 }
580 
581 TEST_F(TokenCollectorTest, MultiFile) {
582   addFile("./foo.h", R"cpp(
583     #define ADD(X, Y) X+Y
584     int a = 100;
585     #include "bar.h"
586   )cpp");
587   addFile("./bar.h", R"cpp(
588     int b = ADD(1, 2);
589     #define MULT(X, Y) X*Y
590   )cpp");
591   llvm::StringLiteral Code = R"cpp(
592     #include "foo.h"
593     int c = ADD(1, MULT(2,3));
594   )cpp";
595 
596   std::string Expected = R"(expanded tokens:
597   int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
598 file './input.cpp'
599   spelled tokens:
600     # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
601   mappings:
602     ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
603     ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
604 file './foo.h'
605   spelled tokens:
606     # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
607   mappings:
608     ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
609     ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
610 file './bar.h'
611   spelled tokens:
612     int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
613   mappings:
614     ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
615     ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
616 )";
617 
618   EXPECT_EQ(Expected, collectAndDump(Code))
619       << "input: " << Code << "\nresults: " << collectAndDump(Code);
620 }
621 
// Fixture for the TokenBuffer tests below. It adds nothing to
// TokenCollectorTest; it only gives those tests a separate suite name.
class TokenBufferTest : public TokenCollectorTest {};
623 
624 TEST_F(TokenBufferTest, SpelledByExpanded) {
625   recordTokens(R"cpp(
626     a1 a2 a3 b1 b2
627   )cpp");
628 
629   // Sanity check: expanded and spelled tokens are stored separately.
630   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
631   // Searching for subranges of expanded tokens should give the corresponding
632   // spelled ones.
633   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
634               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
635   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
636               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
637   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
638               ValueIs(SameRange(findSpelled("b1 b2"))));
639 
640   // Test search on simple macro expansions.
641   recordTokens(R"cpp(
642     #define A a1 a2 a3
643     #define B b1 b2
644 
645     A split B
646   )cpp");
647   // Ranges going across expansion boundaries.
648   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
649               ValueIs(SameRange(findSpelled("A split B"))));
650   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
651               ValueIs(SameRange(findSpelled("A split").drop_back())));
652   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
653               ValueIs(SameRange(findSpelled("split B").drop_front())));
654   // Ranges not fully covering macro invocations should fail.
655   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
656   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
657   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
658             llvm::None);
659 
660   // Recursive macro invocations.
661   recordTokens(R"cpp(
662     #define ID(x) x
663     #define B b1 b2
664 
665     ID(ID(ID(a1) a2 a3)) split ID(B)
666   )cpp");
667 
668   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
669               ValueIs(SameRange(findSpelled("( B").drop_front())));
670   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
671               ValueIs(SameRange(findSpelled(
672                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
673   // Mixed ranges with expanded and spelled tokens.
674   EXPECT_THAT(
675       Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
676       ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
677   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
678               ValueIs(SameRange(findSpelled("split ID ( B )"))));
679   // Macro arguments
680   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
681               ValueIs(SameRange(findSpelled("a1"))));
682   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
683               ValueIs(SameRange(findSpelled("a2"))));
684   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
685               ValueIs(SameRange(findSpelled("a3"))));
686   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
687               ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
688   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
689               ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));
690 
691   // Empty macro expansions.
692   recordTokens(R"cpp(
693     #define EMPTY
694     #define ID(X) X
695 
696     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
697     EMPTY EMPTY ID(4 5 6) split2
698     ID(7 8 9) EMPTY EMPTY
699   )cpp");
700   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
701               ValueIs(SameRange(findSpelled("1 2 3"))));
702   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
703               ValueIs(SameRange(findSpelled("4 5 6"))));
704   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
705               ValueIs(SameRange(findSpelled("7 8 9"))));
706 
707   // Empty mappings coming from various directives.
708   recordTokens(R"cpp(
709     #define ID(X) X
710     ID(1)
711     #pragma lalala
712     not_mapped
713   )cpp");
714   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
715               ValueIs(SameRange(findSpelled("not_mapped"))));
716 
717   // Multiple macro arguments
718   recordTokens(R"cpp(
719     #define ID(X) X
720     #define ID2(X, Y) X Y
721 
722     ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
723   )cpp");
724   // Should fail, spans multiple arguments.
725   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
726   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
727               ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
728   EXPECT_THAT(
729       Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
730       ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
731   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
732               ValueIs(SameRange(findSpelled("a5 a6"))));
733   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
734               ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
735   // Should fail, spans multiple invocations.
736   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None);
737 }
738 
739 TEST_F(TokenBufferTest, ExpandedTokensForRange) {
740   recordTokens(R"cpp(
741     #define SIGN(X) X##_washere
742     A SIGN(B) C SIGN(D) E SIGN(F) G
743   )cpp");
744 
745   SourceRange R(findExpanded("C").front().location(),
746                 findExpanded("F_washere").front().location());
747   // Sanity check: expanded and spelled tokens are stored separately.
748   EXPECT_THAT(Buffer.expandedTokens(R),
749               SameRange(findExpanded("C D_washere E F_washere")));
750   EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
751 }
752 
753 TEST_F(TokenBufferTest, ExpansionStartingAt) {
754   // Object-like macro expansions.
755   recordTokens(R"cpp(
756     #define FOO 3+4
757     int a = FOO 1;
758     int b = FOO 2;
759   )cpp");
760 
761   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
762   EXPECT_THAT(
763       Buffer.expansionStartingAt(Foo1.data()),
764       ValueIs(IsExpansion(SameRange(Foo1),
765                           SameRange(findExpanded("3 + 4 1").drop_back()))));
766 
767   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
768   EXPECT_THAT(
769       Buffer.expansionStartingAt(Foo2.data()),
770       ValueIs(IsExpansion(SameRange(Foo2),
771                           SameRange(findExpanded("3 + 4 2").drop_back()))));
772 
773   // Function-like macro expansions.
774   recordTokens(R"cpp(
775     #define ID(X) X
776     int a = ID(1+2+3);
777     int b = ID(ID(2+3+4));
778   )cpp");
779 
780   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
781   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
782               ValueIs(IsExpansion(SameRange(ID1),
783                                   SameRange(findExpanded("1 + 2 + 3")))));
784   // Only the first spelled token should be found.
785   for (const auto &T : ID1.drop_front())
786     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
787 
788   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
789   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
790               ValueIs(IsExpansion(SameRange(ID2),
791                                   SameRange(findExpanded("2 + 3 + 4")))));
792   // Only the first spelled token should be found.
793   for (const auto &T : ID2.drop_front())
794     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
795 
796   // PP directives.
797   recordTokens(R"cpp(
798 #define FOO 1
799 int a = FOO;
800 #pragma once
801 int b = 1;
802   )cpp");
803 
804   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
805   EXPECT_THAT(
806       Buffer.expansionStartingAt(&DefineFoo.front()),
807       ValueIs(IsExpansion(SameRange(DefineFoo),
808                           SameRange(findExpanded("int a").take_front(0)))));
809   // Only the first spelled token should be found.
810   for (const auto &T : DefineFoo.drop_front())
811     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
812 
813   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
814   EXPECT_THAT(
815       Buffer.expansionStartingAt(&PragmaOnce.front()),
816       ValueIs(IsExpansion(SameRange(PragmaOnce),
817                           SameRange(findExpanded("int b").take_front(0)))));
818   // Only the first spelled token should be found.
819   for (const auto &T : PragmaOnce.drop_front())
820     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
821 }
822 
823 TEST_F(TokenBufferTest, TokensToFileRange) {
824   addFile("./foo.h", "token_from_header");
825   llvm::Annotations Code(R"cpp(
826     #define FOO token_from_expansion
827     #include "./foo.h"
828     $all[[$i[[int]] a = FOO;]]
829   )cpp");
830   recordTokens(Code.code());
831 
832   auto &SM = *SourceMgr;
833 
834   // Two simple examples.
835   auto Int = findExpanded("int").front();
836   auto Semi = findExpanded(";").front();
837   EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
838                                      Code.range("i").End));
839   EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
840             FileRange(SM.getMainFileID(), Code.range("all").Begin,
841                       Code.range("all").End));
842   // We don't test assertion failures because death tests are slow.
843 }
844 
845 TEST_F(TokenBufferTest, MacroExpansions) {
846   llvm::Annotations Code(R"cpp(
847     #define FOO B
848     #define FOO2 BA
849     #define CALL(X) int X
850     #define G CALL(FOO2)
851     int B;
852     $macro[[FOO]];
853     $macro[[CALL]](A);
854     $macro[[G]];
855   )cpp");
856   recordTokens(Code.code());
857   auto &SM = *SourceMgr;
858   auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
859   std::vector<FileRange> ExpectedMacroRanges;
860   for (auto Range : Code.ranges("macro"))
861     ExpectedMacroRanges.push_back(
862         FileRange(SM.getMainFileID(), Range.Begin, Range.End));
863   std::vector<FileRange> ActualMacroRanges;
864   for (auto Expansion : Expansions)
865     ActualMacroRanges.push_back(Expansion->range(SM));
866   EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
867 }
868 
869 TEST_F(TokenBufferTest, Touching) {
870   llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
871   recordTokens(Code.code());
872 
873   auto Touching = [&](int Index) {
874     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
875                                                    Code.points()[Index]);
876     return spelledTokensTouching(Loc, Buffer);
877   };
878   auto Identifier = [&](int Index) {
879     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
880                                                    Code.points()[Index]);
881     const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer);
882     return Tok ? Tok->text(*SourceMgr) : "";
883   };
884 
885   EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
886   EXPECT_EQ(Identifier(0), "");
887   EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
888   EXPECT_EQ(Identifier(1), "");
889   EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
890   EXPECT_EQ(Identifier(2), "");
891 
892   EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
893   EXPECT_EQ(Identifier(3), "ab");
894   EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
895   EXPECT_EQ(Identifier(4), "ab");
896 
897   EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
898   EXPECT_EQ(Identifier(5), "ab");
899 
900   EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
901   EXPECT_EQ(Identifier(6), "");
902 
903   EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
904   EXPECT_EQ(Identifier(7), "");
905 
906   ASSERT_EQ(Code.points().size(), 8u);
907 }
908 
909 } // namespace
910