xref: /llvm-project/clang/unittests/Tooling/Syntax/TokensTest.cpp (revision 1bf055c9891f1a5ab2ff6a04348bd83fcc0a9cde)
1 //===- TokensTest.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Syntax/Tokens.h"
10 #include "clang/AST/ASTConsumer.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticIDs.h"
14 #include "clang/Basic/DiagnosticOptions.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.def"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Frontend/CompilerInstance.h"
24 #include "clang/Frontend/FrontendAction.h"
25 #include "clang/Frontend/Utils.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "clang/Lex/Token.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/VirtualFileSystem.h"
39 #include "llvm/Support/raw_os_ostream.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Testing/Support/Annotations.h"
42 #include "llvm/Testing/Support/SupportHelpers.h"
43 #include "gmock/gmock.h"
44 #include <cassert>
45 #include <cstdlib>
46 #include <gmock/gmock.h>
47 #include <gtest/gtest.h>
48 #include <memory>
49 #include <ostream>
50 #include <string>
51 
52 using namespace clang;
53 using namespace clang::syntax;
54 
55 using llvm::ValueIs;
56 using ::testing::AllOf;
57 using ::testing::Contains;
58 using ::testing::ElementsAre;
59 using ::testing::Field;
60 using ::testing::IsEmpty;
61 using ::testing::Matcher;
62 using ::testing::Not;
63 using ::testing::Pointee;
64 using ::testing::StartsWith;
65 
66 namespace {
67 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
68 // argument.
69 MATCHER_P(SameRange, A, "") {
70   return A.begin() == arg.begin() && A.end() == arg.end();
71 }
72 
73 Matcher<TokenBuffer::Expansion>
74 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
75             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
76   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
77                Field(&TokenBuffer::Expansion::Expanded, Expanded));
78 }
79 // Matchers for syntax::Token.
80 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
81 MATCHER_P2(HasText, Text, SourceMgr, "") {
82   return arg.text(*SourceMgr) == Text;
83 }
84 /// Checks the start and end location of a token are equal to SourceRng.
85 MATCHER_P(RangeIs, SourceRng, "") {
86   return arg.location() == SourceRng.first &&
87          arg.endLocation() == SourceRng.second;
88 }
89 
90 class TokenCollectorTest : public ::testing::Test {
91 public:
92   /// Run the clang frontend, collect the preprocessed tokens from the frontend
93   /// invocation and store them in this->Buffer.
94   /// This also clears SourceManager before running the compiler.
95   void recordTokens(llvm::StringRef Code) {
96     class RecordTokens : public ASTFrontendAction {
97     public:
98       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
99 
100       bool BeginSourceFileAction(CompilerInstance &CI) override {
101         assert(!Collector && "expected only a single call to BeginSourceFile");
102         Collector.emplace(CI.getPreprocessor());
103         return true;
104       }
105       void EndSourceFileAction() override {
106         assert(Collector && "BeginSourceFileAction was never called");
107         Result = std::move(*Collector).consume();
108       }
109 
110       std::unique_ptr<ASTConsumer>
111       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
112         return std::make_unique<ASTConsumer>();
113       }
114 
115     private:
116       TokenBuffer &Result;
117       llvm::Optional<TokenCollector> Collector;
118     };
119 
120     constexpr const char *FileName = "./input.cpp";
121     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
122     // Prepare to run a compiler.
123     if (!Diags->getClient())
124       Diags->setClient(new IgnoringDiagConsumer);
125     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
126                                       FileName};
127     auto CI = createInvocationFromCommandLine(Args, Diags, FS);
128     assert(CI);
129     CI->getFrontendOpts().DisableFree = false;
130     CI->getPreprocessorOpts().addRemappedFile(
131         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
132     CompilerInstance Compiler;
133     Compiler.setInvocation(std::move(CI));
134     Compiler.setDiagnostics(Diags.get());
135     Compiler.setFileManager(FileMgr.get());
136     Compiler.setSourceManager(SourceMgr.get());
137 
138     this->Buffer = TokenBuffer(*SourceMgr);
139     RecordTokens Recorder(this->Buffer);
140     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
141         << "failed to run the frontend";
142   }
143 
144   /// Record the tokens and return a test dump of the resulting buffer.
145   std::string collectAndDump(llvm::StringRef Code) {
146     recordTokens(Code);
147     return Buffer.dumpForTests();
148   }
149 
150   // Adds a file to the test VFS.
151   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
152     if (!FS->addFile(Path, time_t(),
153                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
154       ADD_FAILURE() << "could not add a file to VFS: " << Path;
155     }
156   }
157 
158   /// Add a new file, run syntax::tokenize() on the range if any, run it on the
159   /// whole file otherwise and return the results.
160   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
161     llvm::Annotations Annot(Text);
162     auto FID = SourceMgr->createFileID(
163         llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
164     // FIXME: pass proper LangOptions.
165     if (Annot.ranges().empty())
166       return syntax::tokenize(FID, *SourceMgr, LangOptions());
167     return syntax::tokenize(
168         syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
169         *SourceMgr, LangOptions());
170   }
171 
172   // Specialized versions of matchers that hide the SourceManager from clients.
173   Matcher<syntax::Token> HasText(std::string Text) const {
174     return ::HasText(Text, SourceMgr.get());
175   }
176   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
177     std::pair<SourceLocation, SourceLocation> Ls;
178     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
179                    .getLocWithOffset(R.Begin);
180     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
181                     .getLocWithOffset(R.End);
182     return ::RangeIs(Ls);
183   }
184 
185   /// Finds a subrange in O(n * m).
186   template <class T, class U, class Eq>
187   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
188                                  llvm::ArrayRef<T> Range, Eq F) {
189     assert(Subrange.size() >= 1);
190     if (Range.size() < Subrange.size())
191       return llvm::makeArrayRef(Range.end(), Range.end());
192     for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
193          Begin <= Last; ++Begin) {
194       auto It = Begin;
195       for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
196            ++ItSub, ++It) {
197         if (!F(*ItSub, *It))
198           goto continue_outer;
199       }
200       return llvm::makeArrayRef(Begin, It);
201     continue_outer:;
202     }
203     return llvm::makeArrayRef(Range.end(), Range.end());
204   }
205 
206   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
207   /// The match should be unique. \p Query is a whitespace-separated list of
208   /// tokens to search for.
209   llvm::ArrayRef<syntax::Token>
210   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
211     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
212     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
213     if (QueryTokens.empty()) {
214       ADD_FAILURE() << "will not look for an empty list of tokens";
215       std::abort();
216     }
217     // An equality test for search.
218     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
219       return Q == T.text(*SourceMgr);
220     };
221     // Find a match.
222     auto Found =
223         findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
224     if (Found.begin() == Tokens.end()) {
225       ADD_FAILURE() << "could not find the subrange for " << Query;
226       std::abort();
227     }
228     // Check that the match is unique.
229     if (findSubrange(llvm::makeArrayRef(QueryTokens),
230                      llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
231             .begin() != Tokens.end()) {
232       ADD_FAILURE() << "match is not unique for " << Query;
233       std::abort();
234     }
235     return Found;
236   };
237 
238   // Specialized versions of findTokenRange for expanded and spelled tokens.
239   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
240     return findTokenRange(Query, Buffer.expandedTokens());
241   }
242   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
243                                             FileID File = FileID()) {
244     if (!File.isValid())
245       File = SourceMgr->getMainFileID();
246     return findTokenRange(Query, Buffer.spelledTokens(File));
247   }
248 
249   // Data fields.
250   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
251       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
252   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
253       new llvm::vfs::InMemoryFileSystem;
254   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
255       new FileManager(FileSystemOptions(), FS);
256   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
257       new SourceManager(*Diags, *FileMgr);
258   /// Contains last result of calling recordTokens().
259   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
260 };
261 
262 TEST_F(TokenCollectorTest, RawMode) {
263   EXPECT_THAT(tokenize("int main() {}"),
264               ElementsAre(Kind(tok::kw_int),
265                           AllOf(HasText("main"), Kind(tok::identifier)),
266                           Kind(tok::l_paren), Kind(tok::r_paren),
267                           Kind(tok::l_brace), Kind(tok::r_brace)));
268   // Comments are ignored for now.
269   EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
270               ElementsAre(Kind(tok::kw_int),
271                           AllOf(HasText("a"), Kind(tok::identifier)),
272                           Kind(tok::semi)));
273   EXPECT_THAT(tokenize("int [[main() {]]}"),
274               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
275                           Kind(tok::l_paren), Kind(tok::r_paren),
276                           Kind(tok::l_brace)));
277   EXPECT_THAT(tokenize("int [[main() {   ]]}"),
278               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
279                           Kind(tok::l_paren), Kind(tok::r_paren),
280                           Kind(tok::l_brace)));
281   // First token is partially parsed, last token is fully included even though
282   // only a part of it is contained in the range.
283   EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
284               ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)),
285                           Kind(tok::l_paren), Kind(tok::r_paren),
286                           Kind(tok::l_brace), Kind(tok::kw_return)));
287 }
288 
289 TEST_F(TokenCollectorTest, Basic) {
290   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
291       {"int main() {}",
292        R"(expanded tokens:
293   int main ( ) { }
294 file './input.cpp'
295   spelled tokens:
296     int main ( ) { }
297   no mappings.
298 )"},
299       // All kinds of whitespace are ignored.
300       {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
301        R"(expanded tokens:
302   int main ( ) { }
303 file './input.cpp'
304   spelled tokens:
305     int main ( ) { }
306   no mappings.
307 )"},
308       // Annotation tokens are ignored.
309       {R"cpp(
310         #pragma GCC visibility push (public)
311         #pragma GCC visibility pop
312       )cpp",
313        R"(expanded tokens:
314   <empty>
315 file './input.cpp'
316   spelled tokens:
317     # pragma GCC visibility push ( public ) # pragma GCC visibility pop
318   mappings:
319     ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
320 )"},
321       // Empty files should not crash.
322       {R"cpp()cpp", R"(expanded tokens:
323   <empty>
324 file './input.cpp'
325   spelled tokens:
326     <empty>
327   no mappings.
328 )"},
329       // Should not crash on errors inside '#define' directives. Error is that
330       // stringification (#B) does not refer to a macro parameter.
331       {
332           R"cpp(
333 a
334 #define MACRO() A #B
335 )cpp",
336           R"(expanded tokens:
337   a
338 file './input.cpp'
339   spelled tokens:
340     a # define MACRO ( ) A # B
341   mappings:
342     ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
343 )"}};
344   for (auto &Test : TestCases)
345     EXPECT_EQ(collectAndDump(Test.first), Test.second)
346         << collectAndDump(Test.first);
347 }
348 
349 TEST_F(TokenCollectorTest, Locations) {
350   // Check locations of the tokens.
351   llvm::Annotations Code(R"cpp(
352     $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
353   )cpp");
354   recordTokens(Code.code());
355   // Check expanded tokens.
356   EXPECT_THAT(
357       Buffer.expandedTokens(),
358       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
359                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
360                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
361                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
362                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
363                   Kind(tok::eof)));
364   // Check spelled tokens.
365   EXPECT_THAT(
366       Buffer.spelledTokens(SourceMgr->getMainFileID()),
367       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
368                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
369                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
370                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
371                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
372 
373   auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
374   for (auto &R : Code.ranges()) {
375     EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)),
376                 Pointee(RangeIs(R)));
377   }
378 }
379 
380 TEST_F(TokenCollectorTest, MacroDirectives) {
381   // Macro directives are not stored anywhere at the moment.
382   std::string Code = R"cpp(
383     #define FOO a
384     #include "unresolved_file.h"
385     #undef FOO
386     #ifdef X
387     #else
388     #endif
389     #ifndef Y
390     #endif
391     #if 1
392     #elif 2
393     #else
394     #endif
395     #pragma once
396     #pragma something lalala
397 
398     int a;
399   )cpp";
400   std::string Expected =
401       "expanded tokens:\n"
402       "  int a ;\n"
403       "file './input.cpp'\n"
404       "  spelled tokens:\n"
405       "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
406       "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
407       "# endif # pragma once # pragma something lalala int a ;\n"
408       "  mappings:\n"
409       "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
410   EXPECT_EQ(collectAndDump(Code), Expected);
411 }
412 
413 TEST_F(TokenCollectorTest, MacroReplacements) {
414   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
415       // A simple object-like macro.
416       {R"cpp(
417     #define INT int const
418     INT a;
419   )cpp",
420        R"(expanded tokens:
421   int const a ;
422 file './input.cpp'
423   spelled tokens:
424     # define INT int const INT a ;
425   mappings:
426     ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
427     ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
428 )"},
429       // A simple function-like macro.
430       {R"cpp(
431     #define INT(a) const int
432     INT(10+10) a;
433   )cpp",
434        R"(expanded tokens:
435   const int a ;
436 file './input.cpp'
437   spelled tokens:
438     # define INT ( a ) const int INT ( 10 + 10 ) a ;
439   mappings:
440     ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
441     ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
442 )"},
443       // Recursive macro replacements.
444       {R"cpp(
445     #define ID(X) X
446     #define INT int const
447     ID(ID(INT)) a;
448   )cpp",
449        R"(expanded tokens:
450   int const a ;
451 file './input.cpp'
452   spelled tokens:
453     # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
454   mappings:
455     ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
456     ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
457 )"},
458       // A little more complicated recursive macro replacements.
459       {R"cpp(
460     #define ADD(X, Y) X+Y
461     #define MULT(X, Y) X*Y
462 
463     int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
464   )cpp",
465        "expanded tokens:\n"
466        "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
467        "file './input.cpp'\n"
468        "  spelled tokens:\n"
469        "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
470        "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
471        "  mappings:\n"
472        "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
473        "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
474       // Empty macro replacement.
475       // FIXME: the #define directives should not be glued together.
476       {R"cpp(
477     #define EMPTY
478     #define EMPTY_FUNC(X)
479     EMPTY
480     EMPTY_FUNC(1+2+3)
481     )cpp",
482        R"(expanded tokens:
483   <empty>
484 file './input.cpp'
485   spelled tokens:
486     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
487   mappings:
488     ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
489     ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
490     ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
491 )"},
492       // File ends with a macro replacement.
493       {R"cpp(
494     #define FOO 10+10;
495     int a = FOO
496     )cpp",
497        R"(expanded tokens:
498   int a = 10 + 10 ;
499 file './input.cpp'
500   spelled tokens:
501     # define FOO 10 + 10 ; int a = FOO
502   mappings:
503     ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
504     ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
505 )"},
506       {R"cpp(
507          #define NUM 42
508          #define ID(a) a
509          #define M 1 + ID
510          M(NUM)
511        )cpp",
512        R"(expanded tokens:
513   1 + 42
514 file './input.cpp'
515   spelled tokens:
516     # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
517   mappings:
518     ['#'_0, 'M'_17) => ['1'_0, '1'_0)
519     ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
520 )"},
521   };
522 
523   for (auto &Test : TestCases) {
524     std::string Dump = collectAndDump(Test.first);
525     EXPECT_EQ(Test.second, Dump) << Dump;
526   }
527 }
528 
529 TEST_F(TokenCollectorTest, SpecialTokens) {
530   // Tokens coming from concatenations.
531   recordTokens(R"cpp(
532     #define CONCAT(a, b) a ## b
533     int a = CONCAT(1, 2);
534   )cpp");
535   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
536               Contains(HasText("12")));
537   // Multi-line tokens with slashes at the end.
538   recordTokens("i\\\nn\\\nt");
539   EXPECT_THAT(Buffer.expandedTokens(),
540               ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
541                           Kind(tok::eof)));
542   // FIXME: test tokens with digraphs and UCN identifiers.
543 }
544 
545 TEST_F(TokenCollectorTest, LateBoundTokens) {
546   // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
547   // but we choose to record them as a single token (for now).
548   llvm::Annotations Code(R"cpp(
549     template <class T>
550     struct foo { int a; };
551     int bar = foo<foo<int$br[[>>]]().a;
552     int baz = 10 $op[[>>]] 2;
553   )cpp");
554   recordTokens(Code.code());
555   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
556               AllOf(Contains(AllOf(Kind(tok::greatergreater),
557                                    RangeIs(Code.range("br")))),
558                     Contains(AllOf(Kind(tok::greatergreater),
559                                    RangeIs(Code.range("op"))))));
560 }
561 
562 TEST_F(TokenCollectorTest, DelayedParsing) {
563   llvm::StringLiteral Code = R"cpp(
564     struct Foo {
565       int method() {
566         // Parser will visit method bodies and initializers multiple times, but
567         // TokenBuffer should only record the first walk over the tokens;
568         return 100;
569       }
570       int a = 10;
571 
572       struct Subclass {
573         void foo() {
574           Foo().method();
575         }
576       };
577     };
578   )cpp";
579   std::string ExpectedTokens =
580       "expanded tokens:\n"
581       "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
582       "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
583   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
584 }
585 
586 TEST_F(TokenCollectorTest, MultiFile) {
587   addFile("./foo.h", R"cpp(
588     #define ADD(X, Y) X+Y
589     int a = 100;
590     #include "bar.h"
591   )cpp");
592   addFile("./bar.h", R"cpp(
593     int b = ADD(1, 2);
594     #define MULT(X, Y) X*Y
595   )cpp");
596   llvm::StringLiteral Code = R"cpp(
597     #include "foo.h"
598     int c = ADD(1, MULT(2,3));
599   )cpp";
600 
601   std::string Expected = R"(expanded tokens:
602   int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
603 file './input.cpp'
604   spelled tokens:
605     # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
606   mappings:
607     ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
608     ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
609 file './foo.h'
610   spelled tokens:
611     # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
612   mappings:
613     ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
614     ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
615 file './bar.h'
616   spelled tokens:
617     int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
618   mappings:
619     ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
620     ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
621 )";
622 
623   EXPECT_EQ(Expected, collectAndDump(Code))
624       << "input: " << Code << "\nresults: " << collectAndDump(Code);
625 }
626 
627 class TokenBufferTest : public TokenCollectorTest {};
628 
629 TEST_F(TokenBufferTest, SpelledByExpanded) {
630   recordTokens(R"cpp(
631     a1 a2 a3 b1 b2
632   )cpp");
633 
634   // Sanity check: expanded and spelled tokens are stored separately.
635   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
636   // Searching for subranges of expanded tokens should give the corresponding
637   // spelled ones.
638   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
639               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
640   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
641               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
642   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
643               ValueIs(SameRange(findSpelled("b1 b2"))));
644 
645   // Test search on simple macro expansions.
646   recordTokens(R"cpp(
647     #define A a1 a2 a3
648     #define B b1 b2
649 
650     A split B
651   )cpp");
652   // Ranges going across expansion boundaries.
653   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
654               ValueIs(SameRange(findSpelled("A split B"))));
655   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
656               ValueIs(SameRange(findSpelled("A split").drop_back())));
657   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
658               ValueIs(SameRange(findSpelled("split B").drop_front())));
659   // Ranges not fully covering macro invocations should fail.
660   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
661   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
662   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
663             llvm::None);
664 
665   // Recursive macro invocations.
666   recordTokens(R"cpp(
667     #define ID(x) x
668     #define B b1 b2
669 
670     ID(ID(ID(a1) a2 a3)) split ID(B)
671   )cpp");
672 
673   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
674               ValueIs(SameRange(findSpelled("( B").drop_front())));
675   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
676               ValueIs(SameRange(findSpelled(
677                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
678   // Mixed ranges with expanded and spelled tokens.
679   EXPECT_THAT(
680       Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
681       ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
682   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
683               ValueIs(SameRange(findSpelled("split ID ( B )"))));
684   // Macro arguments
685   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
686               ValueIs(SameRange(findSpelled("a1"))));
687   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
688               ValueIs(SameRange(findSpelled("a2"))));
689   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
690               ValueIs(SameRange(findSpelled("a3"))));
691   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
692               ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
693   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
694               ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));
695 
696   // Empty macro expansions.
697   recordTokens(R"cpp(
698     #define EMPTY
699     #define ID(X) X
700 
701     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
702     EMPTY EMPTY ID(4 5 6) split2
703     ID(7 8 9) EMPTY EMPTY
704   )cpp");
705   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
706               ValueIs(SameRange(findSpelled("1 2 3"))));
707   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
708               ValueIs(SameRange(findSpelled("4 5 6"))));
709   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
710               ValueIs(SameRange(findSpelled("7 8 9"))));
711 
712   // Empty mappings coming from various directives.
713   recordTokens(R"cpp(
714     #define ID(X) X
715     ID(1)
716     #pragma lalala
717     not_mapped
718   )cpp");
719   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
720               ValueIs(SameRange(findSpelled("not_mapped"))));
721 
722   // Multiple macro arguments
723   recordTokens(R"cpp(
724     #define ID(X) X
725     #define ID2(X, Y) X Y
726 
727     ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
728   )cpp");
729   // Should fail, spans multiple arguments.
730   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
731   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
732               ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
733   EXPECT_THAT(
734       Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
735       ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
736   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
737               ValueIs(SameRange(findSpelled("a5 a6"))));
738   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
739               ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
740   // Should fail, spans multiple invocations.
741   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None);
742 }
743 
744 TEST_F(TokenBufferTest, ExpandedTokensForRange) {
745   recordTokens(R"cpp(
746     #define SIGN(X) X##_washere
747     A SIGN(B) C SIGN(D) E SIGN(F) G
748   )cpp");
749 
750   SourceRange R(findExpanded("C").front().location(),
751                 findExpanded("F_washere").front().location());
752   // Sanity check: expanded and spelled tokens are stored separately.
753   EXPECT_THAT(Buffer.expandedTokens(R),
754               SameRange(findExpanded("C D_washere E F_washere")));
755   EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
756 }
757 
758 TEST_F(TokenBufferTest, ExpansionStartingAt) {
759   // Object-like macro expansions.
760   recordTokens(R"cpp(
761     #define FOO 3+4
762     int a = FOO 1;
763     int b = FOO 2;
764   )cpp");
765 
766   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
767   EXPECT_THAT(
768       Buffer.expansionStartingAt(Foo1.data()),
769       ValueIs(IsExpansion(SameRange(Foo1),
770                           SameRange(findExpanded("3 + 4 1").drop_back()))));
771 
772   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
773   EXPECT_THAT(
774       Buffer.expansionStartingAt(Foo2.data()),
775       ValueIs(IsExpansion(SameRange(Foo2),
776                           SameRange(findExpanded("3 + 4 2").drop_back()))));
777 
778   // Function-like macro expansions.
779   recordTokens(R"cpp(
780     #define ID(X) X
781     int a = ID(1+2+3);
782     int b = ID(ID(2+3+4));
783   )cpp");
784 
785   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
786   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
787               ValueIs(IsExpansion(SameRange(ID1),
788                                   SameRange(findExpanded("1 + 2 + 3")))));
789   // Only the first spelled token should be found.
790   for (const auto &T : ID1.drop_front())
791     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
792 
793   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
794   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
795               ValueIs(IsExpansion(SameRange(ID2),
796                                   SameRange(findExpanded("2 + 3 + 4")))));
797   // Only the first spelled token should be found.
798   for (const auto &T : ID2.drop_front())
799     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
800 
801   // PP directives.
802   recordTokens(R"cpp(
803 #define FOO 1
804 int a = FOO;
805 #pragma once
806 int b = 1;
807   )cpp");
808 
809   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
810   EXPECT_THAT(
811       Buffer.expansionStartingAt(&DefineFoo.front()),
812       ValueIs(IsExpansion(SameRange(DefineFoo),
813                           SameRange(findExpanded("int a").take_front(0)))));
814   // Only the first spelled token should be found.
815   for (const auto &T : DefineFoo.drop_front())
816     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
817 
818   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
819   EXPECT_THAT(
820       Buffer.expansionStartingAt(&PragmaOnce.front()),
821       ValueIs(IsExpansion(SameRange(PragmaOnce),
822                           SameRange(findExpanded("int b").take_front(0)))));
823   // Only the first spelled token should be found.
824   for (const auto &T : PragmaOnce.drop_front())
825     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
826 }
827 
828 TEST_F(TokenBufferTest, TokensToFileRange) {
829   addFile("./foo.h", "token_from_header");
830   llvm::Annotations Code(R"cpp(
831     #define FOO token_from_expansion
832     #include "./foo.h"
833     $all[[$i[[int]] a = FOO;]]
834   )cpp");
835   recordTokens(Code.code());
836 
837   auto &SM = *SourceMgr;
838 
839   // Two simple examples.
840   auto Int = findExpanded("int").front();
841   auto Semi = findExpanded(";").front();
842   EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
843                                      Code.range("i").End));
844   EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
845             FileRange(SM.getMainFileID(), Code.range("all").Begin,
846                       Code.range("all").End));
847   // We don't test assertion failures because death tests are slow.
848 }
849 
850 TEST_F(TokenBufferTest, MacroExpansions) {
851   llvm::Annotations Code(R"cpp(
852     #define FOO B
853     #define FOO2 BA
854     #define CALL(X) int X
855     #define G CALL(FOO2)
856     int B;
857     $macro[[FOO]];
858     $macro[[CALL]](A);
859     $macro[[G]];
860   )cpp");
861   recordTokens(Code.code());
862   auto &SM = *SourceMgr;
863   auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
864   std::vector<FileRange> ExpectedMacroRanges;
865   for (auto Range : Code.ranges("macro"))
866     ExpectedMacroRanges.push_back(
867         FileRange(SM.getMainFileID(), Range.Begin, Range.End));
868   std::vector<FileRange> ActualMacroRanges;
869   for (auto Expansion : Expansions)
870     ActualMacroRanges.push_back(Expansion->range(SM));
871   EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
872 }
873 
874 TEST_F(TokenBufferTest, Touching) {
875   llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
876   recordTokens(Code.code());
877 
878   auto Touching = [&](int Index) {
879     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
880                                                    Code.points()[Index]);
881     return spelledTokensTouching(Loc, Buffer);
882   };
883   auto Identifier = [&](int Index) {
884     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
885                                                    Code.points()[Index]);
886     const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer);
887     return Tok ? Tok->text(*SourceMgr) : "";
888   };
889 
890   EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
891   EXPECT_EQ(Identifier(0), "");
892   EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
893   EXPECT_EQ(Identifier(1), "");
894   EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
895   EXPECT_EQ(Identifier(2), "");
896 
897   EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
898   EXPECT_EQ(Identifier(3), "ab");
899   EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
900   EXPECT_EQ(Identifier(4), "ab");
901 
902   EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
903   EXPECT_EQ(Identifier(5), "ab");
904 
905   EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
906   EXPECT_EQ(Identifier(6), "");
907 
908   EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
909   EXPECT_EQ(Identifier(7), "");
910 
911   ASSERT_EQ(Code.points().size(), 8u);
912 }
913 
914 TEST_F(TokenBufferTest, ExpandedBySpelled) {
915   recordTokens(R"cpp(
916     a1 a2 a3 b1 b2
917   )cpp");
918   // Sanity check: expanded and spelled tokens are stored separately.
919   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
920   // Searching for subranges of expanded tokens should give the corresponding
921   // spelled ones.
922   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
923               ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
924   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
925               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
926   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
927               ElementsAre(SameRange(findExpanded("b1 b2"))));
928 
929   // Test search on simple macro expansions.
930   recordTokens(R"cpp(
931     #define A a1 a2 a3
932     #define B b1 b2
933 
934     A split B
935   )cpp");
936   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
937               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
938   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
939               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
940   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
941               ElementsAre(SameRange(findExpanded("b1 b2"))));
942 
943   // Ranges not fully covering macro expansions should fail.
944   recordTokens(R"cpp(
945     #define ID(x) x
946 
947     ID(a)
948   )cpp");
949   // Spelled don't cover entire mapping (missing ID token) -> empty result
950   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
951   // Spelled don't cover entire mapping (missing ) token) -> empty result
952   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
953 
954   // Recursive macro invocations.
955   recordTokens(R"cpp(
956     #define ID(x) x
957     #define B b1 b2
958 
959     ID(ID(ID(a1) a2 a3)) split ID(B)
960   )cpp");
961 
962   EXPECT_THAT(
963       Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
964       ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
965   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
966               ElementsAre(SameRange(findExpanded("b1 b2"))));
967   EXPECT_THAT(Buffer.expandedForSpelled(
968                   findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
969               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
970   // FIXME: these should succeed, but we do not support macro arguments yet.
971   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
972   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
973               IsEmpty());
974 
975   // Empty macro expansions.
976   recordTokens(R"cpp(
977     #define EMPTY
978     #define ID(X) X
979 
980     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
981     EMPTY EMPTY ID(4 5 6) split2
982     ID(7 8 9) EMPTY EMPTY
983   )cpp");
984   // Covered by empty expansions on one of both of the sides.
985   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
986               ElementsAre(SameRange(findExpanded("1 2 3"))));
987   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
988               ElementsAre(SameRange(findExpanded("4 5 6"))));
989   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
990               ElementsAre(SameRange(findExpanded("7 8 9"))));
991   // Including the empty macro expansions on the side.
992   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
993               ElementsAre(SameRange(findExpanded("1 2 3"))));
994   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
995               ElementsAre(SameRange(findExpanded("1 2 3"))));
996   EXPECT_THAT(
997       Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
998       ElementsAre(SameRange(findExpanded("1 2 3"))));
999 
1000   // Empty mappings coming from various directives.
1001   recordTokens(R"cpp(
1002     #define ID(X) X
1003     ID(1)
1004     #pragma lalala
1005     not_mapped
1006   )cpp");
1007   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
1008               IsEmpty());
1009   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
1010               IsEmpty());
1011 
1012   // Empty macro expansion.
1013   recordTokens(R"cpp(
1014     #define EMPTY
1015     EMPTY int a = 100;
1016   )cpp");
1017   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
1018               IsEmpty());
1019 }
1020 
1021 } // namespace
1022