xref: /llvm-project/clang/unittests/Tooling/Syntax/TokensTest.cpp (revision 8c2cf499e6119be8f3f1a0d42c4bb7e45b0d615d)
1 //===- TokensTest.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Syntax/Tokens.h"
10 #include "clang/AST/ASTConsumer.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticIDs.h"
14 #include "clang/Basic/DiagnosticOptions.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.def"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Frontend/CompilerInstance.h"
24 #include "clang/Frontend/FrontendAction.h"
25 #include "clang/Frontend/Utils.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "clang/Lex/Token.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/VirtualFileSystem.h"
39 #include "llvm/Support/raw_os_ostream.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Testing/Support/Annotations.h"
42 #include "llvm/Testing/Support/SupportHelpers.h"
43 #include "gmock/gmock.h"
44 #include <cassert>
45 #include <cstdlib>
46 #include <gmock/gmock.h>
47 #include <gtest/gtest.h>
48 #include <memory>
49 #include <ostream>
50 #include <string>
51 
52 using namespace clang;
53 using namespace clang::syntax;
54 
55 using llvm::ValueIs;
56 using ::testing::AllOf;
57 using ::testing::Contains;
58 using ::testing::ElementsAre;
59 using ::testing::Field;
60 using ::testing::Matcher;
61 using ::testing::Not;
62 using ::testing::StartsWith;
63 
64 namespace {
65 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
66 // argument.
67 MATCHER_P(SameRange, A, "") {
68   return A.begin() == arg.begin() && A.end() == arg.end();
69 }
70 
71 Matcher<TokenBuffer::Expansion>
72 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
73             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
74   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
75                Field(&TokenBuffer::Expansion::Expanded, Expanded));
76 }
77 // Matchers for syntax::Token.
78 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
79 MATCHER_P2(HasText, Text, SourceMgr, "") {
80   return arg.text(*SourceMgr) == Text;
81 }
82 /// Checks the start and end location of a token are equal to SourceRng.
83 MATCHER_P(RangeIs, SourceRng, "") {
84   return arg.location() == SourceRng.first &&
85          arg.endLocation() == SourceRng.second;
86 }
87 
88 class TokenCollectorTest : public ::testing::Test {
89 public:
90   /// Run the clang frontend, collect the preprocessed tokens from the frontend
91   /// invocation and store them in this->Buffer.
92   /// This also clears SourceManager before running the compiler.
93   void recordTokens(llvm::StringRef Code) {
94     class RecordTokens : public ASTFrontendAction {
95     public:
96       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
97 
98       bool BeginSourceFileAction(CompilerInstance &CI) override {
99         assert(!Collector && "expected only a single call to BeginSourceFile");
100         Collector.emplace(CI.getPreprocessor());
101         return true;
102       }
103       void EndSourceFileAction() override {
104         assert(Collector && "BeginSourceFileAction was never called");
105         Result = std::move(*Collector).consume();
106       }
107 
108       std::unique_ptr<ASTConsumer>
109       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
110         return std::make_unique<ASTConsumer>();
111       }
112 
113     private:
114       TokenBuffer &Result;
115       llvm::Optional<TokenCollector> Collector;
116     };
117 
118     constexpr const char *FileName = "./input.cpp";
119     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
120     // Prepare to run a compiler.
121     if (!Diags->getClient())
122       Diags->setClient(new IgnoringDiagConsumer);
123     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
124                                       FileName};
125     auto CI = createInvocationFromCommandLine(Args, Diags, FS);
126     assert(CI);
127     CI->getFrontendOpts().DisableFree = false;
128     CI->getPreprocessorOpts().addRemappedFile(
129         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
130     CompilerInstance Compiler;
131     Compiler.setInvocation(std::move(CI));
132     Compiler.setDiagnostics(Diags.get());
133     Compiler.setFileManager(FileMgr.get());
134     Compiler.setSourceManager(SourceMgr.get());
135 
136     this->Buffer = TokenBuffer(*SourceMgr);
137     RecordTokens Recorder(this->Buffer);
138     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
139         << "failed to run the frontend";
140   }
141 
142   /// Record the tokens and return a test dump of the resulting buffer.
143   std::string collectAndDump(llvm::StringRef Code) {
144     recordTokens(Code);
145     return Buffer.dumpForTests();
146   }
147 
148   // Adds a file to the test VFS.
149   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
150     if (!FS->addFile(Path, time_t(),
151                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
152       ADD_FAILURE() << "could not add a file to VFS: " << Path;
153     }
154   }
155 
156   /// Add a new file, run syntax::tokenize() on the range if any, run it on the
157   /// whole file otherwise and return the results.
158   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
159     llvm::Annotations Annot(Text);
160     auto FID = SourceMgr->createFileID(
161         llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
162     // FIXME: pass proper LangOptions.
163     if (Annot.ranges().empty())
164       return syntax::tokenize(FID, *SourceMgr, LangOptions());
165     return syntax::tokenize(
166         syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
167         *SourceMgr, LangOptions());
168   }
169 
170   // Specialized versions of matchers that hide the SourceManager from clients.
171   Matcher<syntax::Token> HasText(std::string Text) const {
172     return ::HasText(Text, SourceMgr.get());
173   }
174   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
175     std::pair<SourceLocation, SourceLocation> Ls;
176     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
177                    .getLocWithOffset(R.Begin);
178     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
179                     .getLocWithOffset(R.End);
180     return ::RangeIs(Ls);
181   }
182 
183   /// Finds a subrange in O(n * m).
184   template <class T, class U, class Eq>
185   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
186                                  llvm::ArrayRef<T> Range, Eq F) {
187     for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
188       auto It = Begin;
189       for (auto ItSub = Subrange.begin();
190            ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
191         if (!F(*ItSub, *It))
192           goto continue_outer;
193       }
194       return llvm::makeArrayRef(Begin, It);
195     continue_outer:;
196     }
197     return llvm::makeArrayRef(Range.end(), Range.end());
198   }
199 
200   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
201   /// The match should be unique. \p Query is a whitespace-separated list of
202   /// tokens to search for.
203   llvm::ArrayRef<syntax::Token>
204   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
205     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
206     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
207     if (QueryTokens.empty()) {
208       ADD_FAILURE() << "will not look for an empty list of tokens";
209       std::abort();
210     }
211     // An equality test for search.
212     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
213       return Q == T.text(*SourceMgr);
214     };
215     // Find a match.
216     auto Found =
217         findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
218     if (Found.begin() == Tokens.end()) {
219       ADD_FAILURE() << "could not find the subrange for " << Query;
220       std::abort();
221     }
222     // Check that the match is unique.
223     if (findSubrange(llvm::makeArrayRef(QueryTokens),
224                      llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
225             .begin() != Tokens.end()) {
226       ADD_FAILURE() << "match is not unique for " << Query;
227       std::abort();
228     }
229     return Found;
230   };
231 
232   // Specialized versions of findTokenRange for expanded and spelled tokens.
233   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
234     return findTokenRange(Query, Buffer.expandedTokens());
235   }
236   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
237                                             FileID File = FileID()) {
238     if (!File.isValid())
239       File = SourceMgr->getMainFileID();
240     return findTokenRange(Query, Buffer.spelledTokens(File));
241   }
242 
243   // Data fields.
244   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
245       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
246   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
247       new llvm::vfs::InMemoryFileSystem;
248   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
249       new FileManager(FileSystemOptions(), FS);
250   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
251       new SourceManager(*Diags, *FileMgr);
252   /// Contains last result of calling recordTokens().
253   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
254 };
255 
256 TEST_F(TokenCollectorTest, RawMode) {
257   EXPECT_THAT(tokenize("int main() {}"),
258               ElementsAre(Kind(tok::kw_int),
259                           AllOf(HasText("main"), Kind(tok::identifier)),
260                           Kind(tok::l_paren), Kind(tok::r_paren),
261                           Kind(tok::l_brace), Kind(tok::r_brace)));
262   // Comments are ignored for now.
263   EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
264               ElementsAre(Kind(tok::kw_int),
265                           AllOf(HasText("a"), Kind(tok::identifier)),
266                           Kind(tok::semi)));
267   EXPECT_THAT(tokenize("int [[main() {]]}"),
268               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
269                           Kind(tok::l_paren), Kind(tok::r_paren),
270                           Kind(tok::l_brace)));
271   EXPECT_THAT(tokenize("int [[main() {   ]]}"),
272               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
273                           Kind(tok::l_paren), Kind(tok::r_paren),
274                           Kind(tok::l_brace)));
275   // First token is partially parsed, last token is fully included even though
276   // only a part of it is contained in the range.
277   EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
278               ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)),
279                           Kind(tok::l_paren), Kind(tok::r_paren),
280                           Kind(tok::l_brace), Kind(tok::kw_return)));
281 }
282 
283 TEST_F(TokenCollectorTest, Basic) {
284   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
285       {"int main() {}",
286        R"(expanded tokens:
287   int main ( ) { }
288 file './input.cpp'
289   spelled tokens:
290     int main ( ) { }
291   no mappings.
292 )"},
293       // All kinds of whitespace are ignored.
294       {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
295        R"(expanded tokens:
296   int main ( ) { }
297 file './input.cpp'
298   spelled tokens:
299     int main ( ) { }
300   no mappings.
301 )"},
302       // Annotation tokens are ignored.
303       {R"cpp(
304         #pragma GCC visibility push (public)
305         #pragma GCC visibility pop
306       )cpp",
307        R"(expanded tokens:
308   <empty>
309 file './input.cpp'
310   spelled tokens:
311     # pragma GCC visibility push ( public ) # pragma GCC visibility pop
312   mappings:
313     ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
314 )"},
315       // Empty files should not crash.
316       {R"cpp()cpp", R"(expanded tokens:
317   <empty>
318 file './input.cpp'
319   spelled tokens:
320     <empty>
321   no mappings.
322 )"},
323       // Should not crash on errors inside '#define' directives. Error is that
324       // stringification (#B) does not refer to a macro parameter.
325       {
326           R"cpp(
327 a
328 #define MACRO() A #B
329 )cpp",
330           R"(expanded tokens:
331   a
332 file './input.cpp'
333   spelled tokens:
334     a # define MACRO ( ) A # B
335   mappings:
336     ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
337 )"}};
338   for (auto &Test : TestCases)
339     EXPECT_EQ(collectAndDump(Test.first), Test.second)
340         << collectAndDump(Test.first);
341 }
342 
343 TEST_F(TokenCollectorTest, Locations) {
344   // Check locations of the tokens.
345   llvm::Annotations Code(R"cpp(
346     $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
347   )cpp");
348   recordTokens(Code.code());
349   // Check expanded tokens.
350   EXPECT_THAT(
351       Buffer.expandedTokens(),
352       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
353                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
354                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
355                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
356                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
357                   Kind(tok::eof)));
358   // Check spelled tokens.
359   EXPECT_THAT(
360       Buffer.spelledTokens(SourceMgr->getMainFileID()),
361       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
362                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
363                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
364                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
365                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
366 }
367 
368 TEST_F(TokenCollectorTest, MacroDirectives) {
369   // Macro directives are not stored anywhere at the moment.
370   std::string Code = R"cpp(
371     #define FOO a
372     #include "unresolved_file.h"
373     #undef FOO
374     #ifdef X
375     #else
376     #endif
377     #ifndef Y
378     #endif
379     #if 1
380     #elif 2
381     #else
382     #endif
383     #pragma once
384     #pragma something lalala
385 
386     int a;
387   )cpp";
388   std::string Expected =
389       "expanded tokens:\n"
390       "  int a ;\n"
391       "file './input.cpp'\n"
392       "  spelled tokens:\n"
393       "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
394       "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
395       "# endif # pragma once # pragma something lalala int a ;\n"
396       "  mappings:\n"
397       "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
398   EXPECT_EQ(collectAndDump(Code), Expected);
399 }
400 
401 TEST_F(TokenCollectorTest, MacroReplacements) {
402   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
403       // A simple object-like macro.
404       {R"cpp(
405     #define INT int const
406     INT a;
407   )cpp",
408        R"(expanded tokens:
409   int const a ;
410 file './input.cpp'
411   spelled tokens:
412     # define INT int const INT a ;
413   mappings:
414     ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
415     ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
416 )"},
417       // A simple function-like macro.
418       {R"cpp(
419     #define INT(a) const int
420     INT(10+10) a;
421   )cpp",
422        R"(expanded tokens:
423   const int a ;
424 file './input.cpp'
425   spelled tokens:
426     # define INT ( a ) const int INT ( 10 + 10 ) a ;
427   mappings:
428     ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
429     ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
430 )"},
431       // Recursive macro replacements.
432       {R"cpp(
433     #define ID(X) X
434     #define INT int const
435     ID(ID(INT)) a;
436   )cpp",
437        R"(expanded tokens:
438   int const a ;
439 file './input.cpp'
440   spelled tokens:
441     # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
442   mappings:
443     ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
444     ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
445 )"},
446       // A little more complicated recursive macro replacements.
447       {R"cpp(
448     #define ADD(X, Y) X+Y
449     #define MULT(X, Y) X*Y
450 
451     int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
452   )cpp",
453        "expanded tokens:\n"
454        "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
455        "file './input.cpp'\n"
456        "  spelled tokens:\n"
457        "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
458        "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
459        "  mappings:\n"
460        "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
461        "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
462       // Empty macro replacement.
463       // FIXME: the #define directives should not be glued together.
464       {R"cpp(
465     #define EMPTY
466     #define EMPTY_FUNC(X)
467     EMPTY
468     EMPTY_FUNC(1+2+3)
469     )cpp",
470        R"(expanded tokens:
471   <empty>
472 file './input.cpp'
473   spelled tokens:
474     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
475   mappings:
476     ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
477     ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
478     ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
479 )"},
480       // File ends with a macro replacement.
481       {R"cpp(
482     #define FOO 10+10;
483     int a = FOO
484     )cpp",
485        R"(expanded tokens:
486   int a = 10 + 10 ;
487 file './input.cpp'
488   spelled tokens:
489     # define FOO 10 + 10 ; int a = FOO
490   mappings:
491     ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
492     ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
493 )"}};
494 
495   for (auto &Test : TestCases)
496     EXPECT_EQ(Test.second, collectAndDump(Test.first))
497         << collectAndDump(Test.first);
498 }
499 
500 TEST_F(TokenCollectorTest, SpecialTokens) {
501   // Tokens coming from concatenations.
502   recordTokens(R"cpp(
503     #define CONCAT(a, b) a ## b
504     int a = CONCAT(1, 2);
505   )cpp");
506   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
507               Contains(HasText("12")));
508   // Multi-line tokens with slashes at the end.
509   recordTokens("i\\\nn\\\nt");
510   EXPECT_THAT(Buffer.expandedTokens(),
511               ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
512                           Kind(tok::eof)));
513   // FIXME: test tokens with digraphs and UCN identifiers.
514 }
515 
516 TEST_F(TokenCollectorTest, LateBoundTokens) {
517   // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
518   // but we choose to record them as a single token (for now).
519   llvm::Annotations Code(R"cpp(
520     template <class T>
521     struct foo { int a; };
522     int bar = foo<foo<int$br[[>>]]().a;
523     int baz = 10 $op[[>>]] 2;
524   )cpp");
525   recordTokens(Code.code());
526   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
527               AllOf(Contains(AllOf(Kind(tok::greatergreater),
528                                    RangeIs(Code.range("br")))),
529                     Contains(AllOf(Kind(tok::greatergreater),
530                                    RangeIs(Code.range("op"))))));
531 }
532 
533 TEST_F(TokenCollectorTest, DelayedParsing) {
534   llvm::StringLiteral Code = R"cpp(
535     struct Foo {
536       int method() {
537         // Parser will visit method bodies and initializers multiple times, but
538         // TokenBuffer should only record the first walk over the tokens;
539         return 100;
540       }
541       int a = 10;
542 
543       struct Subclass {
544         void foo() {
545           Foo().method();
546         }
547       };
548     };
549   )cpp";
550   std::string ExpectedTokens =
551       "expanded tokens:\n"
552       "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
553       "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
554   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
555 }
556 
557 TEST_F(TokenCollectorTest, MultiFile) {
558   addFile("./foo.h", R"cpp(
559     #define ADD(X, Y) X+Y
560     int a = 100;
561     #include "bar.h"
562   )cpp");
563   addFile("./bar.h", R"cpp(
564     int b = ADD(1, 2);
565     #define MULT(X, Y) X*Y
566   )cpp");
567   llvm::StringLiteral Code = R"cpp(
568     #include "foo.h"
569     int c = ADD(1, MULT(2,3));
570   )cpp";
571 
572   std::string Expected = R"(expanded tokens:
573   int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
574 file './input.cpp'
575   spelled tokens:
576     # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
577   mappings:
578     ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
579     ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
580 file './foo.h'
581   spelled tokens:
582     # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
583   mappings:
584     ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
585     ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
586 file './bar.h'
587   spelled tokens:
588     int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
589   mappings:
590     ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
591     ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
592 )";
593 
594   EXPECT_EQ(Expected, collectAndDump(Code))
595       << "input: " << Code << "\nresults: " << collectAndDump(Code);
596 }
597 
// Fixture for tests of TokenBuffer queries; it adds nothing on top of
// TokenCollectorTest and only gives those tests their own suite name.
class TokenBufferTest : public TokenCollectorTest {};
599 
600 TEST_F(TokenBufferTest, SpelledByExpanded) {
601   recordTokens(R"cpp(
602     a1 a2 a3 b1 b2
603   )cpp");
604 
605   // Sanity check: expanded and spelled tokens are stored separately.
606   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
607   // Searching for subranges of expanded tokens should give the corresponding
608   // spelled ones.
609   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
610               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
611   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
612               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
613   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
614               ValueIs(SameRange(findSpelled("b1 b2"))));
615 
616   // Test search on simple macro expansions.
617   recordTokens(R"cpp(
618     #define A a1 a2 a3
619     #define B b1 b2
620 
621     A split B
622   )cpp");
623   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
624               ValueIs(SameRange(findSpelled("A split B"))));
625   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
626               ValueIs(SameRange(findSpelled("A split").drop_back())));
627   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
628               ValueIs(SameRange(findSpelled("split B").drop_front())));
629   // Ranges not fully covering macro invocations should fail.
630   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
631   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
632   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
633             llvm::None);
634 
635   // Recursive macro invocations.
636   recordTokens(R"cpp(
637     #define ID(x) x
638     #define B b1 b2
639 
640     ID(ID(ID(a1) a2 a3)) split ID(B)
641   )cpp");
642 
643   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
644               ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
645   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
646               ValueIs(SameRange(findSpelled("ID ( B )"))));
647   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
648               ValueIs(SameRange(findSpelled(
649                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
650   // Ranges crossing macro call boundaries.
651   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
652             llvm::None);
653   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
654             llvm::None);
655   // FIXME: next two examples should map to macro arguments, but currently they
656   //        fail.
657   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
658   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
659 
660   // Empty macro expansions.
661   recordTokens(R"cpp(
662     #define EMPTY
663     #define ID(X) X
664 
665     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
666     EMPTY EMPTY ID(4 5 6) split2
667     ID(7 8 9) EMPTY EMPTY
668   )cpp");
669   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
670               ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
671   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
672               ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
673   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
674               ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
675 
676   // Empty mappings coming from various directives.
677   recordTokens(R"cpp(
678     #define ID(X) X
679     ID(1)
680     #pragma lalala
681     not_mapped
682   )cpp");
683   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
684               ValueIs(SameRange(findSpelled("not_mapped"))));
685 }
686 
687 TEST_F(TokenBufferTest, ExpandedTokensForRange) {
688   recordTokens(R"cpp(
689     #define SIGN(X) X##_washere
690     A SIGN(B) C SIGN(D) E SIGN(F) G
691   )cpp");
692 
693   SourceRange R(findExpanded("C").front().location(),
694                 findExpanded("F_washere").front().location());
695   // Sanity check: expanded and spelled tokens are stored separately.
696   EXPECT_THAT(Buffer.expandedTokens(R),
697               SameRange(findExpanded("C D_washere E F_washere")));
698   EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
699 }
700 
701 TEST_F(TokenBufferTest, ExpansionStartingAt) {
702   // Object-like macro expansions.
703   recordTokens(R"cpp(
704     #define FOO 3+4
705     int a = FOO 1;
706     int b = FOO 2;
707   )cpp");
708 
709   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
710   EXPECT_THAT(
711       Buffer.expansionStartingAt(Foo1.data()),
712       ValueIs(IsExpansion(SameRange(Foo1),
713                           SameRange(findExpanded("3 + 4 1").drop_back()))));
714 
715   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
716   EXPECT_THAT(
717       Buffer.expansionStartingAt(Foo2.data()),
718       ValueIs(IsExpansion(SameRange(Foo2),
719                           SameRange(findExpanded("3 + 4 2").drop_back()))));
720 
721   // Function-like macro expansions.
722   recordTokens(R"cpp(
723     #define ID(X) X
724     int a = ID(1+2+3);
725     int b = ID(ID(2+3+4));
726   )cpp");
727 
728   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
729   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
730               ValueIs(IsExpansion(SameRange(ID1),
731                                   SameRange(findExpanded("1 + 2 + 3")))));
732   // Only the first spelled token should be found.
733   for (const auto &T : ID1.drop_front())
734     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
735 
736   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
737   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
738               ValueIs(IsExpansion(SameRange(ID2),
739                                   SameRange(findExpanded("2 + 3 + 4")))));
740   // Only the first spelled token should be found.
741   for (const auto &T : ID2.drop_front())
742     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
743 
744   // PP directives.
745   recordTokens(R"cpp(
746 #define FOO 1
747 int a = FOO;
748 #pragma once
749 int b = 1;
750   )cpp");
751 
752   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
753   EXPECT_THAT(
754       Buffer.expansionStartingAt(&DefineFoo.front()),
755       ValueIs(IsExpansion(SameRange(DefineFoo),
756                           SameRange(findExpanded("int a").take_front(0)))));
757   // Only the first spelled token should be found.
758   for (const auto &T : DefineFoo.drop_front())
759     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
760 
761   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
762   EXPECT_THAT(
763       Buffer.expansionStartingAt(&PragmaOnce.front()),
764       ValueIs(IsExpansion(SameRange(PragmaOnce),
765                           SameRange(findExpanded("int b").take_front(0)))));
766   // Only the first spelled token should be found.
767   for (const auto &T : PragmaOnce.drop_front())
768     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
769 }
770 
771 TEST_F(TokenBufferTest, TokensToFileRange) {
772   addFile("./foo.h", "token_from_header");
773   llvm::Annotations Code(R"cpp(
774     #define FOO token_from_expansion
775     #include "./foo.h"
776     $all[[$i[[int]] a = FOO;]]
777   )cpp");
778   recordTokens(Code.code());
779 
780   auto &SM = *SourceMgr;
781 
782   // Two simple examples.
783   auto Int = findExpanded("int").front();
784   auto Semi = findExpanded(";").front();
785   EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
786                                      Code.range("i").End));
787   EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
788             FileRange(SM.getMainFileID(), Code.range("all").Begin,
789                       Code.range("all").End));
790   // We don't test assertion failures because death tests are slow.
791 }
792 
793 TEST_F(TokenBufferTest, MacroExpansions) {
794   llvm::Annotations Code(R"cpp(
795     #define FOO B
796     #define FOO2 BA
797     #define CALL(X) int X
798     #define G CALL(FOO2)
799     int B;
800     $macro[[FOO]];
801     $macro[[CALL]](A);
802     $macro[[G]];
803   )cpp");
804   recordTokens(Code.code());
805   auto &SM = *SourceMgr;
806   auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
807   std::vector<FileRange> ExpectedMacroRanges;
808   for (auto Range : Code.ranges("macro"))
809     ExpectedMacroRanges.push_back(
810         FileRange(SM.getMainFileID(), Range.Begin, Range.End));
811   std::vector<FileRange> ActualMacroRanges;
812   for (auto Expansion : Expansions)
813     ActualMacroRanges.push_back(Expansion->range(SM));
814   EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
815 }
816 
817 TEST_F(TokenBufferTest, Touching) {
818   llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
819   recordTokens(Code.code());
820 
821   auto Touching = [&](int Index) {
822     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
823                                                    Code.points()[Index]);
824     return spelledTokensTouching(Loc, Buffer);
825   };
826   auto Identifier = [&](int Index) {
827     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
828                                                    Code.points()[Index]);
829     const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer);
830     return Tok ? Tok->text(*SourceMgr) : "";
831   };
832 
833   EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
834   EXPECT_EQ(Identifier(0), "");
835   EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
836   EXPECT_EQ(Identifier(1), "");
837   EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
838   EXPECT_EQ(Identifier(2), "");
839 
840   EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
841   EXPECT_EQ(Identifier(3), "ab");
842   EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
843   EXPECT_EQ(Identifier(4), "ab");
844 
845   EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
846   EXPECT_EQ(Identifier(5), "ab");
847 
848   EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
849   EXPECT_EQ(Identifier(6), "");
850 
851   EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
852   EXPECT_EQ(Identifier(7), "");
853 
854   ASSERT_EQ(Code.points().size(), 8u);
855 }
856 
857 } // namespace
858