1 //===- TokensTest.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Syntax/Tokens.h" 10 #include "clang/AST/ASTConsumer.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticIDs.h" 14 #include "clang/Basic/DiagnosticOptions.h" 15 #include "clang/Basic/FileManager.h" 16 #include "clang/Basic/FileSystemOptions.h" 17 #include "clang/Basic/LLVM.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.def" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Frontend/CompilerInstance.h" 24 #include "clang/Frontend/FrontendAction.h" 25 #include "clang/Frontend/Utils.h" 26 #include "clang/Lex/Lexer.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "clang/Lex/Token.h" 29 #include "clang/Tooling/Tooling.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_os_ostream.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Testing/Support/Annotations.h" 42 #include "llvm/Testing/Support/SupportHelpers.h" 43 #include "gmock/gmock.h" 44 #include <cassert> 45 #include <cstdlib> 46 #include <gmock/gmock.h> 47 #include <gtest/gtest.h> 48 #include <memory> 49 #include <ostream> 50 #include <string> 51 52 using namespace clang; 53 using namespace clang::syntax; 54 55 using llvm::ValueIs; 56 using ::testing::_; 57 using ::testing::AllOf; 58 using ::testing::Contains; 59 using ::testing::ElementsAre; 60 using ::testing::Field; 61 using ::testing::IsEmpty; 62 using ::testing::Matcher; 63 using ::testing::Not; 64 using ::testing::Pointee; 65 using ::testing::StartsWith; 66 67 namespace { 68 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the 69 // argument. 70 MATCHER_P(SameRange, A, "") { 71 return A.begin() == arg.begin() && A.end() == arg.end(); 72 } 73 74 Matcher<TokenBuffer::Expansion> 75 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, 76 Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { 77 return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled), 78 Field(&TokenBuffer::Expansion::Expanded, Expanded)); 79 } 80 // Matchers for syntax::Token. 81 MATCHER_P(Kind, K, "") { return arg.kind() == K; } 82 MATCHER_P2(HasText, Text, SourceMgr, "") { 83 return arg.text(*SourceMgr) == Text; 84 } 85 /// Checks the start and end location of a token are equal to SourceRng. 86 MATCHER_P(RangeIs, SourceRng, "") { 87 return arg.location() == SourceRng.first && 88 arg.endLocation() == SourceRng.second; 89 } 90 91 class TokenCollectorTest : public ::testing::Test { 92 public: 93 /// Run the clang frontend, collect the preprocessed tokens from the frontend 94 /// invocation and store them in this->Buffer. 95 /// This also clears SourceManager before running the compiler. 96 void recordTokens(llvm::StringRef Code) { 97 class RecordTokens : public ASTFrontendAction { 98 public: 99 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 100 101 bool BeginSourceFileAction(CompilerInstance &CI) override { 102 assert(!Collector && "expected only a single call to BeginSourceFile"); 103 Collector.emplace(CI.getPreprocessor()); 104 return true; 105 } 106 void EndSourceFileAction() override { 107 assert(Collector && "BeginSourceFileAction was never called"); 108 Result = std::move(*Collector).consume(); 109 } 110 111 std::unique_ptr<ASTConsumer> 112 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 113 return std::make_unique<ASTConsumer>(); 114 } 115 116 private: 117 TokenBuffer &Result; 118 llvm::Optional<TokenCollector> Collector; 119 }; 120 121 constexpr const char *FileName = "./input.cpp"; 122 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 123 // Prepare to run a compiler. 124 if (!Diags->getClient()) 125 Diags->setClient(new IgnoringDiagConsumer); 126 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 127 FileName}; 128 auto CI = createInvocationFromCommandLine(Args, Diags, FS); 129 assert(CI); 130 CI->getFrontendOpts().DisableFree = false; 131 CI->getPreprocessorOpts().addRemappedFile( 132 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 133 CompilerInstance Compiler; 134 Compiler.setInvocation(std::move(CI)); 135 Compiler.setDiagnostics(Diags.get()); 136 Compiler.setFileManager(FileMgr.get()); 137 Compiler.setSourceManager(SourceMgr.get()); 138 139 this->Buffer = TokenBuffer(*SourceMgr); 140 RecordTokens Recorder(this->Buffer); 141 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 142 << "failed to run the frontend"; 143 } 144 145 /// Record the tokens and return a test dump of the resulting buffer. 146 std::string collectAndDump(llvm::StringRef Code) { 147 recordTokens(Code); 148 return Buffer.dumpForTests(); 149 } 150 151 // Adds a file to the test VFS. 152 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 153 if (!FS->addFile(Path, time_t(), 154 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 155 ADD_FAILURE() << "could not add a file to VFS: " << Path; 156 } 157 } 158 159 /// Add a new file, run syntax::tokenize() on the range if any, run it on the 160 /// whole file otherwise and return the results. 161 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 162 llvm::Annotations Annot(Text); 163 auto FID = SourceMgr->createFileID( 164 llvm::MemoryBuffer::getMemBufferCopy(Annot.code())); 165 // FIXME: pass proper LangOptions. 166 if (Annot.ranges().empty()) 167 return syntax::tokenize(FID, *SourceMgr, LangOptions()); 168 return syntax::tokenize( 169 syntax::FileRange(FID, Annot.range().Begin, Annot.range().End), 170 *SourceMgr, LangOptions()); 171 } 172 173 // Specialized versions of matchers that hide the SourceManager from clients. 174 Matcher<syntax::Token> HasText(std::string Text) const { 175 return ::HasText(Text, SourceMgr.get()); 176 } 177 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 178 std::pair<SourceLocation, SourceLocation> Ls; 179 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 180 .getLocWithOffset(R.Begin); 181 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 182 .getLocWithOffset(R.End); 183 return ::RangeIs(Ls); 184 } 185 186 /// Finds a subrange in O(n * m). 187 template <class T, class U, class Eq> 188 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 189 llvm::ArrayRef<T> Range, Eq F) { 190 assert(Subrange.size() >= 1); 191 if (Range.size() < Subrange.size()) 192 return llvm::makeArrayRef(Range.end(), Range.end()); 193 for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); 194 Begin <= Last; ++Begin) { 195 auto It = Begin; 196 for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); 197 ++ItSub, ++It) { 198 if (!F(*ItSub, *It)) 199 goto continue_outer; 200 } 201 return llvm::makeArrayRef(Begin, It); 202 continue_outer:; 203 } 204 return llvm::makeArrayRef(Range.end(), Range.end()); 205 } 206 207 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 208 /// The match should be unique. \p Query is a whitespace-separated list of 209 /// tokens to search for. 210 llvm::ArrayRef<syntax::Token> 211 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 212 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 213 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 214 if (QueryTokens.empty()) { 215 ADD_FAILURE() << "will not look for an empty list of tokens"; 216 std::abort(); 217 } 218 // An equality test for search. 219 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 220 return Q == T.text(*SourceMgr); 221 }; 222 // Find a match. 223 auto Found = 224 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 225 if (Found.begin() == Tokens.end()) { 226 ADD_FAILURE() << "could not find the subrange for " << Query; 227 std::abort(); 228 } 229 // Check that the match is unique. 230 if (findSubrange(llvm::makeArrayRef(QueryTokens), 231 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 232 .begin() != Tokens.end()) { 233 ADD_FAILURE() << "match is not unique for " << Query; 234 std::abort(); 235 } 236 return Found; 237 }; 238 239 // Specialized versions of findTokenRange for expanded and spelled tokens. 240 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 241 return findTokenRange(Query, Buffer.expandedTokens()); 242 } 243 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 244 FileID File = FileID()) { 245 if (!File.isValid()) 246 File = SourceMgr->getMainFileID(); 247 return findTokenRange(Query, Buffer.spelledTokens(File)); 248 } 249 250 // Data fields. 251 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = 252 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); 253 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = 254 new llvm::vfs::InMemoryFileSystem; 255 llvm::IntrusiveRefCntPtr<FileManager> FileMgr = 256 new FileManager(FileSystemOptions(), FS); 257 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = 258 new SourceManager(*Diags, *FileMgr); 259 /// Contains last result of calling recordTokens(). 260 TokenBuffer Buffer = TokenBuffer(*SourceMgr); 261 }; 262 263 TEST_F(TokenCollectorTest, RawMode) { 264 EXPECT_THAT(tokenize("int main() {}"), 265 ElementsAre(Kind(tok::kw_int), 266 AllOf(HasText("main"), Kind(tok::identifier)), 267 Kind(tok::l_paren), Kind(tok::r_paren), 268 Kind(tok::l_brace), Kind(tok::r_brace))); 269 // Comments are ignored for now. 270 EXPECT_THAT(tokenize("/* foo */int a; // more comments"), 271 ElementsAre(Kind(tok::kw_int), 272 AllOf(HasText("a"), Kind(tok::identifier)), 273 Kind(tok::semi))); 274 EXPECT_THAT(tokenize("int [[main() {]]}"), 275 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 276 Kind(tok::l_paren), Kind(tok::r_paren), 277 Kind(tok::l_brace))); 278 EXPECT_THAT(tokenize("int [[main() { ]]}"), 279 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 280 Kind(tok::l_paren), Kind(tok::r_paren), 281 Kind(tok::l_brace))); 282 // First token is partially parsed, last token is fully included even though 283 // only a part of it is contained in the range. 284 EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"), 285 ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)), 286 Kind(tok::l_paren), Kind(tok::r_paren), 287 Kind(tok::l_brace), Kind(tok::kw_return))); 288 } 289 290 TEST_F(TokenCollectorTest, Basic) { 291 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 292 {"int main() {}", 293 R"(expanded tokens: 294 int main ( ) { } 295 file './input.cpp' 296 spelled tokens: 297 int main ( ) { } 298 no mappings. 299 )"}, 300 // All kinds of whitespace are ignored. 301 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", 302 R"(expanded tokens: 303 int main ( ) { } 304 file './input.cpp' 305 spelled tokens: 306 int main ( ) { } 307 no mappings. 308 )"}, 309 // Annotation tokens are ignored. 310 {R"cpp( 311 #pragma GCC visibility push (public) 312 #pragma GCC visibility pop 313 )cpp", 314 R"(expanded tokens: 315 <empty> 316 file './input.cpp' 317 spelled tokens: 318 # pragma GCC visibility push ( public ) # pragma GCC visibility pop 319 mappings: 320 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) 321 )"}, 322 // Empty files should not crash. 323 {R"cpp()cpp", R"(expanded tokens: 324 <empty> 325 file './input.cpp' 326 spelled tokens: 327 <empty> 328 no mappings. 329 )"}, 330 // Should not crash on errors inside '#define' directives. Error is that 331 // stringification (#B) does not refer to a macro parameter. 332 { 333 R"cpp( 334 a 335 #define MACRO() A #B 336 )cpp", 337 R"(expanded tokens: 338 a 339 file './input.cpp' 340 spelled tokens: 341 a # define MACRO ( ) A # B 342 mappings: 343 ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) 344 )"}}; 345 for (auto &Test : TestCases) 346 EXPECT_EQ(collectAndDump(Test.first), Test.second) 347 << collectAndDump(Test.first); 348 } 349 350 TEST_F(TokenCollectorTest, Locations) { 351 // Check locations of the tokens. 352 llvm::Annotations Code(R"cpp( 353 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] 354 )cpp"); 355 recordTokens(Code.code()); 356 // Check expanded tokens. 357 EXPECT_THAT( 358 Buffer.expandedTokens(), 359 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 360 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 361 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 362 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 363 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), 364 Kind(tok::eof))); 365 // Check spelled tokens. 366 EXPECT_THAT( 367 Buffer.spelledTokens(SourceMgr->getMainFileID()), 368 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 369 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 370 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 371 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 372 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); 373 374 auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()); 375 for (auto &R : Code.ranges()) { 376 EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)), 377 Pointee(RangeIs(R))); 378 } 379 } 380 381 TEST_F(TokenCollectorTest, MacroDirectives) { 382 // Macro directives are not stored anywhere at the moment. 383 std::string Code = R"cpp( 384 #define FOO a 385 #include "unresolved_file.h" 386 #undef FOO 387 #ifdef X 388 #else 389 #endif 390 #ifndef Y 391 #endif 392 #if 1 393 #elif 2 394 #else 395 #endif 396 #pragma once 397 #pragma something lalala 398 399 int a; 400 )cpp"; 401 std::string Expected = 402 "expanded tokens:\n" 403 " int a ;\n" 404 "file './input.cpp'\n" 405 " spelled tokens:\n" 406 " # define FOO a # include \"unresolved_file.h\" # undef FOO " 407 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " 408 "# endif # pragma once # pragma something lalala int a ;\n" 409 " mappings:\n" 410 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; 411 EXPECT_EQ(collectAndDump(Code), Expected); 412 } 413 414 TEST_F(TokenCollectorTest, MacroReplacements) { 415 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 416 // A simple object-like macro. 417 {R"cpp( 418 #define INT int const 419 INT a; 420 )cpp", 421 R"(expanded tokens: 422 int const a ; 423 file './input.cpp' 424 spelled tokens: 425 # define INT int const INT a ; 426 mappings: 427 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) 428 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) 429 )"}, 430 // A simple function-like macro. 431 {R"cpp( 432 #define INT(a) const int 433 INT(10+10) a; 434 )cpp", 435 R"(expanded tokens: 436 const int a ; 437 file './input.cpp' 438 spelled tokens: 439 # define INT ( a ) const int INT ( 10 + 10 ) a ; 440 mappings: 441 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) 442 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) 443 )"}, 444 // Recursive macro replacements. 445 {R"cpp( 446 #define ID(X) X 447 #define INT int const 448 ID(ID(INT)) a; 449 )cpp", 450 R"(expanded tokens: 451 int const a ; 452 file './input.cpp' 453 spelled tokens: 454 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; 455 mappings: 456 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) 457 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) 458 )"}, 459 // A little more complicated recursive macro replacements. 460 {R"cpp( 461 #define ADD(X, Y) X+Y 462 #define MULT(X, Y) X*Y 463 464 int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); 465 )cpp", 466 "expanded tokens:\n" 467 " int a = 1 * 2 + 3 * 4 + 5 ;\n" 468 "file './input.cpp'\n" 469 " spelled tokens:\n" 470 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " 471 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" 472 " mappings:\n" 473 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" 474 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, 475 // Empty macro replacement. 476 // FIXME: the #define directives should not be glued together. 477 {R"cpp( 478 #define EMPTY 479 #define EMPTY_FUNC(X) 480 EMPTY 481 EMPTY_FUNC(1+2+3) 482 )cpp", 483 R"(expanded tokens: 484 <empty> 485 file './input.cpp' 486 spelled tokens: 487 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) 488 mappings: 489 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) 490 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) 491 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) 492 )"}, 493 // File ends with a macro replacement. 494 {R"cpp( 495 #define FOO 10+10; 496 int a = FOO 497 )cpp", 498 R"(expanded tokens: 499 int a = 10 + 10 ; 500 file './input.cpp' 501 spelled tokens: 502 # define FOO 10 + 10 ; int a = FOO 503 mappings: 504 ['#'_0, 'int'_7) => ['int'_0, 'int'_0) 505 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) 506 )"}, 507 {R"cpp( 508 #define NUM 42 509 #define ID(a) a 510 #define M 1 + ID 511 M(NUM) 512 )cpp", 513 R"(expanded tokens: 514 1 + 42 515 file './input.cpp' 516 spelled tokens: 517 # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM ) 518 mappings: 519 ['#'_0, 'M'_17) => ['1'_0, '1'_0) 520 ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3) 521 )"}, 522 }; 523 524 for (auto &Test : TestCases) { 525 std::string Dump = collectAndDump(Test.first); 526 EXPECT_EQ(Test.second, Dump) << Dump; 527 } 528 } 529 530 TEST_F(TokenCollectorTest, SpecialTokens) { 531 // Tokens coming from concatenations. 532 recordTokens(R"cpp( 533 #define CONCAT(a, b) a ## b 534 int a = CONCAT(1, 2); 535 )cpp"); 536 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 537 Contains(HasText("12"))); 538 // Multi-line tokens with slashes at the end. 539 recordTokens("i\\\nn\\\nt"); 540 EXPECT_THAT(Buffer.expandedTokens(), 541 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), 542 Kind(tok::eof))); 543 // FIXME: test tokens with digraphs and UCN identifiers. 544 } 545 546 TEST_F(TokenCollectorTest, LateBoundTokens) { 547 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), 548 // but we choose to record them as a single token (for now). 549 llvm::Annotations Code(R"cpp( 550 template <class T> 551 struct foo { int a; }; 552 int bar = foo<foo<int$br[[>>]]().a; 553 int baz = 10 $op[[>>]] 2; 554 )cpp"); 555 recordTokens(Code.code()); 556 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 557 AllOf(Contains(AllOf(Kind(tok::greatergreater), 558 RangeIs(Code.range("br")))), 559 Contains(AllOf(Kind(tok::greatergreater), 560 RangeIs(Code.range("op")))))); 561 } 562 563 TEST_F(TokenCollectorTest, DelayedParsing) { 564 llvm::StringLiteral Code = R"cpp( 565 struct Foo { 566 int method() { 567 // Parser will visit method bodies and initializers multiple times, but 568 // TokenBuffer should only record the first walk over the tokens; 569 return 100; 570 } 571 int a = 10; 572 573 struct Subclass { 574 void foo() { 575 Foo().method(); 576 } 577 }; 578 }; 579 )cpp"; 580 std::string ExpectedTokens = 581 "expanded tokens:\n" 582 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " 583 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n"; 584 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); 585 } 586 587 TEST_F(TokenCollectorTest, MultiFile) { 588 addFile("./foo.h", R"cpp( 589 #define ADD(X, Y) X+Y 590 int a = 100; 591 #include "bar.h" 592 )cpp"); 593 addFile("./bar.h", R"cpp( 594 int b = ADD(1, 2); 595 #define MULT(X, Y) X*Y 596 )cpp"); 597 llvm::StringLiteral Code = R"cpp( 598 #include "foo.h" 599 int c = ADD(1, MULT(2,3)); 600 )cpp"; 601 602 std::string Expected = R"(expanded tokens: 603 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; 604 file './input.cpp' 605 spelled tokens: 606 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; 607 mappings: 608 ['#'_0, 'int'_3) => ['int'_12, 'int'_12) 609 ['ADD'_6, ';'_17) => ['1'_15, ';'_20) 610 file './foo.h' 611 spelled tokens: 612 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" 613 mappings: 614 ['#'_0, 'int'_11) => ['int'_0, 'int'_0) 615 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) 616 file './bar.h' 617 spelled tokens: 618 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y 619 mappings: 620 ['ADD'_3, ';'_9) => ['1'_8, ';'_11) 621 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) 622 )"; 623 624 EXPECT_EQ(Expected, collectAndDump(Code)) 625 << "input: " << Code << "\nresults: " << collectAndDump(Code); 626 } 627 628 class TokenBufferTest : public TokenCollectorTest {}; 629 630 TEST_F(TokenBufferTest, SpelledByExpanded) { 631 recordTokens(R"cpp( 632 a1 a2 a3 b1 b2 633 )cpp"); 634 635 // Sanity check: expanded and spelled tokens are stored separately. 636 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 637 // Searching for subranges of expanded tokens should give the corresponding 638 // spelled ones. 639 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), 640 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); 641 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 642 ValueIs(SameRange(findSpelled("a1 a2 a3")))); 643 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 644 ValueIs(SameRange(findSpelled("b1 b2")))); 645 646 // Test search on simple macro expansions. 647 recordTokens(R"cpp( 648 #define A a1 a2 a3 649 #define B b1 b2 650 651 A split B 652 )cpp"); 653 // Ranges going across expansion boundaries. 654 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 655 ValueIs(SameRange(findSpelled("A split B")))); 656 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 657 ValueIs(SameRange(findSpelled("A split").drop_back()))); 658 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 659 ValueIs(SameRange(findSpelled("split B").drop_front()))); 660 // Ranges not fully covering macro invocations should fail. 661 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 662 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); 663 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), 664 llvm::None); 665 666 // Recursive macro invocations. 667 recordTokens(R"cpp( 668 #define ID(x) x 669 #define B b1 b2 670 671 ID(ID(ID(a1) a2 a3)) split ID(B) 672 )cpp"); 673 674 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 675 ValueIs(SameRange(findSpelled("( B").drop_front()))); 676 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 677 ValueIs(SameRange(findSpelled( 678 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); 679 // Mixed ranges with expanded and spelled tokens. 680 EXPECT_THAT( 681 Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")), 682 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split")))); 683 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")), 684 ValueIs(SameRange(findSpelled("split ID ( B )")))); 685 // Macro arguments 686 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")), 687 ValueIs(SameRange(findSpelled("a1")))); 688 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")), 689 ValueIs(SameRange(findSpelled("a2")))); 690 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")), 691 ValueIs(SameRange(findSpelled("a3")))); 692 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")), 693 ValueIs(SameRange(findSpelled("ID ( a1 ) a2")))); 694 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 695 ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3")))); 696 697 // Empty macro expansions. 698 recordTokens(R"cpp( 699 #define EMPTY 700 #define ID(X) X 701 702 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 703 EMPTY EMPTY ID(4 5 6) split2 704 ID(7 8 9) EMPTY EMPTY 705 )cpp"); 706 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), 707 ValueIs(SameRange(findSpelled("1 2 3")))); 708 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), 709 ValueIs(SameRange(findSpelled("4 5 6")))); 710 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), 711 ValueIs(SameRange(findSpelled("7 8 9")))); 712 713 // Empty mappings coming from various directives. 714 recordTokens(R"cpp( 715 #define ID(X) X 716 ID(1) 717 #pragma lalala 718 not_mapped 719 )cpp"); 720 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), 721 ValueIs(SameRange(findSpelled("not_mapped")))); 722 723 // Multiple macro arguments 724 recordTokens(R"cpp( 725 #define ID(X) X 726 #define ID2(X, Y) X Y 727 728 ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) 729 )cpp"); 730 // Should fail, spans multiple arguments. 731 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 732 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")), 733 ValueIs(SameRange(findSpelled("ID ( a2 ) a3")))); 734 EXPECT_THAT( 735 Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 736 ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )")))); 737 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")), 738 ValueIs(SameRange(findSpelled("a5 a6")))); 739 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")), 740 ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )")))); 741 // Should fail, spans multiple invocations. 742 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None); 743 } 744 745 TEST_F(TokenBufferTest, ExpandedTokensForRange) { 746 recordTokens(R"cpp( 747 #define SIGN(X) X##_washere 748 A SIGN(B) C SIGN(D) E SIGN(F) G 749 )cpp"); 750 751 SourceRange R(findExpanded("C").front().location(), 752 findExpanded("F_washere").front().location()); 753 // Sanity check: expanded and spelled tokens are stored separately. 754 EXPECT_THAT(Buffer.expandedTokens(R), 755 SameRange(findExpanded("C D_washere E F_washere"))); 756 EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); 757 } 758 759 TEST_F(TokenBufferTest, ExpansionsOverlapping) { 760 // Object-like macro expansions. 761 recordTokens(R"cpp( 762 #define FOO 3+4 763 int a = FOO 1; 764 int b = FOO 2; 765 )cpp"); 766 767 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1"); 768 EXPECT_THAT( 769 Buffer.expansionStartingAt(Foo1.data()), 770 ValueIs(IsExpansion(SameRange(Foo1.drop_back()), 771 SameRange(findExpanded("3 + 4 1").drop_back())))); 772 EXPECT_THAT( 773 Buffer.expansionsOverlapping(Foo1), 774 ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), 775 SameRange(findExpanded("3 + 4 1").drop_back())))); 776 777 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2"); 778 EXPECT_THAT( 779 Buffer.expansionStartingAt(Foo2.data()), 780 ValueIs(IsExpansion(SameRange(Foo2.drop_back()), 781 SameRange(findExpanded("3 + 4 2").drop_back())))); 782 EXPECT_THAT(Buffer.expansionsOverlapping( 783 llvm::makeArrayRef(Foo1.begin(), Foo2.end())), 784 ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _), 785 IsExpansion(SameRange(Foo2.drop_back()), _))); 786 787 // Function-like macro expansions. 788 recordTokens(R"cpp( 789 #define ID(X) X 790 int a = ID(1+2+3); 791 int b = ID(ID(2+3+4)); 792 )cpp"); 793 794 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); 795 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), 796 ValueIs(IsExpansion(SameRange(ID1), 797 SameRange(findExpanded("1 + 2 + 3"))))); 798 // Only the first spelled token should be found. 799 for (const auto &T : ID1.drop_front()) 800 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 801 802 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); 803 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), 804 ValueIs(IsExpansion(SameRange(ID2), 805 SameRange(findExpanded("2 + 3 + 4"))))); 806 // Only the first spelled token should be found. 807 for (const auto &T : ID2.drop_front()) 808 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 809 810 EXPECT_THAT(Buffer.expansionsOverlapping(llvm::makeArrayRef( 811 findSpelled("1 + 2").data(), findSpelled("4").data())), 812 ElementsAre(IsExpansion(SameRange(ID1), _), 813 IsExpansion(SameRange(ID2), _))); 814 815 // PP directives. 816 recordTokens(R"cpp( 817 #define FOO 1 818 int a = FOO; 819 #pragma once 820 int b = 1; 821 )cpp"); 822 823 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); 824 EXPECT_THAT( 825 Buffer.expansionStartingAt(&DefineFoo.front()), 826 ValueIs(IsExpansion(SameRange(DefineFoo), 827 SameRange(findExpanded("int a").take_front(0))))); 828 // Only the first spelled token should be found. 829 for (const auto &T : DefineFoo.drop_front()) 830 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 831 832 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); 833 EXPECT_THAT( 834 Buffer.expansionStartingAt(&PragmaOnce.front()), 835 ValueIs(IsExpansion(SameRange(PragmaOnce), 836 SameRange(findExpanded("int b").take_front(0))))); 837 // Only the first spelled token should be found. 838 for (const auto &T : PragmaOnce.drop_front()) 839 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 840 841 EXPECT_THAT( 842 Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")), 843 ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _), 844 IsExpansion(SameRange(PragmaOnce), _))); 845 } 846 847 TEST_F(TokenBufferTest, TokensToFileRange) { 848 addFile("./foo.h", "token_from_header"); 849 llvm::Annotations Code(R"cpp( 850 #define FOO token_from_expansion 851 #include "./foo.h" 852 $all[[$i[[int]] a = FOO;]] 853 )cpp"); 854 recordTokens(Code.code()); 855 856 auto &SM = *SourceMgr; 857 858 // Two simple examples. 859 auto Int = findExpanded("int").front(); 860 auto Semi = findExpanded(";").front(); 861 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, 862 Code.range("i").End)); 863 EXPECT_EQ(syntax::Token::range(SM, Int, Semi), 864 FileRange(SM.getMainFileID(), Code.range("all").Begin, 865 Code.range("all").End)); 866 // We don't test assertion failures because death tests are slow. 867 } 868 869 TEST_F(TokenBufferTest, MacroExpansions) { 870 llvm::Annotations Code(R"cpp( 871 #define FOO B 872 #define FOO2 BA 873 #define CALL(X) int X 874 #define G CALL(FOO2) 875 int B; 876 $macro[[FOO]]; 877 $macro[[CALL]](A); 878 $macro[[G]]; 879 )cpp"); 880 recordTokens(Code.code()); 881 auto &SM = *SourceMgr; 882 auto Expansions = Buffer.macroExpansions(SM.getMainFileID()); 883 std::vector<FileRange> ExpectedMacroRanges; 884 for (auto Range : Code.ranges("macro")) 885 ExpectedMacroRanges.push_back( 886 FileRange(SM.getMainFileID(), Range.Begin, Range.End)); 887 std::vector<FileRange> ActualMacroRanges; 888 for (auto Expansion : Expansions) 889 ActualMacroRanges.push_back(Expansion->range(SM)); 890 EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); 891 } 892 893 TEST_F(TokenBufferTest, Touching) { 894 llvm::Annotations Code("^i^nt^ ^a^b^=^1;^"); 895 recordTokens(Code.code()); 896 897 auto Touching = [&](int Index) { 898 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 899 Code.points()[Index]); 900 return spelledTokensTouching(Loc, Buffer); 901 }; 902 auto Identifier = [&](int Index) { 903 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 904 Code.points()[Index]); 905 const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer); 906 return Tok ? Tok->text(*SourceMgr) : ""; 907 }; 908 909 EXPECT_THAT(Touching(0), SameRange(findSpelled("int"))); 910 EXPECT_EQ(Identifier(0), ""); 911 EXPECT_THAT(Touching(1), SameRange(findSpelled("int"))); 912 EXPECT_EQ(Identifier(1), ""); 913 EXPECT_THAT(Touching(2), SameRange(findSpelled("int"))); 914 EXPECT_EQ(Identifier(2), ""); 915 916 EXPECT_THAT(Touching(3), SameRange(findSpelled("ab"))); 917 EXPECT_EQ(Identifier(3), "ab"); 918 EXPECT_THAT(Touching(4), SameRange(findSpelled("ab"))); 919 EXPECT_EQ(Identifier(4), "ab"); 920 921 EXPECT_THAT(Touching(5), SameRange(findSpelled("ab ="))); 922 EXPECT_EQ(Identifier(5), "ab"); 923 924 EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1"))); 925 EXPECT_EQ(Identifier(6), ""); 926 927 EXPECT_THAT(Touching(7), SameRange(findSpelled(";"))); 928 EXPECT_EQ(Identifier(7), ""); 929 930 ASSERT_EQ(Code.points().size(), 8u); 931 } 932 933 TEST_F(TokenBufferTest, ExpandedBySpelled) { 934 recordTokens(R"cpp( 935 a1 a2 a3 b1 b2 936 )cpp"); 937 // Sanity check: expanded and spelled tokens are stored separately. 938 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 939 // Searching for subranges of expanded tokens should give the corresponding 940 // spelled ones. 941 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")), 942 ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2")))); 943 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")), 944 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 945 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")), 946 ElementsAre(SameRange(findExpanded("b1 b2")))); 947 948 // Test search on simple macro expansions. 949 recordTokens(R"cpp( 950 #define A a1 a2 a3 951 #define B b1 b2 952 953 A split B 954 )cpp"); 955 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")), 956 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); 957 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()), 958 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 959 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()), 960 ElementsAre(SameRange(findExpanded("b1 b2")))); 961 962 // Ranges not fully covering macro expansions should fail. 963 recordTokens(R"cpp( 964 #define ID(x) x 965 966 ID(a) 967 )cpp"); 968 // Spelled don't cover entire mapping (missing ID token) -> empty result 969 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty()); 970 // Spelled don't cover entire mapping (missing ) token) -> empty result 971 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty()); 972 973 // Recursive macro invocations. 974 recordTokens(R"cpp( 975 #define ID(x) x 976 #define B b1 b2 977 978 ID(ID(ID(a1) a2 a3)) split ID(B) 979 )cpp"); 980 981 EXPECT_THAT( 982 Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")), 983 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 984 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")), 985 ElementsAre(SameRange(findExpanded("b1 b2")))); 986 EXPECT_THAT(Buffer.expandedForSpelled( 987 findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")), 988 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); 989 // FIXME: these should succeed, but we do not support macro arguments yet. 990 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty()); 991 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")), 992 IsEmpty()); 993 994 // Empty macro expansions. 995 recordTokens(R"cpp( 996 #define EMPTY 997 #define ID(X) X 998 999 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 1000 EMPTY EMPTY ID(4 5 6) split2 1001 ID(7 8 9) EMPTY EMPTY 1002 )cpp"); 1003 // Covered by empty expansions on one of both of the sides. 1004 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")), 1005 ElementsAre(SameRange(findExpanded("1 2 3")))); 1006 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")), 1007 ElementsAre(SameRange(findExpanded("4 5 6")))); 1008 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")), 1009 ElementsAre(SameRange(findExpanded("7 8 9")))); 1010 // Including the empty macro expansions on the side. 1011 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")), 1012 ElementsAre(SameRange(findExpanded("1 2 3")))); 1013 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")), 1014 ElementsAre(SameRange(findExpanded("1 2 3")))); 1015 EXPECT_THAT( 1016 Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")), 1017 ElementsAre(SameRange(findExpanded("1 2 3")))); 1018 1019 // Empty mappings coming from various directives. 1020 recordTokens(R"cpp( 1021 #define ID(X) X 1022 ID(1) 1023 #pragma lalala 1024 not_mapped 1025 )cpp"); 1026 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")), 1027 IsEmpty()); 1028 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")), 1029 IsEmpty()); 1030 1031 // Empty macro expansion. 1032 recordTokens(R"cpp( 1033 #define EMPTY 1034 EMPTY int a = 100; 1035 )cpp"); 1036 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()), 1037 IsEmpty()); 1038 } 1039 1040 TEST_F(TokenCollectorTest, Pragmas) { 1041 // Tokens coming from concatenations. 1042 recordTokens(R"cpp( 1043 void foo() { 1044 #pragma unroll 4 1045 for(int i=0;i<4;++i); 1046 } 1047 )cpp"); 1048 } 1049 } // namespace 1050