1 //===- TokensTest.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Syntax/Tokens.h" 10 #include "clang/AST/ASTConsumer.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticIDs.h" 14 #include "clang/Basic/DiagnosticOptions.h" 15 #include "clang/Basic/FileManager.h" 16 #include "clang/Basic/FileSystemOptions.h" 17 #include "clang/Basic/LLVM.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.def" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Frontend/CompilerInstance.h" 24 #include "clang/Frontend/FrontendAction.h" 25 #include "clang/Frontend/Utils.h" 26 #include "clang/Lex/Lexer.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "clang/Lex/Token.h" 29 #include "clang/Tooling/Tooling.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_os_ostream.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Testing/Support/Annotations.h" 42 #include "llvm/Testing/Support/SupportHelpers.h" 43 #include "gmock/gmock.h" 44 #include <cassert> 45 #include <cstdlib> 46 #include <gmock/gmock.h> 47 #include <gtest/gtest.h> 48 #include <memory> 49 #include <ostream> 50 #include <string> 51 52 using namespace clang; 53 using namespace 
clang::syntax;

using llvm::ValueIs;
using ::testing::AllOf;
using ::testing::Contains;
using ::testing::ElementsAre;
using ::testing::Field;
using ::testing::IsEmpty;
using ::testing::Matcher;
using ::testing::Not;
using ::testing::Pointee;
using ::testing::StartsWith;

namespace {
// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
// argument.
// Note that this compares iterators, not elements: two ranges holding equal
// elements at different addresses do not match.
MATCHER_P(SameRange, A, "") {
  return A.begin() == arg.begin() && A.end() == arg.end();
}

// Matches a TokenBuffer::Expansion whose Spelled field satisfies \p Spelled and
// whose Expanded field satisfies \p Expanded.
Matcher<TokenBuffer::Expansion>
IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
            Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
  return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
               Field(&TokenBuffer::Expansion::Expanded, Expanded));
}
// Matchers for syntax::Token.
// Kind(K) matches a token whose kind() compares equal to K.
MATCHER_P(Kind, K, "") { return arg.kind() == K; }
// HasText(Text, SourceMgr) matches a token whose text, looked up through the
// object SourceMgr points to, compares equal to Text.
MATCHER_P2(HasText, Text, SourceMgr, "") {
  return arg.text(*SourceMgr) == Text;
}
/// Checks the start and end location of a token are equal to SourceRng.
/// SourceRng is read through its .first (start) and .second (end) members.
MATCHER_P(RangeIs, SourceRng, "") {
  return arg.location() == SourceRng.first &&
         arg.endLocation() == SourceRng.second;
}

// Test fixture with helpers to run the frontend or the raw tokenizer on a
// snippet and to look up ranges of the recorded tokens by their text.
class TokenCollectorTest : public ::testing::Test {
public:
  /// Run the clang frontend, collect the preprocessed tokens from the frontend
  /// invocation and store them in this->Buffer.
  /// This also clears SourceManager before running the compiler.
95 void recordTokens(llvm::StringRef Code) { 96 class RecordTokens : public ASTFrontendAction { 97 public: 98 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 99 100 bool BeginSourceFileAction(CompilerInstance &CI) override { 101 assert(!Collector && "expected only a single call to BeginSourceFile"); 102 Collector.emplace(CI.getPreprocessor()); 103 return true; 104 } 105 void EndSourceFileAction() override { 106 assert(Collector && "BeginSourceFileAction was never called"); 107 Result = std::move(*Collector).consume(); 108 } 109 110 std::unique_ptr<ASTConsumer> 111 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 112 return std::make_unique<ASTConsumer>(); 113 } 114 115 private: 116 TokenBuffer &Result; 117 llvm::Optional<TokenCollector> Collector; 118 }; 119 120 constexpr const char *FileName = "./input.cpp"; 121 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 122 // Prepare to run a compiler. 123 if (!Diags->getClient()) 124 Diags->setClient(new IgnoringDiagConsumer); 125 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 126 FileName}; 127 auto CI = createInvocationFromCommandLine(Args, Diags, FS); 128 assert(CI); 129 CI->getFrontendOpts().DisableFree = false; 130 CI->getPreprocessorOpts().addRemappedFile( 131 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 132 CompilerInstance Compiler; 133 Compiler.setInvocation(std::move(CI)); 134 Compiler.setDiagnostics(Diags.get()); 135 Compiler.setFileManager(FileMgr.get()); 136 Compiler.setSourceManager(SourceMgr.get()); 137 138 this->Buffer = TokenBuffer(*SourceMgr); 139 RecordTokens Recorder(this->Buffer); 140 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 141 << "failed to run the frontend"; 142 } 143 144 /// Record the tokens and return a test dump of the resulting buffer. 
145 std::string collectAndDump(llvm::StringRef Code) { 146 recordTokens(Code); 147 return Buffer.dumpForTests(); 148 } 149 150 // Adds a file to the test VFS. 151 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 152 if (!FS->addFile(Path, time_t(), 153 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 154 ADD_FAILURE() << "could not add a file to VFS: " << Path; 155 } 156 } 157 158 /// Add a new file, run syntax::tokenize() on the range if any, run it on the 159 /// whole file otherwise and return the results. 160 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 161 llvm::Annotations Annot(Text); 162 auto FID = SourceMgr->createFileID( 163 llvm::MemoryBuffer::getMemBufferCopy(Annot.code())); 164 // FIXME: pass proper LangOptions. 165 if (Annot.ranges().empty()) 166 return syntax::tokenize(FID, *SourceMgr, LangOptions()); 167 return syntax::tokenize( 168 syntax::FileRange(FID, Annot.range().Begin, Annot.range().End), 169 *SourceMgr, LangOptions()); 170 } 171 172 // Specialized versions of matchers that hide the SourceManager from clients. 173 Matcher<syntax::Token> HasText(std::string Text) const { 174 return ::HasText(Text, SourceMgr.get()); 175 } 176 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 177 std::pair<SourceLocation, SourceLocation> Ls; 178 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 179 .getLocWithOffset(R.Begin); 180 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 181 .getLocWithOffset(R.End); 182 return ::RangeIs(Ls); 183 } 184 185 /// Finds a subrange in O(n * m). 
186 template <class T, class U, class Eq> 187 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 188 llvm::ArrayRef<T> Range, Eq F) { 189 assert(Subrange.size() >= 1); 190 if (Range.size() < Subrange.size()) 191 return llvm::makeArrayRef(Range.end(), Range.end()); 192 for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); 193 Begin <= Last; ++Begin) { 194 auto It = Begin; 195 for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); 196 ++ItSub, ++It) { 197 if (!F(*ItSub, *It)) 198 goto continue_outer; 199 } 200 return llvm::makeArrayRef(Begin, It); 201 continue_outer:; 202 } 203 return llvm::makeArrayRef(Range.end(), Range.end()); 204 } 205 206 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 207 /// The match should be unique. \p Query is a whitespace-separated list of 208 /// tokens to search for. 209 llvm::ArrayRef<syntax::Token> 210 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 211 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 212 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 213 if (QueryTokens.empty()) { 214 ADD_FAILURE() << "will not look for an empty list of tokens"; 215 std::abort(); 216 } 217 // An equality test for search. 218 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 219 return Q == T.text(*SourceMgr); 220 }; 221 // Find a match. 222 auto Found = 223 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 224 if (Found.begin() == Tokens.end()) { 225 ADD_FAILURE() << "could not find the subrange for " << Query; 226 std::abort(); 227 } 228 // Check that the match is unique. 
229 if (findSubrange(llvm::makeArrayRef(QueryTokens), 230 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 231 .begin() != Tokens.end()) { 232 ADD_FAILURE() << "match is not unique for " << Query; 233 std::abort(); 234 } 235 return Found; 236 }; 237 238 // Specialized versions of findTokenRange for expanded and spelled tokens. 239 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 240 return findTokenRange(Query, Buffer.expandedTokens()); 241 } 242 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 243 FileID File = FileID()) { 244 if (!File.isValid()) 245 File = SourceMgr->getMainFileID(); 246 return findTokenRange(Query, Buffer.spelledTokens(File)); 247 } 248 249 // Data fields. 250 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = 251 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); 252 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = 253 new llvm::vfs::InMemoryFileSystem; 254 llvm::IntrusiveRefCntPtr<FileManager> FileMgr = 255 new FileManager(FileSystemOptions(), FS); 256 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = 257 new SourceManager(*Diags, *FileMgr); 258 /// Contains last result of calling recordTokens(). 259 TokenBuffer Buffer = TokenBuffer(*SourceMgr); 260 }; 261 262 TEST_F(TokenCollectorTest, RawMode) { 263 EXPECT_THAT(tokenize("int main() {}"), 264 ElementsAre(Kind(tok::kw_int), 265 AllOf(HasText("main"), Kind(tok::identifier)), 266 Kind(tok::l_paren), Kind(tok::r_paren), 267 Kind(tok::l_brace), Kind(tok::r_brace))); 268 // Comments are ignored for now. 
269 EXPECT_THAT(tokenize("/* foo */int a; // more comments"), 270 ElementsAre(Kind(tok::kw_int), 271 AllOf(HasText("a"), Kind(tok::identifier)), 272 Kind(tok::semi))); 273 EXPECT_THAT(tokenize("int [[main() {]]}"), 274 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 275 Kind(tok::l_paren), Kind(tok::r_paren), 276 Kind(tok::l_brace))); 277 EXPECT_THAT(tokenize("int [[main() { ]]}"), 278 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 279 Kind(tok::l_paren), Kind(tok::r_paren), 280 Kind(tok::l_brace))); 281 // First token is partially parsed, last token is fully included even though 282 // only a part of it is contained in the range. 283 EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"), 284 ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)), 285 Kind(tok::l_paren), Kind(tok::r_paren), 286 Kind(tok::l_brace), Kind(tok::kw_return))); 287 } 288 289 TEST_F(TokenCollectorTest, Basic) { 290 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 291 {"int main() {}", 292 R"(expanded tokens: 293 int main ( ) { } 294 file './input.cpp' 295 spelled tokens: 296 int main ( ) { } 297 no mappings. 298 )"}, 299 // All kinds of whitespace are ignored. 300 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", 301 R"(expanded tokens: 302 int main ( ) { } 303 file './input.cpp' 304 spelled tokens: 305 int main ( ) { } 306 no mappings. 307 )"}, 308 // Annotation tokens are ignored. 309 {R"cpp( 310 #pragma GCC visibility push (public) 311 #pragma GCC visibility pop 312 )cpp", 313 R"(expanded tokens: 314 <empty> 315 file './input.cpp' 316 spelled tokens: 317 # pragma GCC visibility push ( public ) # pragma GCC visibility pop 318 mappings: 319 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) 320 )"}, 321 // Empty files should not crash. 322 {R"cpp()cpp", R"(expanded tokens: 323 <empty> 324 file './input.cpp' 325 spelled tokens: 326 <empty> 327 no mappings. 328 )"}, 329 // Should not crash on errors inside '#define' directives. 
Error is that 330 // stringification (#B) does not refer to a macro parameter. 331 { 332 R"cpp( 333 a 334 #define MACRO() A #B 335 )cpp", 336 R"(expanded tokens: 337 a 338 file './input.cpp' 339 spelled tokens: 340 a # define MACRO ( ) A # B 341 mappings: 342 ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) 343 )"}}; 344 for (auto &Test : TestCases) 345 EXPECT_EQ(collectAndDump(Test.first), Test.second) 346 << collectAndDump(Test.first); 347 } 348 349 TEST_F(TokenCollectorTest, Locations) { 350 // Check locations of the tokens. 351 llvm::Annotations Code(R"cpp( 352 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] 353 )cpp"); 354 recordTokens(Code.code()); 355 // Check expanded tokens. 356 EXPECT_THAT( 357 Buffer.expandedTokens(), 358 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 359 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 360 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 361 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 362 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), 363 Kind(tok::eof))); 364 // Check spelled tokens. 365 EXPECT_THAT( 366 Buffer.spelledTokens(SourceMgr->getMainFileID()), 367 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 368 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 369 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 370 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 371 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); 372 373 auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()); 374 for (auto &R : Code.ranges()) { 375 EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)), 376 Pointee(RangeIs(R))); 377 } 378 } 379 380 TEST_F(TokenCollectorTest, MacroDirectives) { 381 // Macro directives are not stored anywhere at the moment. 
382 std::string Code = R"cpp( 383 #define FOO a 384 #include "unresolved_file.h" 385 #undef FOO 386 #ifdef X 387 #else 388 #endif 389 #ifndef Y 390 #endif 391 #if 1 392 #elif 2 393 #else 394 #endif 395 #pragma once 396 #pragma something lalala 397 398 int a; 399 )cpp"; 400 std::string Expected = 401 "expanded tokens:\n" 402 " int a ;\n" 403 "file './input.cpp'\n" 404 " spelled tokens:\n" 405 " # define FOO a # include \"unresolved_file.h\" # undef FOO " 406 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " 407 "# endif # pragma once # pragma something lalala int a ;\n" 408 " mappings:\n" 409 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; 410 EXPECT_EQ(collectAndDump(Code), Expected); 411 } 412 413 TEST_F(TokenCollectorTest, MacroReplacements) { 414 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 415 // A simple object-like macro. 416 {R"cpp( 417 #define INT int const 418 INT a; 419 )cpp", 420 R"(expanded tokens: 421 int const a ; 422 file './input.cpp' 423 spelled tokens: 424 # define INT int const INT a ; 425 mappings: 426 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) 427 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) 428 )"}, 429 // A simple function-like macro. 430 {R"cpp( 431 #define INT(a) const int 432 INT(10+10) a; 433 )cpp", 434 R"(expanded tokens: 435 const int a ; 436 file './input.cpp' 437 spelled tokens: 438 # define INT ( a ) const int INT ( 10 + 10 ) a ; 439 mappings: 440 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) 441 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) 442 )"}, 443 // Recursive macro replacements. 444 {R"cpp( 445 #define ID(X) X 446 #define INT int const 447 ID(ID(INT)) a; 448 )cpp", 449 R"(expanded tokens: 450 int const a ; 451 file './input.cpp' 452 spelled tokens: 453 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; 454 mappings: 455 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) 456 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) 457 )"}, 458 // A little more complicated recursive macro replacements. 
459 {R"cpp( 460 #define ADD(X, Y) X+Y 461 #define MULT(X, Y) X*Y 462 463 int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); 464 )cpp", 465 "expanded tokens:\n" 466 " int a = 1 * 2 + 3 * 4 + 5 ;\n" 467 "file './input.cpp'\n" 468 " spelled tokens:\n" 469 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " 470 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" 471 " mappings:\n" 472 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" 473 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, 474 // Empty macro replacement. 475 // FIXME: the #define directives should not be glued together. 476 {R"cpp( 477 #define EMPTY 478 #define EMPTY_FUNC(X) 479 EMPTY 480 EMPTY_FUNC(1+2+3) 481 )cpp", 482 R"(expanded tokens: 483 <empty> 484 file './input.cpp' 485 spelled tokens: 486 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) 487 mappings: 488 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) 489 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) 490 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) 491 )"}, 492 // File ends with a macro replacement. 493 {R"cpp( 494 #define FOO 10+10; 495 int a = FOO 496 )cpp", 497 R"(expanded tokens: 498 int a = 10 + 10 ; 499 file './input.cpp' 500 spelled tokens: 501 # define FOO 10 + 10 ; int a = FOO 502 mappings: 503 ['#'_0, 'int'_7) => ['int'_0, 'int'_0) 504 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) 505 )"}, 506 {R"cpp( 507 #define NUM 42 508 #define ID(a) a 509 #define M 1 + ID 510 M(NUM) 511 )cpp", 512 R"(expanded tokens: 513 1 + 42 514 file './input.cpp' 515 spelled tokens: 516 # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM ) 517 mappings: 518 ['#'_0, 'M'_17) => ['1'_0, '1'_0) 519 ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3) 520 )"}, 521 }; 522 523 for (auto &Test : TestCases) { 524 std::string Dump = collectAndDump(Test.first); 525 EXPECT_EQ(Test.second, Dump) << Dump; 526 } 527 } 528 529 TEST_F(TokenCollectorTest, SpecialTokens) { 530 // Tokens coming from concatenations. 
531 recordTokens(R"cpp( 532 #define CONCAT(a, b) a ## b 533 int a = CONCAT(1, 2); 534 )cpp"); 535 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 536 Contains(HasText("12"))); 537 // Multi-line tokens with slashes at the end. 538 recordTokens("i\\\nn\\\nt"); 539 EXPECT_THAT(Buffer.expandedTokens(), 540 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), 541 Kind(tok::eof))); 542 // FIXME: test tokens with digraphs and UCN identifiers. 543 } 544 545 TEST_F(TokenCollectorTest, LateBoundTokens) { 546 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), 547 // but we choose to record them as a single token (for now). 548 llvm::Annotations Code(R"cpp( 549 template <class T> 550 struct foo { int a; }; 551 int bar = foo<foo<int$br[[>>]]().a; 552 int baz = 10 $op[[>>]] 2; 553 )cpp"); 554 recordTokens(Code.code()); 555 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 556 AllOf(Contains(AllOf(Kind(tok::greatergreater), 557 RangeIs(Code.range("br")))), 558 Contains(AllOf(Kind(tok::greatergreater), 559 RangeIs(Code.range("op")))))); 560 } 561 562 TEST_F(TokenCollectorTest, DelayedParsing) { 563 llvm::StringLiteral Code = R"cpp( 564 struct Foo { 565 int method() { 566 // Parser will visit method bodies and initializers multiple times, but 567 // TokenBuffer should only record the first walk over the tokens; 568 return 100; 569 } 570 int a = 10; 571 572 struct Subclass { 573 void foo() { 574 Foo().method(); 575 } 576 }; 577 }; 578 )cpp"; 579 std::string ExpectedTokens = 580 "expanded tokens:\n" 581 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " 582 "Subclass { void foo ( ) { Foo ( ) . 
method ( ) ; } } ; } ;\n"; 583 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); 584 } 585 586 TEST_F(TokenCollectorTest, MultiFile) { 587 addFile("./foo.h", R"cpp( 588 #define ADD(X, Y) X+Y 589 int a = 100; 590 #include "bar.h" 591 )cpp"); 592 addFile("./bar.h", R"cpp( 593 int b = ADD(1, 2); 594 #define MULT(X, Y) X*Y 595 )cpp"); 596 llvm::StringLiteral Code = R"cpp( 597 #include "foo.h" 598 int c = ADD(1, MULT(2,3)); 599 )cpp"; 600 601 std::string Expected = R"(expanded tokens: 602 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; 603 file './input.cpp' 604 spelled tokens: 605 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; 606 mappings: 607 ['#'_0, 'int'_3) => ['int'_12, 'int'_12) 608 ['ADD'_6, ';'_17) => ['1'_15, ';'_20) 609 file './foo.h' 610 spelled tokens: 611 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" 612 mappings: 613 ['#'_0, 'int'_11) => ['int'_0, 'int'_0) 614 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) 615 file './bar.h' 616 spelled tokens: 617 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y 618 mappings: 619 ['ADD'_3, ';'_9) => ['1'_8, ';'_11) 620 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) 621 )"; 622 623 EXPECT_EQ(Expected, collectAndDump(Code)) 624 << "input: " << Code << "\nresults: " << collectAndDump(Code); 625 } 626 627 class TokenBufferTest : public TokenCollectorTest {}; 628 629 TEST_F(TokenBufferTest, SpelledByExpanded) { 630 recordTokens(R"cpp( 631 a1 a2 a3 b1 b2 632 )cpp"); 633 634 // Sanity check: expanded and spelled tokens are stored separately. 635 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 636 // Searching for subranges of expanded tokens should give the corresponding 637 // spelled ones. 
638 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), 639 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); 640 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 641 ValueIs(SameRange(findSpelled("a1 a2 a3")))); 642 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 643 ValueIs(SameRange(findSpelled("b1 b2")))); 644 645 // Test search on simple macro expansions. 646 recordTokens(R"cpp( 647 #define A a1 a2 a3 648 #define B b1 b2 649 650 A split B 651 )cpp"); 652 // Ranges going across expansion boundaries. 653 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 654 ValueIs(SameRange(findSpelled("A split B")))); 655 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 656 ValueIs(SameRange(findSpelled("A split").drop_back()))); 657 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 658 ValueIs(SameRange(findSpelled("split B").drop_front()))); 659 // Ranges not fully covering macro invocations should fail. 660 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 661 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); 662 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), 663 llvm::None); 664 665 // Recursive macro invocations. 666 recordTokens(R"cpp( 667 #define ID(x) x 668 #define B b1 b2 669 670 ID(ID(ID(a1) a2 a3)) split ID(B) 671 )cpp"); 672 673 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 674 ValueIs(SameRange(findSpelled("( B").drop_front()))); 675 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 676 ValueIs(SameRange(findSpelled( 677 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); 678 // Mixed ranges with expanded and spelled tokens. 
679 EXPECT_THAT( 680 Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")), 681 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split")))); 682 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")), 683 ValueIs(SameRange(findSpelled("split ID ( B )")))); 684 // Macro arguments 685 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")), 686 ValueIs(SameRange(findSpelled("a1")))); 687 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")), 688 ValueIs(SameRange(findSpelled("a2")))); 689 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")), 690 ValueIs(SameRange(findSpelled("a3")))); 691 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")), 692 ValueIs(SameRange(findSpelled("ID ( a1 ) a2")))); 693 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 694 ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3")))); 695 696 // Empty macro expansions. 697 recordTokens(R"cpp( 698 #define EMPTY 699 #define ID(X) X 700 701 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 702 EMPTY EMPTY ID(4 5 6) split2 703 ID(7 8 9) EMPTY EMPTY 704 )cpp"); 705 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), 706 ValueIs(SameRange(findSpelled("1 2 3")))); 707 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), 708 ValueIs(SameRange(findSpelled("4 5 6")))); 709 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), 710 ValueIs(SameRange(findSpelled("7 8 9")))); 711 712 // Empty mappings coming from various directives. 713 recordTokens(R"cpp( 714 #define ID(X) X 715 ID(1) 716 #pragma lalala 717 not_mapped 718 )cpp"); 719 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), 720 ValueIs(SameRange(findSpelled("not_mapped")))); 721 722 // Multiple macro arguments 723 recordTokens(R"cpp( 724 #define ID(X) X 725 #define ID2(X, Y) X Y 726 727 ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) 728 )cpp"); 729 // Should fail, spans multiple arguments. 
730 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 731 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")), 732 ValueIs(SameRange(findSpelled("ID ( a2 ) a3")))); 733 EXPECT_THAT( 734 Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 735 ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )")))); 736 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")), 737 ValueIs(SameRange(findSpelled("a5 a6")))); 738 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")), 739 ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )")))); 740 // Should fail, spans multiple invocations. 741 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None); 742 } 743 744 TEST_F(TokenBufferTest, ExpandedTokensForRange) { 745 recordTokens(R"cpp( 746 #define SIGN(X) X##_washere 747 A SIGN(B) C SIGN(D) E SIGN(F) G 748 )cpp"); 749 750 SourceRange R(findExpanded("C").front().location(), 751 findExpanded("F_washere").front().location()); 752 // Sanity check: expanded and spelled tokens are stored separately. 753 EXPECT_THAT(Buffer.expandedTokens(R), 754 SameRange(findExpanded("C D_washere E F_washere"))); 755 EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); 756 } 757 758 TEST_F(TokenBufferTest, ExpansionStartingAt) { 759 // Object-like macro expansions. 760 recordTokens(R"cpp( 761 #define FOO 3+4 762 int a = FOO 1; 763 int b = FOO 2; 764 )cpp"); 765 766 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back(); 767 EXPECT_THAT( 768 Buffer.expansionStartingAt(Foo1.data()), 769 ValueIs(IsExpansion(SameRange(Foo1), 770 SameRange(findExpanded("3 + 4 1").drop_back())))); 771 772 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back(); 773 EXPECT_THAT( 774 Buffer.expansionStartingAt(Foo2.data()), 775 ValueIs(IsExpansion(SameRange(Foo2), 776 SameRange(findExpanded("3 + 4 2").drop_back())))); 777 778 // Function-like macro expansions. 
779 recordTokens(R"cpp( 780 #define ID(X) X 781 int a = ID(1+2+3); 782 int b = ID(ID(2+3+4)); 783 )cpp"); 784 785 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); 786 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), 787 ValueIs(IsExpansion(SameRange(ID1), 788 SameRange(findExpanded("1 + 2 + 3"))))); 789 // Only the first spelled token should be found. 790 for (const auto &T : ID1.drop_front()) 791 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 792 793 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); 794 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), 795 ValueIs(IsExpansion(SameRange(ID2), 796 SameRange(findExpanded("2 + 3 + 4"))))); 797 // Only the first spelled token should be found. 798 for (const auto &T : ID2.drop_front()) 799 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 800 801 // PP directives. 802 recordTokens(R"cpp( 803 #define FOO 1 804 int a = FOO; 805 #pragma once 806 int b = 1; 807 )cpp"); 808 809 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); 810 EXPECT_THAT( 811 Buffer.expansionStartingAt(&DefineFoo.front()), 812 ValueIs(IsExpansion(SameRange(DefineFoo), 813 SameRange(findExpanded("int a").take_front(0))))); 814 // Only the first spelled token should be found. 815 for (const auto &T : DefineFoo.drop_front()) 816 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 817 818 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); 819 EXPECT_THAT( 820 Buffer.expansionStartingAt(&PragmaOnce.front()), 821 ValueIs(IsExpansion(SameRange(PragmaOnce), 822 SameRange(findExpanded("int b").take_front(0))))); 823 // Only the first spelled token should be found. 
824 for (const auto &T : PragmaOnce.drop_front()) 825 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 826 } 827 828 TEST_F(TokenBufferTest, TokensToFileRange) { 829 addFile("./foo.h", "token_from_header"); 830 llvm::Annotations Code(R"cpp( 831 #define FOO token_from_expansion 832 #include "./foo.h" 833 $all[[$i[[int]] a = FOO;]] 834 )cpp"); 835 recordTokens(Code.code()); 836 837 auto &SM = *SourceMgr; 838 839 // Two simple examples. 840 auto Int = findExpanded("int").front(); 841 auto Semi = findExpanded(";").front(); 842 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, 843 Code.range("i").End)); 844 EXPECT_EQ(syntax::Token::range(SM, Int, Semi), 845 FileRange(SM.getMainFileID(), Code.range("all").Begin, 846 Code.range("all").End)); 847 // We don't test assertion failures because death tests are slow. 848 } 849 850 TEST_F(TokenBufferTest, MacroExpansions) { 851 llvm::Annotations Code(R"cpp( 852 #define FOO B 853 #define FOO2 BA 854 #define CALL(X) int X 855 #define G CALL(FOO2) 856 int B; 857 $macro[[FOO]]; 858 $macro[[CALL]](A); 859 $macro[[G]]; 860 )cpp"); 861 recordTokens(Code.code()); 862 auto &SM = *SourceMgr; 863 auto Expansions = Buffer.macroExpansions(SM.getMainFileID()); 864 std::vector<FileRange> ExpectedMacroRanges; 865 for (auto Range : Code.ranges("macro")) 866 ExpectedMacroRanges.push_back( 867 FileRange(SM.getMainFileID(), Range.Begin, Range.End)); 868 std::vector<FileRange> ActualMacroRanges; 869 for (auto Expansion : Expansions) 870 ActualMacroRanges.push_back(Expansion->range(SM)); 871 EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); 872 } 873 874 TEST_F(TokenBufferTest, Touching) { 875 llvm::Annotations Code("^i^nt^ ^a^b^=^1;^"); 876 recordTokens(Code.code()); 877 878 auto Touching = [&](int Index) { 879 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 880 Code.points()[Index]); 881 return spelledTokensTouching(Loc, Buffer); 882 }; 883 auto Identifier = [&](int Index) { 884 
SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 885 Code.points()[Index]); 886 const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer); 887 return Tok ? Tok->text(*SourceMgr) : ""; 888 }; 889 890 EXPECT_THAT(Touching(0), SameRange(findSpelled("int"))); 891 EXPECT_EQ(Identifier(0), ""); 892 EXPECT_THAT(Touching(1), SameRange(findSpelled("int"))); 893 EXPECT_EQ(Identifier(1), ""); 894 EXPECT_THAT(Touching(2), SameRange(findSpelled("int"))); 895 EXPECT_EQ(Identifier(2), ""); 896 897 EXPECT_THAT(Touching(3), SameRange(findSpelled("ab"))); 898 EXPECT_EQ(Identifier(3), "ab"); 899 EXPECT_THAT(Touching(4), SameRange(findSpelled("ab"))); 900 EXPECT_EQ(Identifier(4), "ab"); 901 902 EXPECT_THAT(Touching(5), SameRange(findSpelled("ab ="))); 903 EXPECT_EQ(Identifier(5), "ab"); 904 905 EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1"))); 906 EXPECT_EQ(Identifier(6), ""); 907 908 EXPECT_THAT(Touching(7), SameRange(findSpelled(";"))); 909 EXPECT_EQ(Identifier(7), ""); 910 911 ASSERT_EQ(Code.points().size(), 8u); 912 } 913 914 TEST_F(TokenBufferTest, ExpandedBySpelled) { 915 recordTokens(R"cpp( 916 a1 a2 a3 b1 b2 917 )cpp"); 918 // Sanity check: expanded and spelled tokens are stored separately. 919 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 920 // Searching for subranges of expanded tokens should give the corresponding 921 // spelled ones. 922 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")), 923 ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2")))); 924 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")), 925 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 926 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")), 927 ElementsAre(SameRange(findExpanded("b1 b2")))); 928 929 // Test search on simple macro expansions. 
930 recordTokens(R"cpp( 931 #define A a1 a2 a3 932 #define B b1 b2 933 934 A split B 935 )cpp"); 936 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")), 937 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); 938 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()), 939 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 940 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()), 941 ElementsAre(SameRange(findExpanded("b1 b2")))); 942 943 // Ranges not fully covering macro expansions should fail. 944 recordTokens(R"cpp( 945 #define ID(x) x 946 947 ID(a) 948 )cpp"); 949 // Spelled don't cover entire mapping (missing ID token) -> empty result 950 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty()); 951 // Spelled don't cover entire mapping (missing ) token) -> empty result 952 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty()); 953 954 // Recursive macro invocations. 955 recordTokens(R"cpp( 956 #define ID(x) x 957 #define B b1 b2 958 959 ID(ID(ID(a1) a2 a3)) split ID(B) 960 )cpp"); 961 962 EXPECT_THAT( 963 Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")), 964 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 965 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")), 966 ElementsAre(SameRange(findExpanded("b1 b2")))); 967 EXPECT_THAT(Buffer.expandedForSpelled( 968 findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")), 969 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); 970 // FIXME: these should succeed, but we do not support macro arguments yet. 971 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty()); 972 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")), 973 IsEmpty()); 974 975 // Empty macro expansions. 
976 recordTokens(R"cpp( 977 #define EMPTY 978 #define ID(X) X 979 980 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 981 EMPTY EMPTY ID(4 5 6) split2 982 ID(7 8 9) EMPTY EMPTY 983 )cpp"); 984 // Covered by empty expansions on one of both of the sides. 985 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")), 986 ElementsAre(SameRange(findExpanded("1 2 3")))); 987 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")), 988 ElementsAre(SameRange(findExpanded("4 5 6")))); 989 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")), 990 ElementsAre(SameRange(findExpanded("7 8 9")))); 991 // Including the empty macro expansions on the side. 992 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")), 993 ElementsAre(SameRange(findExpanded("1 2 3")))); 994 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")), 995 ElementsAre(SameRange(findExpanded("1 2 3")))); 996 EXPECT_THAT( 997 Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")), 998 ElementsAre(SameRange(findExpanded("1 2 3")))); 999 1000 // Empty mappings coming from various directives. 1001 recordTokens(R"cpp( 1002 #define ID(X) X 1003 ID(1) 1004 #pragma lalala 1005 not_mapped 1006 )cpp"); 1007 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")), 1008 IsEmpty()); 1009 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")), 1010 IsEmpty()); 1011 1012 // Empty macro expansion. 1013 recordTokens(R"cpp( 1014 #define EMPTY 1015 EMPTY int a = 100; 1016 )cpp"); 1017 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()), 1018 IsEmpty()); 1019 } 1020 1021 } // namespace 1022