1 //===- TokensTest.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Syntax/Tokens.h" 10 #include "clang/AST/ASTConsumer.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticIDs.h" 14 #include "clang/Basic/DiagnosticOptions.h" 15 #include "clang/Basic/FileManager.h" 16 #include "clang/Basic/FileSystemOptions.h" 17 #include "clang/Basic/LLVM.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.def" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Frontend/CompilerInstance.h" 24 #include "clang/Frontend/FrontendAction.h" 25 #include "clang/Frontend/Utils.h" 26 #include "clang/Lex/Lexer.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "clang/Lex/Token.h" 29 #include "clang/Tooling/Tooling.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_os_ostream.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Testing/Support/Annotations.h" 42 #include "llvm/Testing/Support/SupportHelpers.h" 43 #include "gmock/gmock.h" 44 #include <cassert> 45 #include <cstdlib> 46 #include <gmock/gmock.h> 47 #include <gtest/gtest.h> 48 #include <memory> 49 #include <ostream> 50 #include <string> 51 52 using namespace clang; 53 using namespace clang::syntax; 54 55 using llvm::ValueIs; 56 using ::testing::AllOf; 57 using ::testing::Contains; 58 using ::testing::ElementsAre; 59 using ::testing::Field; 60 using ::testing::Matcher; 61 using ::testing::Not; 62 using ::testing::Pointee; 63 using ::testing::StartsWith; 64 65 namespace { 66 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the 67 // argument. 68 MATCHER_P(SameRange, A, "") { 69 return A.begin() == arg.begin() && A.end() == arg.end(); 70 } 71 72 Matcher<TokenBuffer::Expansion> 73 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, 74 Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { 75 return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled), 76 Field(&TokenBuffer::Expansion::Expanded, Expanded)); 77 } 78 // Matchers for syntax::Token. 79 MATCHER_P(Kind, K, "") { return arg.kind() == K; } 80 MATCHER_P2(HasText, Text, SourceMgr, "") { 81 return arg.text(*SourceMgr) == Text; 82 } 83 /// Checks the start and end location of a token are equal to SourceRng. 84 MATCHER_P(RangeIs, SourceRng, "") { 85 return arg.location() == SourceRng.first && 86 arg.endLocation() == SourceRng.second; 87 } 88 89 class TokenCollectorTest : public ::testing::Test { 90 public: 91 /// Run the clang frontend, collect the preprocessed tokens from the frontend 92 /// invocation and store them in this->Buffer. 93 /// This also clears SourceManager before running the compiler. 94 void recordTokens(llvm::StringRef Code) { 95 class RecordTokens : public ASTFrontendAction { 96 public: 97 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 98 99 bool BeginSourceFileAction(CompilerInstance &CI) override { 100 assert(!Collector && "expected only a single call to BeginSourceFile"); 101 Collector.emplace(CI.getPreprocessor()); 102 return true; 103 } 104 void EndSourceFileAction() override { 105 assert(Collector && "BeginSourceFileAction was never called"); 106 Result = std::move(*Collector).consume(); 107 } 108 109 std::unique_ptr<ASTConsumer> 110 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 111 return std::make_unique<ASTConsumer>(); 112 } 113 114 private: 115 TokenBuffer &Result; 116 llvm::Optional<TokenCollector> Collector; 117 }; 118 119 constexpr const char *FileName = "./input.cpp"; 120 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 121 // Prepare to run a compiler. 122 if (!Diags->getClient()) 123 Diags->setClient(new IgnoringDiagConsumer); 124 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 125 FileName}; 126 auto CI = createInvocationFromCommandLine(Args, Diags, FS); 127 assert(CI); 128 CI->getFrontendOpts().DisableFree = false; 129 CI->getPreprocessorOpts().addRemappedFile( 130 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 131 CompilerInstance Compiler; 132 Compiler.setInvocation(std::move(CI)); 133 Compiler.setDiagnostics(Diags.get()); 134 Compiler.setFileManager(FileMgr.get()); 135 Compiler.setSourceManager(SourceMgr.get()); 136 137 this->Buffer = TokenBuffer(*SourceMgr); 138 RecordTokens Recorder(this->Buffer); 139 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 140 << "failed to run the frontend"; 141 } 142 143 /// Record the tokens and return a test dump of the resulting buffer. 144 std::string collectAndDump(llvm::StringRef Code) { 145 recordTokens(Code); 146 return Buffer.dumpForTests(); 147 } 148 149 // Adds a file to the test VFS. 150 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 151 if (!FS->addFile(Path, time_t(), 152 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 153 ADD_FAILURE() << "could not add a file to VFS: " << Path; 154 } 155 } 156 157 /// Add a new file, run syntax::tokenize() on the range if any, run it on the 158 /// whole file otherwise and return the results. 159 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 160 llvm::Annotations Annot(Text); 161 auto FID = SourceMgr->createFileID( 162 llvm::MemoryBuffer::getMemBufferCopy(Annot.code())); 163 // FIXME: pass proper LangOptions. 164 if (Annot.ranges().empty()) 165 return syntax::tokenize(FID, *SourceMgr, LangOptions()); 166 return syntax::tokenize( 167 syntax::FileRange(FID, Annot.range().Begin, Annot.range().End), 168 *SourceMgr, LangOptions()); 169 } 170 171 // Specialized versions of matchers that hide the SourceManager from clients. 172 Matcher<syntax::Token> HasText(std::string Text) const { 173 return ::HasText(Text, SourceMgr.get()); 174 } 175 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 176 std::pair<SourceLocation, SourceLocation> Ls; 177 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 178 .getLocWithOffset(R.Begin); 179 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 180 .getLocWithOffset(R.End); 181 return ::RangeIs(Ls); 182 } 183 184 /// Finds a subrange in O(n * m). 185 template <class T, class U, class Eq> 186 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 187 llvm::ArrayRef<T> Range, Eq F) { 188 for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { 189 auto It = Begin; 190 for (auto ItSub = Subrange.begin(); 191 ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { 192 if (!F(*ItSub, *It)) 193 goto continue_outer; 194 } 195 return llvm::makeArrayRef(Begin, It); 196 continue_outer:; 197 } 198 return llvm::makeArrayRef(Range.end(), Range.end()); 199 } 200 201 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 202 /// The match should be unique. \p Query is a whitespace-separated list of 203 /// tokens to search for. 204 llvm::ArrayRef<syntax::Token> 205 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 206 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 207 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 208 if (QueryTokens.empty()) { 209 ADD_FAILURE() << "will not look for an empty list of tokens"; 210 std::abort(); 211 } 212 // An equality test for search. 213 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 214 return Q == T.text(*SourceMgr); 215 }; 216 // Find a match. 217 auto Found = 218 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 219 if (Found.begin() == Tokens.end()) { 220 ADD_FAILURE() << "could not find the subrange for " << Query; 221 std::abort(); 222 } 223 // Check that the match is unique. 224 if (findSubrange(llvm::makeArrayRef(QueryTokens), 225 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 226 .begin() != Tokens.end()) { 227 ADD_FAILURE() << "match is not unique for " << Query; 228 std::abort(); 229 } 230 return Found; 231 }; 232 233 // Specialized versions of findTokenRange for expanded and spelled tokens. 234 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 235 return findTokenRange(Query, Buffer.expandedTokens()); 236 } 237 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 238 FileID File = FileID()) { 239 if (!File.isValid()) 240 File = SourceMgr->getMainFileID(); 241 return findTokenRange(Query, Buffer.spelledTokens(File)); 242 } 243 244 // Data fields. 245 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = 246 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); 247 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = 248 new llvm::vfs::InMemoryFileSystem; 249 llvm::IntrusiveRefCntPtr<FileManager> FileMgr = 250 new FileManager(FileSystemOptions(), FS); 251 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = 252 new SourceManager(*Diags, *FileMgr); 253 /// Contains last result of calling recordTokens(). 254 TokenBuffer Buffer = TokenBuffer(*SourceMgr); 255 }; 256 257 TEST_F(TokenCollectorTest, RawMode) { 258 EXPECT_THAT(tokenize("int main() {}"), 259 ElementsAre(Kind(tok::kw_int), 260 AllOf(HasText("main"), Kind(tok::identifier)), 261 Kind(tok::l_paren), Kind(tok::r_paren), 262 Kind(tok::l_brace), Kind(tok::r_brace))); 263 // Comments are ignored for now. 264 EXPECT_THAT(tokenize("/* foo */int a; // more comments"), 265 ElementsAre(Kind(tok::kw_int), 266 AllOf(HasText("a"), Kind(tok::identifier)), 267 Kind(tok::semi))); 268 EXPECT_THAT(tokenize("int [[main() {]]}"), 269 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 270 Kind(tok::l_paren), Kind(tok::r_paren), 271 Kind(tok::l_brace))); 272 EXPECT_THAT(tokenize("int [[main() { ]]}"), 273 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 274 Kind(tok::l_paren), Kind(tok::r_paren), 275 Kind(tok::l_brace))); 276 // First token is partially parsed, last token is fully included even though 277 // only a part of it is contained in the range. 278 EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"), 279 ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)), 280 Kind(tok::l_paren), Kind(tok::r_paren), 281 Kind(tok::l_brace), Kind(tok::kw_return))); 282 } 283 284 TEST_F(TokenCollectorTest, Basic) { 285 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 286 {"int main() {}", 287 R"(expanded tokens: 288 int main ( ) { } 289 file './input.cpp' 290 spelled tokens: 291 int main ( ) { } 292 no mappings. 293 )"}, 294 // All kinds of whitespace are ignored. 295 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", 296 R"(expanded tokens: 297 int main ( ) { } 298 file './input.cpp' 299 spelled tokens: 300 int main ( ) { } 301 no mappings. 302 )"}, 303 // Annotation tokens are ignored. 304 {R"cpp( 305 #pragma GCC visibility push (public) 306 #pragma GCC visibility pop 307 )cpp", 308 R"(expanded tokens: 309 <empty> 310 file './input.cpp' 311 spelled tokens: 312 # pragma GCC visibility push ( public ) # pragma GCC visibility pop 313 mappings: 314 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) 315 )"}, 316 // Empty files should not crash. 317 {R"cpp()cpp", R"(expanded tokens: 318 <empty> 319 file './input.cpp' 320 spelled tokens: 321 <empty> 322 no mappings. 323 )"}, 324 // Should not crash on errors inside '#define' directives. Error is that 325 // stringification (#B) does not refer to a macro parameter. 326 { 327 R"cpp( 328 a 329 #define MACRO() A #B 330 )cpp", 331 R"(expanded tokens: 332 a 333 file './input.cpp' 334 spelled tokens: 335 a # define MACRO ( ) A # B 336 mappings: 337 ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) 338 )"}}; 339 for (auto &Test : TestCases) 340 EXPECT_EQ(collectAndDump(Test.first), Test.second) 341 << collectAndDump(Test.first); 342 } 343 344 TEST_F(TokenCollectorTest, Locations) { 345 // Check locations of the tokens. 346 llvm::Annotations Code(R"cpp( 347 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] 348 )cpp"); 349 recordTokens(Code.code()); 350 // Check expanded tokens. 351 EXPECT_THAT( 352 Buffer.expandedTokens(), 353 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 354 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 355 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 356 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 357 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), 358 Kind(tok::eof))); 359 // Check spelled tokens. 360 EXPECT_THAT( 361 Buffer.spelledTokens(SourceMgr->getMainFileID()), 362 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 363 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 364 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 365 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 366 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); 367 368 auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()); 369 for (auto &R : Code.ranges()) { 370 EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)), 371 Pointee(RangeIs(R))); 372 } 373 } 374 375 TEST_F(TokenCollectorTest, MacroDirectives) { 376 // Macro directives are not stored anywhere at the moment. 377 std::string Code = R"cpp( 378 #define FOO a 379 #include "unresolved_file.h" 380 #undef FOO 381 #ifdef X 382 #else 383 #endif 384 #ifndef Y 385 #endif 386 #if 1 387 #elif 2 388 #else 389 #endif 390 #pragma once 391 #pragma something lalala 392 393 int a; 394 )cpp"; 395 std::string Expected = 396 "expanded tokens:\n" 397 " int a ;\n" 398 "file './input.cpp'\n" 399 " spelled tokens:\n" 400 " # define FOO a # include \"unresolved_file.h\" # undef FOO " 401 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " 402 "# endif # pragma once # pragma something lalala int a ;\n" 403 " mappings:\n" 404 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; 405 EXPECT_EQ(collectAndDump(Code), Expected); 406 } 407 408 TEST_F(TokenCollectorTest, MacroReplacements) { 409 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 410 // A simple object-like macro. 411 {R"cpp( 412 #define INT int const 413 INT a; 414 )cpp", 415 R"(expanded tokens: 416 int const a ; 417 file './input.cpp' 418 spelled tokens: 419 # define INT int const INT a ; 420 mappings: 421 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) 422 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) 423 )"}, 424 // A simple function-like macro. 425 {R"cpp( 426 #define INT(a) const int 427 INT(10+10) a; 428 )cpp", 429 R"(expanded tokens: 430 const int a ; 431 file './input.cpp' 432 spelled tokens: 433 # define INT ( a ) const int INT ( 10 + 10 ) a ; 434 mappings: 435 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) 436 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) 437 )"}, 438 // Recursive macro replacements. 439 {R"cpp( 440 #define ID(X) X 441 #define INT int const 442 ID(ID(INT)) a; 443 )cpp", 444 R"(expanded tokens: 445 int const a ; 446 file './input.cpp' 447 spelled tokens: 448 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; 449 mappings: 450 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) 451 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) 452 )"}, 453 // A little more complicated recursive macro replacements. 454 {R"cpp( 455 #define ADD(X, Y) X+Y 456 #define MULT(X, Y) X*Y 457 458 int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); 459 )cpp", 460 "expanded tokens:\n" 461 " int a = 1 * 2 + 3 * 4 + 5 ;\n" 462 "file './input.cpp'\n" 463 " spelled tokens:\n" 464 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " 465 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" 466 " mappings:\n" 467 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" 468 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, 469 // Empty macro replacement. 470 // FIXME: the #define directives should not be glued together. 471 {R"cpp( 472 #define EMPTY 473 #define EMPTY_FUNC(X) 474 EMPTY 475 EMPTY_FUNC(1+2+3) 476 )cpp", 477 R"(expanded tokens: 478 <empty> 479 file './input.cpp' 480 spelled tokens: 481 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) 482 mappings: 483 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) 484 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) 485 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) 486 )"}, 487 // File ends with a macro replacement. 488 {R"cpp( 489 #define FOO 10+10; 490 int a = FOO 491 )cpp", 492 R"(expanded tokens: 493 int a = 10 + 10 ; 494 file './input.cpp' 495 spelled tokens: 496 # define FOO 10 + 10 ; int a = FOO 497 mappings: 498 ['#'_0, 'int'_7) => ['int'_0, 'int'_0) 499 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) 500 )"}, 501 {R"cpp( 502 #define NUM 42 503 #define ID(a) a 504 #define M 1 + ID 505 M(NUM) 506 )cpp", 507 R"(expanded tokens: 508 1 + 42 509 file './input.cpp' 510 spelled tokens: 511 # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM ) 512 mappings: 513 ['#'_0, 'M'_17) => ['1'_0, '1'_0) 514 ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3) 515 )"}, 516 }; 517 518 for (auto &Test : TestCases) { 519 std::string Dump = collectAndDump(Test.first); 520 EXPECT_EQ(Test.second, Dump) << Dump; 521 } 522 } 523 524 TEST_F(TokenCollectorTest, SpecialTokens) { 525 // Tokens coming from concatenations. 526 recordTokens(R"cpp( 527 #define CONCAT(a, b) a ## b 528 int a = CONCAT(1, 2); 529 )cpp"); 530 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 531 Contains(HasText("12"))); 532 // Multi-line tokens with slashes at the end. 533 recordTokens("i\\\nn\\\nt"); 534 EXPECT_THAT(Buffer.expandedTokens(), 535 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), 536 Kind(tok::eof))); 537 // FIXME: test tokens with digraphs and UCN identifiers. 538 } 539 540 TEST_F(TokenCollectorTest, LateBoundTokens) { 541 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), 542 // but we choose to record them as a single token (for now). 543 llvm::Annotations Code(R"cpp( 544 template <class T> 545 struct foo { int a; }; 546 int bar = foo<foo<int$br[[>>]]().a; 547 int baz = 10 $op[[>>]] 2; 548 )cpp"); 549 recordTokens(Code.code()); 550 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 551 AllOf(Contains(AllOf(Kind(tok::greatergreater), 552 RangeIs(Code.range("br")))), 553 Contains(AllOf(Kind(tok::greatergreater), 554 RangeIs(Code.range("op")))))); 555 } 556 557 TEST_F(TokenCollectorTest, DelayedParsing) { 558 llvm::StringLiteral Code = R"cpp( 559 struct Foo { 560 int method() { 561 // Parser will visit method bodies and initializers multiple times, but 562 // TokenBuffer should only record the first walk over the tokens; 563 return 100; 564 } 565 int a = 10; 566 567 struct Subclass { 568 void foo() { 569 Foo().method(); 570 } 571 }; 572 }; 573 )cpp"; 574 std::string ExpectedTokens = 575 "expanded tokens:\n" 576 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " 577 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n"; 578 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); 579 } 580 581 TEST_F(TokenCollectorTest, MultiFile) { 582 addFile("./foo.h", R"cpp( 583 #define ADD(X, Y) X+Y 584 int a = 100; 585 #include "bar.h" 586 )cpp"); 587 addFile("./bar.h", R"cpp( 588 int b = ADD(1, 2); 589 #define MULT(X, Y) X*Y 590 )cpp"); 591 llvm::StringLiteral Code = R"cpp( 592 #include "foo.h" 593 int c = ADD(1, MULT(2,3)); 594 )cpp"; 595 596 std::string Expected = R"(expanded tokens: 597 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; 598 file './input.cpp' 599 spelled tokens: 600 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; 601 mappings: 602 ['#'_0, 'int'_3) => ['int'_12, 'int'_12) 603 ['ADD'_6, ';'_17) => ['1'_15, ';'_20) 604 file './foo.h' 605 spelled tokens: 606 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" 607 mappings: 608 ['#'_0, 'int'_11) => ['int'_0, 'int'_0) 609 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) 610 file './bar.h' 611 spelled tokens: 612 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y 613 mappings: 614 ['ADD'_3, ';'_9) => ['1'_8, ';'_11) 615 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) 616 )"; 617 618 EXPECT_EQ(Expected, collectAndDump(Code)) 619 << "input: " << Code << "\nresults: " << collectAndDump(Code); 620 } 621 622 class TokenBufferTest : public TokenCollectorTest {}; 623 624 TEST_F(TokenBufferTest, SpelledByExpanded) { 625 recordTokens(R"cpp( 626 a1 a2 a3 b1 b2 627 )cpp"); 628 629 // Sanity check: expanded and spelled tokens are stored separately. 630 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 631 // Searching for subranges of expanded tokens should give the corresponding 632 // spelled ones. 633 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), 634 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); 635 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 636 ValueIs(SameRange(findSpelled("a1 a2 a3")))); 637 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 638 ValueIs(SameRange(findSpelled("b1 b2")))); 639 640 // Test search on simple macro expansions. 641 recordTokens(R"cpp( 642 #define A a1 a2 a3 643 #define B b1 b2 644 645 A split B 646 )cpp"); 647 // Ranges going across expansion boundaries. 648 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 649 ValueIs(SameRange(findSpelled("A split B")))); 650 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 651 ValueIs(SameRange(findSpelled("A split").drop_back()))); 652 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 653 ValueIs(SameRange(findSpelled("split B").drop_front()))); 654 // Ranges not fully covering macro invocations should fail. 655 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 656 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); 657 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), 658 llvm::None); 659 660 // Recursive macro invocations. 661 recordTokens(R"cpp( 662 #define ID(x) x 663 #define B b1 b2 664 665 ID(ID(ID(a1) a2 a3)) split ID(B) 666 )cpp"); 667 668 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 669 ValueIs(SameRange(findSpelled("( B").drop_front()))); 670 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 671 ValueIs(SameRange(findSpelled( 672 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); 673 // Mixed ranges with expanded and spelled tokens. 674 EXPECT_THAT( 675 Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")), 676 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split")))); 677 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")), 678 ValueIs(SameRange(findSpelled("split ID ( B )")))); 679 // Macro arguments 680 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")), 681 ValueIs(SameRange(findSpelled("a1")))); 682 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")), 683 ValueIs(SameRange(findSpelled("a2")))); 684 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")), 685 ValueIs(SameRange(findSpelled("a3")))); 686 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")), 687 ValueIs(SameRange(findSpelled("ID ( a1 ) a2")))); 688 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 689 ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3")))); 690 691 // Empty macro expansions. 692 recordTokens(R"cpp( 693 #define EMPTY 694 #define ID(X) X 695 696 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 697 EMPTY EMPTY ID(4 5 6) split2 698 ID(7 8 9) EMPTY EMPTY 699 )cpp"); 700 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), 701 ValueIs(SameRange(findSpelled("1 2 3")))); 702 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), 703 ValueIs(SameRange(findSpelled("4 5 6")))); 704 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), 705 ValueIs(SameRange(findSpelled("7 8 9")))); 706 707 // Empty mappings coming from various directives. 708 recordTokens(R"cpp( 709 #define ID(X) X 710 ID(1) 711 #pragma lalala 712 not_mapped 713 )cpp"); 714 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), 715 ValueIs(SameRange(findSpelled("not_mapped")))); 716 717 // Multiple macro arguments 718 recordTokens(R"cpp( 719 #define ID(X) X 720 #define ID2(X, Y) X Y 721 722 ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) 723 )cpp"); 724 // Should fail, spans multiple arguments. 725 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 726 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")), 727 ValueIs(SameRange(findSpelled("ID ( a2 ) a3")))); 728 EXPECT_THAT( 729 Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 730 ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )")))); 731 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")), 732 ValueIs(SameRange(findSpelled("a5 a6")))); 733 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")), 734 ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )")))); 735 // Should fail, spans multiple invocations. 736 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None); 737 } 738 739 TEST_F(TokenBufferTest, ExpandedTokensForRange) { 740 recordTokens(R"cpp( 741 #define SIGN(X) X##_washere 742 A SIGN(B) C SIGN(D) E SIGN(F) G 743 )cpp"); 744 745 SourceRange R(findExpanded("C").front().location(), 746 findExpanded("F_washere").front().location()); 747 // Sanity check: expanded and spelled tokens are stored separately. 748 EXPECT_THAT(Buffer.expandedTokens(R), 749 SameRange(findExpanded("C D_washere E F_washere"))); 750 EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); 751 } 752 753 TEST_F(TokenBufferTest, ExpansionStartingAt) { 754 // Object-like macro expansions. 755 recordTokens(R"cpp( 756 #define FOO 3+4 757 int a = FOO 1; 758 int b = FOO 2; 759 )cpp"); 760 761 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back(); 762 EXPECT_THAT( 763 Buffer.expansionStartingAt(Foo1.data()), 764 ValueIs(IsExpansion(SameRange(Foo1), 765 SameRange(findExpanded("3 + 4 1").drop_back())))); 766 767 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back(); 768 EXPECT_THAT( 769 Buffer.expansionStartingAt(Foo2.data()), 770 ValueIs(IsExpansion(SameRange(Foo2), 771 SameRange(findExpanded("3 + 4 2").drop_back())))); 772 773 // Function-like macro expansions. 774 recordTokens(R"cpp( 775 #define ID(X) X 776 int a = ID(1+2+3); 777 int b = ID(ID(2+3+4)); 778 )cpp"); 779 780 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); 781 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), 782 ValueIs(IsExpansion(SameRange(ID1), 783 SameRange(findExpanded("1 + 2 + 3"))))); 784 // Only the first spelled token should be found. 785 for (const auto &T : ID1.drop_front()) 786 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 787 788 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); 789 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), 790 ValueIs(IsExpansion(SameRange(ID2), 791 SameRange(findExpanded("2 + 3 + 4"))))); 792 // Only the first spelled token should be found. 793 for (const auto &T : ID2.drop_front()) 794 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 795 796 // PP directives. 797 recordTokens(R"cpp( 798 #define FOO 1 799 int a = FOO; 800 #pragma once 801 int b = 1; 802 )cpp"); 803 804 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); 805 EXPECT_THAT( 806 Buffer.expansionStartingAt(&DefineFoo.front()), 807 ValueIs(IsExpansion(SameRange(DefineFoo), 808 SameRange(findExpanded("int a").take_front(0))))); 809 // Only the first spelled token should be found. 810 for (const auto &T : DefineFoo.drop_front()) 811 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 812 813 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); 814 EXPECT_THAT( 815 Buffer.expansionStartingAt(&PragmaOnce.front()), 816 ValueIs(IsExpansion(SameRange(PragmaOnce), 817 SameRange(findExpanded("int b").take_front(0))))); 818 // Only the first spelled token should be found. 819 for (const auto &T : PragmaOnce.drop_front()) 820 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 821 } 822 823 TEST_F(TokenBufferTest, TokensToFileRange) { 824 addFile("./foo.h", "token_from_header"); 825 llvm::Annotations Code(R"cpp( 826 #define FOO token_from_expansion 827 #include "./foo.h" 828 $all[[$i[[int]] a = FOO;]] 829 )cpp"); 830 recordTokens(Code.code()); 831 832 auto &SM = *SourceMgr; 833 834 // Two simple examples. 835 auto Int = findExpanded("int").front(); 836 auto Semi = findExpanded(";").front(); 837 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, 838 Code.range("i").End)); 839 EXPECT_EQ(syntax::Token::range(SM, Int, Semi), 840 FileRange(SM.getMainFileID(), Code.range("all").Begin, 841 Code.range("all").End)); 842 // We don't test assertion failures because death tests are slow. 843 } 844 845 TEST_F(TokenBufferTest, MacroExpansions) { 846 llvm::Annotations Code(R"cpp( 847 #define FOO B 848 #define FOO2 BA 849 #define CALL(X) int X 850 #define G CALL(FOO2) 851 int B; 852 $macro[[FOO]]; 853 $macro[[CALL]](A); 854 $macro[[G]]; 855 )cpp"); 856 recordTokens(Code.code()); 857 auto &SM = *SourceMgr; 858 auto Expansions = Buffer.macroExpansions(SM.getMainFileID()); 859 std::vector<FileRange> ExpectedMacroRanges; 860 for (auto Range : Code.ranges("macro")) 861 ExpectedMacroRanges.push_back( 862 FileRange(SM.getMainFileID(), Range.Begin, Range.End)); 863 std::vector<FileRange> ActualMacroRanges; 864 for (auto Expansion : Expansions) 865 ActualMacroRanges.push_back(Expansion->range(SM)); 866 EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); 867 } 868 869 TEST_F(TokenBufferTest, Touching) { 870 llvm::Annotations Code("^i^nt^ ^a^b^=^1;^"); 871 recordTokens(Code.code()); 872 873 auto Touching = [&](int Index) { 874 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 875 Code.points()[Index]); 876 return spelledTokensTouching(Loc, Buffer); 877 }; 878 auto Identifier = [&](int Index) { 879 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 880 Code.points()[Index]); 881 const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer); 882 return Tok ? Tok->text(*SourceMgr) : ""; 883 }; 884 885 EXPECT_THAT(Touching(0), SameRange(findSpelled("int"))); 886 EXPECT_EQ(Identifier(0), ""); 887 EXPECT_THAT(Touching(1), SameRange(findSpelled("int"))); 888 EXPECT_EQ(Identifier(1), ""); 889 EXPECT_THAT(Touching(2), SameRange(findSpelled("int"))); 890 EXPECT_EQ(Identifier(2), ""); 891 892 EXPECT_THAT(Touching(3), SameRange(findSpelled("ab"))); 893 EXPECT_EQ(Identifier(3), "ab"); 894 EXPECT_THAT(Touching(4), SameRange(findSpelled("ab"))); 895 EXPECT_EQ(Identifier(4), "ab"); 896 897 EXPECT_THAT(Touching(5), SameRange(findSpelled("ab ="))); 898 EXPECT_EQ(Identifier(5), "ab"); 899 900 EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1"))); 901 EXPECT_EQ(Identifier(6), ""); 902 903 EXPECT_THAT(Touching(7), SameRange(findSpelled(";"))); 904 EXPECT_EQ(Identifier(7), ""); 905 906 ASSERT_EQ(Code.points().size(), 8u); 907 } 908 909 } // namespace 910