1 //===- TokensTest.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Syntax/Tokens.h" 10 #include "clang/AST/ASTConsumer.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticIDs.h" 14 #include "clang/Basic/DiagnosticOptions.h" 15 #include "clang/Basic/FileManager.h" 16 #include "clang/Basic/FileSystemOptions.h" 17 #include "clang/Basic/LLVM.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.def" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Frontend/CompilerInstance.h" 24 #include "clang/Frontend/FrontendAction.h" 25 #include "clang/Frontend/Utils.h" 26 #include "clang/Lex/Lexer.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "clang/Lex/Token.h" 29 #include "clang/Tooling/Tooling.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_os_ostream.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Testing/Support/Annotations.h" 42 #include "llvm/Testing/Support/SupportHelpers.h" 43 #include <cassert> 44 #include <cstdlib> 45 #include <gmock/gmock.h> 46 #include <gtest/gtest.h> 47 #include <memory> 48 #include <ostream> 49 #include <string> 50 51 using namespace clang; 52 using namespace clang::syntax; 53 54 using llvm::ValueIs; 55 using ::testing::_; 56 using ::testing::AllOf; 57 using ::testing::Contains; 58 using ::testing::ElementsAre; 59 using ::testing::Field; 60 using ::testing::IsEmpty; 61 using ::testing::Matcher; 62 using ::testing::Not; 63 using ::testing::Pointee; 64 using ::testing::StartsWith; 65 66 namespace { 67 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the 68 // argument. 69 MATCHER_P(SameRange, A, "") { 70 return A.begin() == arg.begin() && A.end() == arg.end(); 71 } 72 73 Matcher<TokenBuffer::Expansion> 74 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, 75 Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { 76 return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled), 77 Field(&TokenBuffer::Expansion::Expanded, Expanded)); 78 } 79 // Matchers for syntax::Token. 80 MATCHER_P(Kind, K, "") { return arg.kind() == K; } 81 MATCHER_P2(HasText, Text, SourceMgr, "") { 82 return arg.text(*SourceMgr) == Text; 83 } 84 /// Checks the start and end location of a token are equal to SourceRng. 85 MATCHER_P(RangeIs, SourceRng, "") { 86 return arg.location() == SourceRng.first && 87 arg.endLocation() == SourceRng.second; 88 } 89 90 class TokenCollectorTest : public ::testing::Test { 91 public: 92 /// Run the clang frontend, collect the preprocessed tokens from the frontend 93 /// invocation and store them in this->Buffer. 94 /// This also clears SourceManager before running the compiler. 95 void recordTokens(llvm::StringRef Code) { 96 class RecordTokens : public ASTFrontendAction { 97 public: 98 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 99 100 bool BeginSourceFileAction(CompilerInstance &CI) override { 101 assert(!Collector && "expected only a single call to BeginSourceFile"); 102 Collector.emplace(CI.getPreprocessor()); 103 return true; 104 } 105 void EndSourceFileAction() override { 106 assert(Collector && "BeginSourceFileAction was never called"); 107 Result = std::move(*Collector).consume(); 108 Result.indexExpandedTokens(); 109 } 110 111 std::unique_ptr<ASTConsumer> 112 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 113 return std::make_unique<ASTConsumer>(); 114 } 115 116 private: 117 TokenBuffer &Result; 118 llvm::Optional<TokenCollector> Collector; 119 }; 120 121 constexpr const char *FileName = "./input.cpp"; 122 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 123 // Prepare to run a compiler. 124 if (!Diags->getClient()) 125 Diags->setClient(new IgnoringDiagConsumer); 126 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 127 FileName}; 128 CreateInvocationOptions CIOpts; 129 CIOpts.Diags = Diags; 130 CIOpts.VFS = FS; 131 auto CI = createInvocation(Args, std::move(CIOpts)); 132 assert(CI); 133 CI->getFrontendOpts().DisableFree = false; 134 CI->getPreprocessorOpts().addRemappedFile( 135 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 136 CompilerInstance Compiler; 137 Compiler.setInvocation(std::move(CI)); 138 Compiler.setDiagnostics(Diags.get()); 139 Compiler.setFileManager(FileMgr.get()); 140 Compiler.setSourceManager(SourceMgr.get()); 141 142 this->Buffer = TokenBuffer(*SourceMgr); 143 RecordTokens Recorder(this->Buffer); 144 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 145 << "failed to run the frontend"; 146 } 147 148 /// Record the tokens and return a test dump of the resulting buffer. 149 std::string collectAndDump(llvm::StringRef Code) { 150 recordTokens(Code); 151 return Buffer.dumpForTests(); 152 } 153 154 // Adds a file to the test VFS. 155 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 156 if (!FS->addFile(Path, time_t(), 157 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 158 ADD_FAILURE() << "could not add a file to VFS: " << Path; 159 } 160 } 161 162 /// Add a new file, run syntax::tokenize() on the range if any, run it on the 163 /// whole file otherwise and return the results. 164 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 165 llvm::Annotations Annot(Text); 166 auto FID = SourceMgr->createFileID( 167 llvm::MemoryBuffer::getMemBufferCopy(Annot.code())); 168 // FIXME: pass proper LangOptions. 169 if (Annot.ranges().empty()) 170 return syntax::tokenize(FID, *SourceMgr, LangOptions()); 171 return syntax::tokenize( 172 syntax::FileRange(FID, Annot.range().Begin, Annot.range().End), 173 *SourceMgr, LangOptions()); 174 } 175 176 // Specialized versions of matchers that hide the SourceManager from clients. 177 Matcher<syntax::Token> HasText(std::string Text) const { 178 return ::HasText(Text, SourceMgr.get()); 179 } 180 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 181 std::pair<SourceLocation, SourceLocation> Ls; 182 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 183 .getLocWithOffset(R.Begin); 184 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 185 .getLocWithOffset(R.End); 186 return ::RangeIs(Ls); 187 } 188 189 /// Finds a subrange in O(n * m). 190 template <class T, class U, class Eq> 191 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 192 llvm::ArrayRef<T> Range, Eq F) { 193 assert(Subrange.size() >= 1); 194 if (Range.size() < Subrange.size()) 195 return llvm::makeArrayRef(Range.end(), Range.end()); 196 for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); 197 Begin <= Last; ++Begin) { 198 auto It = Begin; 199 for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); 200 ++ItSub, ++It) { 201 if (!F(*ItSub, *It)) 202 goto continue_outer; 203 } 204 return llvm::makeArrayRef(Begin, It); 205 continue_outer:; 206 } 207 return llvm::makeArrayRef(Range.end(), Range.end()); 208 } 209 210 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 211 /// The match should be unique. \p Query is a whitespace-separated list of 212 /// tokens to search for. 213 llvm::ArrayRef<syntax::Token> 214 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 215 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 216 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 217 if (QueryTokens.empty()) { 218 ADD_FAILURE() << "will not look for an empty list of tokens"; 219 std::abort(); 220 } 221 // An equality test for search. 222 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 223 return Q == T.text(*SourceMgr); 224 }; 225 // Find a match. 226 auto Found = 227 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 228 if (Found.begin() == Tokens.end()) { 229 ADD_FAILURE() << "could not find the subrange for " << Query; 230 std::abort(); 231 } 232 // Check that the match is unique. 233 if (findSubrange(llvm::makeArrayRef(QueryTokens), 234 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 235 .begin() != Tokens.end()) { 236 ADD_FAILURE() << "match is not unique for " << Query; 237 std::abort(); 238 } 239 return Found; 240 }; 241 242 // Specialized versions of findTokenRange for expanded and spelled tokens. 243 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 244 return findTokenRange(Query, Buffer.expandedTokens()); 245 } 246 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 247 FileID File = FileID()) { 248 if (!File.isValid()) 249 File = SourceMgr->getMainFileID(); 250 return findTokenRange(Query, Buffer.spelledTokens(File)); 251 } 252 253 // Data fields. 254 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = 255 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); 256 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = 257 new llvm::vfs::InMemoryFileSystem; 258 llvm::IntrusiveRefCntPtr<FileManager> FileMgr = 259 new FileManager(FileSystemOptions(), FS); 260 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = 261 new SourceManager(*Diags, *FileMgr); 262 /// Contains last result of calling recordTokens(). 263 TokenBuffer Buffer = TokenBuffer(*SourceMgr); 264 }; 265 266 TEST_F(TokenCollectorTest, RawMode) { 267 EXPECT_THAT(tokenize("int main() {}"), 268 ElementsAre(Kind(tok::kw_int), 269 AllOf(HasText("main"), Kind(tok::identifier)), 270 Kind(tok::l_paren), Kind(tok::r_paren), 271 Kind(tok::l_brace), Kind(tok::r_brace))); 272 // Comments are ignored for now. 273 EXPECT_THAT(tokenize("/* foo */int a; // more comments"), 274 ElementsAre(Kind(tok::kw_int), 275 AllOf(HasText("a"), Kind(tok::identifier)), 276 Kind(tok::semi))); 277 EXPECT_THAT(tokenize("int [[main() {]]}"), 278 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 279 Kind(tok::l_paren), Kind(tok::r_paren), 280 Kind(tok::l_brace))); 281 EXPECT_THAT(tokenize("int [[main() { ]]}"), 282 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 283 Kind(tok::l_paren), Kind(tok::r_paren), 284 Kind(tok::l_brace))); 285 // First token is partially parsed, last token is fully included even though 286 // only a part of it is contained in the range. 287 EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"), 288 ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)), 289 Kind(tok::l_paren), Kind(tok::r_paren), 290 Kind(tok::l_brace), Kind(tok::kw_return))); 291 } 292 293 TEST_F(TokenCollectorTest, Basic) { 294 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 295 {"int main() {}", 296 R"(expanded tokens: 297 int main ( ) { } 298 file './input.cpp' 299 spelled tokens: 300 int main ( ) { } 301 no mappings. 302 )"}, 303 // All kinds of whitespace are ignored. 304 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", 305 R"(expanded tokens: 306 int main ( ) { } 307 file './input.cpp' 308 spelled tokens: 309 int main ( ) { } 310 no mappings. 311 )"}, 312 // Annotation tokens are ignored. 313 {R"cpp( 314 #pragma GCC visibility push (public) 315 #pragma GCC visibility pop 316 )cpp", 317 R"(expanded tokens: 318 <empty> 319 file './input.cpp' 320 spelled tokens: 321 # pragma GCC visibility push ( public ) # pragma GCC visibility pop 322 mappings: 323 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) 324 )"}, 325 // Empty files should not crash. 326 {R"cpp()cpp", R"(expanded tokens: 327 <empty> 328 file './input.cpp' 329 spelled tokens: 330 <empty> 331 no mappings. 332 )"}, 333 // Should not crash on errors inside '#define' directives. Error is that 334 // stringification (#B) does not refer to a macro parameter. 335 { 336 R"cpp( 337 a 338 #define MACRO() A #B 339 )cpp", 340 R"(expanded tokens: 341 a 342 file './input.cpp' 343 spelled tokens: 344 a # define MACRO ( ) A # B 345 mappings: 346 ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) 347 )"}}; 348 for (auto &Test : TestCases) 349 EXPECT_EQ(collectAndDump(Test.first), Test.second) 350 << collectAndDump(Test.first); 351 } 352 353 TEST_F(TokenCollectorTest, Locations) { 354 // Check locations of the tokens. 355 llvm::Annotations Code(R"cpp( 356 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] 357 )cpp"); 358 recordTokens(Code.code()); 359 // Check expanded tokens. 360 EXPECT_THAT( 361 Buffer.expandedTokens(), 362 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 363 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 364 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 365 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 366 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), 367 Kind(tok::eof))); 368 // Check spelled tokens. 369 EXPECT_THAT( 370 Buffer.spelledTokens(SourceMgr->getMainFileID()), 371 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 372 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 373 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 374 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 375 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); 376 377 auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()); 378 for (auto &R : Code.ranges()) { 379 EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)), 380 Pointee(RangeIs(R))); 381 } 382 } 383 384 TEST_F(TokenCollectorTest, MacroDirectives) { 385 // Macro directives are not stored anywhere at the moment. 386 std::string Code = R"cpp( 387 #define FOO a 388 #include "unresolved_file.h" 389 #undef FOO 390 #ifdef X 391 #else 392 #endif 393 #ifndef Y 394 #endif 395 #if 1 396 #elif 2 397 #else 398 #endif 399 #pragma once 400 #pragma something lalala 401 402 int a; 403 )cpp"; 404 std::string Expected = 405 "expanded tokens:\n" 406 " int a ;\n" 407 "file './input.cpp'\n" 408 " spelled tokens:\n" 409 " # define FOO a # include \"unresolved_file.h\" # undef FOO " 410 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " 411 "# endif # pragma once # pragma something lalala int a ;\n" 412 " mappings:\n" 413 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; 414 EXPECT_EQ(collectAndDump(Code), Expected); 415 } 416 417 TEST_F(TokenCollectorTest, MacroReplacements) { 418 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 419 // A simple object-like macro. 420 {R"cpp( 421 #define INT int const 422 INT a; 423 )cpp", 424 R"(expanded tokens: 425 int const a ; 426 file './input.cpp' 427 spelled tokens: 428 # define INT int const INT a ; 429 mappings: 430 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) 431 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) 432 )"}, 433 // A simple function-like macro. 434 {R"cpp( 435 #define INT(a) const int 436 INT(10+10) a; 437 )cpp", 438 R"(expanded tokens: 439 const int a ; 440 file './input.cpp' 441 spelled tokens: 442 # define INT ( a ) const int INT ( 10 + 10 ) a ; 443 mappings: 444 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) 445 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) 446 )"}, 447 // Recursive macro replacements. 448 {R"cpp( 449 #define ID(X) X 450 #define INT int const 451 ID(ID(INT)) a; 452 )cpp", 453 R"(expanded tokens: 454 int const a ; 455 file './input.cpp' 456 spelled tokens: 457 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; 458 mappings: 459 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) 460 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) 461 )"}, 462 // A little more complicated recursive macro replacements. 463 {R"cpp( 464 #define ADD(X, Y) X+Y 465 #define MULT(X, Y) X*Y 466 467 int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); 468 )cpp", 469 "expanded tokens:\n" 470 " int a = 1 * 2 + 3 * 4 + 5 ;\n" 471 "file './input.cpp'\n" 472 " spelled tokens:\n" 473 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " 474 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" 475 " mappings:\n" 476 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" 477 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, 478 // Empty macro replacement. 479 // FIXME: the #define directives should not be glued together. 480 {R"cpp( 481 #define EMPTY 482 #define EMPTY_FUNC(X) 483 EMPTY 484 EMPTY_FUNC(1+2+3) 485 )cpp", 486 R"(expanded tokens: 487 <empty> 488 file './input.cpp' 489 spelled tokens: 490 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) 491 mappings: 492 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) 493 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) 494 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) 495 )"}, 496 // File ends with a macro replacement. 497 {R"cpp( 498 #define FOO 10+10; 499 int a = FOO 500 )cpp", 501 R"(expanded tokens: 502 int a = 10 + 10 ; 503 file './input.cpp' 504 spelled tokens: 505 # define FOO 10 + 10 ; int a = FOO 506 mappings: 507 ['#'_0, 'int'_7) => ['int'_0, 'int'_0) 508 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) 509 )"}, 510 {R"cpp( 511 #define NUM 42 512 #define ID(a) a 513 #define M 1 + ID 514 M(NUM) 515 )cpp", 516 R"(expanded tokens: 517 1 + 42 518 file './input.cpp' 519 spelled tokens: 520 # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM ) 521 mappings: 522 ['#'_0, 'M'_17) => ['1'_0, '1'_0) 523 ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3) 524 )"}, 525 }; 526 527 for (auto &Test : TestCases) { 528 std::string Dump = collectAndDump(Test.first); 529 EXPECT_EQ(Test.second, Dump) << Dump; 530 } 531 } 532 533 TEST_F(TokenCollectorTest, SpecialTokens) { 534 // Tokens coming from concatenations. 535 recordTokens(R"cpp( 536 #define CONCAT(a, b) a ## b 537 int a = CONCAT(1, 2); 538 )cpp"); 539 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 540 Contains(HasText("12"))); 541 // Multi-line tokens with slashes at the end. 542 recordTokens("i\\\nn\\\nt"); 543 EXPECT_THAT(Buffer.expandedTokens(), 544 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), 545 Kind(tok::eof))); 546 // FIXME: test tokens with digraphs and UCN identifiers. 547 } 548 549 TEST_F(TokenCollectorTest, LateBoundTokens) { 550 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), 551 // but we choose to record them as a single token (for now). 552 llvm::Annotations Code(R"cpp( 553 template <class T> 554 struct foo { int a; }; 555 int bar = foo<foo<int$br[[>>]]().a; 556 int baz = 10 $op[[>>]] 2; 557 )cpp"); 558 recordTokens(Code.code()); 559 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 560 AllOf(Contains(AllOf(Kind(tok::greatergreater), 561 RangeIs(Code.range("br")))), 562 Contains(AllOf(Kind(tok::greatergreater), 563 RangeIs(Code.range("op")))))); 564 } 565 566 TEST_F(TokenCollectorTest, DelayedParsing) { 567 llvm::StringLiteral Code = R"cpp( 568 struct Foo { 569 int method() { 570 // Parser will visit method bodies and initializers multiple times, but 571 // TokenBuffer should only record the first walk over the tokens; 572 return 100; 573 } 574 int a = 10; 575 576 struct Subclass { 577 void foo() { 578 Foo().method(); 579 } 580 }; 581 }; 582 )cpp"; 583 std::string ExpectedTokens = 584 "expanded tokens:\n" 585 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " 586 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n"; 587 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); 588 } 589 590 TEST_F(TokenCollectorTest, MultiFile) { 591 addFile("./foo.h", R"cpp( 592 #define ADD(X, Y) X+Y 593 int a = 100; 594 #include "bar.h" 595 )cpp"); 596 addFile("./bar.h", R"cpp( 597 int b = ADD(1, 2); 598 #define MULT(X, Y) X*Y 599 )cpp"); 600 llvm::StringLiteral Code = R"cpp( 601 #include "foo.h" 602 int c = ADD(1, MULT(2,3)); 603 )cpp"; 604 605 std::string Expected = R"(expanded tokens: 606 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; 607 file './input.cpp' 608 spelled tokens: 609 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; 610 mappings: 611 ['#'_0, 'int'_3) => ['int'_12, 'int'_12) 612 ['ADD'_6, ';'_17) => ['1'_15, ';'_20) 613 file './foo.h' 614 spelled tokens: 615 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" 616 mappings: 617 ['#'_0, 'int'_11) => ['int'_0, 'int'_0) 618 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) 619 file './bar.h' 620 spelled tokens: 621 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y 622 mappings: 623 ['ADD'_3, ';'_9) => ['1'_8, ';'_11) 624 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) 625 )"; 626 627 EXPECT_EQ(Expected, collectAndDump(Code)) 628 << "input: " << Code << "\nresults: " << collectAndDump(Code); 629 } 630 631 class TokenBufferTest : public TokenCollectorTest {}; 632 633 TEST_F(TokenBufferTest, SpelledByExpanded) { 634 recordTokens(R"cpp( 635 a1 a2 a3 b1 b2 636 )cpp"); 637 638 // Expanded and spelled tokens are stored separately. 639 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 640 // Searching for subranges of expanded tokens should give the corresponding 641 // spelled ones. 642 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), 643 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); 644 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 645 ValueIs(SameRange(findSpelled("a1 a2 a3")))); 646 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 647 ValueIs(SameRange(findSpelled("b1 b2")))); 648 649 // Test search on simple macro expansions. 650 recordTokens(R"cpp( 651 #define A a1 a2 a3 652 #define B b1 b2 653 654 A split B 655 )cpp"); 656 // Ranges going across expansion boundaries. 657 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 658 ValueIs(SameRange(findSpelled("A split B")))); 659 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 660 ValueIs(SameRange(findSpelled("A split").drop_back()))); 661 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 662 ValueIs(SameRange(findSpelled("split B").drop_front()))); 663 // Ranges not fully covering macro invocations should fail. 664 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 665 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); 666 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), 667 llvm::None); 668 669 // Recursive macro invocations. 670 recordTokens(R"cpp( 671 #define ID(x) x 672 #define B b1 b2 673 674 ID(ID(ID(a1) a2 a3)) split ID(B) 675 )cpp"); 676 677 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 678 ValueIs(SameRange(findSpelled("( B").drop_front()))); 679 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 680 ValueIs(SameRange(findSpelled( 681 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); 682 // Mixed ranges with expanded and spelled tokens. 683 EXPECT_THAT( 684 Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")), 685 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split")))); 686 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")), 687 ValueIs(SameRange(findSpelled("split ID ( B )")))); 688 // Macro arguments 689 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")), 690 ValueIs(SameRange(findSpelled("a1")))); 691 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")), 692 ValueIs(SameRange(findSpelled("a2")))); 693 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")), 694 ValueIs(SameRange(findSpelled("a3")))); 695 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")), 696 ValueIs(SameRange(findSpelled("ID ( a1 ) a2")))); 697 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 698 ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3")))); 699 700 // Empty macro expansions. 701 recordTokens(R"cpp( 702 #define EMPTY 703 #define ID(X) X 704 705 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 706 EMPTY EMPTY ID(4 5 6) split2 707 ID(7 8 9) EMPTY EMPTY 708 )cpp"); 709 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), 710 ValueIs(SameRange(findSpelled("1 2 3")))); 711 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), 712 ValueIs(SameRange(findSpelled("4 5 6")))); 713 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), 714 ValueIs(SameRange(findSpelled("7 8 9")))); 715 716 // Empty mappings coming from various directives. 717 recordTokens(R"cpp( 718 #define ID(X) X 719 ID(1) 720 #pragma lalala 721 not_mapped 722 )cpp"); 723 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), 724 ValueIs(SameRange(findSpelled("not_mapped")))); 725 726 // Multiple macro arguments 727 recordTokens(R"cpp( 728 #define ID(X) X 729 #define ID2(X, Y) X Y 730 731 ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) 732 )cpp"); 733 // Should fail, spans multiple arguments. 734 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 735 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")), 736 ValueIs(SameRange(findSpelled("ID ( a2 ) a3")))); 737 EXPECT_THAT( 738 Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 739 ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )")))); 740 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")), 741 ValueIs(SameRange(findSpelled("a5 a6")))); 742 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")), 743 ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )")))); 744 // Should fail, spans multiple invocations. 745 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None); 746 } 747 748 TEST_F(TokenBufferTest, ExpandedTokensForRange) { 749 recordTokens(R"cpp( 750 #define SIGN(X) X##_washere 751 A SIGN(B) C SIGN(D) E SIGN(F) G 752 )cpp"); 753 754 SourceRange R(findExpanded("C").front().location(), 755 findExpanded("F_washere").front().location()); 756 // Expanded and spelled tokens are stored separately. 757 EXPECT_THAT(Buffer.expandedTokens(R), 758 SameRange(findExpanded("C D_washere E F_washere"))); 759 EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); 760 } 761 762 TEST_F(TokenBufferTest, ExpansionsOverlapping) { 763 // Object-like macro expansions. 764 recordTokens(R"cpp( 765 #define FOO 3+4 766 int a = FOO 1; 767 int b = FOO 2; 768 )cpp"); 769 770 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1"); 771 EXPECT_THAT( 772 Buffer.expansionStartingAt(Foo1.data()), 773 ValueIs(IsExpansion(SameRange(Foo1.drop_back()), 774 SameRange(findExpanded("3 + 4 1").drop_back())))); 775 EXPECT_THAT( 776 Buffer.expansionsOverlapping(Foo1), 777 ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), 778 SameRange(findExpanded("3 + 4 1").drop_back())))); 779 780 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2"); 781 EXPECT_THAT( 782 Buffer.expansionStartingAt(Foo2.data()), 783 ValueIs(IsExpansion(SameRange(Foo2.drop_back()), 784 SameRange(findExpanded("3 + 4 2").drop_back())))); 785 EXPECT_THAT(Buffer.expansionsOverlapping( 786 llvm::makeArrayRef(Foo1.begin(), Foo2.end())), 787 ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _), 788 IsExpansion(SameRange(Foo2.drop_back()), _))); 789 790 // Function-like macro expansions. 791 recordTokens(R"cpp( 792 #define ID(X) X 793 int a = ID(1+2+3); 794 int b = ID(ID(2+3+4)); 795 )cpp"); 796 797 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); 798 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), 799 ValueIs(IsExpansion(SameRange(ID1), 800 SameRange(findExpanded("1 + 2 + 3"))))); 801 // Only the first spelled token should be found. 802 for (const auto &T : ID1.drop_front()) 803 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 804 805 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); 806 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), 807 ValueIs(IsExpansion(SameRange(ID2), 808 SameRange(findExpanded("2 + 3 + 4"))))); 809 // Only the first spelled token should be found. 810 for (const auto &T : ID2.drop_front()) 811 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 812 813 EXPECT_THAT(Buffer.expansionsOverlapping(llvm::makeArrayRef( 814 findSpelled("1 + 2").data(), findSpelled("4").data())), 815 ElementsAre(IsExpansion(SameRange(ID1), _), 816 IsExpansion(SameRange(ID2), _))); 817 818 // PP directives. 819 recordTokens(R"cpp( 820 #define FOO 1 821 int a = FOO; 822 #pragma once 823 int b = 1; 824 )cpp"); 825 826 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); 827 EXPECT_THAT( 828 Buffer.expansionStartingAt(&DefineFoo.front()), 829 ValueIs(IsExpansion(SameRange(DefineFoo), 830 SameRange(findExpanded("int a").take_front(0))))); 831 // Only the first spelled token should be found. 832 for (const auto &T : DefineFoo.drop_front()) 833 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 834 835 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); 836 EXPECT_THAT( 837 Buffer.expansionStartingAt(&PragmaOnce.front()), 838 ValueIs(IsExpansion(SameRange(PragmaOnce), 839 SameRange(findExpanded("int b").take_front(0))))); 840 // Only the first spelled token should be found. 841 for (const auto &T : PragmaOnce.drop_front()) 842 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 843 844 EXPECT_THAT( 845 Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")), 846 ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _), 847 IsExpansion(SameRange(PragmaOnce), _))); 848 } 849 850 TEST_F(TokenBufferTest, TokensToFileRange) { 851 addFile("./foo.h", "token_from_header"); 852 llvm::Annotations Code(R"cpp( 853 #define FOO token_from_expansion 854 #include "./foo.h" 855 $all[[$i[[int]] a = FOO;]] 856 )cpp"); 857 recordTokens(Code.code()); 858 859 auto &SM = *SourceMgr; 860 861 // Two simple examples. 862 auto Int = findExpanded("int").front(); 863 auto Semi = findExpanded(";").front(); 864 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, 865 Code.range("i").End)); 866 EXPECT_EQ(syntax::Token::range(SM, Int, Semi), 867 FileRange(SM.getMainFileID(), Code.range("all").Begin, 868 Code.range("all").End)); 869 // We don't test assertion failures because death tests are slow. 870 } 871 872 TEST_F(TokenBufferTest, MacroExpansions) { 873 llvm::Annotations Code(R"cpp( 874 #define FOO B 875 #define FOO2 BA 876 #define CALL(X) int X 877 #define G CALL(FOO2) 878 int B; 879 $macro[[FOO]]; 880 $macro[[CALL]](A); 881 $macro[[G]]; 882 )cpp"); 883 recordTokens(Code.code()); 884 auto &SM = *SourceMgr; 885 auto Expansions = Buffer.macroExpansions(SM.getMainFileID()); 886 std::vector<FileRange> ExpectedMacroRanges; 887 for (auto Range : Code.ranges("macro")) 888 ExpectedMacroRanges.push_back( 889 FileRange(SM.getMainFileID(), Range.Begin, Range.End)); 890 std::vector<FileRange> ActualMacroRanges; 891 for (auto Expansion : Expansions) 892 ActualMacroRanges.push_back(Expansion->range(SM)); 893 EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); 894 } 895 896 TEST_F(TokenBufferTest, Touching) { 897 llvm::Annotations Code("^i^nt^ ^a^b^=^1;^"); 898 recordTokens(Code.code()); 899 900 auto Touching = [&](int Index) { 901 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 902 Code.points()[Index]); 903 return spelledTokensTouching(Loc, Buffer); 904 }; 905 auto Identifier = [&](int Index) { 906 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 907 Code.points()[Index]); 908 const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer); 909 return Tok ? Tok->text(*SourceMgr) : ""; 910 }; 911 912 EXPECT_THAT(Touching(0), SameRange(findSpelled("int"))); 913 EXPECT_EQ(Identifier(0), ""); 914 EXPECT_THAT(Touching(1), SameRange(findSpelled("int"))); 915 EXPECT_EQ(Identifier(1), ""); 916 EXPECT_THAT(Touching(2), SameRange(findSpelled("int"))); 917 EXPECT_EQ(Identifier(2), ""); 918 919 EXPECT_THAT(Touching(3), SameRange(findSpelled("ab"))); 920 EXPECT_EQ(Identifier(3), "ab"); 921 EXPECT_THAT(Touching(4), SameRange(findSpelled("ab"))); 922 EXPECT_EQ(Identifier(4), "ab"); 923 924 EXPECT_THAT(Touching(5), SameRange(findSpelled("ab ="))); 925 EXPECT_EQ(Identifier(5), "ab"); 926 927 EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1"))); 928 EXPECT_EQ(Identifier(6), ""); 929 930 EXPECT_THAT(Touching(7), SameRange(findSpelled(";"))); 931 EXPECT_EQ(Identifier(7), ""); 932 933 ASSERT_EQ(Code.points().size(), 8u); 934 } 935 936 TEST_F(TokenBufferTest, ExpandedBySpelled) { 937 recordTokens(R"cpp( 938 a1 a2 a3 b1 b2 939 )cpp"); 940 // Expanded and spelled tokens are stored separately. 941 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 942 // Searching for subranges of expanded tokens should give the corresponding 943 // spelled ones. 944 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")), 945 ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2")))); 946 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")), 947 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 948 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")), 949 ElementsAre(SameRange(findExpanded("b1 b2")))); 950 951 // Test search on simple macro expansions. 952 recordTokens(R"cpp( 953 #define A a1 a2 a3 954 #define B b1 b2 955 956 A split B 957 )cpp"); 958 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")), 959 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); 960 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()), 961 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 962 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()), 963 ElementsAre(SameRange(findExpanded("b1 b2")))); 964 965 // Ranges not fully covering macro expansions should fail. 966 recordTokens(R"cpp( 967 #define ID(x) x 968 969 ID(a) 970 )cpp"); 971 // Spelled don't cover entire mapping (missing ID token) -> empty result 972 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty()); 973 // Spelled don't cover entire mapping (missing ) token) -> empty result 974 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty()); 975 976 // Recursive macro invocations. 977 recordTokens(R"cpp( 978 #define ID(x) x 979 #define B b1 b2 980 981 ID(ID(ID(a1) a2 a3)) split ID(B) 982 )cpp"); 983 984 EXPECT_THAT( 985 Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")), 986 ElementsAre(SameRange(findExpanded("a1 a2 a3")))); 987 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")), 988 ElementsAre(SameRange(findExpanded("b1 b2")))); 989 EXPECT_THAT(Buffer.expandedForSpelled( 990 findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")), 991 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); 992 // FIXME: these should succeed, but we do not support macro arguments yet. 993 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty()); 994 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")), 995 IsEmpty()); 996 997 // Empty macro expansions. 998 recordTokens(R"cpp( 999 #define EMPTY 1000 #define ID(X) X 1001 1002 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 1003 EMPTY EMPTY ID(4 5 6) split2 1004 ID(7 8 9) EMPTY EMPTY 1005 )cpp"); 1006 // Covered by empty expansions on one of both of the sides. 1007 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")), 1008 ElementsAre(SameRange(findExpanded("1 2 3")))); 1009 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")), 1010 ElementsAre(SameRange(findExpanded("4 5 6")))); 1011 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")), 1012 ElementsAre(SameRange(findExpanded("7 8 9")))); 1013 // Including the empty macro expansions on the side. 1014 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")), 1015 ElementsAre(SameRange(findExpanded("1 2 3")))); 1016 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")), 1017 ElementsAre(SameRange(findExpanded("1 2 3")))); 1018 EXPECT_THAT( 1019 Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")), 1020 ElementsAre(SameRange(findExpanded("1 2 3")))); 1021 1022 // Empty mappings coming from various directives. 1023 recordTokens(R"cpp( 1024 #define ID(X) X 1025 ID(1) 1026 #pragma lalala 1027 not_mapped 1028 )cpp"); 1029 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")), 1030 IsEmpty()); 1031 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")), 1032 IsEmpty()); 1033 1034 // Empty macro expansion. 1035 recordTokens(R"cpp( 1036 #define EMPTY 1037 EMPTY int a = 100; 1038 )cpp"); 1039 EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()), 1040 IsEmpty()); 1041 } 1042 1043 TEST_F(TokenCollectorTest, Pragmas) { 1044 // Tokens coming from concatenations. 1045 recordTokens(R"cpp( 1046 void foo() { 1047 #pragma unroll 4 1048 for(int i=0;i<4;++i); 1049 } 1050 )cpp"); 1051 } 1052 } // namespace 1053