1 //===- TokensTest.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Syntax/Tokens.h" 10 #include "clang/AST/ASTConsumer.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticIDs.h" 14 #include "clang/Basic/DiagnosticOptions.h" 15 #include "clang/Basic/FileManager.h" 16 #include "clang/Basic/FileSystemOptions.h" 17 #include "clang/Basic/LLVM.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.def" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Frontend/CompilerInstance.h" 24 #include "clang/Frontend/FrontendAction.h" 25 #include "clang/Frontend/Utils.h" 26 #include "clang/Lex/Lexer.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "clang/Lex/Token.h" 29 #include "clang/Tooling/Tooling.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_os_ostream.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Testing/Support/Annotations.h" 42 #include "llvm/Testing/Support/SupportHelpers.h" 43 #include "gmock/gmock.h" 44 #include <cassert> 45 #include <cstdlib> 46 #include <gmock/gmock.h> 47 #include <gtest/gtest.h> 48 #include <memory> 49 #include <ostream> 50 #include <string> 51 52 using namespace clang; 53 using namespace clang::syntax; 54 55 using llvm::ValueIs; 56 using ::testing::AllOf; 57 using ::testing::Contains; 58 using ::testing::ElementsAre; 59 using ::testing::Field; 60 using ::testing::Matcher; 61 using ::testing::Not; 62 using ::testing::StartsWith; 63 64 namespace { 65 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the 66 // argument. 67 MATCHER_P(SameRange, A, "") { 68 return A.begin() == arg.begin() && A.end() == arg.end(); 69 } 70 71 Matcher<TokenBuffer::Expansion> 72 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, 73 Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { 74 return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled), 75 Field(&TokenBuffer::Expansion::Expanded, Expanded)); 76 } 77 // Matchers for syntax::Token. 78 MATCHER_P(Kind, K, "") { return arg.kind() == K; } 79 MATCHER_P2(HasText, Text, SourceMgr, "") { 80 return arg.text(*SourceMgr) == Text; 81 } 82 /// Checks the start and end location of a token are equal to SourceRng. 83 MATCHER_P(RangeIs, SourceRng, "") { 84 return arg.location() == SourceRng.first && 85 arg.endLocation() == SourceRng.second; 86 } 87 88 class TokenCollectorTest : public ::testing::Test { 89 public: 90 /// Run the clang frontend, collect the preprocessed tokens from the frontend 91 /// invocation and store them in this->Buffer. 92 /// This also clears SourceManager before running the compiler. 93 void recordTokens(llvm::StringRef Code) { 94 class RecordTokens : public ASTFrontendAction { 95 public: 96 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 97 98 bool BeginSourceFileAction(CompilerInstance &CI) override { 99 assert(!Collector && "expected only a single call to BeginSourceFile"); 100 Collector.emplace(CI.getPreprocessor()); 101 return true; 102 } 103 void EndSourceFileAction() override { 104 assert(Collector && "BeginSourceFileAction was never called"); 105 Result = std::move(*Collector).consume(); 106 } 107 108 std::unique_ptr<ASTConsumer> 109 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 110 return std::make_unique<ASTConsumer>(); 111 } 112 113 private: 114 TokenBuffer &Result; 115 llvm::Optional<TokenCollector> Collector; 116 }; 117 118 constexpr const char *FileName = "./input.cpp"; 119 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 120 // Prepare to run a compiler. 121 if (!Diags->getClient()) 122 Diags->setClient(new IgnoringDiagConsumer); 123 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 124 FileName}; 125 auto CI = createInvocationFromCommandLine(Args, Diags, FS); 126 assert(CI); 127 CI->getFrontendOpts().DisableFree = false; 128 CI->getPreprocessorOpts().addRemappedFile( 129 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 130 CompilerInstance Compiler; 131 Compiler.setInvocation(std::move(CI)); 132 Compiler.setDiagnostics(Diags.get()); 133 Compiler.setFileManager(FileMgr.get()); 134 Compiler.setSourceManager(SourceMgr.get()); 135 136 this->Buffer = TokenBuffer(*SourceMgr); 137 RecordTokens Recorder(this->Buffer); 138 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 139 << "failed to run the frontend"; 140 } 141 142 /// Record the tokens and return a test dump of the resulting buffer. 143 std::string collectAndDump(llvm::StringRef Code) { 144 recordTokens(Code); 145 return Buffer.dumpForTests(); 146 } 147 148 // Adds a file to the test VFS. 149 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 150 if (!FS->addFile(Path, time_t(), 151 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 152 ADD_FAILURE() << "could not add a file to VFS: " << Path; 153 } 154 } 155 156 /// Add a new file, run syntax::tokenize() on the range if any, run it on the 157 /// whole file otherwise and return the results. 158 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 159 llvm::Annotations Annot(Text); 160 auto FID = SourceMgr->createFileID( 161 llvm::MemoryBuffer::getMemBufferCopy(Annot.code())); 162 // FIXME: pass proper LangOptions. 163 if (Annot.ranges().empty()) 164 return syntax::tokenize(FID, *SourceMgr, LangOptions()); 165 return syntax::tokenize( 166 syntax::FileRange(FID, Annot.range().Begin, Annot.range().End), 167 *SourceMgr, LangOptions()); 168 } 169 170 // Specialized versions of matchers that hide the SourceManager from clients. 171 Matcher<syntax::Token> HasText(std::string Text) const { 172 return ::HasText(Text, SourceMgr.get()); 173 } 174 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 175 std::pair<SourceLocation, SourceLocation> Ls; 176 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 177 .getLocWithOffset(R.Begin); 178 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 179 .getLocWithOffset(R.End); 180 return ::RangeIs(Ls); 181 } 182 183 /// Finds a subrange in O(n * m). 184 template <class T, class U, class Eq> 185 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 186 llvm::ArrayRef<T> Range, Eq F) { 187 for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { 188 auto It = Begin; 189 for (auto ItSub = Subrange.begin(); 190 ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { 191 if (!F(*ItSub, *It)) 192 goto continue_outer; 193 } 194 return llvm::makeArrayRef(Begin, It); 195 continue_outer:; 196 } 197 return llvm::makeArrayRef(Range.end(), Range.end()); 198 } 199 200 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 201 /// The match should be unique. \p Query is a whitespace-separated list of 202 /// tokens to search for. 203 llvm::ArrayRef<syntax::Token> 204 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 205 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 206 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 207 if (QueryTokens.empty()) { 208 ADD_FAILURE() << "will not look for an empty list of tokens"; 209 std::abort(); 210 } 211 // An equality test for search. 212 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 213 return Q == T.text(*SourceMgr); 214 }; 215 // Find a match. 216 auto Found = 217 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 218 if (Found.begin() == Tokens.end()) { 219 ADD_FAILURE() << "could not find the subrange for " << Query; 220 std::abort(); 221 } 222 // Check that the match is unique. 223 if (findSubrange(llvm::makeArrayRef(QueryTokens), 224 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 225 .begin() != Tokens.end()) { 226 ADD_FAILURE() << "match is not unique for " << Query; 227 std::abort(); 228 } 229 return Found; 230 }; 231 232 // Specialized versions of findTokenRange for expanded and spelled tokens. 233 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 234 return findTokenRange(Query, Buffer.expandedTokens()); 235 } 236 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 237 FileID File = FileID()) { 238 if (!File.isValid()) 239 File = SourceMgr->getMainFileID(); 240 return findTokenRange(Query, Buffer.spelledTokens(File)); 241 } 242 243 // Data fields. 244 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = 245 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); 246 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = 247 new llvm::vfs::InMemoryFileSystem; 248 llvm::IntrusiveRefCntPtr<FileManager> FileMgr = 249 new FileManager(FileSystemOptions(), FS); 250 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = 251 new SourceManager(*Diags, *FileMgr); 252 /// Contains last result of calling recordTokens(). 253 TokenBuffer Buffer = TokenBuffer(*SourceMgr); 254 }; 255 256 TEST_F(TokenCollectorTest, RawMode) { 257 EXPECT_THAT(tokenize("int main() {}"), 258 ElementsAre(Kind(tok::kw_int), 259 AllOf(HasText("main"), Kind(tok::identifier)), 260 Kind(tok::l_paren), Kind(tok::r_paren), 261 Kind(tok::l_brace), Kind(tok::r_brace))); 262 // Comments are ignored for now. 263 EXPECT_THAT(tokenize("/* foo */int a; // more comments"), 264 ElementsAre(Kind(tok::kw_int), 265 AllOf(HasText("a"), Kind(tok::identifier)), 266 Kind(tok::semi))); 267 EXPECT_THAT(tokenize("int [[main() {]]}"), 268 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 269 Kind(tok::l_paren), Kind(tok::r_paren), 270 Kind(tok::l_brace))); 271 EXPECT_THAT(tokenize("int [[main() { ]]}"), 272 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)), 273 Kind(tok::l_paren), Kind(tok::r_paren), 274 Kind(tok::l_brace))); 275 // First token is partially parsed, last token is fully included even though 276 // only a part of it is contained in the range. 277 EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"), 278 ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)), 279 Kind(tok::l_paren), Kind(tok::r_paren), 280 Kind(tok::l_brace), Kind(tok::kw_return))); 281 } 282 283 TEST_F(TokenCollectorTest, Basic) { 284 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 285 {"int main() {}", 286 R"(expanded tokens: 287 int main ( ) { } 288 file './input.cpp' 289 spelled tokens: 290 int main ( ) { } 291 no mappings. 292 )"}, 293 // All kinds of whitespace are ignored. 294 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", 295 R"(expanded tokens: 296 int main ( ) { } 297 file './input.cpp' 298 spelled tokens: 299 int main ( ) { } 300 no mappings. 301 )"}, 302 // Annotation tokens are ignored. 303 {R"cpp( 304 #pragma GCC visibility push (public) 305 #pragma GCC visibility pop 306 )cpp", 307 R"(expanded tokens: 308 <empty> 309 file './input.cpp' 310 spelled tokens: 311 # pragma GCC visibility push ( public ) # pragma GCC visibility pop 312 mappings: 313 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) 314 )"}, 315 // Empty files should not crash. 316 {R"cpp()cpp", R"(expanded tokens: 317 <empty> 318 file './input.cpp' 319 spelled tokens: 320 <empty> 321 no mappings. 322 )"}, 323 // Should not crash on errors inside '#define' directives. Error is that 324 // stringification (#B) does not refer to a macro parameter. 325 { 326 R"cpp( 327 a 328 #define MACRO() A #B 329 )cpp", 330 R"(expanded tokens: 331 a 332 file './input.cpp' 333 spelled tokens: 334 a # define MACRO ( ) A # B 335 mappings: 336 ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) 337 )"}}; 338 for (auto &Test : TestCases) 339 EXPECT_EQ(collectAndDump(Test.first), Test.second) 340 << collectAndDump(Test.first); 341 } 342 343 TEST_F(TokenCollectorTest, Locations) { 344 // Check locations of the tokens. 345 llvm::Annotations Code(R"cpp( 346 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] 347 )cpp"); 348 recordTokens(Code.code()); 349 // Check expanded tokens. 350 EXPECT_THAT( 351 Buffer.expandedTokens(), 352 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 353 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 354 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 355 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 356 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), 357 Kind(tok::eof))); 358 // Check spelled tokens. 359 EXPECT_THAT( 360 Buffer.spelledTokens(SourceMgr->getMainFileID()), 361 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 362 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 363 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 364 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 365 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); 366 } 367 368 TEST_F(TokenCollectorTest, MacroDirectives) { 369 // Macro directives are not stored anywhere at the moment. 370 std::string Code = R"cpp( 371 #define FOO a 372 #include "unresolved_file.h" 373 #undef FOO 374 #ifdef X 375 #else 376 #endif 377 #ifndef Y 378 #endif 379 #if 1 380 #elif 2 381 #else 382 #endif 383 #pragma once 384 #pragma something lalala 385 386 int a; 387 )cpp"; 388 std::string Expected = 389 "expanded tokens:\n" 390 " int a ;\n" 391 "file './input.cpp'\n" 392 " spelled tokens:\n" 393 " # define FOO a # include \"unresolved_file.h\" # undef FOO " 394 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " 395 "# endif # pragma once # pragma something lalala int a ;\n" 396 " mappings:\n" 397 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; 398 EXPECT_EQ(collectAndDump(Code), Expected); 399 } 400 401 TEST_F(TokenCollectorTest, MacroReplacements) { 402 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 403 // A simple object-like macro. 404 {R"cpp( 405 #define INT int const 406 INT a; 407 )cpp", 408 R"(expanded tokens: 409 int const a ; 410 file './input.cpp' 411 spelled tokens: 412 # define INT int const INT a ; 413 mappings: 414 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) 415 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) 416 )"}, 417 // A simple function-like macro. 418 {R"cpp( 419 #define INT(a) const int 420 INT(10+10) a; 421 )cpp", 422 R"(expanded tokens: 423 const int a ; 424 file './input.cpp' 425 spelled tokens: 426 # define INT ( a ) const int INT ( 10 + 10 ) a ; 427 mappings: 428 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) 429 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) 430 )"}, 431 // Recursive macro replacements. 432 {R"cpp( 433 #define ID(X) X 434 #define INT int const 435 ID(ID(INT)) a; 436 )cpp", 437 R"(expanded tokens: 438 int const a ; 439 file './input.cpp' 440 spelled tokens: 441 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; 442 mappings: 443 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) 444 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) 445 )"}, 446 // A little more complicated recursive macro replacements. 447 {R"cpp( 448 #define ADD(X, Y) X+Y 449 #define MULT(X, Y) X*Y 450 451 int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); 452 )cpp", 453 "expanded tokens:\n" 454 " int a = 1 * 2 + 3 * 4 + 5 ;\n" 455 "file './input.cpp'\n" 456 " spelled tokens:\n" 457 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " 458 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" 459 " mappings:\n" 460 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" 461 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, 462 // Empty macro replacement. 463 // FIXME: the #define directives should not be glued together. 464 {R"cpp( 465 #define EMPTY 466 #define EMPTY_FUNC(X) 467 EMPTY 468 EMPTY_FUNC(1+2+3) 469 )cpp", 470 R"(expanded tokens: 471 <empty> 472 file './input.cpp' 473 spelled tokens: 474 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) 475 mappings: 476 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) 477 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) 478 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) 479 )"}, 480 // File ends with a macro replacement. 481 {R"cpp( 482 #define FOO 10+10; 483 int a = FOO 484 )cpp", 485 R"(expanded tokens: 486 int a = 10 + 10 ; 487 file './input.cpp' 488 spelled tokens: 489 # define FOO 10 + 10 ; int a = FOO 490 mappings: 491 ['#'_0, 'int'_7) => ['int'_0, 'int'_0) 492 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) 493 )"}}; 494 495 for (auto &Test : TestCases) 496 EXPECT_EQ(Test.second, collectAndDump(Test.first)) 497 << collectAndDump(Test.first); 498 } 499 500 TEST_F(TokenCollectorTest, SpecialTokens) { 501 // Tokens coming from concatenations. 502 recordTokens(R"cpp( 503 #define CONCAT(a, b) a ## b 504 int a = CONCAT(1, 2); 505 )cpp"); 506 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 507 Contains(HasText("12"))); 508 // Multi-line tokens with slashes at the end. 509 recordTokens("i\\\nn\\\nt"); 510 EXPECT_THAT(Buffer.expandedTokens(), 511 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), 512 Kind(tok::eof))); 513 // FIXME: test tokens with digraphs and UCN identifiers. 514 } 515 516 TEST_F(TokenCollectorTest, LateBoundTokens) { 517 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), 518 // but we choose to record them as a single token (for now). 519 llvm::Annotations Code(R"cpp( 520 template <class T> 521 struct foo { int a; }; 522 int bar = foo<foo<int$br[[>>]]().a; 523 int baz = 10 $op[[>>]] 2; 524 )cpp"); 525 recordTokens(Code.code()); 526 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 527 AllOf(Contains(AllOf(Kind(tok::greatergreater), 528 RangeIs(Code.range("br")))), 529 Contains(AllOf(Kind(tok::greatergreater), 530 RangeIs(Code.range("op")))))); 531 } 532 533 TEST_F(TokenCollectorTest, DelayedParsing) { 534 llvm::StringLiteral Code = R"cpp( 535 struct Foo { 536 int method() { 537 // Parser will visit method bodies and initializers multiple times, but 538 // TokenBuffer should only record the first walk over the tokens; 539 return 100; 540 } 541 int a = 10; 542 543 struct Subclass { 544 void foo() { 545 Foo().method(); 546 } 547 }; 548 }; 549 )cpp"; 550 std::string ExpectedTokens = 551 "expanded tokens:\n" 552 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " 553 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n"; 554 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); 555 } 556 557 TEST_F(TokenCollectorTest, MultiFile) { 558 addFile("./foo.h", R"cpp( 559 #define ADD(X, Y) X+Y 560 int a = 100; 561 #include "bar.h" 562 )cpp"); 563 addFile("./bar.h", R"cpp( 564 int b = ADD(1, 2); 565 #define MULT(X, Y) X*Y 566 )cpp"); 567 llvm::StringLiteral Code = R"cpp( 568 #include "foo.h" 569 int c = ADD(1, MULT(2,3)); 570 )cpp"; 571 572 std::string Expected = R"(expanded tokens: 573 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; 574 file './input.cpp' 575 spelled tokens: 576 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; 577 mappings: 578 ['#'_0, 'int'_3) => ['int'_12, 'int'_12) 579 ['ADD'_6, ';'_17) => ['1'_15, ';'_20) 580 file './foo.h' 581 spelled tokens: 582 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" 583 mappings: 584 ['#'_0, 'int'_11) => ['int'_0, 'int'_0) 585 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) 586 file './bar.h' 587 spelled tokens: 588 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y 589 mappings: 590 ['ADD'_3, ';'_9) => ['1'_8, ';'_11) 591 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) 592 )"; 593 594 EXPECT_EQ(Expected, collectAndDump(Code)) 595 << "input: " << Code << "\nresults: " << collectAndDump(Code); 596 } 597 598 class TokenBufferTest : public TokenCollectorTest {}; 599 600 TEST_F(TokenBufferTest, SpelledByExpanded) { 601 recordTokens(R"cpp( 602 a1 a2 a3 b1 b2 603 )cpp"); 604 605 // Sanity check: expanded and spelled tokens are stored separately. 606 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 607 // Searching for subranges of expanded tokens should give the corresponding 608 // spelled ones. 609 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), 610 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); 611 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 612 ValueIs(SameRange(findSpelled("a1 a2 a3")))); 613 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 614 ValueIs(SameRange(findSpelled("b1 b2")))); 615 616 // Test search on simple macro expansions. 617 recordTokens(R"cpp( 618 #define A a1 a2 a3 619 #define B b1 b2 620 621 A split B 622 )cpp"); 623 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 624 ValueIs(SameRange(findSpelled("A split B")))); 625 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 626 ValueIs(SameRange(findSpelled("A split").drop_back()))); 627 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 628 ValueIs(SameRange(findSpelled("split B").drop_front()))); 629 // Ranges not fully covering macro invocations should fail. 630 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 631 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); 632 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), 633 llvm::None); 634 635 // Recursive macro invocations. 636 recordTokens(R"cpp( 637 #define ID(x) x 638 #define B b1 b2 639 640 ID(ID(ID(a1) a2 a3)) split ID(B) 641 )cpp"); 642 643 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 644 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")))); 645 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 646 ValueIs(SameRange(findSpelled("ID ( B )")))); 647 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 648 ValueIs(SameRange(findSpelled( 649 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); 650 // Ranges crossing macro call boundaries. 651 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")), 652 llvm::None); 653 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")), 654 llvm::None); 655 // FIXME: next two examples should map to macro arguments, but currently they 656 // fail. 657 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None); 658 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 659 660 // Empty macro expansions. 661 recordTokens(R"cpp( 662 #define EMPTY 663 #define ID(X) X 664 665 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 666 EMPTY EMPTY ID(4 5 6) split2 667 ID(7 8 9) EMPTY EMPTY 668 )cpp"); 669 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), 670 ValueIs(SameRange(findSpelled("ID ( 1 2 3 )")))); 671 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), 672 ValueIs(SameRange(findSpelled("ID ( 4 5 6 )")))); 673 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), 674 ValueIs(SameRange(findSpelled("ID ( 7 8 9 )")))); 675 676 // Empty mappings coming from various directives. 677 recordTokens(R"cpp( 678 #define ID(X) X 679 ID(1) 680 #pragma lalala 681 not_mapped 682 )cpp"); 683 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), 684 ValueIs(SameRange(findSpelled("not_mapped")))); 685 } 686 687 TEST_F(TokenBufferTest, ExpandedTokensForRange) { 688 recordTokens(R"cpp( 689 #define SIGN(X) X##_washere 690 A SIGN(B) C SIGN(D) E SIGN(F) G 691 )cpp"); 692 693 SourceRange R(findExpanded("C").front().location(), 694 findExpanded("F_washere").front().location()); 695 // Sanity check: expanded and spelled tokens are stored separately. 696 EXPECT_THAT(Buffer.expandedTokens(R), 697 SameRange(findExpanded("C D_washere E F_washere"))); 698 EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); 699 } 700 701 TEST_F(TokenBufferTest, ExpansionStartingAt) { 702 // Object-like macro expansions. 703 recordTokens(R"cpp( 704 #define FOO 3+4 705 int a = FOO 1; 706 int b = FOO 2; 707 )cpp"); 708 709 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back(); 710 EXPECT_THAT( 711 Buffer.expansionStartingAt(Foo1.data()), 712 ValueIs(IsExpansion(SameRange(Foo1), 713 SameRange(findExpanded("3 + 4 1").drop_back())))); 714 715 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back(); 716 EXPECT_THAT( 717 Buffer.expansionStartingAt(Foo2.data()), 718 ValueIs(IsExpansion(SameRange(Foo2), 719 SameRange(findExpanded("3 + 4 2").drop_back())))); 720 721 // Function-like macro expansions. 722 recordTokens(R"cpp( 723 #define ID(X) X 724 int a = ID(1+2+3); 725 int b = ID(ID(2+3+4)); 726 )cpp"); 727 728 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); 729 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), 730 ValueIs(IsExpansion(SameRange(ID1), 731 SameRange(findExpanded("1 + 2 + 3"))))); 732 // Only the first spelled token should be found. 733 for (const auto &T : ID1.drop_front()) 734 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 735 736 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); 737 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), 738 ValueIs(IsExpansion(SameRange(ID2), 739 SameRange(findExpanded("2 + 3 + 4"))))); 740 // Only the first spelled token should be found. 741 for (const auto &T : ID2.drop_front()) 742 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 743 744 // PP directives. 745 recordTokens(R"cpp( 746 #define FOO 1 747 int a = FOO; 748 #pragma once 749 int b = 1; 750 )cpp"); 751 752 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); 753 EXPECT_THAT( 754 Buffer.expansionStartingAt(&DefineFoo.front()), 755 ValueIs(IsExpansion(SameRange(DefineFoo), 756 SameRange(findExpanded("int a").take_front(0))))); 757 // Only the first spelled token should be found. 758 for (const auto &T : DefineFoo.drop_front()) 759 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 760 761 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); 762 EXPECT_THAT( 763 Buffer.expansionStartingAt(&PragmaOnce.front()), 764 ValueIs(IsExpansion(SameRange(PragmaOnce), 765 SameRange(findExpanded("int b").take_front(0))))); 766 // Only the first spelled token should be found. 767 for (const auto &T : PragmaOnce.drop_front()) 768 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 769 } 770 771 TEST_F(TokenBufferTest, TokensToFileRange) { 772 addFile("./foo.h", "token_from_header"); 773 llvm::Annotations Code(R"cpp( 774 #define FOO token_from_expansion 775 #include "./foo.h" 776 $all[[$i[[int]] a = FOO;]] 777 )cpp"); 778 recordTokens(Code.code()); 779 780 auto &SM = *SourceMgr; 781 782 // Two simple examples. 783 auto Int = findExpanded("int").front(); 784 auto Semi = findExpanded(";").front(); 785 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, 786 Code.range("i").End)); 787 EXPECT_EQ(syntax::Token::range(SM, Int, Semi), 788 FileRange(SM.getMainFileID(), Code.range("all").Begin, 789 Code.range("all").End)); 790 // We don't test assertion failures because death tests are slow. 791 } 792 793 TEST_F(TokenBufferTest, MacroExpansions) { 794 llvm::Annotations Code(R"cpp( 795 #define FOO B 796 #define FOO2 BA 797 #define CALL(X) int X 798 #define G CALL(FOO2) 799 int B; 800 $macro[[FOO]]; 801 $macro[[CALL]](A); 802 $macro[[G]]; 803 )cpp"); 804 recordTokens(Code.code()); 805 auto &SM = *SourceMgr; 806 auto Expansions = Buffer.macroExpansions(SM.getMainFileID()); 807 std::vector<FileRange> ExpectedMacroRanges; 808 for (auto Range : Code.ranges("macro")) 809 ExpectedMacroRanges.push_back( 810 FileRange(SM.getMainFileID(), Range.Begin, Range.End)); 811 std::vector<FileRange> ActualMacroRanges; 812 for (auto Expansion : Expansions) 813 ActualMacroRanges.push_back(Expansion->range(SM)); 814 EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); 815 } 816 817 TEST_F(TokenBufferTest, Touching) { 818 llvm::Annotations Code("^i^nt^ ^a^b^=^1;^"); 819 recordTokens(Code.code()); 820 821 auto Touching = [&](int Index) { 822 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 823 Code.points()[Index]); 824 return spelledTokensTouching(Loc, Buffer); 825 }; 826 auto Identifier = [&](int Index) { 827 SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), 828 Code.points()[Index]); 829 const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer); 830 return Tok ? Tok->text(*SourceMgr) : ""; 831 }; 832 833 EXPECT_THAT(Touching(0), SameRange(findSpelled("int"))); 834 EXPECT_EQ(Identifier(0), ""); 835 EXPECT_THAT(Touching(1), SameRange(findSpelled("int"))); 836 EXPECT_EQ(Identifier(1), ""); 837 EXPECT_THAT(Touching(2), SameRange(findSpelled("int"))); 838 EXPECT_EQ(Identifier(2), ""); 839 840 EXPECT_THAT(Touching(3), SameRange(findSpelled("ab"))); 841 EXPECT_EQ(Identifier(3), "ab"); 842 EXPECT_THAT(Touching(4), SameRange(findSpelled("ab"))); 843 EXPECT_EQ(Identifier(4), "ab"); 844 845 EXPECT_THAT(Touching(5), SameRange(findSpelled("ab ="))); 846 EXPECT_EQ(Identifier(5), "ab"); 847 848 EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1"))); 849 EXPECT_EQ(Identifier(6), ""); 850 851 EXPECT_THAT(Touching(7), SameRange(findSpelled(";"))); 852 EXPECT_EQ(Identifier(7), ""); 853 854 ASSERT_EQ(Code.points().size(), 8u); 855 } 856 857 } // namespace 858