1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Lex/Lexer.h" 10 #include "clang/Basic/Diagnostic.h" 11 #include "clang/Basic/DiagnosticOptions.h" 12 #include "clang/Basic/FileManager.h" 13 #include "clang/Basic/LangOptions.h" 14 #include "clang/Basic/SourceLocation.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Basic/TargetInfo.h" 17 #include "clang/Basic/TargetOptions.h" 18 #include "clang/Basic/TokenKinds.h" 19 #include "clang/Lex/HeaderSearch.h" 20 #include "clang/Lex/HeaderSearchOptions.h" 21 #include "clang/Lex/LiteralSupport.h" 22 #include "clang/Lex/MacroArgs.h" 23 #include "clang/Lex/MacroInfo.h" 24 #include "clang/Lex/ModuleLoader.h" 25 #include "clang/Lex/Preprocessor.h" 26 #include "clang/Lex/PreprocessorOptions.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/Testing/Annotations/Annotations.h" 30 #include "gmock/gmock.h" 31 #include "gtest/gtest.h" 32 #include <memory> 33 #include <string> 34 #include <vector> 35 36 namespace { 37 using namespace clang; 38 using testing::ElementsAre; 39 40 // The test fixture. 41 class LexerTest : public ::testing::Test { 42 protected: 43 LexerTest() 44 : FileMgr(FileMgrOpts), 45 DiagID(new DiagnosticIDs()), 46 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), 47 SourceMgr(Diags, FileMgr), 48 TargetOpts(new TargetOptions) 49 { 50 TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; 51 Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); 52 } 53 54 std::unique_ptr<Preprocessor> CreatePP(StringRef Source, 55 TrivialModuleLoader &ModLoader) { 56 std::unique_ptr<llvm::MemoryBuffer> Buf = 57 llvm::MemoryBuffer::getMemBuffer(Source); 58 SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); 59 60 HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, 61 Diags, LangOpts, Target.get()); 62 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( 63 std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr, 64 HeaderInfo, ModLoader, 65 /*IILookup =*/nullptr, 66 /*OwnsHeaderSearch =*/false); 67 PP->Initialize(*Target); 68 PP->EnterMainSourceFile(); 69 return PP; 70 } 71 72 std::vector<Token> Lex(StringRef Source) { 73 TrivialModuleLoader ModLoader; 74 PP = CreatePP(Source, ModLoader); 75 76 std::vector<Token> toks; 77 PP->LexTokensUntilEOF(&toks); 78 79 return toks; 80 } 81 82 std::vector<Token> CheckLex(StringRef Source, 83 ArrayRef<tok::TokenKind> ExpectedTokens) { 84 auto toks = Lex(Source); 85 EXPECT_EQ(ExpectedTokens.size(), toks.size()); 86 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { 87 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); 88 } 89 90 return toks; 91 } 92 93 std::string getSourceText(Token Begin, Token End) { 94 bool Invalid; 95 StringRef Str = 96 Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange( 97 Begin.getLocation(), End.getLocation())), 98 SourceMgr, LangOpts, &Invalid); 99 if (Invalid) 100 return "<INVALID>"; 101 return std::string(Str); 102 } 103 104 FileSystemOptions FileMgrOpts; 105 FileManager FileMgr; 106 IntrusiveRefCntPtr<DiagnosticIDs> DiagID; 107 DiagnosticsEngine Diags; 108 SourceManager SourceMgr; 109 LangOptions LangOpts; 110 std::shared_ptr<TargetOptions> TargetOpts; 111 IntrusiveRefCntPtr<TargetInfo> Target; 112 std::unique_ptr<Preprocessor> PP; 113 }; 114 115 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { 116 std::vector<tok::TokenKind> ExpectedTokens; 117 ExpectedTokens.push_back(tok::identifier); 118 ExpectedTokens.push_back(tok::l_paren); 119 ExpectedTokens.push_back(tok::identifier); 120 ExpectedTokens.push_back(tok::r_paren); 121 122 std::vector<Token> toks = CheckLex("#define M(x) x\n" 123 "M(f(M(i)))", 124 ExpectedTokens); 125 126 EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2])); 127 } 128 129 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { 130 std::vector<tok::TokenKind> ExpectedTokens; 131 ExpectedTokens.push_back(tok::identifier); 132 ExpectedTokens.push_back(tok::identifier); 133 134 std::vector<Token> toks = CheckLex("#define M(x) x\n" 135 "M(M(i) c)", 136 ExpectedTokens); 137 138 EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0])); 139 } 140 141 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { 142 std::vector<tok::TokenKind> ExpectedTokens; 143 ExpectedTokens.push_back(tok::identifier); 144 ExpectedTokens.push_back(tok::identifier); 145 ExpectedTokens.push_back(tok::identifier); 146 147 std::vector<Token> toks = CheckLex("#define M(x) x\n" 148 "M(c c M(i))", 149 ExpectedTokens); 150 151 EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2])); 152 } 153 154 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { 155 std::vector<tok::TokenKind> ExpectedTokens; 156 ExpectedTokens.push_back(tok::identifier); 157 ExpectedTokens.push_back(tok::identifier); 158 ExpectedTokens.push_back(tok::identifier); 159 160 std::vector<Token> toks = CheckLex("#define M(x) x\n" 161 "M(M(i) c c)", 162 ExpectedTokens); 163 164 EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1])); 165 } 166 167 TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { 168 std::vector<tok::TokenKind> ExpectedTokens; 169 ExpectedTokens.push_back(tok::identifier); 170 ExpectedTokens.push_back(tok::identifier); 171 ExpectedTokens.push_back(tok::identifier); 172 ExpectedTokens.push_back(tok::identifier); 173 174 std::vector<Token> toks = CheckLex("#define M(x) x\n" 175 "M(c M(i)) M(M(i) c)", 176 ExpectedTokens); 177 178 EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2])); 179 } 180 181 TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { 182 std::vector<tok::TokenKind> ExpectedTokens; 183 ExpectedTokens.push_back(tok::identifier); 184 ExpectedTokens.push_back(tok::l_paren); 185 ExpectedTokens.push_back(tok::identifier); 186 ExpectedTokens.push_back(tok::r_paren); 187 188 std::vector<Token> toks = CheckLex("#define M(x) x\n" 189 "#define C(x) M(x##c)\n" 190 "M(f(C(i)))", 191 ExpectedTokens); 192 193 EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2])); 194 } 195 196 TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { 197 std::vector<tok::TokenKind> ExpectedTokens; 198 ExpectedTokens.push_back(tok::identifier); 199 ExpectedTokens.push_back(tok::l_paren); 200 ExpectedTokens.push_back(tok::identifier); 201 ExpectedTokens.push_back(tok::r_paren); 202 203 std::vector<Token> toks = CheckLex("#define M(x) x\n" 204 "f(M(M(i)))", 205 ExpectedTokens); 206 EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2])); 207 } 208 209 TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { 210 std::vector<tok::TokenKind> ExpectedTokens; 211 ExpectedTokens.push_back(tok::identifier); 212 ExpectedTokens.push_back(tok::l_paren); 213 ExpectedTokens.push_back(tok::identifier); 214 ExpectedTokens.push_back(tok::r_paren); 215 216 std::vector<Token> toks = CheckLex("#define M(x) x\n" 217 "M(f(i))", 218 ExpectedTokens); 219 EXPECT_EQ("i", getSourceText(toks[2], toks[2])); 220 } 221 222 TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { 223 std::vector<tok::TokenKind> ExpectedTokens; 224 ExpectedTokens.push_back(tok::identifier); 225 ExpectedTokens.push_back(tok::l_paren); 226 ExpectedTokens.push_back(tok::identifier); 227 ExpectedTokens.push_back(tok::r_paren); 228 229 std::vector<Token> toks = CheckLex("#define M(x) x\n" 230 "#define C(x) x\n" 231 "f(C(M(i)))", 232 ExpectedTokens); 233 EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2])); 234 } 235 236 TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { 237 std::vector<tok::TokenKind> ExpectedTokens; 238 ExpectedTokens.push_back(tok::identifier); 239 ExpectedTokens.push_back(tok::l_paren); 240 ExpectedTokens.push_back(tok::identifier); 241 ExpectedTokens.push_back(tok::identifier); 242 ExpectedTokens.push_back(tok::r_paren); 243 244 std::vector<Token> toks = CheckLex("#define M(x) x\n" 245 "#define C(x) c x\n" 246 "f(C(M(i)))", 247 ExpectedTokens); 248 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 249 } 250 251 TEST_F(LexerTest, GetSourceTextExpandsRecursively) { 252 std::vector<tok::TokenKind> ExpectedTokens; 253 ExpectedTokens.push_back(tok::identifier); 254 ExpectedTokens.push_back(tok::identifier); 255 ExpectedTokens.push_back(tok::l_paren); 256 ExpectedTokens.push_back(tok::identifier); 257 ExpectedTokens.push_back(tok::r_paren); 258 259 std::vector<Token> toks = CheckLex("#define M(x) x\n" 260 "#define C(x) c M(x)\n" 261 "C(f(M(i)))", 262 ExpectedTokens); 263 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 264 } 265 266 TEST_F(LexerTest, LexAPI) { 267 std::vector<tok::TokenKind> ExpectedTokens; 268 // Line 1 (after the #defines) 269 ExpectedTokens.push_back(tok::l_square); 270 ExpectedTokens.push_back(tok::identifier); 271 ExpectedTokens.push_back(tok::r_square); 272 ExpectedTokens.push_back(tok::l_square); 273 ExpectedTokens.push_back(tok::identifier); 274 ExpectedTokens.push_back(tok::r_square); 275 // Line 2 276 ExpectedTokens.push_back(tok::identifier); 277 ExpectedTokens.push_back(tok::identifier); 278 ExpectedTokens.push_back(tok::identifier); 279 ExpectedTokens.push_back(tok::identifier); 280 281 std::vector<Token> toks = CheckLex("#define M(x) [x]\n" 282 "#define N(x) x\n" 283 "#define INN(x) x\n" 284 "#define NOF1 INN(val)\n" 285 "#define NOF2 val\n" 286 "M(foo) N([bar])\n" 287 "N(INN(val)) N(NOF1) N(NOF2) N(val)", 288 ExpectedTokens); 289 290 SourceLocation lsqrLoc = toks[0].getLocation(); 291 SourceLocation idLoc = toks[1].getLocation(); 292 SourceLocation rsqrLoc = toks[2].getLocation(); 293 CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc); 294 295 SourceLocation Loc; 296 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc)); 297 EXPECT_EQ(Loc, macroRange.getBegin()); 298 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 299 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 300 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc)); 301 EXPECT_EQ(Loc, macroRange.getEnd()); 302 EXPECT_TRUE(macroRange.isTokenRange()); 303 304 CharSourceRange range = Lexer::makeFileCharRange( 305 CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts); 306 EXPECT_TRUE(range.isInvalid()); 307 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc), 308 SourceMgr, LangOpts); 309 EXPECT_TRUE(range.isInvalid()); 310 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 311 SourceMgr, LangOpts); 312 EXPECT_TRUE(!range.isTokenRange()); 313 EXPECT_EQ(range.getAsRange(), 314 SourceRange(macroRange.getBegin(), 315 macroRange.getEnd().getLocWithOffset(1))); 316 317 StringRef text = Lexer::getSourceText( 318 CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 319 SourceMgr, LangOpts); 320 EXPECT_EQ(text, "M(foo)"); 321 322 SourceLocation macroLsqrLoc = toks[3].getLocation(); 323 SourceLocation macroIdLoc = toks[4].getLocation(); 324 SourceLocation macroRsqrLoc = toks[5].getLocation(); 325 SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc); 326 SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc); 327 SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc); 328 329 range = Lexer::makeFileCharRange( 330 CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc), 331 SourceMgr, LangOpts); 332 EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)), 333 range.getAsRange()); 334 335 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc), 336 SourceMgr, LangOpts); 337 EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)), 338 range.getAsRange()); 339 340 macroRange = SourceMgr.getExpansionRange(macroLsqrLoc); 341 range = Lexer::makeFileCharRange( 342 CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc), 343 SourceMgr, LangOpts); 344 EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)), 345 range.getAsRange()); 346 347 text = Lexer::getSourceText( 348 CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)), 349 SourceMgr, LangOpts); 350 EXPECT_EQ(text, "[bar"); 351 352 353 SourceLocation idLoc1 = toks[6].getLocation(); 354 SourceLocation idLoc2 = toks[7].getLocation(); 355 SourceLocation idLoc3 = toks[8].getLocation(); 356 SourceLocation idLoc4 = toks[9].getLocation(); 357 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts)); 358 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts)); 359 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts)); 360 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts)); 361 } 362 363 TEST_F(LexerTest, HandlesSplitTokens) { 364 std::vector<tok::TokenKind> ExpectedTokens; 365 // Line 1 (after the #defines) 366 ExpectedTokens.push_back(tok::identifier); 367 ExpectedTokens.push_back(tok::less); 368 ExpectedTokens.push_back(tok::identifier); 369 ExpectedTokens.push_back(tok::less); 370 ExpectedTokens.push_back(tok::greatergreater); 371 // Line 2 372 ExpectedTokens.push_back(tok::identifier); 373 ExpectedTokens.push_back(tok::less); 374 ExpectedTokens.push_back(tok::identifier); 375 ExpectedTokens.push_back(tok::less); 376 ExpectedTokens.push_back(tok::greatergreater); 377 378 std::vector<Token> toks = CheckLex("#define TY ty\n" 379 "#define RANGLE ty<ty<>>\n" 380 "TY<ty<>>\n" 381 "RANGLE", 382 ExpectedTokens); 383 384 SourceLocation outerTyLoc = toks[0].getLocation(); 385 SourceLocation innerTyLoc = toks[2].getLocation(); 386 SourceLocation gtgtLoc = toks[4].getLocation(); 387 // Split the token to simulate the action of the parser and force creation of 388 // an `ExpansionTokenRange`. 389 SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1); 390 391 // Verify that it only captures the first greater-then and not the second one. 392 CharSourceRange range = Lexer::makeFileCharRange( 393 CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr, 394 LangOpts); 395 EXPECT_TRUE(range.isCharRange()); 396 EXPECT_EQ(range.getAsRange(), 397 SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1))); 398 399 // Verify case where range begins in a macro expansion. 400 range = Lexer::makeFileCharRange( 401 CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr, 402 LangOpts); 403 EXPECT_TRUE(range.isCharRange()); 404 EXPECT_EQ(range.getAsRange(), 405 SourceRange(SourceMgr.getExpansionLoc(outerTyLoc), 406 gtgtLoc.getLocWithOffset(1))); 407 408 SourceLocation macroInnerTyLoc = toks[7].getLocation(); 409 SourceLocation macroGtgtLoc = toks[9].getLocation(); 410 // Split the token to simulate the action of the parser and force creation of 411 // an `ExpansionTokenRange`. 412 SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1); 413 414 // Verify that it fails (because it only captures the first greater-then and 415 // not the second one, so it doesn't span the entire macro expansion). 416 range = Lexer::makeFileCharRange( 417 CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc), 418 SourceMgr, LangOpts); 419 EXPECT_TRUE(range.isInvalid()); 420 } 421 422 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { 423 std::vector<Token> toks = 424 Lex("#define helper1 0\n" 425 "void helper2(const char *, ...);\n" 426 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" 427 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" 428 "void f1() { M2(\"a\", \"b\"); }"); 429 430 // Check the file corresponding to the "helper1" macro arg in M2. 431 // 432 // The lexer used to report its size as 31, meaning that the end of the 433 // expansion would be on the *next line* (just past `M2("a", "b")`). Make 434 // sure that we get the correct end location (the comma after "helper1"). 435 SourceLocation helper1ArgLoc = toks[20].getLocation(); 436 EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); 437 } 438 439 TEST_F(LexerTest, DontOverallocateStringifyArgs) { 440 TrivialModuleLoader ModLoader; 441 auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader); 442 443 llvm::BumpPtrAllocator Allocator; 444 std::array<IdentifierInfo *, 3> ParamList; 445 MacroInfo *MI = PP->AllocateMacroInfo({}); 446 MI->setIsFunctionLike(); 447 MI->setParameterList(ParamList, Allocator); 448 EXPECT_EQ(3u, MI->getNumParams()); 449 EXPECT_TRUE(MI->isFunctionLike()); 450 451 Token Eof; 452 Eof.setKind(tok::eof); 453 std::vector<Token> ArgTokens; 454 while (1) { 455 Token tok; 456 PP->Lex(tok); 457 if (tok.is(tok::eof)) { 458 ArgTokens.push_back(Eof); 459 break; 460 } 461 if (tok.is(tok::comma)) 462 ArgTokens.push_back(Eof); 463 else 464 ArgTokens.push_back(tok); 465 } 466 467 auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); }; 468 std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA( 469 MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter); 470 auto StringifyArg = [&](int ArgNo) { 471 return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP, 472 /*Charify=*/false, {}, {}); 473 }; 474 Token Result = StringifyArg(0); 475 EXPECT_EQ(tok::string_literal, Result.getKind()); 476 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData()); 477 Result = StringifyArg(1); 478 EXPECT_EQ(tok::string_literal, Result.getKind()); 479 EXPECT_STREQ("\"5\"", Result.getLiteralData()); 480 Result = StringifyArg(2); 481 EXPECT_EQ(tok::string_literal, Result.getKind()); 482 EXPECT_STREQ("\"'C'\"", Result.getLiteralData()); 483 #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST 484 EXPECT_DEATH(StringifyArg(3), "Invalid arg #"); 485 #endif 486 } 487 488 TEST_F(LexerTest, IsNewLineEscapedValid) { 489 auto hasNewLineEscaped = [](const char *S) { 490 return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); 491 }; 492 493 EXPECT_TRUE(hasNewLineEscaped("\\\r")); 494 EXPECT_TRUE(hasNewLineEscaped("\\\n")); 495 EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); 496 EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); 497 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); 498 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); 499 500 EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); 501 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); 502 EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); 503 EXPECT_FALSE(hasNewLineEscaped("\r")); 504 EXPECT_FALSE(hasNewLineEscaped("\n")); 505 EXPECT_FALSE(hasNewLineEscaped("\r\n")); 506 EXPECT_FALSE(hasNewLineEscaped("\n\r")); 507 EXPECT_FALSE(hasNewLineEscaped("\r\r")); 508 EXPECT_FALSE(hasNewLineEscaped("\n\n")); 509 } 510 511 TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { 512 // Each line should have the same length for 513 // further offset calculation to be more straightforward. 514 const unsigned IdentifierLength = 8; 515 std::string TextToLex = "rabarbar\n" 516 "foo\\\nbar\n" 517 "foo\\\rbar\n" 518 "fo\\\r\nbar\n" 519 "foo\\\n\rba\n"; 520 std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; 521 std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); 522 523 for (const Token &Tok : LexedTokens) { 524 std::pair<FileID, unsigned> OriginalLocation = 525 SourceMgr.getDecomposedLoc(Tok.getLocation()); 526 for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { 527 SourceLocation LookupLocation = 528 Tok.getLocation().getLocWithOffset(Offset); 529 530 std::pair<FileID, unsigned> FoundLocation = 531 SourceMgr.getDecomposedExpansionLoc( 532 Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); 533 534 // Check that location returned by the GetBeginningOfToken 535 // is the same as original token location reported by Lexer. 536 EXPECT_EQ(FoundLocation.second, OriginalLocation.second); 537 } 538 } 539 } 540 541 TEST_F(LexerTest, AvoidPastEndOfStringDereference) { 542 EXPECT_TRUE(Lex(" // \\\n").empty()); 543 EXPECT_TRUE(Lex("#include <\\\\").empty()); 544 EXPECT_TRUE(Lex("#include <\\\\\n").empty()); 545 } 546 547 TEST_F(LexerTest, StringizingRasString) { 548 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)". 549 std::string String1 = R"(foo 550 {"bar":[]} 551 baz)"; 552 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)". 553 SmallString<128> String2; 554 String2 += String1.c_str(); 555 556 // Corner cases. 557 std::string String3 = R"(\ 558 \n 559 \\n 560 \\)"; 561 SmallString<128> String4; 562 String4 += String3.c_str(); 563 std::string String5 = R"(a\ 564 565 566 \\b)"; 567 SmallString<128> String6; 568 String6 += String5.c_str(); 569 570 String1 = Lexer::Stringify(StringRef(String1)); 571 Lexer::Stringify(String2); 572 String3 = Lexer::Stringify(StringRef(String3)); 573 Lexer::Stringify(String4); 574 String5 = Lexer::Stringify(StringRef(String5)); 575 Lexer::Stringify(String6); 576 577 EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)"); 578 EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)"); 579 EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)"); 580 EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)"); 581 EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)"); 582 EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)"); 583 } 584 585 TEST_F(LexerTest, CharRangeOffByOne) { 586 std::vector<Token> toks = Lex(R"(#define MOO 1 587 void foo() { MOO; })"); 588 const Token &moo = toks[5]; 589 590 EXPECT_EQ(getSourceText(moo, moo), "MOO"); 591 592 SourceRange R{moo.getLocation(), moo.getLocation()}; 593 594 EXPECT_TRUE( 595 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts)); 596 EXPECT_TRUE( 597 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts)); 598 599 CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts); 600 601 EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO". 602 } 603 604 TEST_F(LexerTest, FindNextToken) { 605 Lex("int abcd = 0;\n" 606 "// A comment.\n" 607 "int xyz = abcd;\n"); 608 std::vector<std::string> GeneratedByNextToken; 609 SourceLocation Loc = 610 SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); 611 while (true) { 612 auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts); 613 ASSERT_TRUE(T); 614 if (T->is(tok::eof)) 615 break; 616 GeneratedByNextToken.push_back(getSourceText(*T, *T)); 617 Loc = T->getLocation(); 618 } 619 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int", 620 "xyz", "=", "abcd", ";")); 621 } 622 623 TEST_F(LexerTest, FindNextTokenIncludingComments) { 624 Lex("int abcd = 0;\n" 625 "// A comment.\n" 626 "int xyz = abcd;\n"); 627 std::vector<std::string> GeneratedByNextToken; 628 SourceLocation Loc = 629 SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); 630 while (true) { 631 auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts, true); 632 ASSERT_TRUE(T); 633 if (T->is(tok::eof)) 634 break; 635 GeneratedByNextToken.push_back(getSourceText(*T, *T)); 636 Loc = T->getLocation(); 637 } 638 EXPECT_THAT(GeneratedByNextToken, 639 ElementsAre("abcd", "=", "0", ";", "// A comment.", "int", "xyz", 640 "=", "abcd", ";")); 641 } 642 643 TEST_F(LexerTest, FindPreviousToken) { 644 Lex("int abcd = 0;\n" 645 "// A comment.\n" 646 "int xyz = abcd;\n"); 647 std::vector<std::string> GeneratedByPrevToken; 648 SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID()); 649 while (true) { 650 auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, false); 651 if (!T.has_value()) 652 break; 653 GeneratedByPrevToken.push_back(getSourceText(*T, *T)); 654 Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts); 655 } 656 EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int", 657 ";", "0", "=", "abcd", "int")); 658 } 659 660 TEST_F(LexerTest, FindPreviousTokenIncludingComments) { 661 Lex("int abcd = 0;\n" 662 "// A comment.\n" 663 "int xyz = abcd;\n"); 664 std::vector<std::string> GeneratedByPrevToken; 665 SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID()); 666 while (true) { 667 auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, true); 668 if (!T.has_value()) 669 break; 670 GeneratedByPrevToken.push_back(getSourceText(*T, *T)); 671 Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts); 672 } 673 EXPECT_THAT(GeneratedByPrevToken, 674 ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";", 675 "0", "=", "abcd", "int")); 676 } 677 678 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { 679 TrivialModuleLoader ModLoader; 680 auto PP = CreatePP("", ModLoader); 681 PP->LexTokensUntilEOF(); 682 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), 683 1U); 684 } 685 686 TEST_F(LexerTest, RawAndNormalLexSameForLineComments) { 687 const llvm::StringLiteral Source = R"cpp( 688 // First line comment. 689 //* Second line comment which is ambigious. 690 ; // Have a non-comment token to make sure something is lexed. 691 )cpp"; 692 LangOpts.LineComment = false; 693 auto Toks = Lex(Source); 694 auto &SM = PP->getSourceManager(); 695 auto SrcBuffer = SM.getBufferData(SM.getMainFileID()); 696 Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(), 697 SrcBuffer.data(), SrcBuffer.data(), 698 SrcBuffer.data() + SrcBuffer.size()); 699 700 auto ToksView = llvm::ArrayRef(Toks); 701 clang::Token T; 702 EXPECT_FALSE(ToksView.empty()); 703 while (!L.LexFromRawLexer(T)) { 704 ASSERT_TRUE(!ToksView.empty()); 705 EXPECT_EQ(T.getKind(), ToksView.front().getKind()); 706 ToksView = ToksView.drop_front(); 707 } 708 EXPECT_TRUE(ToksView.empty()); 709 } 710 711 TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) { 712 const llvm::StringLiteral Source = R"cc( 713 #define ONE \ 714 1 715 716 int i = ONE; 717 )cc"; 718 std::vector<Token> Toks = 719 CheckLex(Source, {tok::kw_int, tok::identifier, tok::equal, 720 tok::numeric_constant, tok::semi}); 721 722 // Set up by getting the raw token for the `1` in the macro definition. 723 const Token &OneExpanded = Toks[3]; 724 Token Tok; 725 ASSERT_FALSE( 726 Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts)); 727 // The `ONE`. 728 ASSERT_EQ(Tok.getKind(), tok::raw_identifier); 729 ASSERT_FALSE( 730 Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()), 731 Tok, SourceMgr, LangOpts)); 732 // The `1` in the macro definition. 733 ASSERT_EQ(Tok.getKind(), tok::numeric_constant); 734 735 // Go back 4 characters: two spaces, one newline, and the backslash. 736 SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4); 737 // Expect true (=failure) because the whitespace immediately after the 738 // escaped newline is not ignored. 739 EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts, 740 /*IgnoreWhiteSpace=*/false)); 741 } 742 743 TEST(LexerPreambleTest, PreambleBounds) { 744 std::vector<std::string> Cases = { 745 R"cc([[ 746 #include <foo> 747 ]]int bar; 748 )cc", 749 R"cc([[ 750 #include <foo> 751 ]])cc", 752 R"cc([[ 753 // leading comment 754 #include <foo> 755 ]]// trailing comment 756 int bar; 757 )cc", 758 R"cc([[ 759 module; 760 #include <foo> 761 ]]module bar; 762 int x; 763 )cc", 764 }; 765 for (const auto& Case : Cases) { 766 llvm::Annotations A(Case); 767 clang::LangOptions LangOpts; 768 LangOpts.CPlusPlusModules = true; 769 auto Bounds = Lexer::ComputePreamble(A.code(), LangOpts); 770 EXPECT_EQ(Bounds.Size, A.range().End) << Case; 771 } 772 } 773 774 } // anonymous namespace 775