1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Lex/Lexer.h" 10 #include "clang/Basic/Diagnostic.h" 11 #include "clang/Basic/DiagnosticOptions.h" 12 #include "clang/Basic/FileManager.h" 13 #include "clang/Basic/LangOptions.h" 14 #include "clang/Basic/SourceLocation.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Basic/TargetInfo.h" 17 #include "clang/Basic/TargetOptions.h" 18 #include "clang/Basic/TokenKinds.h" 19 #include "clang/Lex/HeaderSearch.h" 20 #include "clang/Lex/HeaderSearchOptions.h" 21 #include "clang/Lex/LiteralSupport.h" 22 #include "clang/Lex/MacroArgs.h" 23 #include "clang/Lex/MacroInfo.h" 24 #include "clang/Lex/ModuleLoader.h" 25 #include "clang/Lex/Preprocessor.h" 26 #include "clang/Lex/PreprocessorOptions.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "gmock/gmock.h" 30 #include "gtest/gtest.h" 31 #include <memory> 32 #include <vector> 33 34 namespace { 35 using namespace clang; 36 using testing::ElementsAre; 37 38 // The test fixture. 39 class LexerTest : public ::testing::Test { 40 protected: 41 LexerTest() 42 : FileMgr(FileMgrOpts), 43 DiagID(new DiagnosticIDs()), 44 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), 45 SourceMgr(Diags, FileMgr), 46 TargetOpts(new TargetOptions) 47 { 48 TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; 49 Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); 50 } 51 52 std::unique_ptr<Preprocessor> CreatePP(StringRef Source, 53 TrivialModuleLoader &ModLoader) { 54 std::unique_ptr<llvm::MemoryBuffer> Buf = 55 llvm::MemoryBuffer::getMemBuffer(Source); 56 SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); 57 58 HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, 59 Diags, LangOpts, Target.get()); 60 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( 61 std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr, 62 HeaderInfo, ModLoader, 63 /*IILookup =*/nullptr, 64 /*OwnsHeaderSearch =*/false); 65 PP->Initialize(*Target); 66 PP->EnterMainSourceFile(); 67 return PP; 68 } 69 70 std::vector<Token> Lex(StringRef Source) { 71 TrivialModuleLoader ModLoader; 72 PP = CreatePP(Source, ModLoader); 73 74 std::vector<Token> toks; 75 while (1) { 76 Token tok; 77 PP->Lex(tok); 78 if (tok.is(tok::eof)) 79 break; 80 toks.push_back(tok); 81 } 82 83 return toks; 84 } 85 86 std::vector<Token> CheckLex(StringRef Source, 87 ArrayRef<tok::TokenKind> ExpectedTokens) { 88 auto toks = Lex(Source); 89 EXPECT_EQ(ExpectedTokens.size(), toks.size()); 90 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { 91 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); 92 } 93 94 return toks; 95 } 96 97 std::string getSourceText(Token Begin, Token End) { 98 bool Invalid; 99 StringRef Str = 100 Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange( 101 Begin.getLocation(), End.getLocation())), 102 SourceMgr, LangOpts, &Invalid); 103 if (Invalid) 104 return "<INVALID>"; 105 return std::string(Str); 106 } 107 108 FileSystemOptions FileMgrOpts; 109 FileManager FileMgr; 110 IntrusiveRefCntPtr<DiagnosticIDs> DiagID; 111 DiagnosticsEngine Diags; 112 SourceManager SourceMgr; 113 LangOptions LangOpts; 114 std::shared_ptr<TargetOptions> TargetOpts; 115 IntrusiveRefCntPtr<TargetInfo> Target; 116 std::unique_ptr<Preprocessor> PP; 117 }; 118 119 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { 120 std::vector<tok::TokenKind> ExpectedTokens; 121 ExpectedTokens.push_back(tok::identifier); 122 ExpectedTokens.push_back(tok::l_paren); 123 ExpectedTokens.push_back(tok::identifier); 124 ExpectedTokens.push_back(tok::r_paren); 125 126 std::vector<Token> toks = CheckLex("#define M(x) x\n" 127 "M(f(M(i)))", 128 ExpectedTokens); 129 130 EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2])); 131 } 132 133 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { 134 std::vector<tok::TokenKind> ExpectedTokens; 135 ExpectedTokens.push_back(tok::identifier); 136 ExpectedTokens.push_back(tok::identifier); 137 138 std::vector<Token> toks = CheckLex("#define M(x) x\n" 139 "M(M(i) c)", 140 ExpectedTokens); 141 142 EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0])); 143 } 144 145 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { 146 std::vector<tok::TokenKind> ExpectedTokens; 147 ExpectedTokens.push_back(tok::identifier); 148 ExpectedTokens.push_back(tok::identifier); 149 ExpectedTokens.push_back(tok::identifier); 150 151 std::vector<Token> toks = CheckLex("#define M(x) x\n" 152 "M(c c M(i))", 153 ExpectedTokens); 154 155 EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2])); 156 } 157 158 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { 159 std::vector<tok::TokenKind> ExpectedTokens; 160 ExpectedTokens.push_back(tok::identifier); 161 ExpectedTokens.push_back(tok::identifier); 162 ExpectedTokens.push_back(tok::identifier); 163 164 std::vector<Token> toks = CheckLex("#define M(x) x\n" 165 "M(M(i) c c)", 166 ExpectedTokens); 167 168 EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1])); 169 } 170 171 TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { 172 std::vector<tok::TokenKind> ExpectedTokens; 173 ExpectedTokens.push_back(tok::identifier); 174 ExpectedTokens.push_back(tok::identifier); 175 ExpectedTokens.push_back(tok::identifier); 176 ExpectedTokens.push_back(tok::identifier); 177 178 std::vector<Token> toks = CheckLex("#define M(x) x\n" 179 "M(c M(i)) M(M(i) c)", 180 ExpectedTokens); 181 182 EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2])); 183 } 184 185 TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { 186 std::vector<tok::TokenKind> ExpectedTokens; 187 ExpectedTokens.push_back(tok::identifier); 188 ExpectedTokens.push_back(tok::l_paren); 189 ExpectedTokens.push_back(tok::identifier); 190 ExpectedTokens.push_back(tok::r_paren); 191 192 std::vector<Token> toks = CheckLex("#define M(x) x\n" 193 "#define C(x) M(x##c)\n" 194 "M(f(C(i)))", 195 ExpectedTokens); 196 197 EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2])); 198 } 199 200 TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { 201 std::vector<tok::TokenKind> ExpectedTokens; 202 ExpectedTokens.push_back(tok::identifier); 203 ExpectedTokens.push_back(tok::l_paren); 204 ExpectedTokens.push_back(tok::identifier); 205 ExpectedTokens.push_back(tok::r_paren); 206 207 std::vector<Token> toks = CheckLex("#define M(x) x\n" 208 "f(M(M(i)))", 209 ExpectedTokens); 210 EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2])); 211 } 212 213 TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { 214 std::vector<tok::TokenKind> ExpectedTokens; 215 ExpectedTokens.push_back(tok::identifier); 216 ExpectedTokens.push_back(tok::l_paren); 217 ExpectedTokens.push_back(tok::identifier); 218 ExpectedTokens.push_back(tok::r_paren); 219 220 std::vector<Token> toks = CheckLex("#define M(x) x\n" 221 "M(f(i))", 222 ExpectedTokens); 223 EXPECT_EQ("i", getSourceText(toks[2], toks[2])); 224 } 225 226 TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { 227 std::vector<tok::TokenKind> ExpectedTokens; 228 ExpectedTokens.push_back(tok::identifier); 229 ExpectedTokens.push_back(tok::l_paren); 230 ExpectedTokens.push_back(tok::identifier); 231 ExpectedTokens.push_back(tok::r_paren); 232 233 std::vector<Token> toks = CheckLex("#define M(x) x\n" 234 "#define C(x) x\n" 235 "f(C(M(i)))", 236 ExpectedTokens); 237 EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2])); 238 } 239 240 TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { 241 std::vector<tok::TokenKind> ExpectedTokens; 242 ExpectedTokens.push_back(tok::identifier); 243 ExpectedTokens.push_back(tok::l_paren); 244 ExpectedTokens.push_back(tok::identifier); 245 ExpectedTokens.push_back(tok::identifier); 246 ExpectedTokens.push_back(tok::r_paren); 247 248 std::vector<Token> toks = CheckLex("#define M(x) x\n" 249 "#define C(x) c x\n" 250 "f(C(M(i)))", 251 ExpectedTokens); 252 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 253 } 254 255 TEST_F(LexerTest, GetSourceTextExpandsRecursively) { 256 std::vector<tok::TokenKind> ExpectedTokens; 257 ExpectedTokens.push_back(tok::identifier); 258 ExpectedTokens.push_back(tok::identifier); 259 ExpectedTokens.push_back(tok::l_paren); 260 ExpectedTokens.push_back(tok::identifier); 261 ExpectedTokens.push_back(tok::r_paren); 262 263 std::vector<Token> toks = CheckLex("#define M(x) x\n" 264 "#define C(x) c M(x)\n" 265 "C(f(M(i)))", 266 ExpectedTokens); 267 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 268 } 269 270 TEST_F(LexerTest, LexAPI) { 271 std::vector<tok::TokenKind> ExpectedTokens; 272 // Line 1 (after the #defines) 273 ExpectedTokens.push_back(tok::l_square); 274 ExpectedTokens.push_back(tok::identifier); 275 ExpectedTokens.push_back(tok::r_square); 276 ExpectedTokens.push_back(tok::l_square); 277 ExpectedTokens.push_back(tok::identifier); 278 ExpectedTokens.push_back(tok::r_square); 279 // Line 2 280 ExpectedTokens.push_back(tok::identifier); 281 ExpectedTokens.push_back(tok::identifier); 282 ExpectedTokens.push_back(tok::identifier); 283 ExpectedTokens.push_back(tok::identifier); 284 285 std::vector<Token> toks = CheckLex("#define M(x) [x]\n" 286 "#define N(x) x\n" 287 "#define INN(x) x\n" 288 "#define NOF1 INN(val)\n" 289 "#define NOF2 val\n" 290 "M(foo) N([bar])\n" 291 "N(INN(val)) N(NOF1) N(NOF2) N(val)", 292 ExpectedTokens); 293 294 SourceLocation lsqrLoc = toks[0].getLocation(); 295 SourceLocation idLoc = toks[1].getLocation(); 296 SourceLocation rsqrLoc = toks[2].getLocation(); 297 CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc); 298 299 SourceLocation Loc; 300 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc)); 301 EXPECT_EQ(Loc, macroRange.getBegin()); 302 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 303 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 304 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc)); 305 EXPECT_EQ(Loc, macroRange.getEnd()); 306 EXPECT_TRUE(macroRange.isTokenRange()); 307 308 CharSourceRange range = Lexer::makeFileCharRange( 309 CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts); 310 EXPECT_TRUE(range.isInvalid()); 311 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc), 312 SourceMgr, LangOpts); 313 EXPECT_TRUE(range.isInvalid()); 314 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 315 SourceMgr, LangOpts); 316 EXPECT_TRUE(!range.isTokenRange()); 317 EXPECT_EQ(range.getAsRange(), 318 SourceRange(macroRange.getBegin(), 319 macroRange.getEnd().getLocWithOffset(1))); 320 321 StringRef text = Lexer::getSourceText( 322 CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 323 SourceMgr, LangOpts); 324 EXPECT_EQ(text, "M(foo)"); 325 326 SourceLocation macroLsqrLoc = toks[3].getLocation(); 327 SourceLocation macroIdLoc = toks[4].getLocation(); 328 SourceLocation macroRsqrLoc = toks[5].getLocation(); 329 SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc); 330 SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc); 331 SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc); 332 333 range = Lexer::makeFileCharRange( 334 CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc), 335 SourceMgr, LangOpts); 336 EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)), 337 range.getAsRange()); 338 339 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc), 340 SourceMgr, LangOpts); 341 EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)), 342 range.getAsRange()); 343 344 macroRange = SourceMgr.getExpansionRange(macroLsqrLoc); 345 range = Lexer::makeFileCharRange( 346 CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc), 347 SourceMgr, LangOpts); 348 EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)), 349 range.getAsRange()); 350 351 text = Lexer::getSourceText( 352 CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)), 353 SourceMgr, LangOpts); 354 EXPECT_EQ(text, "[bar"); 355 356 357 SourceLocation idLoc1 = toks[6].getLocation(); 358 SourceLocation idLoc2 = toks[7].getLocation(); 359 SourceLocation idLoc3 = toks[8].getLocation(); 360 SourceLocation idLoc4 = toks[9].getLocation(); 361 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts)); 362 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts)); 363 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts)); 364 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts)); 365 } 366 367 TEST_F(LexerTest, HandlesSplitTokens) { 368 std::vector<tok::TokenKind> ExpectedTokens; 369 // Line 1 (after the #defines) 370 ExpectedTokens.push_back(tok::identifier); 371 ExpectedTokens.push_back(tok::less); 372 ExpectedTokens.push_back(tok::identifier); 373 ExpectedTokens.push_back(tok::less); 374 ExpectedTokens.push_back(tok::greatergreater); 375 // Line 2 376 ExpectedTokens.push_back(tok::identifier); 377 ExpectedTokens.push_back(tok::less); 378 ExpectedTokens.push_back(tok::identifier); 379 ExpectedTokens.push_back(tok::less); 380 ExpectedTokens.push_back(tok::greatergreater); 381 382 std::vector<Token> toks = CheckLex("#define TY ty\n" 383 "#define RANGLE ty<ty<>>\n" 384 "TY<ty<>>\n" 385 "RANGLE", 386 ExpectedTokens); 387 388 SourceLocation outerTyLoc = toks[0].getLocation(); 389 SourceLocation innerTyLoc = toks[2].getLocation(); 390 SourceLocation gtgtLoc = toks[4].getLocation(); 391 // Split the token to simulate the action of the parser and force creation of 392 // an `ExpansionTokenRange`. 393 SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1); 394 395 // Verify that it only captures the first greater-then and not the second one. 396 CharSourceRange range = Lexer::makeFileCharRange( 397 CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr, 398 LangOpts); 399 EXPECT_TRUE(range.isCharRange()); 400 EXPECT_EQ(range.getAsRange(), 401 SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1))); 402 403 // Verify case where range begins in a macro expansion. 404 range = Lexer::makeFileCharRange( 405 CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr, 406 LangOpts); 407 EXPECT_TRUE(range.isCharRange()); 408 EXPECT_EQ(range.getAsRange(), 409 SourceRange(SourceMgr.getExpansionLoc(outerTyLoc), 410 gtgtLoc.getLocWithOffset(1))); 411 412 SourceLocation macroInnerTyLoc = toks[7].getLocation(); 413 SourceLocation macroGtgtLoc = toks[9].getLocation(); 414 // Split the token to simulate the action of the parser and force creation of 415 // an `ExpansionTokenRange`. 416 SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1); 417 418 // Verify that it fails (because it only captures the first greater-then and 419 // not the second one, so it doesn't span the entire macro expansion). 420 range = Lexer::makeFileCharRange( 421 CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc), 422 SourceMgr, LangOpts); 423 EXPECT_TRUE(range.isInvalid()); 424 } 425 426 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { 427 std::vector<Token> toks = 428 Lex("#define helper1 0\n" 429 "void helper2(const char *, ...);\n" 430 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" 431 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" 432 "void f1() { M2(\"a\", \"b\"); }"); 433 434 // Check the file corresponding to the "helper1" macro arg in M2. 435 // 436 // The lexer used to report its size as 31, meaning that the end of the 437 // expansion would be on the *next line* (just past `M2("a", "b")`). Make 438 // sure that we get the correct end location (the comma after "helper1"). 439 SourceLocation helper1ArgLoc = toks[20].getLocation(); 440 EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); 441 } 442 443 TEST_F(LexerTest, DontOverallocateStringifyArgs) { 444 TrivialModuleLoader ModLoader; 445 auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader); 446 447 llvm::BumpPtrAllocator Allocator; 448 std::array<IdentifierInfo *, 3> ParamList; 449 MacroInfo *MI = PP->AllocateMacroInfo({}); 450 MI->setIsFunctionLike(); 451 MI->setParameterList(ParamList, Allocator); 452 EXPECT_EQ(3u, MI->getNumParams()); 453 EXPECT_TRUE(MI->isFunctionLike()); 454 455 Token Eof; 456 Eof.setKind(tok::eof); 457 std::vector<Token> ArgTokens; 458 while (1) { 459 Token tok; 460 PP->Lex(tok); 461 if (tok.is(tok::eof)) { 462 ArgTokens.push_back(Eof); 463 break; 464 } 465 if (tok.is(tok::comma)) 466 ArgTokens.push_back(Eof); 467 else 468 ArgTokens.push_back(tok); 469 } 470 471 auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); }; 472 std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA( 473 MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter); 474 auto StringifyArg = [&](int ArgNo) { 475 return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP, 476 /*Charify=*/false, {}, {}); 477 }; 478 Token Result = StringifyArg(0); 479 EXPECT_EQ(tok::string_literal, Result.getKind()); 480 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData()); 481 Result = StringifyArg(1); 482 EXPECT_EQ(tok::string_literal, Result.getKind()); 483 EXPECT_STREQ("\"5\"", Result.getLiteralData()); 484 Result = StringifyArg(2); 485 EXPECT_EQ(tok::string_literal, Result.getKind()); 486 EXPECT_STREQ("\"'C'\"", Result.getLiteralData()); 487 #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST 488 EXPECT_DEATH(StringifyArg(3), "Invalid arg #"); 489 #endif 490 } 491 492 TEST_F(LexerTest, IsNewLineEscapedValid) { 493 auto hasNewLineEscaped = [](const char *S) { 494 return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); 495 }; 496 497 EXPECT_TRUE(hasNewLineEscaped("\\\r")); 498 EXPECT_TRUE(hasNewLineEscaped("\\\n")); 499 EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); 500 EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); 501 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); 502 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); 503 504 EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); 505 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); 506 EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); 507 EXPECT_FALSE(hasNewLineEscaped("\r")); 508 EXPECT_FALSE(hasNewLineEscaped("\n")); 509 EXPECT_FALSE(hasNewLineEscaped("\r\n")); 510 EXPECT_FALSE(hasNewLineEscaped("\n\r")); 511 EXPECT_FALSE(hasNewLineEscaped("\r\r")); 512 EXPECT_FALSE(hasNewLineEscaped("\n\n")); 513 } 514 515 TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { 516 // Each line should have the same length for 517 // further offset calculation to be more straightforward. 518 const unsigned IdentifierLength = 8; 519 std::string TextToLex = "rabarbar\n" 520 "foo\\\nbar\n" 521 "foo\\\rbar\n" 522 "fo\\\r\nbar\n" 523 "foo\\\n\rba\n"; 524 std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; 525 std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); 526 527 for (const Token &Tok : LexedTokens) { 528 std::pair<FileID, unsigned> OriginalLocation = 529 SourceMgr.getDecomposedLoc(Tok.getLocation()); 530 for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { 531 SourceLocation LookupLocation = 532 Tok.getLocation().getLocWithOffset(Offset); 533 534 std::pair<FileID, unsigned> FoundLocation = 535 SourceMgr.getDecomposedExpansionLoc( 536 Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); 537 538 // Check that location returned by the GetBeginningOfToken 539 // is the same as original token location reported by Lexer. 540 EXPECT_EQ(FoundLocation.second, OriginalLocation.second); 541 } 542 } 543 } 544 545 TEST_F(LexerTest, AvoidPastEndOfStringDereference) { 546 EXPECT_TRUE(Lex(" // \\\n").empty()); 547 EXPECT_TRUE(Lex("#include <\\\\").empty()); 548 EXPECT_TRUE(Lex("#include <\\\\\n").empty()); 549 } 550 551 TEST_F(LexerTest, StringizingRasString) { 552 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)". 553 std::string String1 = R"(foo 554 {"bar":[]} 555 baz)"; 556 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)". 557 SmallString<128> String2; 558 String2 += String1.c_str(); 559 560 // Corner cases. 561 std::string String3 = R"(\ 562 \n 563 \\n 564 \\)"; 565 SmallString<128> String4; 566 String4 += String3.c_str(); 567 std::string String5 = R"(a\ 568 569 570 \\b)"; 571 SmallString<128> String6; 572 String6 += String5.c_str(); 573 574 String1 = Lexer::Stringify(StringRef(String1)); 575 Lexer::Stringify(String2); 576 String3 = Lexer::Stringify(StringRef(String3)); 577 Lexer::Stringify(String4); 578 String5 = Lexer::Stringify(StringRef(String5)); 579 Lexer::Stringify(String6); 580 581 EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)"); 582 EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)"); 583 EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)"); 584 EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)"); 585 EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)"); 586 EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)"); 587 } 588 589 TEST_F(LexerTest, CharRangeOffByOne) { 590 std::vector<Token> toks = Lex(R"(#define MOO 1 591 void foo() { MOO; })"); 592 const Token &moo = toks[5]; 593 594 EXPECT_EQ(getSourceText(moo, moo), "MOO"); 595 596 SourceRange R{moo.getLocation(), moo.getLocation()}; 597 598 EXPECT_TRUE( 599 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts)); 600 EXPECT_TRUE( 601 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts)); 602 603 CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts); 604 605 EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO". 606 } 607 608 TEST_F(LexerTest, FindNextToken) { 609 Lex("int abcd = 0;\n" 610 "int xyz = abcd;\n"); 611 std::vector<std::string> GeneratedByNextToken; 612 SourceLocation Loc = 613 SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); 614 while (true) { 615 auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts); 616 ASSERT_TRUE(T); 617 if (T->is(tok::eof)) 618 break; 619 GeneratedByNextToken.push_back(getSourceText(*T, *T)); 620 Loc = T->getLocation(); 621 } 622 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int", 623 "xyz", "=", "abcd", ";")); 624 } 625 626 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { 627 TrivialModuleLoader ModLoader; 628 auto PP = CreatePP("", ModLoader); 629 while (1) { 630 Token tok; 631 PP->Lex(tok); 632 if (tok.is(tok::eof)) 633 break; 634 } 635 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), 636 1U); 637 } 638 639 TEST_F(LexerTest, RawAndNormalLexSameForLineComments) { 640 const llvm::StringLiteral Source = R"cpp( 641 // First line comment. 642 //* Second line comment which is ambigious. 643 ; // Have a non-comment token to make sure something is lexed. 644 )cpp"; 645 LangOpts.LineComment = false; 646 auto Toks = Lex(Source); 647 auto &SM = PP->getSourceManager(); 648 auto SrcBuffer = SM.getBufferData(SM.getMainFileID()); 649 Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(), 650 SrcBuffer.data(), SrcBuffer.data(), 651 SrcBuffer.data() + SrcBuffer.size()); 652 653 auto ToksView = llvm::makeArrayRef(Toks); 654 clang::Token T; 655 EXPECT_FALSE(ToksView.empty()); 656 while (!L.LexFromRawLexer(T)) { 657 ASSERT_TRUE(!ToksView.empty()); 658 EXPECT_EQ(T.getKind(), ToksView.front().getKind()); 659 ToksView = ToksView.drop_front(); 660 } 661 EXPECT_TRUE(ToksView.empty()); 662 } 663 } // anonymous namespace 664