//===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Lexer.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace {
using namespace clang;
using testing::ElementsAre;

// The test fixture.
38 class LexerTest : public ::testing::Test { 39 protected: 40 LexerTest() 41 : FileMgr(FileMgrOpts), 42 DiagID(new DiagnosticIDs()), 43 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), 44 SourceMgr(Diags, FileMgr), 45 TargetOpts(new TargetOptions) 46 { 47 TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; 48 Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); 49 } 50 51 std::unique_ptr<Preprocessor> CreatePP(StringRef Source, 52 TrivialModuleLoader &ModLoader) { 53 std::unique_ptr<llvm::MemoryBuffer> Buf = 54 llvm::MemoryBuffer::getMemBuffer(Source); 55 SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); 56 57 HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, 58 Diags, LangOpts, Target.get()); 59 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( 60 std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr, 61 HeaderInfo, ModLoader, 62 /*IILookup =*/nullptr, 63 /*OwnsHeaderSearch =*/false); 64 PP->Initialize(*Target); 65 PP->EnterMainSourceFile(); 66 return PP; 67 } 68 69 std::vector<Token> Lex(StringRef Source) { 70 TrivialModuleLoader ModLoader; 71 PP = CreatePP(Source, ModLoader); 72 73 std::vector<Token> toks; 74 while (1) { 75 Token tok; 76 PP->Lex(tok); 77 if (tok.is(tok::eof)) 78 break; 79 toks.push_back(tok); 80 } 81 82 return toks; 83 } 84 85 std::vector<Token> CheckLex(StringRef Source, 86 ArrayRef<tok::TokenKind> ExpectedTokens) { 87 auto toks = Lex(Source); 88 EXPECT_EQ(ExpectedTokens.size(), toks.size()); 89 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { 90 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); 91 } 92 93 return toks; 94 } 95 96 std::string getSourceText(Token Begin, Token End) { 97 bool Invalid; 98 StringRef Str = 99 Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange( 100 Begin.getLocation(), End.getLocation())), 101 SourceMgr, LangOpts, &Invalid); 102 if (Invalid) 103 return "<INVALID>"; 104 return std::string(Str); 105 } 
106 107 FileSystemOptions FileMgrOpts; 108 FileManager FileMgr; 109 IntrusiveRefCntPtr<DiagnosticIDs> DiagID; 110 DiagnosticsEngine Diags; 111 SourceManager SourceMgr; 112 LangOptions LangOpts; 113 std::shared_ptr<TargetOptions> TargetOpts; 114 IntrusiveRefCntPtr<TargetInfo> Target; 115 std::unique_ptr<Preprocessor> PP; 116 }; 117 118 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { 119 std::vector<tok::TokenKind> ExpectedTokens; 120 ExpectedTokens.push_back(tok::identifier); 121 ExpectedTokens.push_back(tok::l_paren); 122 ExpectedTokens.push_back(tok::identifier); 123 ExpectedTokens.push_back(tok::r_paren); 124 125 std::vector<Token> toks = CheckLex("#define M(x) x\n" 126 "M(f(M(i)))", 127 ExpectedTokens); 128 129 EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2])); 130 } 131 132 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { 133 std::vector<tok::TokenKind> ExpectedTokens; 134 ExpectedTokens.push_back(tok::identifier); 135 ExpectedTokens.push_back(tok::identifier); 136 137 std::vector<Token> toks = CheckLex("#define M(x) x\n" 138 "M(M(i) c)", 139 ExpectedTokens); 140 141 EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0])); 142 } 143 144 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { 145 std::vector<tok::TokenKind> ExpectedTokens; 146 ExpectedTokens.push_back(tok::identifier); 147 ExpectedTokens.push_back(tok::identifier); 148 ExpectedTokens.push_back(tok::identifier); 149 150 std::vector<Token> toks = CheckLex("#define M(x) x\n" 151 "M(c c M(i))", 152 ExpectedTokens); 153 154 EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2])); 155 } 156 157 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { 158 std::vector<tok::TokenKind> ExpectedTokens; 159 ExpectedTokens.push_back(tok::identifier); 160 ExpectedTokens.push_back(tok::identifier); 161 ExpectedTokens.push_back(tok::identifier); 162 163 std::vector<Token> toks = CheckLex("#define M(x) x\n" 164 "M(M(i) c c)", 165 
ExpectedTokens); 166 167 EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1])); 168 } 169 170 TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { 171 std::vector<tok::TokenKind> ExpectedTokens; 172 ExpectedTokens.push_back(tok::identifier); 173 ExpectedTokens.push_back(tok::identifier); 174 ExpectedTokens.push_back(tok::identifier); 175 ExpectedTokens.push_back(tok::identifier); 176 177 std::vector<Token> toks = CheckLex("#define M(x) x\n" 178 "M(c M(i)) M(M(i) c)", 179 ExpectedTokens); 180 181 EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2])); 182 } 183 184 TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { 185 std::vector<tok::TokenKind> ExpectedTokens; 186 ExpectedTokens.push_back(tok::identifier); 187 ExpectedTokens.push_back(tok::l_paren); 188 ExpectedTokens.push_back(tok::identifier); 189 ExpectedTokens.push_back(tok::r_paren); 190 191 std::vector<Token> toks = CheckLex("#define M(x) x\n" 192 "#define C(x) M(x##c)\n" 193 "M(f(C(i)))", 194 ExpectedTokens); 195 196 EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2])); 197 } 198 199 TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { 200 std::vector<tok::TokenKind> ExpectedTokens; 201 ExpectedTokens.push_back(tok::identifier); 202 ExpectedTokens.push_back(tok::l_paren); 203 ExpectedTokens.push_back(tok::identifier); 204 ExpectedTokens.push_back(tok::r_paren); 205 206 std::vector<Token> toks = CheckLex("#define M(x) x\n" 207 "f(M(M(i)))", 208 ExpectedTokens); 209 EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2])); 210 } 211 212 TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { 213 std::vector<tok::TokenKind> ExpectedTokens; 214 ExpectedTokens.push_back(tok::identifier); 215 ExpectedTokens.push_back(tok::l_paren); 216 ExpectedTokens.push_back(tok::identifier); 217 ExpectedTokens.push_back(tok::r_paren); 218 219 std::vector<Token> toks = CheckLex("#define M(x) x\n" 220 "M(f(i))", 221 ExpectedTokens); 222 EXPECT_EQ("i", getSourceText(toks[2], toks[2])); 223 } 224 225 
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { 226 std::vector<tok::TokenKind> ExpectedTokens; 227 ExpectedTokens.push_back(tok::identifier); 228 ExpectedTokens.push_back(tok::l_paren); 229 ExpectedTokens.push_back(tok::identifier); 230 ExpectedTokens.push_back(tok::r_paren); 231 232 std::vector<Token> toks = CheckLex("#define M(x) x\n" 233 "#define C(x) x\n" 234 "f(C(M(i)))", 235 ExpectedTokens); 236 EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2])); 237 } 238 239 TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { 240 std::vector<tok::TokenKind> ExpectedTokens; 241 ExpectedTokens.push_back(tok::identifier); 242 ExpectedTokens.push_back(tok::l_paren); 243 ExpectedTokens.push_back(tok::identifier); 244 ExpectedTokens.push_back(tok::identifier); 245 ExpectedTokens.push_back(tok::r_paren); 246 247 std::vector<Token> toks = CheckLex("#define M(x) x\n" 248 "#define C(x) c x\n" 249 "f(C(M(i)))", 250 ExpectedTokens); 251 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 252 } 253 254 TEST_F(LexerTest, GetSourceTextExpandsRecursively) { 255 std::vector<tok::TokenKind> ExpectedTokens; 256 ExpectedTokens.push_back(tok::identifier); 257 ExpectedTokens.push_back(tok::identifier); 258 ExpectedTokens.push_back(tok::l_paren); 259 ExpectedTokens.push_back(tok::identifier); 260 ExpectedTokens.push_back(tok::r_paren); 261 262 std::vector<Token> toks = CheckLex("#define M(x) x\n" 263 "#define C(x) c M(x)\n" 264 "C(f(M(i)))", 265 ExpectedTokens); 266 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 267 } 268 269 TEST_F(LexerTest, LexAPI) { 270 std::vector<tok::TokenKind> ExpectedTokens; 271 // Line 1 (after the #defines) 272 ExpectedTokens.push_back(tok::l_square); 273 ExpectedTokens.push_back(tok::identifier); 274 ExpectedTokens.push_back(tok::r_square); 275 ExpectedTokens.push_back(tok::l_square); 276 ExpectedTokens.push_back(tok::identifier); 277 ExpectedTokens.push_back(tok::r_square); 278 // Line 2 279 
ExpectedTokens.push_back(tok::identifier); 280 ExpectedTokens.push_back(tok::identifier); 281 ExpectedTokens.push_back(tok::identifier); 282 ExpectedTokens.push_back(tok::identifier); 283 284 std::vector<Token> toks = CheckLex("#define M(x) [x]\n" 285 "#define N(x) x\n" 286 "#define INN(x) x\n" 287 "#define NOF1 INN(val)\n" 288 "#define NOF2 val\n" 289 "M(foo) N([bar])\n" 290 "N(INN(val)) N(NOF1) N(NOF2) N(val)", 291 ExpectedTokens); 292 293 SourceLocation lsqrLoc = toks[0].getLocation(); 294 SourceLocation idLoc = toks[1].getLocation(); 295 SourceLocation rsqrLoc = toks[2].getLocation(); 296 CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc); 297 298 SourceLocation Loc; 299 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc)); 300 EXPECT_EQ(Loc, macroRange.getBegin()); 301 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 302 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 303 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc)); 304 EXPECT_EQ(Loc, macroRange.getEnd()); 305 EXPECT_TRUE(macroRange.isTokenRange()); 306 307 CharSourceRange range = Lexer::makeFileCharRange( 308 CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts); 309 EXPECT_TRUE(range.isInvalid()); 310 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc), 311 SourceMgr, LangOpts); 312 EXPECT_TRUE(range.isInvalid()); 313 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 314 SourceMgr, LangOpts); 315 EXPECT_TRUE(!range.isTokenRange()); 316 EXPECT_EQ(range.getAsRange(), 317 SourceRange(macroRange.getBegin(), 318 macroRange.getEnd().getLocWithOffset(1))); 319 320 StringRef text = Lexer::getSourceText( 321 CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 322 SourceMgr, LangOpts); 323 EXPECT_EQ(text, "M(foo)"); 324 325 SourceLocation macroLsqrLoc = toks[3].getLocation(); 326 SourceLocation macroIdLoc = 
toks[4].getLocation(); 327 SourceLocation macroRsqrLoc = toks[5].getLocation(); 328 SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc); 329 SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc); 330 SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc); 331 332 range = Lexer::makeFileCharRange( 333 CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc), 334 SourceMgr, LangOpts); 335 EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)), 336 range.getAsRange()); 337 338 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc), 339 SourceMgr, LangOpts); 340 EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)), 341 range.getAsRange()); 342 343 macroRange = SourceMgr.getExpansionRange(macroLsqrLoc); 344 range = Lexer::makeFileCharRange( 345 CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc), 346 SourceMgr, LangOpts); 347 EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)), 348 range.getAsRange()); 349 350 text = Lexer::getSourceText( 351 CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)), 352 SourceMgr, LangOpts); 353 EXPECT_EQ(text, "[bar"); 354 355 356 SourceLocation idLoc1 = toks[6].getLocation(); 357 SourceLocation idLoc2 = toks[7].getLocation(); 358 SourceLocation idLoc3 = toks[8].getLocation(); 359 SourceLocation idLoc4 = toks[9].getLocation(); 360 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts)); 361 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts)); 362 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts)); 363 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts)); 364 } 365 366 TEST_F(LexerTest, HandlesSplitTokens) { 367 std::vector<tok::TokenKind> ExpectedTokens; 368 // Line 1 (after the #defines) 369 ExpectedTokens.push_back(tok::identifier); 370 ExpectedTokens.push_back(tok::less); 371 
ExpectedTokens.push_back(tok::identifier); 372 ExpectedTokens.push_back(tok::less); 373 ExpectedTokens.push_back(tok::greatergreater); 374 // Line 2 375 ExpectedTokens.push_back(tok::identifier); 376 ExpectedTokens.push_back(tok::less); 377 ExpectedTokens.push_back(tok::identifier); 378 ExpectedTokens.push_back(tok::less); 379 ExpectedTokens.push_back(tok::greatergreater); 380 381 std::vector<Token> toks = CheckLex("#define TY ty\n" 382 "#define RANGLE ty<ty<>>\n" 383 "TY<ty<>>\n" 384 "RANGLE", 385 ExpectedTokens); 386 387 SourceLocation outerTyLoc = toks[0].getLocation(); 388 SourceLocation innerTyLoc = toks[2].getLocation(); 389 SourceLocation gtgtLoc = toks[4].getLocation(); 390 // Split the token to simulate the action of the parser and force creation of 391 // an `ExpansionTokenRange`. 392 SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1); 393 394 // Verify that it only captures the first greater-then and not the second one. 395 CharSourceRange range = Lexer::makeFileCharRange( 396 CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr, 397 LangOpts); 398 EXPECT_TRUE(range.isCharRange()); 399 EXPECT_EQ(range.getAsRange(), 400 SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1))); 401 402 // Verify case where range begins in a macro expansion. 403 range = Lexer::makeFileCharRange( 404 CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr, 405 LangOpts); 406 EXPECT_TRUE(range.isCharRange()); 407 EXPECT_EQ(range.getAsRange(), 408 SourceRange(SourceMgr.getExpansionLoc(outerTyLoc), 409 gtgtLoc.getLocWithOffset(1))); 410 411 SourceLocation macroInnerTyLoc = toks[7].getLocation(); 412 SourceLocation macroGtgtLoc = toks[9].getLocation(); 413 // Split the token to simulate the action of the parser and force creation of 414 // an `ExpansionTokenRange`. 
415 SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1); 416 417 // Verify that it fails (because it only captures the first greater-then and 418 // not the second one, so it doesn't span the entire macro expansion). 419 range = Lexer::makeFileCharRange( 420 CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc), 421 SourceMgr, LangOpts); 422 EXPECT_TRUE(range.isInvalid()); 423 } 424 425 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { 426 std::vector<Token> toks = 427 Lex("#define helper1 0\n" 428 "void helper2(const char *, ...);\n" 429 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" 430 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" 431 "void f1() { M2(\"a\", \"b\"); }"); 432 433 // Check the file corresponding to the "helper1" macro arg in M2. 434 // 435 // The lexer used to report its size as 31, meaning that the end of the 436 // expansion would be on the *next line* (just past `M2("a", "b")`). Make 437 // sure that we get the correct end location (the comma after "helper1"). 
438 SourceLocation helper1ArgLoc = toks[20].getLocation(); 439 EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); 440 } 441 442 TEST_F(LexerTest, DontOverallocateStringifyArgs) { 443 TrivialModuleLoader ModLoader; 444 auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader); 445 446 llvm::BumpPtrAllocator Allocator; 447 std::array<IdentifierInfo *, 3> ParamList; 448 MacroInfo *MI = PP->AllocateMacroInfo({}); 449 MI->setIsFunctionLike(); 450 MI->setParameterList(ParamList, Allocator); 451 EXPECT_EQ(3u, MI->getNumParams()); 452 EXPECT_TRUE(MI->isFunctionLike()); 453 454 Token Eof; 455 Eof.setKind(tok::eof); 456 std::vector<Token> ArgTokens; 457 while (1) { 458 Token tok; 459 PP->Lex(tok); 460 if (tok.is(tok::eof)) { 461 ArgTokens.push_back(Eof); 462 break; 463 } 464 if (tok.is(tok::comma)) 465 ArgTokens.push_back(Eof); 466 else 467 ArgTokens.push_back(tok); 468 } 469 470 auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); }; 471 std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA( 472 MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter); 473 auto StringifyArg = [&](int ArgNo) { 474 return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP, 475 /*Charify=*/false, {}, {}); 476 }; 477 Token Result = StringifyArg(0); 478 EXPECT_EQ(tok::string_literal, Result.getKind()); 479 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData()); 480 Result = StringifyArg(1); 481 EXPECT_EQ(tok::string_literal, Result.getKind()); 482 EXPECT_STREQ("\"5\"", Result.getLiteralData()); 483 Result = StringifyArg(2); 484 EXPECT_EQ(tok::string_literal, Result.getKind()); 485 EXPECT_STREQ("\"'C'\"", Result.getLiteralData()); 486 #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST 487 EXPECT_DEATH(StringifyArg(3), "Invalid arg #"); 488 #endif 489 } 490 491 TEST_F(LexerTest, IsNewLineEscapedValid) { 492 auto hasNewLineEscaped = [](const char *S) { 493 return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); 494 }; 495 496 
EXPECT_TRUE(hasNewLineEscaped("\\\r")); 497 EXPECT_TRUE(hasNewLineEscaped("\\\n")); 498 EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); 499 EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); 500 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); 501 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); 502 503 EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); 504 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); 505 EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); 506 EXPECT_FALSE(hasNewLineEscaped("\r")); 507 EXPECT_FALSE(hasNewLineEscaped("\n")); 508 EXPECT_FALSE(hasNewLineEscaped("\r\n")); 509 EXPECT_FALSE(hasNewLineEscaped("\n\r")); 510 EXPECT_FALSE(hasNewLineEscaped("\r\r")); 511 EXPECT_FALSE(hasNewLineEscaped("\n\n")); 512 } 513 514 TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { 515 // Each line should have the same length for 516 // further offset calculation to be more straightforward. 517 const unsigned IdentifierLength = 8; 518 std::string TextToLex = "rabarbar\n" 519 "foo\\\nbar\n" 520 "foo\\\rbar\n" 521 "fo\\\r\nbar\n" 522 "foo\\\n\rba\n"; 523 std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; 524 std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); 525 526 for (const Token &Tok : LexedTokens) { 527 std::pair<FileID, unsigned> OriginalLocation = 528 SourceMgr.getDecomposedLoc(Tok.getLocation()); 529 for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { 530 SourceLocation LookupLocation = 531 Tok.getLocation().getLocWithOffset(Offset); 532 533 std::pair<FileID, unsigned> FoundLocation = 534 SourceMgr.getDecomposedExpansionLoc( 535 Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); 536 537 // Check that location returned by the GetBeginningOfToken 538 // is the same as original token location reported by Lexer. 
539 EXPECT_EQ(FoundLocation.second, OriginalLocation.second); 540 } 541 } 542 } 543 544 TEST_F(LexerTest, AvoidPastEndOfStringDereference) { 545 EXPECT_TRUE(Lex(" // \\\n").empty()); 546 EXPECT_TRUE(Lex("#include <\\\\").empty()); 547 EXPECT_TRUE(Lex("#include <\\\\\n").empty()); 548 } 549 550 TEST_F(LexerTest, StringizingRasString) { 551 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)". 552 std::string String1 = R"(foo 553 {"bar":[]} 554 baz)"; 555 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)". 556 SmallString<128> String2; 557 String2 += String1.c_str(); 558 559 // Corner cases. 560 std::string String3 = R"(\ 561 \n 562 \\n 563 \\)"; 564 SmallString<128> String4; 565 String4 += String3.c_str(); 566 std::string String5 = R"(a\ 567 568 569 \\b)"; 570 SmallString<128> String6; 571 String6 += String5.c_str(); 572 573 String1 = Lexer::Stringify(StringRef(String1)); 574 Lexer::Stringify(String2); 575 String3 = Lexer::Stringify(StringRef(String3)); 576 Lexer::Stringify(String4); 577 String5 = Lexer::Stringify(StringRef(String5)); 578 Lexer::Stringify(String6); 579 580 EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)"); 581 EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)"); 582 EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)"); 583 EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)"); 584 EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)"); 585 EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)"); 586 } 587 588 TEST_F(LexerTest, CharRangeOffByOne) { 589 std::vector<Token> toks = Lex(R"(#define MOO 1 590 void foo() { MOO; })"); 591 const Token &moo = toks[5]; 592 593 EXPECT_EQ(getSourceText(moo, moo), "MOO"); 594 595 SourceRange R{moo.getLocation(), moo.getLocation()}; 596 597 EXPECT_TRUE( 598 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts)); 599 EXPECT_TRUE( 600 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts)); 601 602 CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts); 603 604 EXPECT_EQ(Lexer::getSourceText(CR, 
SourceMgr, LangOpts), "MOO"); // Was "MO". 605 } 606 607 TEST_F(LexerTest, FindNextToken) { 608 Lex("int abcd = 0;\n" 609 "int xyz = abcd;\n"); 610 std::vector<std::string> GeneratedByNextToken; 611 SourceLocation Loc = 612 SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); 613 while (true) { 614 auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts); 615 ASSERT_TRUE(T.hasValue()); 616 if (T->is(tok::eof)) 617 break; 618 GeneratedByNextToken.push_back(getSourceText(*T, *T)); 619 Loc = T->getLocation(); 620 } 621 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int", 622 "xyz", "=", "abcd", ";")); 623 } 624 625 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { 626 TrivialModuleLoader ModLoader; 627 auto PP = CreatePP("", ModLoader); 628 while (1) { 629 Token tok; 630 PP->Lex(tok); 631 if (tok.is(tok::eof)) 632 break; 633 } 634 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), 635 1U); 636 } 637 638 TEST_F(LexerTest, RawAndNormalLexSameForLineComments) { 639 const llvm::StringLiteral Source = R"cpp( 640 // First line comment. 641 //* Second line comment which is ambigious. 642 )cpp"; 643 LangOpts.LineComment = false; 644 auto Toks = Lex(Source); 645 auto &SM = PP->getSourceManager(); 646 auto SrcBuffer = SM.getBufferData(SM.getMainFileID()); 647 Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(), 648 SrcBuffer.data(), SrcBuffer.data(), 649 SrcBuffer.data() + SrcBuffer.size()); 650 651 auto ToksView = llvm::makeArrayRef(Toks); 652 clang::Token T; 653 while (!L.LexFromRawLexer(T)) { 654 ASSERT_TRUE(!ToksView.empty()); 655 EXPECT_EQ(T.getKind(), ToksView.front().getKind()); 656 ToksView = ToksView.drop_front(); 657 } 658 EXPECT_TRUE(ToksView.empty()); 659 } 660 } // anonymous namespace 661