xref: /llvm-project/clang/unittests/Lex/LexerTest.cpp (revision 36f77e20d9aaaf93a9b00ec1bd6b7e3ceb4918b9)
1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Lex/Lexer.h"
10 #include "clang/Basic/Diagnostic.h"
11 #include "clang/Basic/DiagnosticOptions.h"
12 #include "clang/Basic/FileManager.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TargetInfo.h"
17 #include "clang/Basic/TargetOptions.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/HeaderSearch.h"
20 #include "clang/Lex/HeaderSearchOptions.h"
21 #include "clang/Lex/LiteralSupport.h"
22 #include "clang/Lex/MacroArgs.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/ModuleLoader.h"
25 #include "clang/Lex/Preprocessor.h"
26 #include "clang/Lex/PreprocessorOptions.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "gmock/gmock.h"
30 #include "gtest/gtest.h"
31 #include <memory>
32 #include <vector>
33 
34 namespace {
35 using namespace clang;
36 using testing::ElementsAre;
37 
38 // The test fixture.
39 class LexerTest : public ::testing::Test {
40 protected:
41   LexerTest()
42     : FileMgr(FileMgrOpts),
43       DiagID(new DiagnosticIDs()),
44       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
45       SourceMgr(Diags, FileMgr),
46       TargetOpts(new TargetOptions)
47   {
48     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
49     Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
50   }
51 
52   std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
53                                          TrivialModuleLoader &ModLoader) {
54     std::unique_ptr<llvm::MemoryBuffer> Buf =
55         llvm::MemoryBuffer::getMemBuffer(Source);
56     SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
57 
58     HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
59                             Diags, LangOpts, Target.get());
60     std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
61         std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
62         HeaderInfo, ModLoader,
63         /*IILookup =*/nullptr,
64         /*OwnsHeaderSearch =*/false);
65     PP->Initialize(*Target);
66     PP->EnterMainSourceFile();
67     return PP;
68   }
69 
70   std::vector<Token> Lex(StringRef Source) {
71     TrivialModuleLoader ModLoader;
72     PP = CreatePP(Source, ModLoader);
73 
74     std::vector<Token> toks;
75     while (1) {
76       Token tok;
77       PP->Lex(tok);
78       if (tok.is(tok::eof))
79         break;
80       toks.push_back(tok);
81     }
82 
83     return toks;
84   }
85 
86   std::vector<Token> CheckLex(StringRef Source,
87                               ArrayRef<tok::TokenKind> ExpectedTokens) {
88     auto toks = Lex(Source);
89     EXPECT_EQ(ExpectedTokens.size(), toks.size());
90     for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
91       EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
92     }
93 
94     return toks;
95   }
96 
97   std::string getSourceText(Token Begin, Token End) {
98     bool Invalid;
99     StringRef Str =
100         Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
101                                     Begin.getLocation(), End.getLocation())),
102                              SourceMgr, LangOpts, &Invalid);
103     if (Invalid)
104       return "<INVALID>";
105     return std::string(Str);
106   }
107 
108   FileSystemOptions FileMgrOpts;
109   FileManager FileMgr;
110   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
111   DiagnosticsEngine Diags;
112   SourceManager SourceMgr;
113   LangOptions LangOpts;
114   std::shared_ptr<TargetOptions> TargetOpts;
115   IntrusiveRefCntPtr<TargetInfo> Target;
116   std::unique_ptr<Preprocessor> PP;
117 };
118 
119 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
120   std::vector<tok::TokenKind> ExpectedTokens;
121   ExpectedTokens.push_back(tok::identifier);
122   ExpectedTokens.push_back(tok::l_paren);
123   ExpectedTokens.push_back(tok::identifier);
124   ExpectedTokens.push_back(tok::r_paren);
125 
126   std::vector<Token> toks = CheckLex("#define M(x) x\n"
127                                      "M(f(M(i)))",
128                                      ExpectedTokens);
129 
130   EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
131 }
132 
133 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
134   std::vector<tok::TokenKind> ExpectedTokens;
135   ExpectedTokens.push_back(tok::identifier);
136   ExpectedTokens.push_back(tok::identifier);
137 
138   std::vector<Token> toks = CheckLex("#define M(x) x\n"
139                                      "M(M(i) c)",
140                                      ExpectedTokens);
141 
142   EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
143 }
144 
145 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
146   std::vector<tok::TokenKind> ExpectedTokens;
147   ExpectedTokens.push_back(tok::identifier);
148   ExpectedTokens.push_back(tok::identifier);
149   ExpectedTokens.push_back(tok::identifier);
150 
151   std::vector<Token> toks = CheckLex("#define M(x) x\n"
152                                      "M(c c M(i))",
153                                      ExpectedTokens);
154 
155   EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
156 }
157 
158 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
159   std::vector<tok::TokenKind> ExpectedTokens;
160   ExpectedTokens.push_back(tok::identifier);
161   ExpectedTokens.push_back(tok::identifier);
162   ExpectedTokens.push_back(tok::identifier);
163 
164   std::vector<Token> toks = CheckLex("#define M(x) x\n"
165                                      "M(M(i) c c)",
166                                      ExpectedTokens);
167 
168   EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
169 }
170 
171 TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
172   std::vector<tok::TokenKind> ExpectedTokens;
173   ExpectedTokens.push_back(tok::identifier);
174   ExpectedTokens.push_back(tok::identifier);
175   ExpectedTokens.push_back(tok::identifier);
176   ExpectedTokens.push_back(tok::identifier);
177 
178   std::vector<Token> toks = CheckLex("#define M(x) x\n"
179                                      "M(c M(i)) M(M(i) c)",
180                                      ExpectedTokens);
181 
182   EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
183 }
184 
185 TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
186   std::vector<tok::TokenKind> ExpectedTokens;
187   ExpectedTokens.push_back(tok::identifier);
188   ExpectedTokens.push_back(tok::l_paren);
189   ExpectedTokens.push_back(tok::identifier);
190   ExpectedTokens.push_back(tok::r_paren);
191 
192   std::vector<Token> toks = CheckLex("#define M(x) x\n"
193                                      "#define C(x) M(x##c)\n"
194                                      "M(f(C(i)))",
195                                      ExpectedTokens);
196 
197   EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
198 }
199 
200 TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
201   std::vector<tok::TokenKind> ExpectedTokens;
202   ExpectedTokens.push_back(tok::identifier);
203   ExpectedTokens.push_back(tok::l_paren);
204   ExpectedTokens.push_back(tok::identifier);
205   ExpectedTokens.push_back(tok::r_paren);
206 
207   std::vector<Token> toks = CheckLex("#define M(x) x\n"
208                                      "f(M(M(i)))",
209                                      ExpectedTokens);
210   EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
211 }
212 
213 TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
214   std::vector<tok::TokenKind> ExpectedTokens;
215   ExpectedTokens.push_back(tok::identifier);
216   ExpectedTokens.push_back(tok::l_paren);
217   ExpectedTokens.push_back(tok::identifier);
218   ExpectedTokens.push_back(tok::r_paren);
219 
220   std::vector<Token> toks = CheckLex("#define M(x) x\n"
221                                      "M(f(i))",
222                                      ExpectedTokens);
223   EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
224 }
225 
226 TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
227   std::vector<tok::TokenKind> ExpectedTokens;
228   ExpectedTokens.push_back(tok::identifier);
229   ExpectedTokens.push_back(tok::l_paren);
230   ExpectedTokens.push_back(tok::identifier);
231   ExpectedTokens.push_back(tok::r_paren);
232 
233   std::vector<Token> toks = CheckLex("#define M(x) x\n"
234                                      "#define C(x) x\n"
235                                      "f(C(M(i)))",
236                                      ExpectedTokens);
237   EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
238 }
239 
240 TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
241   std::vector<tok::TokenKind> ExpectedTokens;
242   ExpectedTokens.push_back(tok::identifier);
243   ExpectedTokens.push_back(tok::l_paren);
244   ExpectedTokens.push_back(tok::identifier);
245   ExpectedTokens.push_back(tok::identifier);
246   ExpectedTokens.push_back(tok::r_paren);
247 
248   std::vector<Token> toks = CheckLex("#define M(x) x\n"
249                                      "#define C(x) c x\n"
250                                      "f(C(M(i)))",
251                                      ExpectedTokens);
252   EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
253 }
254 
255 TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
256   std::vector<tok::TokenKind> ExpectedTokens;
257   ExpectedTokens.push_back(tok::identifier);
258   ExpectedTokens.push_back(tok::identifier);
259   ExpectedTokens.push_back(tok::l_paren);
260   ExpectedTokens.push_back(tok::identifier);
261   ExpectedTokens.push_back(tok::r_paren);
262 
263   std::vector<Token> toks = CheckLex("#define M(x) x\n"
264                                      "#define C(x) c M(x)\n"
265                                      "C(f(M(i)))",
266                                      ExpectedTokens);
267   EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
268 }
269 
270 TEST_F(LexerTest, LexAPI) {
271   std::vector<tok::TokenKind> ExpectedTokens;
272   // Line 1 (after the #defines)
273   ExpectedTokens.push_back(tok::l_square);
274   ExpectedTokens.push_back(tok::identifier);
275   ExpectedTokens.push_back(tok::r_square);
276   ExpectedTokens.push_back(tok::l_square);
277   ExpectedTokens.push_back(tok::identifier);
278   ExpectedTokens.push_back(tok::r_square);
279   // Line 2
280   ExpectedTokens.push_back(tok::identifier);
281   ExpectedTokens.push_back(tok::identifier);
282   ExpectedTokens.push_back(tok::identifier);
283   ExpectedTokens.push_back(tok::identifier);
284 
285   std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
286                                      "#define N(x) x\n"
287                                      "#define INN(x) x\n"
288                                      "#define NOF1 INN(val)\n"
289                                      "#define NOF2 val\n"
290                                      "M(foo) N([bar])\n"
291                                      "N(INN(val)) N(NOF1) N(NOF2) N(val)",
292                                      ExpectedTokens);
293 
294   SourceLocation lsqrLoc = toks[0].getLocation();
295   SourceLocation idLoc = toks[1].getLocation();
296   SourceLocation rsqrLoc = toks[2].getLocation();
297   CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);
298 
299   SourceLocation Loc;
300   EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
301   EXPECT_EQ(Loc, macroRange.getBegin());
302   EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
303   EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
304   EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
305   EXPECT_EQ(Loc, macroRange.getEnd());
306   EXPECT_TRUE(macroRange.isTokenRange());
307 
308   CharSourceRange range = Lexer::makeFileCharRange(
309            CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
310   EXPECT_TRUE(range.isInvalid());
311   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
312                                    SourceMgr, LangOpts);
313   EXPECT_TRUE(range.isInvalid());
314   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
315                                    SourceMgr, LangOpts);
316   EXPECT_TRUE(!range.isTokenRange());
317   EXPECT_EQ(range.getAsRange(),
318             SourceRange(macroRange.getBegin(),
319                         macroRange.getEnd().getLocWithOffset(1)));
320 
321   StringRef text = Lexer::getSourceText(
322                                CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
323                                SourceMgr, LangOpts);
324   EXPECT_EQ(text, "M(foo)");
325 
326   SourceLocation macroLsqrLoc = toks[3].getLocation();
327   SourceLocation macroIdLoc = toks[4].getLocation();
328   SourceLocation macroRsqrLoc = toks[5].getLocation();
329   SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
330   SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
331   SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);
332 
333   range = Lexer::makeFileCharRange(
334       CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
335       SourceMgr, LangOpts);
336   EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
337             range.getAsRange());
338 
339   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
340                                    SourceMgr, LangOpts);
341   EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
342             range.getAsRange());
343 
344   macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
345   range = Lexer::makeFileCharRange(
346                      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
347                      SourceMgr, LangOpts);
348   EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
349             range.getAsRange());
350 
351   text = Lexer::getSourceText(
352           CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
353           SourceMgr, LangOpts);
354   EXPECT_EQ(text, "[bar");
355 
356 
357   SourceLocation idLoc1 = toks[6].getLocation();
358   SourceLocation idLoc2 = toks[7].getLocation();
359   SourceLocation idLoc3 = toks[8].getLocation();
360   SourceLocation idLoc4 = toks[9].getLocation();
361   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
362   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
363   EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
364   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
365 }
366 
367 TEST_F(LexerTest, HandlesSplitTokens) {
368   std::vector<tok::TokenKind> ExpectedTokens;
369   // Line 1 (after the #defines)
370   ExpectedTokens.push_back(tok::identifier);
371   ExpectedTokens.push_back(tok::less);
372   ExpectedTokens.push_back(tok::identifier);
373   ExpectedTokens.push_back(tok::less);
374   ExpectedTokens.push_back(tok::greatergreater);
375   // Line 2
376   ExpectedTokens.push_back(tok::identifier);
377   ExpectedTokens.push_back(tok::less);
378   ExpectedTokens.push_back(tok::identifier);
379   ExpectedTokens.push_back(tok::less);
380   ExpectedTokens.push_back(tok::greatergreater);
381 
382   std::vector<Token> toks = CheckLex("#define TY ty\n"
383                                      "#define RANGLE ty<ty<>>\n"
384                                      "TY<ty<>>\n"
385                                      "RANGLE",
386                                      ExpectedTokens);
387 
388   SourceLocation outerTyLoc = toks[0].getLocation();
389   SourceLocation innerTyLoc = toks[2].getLocation();
390   SourceLocation gtgtLoc = toks[4].getLocation();
391   // Split the token to simulate the action of the parser and force creation of
392   // an `ExpansionTokenRange`.
393   SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);
394 
395   // Verify that it only captures the first greater-then and not the second one.
396   CharSourceRange range = Lexer::makeFileCharRange(
397       CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
398       LangOpts);
399   EXPECT_TRUE(range.isCharRange());
400   EXPECT_EQ(range.getAsRange(),
401             SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
402 
403   // Verify case where range begins in a macro expansion.
404   range = Lexer::makeFileCharRange(
405       CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
406       LangOpts);
407   EXPECT_TRUE(range.isCharRange());
408   EXPECT_EQ(range.getAsRange(),
409             SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
410                         gtgtLoc.getLocWithOffset(1)));
411 
412   SourceLocation macroInnerTyLoc = toks[7].getLocation();
413   SourceLocation macroGtgtLoc = toks[9].getLocation();
414   // Split the token to simulate the action of the parser and force creation of
415   // an `ExpansionTokenRange`.
416   SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);
417 
418   // Verify that it fails (because it only captures the first greater-then and
419   // not the second one, so it doesn't span the entire macro expansion).
420   range = Lexer::makeFileCharRange(
421       CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
422       SourceMgr, LangOpts);
423   EXPECT_TRUE(range.isInvalid());
424 }
425 
426 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
427   std::vector<Token> toks =
428       Lex("#define helper1 0\n"
429           "void helper2(const char *, ...);\n"
430           "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
431           "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
432           "void f1() { M2(\"a\", \"b\"); }");
433 
434   // Check the file corresponding to the "helper1" macro arg in M2.
435   //
436   // The lexer used to report its size as 31, meaning that the end of the
437   // expansion would be on the *next line* (just past `M2("a", "b")`). Make
438   // sure that we get the correct end location (the comma after "helper1").
439   SourceLocation helper1ArgLoc = toks[20].getLocation();
440   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
441 }
442 
443 TEST_F(LexerTest, DontOverallocateStringifyArgs) {
444   TrivialModuleLoader ModLoader;
445   auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);
446 
447   llvm::BumpPtrAllocator Allocator;
448   std::array<IdentifierInfo *, 3> ParamList;
449   MacroInfo *MI = PP->AllocateMacroInfo({});
450   MI->setIsFunctionLike();
451   MI->setParameterList(ParamList, Allocator);
452   EXPECT_EQ(3u, MI->getNumParams());
453   EXPECT_TRUE(MI->isFunctionLike());
454 
455   Token Eof;
456   Eof.setKind(tok::eof);
457   std::vector<Token> ArgTokens;
458   while (1) {
459     Token tok;
460     PP->Lex(tok);
461     if (tok.is(tok::eof)) {
462       ArgTokens.push_back(Eof);
463       break;
464     }
465     if (tok.is(tok::comma))
466       ArgTokens.push_back(Eof);
467     else
468       ArgTokens.push_back(tok);
469   }
470 
471   auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
472   std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
473       MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
474   auto StringifyArg = [&](int ArgNo) {
475     return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
476                                  /*Charify=*/false, {}, {});
477   };
478   Token Result = StringifyArg(0);
479   EXPECT_EQ(tok::string_literal, Result.getKind());
480   EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
481   Result = StringifyArg(1);
482   EXPECT_EQ(tok::string_literal, Result.getKind());
483   EXPECT_STREQ("\"5\"", Result.getLiteralData());
484   Result = StringifyArg(2);
485   EXPECT_EQ(tok::string_literal, Result.getKind());
486   EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
487 #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
488   EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
489 #endif
490 }
491 
492 TEST_F(LexerTest, IsNewLineEscapedValid) {
493   auto hasNewLineEscaped = [](const char *S) {
494     return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);
495   };
496 
497   EXPECT_TRUE(hasNewLineEscaped("\\\r"));
498   EXPECT_TRUE(hasNewLineEscaped("\\\n"));
499   EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
500   EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
501   EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
502   EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
503 
504   EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
505   EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
506   EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
507   EXPECT_FALSE(hasNewLineEscaped("\r"));
508   EXPECT_FALSE(hasNewLineEscaped("\n"));
509   EXPECT_FALSE(hasNewLineEscaped("\r\n"));
510   EXPECT_FALSE(hasNewLineEscaped("\n\r"));
511   EXPECT_FALSE(hasNewLineEscaped("\r\r"));
512   EXPECT_FALSE(hasNewLineEscaped("\n\n"));
513 }
514 
515 TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
516   // Each line should have the same length for
517   // further offset calculation to be more straightforward.
518   const unsigned IdentifierLength = 8;
519   std::string TextToLex = "rabarbar\n"
520                           "foo\\\nbar\n"
521                           "foo\\\rbar\n"
522                           "fo\\\r\nbar\n"
523                           "foo\\\n\rba\n";
524   std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
525   std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);
526 
527   for (const Token &Tok : LexedTokens) {
528     std::pair<FileID, unsigned> OriginalLocation =
529         SourceMgr.getDecomposedLoc(Tok.getLocation());
530     for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
531       SourceLocation LookupLocation =
532           Tok.getLocation().getLocWithOffset(Offset);
533 
534       std::pair<FileID, unsigned> FoundLocation =
535           SourceMgr.getDecomposedExpansionLoc(
536               Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
537 
538       // Check that location returned by the GetBeginningOfToken
539       // is the same as original token location reported by Lexer.
540       EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
541     }
542   }
543 }
544 
545 TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
546   EXPECT_TRUE(Lex("  //  \\\n").empty());
547   EXPECT_TRUE(Lex("#include <\\\\").empty());
548   EXPECT_TRUE(Lex("#include <\\\\\n").empty());
549 }
550 
551 TEST_F(LexerTest, StringizingRasString) {
552   // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
553   std::string String1 = R"(foo
554     {"bar":[]}
555     baz)";
556   // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
557   SmallString<128> String2;
558   String2 += String1.c_str();
559 
560   // Corner cases.
561   std::string String3 = R"(\
562     \n
563     \\n
564     \\)";
565   SmallString<128> String4;
566   String4 += String3.c_str();
567   std::string String5 = R"(a\
568 
569 
570     \\b)";
571   SmallString<128> String6;
572   String6 += String5.c_str();
573 
574   String1 = Lexer::Stringify(StringRef(String1));
575   Lexer::Stringify(String2);
576   String3 = Lexer::Stringify(StringRef(String3));
577   Lexer::Stringify(String4);
578   String5 = Lexer::Stringify(StringRef(String5));
579   Lexer::Stringify(String6);
580 
581   EXPECT_EQ(String1, R"(foo\n    {\"bar\":[]}\n    baz)");
582   EXPECT_EQ(String2, R"(foo\n    {\"bar\":[]}\n    baz)");
583   EXPECT_EQ(String3, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
584   EXPECT_EQ(String4, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
585   EXPECT_EQ(String5, R"(a\\\n\n\n    \\\\b)");
586   EXPECT_EQ(String6, R"(a\\\n\n\n    \\\\b)");
587 }
588 
589 TEST_F(LexerTest, CharRangeOffByOne) {
590   std::vector<Token> toks = Lex(R"(#define MOO 1
591     void foo() { MOO; })");
592   const Token &moo = toks[5];
593 
594   EXPECT_EQ(getSourceText(moo, moo), "MOO");
595 
596   SourceRange R{moo.getLocation(), moo.getLocation()};
597 
598   EXPECT_TRUE(
599       Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
600   EXPECT_TRUE(
601       Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
602 
603   CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);
604 
605   EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
606 }
607 
608 TEST_F(LexerTest, FindNextToken) {
609   Lex("int abcd = 0;\n"
610       "int xyz = abcd;\n");
611   std::vector<std::string> GeneratedByNextToken;
612   SourceLocation Loc =
613       SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
614   while (true) {
615     auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts);
616     ASSERT_TRUE(T);
617     if (T->is(tok::eof))
618       break;
619     GeneratedByNextToken.push_back(getSourceText(*T, *T));
620     Loc = T->getLocation();
621   }
622   EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int",
623                                                 "xyz", "=", "abcd", ";"));
624 }
625 
626 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
627   TrivialModuleLoader ModLoader;
628   auto PP = CreatePP("", ModLoader);
629   while (1) {
630     Token tok;
631     PP->Lex(tok);
632     if (tok.is(tok::eof))
633       break;
634   }
635   EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()),
636             1U);
637 }
638 
639 TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
640   const llvm::StringLiteral Source = R"cpp(
641   // First line comment.
642   //* Second line comment which is ambigious.
643   ; // Have a non-comment token to make sure something is lexed.
644   )cpp";
645   LangOpts.LineComment = false;
646   auto Toks = Lex(Source);
647   auto &SM = PP->getSourceManager();
648   auto SrcBuffer = SM.getBufferData(SM.getMainFileID());
649   Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(),
650           SrcBuffer.data(), SrcBuffer.data(),
651           SrcBuffer.data() + SrcBuffer.size());
652 
653   auto ToksView = llvm::makeArrayRef(Toks);
654   clang::Token T;
655   EXPECT_FALSE(ToksView.empty());
656   while (!L.LexFromRawLexer(T)) {
657     ASSERT_TRUE(!ToksView.empty());
658     EXPECT_EQ(T.getKind(), ToksView.front().getKind());
659     ToksView = ToksView.drop_front();
660   }
661   EXPECT_TRUE(ToksView.empty());
662 }
663 } // anonymous namespace
664