xref: /llvm-project/clang/unittests/Lex/LexerTest.cpp (revision ff77071a4d672fab7c8b30bea8525b89be8596fc)
1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Lex/Lexer.h"
10 #include "clang/Basic/Diagnostic.h"
11 #include "clang/Basic/DiagnosticOptions.h"
12 #include "clang/Basic/FileManager.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TargetInfo.h"
17 #include "clang/Basic/TargetOptions.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/HeaderSearch.h"
20 #include "clang/Lex/HeaderSearchOptions.h"
21 #include "clang/Lex/MacroArgs.h"
22 #include "clang/Lex/MacroInfo.h"
23 #include "clang/Lex/ModuleLoader.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Lex/PreprocessorOptions.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include <memory>
31 #include <vector>
32 
33 namespace {
34 using namespace clang;
35 using testing::ElementsAre;
36 
37 // The test fixture.
38 class LexerTest : public ::testing::Test {
39 protected:
40   LexerTest()
41     : FileMgr(FileMgrOpts),
42       DiagID(new DiagnosticIDs()),
43       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
44       SourceMgr(Diags, FileMgr),
45       TargetOpts(new TargetOptions)
46   {
47     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
48     Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
49   }
50 
51   std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
52                                          TrivialModuleLoader &ModLoader) {
53     std::unique_ptr<llvm::MemoryBuffer> Buf =
54         llvm::MemoryBuffer::getMemBuffer(Source);
55     SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
56 
57     HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
58                             Diags, LangOpts, Target.get());
59     std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
60         std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
61         HeaderInfo, ModLoader,
62         /*IILookup =*/nullptr,
63         /*OwnsHeaderSearch =*/false);
64     PP->Initialize(*Target);
65     PP->EnterMainSourceFile();
66     return PP;
67   }
68 
69   std::vector<Token> Lex(StringRef Source) {
70     TrivialModuleLoader ModLoader;
71     PP = CreatePP(Source, ModLoader);
72 
73     std::vector<Token> toks;
74     while (1) {
75       Token tok;
76       PP->Lex(tok);
77       if (tok.is(tok::eof))
78         break;
79       toks.push_back(tok);
80     }
81 
82     return toks;
83   }
84 
85   std::vector<Token> CheckLex(StringRef Source,
86                               ArrayRef<tok::TokenKind> ExpectedTokens) {
87     auto toks = Lex(Source);
88     EXPECT_EQ(ExpectedTokens.size(), toks.size());
89     for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
90       EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
91     }
92 
93     return toks;
94   }
95 
96   std::string getSourceText(Token Begin, Token End) {
97     bool Invalid;
98     StringRef Str =
99         Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
100                                     Begin.getLocation(), End.getLocation())),
101                              SourceMgr, LangOpts, &Invalid);
102     if (Invalid)
103       return "<INVALID>";
104     return std::string(Str);
105   }
106 
107   FileSystemOptions FileMgrOpts;
108   FileManager FileMgr;
109   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
110   DiagnosticsEngine Diags;
111   SourceManager SourceMgr;
112   LangOptions LangOpts;
113   std::shared_ptr<TargetOptions> TargetOpts;
114   IntrusiveRefCntPtr<TargetInfo> Target;
115   std::unique_ptr<Preprocessor> PP;
116 };
117 
118 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
119   std::vector<tok::TokenKind> ExpectedTokens;
120   ExpectedTokens.push_back(tok::identifier);
121   ExpectedTokens.push_back(tok::l_paren);
122   ExpectedTokens.push_back(tok::identifier);
123   ExpectedTokens.push_back(tok::r_paren);
124 
125   std::vector<Token> toks = CheckLex("#define M(x) x\n"
126                                      "M(f(M(i)))",
127                                      ExpectedTokens);
128 
129   EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
130 }
131 
132 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
133   std::vector<tok::TokenKind> ExpectedTokens;
134   ExpectedTokens.push_back(tok::identifier);
135   ExpectedTokens.push_back(tok::identifier);
136 
137   std::vector<Token> toks = CheckLex("#define M(x) x\n"
138                                      "M(M(i) c)",
139                                      ExpectedTokens);
140 
141   EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
142 }
143 
144 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
145   std::vector<tok::TokenKind> ExpectedTokens;
146   ExpectedTokens.push_back(tok::identifier);
147   ExpectedTokens.push_back(tok::identifier);
148   ExpectedTokens.push_back(tok::identifier);
149 
150   std::vector<Token> toks = CheckLex("#define M(x) x\n"
151                                      "M(c c M(i))",
152                                      ExpectedTokens);
153 
154   EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
155 }
156 
157 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
158   std::vector<tok::TokenKind> ExpectedTokens;
159   ExpectedTokens.push_back(tok::identifier);
160   ExpectedTokens.push_back(tok::identifier);
161   ExpectedTokens.push_back(tok::identifier);
162 
163   std::vector<Token> toks = CheckLex("#define M(x) x\n"
164                                      "M(M(i) c c)",
165                                      ExpectedTokens);
166 
167   EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
168 }
169 
170 TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
171   std::vector<tok::TokenKind> ExpectedTokens;
172   ExpectedTokens.push_back(tok::identifier);
173   ExpectedTokens.push_back(tok::identifier);
174   ExpectedTokens.push_back(tok::identifier);
175   ExpectedTokens.push_back(tok::identifier);
176 
177   std::vector<Token> toks = CheckLex("#define M(x) x\n"
178                                      "M(c M(i)) M(M(i) c)",
179                                      ExpectedTokens);
180 
181   EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
182 }
183 
184 TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
185   std::vector<tok::TokenKind> ExpectedTokens;
186   ExpectedTokens.push_back(tok::identifier);
187   ExpectedTokens.push_back(tok::l_paren);
188   ExpectedTokens.push_back(tok::identifier);
189   ExpectedTokens.push_back(tok::r_paren);
190 
191   std::vector<Token> toks = CheckLex("#define M(x) x\n"
192                                      "#define C(x) M(x##c)\n"
193                                      "M(f(C(i)))",
194                                      ExpectedTokens);
195 
196   EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
197 }
198 
199 TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
200   std::vector<tok::TokenKind> ExpectedTokens;
201   ExpectedTokens.push_back(tok::identifier);
202   ExpectedTokens.push_back(tok::l_paren);
203   ExpectedTokens.push_back(tok::identifier);
204   ExpectedTokens.push_back(tok::r_paren);
205 
206   std::vector<Token> toks = CheckLex("#define M(x) x\n"
207                                      "f(M(M(i)))",
208                                      ExpectedTokens);
209   EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
210 }
211 
212 TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
213   std::vector<tok::TokenKind> ExpectedTokens;
214   ExpectedTokens.push_back(tok::identifier);
215   ExpectedTokens.push_back(tok::l_paren);
216   ExpectedTokens.push_back(tok::identifier);
217   ExpectedTokens.push_back(tok::r_paren);
218 
219   std::vector<Token> toks = CheckLex("#define M(x) x\n"
220                                      "M(f(i))",
221                                      ExpectedTokens);
222   EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
223 }
224 
225 TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
226   std::vector<tok::TokenKind> ExpectedTokens;
227   ExpectedTokens.push_back(tok::identifier);
228   ExpectedTokens.push_back(tok::l_paren);
229   ExpectedTokens.push_back(tok::identifier);
230   ExpectedTokens.push_back(tok::r_paren);
231 
232   std::vector<Token> toks = CheckLex("#define M(x) x\n"
233                                      "#define C(x) x\n"
234                                      "f(C(M(i)))",
235                                      ExpectedTokens);
236   EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
237 }
238 
239 TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
240   std::vector<tok::TokenKind> ExpectedTokens;
241   ExpectedTokens.push_back(tok::identifier);
242   ExpectedTokens.push_back(tok::l_paren);
243   ExpectedTokens.push_back(tok::identifier);
244   ExpectedTokens.push_back(tok::identifier);
245   ExpectedTokens.push_back(tok::r_paren);
246 
247   std::vector<Token> toks = CheckLex("#define M(x) x\n"
248                                      "#define C(x) c x\n"
249                                      "f(C(M(i)))",
250                                      ExpectedTokens);
251   EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
252 }
253 
254 TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
255   std::vector<tok::TokenKind> ExpectedTokens;
256   ExpectedTokens.push_back(tok::identifier);
257   ExpectedTokens.push_back(tok::identifier);
258   ExpectedTokens.push_back(tok::l_paren);
259   ExpectedTokens.push_back(tok::identifier);
260   ExpectedTokens.push_back(tok::r_paren);
261 
262   std::vector<Token> toks = CheckLex("#define M(x) x\n"
263                                      "#define C(x) c M(x)\n"
264                                      "C(f(M(i)))",
265                                      ExpectedTokens);
266   EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
267 }
268 
269 TEST_F(LexerTest, LexAPI) {
270   std::vector<tok::TokenKind> ExpectedTokens;
271   // Line 1 (after the #defines)
272   ExpectedTokens.push_back(tok::l_square);
273   ExpectedTokens.push_back(tok::identifier);
274   ExpectedTokens.push_back(tok::r_square);
275   ExpectedTokens.push_back(tok::l_square);
276   ExpectedTokens.push_back(tok::identifier);
277   ExpectedTokens.push_back(tok::r_square);
278   // Line 2
279   ExpectedTokens.push_back(tok::identifier);
280   ExpectedTokens.push_back(tok::identifier);
281   ExpectedTokens.push_back(tok::identifier);
282   ExpectedTokens.push_back(tok::identifier);
283 
284   std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
285                                      "#define N(x) x\n"
286                                      "#define INN(x) x\n"
287                                      "#define NOF1 INN(val)\n"
288                                      "#define NOF2 val\n"
289                                      "M(foo) N([bar])\n"
290                                      "N(INN(val)) N(NOF1) N(NOF2) N(val)",
291                                      ExpectedTokens);
292 
293   SourceLocation lsqrLoc = toks[0].getLocation();
294   SourceLocation idLoc = toks[1].getLocation();
295   SourceLocation rsqrLoc = toks[2].getLocation();
296   CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);
297 
298   SourceLocation Loc;
299   EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
300   EXPECT_EQ(Loc, macroRange.getBegin());
301   EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
302   EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
303   EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
304   EXPECT_EQ(Loc, macroRange.getEnd());
305   EXPECT_TRUE(macroRange.isTokenRange());
306 
307   CharSourceRange range = Lexer::makeFileCharRange(
308            CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
309   EXPECT_TRUE(range.isInvalid());
310   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
311                                    SourceMgr, LangOpts);
312   EXPECT_TRUE(range.isInvalid());
313   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
314                                    SourceMgr, LangOpts);
315   EXPECT_TRUE(!range.isTokenRange());
316   EXPECT_EQ(range.getAsRange(),
317             SourceRange(macroRange.getBegin(),
318                         macroRange.getEnd().getLocWithOffset(1)));
319 
320   StringRef text = Lexer::getSourceText(
321                                CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
322                                SourceMgr, LangOpts);
323   EXPECT_EQ(text, "M(foo)");
324 
325   SourceLocation macroLsqrLoc = toks[3].getLocation();
326   SourceLocation macroIdLoc = toks[4].getLocation();
327   SourceLocation macroRsqrLoc = toks[5].getLocation();
328   SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
329   SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
330   SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);
331 
332   range = Lexer::makeFileCharRange(
333       CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
334       SourceMgr, LangOpts);
335   EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
336             range.getAsRange());
337 
338   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
339                                    SourceMgr, LangOpts);
340   EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
341             range.getAsRange());
342 
343   macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
344   range = Lexer::makeFileCharRange(
345                      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
346                      SourceMgr, LangOpts);
347   EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
348             range.getAsRange());
349 
350   text = Lexer::getSourceText(
351           CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
352           SourceMgr, LangOpts);
353   EXPECT_EQ(text, "[bar");
354 
355 
356   SourceLocation idLoc1 = toks[6].getLocation();
357   SourceLocation idLoc2 = toks[7].getLocation();
358   SourceLocation idLoc3 = toks[8].getLocation();
359   SourceLocation idLoc4 = toks[9].getLocation();
360   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
361   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
362   EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
363   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
364 }
365 
366 TEST_F(LexerTest, HandlesSplitTokens) {
367   std::vector<tok::TokenKind> ExpectedTokens;
368   // Line 1 (after the #defines)
369   ExpectedTokens.push_back(tok::identifier);
370   ExpectedTokens.push_back(tok::less);
371   ExpectedTokens.push_back(tok::identifier);
372   ExpectedTokens.push_back(tok::less);
373   ExpectedTokens.push_back(tok::greatergreater);
374   // Line 2
375   ExpectedTokens.push_back(tok::identifier);
376   ExpectedTokens.push_back(tok::less);
377   ExpectedTokens.push_back(tok::identifier);
378   ExpectedTokens.push_back(tok::less);
379   ExpectedTokens.push_back(tok::greatergreater);
380 
381   std::vector<Token> toks = CheckLex("#define TY ty\n"
382                                      "#define RANGLE ty<ty<>>\n"
383                                      "TY<ty<>>\n"
384                                      "RANGLE",
385                                      ExpectedTokens);
386 
387   SourceLocation outerTyLoc = toks[0].getLocation();
388   SourceLocation innerTyLoc = toks[2].getLocation();
389   SourceLocation gtgtLoc = toks[4].getLocation();
390   // Split the token to simulate the action of the parser and force creation of
391   // an `ExpansionTokenRange`.
392   SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);
393 
394   // Verify that it only captures the first greater-then and not the second one.
395   CharSourceRange range = Lexer::makeFileCharRange(
396       CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
397       LangOpts);
398   EXPECT_TRUE(range.isCharRange());
399   EXPECT_EQ(range.getAsRange(),
400             SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
401 
402   // Verify case where range begins in a macro expansion.
403   range = Lexer::makeFileCharRange(
404       CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
405       LangOpts);
406   EXPECT_TRUE(range.isCharRange());
407   EXPECT_EQ(range.getAsRange(),
408             SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
409                         gtgtLoc.getLocWithOffset(1)));
410 
411   SourceLocation macroInnerTyLoc = toks[7].getLocation();
412   SourceLocation macroGtgtLoc = toks[9].getLocation();
413   // Split the token to simulate the action of the parser and force creation of
414   // an `ExpansionTokenRange`.
415   SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);
416 
417   // Verify that it fails (because it only captures the first greater-then and
418   // not the second one, so it doesn't span the entire macro expansion).
419   range = Lexer::makeFileCharRange(
420       CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
421       SourceMgr, LangOpts);
422   EXPECT_TRUE(range.isInvalid());
423 }
424 
425 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
426   std::vector<Token> toks =
427       Lex("#define helper1 0\n"
428           "void helper2(const char *, ...);\n"
429           "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
430           "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
431           "void f1() { M2(\"a\", \"b\"); }");
432 
433   // Check the file corresponding to the "helper1" macro arg in M2.
434   //
435   // The lexer used to report its size as 31, meaning that the end of the
436   // expansion would be on the *next line* (just past `M2("a", "b")`). Make
437   // sure that we get the correct end location (the comma after "helper1").
438   SourceLocation helper1ArgLoc = toks[20].getLocation();
439   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
440 }
441 
442 TEST_F(LexerTest, DontOverallocateStringifyArgs) {
443   TrivialModuleLoader ModLoader;
444   auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);
445 
446   llvm::BumpPtrAllocator Allocator;
447   std::array<IdentifierInfo *, 3> ParamList;
448   MacroInfo *MI = PP->AllocateMacroInfo({});
449   MI->setIsFunctionLike();
450   MI->setParameterList(ParamList, Allocator);
451   EXPECT_EQ(3u, MI->getNumParams());
452   EXPECT_TRUE(MI->isFunctionLike());
453 
454   Token Eof;
455   Eof.setKind(tok::eof);
456   std::vector<Token> ArgTokens;
457   while (1) {
458     Token tok;
459     PP->Lex(tok);
460     if (tok.is(tok::eof)) {
461       ArgTokens.push_back(Eof);
462       break;
463     }
464     if (tok.is(tok::comma))
465       ArgTokens.push_back(Eof);
466     else
467       ArgTokens.push_back(tok);
468   }
469 
470   auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
471   std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
472       MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
473   auto StringifyArg = [&](int ArgNo) {
474     return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
475                                  /*Charify=*/false, {}, {});
476   };
477   Token Result = StringifyArg(0);
478   EXPECT_EQ(tok::string_literal, Result.getKind());
479   EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
480   Result = StringifyArg(1);
481   EXPECT_EQ(tok::string_literal, Result.getKind());
482   EXPECT_STREQ("\"5\"", Result.getLiteralData());
483   Result = StringifyArg(2);
484   EXPECT_EQ(tok::string_literal, Result.getKind());
485   EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
486 #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
487   EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
488 #endif
489 }
490 
491 TEST_F(LexerTest, IsNewLineEscapedValid) {
492   auto hasNewLineEscaped = [](const char *S) {
493     return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);
494   };
495 
496   EXPECT_TRUE(hasNewLineEscaped("\\\r"));
497   EXPECT_TRUE(hasNewLineEscaped("\\\n"));
498   EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
499   EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
500   EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
501   EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
502 
503   EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
504   EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
505   EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
506   EXPECT_FALSE(hasNewLineEscaped("\r"));
507   EXPECT_FALSE(hasNewLineEscaped("\n"));
508   EXPECT_FALSE(hasNewLineEscaped("\r\n"));
509   EXPECT_FALSE(hasNewLineEscaped("\n\r"));
510   EXPECT_FALSE(hasNewLineEscaped("\r\r"));
511   EXPECT_FALSE(hasNewLineEscaped("\n\n"));
512 }
513 
514 TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
515   // Each line should have the same length for
516   // further offset calculation to be more straightforward.
517   const unsigned IdentifierLength = 8;
518   std::string TextToLex = "rabarbar\n"
519                           "foo\\\nbar\n"
520                           "foo\\\rbar\n"
521                           "fo\\\r\nbar\n"
522                           "foo\\\n\rba\n";
523   std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
524   std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);
525 
526   for (const Token &Tok : LexedTokens) {
527     std::pair<FileID, unsigned> OriginalLocation =
528         SourceMgr.getDecomposedLoc(Tok.getLocation());
529     for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
530       SourceLocation LookupLocation =
531           Tok.getLocation().getLocWithOffset(Offset);
532 
533       std::pair<FileID, unsigned> FoundLocation =
534           SourceMgr.getDecomposedExpansionLoc(
535               Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
536 
537       // Check that location returned by the GetBeginningOfToken
538       // is the same as original token location reported by Lexer.
539       EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
540     }
541   }
542 }
543 
544 TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
545   EXPECT_TRUE(Lex("  //  \\\n").empty());
546   EXPECT_TRUE(Lex("#include <\\\\").empty());
547   EXPECT_TRUE(Lex("#include <\\\\\n").empty());
548 }
549 
550 TEST_F(LexerTest, StringizingRasString) {
551   // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
552   std::string String1 = R"(foo
553     {"bar":[]}
554     baz)";
555   // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
556   SmallString<128> String2;
557   String2 += String1.c_str();
558 
559   // Corner cases.
560   std::string String3 = R"(\
561     \n
562     \\n
563     \\)";
564   SmallString<128> String4;
565   String4 += String3.c_str();
566   std::string String5 = R"(a\
567 
568 
569     \\b)";
570   SmallString<128> String6;
571   String6 += String5.c_str();
572 
573   String1 = Lexer::Stringify(StringRef(String1));
574   Lexer::Stringify(String2);
575   String3 = Lexer::Stringify(StringRef(String3));
576   Lexer::Stringify(String4);
577   String5 = Lexer::Stringify(StringRef(String5));
578   Lexer::Stringify(String6);
579 
580   EXPECT_EQ(String1, R"(foo\n    {\"bar\":[]}\n    baz)");
581   EXPECT_EQ(String2, R"(foo\n    {\"bar\":[]}\n    baz)");
582   EXPECT_EQ(String3, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
583   EXPECT_EQ(String4, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
584   EXPECT_EQ(String5, R"(a\\\n\n\n    \\\\b)");
585   EXPECT_EQ(String6, R"(a\\\n\n\n    \\\\b)");
586 }
587 
588 TEST_F(LexerTest, CharRangeOffByOne) {
589   std::vector<Token> toks = Lex(R"(#define MOO 1
590     void foo() { MOO; })");
591   const Token &moo = toks[5];
592 
593   EXPECT_EQ(getSourceText(moo, moo), "MOO");
594 
595   SourceRange R{moo.getLocation(), moo.getLocation()};
596 
597   EXPECT_TRUE(
598       Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
599   EXPECT_TRUE(
600       Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
601 
602   CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);
603 
604   EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
605 }
606 
607 TEST_F(LexerTest, FindNextToken) {
608   Lex("int abcd = 0;\n"
609       "int xyz = abcd;\n");
610   std::vector<std::string> GeneratedByNextToken;
611   SourceLocation Loc =
612       SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
613   while (true) {
614     auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts);
615     ASSERT_TRUE(T.hasValue());
616     if (T->is(tok::eof))
617       break;
618     GeneratedByNextToken.push_back(getSourceText(*T, *T));
619     Loc = T->getLocation();
620   }
621   EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int",
622                                                 "xyz", "=", "abcd", ";"));
623 }
624 
625 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
626   TrivialModuleLoader ModLoader;
627   auto PP = CreatePP("", ModLoader);
628   while (1) {
629     Token tok;
630     PP->Lex(tok);
631     if (tok.is(tok::eof))
632       break;
633   }
634   EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()),
635             1U);
636 }
637 
638 TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
639   const llvm::StringLiteral Source = R"cpp(
640   // First line comment.
641   //* Second line comment which is ambigious.
642   )cpp";
643   LangOpts.LineComment = false;
644   auto Toks = Lex(Source);
645   auto &SM = PP->getSourceManager();
646   auto SrcBuffer = SM.getBufferData(SM.getMainFileID());
647   Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(),
648           SrcBuffer.data(), SrcBuffer.data(),
649           SrcBuffer.data() + SrcBuffer.size());
650 
651   auto ToksView = llvm::makeArrayRef(Toks);
652   clang::Token T;
653   while (!L.LexFromRawLexer(T)) {
654     ASSERT_TRUE(!ToksView.empty());
655     EXPECT_EQ(T.getKind(), ToksView.front().getKind());
656     ToksView = ToksView.drop_front();
657   }
658   EXPECT_TRUE(ToksView.empty());
659 }
660 } // anonymous namespace
661