xref: /llvm-project/clang/unittests/Lex/LexerTest.cpp (revision fbd86d05fe51d45f19df8d63aee41d979c268f8f)
1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "clang/Lex/Lexer.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Testing/Annotations/Annotations.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <array>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>
35 
36 namespace {
37 using namespace clang;
38 using testing::ElementsAre;
39 
/// Test fixture shared by the lexer tests.
///
/// It owns the objects needed to build a Preprocessor (file manager,
/// diagnostics engine, source manager, language options and an x86_64 Darwin
/// target) and provides helpers to lex a source string, check the kinds of the
/// produced tokens, and read back the source text covered by two tokens.
class LexerTest : public ::testing::Test {
protected:
  LexerTest()
    : FileMgr(FileMgrOpts),
      DiagID(new DiagnosticIDs()),
      Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
      SourceMgr(Diags, FileMgr),
      TargetOpts(new TargetOptions)
  {
    TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
  }

  /// Creates a preprocessor whose main file is an in-memory buffer holding
  /// \p Source, initializes it for the fixture's target and enters the main
  /// file.
  ///
  /// \p ModLoader is passed to the preprocessor by reference, so the caller
  /// must keep it alive for as long as the returned preprocessor is used.
  std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
                                         TrivialModuleLoader &ModLoader) {
    std::unique_ptr<llvm::MemoryBuffer> Buf =
        llvm::MemoryBuffer::getMemBuffer(Source);
    SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));

    // The preprocessor receives its HeaderSearch by reference and is returned
    // to the caller, so a stack-allocated HeaderSearch would dangle as soon as
    // this function returns. Allocate it on the heap and let the preprocessor
    // own (and eventually delete) it.
    auto *HeaderInfo =
        new HeaderSearch(std::make_shared<HeaderSearchOptions>(), SourceMgr,
                         Diags, LangOpts, Target.get());
    std::unique_ptr<Preprocessor> NewPP = std::make_unique<Preprocessor>(
        std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
        *HeaderInfo, ModLoader,
        /*IILookup =*/nullptr,
        /*OwnsHeaderSearch =*/true);
    NewPP->Initialize(*Target);
    NewPP->EnterMainSourceFile();
    return NewPP;
  }

  /// Lexes \p Source to EOF with a fresh preprocessor (stored in the PP
  /// member so tests can keep using it) and returns the tokens produced.
  std::vector<Token> Lex(StringRef Source) {
    // PP outlives this call, so it must be bound to the fixture-owned module
    // loader rather than to a function-local one.
    PP = CreatePP(Source, FixtureModLoader);

    std::vector<Token> toks;
    PP->LexTokensUntilEOF(&toks);

    return toks;
  }

  /// Lexes \p Source and expects the token kinds to equal \p ExpectedTokens,
  /// both in number and position-by-position. Returns the lexed tokens.
  ///
  /// Mismatches are reported with EXPECT_*, so the caller still gets the
  /// tokens back and must check the size itself before indexing into them.
  std::vector<Token> CheckLex(StringRef Source,
                              ArrayRef<tok::TokenKind> ExpectedTokens) {
    auto toks = Lex(Source);
    EXPECT_EQ(ExpectedTokens.size(), toks.size());
    // Compare only the common prefix: if fewer tokens were lexed than
    // expected, the size mismatch is already reported above and reading
    // toks[i] past the end would be undefined behavior.
    for (size_t i = 0, e = ExpectedTokens.size(), n = toks.size();
         i != e && i != n; ++i) {
      EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
    }

    return toks;
  }

  /// Returns the source text of the token range [\p Begin, \p End], or the
  /// string "<INVALID>" when Lexer::getSourceText flags the range as invalid.
  std::string getSourceText(Token Begin, Token End) {
    bool Invalid = false;
    StringRef Str =
        Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
                                    Begin.getLocation(), End.getLocation())),
                             SourceMgr, LangOpts, &Invalid);
    if (Invalid)
      return "<INVALID>";
    return std::string(Str);
  }

  FileSystemOptions FileMgrOpts;
  FileManager FileMgr;
  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
  DiagnosticsEngine Diags;
  SourceManager SourceMgr;
  LangOptions LangOpts;
  std::shared_ptr<TargetOptions> TargetOpts;
  IntrusiveRefCntPtr<TargetInfo> Target;
  // Module loader used by Lex(). Declared before PP so that it is destroyed
  // after the preprocessor that refers to it.
  TrivialModuleLoader FixtureModLoader;
  std::unique_ptr<Preprocessor> PP;
};
114 
115 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
116   std::vector<tok::TokenKind> ExpectedTokens;
117   ExpectedTokens.push_back(tok::identifier);
118   ExpectedTokens.push_back(tok::l_paren);
119   ExpectedTokens.push_back(tok::identifier);
120   ExpectedTokens.push_back(tok::r_paren);
121 
122   std::vector<Token> toks = CheckLex("#define M(x) x\n"
123                                      "M(f(M(i)))",
124                                      ExpectedTokens);
125 
126   EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
127 }
128 
129 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
130   std::vector<tok::TokenKind> ExpectedTokens;
131   ExpectedTokens.push_back(tok::identifier);
132   ExpectedTokens.push_back(tok::identifier);
133 
134   std::vector<Token> toks = CheckLex("#define M(x) x\n"
135                                      "M(M(i) c)",
136                                      ExpectedTokens);
137 
138   EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
139 }
140 
// A range from the second `c` to the token produced by the inner M(i) in
// `M(c c M(i))` is expected to read back as "c M(i)".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c c M(i))",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
}
153 
// A range from the token produced by the inner M(i) to the first `c` in
// `M(M(i) c c)` is expected to read back as "M(i) c".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c c)",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
}
166 
// Tokens 1 and 2 come from two different top-level M(...) calls; a range
// spanning them is expected to be reported as invalid.
TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c M(i)) M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
}
180 
// The identifier inside f(...) is built by the `x##c` paste in C(i); its
// source text is expected to be the C(i) call.
TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) M(x##c)\n"
                                     "M(f(C(i)))",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
}
195 
// The `i` in `f(M(M(i)))` goes through two nested M calls; its source text is
// expected to cover both of them.
TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "f(M(M(i)))",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
}
208 
// The `i` in `M(f(i))` sits in the middle of the macro argument; its source
// text is expected to be just "i".
TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(i))",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
}
221 
// Same as the nested-call case, but the two wrapping macros are different
// (C around M); the source text is expected to cover both calls.
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
}
235 
// C expands to `c x`, so the token coming from M(i) is not the first token of
// C's expansion; its source text is expected to stop at "M(i)".
TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
250 
// C expands to `c M(x)` and is called with `f(M(i))`; the source text of the
// innermost `i` token is expected to be "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::l_paren, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c M(x)\n"
                                     "C(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
265 
// Exercises several static Lexer helpers on macro-expanded tokens:
// isAtStartOfMacroExpansion / isAtEndOfMacroExpansion, makeFileCharRange,
// getSourceText and getImmediateMacroName.
TEST_F(LexerTest, LexAPI) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      // Line 1 (after the #defines)
      tok::l_square, tok::identifier, tok::r_square,
      tok::l_square, tok::identifier, tok::r_square,
      // Line 2
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
                                     "#define N(x) x\n"
                                     "#define INN(x) x\n"
                                     "#define NOF1 INN(val)\n"
                                     "#define NOF2 val\n"
                                     "M(foo) N([bar])\n"
                                     "N(INN(val)) N(NOF1) N(NOF2) N(val)",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  // Tokens of `M(foo)`, i.e. `[foo]` where the brackets come from M's body.
  SourceLocation lsqrLoc = toks[0].getLocation();
  SourceLocation idLoc = toks[1].getLocation();
  SourceLocation rsqrLoc = toks[2].getLocation();
  CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);

  SourceLocation Loc;
  EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getBegin());
  EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getEnd());
  EXPECT_TRUE(macroRange.isTokenRange());

  // Ranges covering only part of the expansion cannot be mapped to the file.
  CharSourceRange range = Lexer::makeFileCharRange(
           CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  // The full expansion maps to a char range over the macro call.
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_FALSE(range.isTokenRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(macroRange.getBegin(),
                        macroRange.getEnd().getLocWithOffset(1)));

  StringRef text = Lexer::getSourceText(
                               CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
                               SourceMgr, LangOpts);
  EXPECT_EQ(text, "M(foo)");

  // Tokens of `N([bar])`: here all three tokens are spelled in the macro
  // argument, so partial ranges can be mapped back to the file.
  SourceLocation macroLsqrLoc = toks[3].getLocation();
  SourceLocation macroIdLoc = toks[4].getLocation();
  SourceLocation macroRsqrLoc = toks[5].getLocation();
  SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
  SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
  SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);

  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
      SourceMgr, LangOpts);
  // The char range ends just past `bar`, which is 3 characters long.
  EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
            range.getAsRange());

  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
            range.getAsRange());

  macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
  range = Lexer::makeFileCharRange(
                     CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
                     SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
            range.getAsRange());

  text = Lexer::getSourceText(
          CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
          SourceMgr, LangOpts);
  EXPECT_EQ(text, "[bar");

  // The four `val` tokens of the last source line, one per N(...) call.
  SourceLocation idLoc1 = toks[6].getLocation();
  SourceLocation idLoc2 = toks[7].getLocation();
  SourceLocation idLoc3 = toks[8].getLocation();
  SourceLocation idLoc4 = toks[9].getLocation();
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
  EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
  EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
}
362 
// Checks makeFileCharRange on ranges that end at the first half of a `>>`
// token that has been split with Preprocessor::SplitToken.
TEST_F(LexerTest, HandlesSplitTokens) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      // Line 1 (after the #defines)
      tok::identifier, tok::less, tok::identifier, tok::less,
      tok::greatergreater,
      // Line 2
      tok::identifier, tok::less, tok::identifier, tok::less,
      tok::greatergreater};

  std::vector<Token> toks = CheckLex("#define TY ty\n"
                                     "#define RANGLE ty<ty<>>\n"
                                     "TY<ty<>>\n"
                                     "RANGLE",
                                     ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  SourceLocation outerTyLoc = toks[0].getLocation();
  SourceLocation innerTyLoc = toks[2].getLocation();
  SourceLocation gtgtLoc = toks[4].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);

  // Verify that it only captures the first greater-than and not the second one.
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));

  // Verify case where range begins in a macro expansion.
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
                        gtgtLoc.getLocWithOffset(1)));

  SourceLocation macroInnerTyLoc = toks[7].getLocation();
  SourceLocation macroGtgtLoc = toks[9].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);

  // Verify that it fails (because it only captures the first greater-than and
  // not the second one, so it doesn't span the entire macro expansion).
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
      SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
}
421 
// Regression test for the size of the FileID created for a macro argument
// that is forwarded through a second variadic macro.
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  std::vector<Token> toks =
      Lex("#define helper1 0\n"
          "void helper2(const char *, ...);\n"
          "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
          "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
          "void f1() { M2(\"a\", \"b\"); }");

  // Token 20 is read below; fail cleanly instead of reading out of bounds if
  // the lexer ever produces fewer tokens.
  ASSERT_GT(toks.size(), 20u);

  // Check the file corresponding to the "helper1" macro arg in M2.
  //
  // The lexer used to report its size as 31, meaning that the end of the
  // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  // sure that we get the correct end location (the comma after "helper1").
  SourceLocation helper1ArgLoc = toks[20].getLocation();
  EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
438 
// Builds a MacroArgs object by hand for a three-parameter function-like macro
// and checks that each unexpanded argument stringifies correctly, and (in
// assert-enabled builds with death tests) that asking for a fourth argument
// trips the "Invalid arg #" assertion.
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);

  llvm::BumpPtrAllocator Allocator;
  // Value-initialize so that the pointers copied into the MacroInfo are null
  // rather than indeterminate; this test only checks the parameter count.
  std::array<IdentifierInfo *, 3> ParamList{};
  MacroInfo *MI = PP->AllocateMacroInfo({});
  MI->setIsFunctionLike();
  MI->setParameterList(ParamList, Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // Lex the source into the argument token list. Each comma, and the end of
  // input, is recorded as an EOF token, which separates the arguments.
  Token Eof;
  Eof.setKind(tok::eof);
  std::vector<Token> ArgTokens;
  while (true) {
    Token Tok;
    PP->Lex(Tok);
    if (Tok.is(tok::eof)) {
      ArgTokens.push_back(Eof);
      break;
    }
    if (Tok.is(tok::comma))
      ArgTokens.push_back(Eof);
    else
      ArgTokens.push_back(Tok);
  }

  // MacroArgs objects are released through destroy(), not delete.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
                                 /*Charify=*/false, {}, {});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"", Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
#endif
}
487 
// Checks Lexer::isNewLineEscaped on a set of short buffers, always querying
// the position of the last character of the buffer.
TEST_F(LexerTest, IsNewLineEscapedValid) {
  auto LastCharIsEscapedNewLine = [](const char *Buffer) {
    const StringRef Text(Buffer);
    return Lexer::isNewLineEscaped(Text.begin(), Text.end() - 1);
  };

  // A backslash, optionally followed by horizontal whitespace, then a single
  // line ending.
  EXPECT_TRUE(LastCharIsEscapedNewLine("\\\r"));
  EXPECT_TRUE(LastCharIsEscapedNewLine("\\\n"));
  EXPECT_TRUE(LastCharIsEscapedNewLine("\\\r\n"));
  EXPECT_TRUE(LastCharIsEscapedNewLine("\\\n\r"));
  EXPECT_TRUE(LastCharIsEscapedNewLine("\\ \t\v\f\r"));
  EXPECT_TRUE(LastCharIsEscapedNewLine("\\ \t\v\f\r\n"));

  // A second line ending after the escaped one, or no backslash at all.
  EXPECT_FALSE(LastCharIsEscapedNewLine("\\\r\r"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\\\r\r\n"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\\\n\n"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\r"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\n"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\r\n"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\n\r"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\r\r"));
  EXPECT_FALSE(LastCharIsEscapedNewLine("\n\n"));
}
510 
// For identifiers that contain escaped newlines, GetBeginningOfToken called
// on any of the first IdentifierLength character positions of the token must
// return the token's own start.
TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
  // Every line of the input is this many characters long (escaped newlines
  // included), which keeps the offset loop below uniform.
  const unsigned IdentifierLength = 8;
  std::string TextToLex = "rabarbar\n"
                          "foo\\\nbar\n"
                          "foo\\\rbar\n"
                          "fo\\\r\nbar\n"
                          "foo\\\n\rba\n";
  // Five identifiers are expected (count, value constructor).
  std::vector<tok::TokenKind> ExpectedTokens(5, tok::identifier);
  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);

  for (const Token &Tok : LexedTokens) {
    const SourceLocation TokBegin = Tok.getLocation();
    const unsigned ExpectedOffset = SourceMgr.getDecomposedLoc(TokBegin).second;

    for (unsigned Delta = 0; Delta < IdentifierLength; ++Delta) {
      const SourceLocation Found = Lexer::GetBeginningOfToken(
          TokBegin.getLocWithOffset(Delta), SourceMgr, LangOpts);

      // The offset found from inside the token must match the offset the
      // lexer reported for the token itself.
      EXPECT_EQ(SourceMgr.getDecomposedExpansionLoc(Found).second,
                ExpectedOffset);
    }
  }
}
540 
// Inputs that stop right after a backslash sequence: a line comment ending in
// an escaped newline and two unterminated #include directives. Each one is
// expected to lex to no tokens at all.
TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
  const char *const TruncatedInputs[] = {
      "  //  \\\n",
      "#include <\\\\",
      "#include <\\\\\n",
  };
  for (const char *Input : TruncatedInputs)
    EXPECT_TRUE(Lex(Input).empty());
}
546 
// Runs three multi-line inputs through both Lexer::Stringify overloads:
//   std::string Lexer::Stringify(StringRef Str, bool Charify)
//   void Lexer::Stringify(SmallVectorImpl<char> &Str)
// and checks that both produce the same escaped text.
TEST_F(LexerTest, StringizingRasString) {
  // Plain multi-line text containing double quotes.
  std::string QuotedText = R"(foo
    {"bar":[]}
    baz)";
  // Corner cases: backslashes at line ends and literal backslash-n sequences.
  std::string BackslashText = R"(\
    \n
    \\n
    \\)";
  // Backslash followed by consecutive line breaks.
  std::string BlankLineText = R"(a\


    \\b)";

  // Buffer copies for the in-place overload, taken before anything is
  // stringified.
  SmallString<128> QuotedBuf;
  QuotedBuf += QuotedText.c_str();
  SmallString<128> BackslashBuf;
  BackslashBuf += BackslashText.c_str();
  SmallString<128> BlankLineBuf;
  BlankLineBuf += BlankLineText.c_str();

  QuotedText = Lexer::Stringify(StringRef(QuotedText));
  BackslashText = Lexer::Stringify(StringRef(BackslashText));
  BlankLineText = Lexer::Stringify(StringRef(BlankLineText));
  Lexer::Stringify(QuotedBuf);
  Lexer::Stringify(BackslashBuf);
  Lexer::Stringify(BlankLineBuf);

  EXPECT_EQ(QuotedText, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(QuotedBuf, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(BackslashText, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(BackslashBuf, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(BlankLineText, R"(a\\\n\n\n    \\\\b)");
  EXPECT_EQ(BlankLineBuf, R"(a\\\n\n\n    \\\\b)");
}
584 
// Regression test: converting the token range of an object-like macro use to
// a char range used to drop the last character ("MO" instead of "MOO").
TEST_F(LexerTest, CharRangeOffByOne) {
  std::vector<Token> toks = Lex(R"(#define MOO 1
    void foo() { MOO; })");
  // Token 5 is read below; fail cleanly instead of reading out of bounds if
  // the lexer ever produces fewer tokens.
  ASSERT_GT(toks.size(), 5u);
  const Token &moo = toks[5];

  EXPECT_EQ(getSourceText(moo, moo), "MOO");

  SourceRange R{moo.getLocation(), moo.getLocation()};

  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));

  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);

  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
}
603 
// Walks the main file forwards with Lexer::findNextToken, starting from the
// beginning of the file, and records the spelling of every token found.
// The walk never yields the token at the start location itself, and the
// comment line is skipped.
TEST_F(LexerTest, FindNextToken) {
  Lex("int abcd = 0;\n"
      "// A comment.\n"
      "int xyz = abcd;\n");

  std::vector<std::string> Spellings;
  SourceLocation Cursor =
      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  for (;;) {
    auto Next = Lexer::findNextToken(Cursor, SourceMgr, LangOpts);
    ASSERT_TRUE(Next.has_value());
    if (Next->is(tok::eof))
      break;
    Spellings.push_back(getSourceText(*Next, *Next));
    Cursor = Next->getLocation();
  }

  EXPECT_THAT(Spellings, ElementsAre("abcd", "=", "0", ";", "int", "xyz", "=",
                                     "abcd", ";"));
}
622 
// Same forward walk as FindNextToken, but with comments requested: the line
// comment is expected to show up as a token between the two statements.
TEST_F(LexerTest, FindNextTokenIncludingComments) {
  Lex("int abcd = 0;\n"
      "// A comment.\n"
      "int xyz = abcd;\n");

  std::vector<std::string> Spellings;
  SourceLocation Cursor =
      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  for (;;) {
    auto Next = Lexer::findNextToken(Cursor, SourceMgr, LangOpts, true);
    ASSERT_TRUE(Next.has_value());
    if (Next->is(tok::eof))
      break;
    Spellings.push_back(getSourceText(*Next, *Next));
    Cursor = Next->getLocation();
  }

  EXPECT_THAT(Spellings,
              ElementsAre("abcd", "=", "0", ";", "// A comment.", "int", "xyz",
                          "=", "abcd", ";"));
}
642 
// Walks the main file backwards with Lexer::findPreviousToken, starting from
// the end of the file, until no previous token is returned. Comments are not
// requested, so the comment line is skipped.
TEST_F(LexerTest, FindPreviousToken) {
  Lex("int abcd = 0;\n"
      "// A comment.\n"
      "int xyz = abcd;\n");

  std::vector<std::string> Spellings;
  SourceLocation Cursor = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
  for (;;) {
    auto Prev = Lexer::findPreviousToken(Cursor, SourceMgr, LangOpts, false);
    if (!Prev)
      break;
    Spellings.push_back(getSourceText(*Prev, *Prev));
    // Continue the search from the start of the token just found.
    Cursor =
        Lexer::GetBeginningOfToken(Prev->getLocation(), SourceMgr, LangOpts);
  }

  EXPECT_THAT(Spellings, ElementsAre(";", "abcd", "=", "xyz", "int", ";", "0",
                                     "=", "abcd", "int"));
}
659 
// Same backward walk as FindPreviousToken, but with comments requested: the
// line comment is expected to show up between the two statements.
TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
  Lex("int abcd = 0;\n"
      "// A comment.\n"
      "int xyz = abcd;\n");

  std::vector<std::string> Spellings;
  SourceLocation Cursor = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
  for (;;) {
    auto Prev = Lexer::findPreviousToken(Cursor, SourceMgr, LangOpts, true);
    if (!Prev)
      break;
    Spellings.push_back(getSourceText(*Prev, *Prev));
    // Continue the search from the start of the token just found.
    Cursor =
        Lexer::GetBeginningOfToken(Prev->getLocation(), SourceMgr, LangOpts);
  }

  EXPECT_THAT(Spellings,
              ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
                          "0", "=", "abcd", "int"));
}
677 
// After lexing an empty main file to EOF, the predefines buffer's FileID is
// expected to report exactly one created FileID.
TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
  TrivialModuleLoader ModLoader;
  std::unique_ptr<Preprocessor> EmptyFilePP = CreatePP("", ModLoader);
  EmptyFilePP->LexTokensUntilEOF();

  const FileID PredefinesFID = EmptyFilePP->getPredefinesFileID();
  EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PredefinesFID), 1U);
}
685 
// With LineComment disabled, lexes the same buffer once through the
// preprocessor and once with a raw Lexer, and expects both to yield the same
// sequence of token kinds.
TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
  const llvm::StringLiteral Source = R"cpp(
  // First line comment.
  //* Second line comment which is ambigious.
  ; // Have a non-comment token to make sure something is lexed.
  )cpp";
  LangOpts.LineComment = false;
  const std::vector<Token> PPToks = Lex(Source);

  // Build a raw lexer over the main file buffer the preprocessor just used.
  SourceManager &SM = PP->getSourceManager();
  const FileID MainFID = SM.getMainFileID();
  const StringRef Buffer = SM.getBufferData(MainFID);
  Lexer RawLexer(SM.getLocForStartOfFile(MainFID), PP->getLangOpts(),
                 Buffer.data(), Buffer.data(), Buffer.data() + Buffer.size());

  // Consume the preprocessor tokens in lock step with the raw tokens.
  ArrayRef<Token> Remaining(PPToks);
  EXPECT_FALSE(Remaining.empty());
  Token RawTok;
  while (!RawLexer.LexFromRawLexer(RawTok)) {
    ASSERT_FALSE(Remaining.empty());
    EXPECT_EQ(RawTok.getKind(), Remaining.front().getKind());
    Remaining = Remaining.drop_front();
  }
  EXPECT_TRUE(Remaining.empty());
}
710 
// Lexer::getRawToken with IgnoreWhiteSpace=false must fail when asked for a
// token at an escaped newline that is followed by whitespace.
TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
  const llvm::StringLiteral Source = R"cc(
  #define ONE \
  1

  int i = ONE;
  )cc";
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::kw_int, tok::identifier, tok::equal, tok::numeric_constant,
      tok::semi};
  std::vector<Token> Toks = CheckLex(Source, ExpectedTokens);
  // CheckLex only reports a size mismatch; abort here instead of indexing
  // past the end of a short token list.
  ASSERT_EQ(ExpectedTokens.size(), Toks.size());

  // Set up by getting the raw token for the `1` in the macro definition.
  const Token &OneExpanded = Toks[3];
  Token Tok;
  ASSERT_FALSE(
      Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
  // The `ONE`.
  ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
  ASSERT_FALSE(
      Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
                         Tok, SourceMgr, LangOpts));
  // The `1` in the macro definition.
  ASSERT_EQ(Tok.getKind(), tok::numeric_constant);

  // Go back 4 characters: two spaces, one newline, and the backslash.
  SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4);
  // Expect true (=failure) because the whitespace immediately after the
  // escaped newline is not ignored.
  EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
                                 /*IgnoreWhiteSpace=*/false));
}
742 
// Each case marks the expected preamble with a [[...]] annotation; the size
// computed by Lexer::ComputePreamble (with C++ modules enabled) must equal
// the end offset of the annotated range.
TEST(LexerPreambleTest, PreambleBounds) {
  const std::string AnnotatedCases[] = {
      R"cc([[
        #include <foo>
        ]]int bar;
      )cc",
      R"cc([[
        #include <foo>
      ]])cc",
      R"cc([[
        // leading comment
        #include <foo>
        ]]// trailing comment
        int bar;
      )cc",
      R"cc([[
        module;
        #include <foo>
        ]]module bar;
        int x;
      )cc",
  };

  clang::LangOptions ModulesLangOpts;
  ModulesLangOpts.CPlusPlusModules = true;

  for (const std::string &Annotated : AnnotatedCases) {
    llvm::Annotations Parsed(Annotated);
    const auto Preamble = Lexer::ComputePreamble(Parsed.code(), ModulesLangOpts);
    EXPECT_EQ(Preamble.Size, Parsed.range().End) << Annotated;
  }
}
773 
774 } // anonymous namespace
775