xref: /llvm-project/clang/unittests/Lex/LexerTest.cpp (revision 079c40e8860ccbc80b5a04a26e474b2923d92d48)
1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "clang/Lex/Lexer.h"
11 #include "clang/Basic/Diagnostic.h"
12 #include "clang/Basic/DiagnosticOptions.h"
13 #include "clang/Basic/FileManager.h"
14 #include "clang/Basic/LangOptions.h"
15 #include "clang/Basic/MemoryBufferCache.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Basic/TargetInfo.h"
18 #include "clang/Basic/TargetOptions.h"
19 #include "clang/Lex/HeaderSearch.h"
20 #include "clang/Lex/HeaderSearchOptions.h"
21 #include "clang/Lex/ModuleLoader.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Lex/PreprocessorOptions.h"
24 #include "gtest/gtest.h"
25 
26 using namespace clang;
27 
28 namespace {
29 
30 class VoidModuleLoader : public ModuleLoader {
31   ModuleLoadResult loadModule(SourceLocation ImportLoc,
32                               ModuleIdPath Path,
33                               Module::NameVisibilityKind Visibility,
34                               bool IsInclusionDirective) override {
35     return ModuleLoadResult();
36   }
37 
38   void makeModuleVisible(Module *Mod,
39                          Module::NameVisibilityKind Visibility,
40                          SourceLocation ImportLoc) override { }
41 
42   GlobalModuleIndex *loadGlobalModuleIndex(SourceLocation TriggerLoc) override
43     { return nullptr; }
44   bool lookupMissingImports(StringRef Name, SourceLocation TriggerLoc) override
45     { return 0; }
46 };
47 
48 // The test fixture.
49 class LexerTest : public ::testing::Test {
50 protected:
51   LexerTest()
52     : FileMgr(FileMgrOpts),
53       DiagID(new DiagnosticIDs()),
54       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
55       SourceMgr(Diags, FileMgr),
56       TargetOpts(new TargetOptions)
57   {
58     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
59     Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
60   }
61 
62   std::vector<Token> Lex(StringRef Source) {
63     std::unique_ptr<llvm::MemoryBuffer> Buf =
64         llvm::MemoryBuffer::getMemBuffer(Source);
65     SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
66 
67     VoidModuleLoader ModLoader;
68     MemoryBufferCache PCMCache;
69     HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
70                             Diags, LangOpts, Target.get());
71     Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
72                     SourceMgr, PCMCache, HeaderInfo, ModLoader,
73                     /*IILookup =*/nullptr,
74                     /*OwnsHeaderSearch =*/false);
75     PP.Initialize(*Target);
76     PP.EnterMainSourceFile();
77 
78     std::vector<Token> toks;
79     while (1) {
80       Token tok;
81       PP.Lex(tok);
82       if (tok.is(tok::eof))
83         break;
84       toks.push_back(tok);
85     }
86 
87     return toks;
88   }
89 
90   std::vector<Token> CheckLex(StringRef Source,
91                               ArrayRef<tok::TokenKind> ExpectedTokens) {
92     auto toks = Lex(Source);
93     EXPECT_EQ(ExpectedTokens.size(), toks.size());
94     for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
95       EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
96     }
97 
98     return toks;
99   }
100 
101   std::string getSourceText(Token Begin, Token End) {
102     bool Invalid;
103     StringRef Str =
104         Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
105                                     Begin.getLocation(), End.getLocation())),
106                              SourceMgr, LangOpts, &Invalid);
107     if (Invalid)
108       return "<INVALID>";
109     return Str;
110   }
111 
112   FileSystemOptions FileMgrOpts;
113   FileManager FileMgr;
114   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
115   DiagnosticsEngine Diags;
116   SourceManager SourceMgr;
117   LangOptions LangOpts;
118   std::shared_ptr<TargetOptions> TargetOpts;
119   IntrusiveRefCntPtr<TargetInfo> Target;
120 };
121 
122 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
123   std::vector<tok::TokenKind> ExpectedTokens;
124   ExpectedTokens.push_back(tok::identifier);
125   ExpectedTokens.push_back(tok::l_paren);
126   ExpectedTokens.push_back(tok::identifier);
127   ExpectedTokens.push_back(tok::r_paren);
128 
129   std::vector<Token> toks = CheckLex("#define M(x) x\n"
130                                      "M(f(M(i)))",
131                                      ExpectedTokens);
132 
133   EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
134 }
135 
136 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
137   std::vector<tok::TokenKind> ExpectedTokens;
138   ExpectedTokens.push_back(tok::identifier);
139   ExpectedTokens.push_back(tok::identifier);
140 
141   std::vector<Token> toks = CheckLex("#define M(x) x\n"
142                                      "M(M(i) c)",
143                                      ExpectedTokens);
144 
145   EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
146 }
147 
148 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
149   std::vector<tok::TokenKind> ExpectedTokens;
150   ExpectedTokens.push_back(tok::identifier);
151   ExpectedTokens.push_back(tok::identifier);
152   ExpectedTokens.push_back(tok::identifier);
153 
154   std::vector<Token> toks = CheckLex("#define M(x) x\n"
155                                      "M(c c M(i))",
156                                      ExpectedTokens);
157 
158   EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
159 }
160 
161 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
162   std::vector<tok::TokenKind> ExpectedTokens;
163   ExpectedTokens.push_back(tok::identifier);
164   ExpectedTokens.push_back(tok::identifier);
165   ExpectedTokens.push_back(tok::identifier);
166 
167   std::vector<Token> toks = CheckLex("#define M(x) x\n"
168                                      "M(M(i) c c)",
169                                      ExpectedTokens);
170 
171   EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
172 }
173 
174 TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
175   std::vector<tok::TokenKind> ExpectedTokens;
176   ExpectedTokens.push_back(tok::identifier);
177   ExpectedTokens.push_back(tok::identifier);
178   ExpectedTokens.push_back(tok::identifier);
179   ExpectedTokens.push_back(tok::identifier);
180 
181   std::vector<Token> toks = CheckLex("#define M(x) x\n"
182                                      "M(c M(i)) M(M(i) c)",
183                                      ExpectedTokens);
184 
185   EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
186 }
187 
188 TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
189   std::vector<tok::TokenKind> ExpectedTokens;
190   ExpectedTokens.push_back(tok::identifier);
191   ExpectedTokens.push_back(tok::l_paren);
192   ExpectedTokens.push_back(tok::identifier);
193   ExpectedTokens.push_back(tok::r_paren);
194 
195   std::vector<Token> toks = CheckLex("#define M(x) x\n"
196                                      "#define C(x) M(x##c)\n"
197                                      "M(f(C(i)))",
198                                      ExpectedTokens);
199 
200   EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
201 }
202 
203 TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
204   std::vector<tok::TokenKind> ExpectedTokens;
205   ExpectedTokens.push_back(tok::identifier);
206   ExpectedTokens.push_back(tok::l_paren);
207   ExpectedTokens.push_back(tok::identifier);
208   ExpectedTokens.push_back(tok::r_paren);
209 
210   std::vector<Token> toks = CheckLex("#define M(x) x\n"
211                                      "f(M(M(i)))",
212                                      ExpectedTokens);
213   EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
214 }
215 
216 TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
217   std::vector<tok::TokenKind> ExpectedTokens;
218   ExpectedTokens.push_back(tok::identifier);
219   ExpectedTokens.push_back(tok::l_paren);
220   ExpectedTokens.push_back(tok::identifier);
221   ExpectedTokens.push_back(tok::r_paren);
222 
223   std::vector<Token> toks = CheckLex("#define M(x) x\n"
224                                      "M(f(i))",
225                                      ExpectedTokens);
226   EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
227 }
228 
229 TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
230   std::vector<tok::TokenKind> ExpectedTokens;
231   ExpectedTokens.push_back(tok::identifier);
232   ExpectedTokens.push_back(tok::l_paren);
233   ExpectedTokens.push_back(tok::identifier);
234   ExpectedTokens.push_back(tok::r_paren);
235 
236   std::vector<Token> toks = CheckLex("#define M(x) x\n"
237                                      "#define C(x) x\n"
238                                      "f(C(M(i)))",
239                                      ExpectedTokens);
240   EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
241 }
242 
243 TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
244   std::vector<tok::TokenKind> ExpectedTokens;
245   ExpectedTokens.push_back(tok::identifier);
246   ExpectedTokens.push_back(tok::l_paren);
247   ExpectedTokens.push_back(tok::identifier);
248   ExpectedTokens.push_back(tok::identifier);
249   ExpectedTokens.push_back(tok::r_paren);
250 
251   std::vector<Token> toks = CheckLex("#define M(x) x\n"
252                                      "#define C(x) c x\n"
253                                      "f(C(M(i)))",
254                                      ExpectedTokens);
255   EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
256 }
257 
258 TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
259   std::vector<tok::TokenKind> ExpectedTokens;
260   ExpectedTokens.push_back(tok::identifier);
261   ExpectedTokens.push_back(tok::identifier);
262   ExpectedTokens.push_back(tok::l_paren);
263   ExpectedTokens.push_back(tok::identifier);
264   ExpectedTokens.push_back(tok::r_paren);
265 
266   std::vector<Token> toks = CheckLex("#define M(x) x\n"
267                                      "#define C(x) c M(x)\n"
268                                      "C(f(M(i)))",
269                                      ExpectedTokens);
270   EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
271 }
272 
273 TEST_F(LexerTest, LexAPI) {
274   std::vector<tok::TokenKind> ExpectedTokens;
275   ExpectedTokens.push_back(tok::l_square);
276   ExpectedTokens.push_back(tok::identifier);
277   ExpectedTokens.push_back(tok::r_square);
278   ExpectedTokens.push_back(tok::l_square);
279   ExpectedTokens.push_back(tok::identifier);
280   ExpectedTokens.push_back(tok::r_square);
281   ExpectedTokens.push_back(tok::identifier);
282   ExpectedTokens.push_back(tok::identifier);
283   ExpectedTokens.push_back(tok::identifier);
284   ExpectedTokens.push_back(tok::identifier);
285 
286   std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
287                                      "#define N(x) x\n"
288                                      "#define INN(x) x\n"
289                                      "#define NOF1 INN(val)\n"
290                                      "#define NOF2 val\n"
291                                      "M(foo) N([bar])\n"
292                                      "N(INN(val)) N(NOF1) N(NOF2) N(val)",
293                                      ExpectedTokens);
294 
295   SourceLocation lsqrLoc = toks[0].getLocation();
296   SourceLocation idLoc = toks[1].getLocation();
297   SourceLocation rsqrLoc = toks[2].getLocation();
298   std::pair<SourceLocation,SourceLocation>
299     macroPair = SourceMgr.getExpansionRange(lsqrLoc);
300   SourceRange macroRange = SourceRange(macroPair.first, macroPair.second);
301 
302   SourceLocation Loc;
303   EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
304   EXPECT_EQ(Loc, macroRange.getBegin());
305   EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
306   EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
307   EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
308   EXPECT_EQ(Loc, macroRange.getEnd());
309 
310   CharSourceRange range = Lexer::makeFileCharRange(
311            CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
312   EXPECT_TRUE(range.isInvalid());
313   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
314                                    SourceMgr, LangOpts);
315   EXPECT_TRUE(range.isInvalid());
316   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
317                                    SourceMgr, LangOpts);
318   EXPECT_TRUE(!range.isTokenRange());
319   EXPECT_EQ(range.getAsRange(),
320             SourceRange(macroRange.getBegin(),
321                         macroRange.getEnd().getLocWithOffset(1)));
322 
323   StringRef text = Lexer::getSourceText(
324                                CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
325                                SourceMgr, LangOpts);
326   EXPECT_EQ(text, "M(foo)");
327 
328   SourceLocation macroLsqrLoc = toks[3].getLocation();
329   SourceLocation macroIdLoc = toks[4].getLocation();
330   SourceLocation macroRsqrLoc = toks[5].getLocation();
331   SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
332   SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
333   SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);
334 
335   range = Lexer::makeFileCharRange(
336       CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
337       SourceMgr, LangOpts);
338   EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
339             range.getAsRange());
340 
341   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
342                                    SourceMgr, LangOpts);
343   EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
344             range.getAsRange());
345 
346   macroPair = SourceMgr.getExpansionRange(macroLsqrLoc);
347   range = Lexer::makeFileCharRange(
348                      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
349                      SourceMgr, LangOpts);
350   EXPECT_EQ(SourceRange(macroPair.first, macroPair.second.getLocWithOffset(1)),
351             range.getAsRange());
352 
353   text = Lexer::getSourceText(
354           CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
355           SourceMgr, LangOpts);
356   EXPECT_EQ(text, "[bar");
357 
358 
359   SourceLocation idLoc1 = toks[6].getLocation();
360   SourceLocation idLoc2 = toks[7].getLocation();
361   SourceLocation idLoc3 = toks[8].getLocation();
362   SourceLocation idLoc4 = toks[9].getLocation();
363   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
364   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
365   EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
366   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
367 }
368 
369 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
370   std::vector<Token> toks =
371       Lex("#define helper1 0\n"
372           "void helper2(const char *, ...);\n"
373           "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
374           "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
375           "void f1() { M2(\"a\", \"b\"); }");
376 
377   // Check the file corresponding to the "helper1" macro arg in M2.
378   //
379   // The lexer used to report its size as 31, meaning that the end of the
380   // expansion would be on the *next line* (just past `M2("a", "b")`). Make
381   // sure that we get the correct end location (the comma after "helper1").
382   SourceLocation helper1ArgLoc = toks[20].getLocation();
383   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
384 }
385 
386 } // anonymous namespace
387