xref: /llvm-project/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp (revision 71f557355ddaea358c43b151de3a0e045aaa0863)
1 //===-- SourceCodeTests.cpp  ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Annotations.h"
9 #include "Protocol.h"
10 #include "SourceCode.h"
11 #include "TestTU.h"
12 #include "support/Context.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/TokenKinds.h"
16 #include "clang/Format/Format.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Testing/Support/Annotations.h"
19 #include "llvm/Testing/Support/Error.h"
20 #include "gmock/gmock.h"
21 #include "gtest/gtest.h"
22 #include <optional>
23 #include <tuple>
24 
25 namespace clang {
26 namespace clangd {
27 namespace {
28 
29 using llvm::Failed;
30 using llvm::FailedWithMessage;
31 using llvm::HasValue;
32 
33 MATCHER_P2(Pos, Line, Col, "") {
34   return arg.line == int(Line) && arg.character == int(Col);
35 }
36 
37 MATCHER_P(macroName, Name, "") { return arg.Name == Name; }
38 
39 /// A helper to make tests easier to read.
40 Position position(int Line, int Character) {
41   Position Pos;
42   Pos.line = Line;
43   Pos.character = Character;
44   return Pos;
45 }
46 
47 TEST(SourceCodeTests, lspLength) {
48   EXPECT_EQ(lspLength(""), 0UL);
49   EXPECT_EQ(lspLength("ascii"), 5UL);
50   // BMP
51   EXPECT_EQ(lspLength("↓"), 1UL);
52   EXPECT_EQ(lspLength("¥"), 1UL);
53   // astral
54   EXPECT_EQ(lspLength("��"), 2UL);
55 
56   WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8);
57   EXPECT_EQ(lspLength(""), 0UL);
58   EXPECT_EQ(lspLength("ascii"), 5UL);
59   // BMP
60   EXPECT_EQ(lspLength("↓"), 3UL);
61   EXPECT_EQ(lspLength("¥"), 2UL);
62   // astral
63   EXPECT_EQ(lspLength("��"), 4UL);
64 
65   WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32);
66   EXPECT_EQ(lspLength(""), 0UL);
67   EXPECT_EQ(lspLength("ascii"), 5UL);
68   // BMP
69   EXPECT_EQ(lspLength("↓"), 1UL);
70   EXPECT_EQ(lspLength("¥"), 1UL);
71   // astral
72   EXPECT_EQ(lspLength("��"), 1UL);
73 }
74 
75 TEST(SourceCodeTests, lspLengthBadUTF8) {
76   // Results are not well-defined if source file isn't valid UTF-8.
77   // However we shouldn't crash or return something totally wild.
78   const char *BadUTF8[] = {"\xa0", "\xff\xff\xff\xff\xff"};
79 
80   for (OffsetEncoding Encoding :
81        {OffsetEncoding::UTF8, OffsetEncoding::UTF16, OffsetEncoding::UTF32}) {
82     WithContextValue UTF32(kCurrentOffsetEncoding, Encoding);
83     for (const char *Bad : BadUTF8) {
84       EXPECT_GE(lspLength(Bad), 0u);
85       EXPECT_LE(lspLength(Bad), strlen(Bad));
86     }
87   }
88 }
89 
// Three-line fixture shared by the offset/position tests below.
// The '=', '→' and U+1F602 characters are ASCII (1 byte), BMP (3 bytes), and
// astral (4 bytes) respectively. The trailing number on each line is the byte
// offset at which the *next* line would start.
// The astral character is spelled as explicit UTF-8 bytes (in its own literal,
// so the hex escape cannot swallow following characters) to keep the byte
// offsets stable even if this file is re-encoded.
const char File[] = "0:0 = 0\n"
                    "1:0 → 8\n"
                    "2:0 "
                    "\xF0\x9F\x98\x82"
                    " 18";
/// Describes one line of the fixture text used by the offset tests.
struct Line {
  unsigned Number; // Zero-based line index.
  unsigned Offset; // Byte offset of the first character of the line.
  unsigned Length; // Length of the line in bytes (used as the loop bound).
};
/// One entry per line of the fixture, in order.
Line FileLines[] = {
    {0, 0, 7},
    {1, 8, 9},
    {2, 18, 11},
};
100 
101 TEST(SourceCodeTests, PositionToOffset) {
102   // line out of bounds
103   EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed());
104   // first line
105   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, -1)),
106                        llvm::Failed()); // out of range
107   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 0)),
108                        llvm::HasValue(0)); // first character
109   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 3)),
110                        llvm::HasValue(3)); // middle character
111   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 6)),
112                        llvm::HasValue(6)); // last character
113   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7)),
114                        llvm::HasValue(7)); // the newline itself
115   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7), false),
116                        llvm::HasValue(7));
117   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8)),
118                        llvm::HasValue(7)); // out of range
119   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8), false),
120                        llvm::Failed()); // out of range
121   // middle line
122   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, -1)),
123                        llvm::Failed()); // out of range
124   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 0)),
125                        llvm::HasValue(8)); // first character
126   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3)),
127                        llvm::HasValue(11)); // middle character
128   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3), false),
129                        llvm::HasValue(11));
130   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 6)),
131                        llvm::HasValue(16)); // last character
132   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 7)),
133                        llvm::HasValue(17)); // the newline itself
134   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8)),
135                        llvm::HasValue(17)); // out of range
136   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8), false),
137                        llvm::Failed()); // out of range
138   // last line
139   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, -1)),
140                        llvm::Failed()); // out of range
141   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 0)),
142                        llvm::HasValue(18)); // first character
143   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 3)),
144                        llvm::HasValue(21)); // middle character
145   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 5), false),
146                        llvm::Failed()); // middle of surrogate pair
147   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 5)),
148                        llvm::HasValue(26)); // middle of surrogate pair
149   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 6), false),
150                        llvm::HasValue(26)); // end of surrogate pair
151   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 8)),
152                        llvm::HasValue(28)); // last character
153   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 9)),
154                        llvm::HasValue(29)); // EOF
155   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 10), false),
156                        llvm::Failed()); // out of range
157   // line out of bounds
158   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed());
159   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 1)), llvm::Failed());
160 
161   // Codepoints are similar, except near astral characters.
162   WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32);
163   // line out of bounds
164   EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed());
165   // first line
166   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, -1)),
167                        llvm::Failed()); // out of range
168   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 0)),
169                        llvm::HasValue(0)); // first character
170   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 3)),
171                        llvm::HasValue(3)); // middle character
172   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 6)),
173                        llvm::HasValue(6)); // last character
174   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7)),
175                        llvm::HasValue(7)); // the newline itself
176   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7), false),
177                        llvm::HasValue(7));
178   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8)),
179                        llvm::HasValue(7)); // out of range
180   EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8), false),
181                        llvm::Failed()); // out of range
182   // middle line
183   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, -1)),
184                        llvm::Failed()); // out of range
185   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 0)),
186                        llvm::HasValue(8)); // first character
187   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3)),
188                        llvm::HasValue(11)); // middle character
189   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3), false),
190                        llvm::HasValue(11));
191   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 6)),
192                        llvm::HasValue(16)); // last character
193   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 7)),
194                        llvm::HasValue(17)); // the newline itself
195   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8)),
196                        llvm::HasValue(17)); // out of range
197   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8), false),
198                        llvm::Failed()); // out of range
199   // last line
200   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, -1)),
201                        llvm::Failed()); // out of range
202   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 0)),
203                        llvm::HasValue(18)); // first character
204   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 4)),
205                        llvm::HasValue(22)); // Before astral character.
206   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 5), false),
207                        llvm::HasValue(26)); // after astral character
208   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 7)),
209                        llvm::HasValue(28)); // last character
210   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 8)),
211                        llvm::HasValue(29)); // EOF
212   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 9), false),
213                        llvm::Failed()); // out of range
214   // line out of bounds
215   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed());
216   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 1)), llvm::Failed());
217 
218   // Test UTF-8, where transformations are trivial.
219   WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8);
220   EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed());
221   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed());
222   for (Line L : FileLines) {
223     EXPECT_THAT_EXPECTED(positionToOffset(File, position(L.Number, -1)),
224                          llvm::Failed()); // out of range
225     for (unsigned I = 0; I <= L.Length; ++I)
226       EXPECT_THAT_EXPECTED(positionToOffset(File, position(L.Number, I)),
227                            llvm::HasValue(L.Offset + I));
228     EXPECT_THAT_EXPECTED(
229         positionToOffset(File, position(L.Number, L.Length + 1)),
230         llvm::HasValue(L.Offset + L.Length));
231     EXPECT_THAT_EXPECTED(
232         positionToOffset(File, position(L.Number, L.Length + 1), false),
233         llvm::Failed()); // out of range
234   }
235 }
236 
237 TEST(SourceCodeTests, OffsetToPosition) {
238   EXPECT_THAT(offsetToPosition(File, 0), Pos(0, 0)) << "start of file";
239   EXPECT_THAT(offsetToPosition(File, 3), Pos(0, 3)) << "in first line";
240   EXPECT_THAT(offsetToPosition(File, 6), Pos(0, 6)) << "end of first line";
241   EXPECT_THAT(offsetToPosition(File, 7), Pos(0, 7)) << "first newline";
242   EXPECT_THAT(offsetToPosition(File, 8), Pos(1, 0)) << "start of second line";
243   EXPECT_THAT(offsetToPosition(File, 12), Pos(1, 4)) << "before BMP char";
244   EXPECT_THAT(offsetToPosition(File, 13), Pos(1, 5)) << "in BMP char";
245   EXPECT_THAT(offsetToPosition(File, 15), Pos(1, 5)) << "after BMP char";
246   EXPECT_THAT(offsetToPosition(File, 16), Pos(1, 6)) << "end of second line";
247   EXPECT_THAT(offsetToPosition(File, 17), Pos(1, 7)) << "second newline";
248   EXPECT_THAT(offsetToPosition(File, 18), Pos(2, 0)) << "start of last line";
249   EXPECT_THAT(offsetToPosition(File, 21), Pos(2, 3)) << "in last line";
250   EXPECT_THAT(offsetToPosition(File, 22), Pos(2, 4)) << "before astral char";
251   EXPECT_THAT(offsetToPosition(File, 24), Pos(2, 6)) << "in astral char";
252   EXPECT_THAT(offsetToPosition(File, 26), Pos(2, 6)) << "after astral char";
253   EXPECT_THAT(offsetToPosition(File, 28), Pos(2, 8)) << "end of last line";
254   EXPECT_THAT(offsetToPosition(File, 29), Pos(2, 9)) << "EOF";
255   EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 9)) << "out of bounds";
256 
257   // Codepoints are similar, except near astral characters.
258   WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32);
259   EXPECT_THAT(offsetToPosition(File, 0), Pos(0, 0)) << "start of file";
260   EXPECT_THAT(offsetToPosition(File, 3), Pos(0, 3)) << "in first line";
261   EXPECT_THAT(offsetToPosition(File, 6), Pos(0, 6)) << "end of first line";
262   EXPECT_THAT(offsetToPosition(File, 7), Pos(0, 7)) << "first newline";
263   EXPECT_THAT(offsetToPosition(File, 8), Pos(1, 0)) << "start of second line";
264   EXPECT_THAT(offsetToPosition(File, 12), Pos(1, 4)) << "before BMP char";
265   EXPECT_THAT(offsetToPosition(File, 13), Pos(1, 5)) << "in BMP char";
266   EXPECT_THAT(offsetToPosition(File, 15), Pos(1, 5)) << "after BMP char";
267   EXPECT_THAT(offsetToPosition(File, 16), Pos(1, 6)) << "end of second line";
268   EXPECT_THAT(offsetToPosition(File, 17), Pos(1, 7)) << "second newline";
269   EXPECT_THAT(offsetToPosition(File, 18), Pos(2, 0)) << "start of last line";
270   EXPECT_THAT(offsetToPosition(File, 21), Pos(2, 3)) << "in last line";
271   EXPECT_THAT(offsetToPosition(File, 22), Pos(2, 4)) << "before astral char";
272   EXPECT_THAT(offsetToPosition(File, 24), Pos(2, 5)) << "in astral char";
273   EXPECT_THAT(offsetToPosition(File, 26), Pos(2, 5)) << "after astral char";
274   EXPECT_THAT(offsetToPosition(File, 28), Pos(2, 7)) << "end of last line";
275   EXPECT_THAT(offsetToPosition(File, 29), Pos(2, 8)) << "EOF";
276   EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 8)) << "out of bounds";
277 
278   WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8);
279   for (Line L : FileLines) {
280     for (unsigned I = 0; I <= L.Length; ++I)
281       EXPECT_THAT(offsetToPosition(File, L.Offset + I), Pos(L.Number, I));
282   }
283   EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 11)) << "out of bounds";
284 }
285 
286 TEST(SourceCodeTests, SourceLocationInMainFile) {
287   Annotations Source(R"cpp(
288     ^in^t ^foo
289     ^bar
290     ^baz ^() {}  {} {} {} { }^
291 )cpp");
292 
293   SourceManagerForFile Owner("foo.cpp", Source.code());
294   SourceManager &SM = Owner.get();
295 
296   SourceLocation StartOfFile = SM.getLocForStartOfFile(SM.getMainFileID());
297   EXPECT_THAT_EXPECTED(sourceLocationInMainFile(SM, position(0, 0)),
298                        HasValue(StartOfFile));
299   // End of file.
300   EXPECT_THAT_EXPECTED(
301       sourceLocationInMainFile(SM, position(4, 0)),
302       HasValue(StartOfFile.getLocWithOffset(Source.code().size())));
303   // Column number is too large.
304   EXPECT_THAT_EXPECTED(sourceLocationInMainFile(SM, position(0, 1)), Failed());
305   EXPECT_THAT_EXPECTED(sourceLocationInMainFile(SM, position(0, 100)),
306                        Failed());
307   EXPECT_THAT_EXPECTED(sourceLocationInMainFile(SM, position(4, 1)), Failed());
308   // Line number is too large.
309   EXPECT_THAT_EXPECTED(sourceLocationInMainFile(SM, position(5, 0)), Failed());
310   // Check all positions mentioned in the test return valid results.
311   for (auto P : Source.points()) {
312     size_t Offset = llvm::cantFail(positionToOffset(Source.code(), P));
313     EXPECT_THAT_EXPECTED(sourceLocationInMainFile(SM, P),
314                          HasValue(StartOfFile.getLocWithOffset(Offset)));
315   }
316 }
317 
318 TEST(SourceCodeTests, isReservedName) {
319   EXPECT_FALSE(isReservedName(""));
320   EXPECT_FALSE(isReservedName("_"));
321   EXPECT_FALSE(isReservedName("foo"));
322   EXPECT_FALSE(isReservedName("_foo"));
323   EXPECT_TRUE(isReservedName("__foo"));
324   EXPECT_TRUE(isReservedName("_Foo"));
325   EXPECT_FALSE(isReservedName("foo__bar")) << "FIXME";
326 }
327 
328 TEST(SourceCodeTests, CollectIdentifiers) {
329   auto Style = format::getLLVMStyle();
330   auto IDs = collectIdentifiers(R"cpp(
331   #include "a.h"
332   void foo() { int xyz; int abc = xyz; return foo(); }
333   )cpp",
334                                 Style);
335   EXPECT_EQ(IDs.size(), 7u);
336   EXPECT_EQ(IDs["include"], 1u);
337   EXPECT_EQ(IDs["void"], 1u);
338   EXPECT_EQ(IDs["int"], 2u);
339   EXPECT_EQ(IDs["xyz"], 2u);
340   EXPECT_EQ(IDs["abc"], 1u);
341   EXPECT_EQ(IDs["return"], 1u);
342   EXPECT_EQ(IDs["foo"], 2u);
343 }
344 
345 TEST(SourceCodeTests, CollectWords) {
346   auto Words = collectWords(R"cpp(
347   #define FIZZ_BUZZ
348   // this is a comment
349   std::string getSomeText() { return "magic word"; }
350   )cpp");
351   std::set<StringRef> ActualWords(Words.keys().begin(), Words.keys().end());
352   std::set<StringRef> ExpectedWords = {"define",  "fizz",   "buzz", "this",
353                                        "comment", "string", "some", "text",
354                                        "return",  "magic",  "word"};
355   EXPECT_EQ(ActualWords, ExpectedWords);
356 }
357 
358 class SpelledWordsTest : public ::testing::Test {
359   llvm::Optional<ParsedAST> AST;
360 
361   llvm::Optional<SpelledWord> tryWord(const char *Text) {
362     llvm::Annotations A(Text);
363     auto TU = TestTU::withCode(A.code());
364     AST = TU.build();
365     auto SW = SpelledWord::touching(
366         AST->getSourceManager().getComposedLoc(
367             AST->getSourceManager().getMainFileID(), A.point()),
368         AST->getTokens(), AST->getLangOpts());
369     if (A.ranges().size()) {
370       llvm::StringRef Want = A.code().slice(A.range().Begin, A.range().End);
371       EXPECT_EQ(Want, SW->Text) << Text;
372     }
373     return SW;
374   }
375 
376 protected:
377   SpelledWord word(const char *Text) {
378     auto Result = tryWord(Text);
379     EXPECT_TRUE(Result) << Text;
380     return Result.value_or(SpelledWord());
381   }
382 
383   void noWord(const char *Text) { EXPECT_FALSE(tryWord(Text)) << Text; }
384 };
385 
386 TEST_F(SpelledWordsTest, HeuristicBoundaries) {
387   word("// [[^foo]] ");
388   word("// [[f^oo]] ");
389   word("// [[foo^]] ");
390   word("// [[foo^]]+bar ");
391   noWord("//^ foo ");
392   noWord("// foo ^");
393 }
394 
395 TEST_F(SpelledWordsTest, LikelyIdentifier) {
396   EXPECT_FALSE(word("// ^foo ").LikelyIdentifier);
397   EXPECT_TRUE(word("// [[^foo_bar]] ").LikelyIdentifier);
398   EXPECT_TRUE(word("// [[^fooBar]] ").LikelyIdentifier);
399   EXPECT_FALSE(word("// H^TTP ").LikelyIdentifier);
400   EXPECT_TRUE(word("// \\p [[^foo]] ").LikelyIdentifier);
401   EXPECT_TRUE(word("// @param[in] [[^foo]] ").LikelyIdentifier);
402   EXPECT_TRUE(word("// `[[f^oo]]` ").LikelyIdentifier);
403   EXPECT_TRUE(word("// bar::[[f^oo]] ").LikelyIdentifier);
404   EXPECT_TRUE(word("// [[f^oo]]::bar ").LikelyIdentifier);
405 }
406 
407 TEST_F(SpelledWordsTest, Comment) {
408   auto W = word("// [[^foo]]");
409   EXPECT_FALSE(W.PartOfSpelledToken);
410   EXPECT_FALSE(W.SpelledToken);
411   EXPECT_FALSE(W.ExpandedToken);
412 }
413 
414 TEST_F(SpelledWordsTest, PartOfString) {
415   auto W = word(R"( auto str = "foo [[^bar]] baz"; )");
416   ASSERT_TRUE(W.PartOfSpelledToken);
417   EXPECT_EQ(W.PartOfSpelledToken->kind(), tok::string_literal);
418   EXPECT_FALSE(W.SpelledToken);
419   EXPECT_FALSE(W.ExpandedToken);
420 }
421 
422 TEST_F(SpelledWordsTest, DisabledSection) {
423   auto W = word(R"cpp(
424     #if 0
425     foo [[^bar]] baz
426     #endif
427     )cpp");
428   ASSERT_TRUE(W.SpelledToken);
429   EXPECT_EQ(W.SpelledToken->kind(), tok::identifier);
430   EXPECT_EQ(W.SpelledToken, W.PartOfSpelledToken);
431   EXPECT_FALSE(W.ExpandedToken);
432 }
433 
434 TEST_F(SpelledWordsTest, Macros) {
435   auto W = word(R"cpp(
436     #define ID(X) X
437     ID(int [[^i]]);
438     )cpp");
439   ASSERT_TRUE(W.SpelledToken);
440   EXPECT_EQ(W.SpelledToken->kind(), tok::identifier);
441   EXPECT_EQ(W.SpelledToken, W.PartOfSpelledToken);
442   ASSERT_TRUE(W.ExpandedToken);
443   EXPECT_EQ(W.ExpandedToken->kind(), tok::identifier);
444 
445   W = word(R"cpp(
446     #define OBJECT Expansion;
447     int [[^OBJECT]];
448     )cpp");
449   EXPECT_TRUE(W.SpelledToken);
450   EXPECT_FALSE(W.ExpandedToken) << "Expanded token is spelled differently";
451 }
452 
453 TEST(SourceCodeTests, VisibleNamespaces) {
454   std::vector<std::pair<const char *, std::vector<std::string>>> Cases = {
455       {
456           R"cpp(
457             // Using directive resolved against enclosing namespaces.
458             using namespace foo;
459             namespace ns {
460             using namespace bar;
461           )cpp",
462           {"ns", "", "bar", "foo", "ns::bar"},
463       },
464       {
465           R"cpp(
466             // Don't include namespaces we've closed, ignore namespace aliases.
467             using namespace clang;
468             using std::swap;
469             namespace clang {
470             namespace clangd {}
471             namespace ll = ::llvm;
472             }
473             namespace clang {
474           )cpp",
475           {"clang", ""},
476       },
477       {
478           R"cpp(
479             // Using directives visible even if a namespace is reopened.
480             // Ignore anonymous namespaces.
481             namespace foo{ using namespace bar; }
482             namespace foo{ namespace {
483           )cpp",
484           {"foo", "", "bar", "foo::bar"},
485       },
486       {
487           R"cpp(
488             // Mismatched braces
489             namespace foo{}
490             }}}
491             namespace bar{
492           )cpp",
493           {"bar", ""},
494       },
495       {
496           R"cpp(
497             // Namespaces with multiple chunks.
498             namespace a::b {
499               using namespace c::d;
500               namespace e::f {
501           )cpp",
502           {
503               "a::b::e::f",
504               "",
505               "a",
506               "a::b",
507               "a::b::c::d",
508               "a::b::e",
509               "a::c::d",
510               "c::d",
511           },
512       },
513       {
514           "",
515           {""},
516       },
517       {
518           R"cpp(
519             // Parse until EOF
520             namespace bar{})cpp",
521           {""},
522       },
523   };
524   for (const auto &Case : Cases) {
525     EXPECT_EQ(Case.second,
526               visibleNamespaces(Case.first, format::getFormattingLangOpts(
527                                                 format::getLLVMStyle())))
528         << Case.first;
529   }
530 }
531 
532 TEST(SourceCodeTests, GetMacros) {
533   Annotations Code(R"cpp(
534      #define MACRO 123
535      int abc = MA^CRO;
536    )cpp");
537   TestTU TU = TestTU::withCode(Code.code());
538   auto AST = TU.build();
539   auto CurLoc = sourceLocationInMainFile(AST.getSourceManager(), Code.point());
540   ASSERT_TRUE(bool(CurLoc));
541   const auto *Id = syntax::spelledIdentifierTouching(*CurLoc, AST.getTokens());
542   ASSERT_TRUE(Id);
543   auto Result = locateMacroAt(*Id, AST.getPreprocessor());
544   ASSERT_TRUE(Result);
545   EXPECT_THAT(*Result, macroName("MACRO"));
546 }
547 
548 TEST(SourceCodeTests, WorksAtBeginOfFile) {
549   Annotations Code("^MACRO");
550   TestTU TU = TestTU::withCode(Code.code());
551   TU.HeaderCode = "#define MACRO int x;";
552   auto AST = TU.build();
553   auto CurLoc = sourceLocationInMainFile(AST.getSourceManager(), Code.point());
554   ASSERT_TRUE(bool(CurLoc));
555   const auto *Id = syntax::spelledIdentifierTouching(*CurLoc, AST.getTokens());
556   ASSERT_TRUE(Id);
557   auto Result = locateMacroAt(*Id, AST.getPreprocessor());
558   ASSERT_TRUE(Result);
559   EXPECT_THAT(*Result, macroName("MACRO"));
560 }
561 
562 TEST(SourceCodeTests, IsInsideMainFile) {
563   TestTU TU;
564   TU.HeaderCode = R"cpp(
565     #define DEFINE_CLASS(X) class X {};
566     #define DEFINE_YY DEFINE_CLASS(YY)
567 
568     class Header1 {};
569     DEFINE_CLASS(Header2)
570     class Header {};
571   )cpp";
572   TU.Code = R"cpp(
573     #define DEFINE_MAIN4 class Main4{};
574     class Main1 {};
575     DEFINE_CLASS(Main2)
576     DEFINE_YY
577     class Main {};
578     DEFINE_MAIN4
579   )cpp";
580   TU.ExtraArgs.push_back("-DHeader=Header3");
581   TU.ExtraArgs.push_back("-DMain=Main3");
582   auto AST = TU.build();
583   const auto &SM = AST.getSourceManager();
584   auto DeclLoc = [&AST](llvm::StringRef Name) {
585     return findDecl(AST, Name).getLocation();
586   };
587   for (const auto *HeaderDecl : {"Header1", "Header2", "Header3"})
588     EXPECT_FALSE(isInsideMainFile(DeclLoc(HeaderDecl), SM)) << HeaderDecl;
589 
590   for (const auto *MainDecl : {"Main1", "Main2", "Main3", "Main4", "YY"})
591     EXPECT_TRUE(isInsideMainFile(DeclLoc(MainDecl), SM)) << MainDecl;
592 
593   // Main4 is *spelled* in the preamble, but in the main-file part of it.
594   EXPECT_TRUE(isInsideMainFile(SM.getSpellingLoc(DeclLoc("Main4")), SM));
595 }
596 
597 // Test for functions toHalfOpenFileRange and getHalfOpenFileRange
598 TEST(SourceCodeTests, HalfOpenFileRange) {
599   // Each marked range should be the file range of the decl with the same name
600   // and each name should be unique.
601   Annotations Test(R"cpp(
602     #define FOO(X, Y) int Y = ++X
603     #define BAR(X) X + 1
604     #define ECHO(X) X
605 
606     #define BUZZ BAZZ(ADD)
607     #define BAZZ(m) m(1)
608     #define ADD(a) int f = a + 1;
609     template<typename T>
610     class P {};
611 
612     int main() {
613       $a[[P<P<P<P<P<int>>>>> a]];
614       $b[[int b = 1]];
615       $c[[FOO(b, c)]];
616       $d[[FOO(BAR(BAR(b)), d)]];
617       // FIXME: We might want to select everything inside the outer ECHO.
618       ECHO(ECHO($e[[int) ECHO(e]]));
619       // Shouldn't crash.
620       $f[[BUZZ]];
621     }
622   )cpp");
623 
624   ParsedAST AST = TestTU::withCode(Test.code()).build();
625   llvm::errs() << Test.code();
626   const SourceManager &SM = AST.getSourceManager();
627   const LangOptions &LangOpts = AST.getLangOpts();
628   // Turn a SourceLocation into a pair of positions
629   auto SourceRangeToRange = [&SM](SourceRange SrcRange) {
630     return Range{sourceLocToPosition(SM, SrcRange.getBegin()),
631                  sourceLocToPosition(SM, SrcRange.getEnd())};
632   };
633   auto CheckRange = [&](llvm::StringRef Name) {
634     const NamedDecl &Decl = findUnqualifiedDecl(AST, Name);
635     auto FileRange = toHalfOpenFileRange(SM, LangOpts, Decl.getSourceRange());
636     SCOPED_TRACE("Checking range: " + Name);
637     ASSERT_NE(FileRange, std::nullopt);
638     Range HalfOpenRange = SourceRangeToRange(*FileRange);
639     EXPECT_EQ(HalfOpenRange, Test.ranges(Name)[0]);
640   };
641 
642   CheckRange("a");
643   CheckRange("b");
644   CheckRange("c");
645   CheckRange("d");
646   CheckRange("e");
647   CheckRange("f");
648 }
649 
650 TEST(SourceCodeTests, HalfOpenFileRangePathologicalPreprocessor) {
651   const char *Case = R"cpp(
652 #define MACRO while(1)
653     void test() {
654 [[#include "Expand.inc"
655         br^eak]];
656     }
657   )cpp";
658   Annotations Test(Case);
659   auto TU = TestTU::withCode(Test.code());
660   TU.AdditionalFiles["Expand.inc"] = "MACRO\n";
661   auto AST = TU.build();
662 
663   const auto &Func = cast<FunctionDecl>(findDecl(AST, "test"));
664   const auto &Body = cast<CompoundStmt>(Func.getBody());
665   const auto &Loop = cast<WhileStmt>(*Body->child_begin());
666   llvm::Optional<SourceRange> Range = toHalfOpenFileRange(
667       AST.getSourceManager(), AST.getLangOpts(), Loop->getSourceRange());
668   ASSERT_TRUE(Range) << "Failed to get file range";
669   EXPECT_EQ(AST.getSourceManager().getFileOffset(Range->getBegin()),
670             Test.llvm::Annotations::range().Begin);
671   EXPECT_EQ(AST.getSourceManager().getFileOffset(Range->getEnd()),
672             Test.llvm::Annotations::range().End);
673 }
674 
675 TEST(SourceCodeTests, IncludeHashLoc) {
676   const char *Case = R"cpp(
677 $foo^#include "foo.inc"
678 #define HEADER "bar.inc"
679   $bar^#  include HEADER
680   )cpp";
681   Annotations Test(Case);
682   auto TU = TestTU::withCode(Test.code());
683   TU.AdditionalFiles["foo.inc"] = "int foo;\n";
684   TU.AdditionalFiles["bar.inc"] = "int bar;\n";
685   auto AST = TU.build();
686   const auto &SM = AST.getSourceManager();
687 
688   FileID Foo = SM.getFileID(findDecl(AST, "foo").getLocation());
689   EXPECT_EQ(SM.getFileOffset(includeHashLoc(Foo, SM)),
690             Test.llvm::Annotations::point("foo"));
691   FileID Bar = SM.getFileID(findDecl(AST, "bar").getLocation());
692   EXPECT_EQ(SM.getFileOffset(includeHashLoc(Bar, SM)),
693             Test.llvm::Annotations::point("bar"));
694 }
695 
696 TEST(SourceCodeTests, GetEligiblePoints) {
697   constexpr struct {
698     const char *Code;
699     const char *FullyQualifiedName;
700     const char *EnclosingNamespace;
701   } Cases[] = {
702       {R"cpp(// FIXME: We should also mark positions before and after
703                  //declarations/definitions as eligible.
704               namespace ns1 {
705               namespace a { namespace ns2 {} }
706               namespace ns2 {^
707               void foo();
708               namespace {}
709               void bar() {}
710               namespace ns3 {}
711               class T {};
712               ^}
713               using namespace ns2;
714               })cpp",
715        "ns1::ns2::symbol", "ns1::ns2::"},
716       {R"cpp(
717               namespace ns1 {^
718               namespace a { namespace ns2 {} }
719               namespace b {}
720               namespace ns {}
721               ^})cpp",
722        "ns1::ns2::symbol", "ns1::"},
723       {R"cpp(
724               namespace x {
725               namespace a { namespace ns2 {} }
726               namespace b {}
727               namespace ns {}
728               }^)cpp",
729        "ns1::ns2::symbol", ""},
730       {R"cpp(
731               namespace ns1 {
732               namespace ns2 {^^}
733               namespace b {}
734               namespace ns2 {^^}
735               }
736               namespace ns1 {namespace ns2 {^^}})cpp",
737        "ns1::ns2::symbol", "ns1::ns2::"},
738       {R"cpp(
739               namespace ns1 {^
740               namespace ns {}
741               namespace b {}
742               namespace ns {}
743               ^}
744               namespace ns1 {^namespace ns {}^})cpp",
745        "ns1::ns2::symbol", "ns1::"},
746   };
747   for (auto Case : Cases) {
748     Annotations Test(Case.Code);
749 
750     auto Res = getEligiblePoints(
751         Test.code(), Case.FullyQualifiedName,
752         format::getFormattingLangOpts(format::getLLVMStyle()));
753     EXPECT_THAT(Res.EligiblePoints, testing::ElementsAreArray(Test.points()))
754         << Test.code();
755     EXPECT_EQ(Res.EnclosingNamespace, Case.EnclosingNamespace) << Test.code();
756   }
757 }
758 
759 TEST(SourceCodeTests, IdentifierRanges) {
760   Annotations Code(R"cpp(
761    class [[Foo]] {};
762    // Foo
763    /* Foo */
764    void f([[Foo]]* foo1) {
765      [[Foo]] foo2;
766      auto S = [[Foo]]();
767 // cross-line identifier is not supported.
768 F\
769 o\
770 o foo2;
771    }
772   )cpp");
773   LangOptions LangOpts;
774   LangOpts.CPlusPlus = true;
775   EXPECT_EQ(Code.ranges(),
776             collectIdentifierRanges("Foo", Code.code(), LangOpts));
777 }
778 
779 TEST(SourceCodeTests, isHeaderFile) {
780   // Without lang options.
781   EXPECT_TRUE(isHeaderFile("foo.h"));
782   EXPECT_TRUE(isHeaderFile("foo.hh"));
783   EXPECT_TRUE(isHeaderFile("foo.hpp"));
784 
785   EXPECT_FALSE(isHeaderFile("foo.cpp"));
786   EXPECT_FALSE(isHeaderFile("foo.c++"));
787   EXPECT_FALSE(isHeaderFile("foo.cxx"));
788   EXPECT_FALSE(isHeaderFile("foo.cc"));
789   EXPECT_FALSE(isHeaderFile("foo.c"));
790   EXPECT_FALSE(isHeaderFile("foo.mm"));
791   EXPECT_FALSE(isHeaderFile("foo.m"));
792 
793   // With lang options
794   LangOptions LangOpts;
795   LangOpts.IsHeaderFile = true;
796   EXPECT_TRUE(isHeaderFile("string", LangOpts));
797   // Emulate cases where there is no "-x header" flag for a .h file, we still
798   // want to treat it as a header.
799   LangOpts.IsHeaderFile = false;
800   EXPECT_TRUE(isHeaderFile("header.h", LangOpts));
801 }
802 
803 TEST(SourceCodeTests, isKeywords) {
804   LangOptions LangOpts;
805   LangOpts.CPlusPlus20 = true;
806   EXPECT_TRUE(isKeyword("int", LangOpts));
807   EXPECT_TRUE(isKeyword("return", LangOpts));
808   EXPECT_TRUE(isKeyword("co_await", LangOpts));
809 
810   // these are identifiers (not keywords!) with special meaning in some
811   // contexts.
812   EXPECT_FALSE(isKeyword("final", LangOpts));
813   EXPECT_FALSE(isKeyword("override", LangOpts));
814 }
815 
816 struct IncrementalTestStep {
817   llvm::StringRef Src;
818   llvm::StringRef Contents;
819 };
820 
821 int rangeLength(llvm::StringRef Code, const Range &Rng) {
822   llvm::Expected<size_t> Start = positionToOffset(Code, Rng.start);
823   llvm::Expected<size_t> End = positionToOffset(Code, Rng.end);
824   assert(Start);
825   assert(End);
826   return *End - *Start;
827 }
828 
829 /// Send the changes one by one to updateDraft, verify the intermediate results.
830 void stepByStep(llvm::ArrayRef<IncrementalTestStep> Steps) {
831   std::string Code = Annotations(Steps.front().Src).code().str();
832 
833   for (size_t I = 1; I < Steps.size(); I++) {
834     Annotations SrcBefore(Steps[I - 1].Src);
835     Annotations SrcAfter(Steps[I].Src);
836     llvm::StringRef Contents = Steps[I - 1].Contents;
837     TextDocumentContentChangeEvent Event{
838         SrcBefore.range(),
839         rangeLength(SrcBefore.code(), SrcBefore.range()),
840         Contents.str(),
841     };
842 
843     EXPECT_THAT_ERROR(applyChange(Code, Event), llvm::Succeeded());
844     EXPECT_EQ(Code, SrcAfter.code());
845   }
846 }
847 
848 TEST(ApplyEditsTest, Simple) {
849   // clang-format off
850   IncrementalTestStep Steps[] =
851     {
852       // Replace a range
853       {
854 R"cpp(static int
855 hello[[World]]()
856 {})cpp",
857         "Universe"
858       },
859       // Delete a range
860       {
861 R"cpp(static int
862 hello[[Universe]]()
863 {})cpp",
864         ""
865       },
866       // Add a range
867       {
868 R"cpp(static int
869 hello[[]]()
870 {})cpp",
871         "Monde"
872       },
873       {
874 R"cpp(static int
875 helloMonde()
876 {})cpp",
877         ""
878       }
879     };
880   // clang-format on
881 
882   stepByStep(Steps);
883 }
884 
885 TEST(ApplyEditsTest, MultiLine) {
886   // clang-format off
887   IncrementalTestStep Steps[] =
888     {
889       // Replace a range
890       {
891 R"cpp(static [[int
892 helloWorld]]()
893 {})cpp",
894 R"cpp(char
895 welcome)cpp"
896       },
897       // Delete a range
898       {
899 R"cpp(static char[[
900 welcome]]()
901 {})cpp",
902         ""
903       },
904       // Add a range
905       {
906 R"cpp(static char[[]]()
907 {})cpp",
908         R"cpp(
909 cookies)cpp"
910       },
911       // Replace the whole file
912       {
913 R"cpp([[static char
914 cookies()
915 {}]])cpp",
916         R"cpp(#include <stdio.h>
917 )cpp"
918       },
919       // Delete the whole file
920       {
921         R"cpp([[#include <stdio.h>
922 ]])cpp",
923         "",
924       },
925       // Add something to an empty file
926       {
927         "[[]]",
928         R"cpp(int main() {
929 )cpp",
930       },
931       {
932         R"cpp(int main() {
933 )cpp",
934         ""
935       }
936     };
937   // clang-format on
938 
939   stepByStep(Steps);
940 }
941 
942 TEST(ApplyEditsTest, WrongRangeLength) {
943   std::string Code = "int main() {}\n";
944 
945   TextDocumentContentChangeEvent Change;
946   Change.range.emplace();
947   Change.range->start.line = 0;
948   Change.range->start.character = 0;
949   Change.range->end.line = 0;
950   Change.range->end.character = 2;
951   Change.rangeLength = 10;
952 
953   EXPECT_THAT_ERROR(applyChange(Code, Change),
954                     FailedWithMessage("Change's rangeLength (10) doesn't match "
955                                       "the computed range length (2)."));
956 }
957 
958 // Test that we correct observed buggy edits from Neovim.
959 TEST(ApplyEditsTets, BuggyNeovimEdits) {
960   TextDocumentContentChangeEvent Change;
961   Change.range.emplace();
962 
963   // https://github.com/neovim/neovim/issues/17085
964   // Adding a blank line after a (missing) newline
965   std::string Code = "a";
966   Change.range->start.line = 1;
967   Change.range->start.character = 0;
968   Change.range->end.line = 1;
969   Change.range->start.character = 0;
970   Change.rangeLength = 0;
971   Change.text = "\n";
972   EXPECT_THAT_ERROR(applyChange(Code, Change), llvm::Succeeded());
973   EXPECT_EQ(Code, "a\n\n");
974 
975   // https://github.com/neovim/neovim/issues/17085#issuecomment-1269162264
976   // Replacing the (missing) newline with \n\n in an empty file.
977   Code = "";
978   Change.range->start.line = 0;
979   Change.range->start.character = 0;
980   Change.range->end.line = 1;
981   Change.range->end.character = 0;
982   Change.rangeLength = 1;
983   Change.text = "\n\n";
984 
985   EXPECT_THAT_ERROR(applyChange(Code, Change), llvm::Succeeded());
986   EXPECT_EQ(Code, "\n\n");
987 
988   // We do not apply the heuristic fixes if the rangeLength doesn't match.
989   Code = "";
990   Change.rangeLength = 0;
991   EXPECT_THAT_ERROR(applyChange(Code, Change),
992                     FailedWithMessage("Change's rangeLength (0) doesn't match "
993                                       "the computed range length (1)."));
994 }
995 
996 TEST(ApplyEditsTest, EndBeforeStart) {
997   std::string Code = "int main() {}\n";
998 
999   TextDocumentContentChangeEvent Change;
1000   Change.range.emplace();
1001   Change.range->start.line = 0;
1002   Change.range->start.character = 5;
1003   Change.range->end.line = 0;
1004   Change.range->end.character = 3;
1005 
1006   EXPECT_THAT_ERROR(
1007       applyChange(Code, Change),
1008       FailedWithMessage(
1009           "Range's end position (0:3) is before start position (0:5)"));
1010 }
1011 
1012 TEST(ApplyEditsTest, StartCharOutOfRange) {
1013   std::string Code = "int main() {}\n";
1014 
1015   TextDocumentContentChangeEvent Change;
1016   Change.range.emplace();
1017   Change.range->start.line = 0;
1018   Change.range->start.character = 100;
1019   Change.range->end.line = 0;
1020   Change.range->end.character = 100;
1021   Change.text = "foo";
1022 
1023   EXPECT_THAT_ERROR(
1024       applyChange(Code, Change),
1025       FailedWithMessage("utf-16 offset 100 is invalid for line 0"));
1026 }
1027 
1028 TEST(ApplyEditsTest, EndCharOutOfRange) {
1029   std::string Code = "int main() {}\n";
1030 
1031   TextDocumentContentChangeEvent Change;
1032   Change.range.emplace();
1033   Change.range->start.line = 0;
1034   Change.range->start.character = 0;
1035   Change.range->end.line = 0;
1036   Change.range->end.character = 100;
1037   Change.text = "foo";
1038 
1039   EXPECT_THAT_ERROR(
1040       applyChange(Code, Change),
1041       FailedWithMessage("utf-16 offset 100 is invalid for line 0"));
1042 }
1043 
1044 TEST(ApplyEditsTest, StartLineOutOfRange) {
1045   std::string Code = "int main() {}\n";
1046 
1047   TextDocumentContentChangeEvent Change;
1048   Change.range.emplace();
1049   Change.range->start.line = 100;
1050   Change.range->start.character = 0;
1051   Change.range->end.line = 100;
1052   Change.range->end.character = 0;
1053   Change.text = "foo";
1054 
1055   EXPECT_THAT_ERROR(applyChange(Code, Change),
1056                     FailedWithMessage("Line value is out of range (100)"));
1057 }
1058 
1059 TEST(ApplyEditsTest, EndLineOutOfRange) {
1060   std::string Code = "int main() {}\n";
1061 
1062   TextDocumentContentChangeEvent Change;
1063   Change.range.emplace();
1064   Change.range->start.line = 0;
1065   Change.range->start.character = 0;
1066   Change.range->end.line = 100;
1067   Change.range->end.character = 0;
1068   Change.text = "foo";
1069 
1070   EXPECT_THAT_ERROR(applyChange(Code, Change),
1071                     FailedWithMessage("Line value is out of range (100)"));
1072 }
1073 
1074 } // namespace
1075 } // namespace clangd
1076 } // namespace clang
1077