xref: /llvm-project/clang-tools-extra/clangd/unittests/SerializationTests.cpp (revision b8d6885ff67efbc3142a2b49506ed0cc2b95e054)
1 //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Headers.h"
10 #include "RIFF.h"
11 #include "index/Serialization.h"
12 #include "support/Logger.h"
13 #include "clang/Tooling/CompilationDatabase.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
16 #include "llvm/Support/Compression.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/ScopedPrinter.h"
19 #include "gmock/gmock.h"
20 #include "gtest/gtest.h"
21 #ifdef LLVM_ON_UNIX
22 #include <sys/resource.h>
23 #endif
24 
25 using ::testing::ElementsAre;
26 using ::testing::Pair;
27 using ::testing::UnorderedElementsAre;
28 using ::testing::UnorderedElementsAreArray;
29 
30 namespace clang {
31 namespace clangd {
32 namespace {
33 
// Shared YAML fixture used by every test in this file. It contains, in order:
//  - two !Symbol documents (clang::Foo1 with four include headers, and
//    clang::Foo2 with a signature and snippet suffix),
//  - one !Refs document holding a single reference to the first symbol,
//  - one !Relations document,
//  - one !Cmd document (compile command: directory + command line),
//  - one !Source document (include-graph node with digest and includes).
// The pointer itself is const so the fixture cannot be reseated by a test.
const char *const YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name:   'Foo1'
Scope:   'clang::'
SymInfo:
  Kind:            Function
  Lang:            Cpp
CanonicalDeclaration:
  FileURI:        file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags:    129
Documentation:    'Foo doc'
ReturnType:    'int'
IncludeHeaders:
  - Header:    'include1'
    References:    7
    Directives:      [ Include ]
  - Header:    'include2'
    References:    3
    Directives:      [ Import ]
  - Header:    'include3'
    References:    2
    Directives:      [ Include, Import ]
  - Header:    'include4'
    References:    1
    Directives:      [ ]
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name:   'Foo2'
Scope:   'clang::'
SymInfo:
  Kind:            Function
  Lang:            Cpp
CanonicalDeclaration:
  FileURI:        file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags:    2
Signature:    '-sig'
CompletionSnippetSuffix:    '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI:    file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID:              6481EE7AF2841756
Predicate:       0
Object:
  ID:              6512AEC512EA3A2D
...
--- !Cmd
Directory:       'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI:             'file:///path/source1.cpp'
Flags:           1
Digest:          EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
123 
124 MATCHER_P(id, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
125 MATCHER_P(qName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
126 MATCHER_P3(IncludeHeaderWithRefAndDirectives, IncludeHeader, References,
127            SupportedDirectives, "") {
128   return (arg.IncludeHeader == IncludeHeader) &&
129          (arg.References == References) &&
130          (arg.SupportedDirectives == SupportedDirectives);
131 }
132 
133 auto readIndexFile(llvm::StringRef Text) {
134   return readIndexFile(Text, SymbolOrigin::Static);
135 }
136 
137 TEST(SerializationTest, NoCrashOnEmptyYAML) {
138   EXPECT_TRUE(bool(readIndexFile("")));
139 }
140 
141 TEST(SerializationTest, YAMLConversions) {
142   auto ParsedYAML = readIndexFile(YAML);
143   ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
144   ASSERT_TRUE(bool(ParsedYAML->Symbols));
145   EXPECT_THAT(
146       *ParsedYAML->Symbols,
147       UnorderedElementsAre(id("057557CEBF6E6B2D"), id("057557CEBF6E6B2E")));
148 
149   auto Sym1 = *ParsedYAML->Symbols->find(
150       cantFail(SymbolID::fromStr("057557CEBF6E6B2D")));
151   auto Sym2 = *ParsedYAML->Symbols->find(
152       cantFail(SymbolID::fromStr("057557CEBF6E6B2E")));
153 
154   EXPECT_THAT(Sym1, qName("clang::Foo1"));
155   EXPECT_EQ(Sym1.Signature, "");
156   EXPECT_EQ(Sym1.Documentation, "Foo doc");
157   EXPECT_EQ(Sym1.ReturnType, "int");
158   EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
159   EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static);
160   EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129);
161   EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
162   EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
163   EXPECT_THAT(
164       Sym1.IncludeHeaders,
165       UnorderedElementsAre(
166           IncludeHeaderWithRefAndDirectives("include1", 7u, Symbol::Include),
167           IncludeHeaderWithRefAndDirectives("include2", 3u, Symbol::Import),
168           IncludeHeaderWithRefAndDirectives("include3", 2u,
169                                             Symbol::Include | Symbol::Import),
170           IncludeHeaderWithRefAndDirectives("include4", 1u, Symbol::Invalid)));
171 
172   EXPECT_THAT(Sym2, qName("clang::Foo2"));
173   EXPECT_EQ(Sym2.Signature, "-sig");
174   EXPECT_EQ(Sym2.ReturnType, "");
175   EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
176             "file:///path/bar.h");
177   EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
178   EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
179 
180   ASSERT_TRUE(bool(ParsedYAML->Refs));
181   EXPECT_THAT(
182       *ParsedYAML->Refs,
183       UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
184                                 ::testing::SizeIs(1))));
185   auto Ref1 = ParsedYAML->Refs->begin()->second.front();
186   EXPECT_EQ(Ref1.Kind, RefKind::Reference);
187   EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
188 
189   SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
190   SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
191   ASSERT_TRUE(bool(ParsedYAML->Relations));
192   EXPECT_THAT(
193       *ParsedYAML->Relations,
194       UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived}));
195 
196   ASSERT_TRUE(bool(ParsedYAML->Cmd));
197   auto &Cmd = *ParsedYAML->Cmd;
198   ASSERT_EQ(Cmd.Directory, "testdir");
199   EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2"));
200 
201   ASSERT_TRUE(bool(ParsedYAML->Sources));
202   const auto *URI = "file:///path/source1.cpp";
203   ASSERT_TRUE(ParsedYAML->Sources->count(URI));
204   auto IGNDeserialized = ParsedYAML->Sources->lookup(URI);
205   EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C");
206   EXPECT_THAT(IGNDeserialized.DirectIncludes,
207               ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
208   EXPECT_EQ(IGNDeserialized.URI, URI);
209   EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1));
210 }
211 
212 std::vector<std::string> yamlFromSymbols(const SymbolSlab &Slab) {
213   std::vector<std::string> Result;
214   for (const auto &Sym : Slab)
215     Result.push_back(toYAML(Sym));
216   return Result;
217 }
218 std::vector<std::string> yamlFromRefs(const RefSlab &Slab) {
219   std::vector<std::string> Result;
220   for (const auto &Refs : Slab)
221     Result.push_back(toYAML(Refs));
222   return Result;
223 }
224 
225 std::vector<std::string> yamlFromRelations(const RelationSlab &Slab) {
226   std::vector<std::string> Result;
227   for (const auto &Rel : Slab)
228     Result.push_back(toYAML(Rel));
229   return Result;
230 }
231 
232 TEST(SerializationTest, BinaryConversions) {
233   auto In = readIndexFile(YAML);
234   EXPECT_TRUE(bool(In)) << In.takeError();
235 
236   // Write to binary format, and parse again.
237   IndexFileOut Out(*In);
238   Out.Format = IndexFileFormat::RIFF;
239   std::string Serialized = llvm::to_string(Out);
240 
241   auto In2 = readIndexFile(Serialized);
242   ASSERT_TRUE(bool(In2)) << In2.takeError();
243   ASSERT_TRUE(In2->Symbols);
244   ASSERT_TRUE(In2->Refs);
245   ASSERT_TRUE(In2->Relations);
246 
247   // Assert the YAML serializations match, for nice comparisons and diffs.
248   EXPECT_THAT(yamlFromSymbols(*In2->Symbols),
249               UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
250   EXPECT_THAT(yamlFromRefs(*In2->Refs),
251               UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
252   EXPECT_THAT(yamlFromRelations(*In2->Relations),
253               UnorderedElementsAreArray(yamlFromRelations(*In->Relations)));
254 }
255 
256 TEST(SerializationTest, SrcsTest) {
257   auto In = readIndexFile(YAML);
258   EXPECT_TRUE(bool(In)) << In.takeError();
259 
260   std::string TestContent("TestContent");
261   IncludeGraphNode IGN;
262   IGN.Digest = digest(TestContent);
263   IGN.DirectIncludes = {"inc1", "inc2"};
264   IGN.URI = "URI";
265   IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;
266   IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
267   IncludeGraph Sources;
268   Sources[IGN.URI] = IGN;
269   // Write to binary format, and parse again.
270   IndexFileOut Out(*In);
271   Out.Format = IndexFileFormat::RIFF;
272   Out.Sources = &Sources;
273   {
274     std::string Serialized = llvm::to_string(Out);
275 
276     auto In = readIndexFile(Serialized);
277     ASSERT_TRUE(bool(In)) << In.takeError();
278     ASSERT_TRUE(In->Symbols);
279     ASSERT_TRUE(In->Refs);
280     ASSERT_TRUE(In->Sources);
281     ASSERT_TRUE(In->Sources->count(IGN.URI));
282     // Assert the YAML serializations match, for nice comparisons and diffs.
283     EXPECT_THAT(yamlFromSymbols(*In->Symbols),
284                 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
285     EXPECT_THAT(yamlFromRefs(*In->Refs),
286                 UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
287     auto IGNDeserialized = In->Sources->lookup(IGN.URI);
288     EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
289     EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
290     EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
291     EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags);
292   }
293 }
294 
295 TEST(SerializationTest, CmdlTest) {
296   auto In = readIndexFile(YAML);
297   EXPECT_TRUE(bool(In)) << In.takeError();
298 
299   tooling::CompileCommand Cmd;
300   Cmd.Directory = "testdir";
301   Cmd.CommandLine.push_back("cmd1");
302   Cmd.CommandLine.push_back("cmd2");
303   Cmd.Filename = "ignored";
304   Cmd.Heuristic = "ignored";
305   Cmd.Output = "ignored";
306 
307   IndexFileOut Out(*In);
308   Out.Format = IndexFileFormat::RIFF;
309   Out.Cmd = &Cmd;
310   {
311     std::string Serialized = llvm::to_string(Out);
312 
313     auto In = readIndexFile(Serialized);
314     ASSERT_TRUE(bool(In)) << In.takeError();
315     ASSERT_TRUE(In->Cmd);
316 
317     const tooling::CompileCommand &SerializedCmd = *In->Cmd;
318     EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine);
319     EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory);
320     EXPECT_NE(SerializedCmd.Filename, Cmd.Filename);
321     EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic);
322     EXPECT_NE(SerializedCmd.Output, Cmd.Output);
323   }
324 }
325 
326 // rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD.
327 // Sanitizers use a lot of address space, so we can't apply strict limits.
328 #if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD &&     \
329     !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_THREAD_SANITIZER_BUILD
330 class ScopedMemoryLimit {
331   struct rlimit OriginalLimit;
332   bool Succeeded = false;
333 
334 public:
335   ScopedMemoryLimit(rlim_t Bytes) {
336     if (!getrlimit(RLIMIT_AS, &OriginalLimit)) {
337       struct rlimit NewLimit = OriginalLimit;
338       NewLimit.rlim_cur = Bytes;
339       Succeeded = !setrlimit(RLIMIT_AS, &NewLimit);
340     }
341     if (!Succeeded)
342       log("Failed to set rlimit");
343   }
344 
345   ~ScopedMemoryLimit() {
346     if (Succeeded)
347       setrlimit(RLIMIT_AS, &OriginalLimit);
348   }
349 };
350 #else
351 class ScopedMemoryLimit {
352 public:
353   ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
354 };
355 #endif
356 
357 // Test that our deserialization detects invalid array sizes without allocating.
358 // If this detection fails, the test should allocate a huge array and crash.
359 TEST(SerializationTest, NoCrashOnBadArraySize) {
360   // This test is tricky because we need to construct a subtly invalid file.
361   // First, create a valid serialized file.
362   auto In = readIndexFile(YAML);
363   ASSERT_FALSE(!In) << In.takeError();
364   IndexFileOut Out(*In);
365   Out.Format = IndexFileFormat::RIFF;
366   std::string Serialized = llvm::to_string(Out);
367 
368   // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
369   auto Parsed = riff::readFile(Serialized);
370   ASSERT_FALSE(!Parsed) << Parsed.takeError();
371   auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
372     return C.ID == riff::fourCC("srcs");
373   });
374   ASSERT_NE(Srcs, Parsed->Chunks.end());
375 
376   // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
377   // The node has:
378   //  - 1 byte: flags (1)
379   //  - varint(stringID): URI
380   //  - 8 byte: file digest
381   //  - varint: DirectIncludes.length
382   //  - repeated varint(stringID): DirectIncludes
383   // We want to set DirectIncludes.length to a huge number.
384   // The offset isn't trivial to find, so we use the file digest.
385   std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
386   unsigned Pos = Srcs->Data.find_first_of(FileDigest);
387   ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest";
388   Pos += FileDigest.size();
389 
390   // Varints are little-endian base-128 numbers, where the top-bit of each byte
391   // indicates whether there are more. ffffffff0f -> 0xffffffff.
392   std::string CorruptSrcs =
393       (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") +
394        "some_random_garbage")
395           .str();
396   Srcs->Data = CorruptSrcs;
397 
398   // Try to crash rather than hang on large allocation.
399   ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
400 
401   std::string CorruptFile = llvm::to_string(*Parsed);
402   auto CorruptParsed = readIndexFile(CorruptFile);
403   ASSERT_TRUE(!CorruptParsed);
404   EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
405             "malformed or truncated include uri");
406 }
407 
408 // Check we detect invalid string table size size without allocating it first.
409 // If this detection fails, the test should allocate a huge array and crash.
410 TEST(SerializationTest, NoCrashOnBadStringTableSize) {
411   if (!llvm::compression::zlib::isAvailable()) {
412     log("skipping test, no zlib");
413     return;
414   }
415 
416   // First, create a valid serialized file.
417   auto In = readIndexFile(YAML);
418   ASSERT_FALSE(!In) << In.takeError();
419   IndexFileOut Out(*In);
420   Out.Format = IndexFileFormat::RIFF;
421   std::string Serialized = llvm::to_string(Out);
422 
423   // Low-level parse it again, we're going to replace the `stri` chunk.
424   auto Parsed = riff::readFile(Serialized);
425   ASSERT_FALSE(!Parsed) << Parsed.takeError();
426   auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
427     return C.ID == riff::fourCC("stri");
428   });
429   ASSERT_NE(Stri, Parsed->Chunks.end());
430 
431   // stri consists of an 8 byte uncompressed-size, and then compressed data.
432   // We'll claim our small amount of data expands to 4GB
433   std::string CorruptStri =
434       (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
435   Stri->Data = CorruptStri;
436   std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
437 
438   // Try to crash rather than hang on large allocation.
439   ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
440 
441   std::string CorruptFile = llvm::to_string(*Parsed);
442   auto CorruptParsed = readIndexFile(CorruptFile);
443   ASSERT_TRUE(!CorruptParsed);
444   EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
445               testing::HasSubstr("bytes is implausible"));
446 }
447 
448 } // namespace
449 } // namespace clangd
450 } // namespace clang
451