1 //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Headers.h" 10 #include "RIFF.h" 11 #include "index/Serialization.h" 12 #include "support/Logger.h" 13 #include "clang/Tooling/CompilationDatabase.h" 14 #include "llvm/ADT/StringExtras.h" 15 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX 16 #include "llvm/Support/Compression.h" 17 #include "llvm/Support/Error.h" 18 #include "llvm/Support/ScopedPrinter.h" 19 #include "gmock/gmock.h" 20 #include "gtest/gtest.h" 21 #ifdef LLVM_ON_UNIX 22 #include <sys/resource.h> 23 #endif 24 25 using ::testing::ElementsAre; 26 using ::testing::Pair; 27 using ::testing::UnorderedElementsAre; 28 using ::testing::UnorderedElementsAreArray; 29 30 namespace clang { 31 namespace clangd { 32 namespace { 33 34 const char *YAML = R"( 35 --- 36 !Symbol 37 ID: 057557CEBF6E6B2D 38 Name: 'Foo1' 39 Scope: 'clang::' 40 SymInfo: 41 Kind: Function 42 Lang: Cpp 43 CanonicalDeclaration: 44 FileURI: file:///path/foo.h 45 Start: 46 Line: 1 47 Column: 0 48 End: 49 Line: 1 50 Column: 1 51 Flags: 129 52 Documentation: 'Foo doc' 53 ReturnType: 'int' 54 IncludeHeaders: 55 - Header: 'include1' 56 References: 7 57 Directives: [ Include ] 58 - Header: 'include2' 59 References: 3 60 Directives: [ Import ] 61 - Header: 'include3' 62 References: 2 63 Directives: [ Include, Import ] 64 - Header: 'include4' 65 References: 1 66 Directives: [ ] 67 ... 68 --- 69 !Symbol 70 ID: 057557CEBF6E6B2E 71 Name: 'Foo2' 72 Scope: 'clang::' 73 SymInfo: 74 Kind: Function 75 Lang: Cpp 76 CanonicalDeclaration: 77 FileURI: file:///path/bar.h 78 Start: 79 Line: 1 80 Column: 0 81 End: 82 Line: 1 83 Column: 1 84 Flags: 2 85 Signature: '-sig' 86 CompletionSnippetSuffix: '-snippet' 87 ... 88 !Refs 89 ID: 057557CEBF6E6B2D 90 References: 91 - Kind: 4 92 Location: 93 FileURI: file:///path/foo.cc 94 Start: 95 Line: 5 96 Column: 3 97 End: 98 Line: 5 99 Column: 8 100 ... 101 --- !Relations 102 Subject: 103 ID: 6481EE7AF2841756 104 Predicate: 0 105 Object: 106 ID: 6512AEC512EA3A2D 107 ... 108 --- !Cmd 109 Directory: 'testdir' 110 CommandLine: 111 - 'cmd1' 112 - 'cmd2' 113 ... 114 --- !Source 115 URI: 'file:///path/source1.cpp' 116 Flags: 1 117 Digest: EED8F5EAF25C453C 118 DirectIncludes: 119 - 'file:///path/inc1.h' 120 - 'file:///path/inc2.h' 121 ... 122 )"; 123 124 MATCHER_P(id, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); } 125 MATCHER_P(qName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } 126 MATCHER_P3(IncludeHeaderWithRefAndDirectives, IncludeHeader, References, 127 SupportedDirectives, "") { 128 return (arg.IncludeHeader == IncludeHeader) && 129 (arg.References == References) && 130 (arg.SupportedDirectives == SupportedDirectives); 131 } 132 133 auto readIndexFile(llvm::StringRef Text) { 134 return readIndexFile(Text, SymbolOrigin::Static); 135 } 136 137 TEST(SerializationTest, NoCrashOnEmptyYAML) { 138 EXPECT_TRUE(bool(readIndexFile(""))); 139 } 140 141 TEST(SerializationTest, YAMLConversions) { 142 auto ParsedYAML = readIndexFile(YAML); 143 ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); 144 ASSERT_TRUE(bool(ParsedYAML->Symbols)); 145 EXPECT_THAT( 146 *ParsedYAML->Symbols, 147 UnorderedElementsAre(id("057557CEBF6E6B2D"), id("057557CEBF6E6B2E"))); 148 149 auto Sym1 = *ParsedYAML->Symbols->find( 150 cantFail(SymbolID::fromStr("057557CEBF6E6B2D"))); 151 auto Sym2 = *ParsedYAML->Symbols->find( 152 cantFail(SymbolID::fromStr("057557CEBF6E6B2E"))); 153 154 EXPECT_THAT(Sym1, qName("clang::Foo1")); 155 EXPECT_EQ(Sym1.Signature, ""); 156 EXPECT_EQ(Sym1.Documentation, "Foo doc"); 157 EXPECT_EQ(Sym1.ReturnType, "int"); 158 EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h"); 159 EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static); 160 EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129); 161 EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion); 162 EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated); 163 EXPECT_THAT( 164 Sym1.IncludeHeaders, 165 UnorderedElementsAre( 166 IncludeHeaderWithRefAndDirectives("include1", 7u, Symbol::Include), 167 IncludeHeaderWithRefAndDirectives("include2", 3u, Symbol::Import), 168 IncludeHeaderWithRefAndDirectives("include3", 2u, 169 Symbol::Include | Symbol::Import), 170 IncludeHeaderWithRefAndDirectives("include4", 1u, Symbol::Invalid))); 171 172 EXPECT_THAT(Sym2, qName("clang::Foo2")); 173 EXPECT_EQ(Sym2.Signature, "-sig"); 174 EXPECT_EQ(Sym2.ReturnType, ""); 175 EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI), 176 "file:///path/bar.h"); 177 EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); 178 EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); 179 180 ASSERT_TRUE(bool(ParsedYAML->Refs)); 181 EXPECT_THAT( 182 *ParsedYAML->Refs, 183 UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")), 184 ::testing::SizeIs(1)))); 185 auto Ref1 = ParsedYAML->Refs->begin()->second.front(); 186 EXPECT_EQ(Ref1.Kind, RefKind::Reference); 187 EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc"); 188 189 SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756")); 190 SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D")); 191 ASSERT_TRUE(bool(ParsedYAML->Relations)); 192 EXPECT_THAT( 193 *ParsedYAML->Relations, 194 UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived})); 195 196 ASSERT_TRUE(bool(ParsedYAML->Cmd)); 197 auto &Cmd = *ParsedYAML->Cmd; 198 ASSERT_EQ(Cmd.Directory, "testdir"); 199 EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2")); 200 201 ASSERT_TRUE(bool(ParsedYAML->Sources)); 202 const auto *URI = "file:///path/source1.cpp"; 203 ASSERT_TRUE(ParsedYAML->Sources->count(URI)); 204 auto IGNDeserialized = ParsedYAML->Sources->lookup(URI); 205 EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C"); 206 EXPECT_THAT(IGNDeserialized.DirectIncludes, 207 ElementsAre("file:///path/inc1.h", "file:///path/inc2.h")); 208 EXPECT_EQ(IGNDeserialized.URI, URI); 209 EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1)); 210 } 211 212 std::vector<std::string> yamlFromSymbols(const SymbolSlab &Slab) { 213 std::vector<std::string> Result; 214 for (const auto &Sym : Slab) 215 Result.push_back(toYAML(Sym)); 216 return Result; 217 } 218 std::vector<std::string> yamlFromRefs(const RefSlab &Slab) { 219 std::vector<std::string> Result; 220 for (const auto &Refs : Slab) 221 Result.push_back(toYAML(Refs)); 222 return Result; 223 } 224 225 std::vector<std::string> yamlFromRelations(const RelationSlab &Slab) { 226 std::vector<std::string> Result; 227 for (const auto &Rel : Slab) 228 Result.push_back(toYAML(Rel)); 229 return Result; 230 } 231 232 TEST(SerializationTest, BinaryConversions) { 233 auto In = readIndexFile(YAML); 234 EXPECT_TRUE(bool(In)) << In.takeError(); 235 236 // Write to binary format, and parse again. 237 IndexFileOut Out(*In); 238 Out.Format = IndexFileFormat::RIFF; 239 std::string Serialized = llvm::to_string(Out); 240 241 auto In2 = readIndexFile(Serialized); 242 ASSERT_TRUE(bool(In2)) << In2.takeError(); 243 ASSERT_TRUE(In2->Symbols); 244 ASSERT_TRUE(In2->Refs); 245 ASSERT_TRUE(In2->Relations); 246 247 // Assert the YAML serializations match, for nice comparisons and diffs. 248 EXPECT_THAT(yamlFromSymbols(*In2->Symbols), 249 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols))); 250 EXPECT_THAT(yamlFromRefs(*In2->Refs), 251 UnorderedElementsAreArray(yamlFromRefs(*In->Refs))); 252 EXPECT_THAT(yamlFromRelations(*In2->Relations), 253 UnorderedElementsAreArray(yamlFromRelations(*In->Relations))); 254 } 255 256 TEST(SerializationTest, SrcsTest) { 257 auto In = readIndexFile(YAML); 258 EXPECT_TRUE(bool(In)) << In.takeError(); 259 260 std::string TestContent("TestContent"); 261 IncludeGraphNode IGN; 262 IGN.Digest = digest(TestContent); 263 IGN.DirectIncludes = {"inc1", "inc2"}; 264 IGN.URI = "URI"; 265 IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU; 266 IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors; 267 IncludeGraph Sources; 268 Sources[IGN.URI] = IGN; 269 // Write to binary format, and parse again. 270 IndexFileOut Out(*In); 271 Out.Format = IndexFileFormat::RIFF; 272 Out.Sources = &Sources; 273 { 274 std::string Serialized = llvm::to_string(Out); 275 276 auto In = readIndexFile(Serialized); 277 ASSERT_TRUE(bool(In)) << In.takeError(); 278 ASSERT_TRUE(In->Symbols); 279 ASSERT_TRUE(In->Refs); 280 ASSERT_TRUE(In->Sources); 281 ASSERT_TRUE(In->Sources->count(IGN.URI)); 282 // Assert the YAML serializations match, for nice comparisons and diffs. 283 EXPECT_THAT(yamlFromSymbols(*In->Symbols), 284 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols))); 285 EXPECT_THAT(yamlFromRefs(*In->Refs), 286 UnorderedElementsAreArray(yamlFromRefs(*In->Refs))); 287 auto IGNDeserialized = In->Sources->lookup(IGN.URI); 288 EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest); 289 EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes); 290 EXPECT_EQ(IGNDeserialized.URI, IGN.URI); 291 EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags); 292 } 293 } 294 295 TEST(SerializationTest, CmdlTest) { 296 auto In = readIndexFile(YAML); 297 EXPECT_TRUE(bool(In)) << In.takeError(); 298 299 tooling::CompileCommand Cmd; 300 Cmd.Directory = "testdir"; 301 Cmd.CommandLine.push_back("cmd1"); 302 Cmd.CommandLine.push_back("cmd2"); 303 Cmd.Filename = "ignored"; 304 Cmd.Heuristic = "ignored"; 305 Cmd.Output = "ignored"; 306 307 IndexFileOut Out(*In); 308 Out.Format = IndexFileFormat::RIFF; 309 Out.Cmd = &Cmd; 310 { 311 std::string Serialized = llvm::to_string(Out); 312 313 auto In = readIndexFile(Serialized); 314 ASSERT_TRUE(bool(In)) << In.takeError(); 315 ASSERT_TRUE(In->Cmd); 316 317 const tooling::CompileCommand &SerializedCmd = *In->Cmd; 318 EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine); 319 EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory); 320 EXPECT_NE(SerializedCmd.Filename, Cmd.Filename); 321 EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic); 322 EXPECT_NE(SerializedCmd.Output, Cmd.Output); 323 } 324 } 325 326 // rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD. 327 // Sanitizers use a lot of address space, so we can't apply strict limits. 328 #if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD && \ 329 !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_THREAD_SANITIZER_BUILD 330 class ScopedMemoryLimit { 331 struct rlimit OriginalLimit; 332 bool Succeeded = false; 333 334 public: 335 ScopedMemoryLimit(rlim_t Bytes) { 336 if (!getrlimit(RLIMIT_AS, &OriginalLimit)) { 337 struct rlimit NewLimit = OriginalLimit; 338 NewLimit.rlim_cur = Bytes; 339 Succeeded = !setrlimit(RLIMIT_AS, &NewLimit); 340 } 341 if (!Succeeded) 342 log("Failed to set rlimit"); 343 } 344 345 ~ScopedMemoryLimit() { 346 if (Succeeded) 347 setrlimit(RLIMIT_AS, &OriginalLimit); 348 } 349 }; 350 #else 351 class ScopedMemoryLimit { 352 public: 353 ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); } 354 }; 355 #endif 356 357 // Test that our deserialization detects invalid array sizes without allocating. 358 // If this detection fails, the test should allocate a huge array and crash. 359 TEST(SerializationTest, NoCrashOnBadArraySize) { 360 // This test is tricky because we need to construct a subtly invalid file. 361 // First, create a valid serialized file. 362 auto In = readIndexFile(YAML); 363 ASSERT_FALSE(!In) << In.takeError(); 364 IndexFileOut Out(*In); 365 Out.Format = IndexFileFormat::RIFF; 366 std::string Serialized = llvm::to_string(Out); 367 368 // Low-level parse it again and find the `srcs` chunk we're going to corrupt. 369 auto Parsed = riff::readFile(Serialized); 370 ASSERT_FALSE(!Parsed) << Parsed.takeError(); 371 auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) { 372 return C.ID == riff::fourCC("srcs"); 373 }); 374 ASSERT_NE(Srcs, Parsed->Chunks.end()); 375 376 // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one. 377 // The node has: 378 // - 1 byte: flags (1) 379 // - varint(stringID): URI 380 // - 8 byte: file digest 381 // - varint: DirectIncludes.length 382 // - repeated varint(stringID): DirectIncludes 383 // We want to set DirectIncludes.length to a huge number. 384 // The offset isn't trivial to find, so we use the file digest. 385 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C"); 386 unsigned Pos = Srcs->Data.find_first_of(FileDigest); 387 ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest"; 388 Pos += FileDigest.size(); 389 390 // Varints are little-endian base-128 numbers, where the top-bit of each byte 391 // indicates whether there are more. ffffffff0f -> 0xffffffff. 392 std::string CorruptSrcs = 393 (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") + 394 "some_random_garbage") 395 .str(); 396 Srcs->Data = CorruptSrcs; 397 398 // Try to crash rather than hang on large allocation. 399 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB 400 401 std::string CorruptFile = llvm::to_string(*Parsed); 402 auto CorruptParsed = readIndexFile(CorruptFile); 403 ASSERT_TRUE(!CorruptParsed); 404 EXPECT_EQ(llvm::toString(CorruptParsed.takeError()), 405 "malformed or truncated include uri"); 406 } 407 408 // Check we detect invalid string table size size without allocating it first. 409 // If this detection fails, the test should allocate a huge array and crash. 410 TEST(SerializationTest, NoCrashOnBadStringTableSize) { 411 if (!llvm::compression::zlib::isAvailable()) { 412 log("skipping test, no zlib"); 413 return; 414 } 415 416 // First, create a valid serialized file. 417 auto In = readIndexFile(YAML); 418 ASSERT_FALSE(!In) << In.takeError(); 419 IndexFileOut Out(*In); 420 Out.Format = IndexFileFormat::RIFF; 421 std::string Serialized = llvm::to_string(Out); 422 423 // Low-level parse it again, we're going to replace the `stri` chunk. 424 auto Parsed = riff::readFile(Serialized); 425 ASSERT_FALSE(!Parsed) << Parsed.takeError(); 426 auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) { 427 return C.ID == riff::fourCC("stri"); 428 }); 429 ASSERT_NE(Stri, Parsed->Chunks.end()); 430 431 // stri consists of an 8 byte uncompressed-size, and then compressed data. 432 // We'll claim our small amount of data expands to 4GB 433 std::string CorruptStri = 434 (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str(); 435 Stri->Data = CorruptStri; 436 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C"); 437 438 // Try to crash rather than hang on large allocation. 439 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB 440 441 std::string CorruptFile = llvm::to_string(*Parsed); 442 auto CorruptParsed = readIndexFile(CorruptFile); 443 ASSERT_TRUE(!CorruptParsed); 444 EXPECT_THAT(llvm::toString(CorruptParsed.takeError()), 445 testing::HasSubstr("bytes is implausible")); 446 } 447 448 } // namespace 449 } // namespace clangd 450 } // namespace clang 451