1 //===-- Serialization.cpp - Binary serialization of index data ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Serialization.h" 10 #include "Headers.h" 11 #include "RIFF.h" 12 #include "index/MemIndex.h" 13 #include "index/SymbolLocation.h" 14 #include "index/SymbolOrigin.h" 15 #include "index/dex/Dex.h" 16 #include "support/Logger.h" 17 #include "support/Trace.h" 18 #include "clang/Tooling/CompilationDatabase.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/Compiler.h" 21 #include "llvm/Support/Compression.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cstdint> 26 #include <vector> 27 28 namespace clang { 29 namespace clangd { 30 namespace { 31 32 // IO PRIMITIVES 33 // We use little-endian 32 bit ints, sometimes with variable-length encoding. 34 // 35 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte 36 // to encode the number, and the top bit to indicate whether more bytes follow. 37 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop]. 38 // This represents 0x1a | 0x2f<<7 = 6042. 39 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact. 40 41 // Reads binary data from a StringRef, and keeps track of position. 42 class Reader { 43 const char *Begin, *End; 44 bool Err = false; 45 46 public: 47 Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {} 48 // The "error" bit is set by reading past EOF or reading invalid data. 49 // When in an error state, reads may return zero values: callers should check. 50 bool err() const { return Err; } 51 // Did we read all the data, or encounter an error? 52 bool eof() const { return Begin == End || Err; } 53 // All the data we didn't read yet. 54 llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); } 55 56 uint8_t consume8() { 57 if (LLVM_UNLIKELY(Begin == End)) { 58 Err = true; 59 return 0; 60 } 61 return *Begin++; 62 } 63 64 uint32_t consume32() { 65 if (LLVM_UNLIKELY(Begin + 4 > End)) { 66 Err = true; 67 return 0; 68 } 69 auto Ret = llvm::support::endian::read32le(Begin); 70 Begin += 4; 71 return Ret; 72 } 73 74 llvm::StringRef consume(int N) { 75 if (LLVM_UNLIKELY(Begin + N > End)) { 76 Err = true; 77 return llvm::StringRef(); 78 } 79 llvm::StringRef Ret(Begin, N); 80 Begin += N; 81 return Ret; 82 } 83 84 uint32_t consumeVar() { 85 constexpr static uint8_t More = 1 << 7; 86 87 // Use a 32 bit unsigned here to prevent promotion to signed int (unless int 88 // is wider than 32 bits). 89 uint32_t B = consume8(); 90 if (LLVM_LIKELY(!(B & More))) 91 return B; 92 uint32_t Val = B & ~More; 93 for (int Shift = 7; B & More && Shift < 32; Shift += 7) { 94 B = consume8(); 95 // 5th byte of a varint can only have lowest 4 bits set. 96 assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding"); 97 Val |= (B & ~More) << Shift; 98 } 99 return Val; 100 } 101 102 llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) { 103 auto StringIndex = consumeVar(); 104 if (LLVM_UNLIKELY(StringIndex >= Strings.size())) { 105 Err = true; 106 return llvm::StringRef(); 107 } 108 return Strings[StringIndex]; 109 } 110 111 SymbolID consumeID() { 112 llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated. 113 return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw); 114 } 115 116 // Read a varint (as consumeVar) and resize the container accordingly. 117 // If the size is invalid, return false and mark an error. 118 // (The caller should abort in this case). 119 template <typename T> [[nodiscard]] bool consumeSize(T &Container) { 120 auto Size = consumeVar(); 121 // Conservatively assume each element is at least one byte. 122 if (Size > (size_t)(End - Begin)) { 123 Err = true; 124 return false; 125 } 126 Container.resize(Size); 127 return true; 128 } 129 }; 130 131 void write32(uint32_t I, llvm::raw_ostream &OS) { 132 char Buf[4]; 133 llvm::support::endian::write32le(Buf, I); 134 OS.write(Buf, sizeof(Buf)); 135 } 136 137 void writeVar(uint32_t I, llvm::raw_ostream &OS) { 138 constexpr static uint8_t More = 1 << 7; 139 if (LLVM_LIKELY(I < 1 << 7)) { 140 OS.write(I); 141 return; 142 } 143 for (;;) { 144 OS.write(I | More); 145 I >>= 7; 146 if (I < 1 << 7) { 147 OS.write(I); 148 return; 149 } 150 } 151 } 152 153 // STRING TABLE ENCODING 154 // Index data has many string fields, and many strings are identical. 155 // We store each string once, and refer to them by index. 156 // 157 // The string table's format is: 158 // - UncompressedSize : uint32 (or 0 for no compression) 159 // - CompressedData : byte[CompressedSize] 160 // 161 // CompressedData is a zlib-compressed byte[UncompressedSize]. 162 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0". 163 // These are sorted to improve compression. 164 165 // Maps each string to a canonical representation. 166 // Strings remain owned externally (e.g. by SymbolSlab). 167 class StringTableOut { 168 llvm::DenseSet<llvm::StringRef> Unique; 169 std::vector<llvm::StringRef> Sorted; 170 // Since strings are interned, look up can be by pointer. 171 llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index; 172 173 public: 174 StringTableOut() { 175 // Ensure there's at least one string in the table. 176 // Table size zero is reserved to indicate no compression. 177 Unique.insert(""); 178 } 179 // Add a string to the table. Overwrites S if an identical string exists. 180 void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; }; 181 // Finalize the table and write it to OS. No more strings may be added. 182 void finalize(llvm::raw_ostream &OS) { 183 Sorted = {Unique.begin(), Unique.end()}; 184 llvm::sort(Sorted); 185 for (unsigned I = 0; I < Sorted.size(); ++I) 186 Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I); 187 188 std::string RawTable; 189 for (llvm::StringRef S : Sorted) { 190 RawTable.append(std::string(S)); 191 RawTable.push_back(0); 192 } 193 if (llvm::compression::zlib::isAvailable()) { 194 llvm::SmallVector<uint8_t, 0> Compressed; 195 llvm::compression::zlib::compress(llvm::arrayRefFromStringRef(RawTable), 196 Compressed); 197 write32(RawTable.size(), OS); 198 OS << llvm::toStringRef(Compressed); 199 } else { 200 write32(0, OS); // No compression. 201 OS << RawTable; 202 } 203 } 204 // Get the ID of an string, which must be interned. Table must be finalized. 205 unsigned index(llvm::StringRef S) const { 206 assert(!Sorted.empty() && "table not finalized"); 207 assert(Index.count({S.data(), S.size()}) && "string not interned"); 208 return Index.find({S.data(), S.size()})->second; 209 } 210 }; 211 212 struct StringTableIn { 213 llvm::BumpPtrAllocator Arena; 214 std::vector<llvm::StringRef> Strings; 215 }; 216 217 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) { 218 Reader R(Data); 219 size_t UncompressedSize = R.consume32(); 220 if (R.err()) 221 return error("Truncated string table"); 222 223 llvm::StringRef Uncompressed; 224 llvm::SmallVector<uint8_t, 0> UncompressedStorage; 225 if (UncompressedSize == 0) // No compression 226 Uncompressed = R.rest(); 227 else if (llvm::compression::zlib::isAvailable()) { 228 // Don't allocate a massive buffer if UncompressedSize was corrupted 229 // This is effective for sharded index, but not big monolithic ones, as 230 // once compressed size reaches 4MB nothing can be ruled out. 231 // Theoretical max ratio from https://zlib.net/zlib_tech.html 232 constexpr int MaxCompressionRatio = 1032; 233 if (UncompressedSize / MaxCompressionRatio > R.rest().size()) 234 return error("Bad stri table: uncompress {0} -> {1} bytes is implausible", 235 R.rest().size(), UncompressedSize); 236 237 if (llvm::Error E = llvm::compression::zlib::decompress( 238 llvm::arrayRefFromStringRef(R.rest()), UncompressedStorage, 239 UncompressedSize)) 240 return std::move(E); 241 Uncompressed = toStringRef(UncompressedStorage); 242 } else 243 return error("Compressed string table, but zlib is unavailable"); 244 245 StringTableIn Table; 246 llvm::StringSaver Saver(Table.Arena); 247 R = Reader(Uncompressed); 248 for (Reader R(Uncompressed); !R.eof();) { 249 auto Len = R.rest().find(0); 250 if (Len == llvm::StringRef::npos) 251 return error("Bad string table: not null terminated"); 252 Table.Strings.push_back(Saver.save(R.consume(Len))); 253 R.consume8(); 254 } 255 if (R.err()) 256 return error("Truncated string table"); 257 return std::move(Table); 258 } 259 260 // SYMBOL ENCODING 261 // Each field of clangd::Symbol is encoded in turn (see implementation). 262 // - StringRef fields encode as varint (index into the string table) 263 // - enums encode as the underlying type 264 // - most numbers encode as varint 265 266 void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings, 267 llvm::raw_ostream &OS) { 268 writeVar(Strings.index(Loc.FileURI), OS); 269 for (const auto &Endpoint : {Loc.Start, Loc.End}) { 270 writeVar(Endpoint.line(), OS); 271 writeVar(Endpoint.column(), OS); 272 } 273 } 274 275 SymbolLocation readLocation(Reader &Data, 276 llvm::ArrayRef<llvm::StringRef> Strings) { 277 SymbolLocation Loc; 278 Loc.FileURI = Data.consumeString(Strings).data(); 279 for (auto *Endpoint : {&Loc.Start, &Loc.End}) { 280 Endpoint->setLine(Data.consumeVar()); 281 Endpoint->setColumn(Data.consumeVar()); 282 } 283 return Loc; 284 } 285 286 IncludeGraphNode readIncludeGraphNode(Reader &Data, 287 llvm::ArrayRef<llvm::StringRef> Strings) { 288 IncludeGraphNode IGN; 289 IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8()); 290 IGN.URI = Data.consumeString(Strings); 291 llvm::StringRef Digest = Data.consume(IGN.Digest.size()); 292 std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin()); 293 if (!Data.consumeSize(IGN.DirectIncludes)) 294 return IGN; 295 for (llvm::StringRef &Include : IGN.DirectIncludes) 296 Include = Data.consumeString(Strings); 297 return IGN; 298 } 299 300 void writeIncludeGraphNode(const IncludeGraphNode &IGN, 301 const StringTableOut &Strings, 302 llvm::raw_ostream &OS) { 303 OS.write(static_cast<uint8_t>(IGN.Flags)); 304 writeVar(Strings.index(IGN.URI), OS); 305 llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()), 306 IGN.Digest.size()); 307 OS << Hash; 308 writeVar(IGN.DirectIncludes.size(), OS); 309 for (llvm::StringRef Include : IGN.DirectIncludes) 310 writeVar(Strings.index(Include), OS); 311 } 312 313 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings, 314 llvm::raw_ostream &OS) { 315 OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists, 316 // symbol IDs should probably be in a string table. 317 OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind)); 318 OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang)); 319 writeVar(Strings.index(Sym.Name), OS); 320 writeVar(Strings.index(Sym.Scope), OS); 321 writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS); 322 writeLocation(Sym.Definition, Strings, OS); 323 writeLocation(Sym.CanonicalDeclaration, Strings, OS); 324 writeVar(Sym.References, OS); 325 OS.write(static_cast<uint8_t>(Sym.Flags)); 326 writeVar(Strings.index(Sym.Signature), OS); 327 writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS); 328 writeVar(Strings.index(Sym.Documentation), OS); 329 writeVar(Strings.index(Sym.ReturnType), OS); 330 writeVar(Strings.index(Sym.Type), OS); 331 332 auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) { 333 writeVar(Strings.index(Include.IncludeHeader), OS); 334 writeVar((Include.References << 2) | Include.SupportedDirectives, OS); 335 }; 336 writeVar(Sym.IncludeHeaders.size(), OS); 337 for (const auto &Include : Sym.IncludeHeaders) 338 WriteInclude(Include); 339 } 340 341 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings, 342 SymbolOrigin Origin) { 343 Symbol Sym; 344 Sym.ID = Data.consumeID(); 345 Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8()); 346 Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8()); 347 Sym.Name = Data.consumeString(Strings); 348 Sym.Scope = Data.consumeString(Strings); 349 Sym.TemplateSpecializationArgs = Data.consumeString(Strings); 350 Sym.Definition = readLocation(Data, Strings); 351 Sym.CanonicalDeclaration = readLocation(Data, Strings); 352 Sym.References = Data.consumeVar(); 353 Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8()); 354 Sym.Origin = Origin; 355 Sym.Signature = Data.consumeString(Strings); 356 Sym.CompletionSnippetSuffix = Data.consumeString(Strings); 357 Sym.Documentation = Data.consumeString(Strings); 358 Sym.ReturnType = Data.consumeString(Strings); 359 Sym.Type = Data.consumeString(Strings); 360 if (!Data.consumeSize(Sym.IncludeHeaders)) 361 return Sym; 362 for (auto &I : Sym.IncludeHeaders) { 363 I.IncludeHeader = Data.consumeString(Strings); 364 uint32_t RefsWithDirectives = Data.consumeVar(); 365 I.References = RefsWithDirectives >> 2; 366 I.SupportedDirectives = RefsWithDirectives & 0x3; 367 } 368 return Sym; 369 } 370 371 // REFS ENCODING 372 // A refs section has data grouped by Symbol. Each symbol has: 373 // - SymbolID: 8 bytes 374 // - NumRefs: varint 375 // - Ref[NumRefs] 376 // Fields of Ref are encoded in turn, see implementation. 377 378 void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs, 379 const StringTableOut &Strings, llvm::raw_ostream &OS) { 380 OS << ID.raw(); 381 writeVar(Refs.size(), OS); 382 for (const auto &Ref : Refs) { 383 OS.write(static_cast<unsigned char>(Ref.Kind)); 384 writeLocation(Ref.Location, Strings, OS); 385 OS << Ref.Container.raw(); 386 } 387 } 388 389 std::pair<SymbolID, std::vector<Ref>> 390 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) { 391 std::pair<SymbolID, std::vector<Ref>> Result; 392 Result.first = Data.consumeID(); 393 if (!Data.consumeSize(Result.second)) 394 return Result; 395 for (auto &Ref : Result.second) { 396 Ref.Kind = static_cast<RefKind>(Data.consume8()); 397 Ref.Location = readLocation(Data, Strings); 398 Ref.Container = Data.consumeID(); 399 } 400 return Result; 401 } 402 403 // RELATIONS ENCODING 404 // A relations section is a flat list of relations. Each relation has: 405 // - SymbolID (subject): 8 bytes 406 // - relation kind (predicate): 1 byte 407 // - SymbolID (object): 8 bytes 408 // In the future, we might prefer a packed representation if the need arises. 409 410 void writeRelation(const Relation &R, llvm::raw_ostream &OS) { 411 OS << R.Subject.raw(); 412 OS.write(static_cast<uint8_t>(R.Predicate)); 413 OS << R.Object.raw(); 414 } 415 416 Relation readRelation(Reader &Data) { 417 SymbolID Subject = Data.consumeID(); 418 RelationKind Predicate = static_cast<RelationKind>(Data.consume8()); 419 SymbolID Object = Data.consumeID(); 420 return {Subject, Predicate, Object}; 421 } 422 423 struct InternedCompileCommand { 424 llvm::StringRef Directory; 425 std::vector<llvm::StringRef> CommandLine; 426 }; 427 428 void writeCompileCommand(const InternedCompileCommand &Cmd, 429 const StringTableOut &Strings, 430 llvm::raw_ostream &CmdOS) { 431 writeVar(Strings.index(Cmd.Directory), CmdOS); 432 writeVar(Cmd.CommandLine.size(), CmdOS); 433 for (llvm::StringRef C : Cmd.CommandLine) 434 writeVar(Strings.index(C), CmdOS); 435 } 436 437 InternedCompileCommand 438 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) { 439 InternedCompileCommand Cmd; 440 Cmd.Directory = CmdReader.consumeString(Strings); 441 if (!CmdReader.consumeSize(Cmd.CommandLine)) 442 return Cmd; 443 for (llvm::StringRef &C : Cmd.CommandLine) 444 C = CmdReader.consumeString(Strings); 445 return Cmd; 446 } 447 448 // FILE ENCODING 449 // A file is a RIFF chunk with type 'CdIx'. 450 // It contains the sections: 451 // - meta: version number 452 // - srcs: information related to include graph 453 // - stri: string table 454 // - symb: symbols 455 // - refs: references to symbols 456 457 // The current versioning scheme is simple - non-current versions are rejected. 458 // If you make a breaking change, bump this version number to invalidate stored 459 // data. Later we may want to support some backward compatibility. 460 constexpr static uint32_t Version = 20; 461 462 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, 463 SymbolOrigin Origin) { 464 auto RIFF = riff::readFile(Data); 465 if (!RIFF) 466 return RIFF.takeError(); 467 if (RIFF->Type != riff::fourCC("CdIx")) 468 return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type)); 469 llvm::StringMap<llvm::StringRef> Chunks; 470 for (const auto &Chunk : RIFF->Chunks) 471 Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()), 472 Chunk.Data); 473 474 if (!Chunks.count("meta")) 475 return error("missing meta chunk"); 476 Reader Meta(Chunks.lookup("meta")); 477 auto SeenVersion = Meta.consume32(); 478 if (SeenVersion != Version) 479 return error("wrong version: want {0}, got {1}", Version, SeenVersion); 480 481 // meta chunk is checked above, as we prefer the "version mismatch" error. 482 for (llvm::StringRef RequiredChunk : {"stri"}) 483 if (!Chunks.count(RequiredChunk)) 484 return error("missing required chunk {0}", RequiredChunk); 485 486 auto Strings = readStringTable(Chunks.lookup("stri")); 487 if (!Strings) 488 return Strings.takeError(); 489 490 IndexFileIn Result; 491 if (Chunks.count("srcs")) { 492 Reader SrcsReader(Chunks.lookup("srcs")); 493 Result.Sources.emplace(); 494 while (!SrcsReader.eof()) { 495 auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings); 496 auto Entry = Result.Sources->try_emplace(IGN.URI).first; 497 Entry->getValue() = std::move(IGN); 498 // We change all the strings inside the structure to point at the keys in 499 // the map, since it is the only copy of the string that's going to live. 500 Entry->getValue().URI = Entry->getKey(); 501 for (auto &Include : Entry->getValue().DirectIncludes) 502 Include = Result.Sources->try_emplace(Include).first->getKey(); 503 } 504 if (SrcsReader.err()) 505 return error("malformed or truncated include uri"); 506 } 507 508 if (Chunks.count("symb")) { 509 Reader SymbolReader(Chunks.lookup("symb")); 510 SymbolSlab::Builder Symbols; 511 while (!SymbolReader.eof()) 512 Symbols.insert(readSymbol(SymbolReader, Strings->Strings, Origin)); 513 if (SymbolReader.err()) 514 return error("malformed or truncated symbol"); 515 Result.Symbols = std::move(Symbols).build(); 516 } 517 if (Chunks.count("refs")) { 518 Reader RefsReader(Chunks.lookup("refs")); 519 RefSlab::Builder Refs; 520 while (!RefsReader.eof()) { 521 auto RefsBundle = readRefs(RefsReader, Strings->Strings); 522 for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert? 523 Refs.insert(RefsBundle.first, Ref); 524 } 525 if (RefsReader.err()) 526 return error("malformed or truncated refs"); 527 Result.Refs = std::move(Refs).build(); 528 } 529 if (Chunks.count("rela")) { 530 Reader RelationsReader(Chunks.lookup("rela")); 531 RelationSlab::Builder Relations; 532 while (!RelationsReader.eof()) 533 Relations.insert(readRelation(RelationsReader)); 534 if (RelationsReader.err()) 535 return error("malformed or truncated relations"); 536 Result.Relations = std::move(Relations).build(); 537 } 538 if (Chunks.count("cmdl")) { 539 Reader CmdReader(Chunks.lookup("cmdl")); 540 InternedCompileCommand Cmd = 541 readCompileCommand(CmdReader, Strings->Strings); 542 if (CmdReader.err()) 543 return error("malformed or truncated commandline section"); 544 Result.Cmd.emplace(); 545 Result.Cmd->Directory = std::string(Cmd.Directory); 546 Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size()); 547 for (llvm::StringRef C : Cmd.CommandLine) 548 Result.Cmd->CommandLine.emplace_back(C); 549 } 550 return std::move(Result); 551 } 552 553 template <class Callback> 554 void visitStrings(IncludeGraphNode &IGN, const Callback &CB) { 555 CB(IGN.URI); 556 for (llvm::StringRef &Include : IGN.DirectIncludes) 557 CB(Include); 558 } 559 560 void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) { 561 assert(Data.Symbols && "An index file without symbols makes no sense!"); 562 riff::File RIFF; 563 RIFF.Type = riff::fourCC("CdIx"); 564 565 llvm::SmallString<4> Meta; 566 { 567 llvm::raw_svector_ostream MetaOS(Meta); 568 write32(Version, MetaOS); 569 } 570 RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); 571 572 StringTableOut Strings; 573 std::vector<Symbol> Symbols; 574 for (const auto &Sym : *Data.Symbols) { 575 Symbols.emplace_back(Sym); 576 visitStrings(Symbols.back(), 577 [&](llvm::StringRef &S) { Strings.intern(S); }); 578 } 579 std::vector<IncludeGraphNode> Sources; 580 if (Data.Sources) 581 for (const auto &Source : *Data.Sources) { 582 Sources.push_back(Source.getValue()); 583 visitStrings(Sources.back(), 584 [&](llvm::StringRef &S) { Strings.intern(S); }); 585 } 586 587 std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs; 588 if (Data.Refs) { 589 for (const auto &Sym : *Data.Refs) { 590 Refs.emplace_back(Sym); 591 for (auto &Ref : Refs.back().second) { 592 llvm::StringRef File = Ref.Location.FileURI; 593 Strings.intern(File); 594 Ref.Location.FileURI = File.data(); 595 } 596 } 597 } 598 599 std::vector<Relation> Relations; 600 if (Data.Relations) { 601 for (const auto &Relation : *Data.Relations) { 602 Relations.emplace_back(Relation); 603 // No strings to be interned in relations. 604 } 605 } 606 607 InternedCompileCommand InternedCmd; 608 if (Data.Cmd) { 609 InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size()); 610 InternedCmd.Directory = Data.Cmd->Directory; 611 Strings.intern(InternedCmd.Directory); 612 for (llvm::StringRef C : Data.Cmd->CommandLine) { 613 InternedCmd.CommandLine.emplace_back(C); 614 Strings.intern(InternedCmd.CommandLine.back()); 615 } 616 } 617 618 std::string StringSection; 619 { 620 llvm::raw_string_ostream StringOS(StringSection); 621 Strings.finalize(StringOS); 622 } 623 RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection}); 624 625 std::string SymbolSection; 626 { 627 llvm::raw_string_ostream SymbolOS(SymbolSection); 628 for (const auto &Sym : Symbols) 629 writeSymbol(Sym, Strings, SymbolOS); 630 } 631 RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection}); 632 633 std::string RefsSection; 634 if (Data.Refs) { 635 { 636 llvm::raw_string_ostream RefsOS(RefsSection); 637 for (const auto &Sym : Refs) 638 writeRefs(Sym.first, Sym.second, Strings, RefsOS); 639 } 640 RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); 641 } 642 643 std::string RelationSection; 644 if (Data.Relations) { 645 { 646 llvm::raw_string_ostream RelationOS{RelationSection}; 647 for (const auto &Relation : Relations) 648 writeRelation(Relation, RelationOS); 649 } 650 RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection}); 651 } 652 653 std::string SrcsSection; 654 { 655 { 656 llvm::raw_string_ostream SrcsOS(SrcsSection); 657 for (const auto &SF : Sources) 658 writeIncludeGraphNode(SF, Strings, SrcsOS); 659 } 660 RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection}); 661 } 662 663 std::string CmdlSection; 664 if (Data.Cmd) { 665 { 666 llvm::raw_string_ostream CmdOS(CmdlSection); 667 writeCompileCommand(InternedCmd, Strings, CmdOS); 668 } 669 RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection}); 670 } 671 672 OS << RIFF; 673 } 674 675 } // namespace 676 677 // Defined in YAMLSerialization.cpp. 678 void writeYAML(const IndexFileOut &, llvm::raw_ostream &); 679 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef, SymbolOrigin Origin); 680 681 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) { 682 switch (O.Format) { 683 case IndexFileFormat::RIFF: 684 writeRIFF(O, OS); 685 break; 686 case IndexFileFormat::YAML: 687 writeYAML(O, OS); 688 break; 689 } 690 return OS; 691 } 692 693 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data, 694 SymbolOrigin Origin) { 695 if (Data.starts_with("RIFF")) { 696 return readRIFF(Data, Origin); 697 } 698 if (auto YAMLContents = readYAML(Data, Origin)) { 699 return std::move(*YAMLContents); 700 } else { 701 return error("Not a RIFF file and failed to parse as YAML: {0}", 702 YAMLContents.takeError()); 703 } 704 } 705 706 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename, 707 SymbolOrigin Origin, bool UseDex, 708 bool SupportContainedRefs) { 709 trace::Span OverallTracer("LoadIndex"); 710 auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); 711 if (!Buffer) { 712 elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message()); 713 return nullptr; 714 } 715 716 SymbolSlab Symbols; 717 RefSlab Refs; 718 RelationSlab Relations; 719 { 720 trace::Span Tracer("ParseIndex"); 721 if (auto I = readIndexFile(Buffer->get()->getBuffer(), Origin)) { 722 if (I->Symbols) 723 Symbols = std::move(*I->Symbols); 724 if (I->Refs) 725 Refs = std::move(*I->Refs); 726 if (I->Relations) 727 Relations = std::move(*I->Relations); 728 } else { 729 elog("Bad index file: {0}", I.takeError()); 730 return nullptr; 731 } 732 } 733 734 size_t NumSym = Symbols.size(); 735 size_t NumRefs = Refs.numRefs(); 736 size_t NumRelations = Relations.size(); 737 738 trace::Span Tracer("BuildIndex"); 739 auto Index = UseDex 740 ? dex::Dex::build(std::move(Symbols), std::move(Refs), 741 std::move(Relations), SupportContainedRefs) 742 : MemIndex::build(std::move(Symbols), std::move(Refs), 743 std::move(Relations)); 744 vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n" 745 " - number of symbols: {3}\n" 746 " - number of refs: {4}\n" 747 " - number of relations: {5}", 748 UseDex ? "Dex" : "MemIndex", SymbolFilename, 749 Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations); 750 return Index; 751 } 752 753 } // namespace clangd 754 } // namespace clang 755