1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "FileIndex.h" 10 #include "CollectMacros.h" 11 #include "ParsedAST.h" 12 #include "clang-include-cleaner/Record.h" 13 #include "index/Index.h" 14 #include "index/MemIndex.h" 15 #include "index/Merge.h" 16 #include "index/Ref.h" 17 #include "index/Relation.h" 18 #include "index/Serialization.h" 19 #include "index/Symbol.h" 20 #include "index/SymbolCollector.h" 21 #include "index/SymbolID.h" 22 #include "index/SymbolOrigin.h" 23 #include "index/dex/Dex.h" 24 #include "support/Logger.h" 25 #include "support/MemoryTree.h" 26 #include "support/Path.h" 27 #include "clang/AST/ASTContext.h" 28 #include "clang/Index/IndexingAction.h" 29 #include "clang/Index/IndexingOptions.h" 30 #include "clang/Lex/Preprocessor.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/STLExtras.h" 33 #include "llvm/ADT/StringMap.h" 34 #include "llvm/ADT/StringRef.h" 35 #include <algorithm> 36 #include <memory> 37 #include <optional> 38 #include <tuple> 39 #include <utility> 40 #include <vector> 41 42 namespace clang { 43 namespace clangd { 44 namespace { 45 46 SlabTuple indexSymbols(ASTContext &AST, Preprocessor &PP, 47 llvm::ArrayRef<Decl *> DeclsToIndex, 48 const MainFileMacros *MacroRefsToIndex, 49 const include_cleaner::PragmaIncludes &PI, 50 bool IsIndexMainAST, llvm::StringRef Version, 51 bool CollectMainFileRefs) { 52 SymbolCollector::Options CollectorOpts; 53 CollectorOpts.CollectIncludePath = true; 54 CollectorOpts.PragmaIncludes = &PI; 55 CollectorOpts.CountReferences = false; 56 CollectorOpts.Origin = 57 IsIndexMainAST ? SymbolOrigin::Open : SymbolOrigin::Preamble; 58 CollectorOpts.CollectMainFileRefs = CollectMainFileRefs; 59 // We want stdlib implementation details in the index only if we've opened the 60 // file in question. This does means xrefs won't work, though. 61 CollectorOpts.CollectReserved = IsIndexMainAST; 62 63 index::IndexingOptions IndexOpts; 64 // We only need declarations, because we don't count references. 65 IndexOpts.SystemSymbolFilter = 66 index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly; 67 // We index function-local classes and its member functions only. 68 IndexOpts.IndexFunctionLocals = true; 69 if (IsIndexMainAST) { 70 // We only collect refs when indexing main AST. 71 CollectorOpts.RefFilter = RefKind::All; 72 // Comments for main file can always be obtained from sema, do not store 73 // them in the index. 74 CollectorOpts.StoreAllDocumentation = false; 75 } else { 76 IndexOpts.IndexMacrosInPreprocessor = true; 77 CollectorOpts.CollectMacro = true; 78 CollectorOpts.StoreAllDocumentation = true; 79 } 80 81 SymbolCollector Collector(std::move(CollectorOpts)); 82 Collector.setPreprocessor(PP); 83 index::indexTopLevelDecls(AST, PP, DeclsToIndex, Collector, IndexOpts); 84 if (MacroRefsToIndex) 85 Collector.handleMacros(*MacroRefsToIndex); 86 87 const auto &SM = AST.getSourceManager(); 88 const auto MainFileEntry = SM.getFileEntryRefForID(SM.getMainFileID()); 89 std::string FileName = 90 std::string(MainFileEntry ? MainFileEntry->getName() : ""); 91 92 auto Syms = Collector.takeSymbols(); 93 auto Refs = Collector.takeRefs(); 94 auto Relations = Collector.takeRelations(); 95 96 vlog("indexed {0} AST for {1} version {2}:\n" 97 " symbol slab: {3} symbols, {4} bytes\n" 98 " ref slab: {5} symbols, {6} refs, {7} bytes\n" 99 " relations slab: {8} relations, {9} bytes", 100 IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(), 101 Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(), 102 Relations.size(), Relations.bytes()); 103 return std::make_tuple(std::move(Syms), std::move(Refs), 104 std::move(Relations)); 105 } 106 107 // We keep only the node "U" and its edges. Any node other than "U" will be 108 // empty in the resultant graph. 109 IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) { 110 IncludeGraph IG; 111 112 auto Entry = IG.try_emplace(URI).first; 113 auto &Node = Entry->getValue(); 114 Node = FullGraph.lookup(Entry->getKey()); 115 Node.URI = Entry->getKey(); 116 117 // URIs inside nodes must point into the keys of the same IncludeGraph. 118 for (auto &Include : Node.DirectIncludes) { 119 auto I = IG.try_emplace(Include).first; 120 I->getValue().URI = I->getKey(); 121 Include = I->getKey(); 122 } 123 return IG; 124 } 125 } // namespace 126 127 FileShardedIndex::FileShardedIndex(IndexFileIn Input) 128 : Index(std::move(Input)) { 129 // Used to build RelationSlabs. 130 llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile; 131 132 // Attribute each Symbol to both their declaration and definition locations. 133 if (Index.Symbols) { 134 for (const auto &S : *Index.Symbols) { 135 auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI); 136 It.first->getValue().Symbols.insert(&S); 137 SymbolIDToFile[S.ID] = &It.first->getValue(); 138 // Only bother if definition file is different than declaration file. 139 if (S.Definition && 140 S.Definition.FileURI != S.CanonicalDeclaration.FileURI) { 141 auto It = Shards.try_emplace(S.Definition.FileURI); 142 It.first->getValue().Symbols.insert(&S); 143 } 144 } 145 } 146 // Attribute references into each file they occured in. 147 if (Index.Refs) { 148 for (const auto &SymRefs : *Index.Refs) { 149 for (const auto &R : SymRefs.second) { 150 const auto It = Shards.try_emplace(R.Location.FileURI); 151 It.first->getValue().Refs.insert(&R); 152 RefToSymID[&R] = SymRefs.first; 153 } 154 } 155 } 156 // The Subject and/or Object shards might be part of multiple TUs. In 157 // such cases there will be a race and the last TU to write the shard 158 // will win and all the other relations will be lost. To avoid this, 159 // we store relations in both shards. A race might still happen if the 160 // same translation unit produces different relations under different 161 // configurations, but that's something clangd doesn't handle in general. 162 if (Index.Relations) { 163 for (const auto &R : *Index.Relations) { 164 // FIXME: RelationSlab shouldn't contain dangling relations. 165 FileShard *SubjectFile = SymbolIDToFile.lookup(R.Subject); 166 FileShard *ObjectFile = SymbolIDToFile.lookup(R.Object); 167 if (SubjectFile) 168 SubjectFile->Relations.insert(&R); 169 if (ObjectFile && ObjectFile != SubjectFile) 170 ObjectFile->Relations.insert(&R); 171 } 172 } 173 // Store only the direct includes of a file in a shard. 174 if (Index.Sources) { 175 const auto &FullGraph = *Index.Sources; 176 for (const auto &It : FullGraph) { 177 auto ShardIt = Shards.try_emplace(It.first()); 178 ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph); 179 } 180 } 181 } 182 std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const { 183 // It should be enough to construct a vector with {Shards.keys().begin(), 184 // Shards.keys().end()} but MSVC fails to compile that. 185 std::vector<PathRef> Result; 186 Result.reserve(Shards.size()); 187 for (auto Key : Shards.keys()) 188 Result.push_back(Key); 189 return Result; 190 } 191 192 std::optional<IndexFileIn> 193 FileShardedIndex::getShard(llvm::StringRef Uri) const { 194 auto It = Shards.find(Uri); 195 if (It == Shards.end()) 196 return std::nullopt; 197 198 IndexFileIn IF; 199 IF.Sources = It->getValue().IG; 200 IF.Cmd = Index.Cmd; 201 202 SymbolSlab::Builder SymB; 203 for (const auto *S : It->getValue().Symbols) 204 SymB.insert(*S); 205 IF.Symbols = std::move(SymB).build(); 206 207 RefSlab::Builder RefB; 208 for (const auto *Ref : It->getValue().Refs) { 209 auto SID = RefToSymID.lookup(Ref); 210 RefB.insert(SID, *Ref); 211 } 212 IF.Refs = std::move(RefB).build(); 213 214 RelationSlab::Builder RelB; 215 for (const auto *Rel : It->getValue().Relations) { 216 RelB.insert(*Rel); 217 } 218 IF.Relations = std::move(RelB).build(); 219 // Explicit move here is needed by some compilers. 220 return std::move(IF); 221 } 222 223 SlabTuple indexMainDecls(ParsedAST &AST) { 224 return indexSymbols( 225 AST.getASTContext(), AST.getPreprocessor(), AST.getLocalTopLevelDecls(), 226 &AST.getMacros(), AST.getPragmaIncludes(), 227 /*IsIndexMainAST=*/true, AST.version(), /*CollectMainFileRefs=*/true); 228 } 229 230 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, 231 Preprocessor &PP, 232 const include_cleaner::PragmaIncludes &PI) { 233 std::vector<Decl *> DeclsToIndex( 234 AST.getTranslationUnitDecl()->decls().begin(), 235 AST.getTranslationUnitDecl()->decls().end()); 236 return indexSymbols(AST, PP, DeclsToIndex, 237 /*MainFileMacros=*/nullptr, PI, 238 /*IsIndexMainAST=*/false, Version, 239 /*CollectMainFileRefs=*/false); 240 } 241 242 FileSymbols::FileSymbols(IndexContents IdxContents, bool SupportContainedRefs) 243 : IdxContents(IdxContents), SupportContainedRefs(SupportContainedRefs) {} 244 245 void FileSymbols::update(llvm::StringRef Key, 246 std::unique_ptr<SymbolSlab> Symbols, 247 std::unique_ptr<RefSlab> Refs, 248 std::unique_ptr<RelationSlab> Relations, 249 bool CountReferences) { 250 std::lock_guard<std::mutex> Lock(Mutex); 251 ++Version; 252 if (!Symbols) 253 SymbolsSnapshot.erase(Key); 254 else 255 SymbolsSnapshot[Key] = std::move(Symbols); 256 if (!Refs) { 257 RefsSnapshot.erase(Key); 258 } else { 259 RefSlabAndCountReferences Item; 260 Item.CountReferences = CountReferences; 261 Item.Slab = std::move(Refs); 262 RefsSnapshot[Key] = std::move(Item); 263 } 264 if (!Relations) 265 RelationsSnapshot.erase(Key); 266 else 267 RelationsSnapshot[Key] = std::move(Relations); 268 } 269 270 std::unique_ptr<SymbolIndex> 271 FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle, 272 size_t *Version) { 273 std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs; 274 std::vector<std::shared_ptr<RefSlab>> RefSlabs; 275 std::vector<std::shared_ptr<RelationSlab>> RelationSlabs; 276 llvm::StringSet<> Files; 277 std::vector<RefSlab *> MainFileRefs; 278 { 279 std::lock_guard<std::mutex> Lock(Mutex); 280 for (const auto &FileAndSymbols : SymbolsSnapshot) { 281 SymbolSlabs.push_back(FileAndSymbols.second); 282 Files.insert(FileAndSymbols.first()); 283 } 284 for (const auto &FileAndRefs : RefsSnapshot) { 285 RefSlabs.push_back(FileAndRefs.second.Slab); 286 Files.insert(FileAndRefs.first()); 287 if (FileAndRefs.second.CountReferences) 288 MainFileRefs.push_back(RefSlabs.back().get()); 289 } 290 for (const auto &FileAndRelations : RelationsSnapshot) { 291 Files.insert(FileAndRelations.first()); 292 RelationSlabs.push_back(FileAndRelations.second); 293 } 294 295 if (Version) 296 *Version = this->Version; 297 } 298 std::vector<const Symbol *> AllSymbols; 299 std::vector<Symbol> SymsStorage; 300 switch (DuplicateHandle) { 301 case DuplicateHandling::Merge: { 302 llvm::DenseMap<SymbolID, Symbol> Merged; 303 for (const auto &Slab : SymbolSlabs) { 304 for (const auto &Sym : *Slab) { 305 assert(Sym.References == 0 && 306 "Symbol with non-zero references sent to FileSymbols"); 307 auto I = Merged.try_emplace(Sym.ID, Sym); 308 if (!I.second) 309 I.first->second = mergeSymbol(I.first->second, Sym); 310 } 311 } 312 for (const RefSlab *Refs : MainFileRefs) 313 for (const auto &Sym : *Refs) { 314 auto It = Merged.find(Sym.first); 315 // This might happen while background-index is still running. 316 if (It == Merged.end()) 317 continue; 318 It->getSecond().References += Sym.second.size(); 319 } 320 SymsStorage.reserve(Merged.size()); 321 for (auto &Sym : Merged) { 322 SymsStorage.push_back(std::move(Sym.second)); 323 AllSymbols.push_back(&SymsStorage.back()); 324 } 325 break; 326 } 327 case DuplicateHandling::PickOne: { 328 llvm::DenseSet<SymbolID> AddedSymbols; 329 for (const auto &Slab : SymbolSlabs) 330 for (const auto &Sym : *Slab) { 331 assert(Sym.References == 0 && 332 "Symbol with non-zero references sent to FileSymbols"); 333 if (AddedSymbols.insert(Sym.ID).second) 334 AllSymbols.push_back(&Sym); 335 } 336 break; 337 } 338 } 339 340 std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID. 341 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs; 342 { 343 llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs; 344 size_t Count = 0; 345 for (const auto &RefSlab : RefSlabs) 346 for (const auto &Sym : *RefSlab) { 347 MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end()); 348 Count += Sym.second.size(); 349 } 350 RefsStorage.reserve(Count); 351 AllRefs.reserve(MergedRefs.size()); 352 for (auto &Sym : MergedRefs) { 353 auto &SymRefs = Sym.second; 354 // Sorting isn't required, but yields more stable results over rebuilds. 355 llvm::sort(SymRefs); 356 llvm::copy(SymRefs, back_inserter(RefsStorage)); 357 AllRefs.try_emplace( 358 Sym.first, 359 llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()], 360 SymRefs.size())); 361 } 362 } 363 364 std::vector<Relation> AllRelations; 365 for (const auto &RelationSlab : RelationSlabs) { 366 for (const auto &R : *RelationSlab) 367 AllRelations.push_back(R); 368 } 369 // Sort relations and remove duplicates that could arise due to 370 // relations being stored in both the shards containing their 371 // subject and object. 372 llvm::sort(AllRelations); 373 AllRelations.erase(std::unique(AllRelations.begin(), AllRelations.end()), 374 AllRelations.end()); 375 376 size_t StorageSize = 377 RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol); 378 for (const auto &Slab : SymbolSlabs) 379 StorageSize += Slab->bytes(); 380 for (const auto &RefSlab : RefSlabs) 381 StorageSize += RefSlab->bytes(); 382 383 // Index must keep the slabs and contiguous ranges alive. 384 switch (Type) { 385 case IndexType::Light: 386 return std::make_unique<MemIndex>( 387 llvm::make_pointee_range(AllSymbols), std::move(AllRefs), 388 std::move(AllRelations), std::move(Files), IdxContents, 389 std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs), 390 std::move(RefsStorage), std::move(SymsStorage)), 391 StorageSize); 392 case IndexType::Heavy: 393 return std::make_unique<dex::Dex>( 394 llvm::make_pointee_range(AllSymbols), std::move(AllRefs), 395 std::move(AllRelations), std::move(Files), IdxContents, 396 std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs), 397 std::move(RefsStorage), std::move(SymsStorage)), 398 StorageSize, SupportContainedRefs); 399 } 400 llvm_unreachable("Unknown clangd::IndexType"); 401 } 402 403 void FileSymbols::profile(MemoryTree &MT) const { 404 std::lock_guard<std::mutex> Lock(Mutex); 405 for (const auto &SymSlab : SymbolsSnapshot) { 406 MT.detail(SymSlab.first()) 407 .child("symbols") 408 .addUsage(SymSlab.second->bytes()); 409 } 410 for (const auto &RefSlab : RefsSnapshot) { 411 MT.detail(RefSlab.first()) 412 .child("references") 413 .addUsage(RefSlab.second.Slab->bytes()); 414 } 415 for (const auto &RelSlab : RelationsSnapshot) { 416 MT.detail(RelSlab.first()) 417 .child("relations") 418 .addUsage(RelSlab.second->bytes()); 419 } 420 } 421 422 FileIndex::FileIndex(bool SupportContainedRefs) 423 : MergedIndex(&MainFileIndex, &PreambleIndex), 424 PreambleSymbols(IndexContents::Symbols | IndexContents::Relations, 425 SupportContainedRefs), 426 PreambleIndex(std::make_unique<MemIndex>()), 427 MainFileSymbols(IndexContents::All, SupportContainedRefs), 428 MainFileIndex(std::make_unique<MemIndex>()) {} 429 430 void FileIndex::updatePreamble(IndexFileIn IF) { 431 FileShardedIndex ShardedIndex(std::move(IF)); 432 for (auto Uri : ShardedIndex.getAllSources()) { 433 auto IF = ShardedIndex.getShard(Uri); 434 // We are using the key received from ShardedIndex, so it should always 435 // exist. 436 assert(IF); 437 PreambleSymbols.update( 438 Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)), 439 std::make_unique<RefSlab>(), 440 std::make_unique<RelationSlab>(std::move(*IF->Relations)), 441 /*CountReferences=*/false); 442 } 443 size_t IndexVersion = 0; 444 auto NewIndex = PreambleSymbols.buildIndex( 445 IndexType::Heavy, DuplicateHandling::PickOne, &IndexVersion); 446 { 447 std::lock_guard<std::mutex> Lock(UpdateIndexMu); 448 if (IndexVersion <= PreambleIndexVersion) { 449 // We lost the race, some other thread built a later version. 450 return; 451 } 452 PreambleIndexVersion = IndexVersion; 453 PreambleIndex.reset(std::move(NewIndex)); 454 vlog( 455 "Build dynamic index for header symbols with estimated memory usage of " 456 "{0} bytes", 457 PreambleIndex.estimateMemoryUsage()); 458 } 459 } 460 461 void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, 462 ASTContext &AST, Preprocessor &PP, 463 const include_cleaner::PragmaIncludes &PI) { 464 IndexFileIn IF; 465 std::tie(IF.Symbols, std::ignore, IF.Relations) = 466 indexHeaderSymbols(Version, AST, PP, PI); 467 updatePreamble(std::move(IF)); 468 } 469 470 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) { 471 auto Contents = indexMainDecls(AST); 472 MainFileSymbols.update( 473 URI::create(Path).toString(), 474 std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))), 475 std::make_unique<RefSlab>(std::move(std::get<1>(Contents))), 476 std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))), 477 /*CountReferences=*/true); 478 size_t IndexVersion = 0; 479 auto NewIndex = MainFileSymbols.buildIndex( 480 IndexType::Light, DuplicateHandling::Merge, &IndexVersion); 481 { 482 std::lock_guard<std::mutex> Lock(UpdateIndexMu); 483 if (IndexVersion <= MainIndexVersion) { 484 // We lost the race, some other thread built a later version. 485 return; 486 } 487 MainIndexVersion = IndexVersion; 488 MainFileIndex.reset(std::move(NewIndex)); 489 vlog( 490 "Build dynamic index for main-file symbols with estimated memory usage " 491 "of {0} bytes", 492 MainFileIndex.estimateMemoryUsage()); 493 } 494 } 495 496 void FileIndex::profile(MemoryTree &MT) const { 497 PreambleSymbols.profile(MT.child("preamble").child("slabs")); 498 MT.child("preamble") 499 .child("index") 500 .addUsage(PreambleIndex.estimateMemoryUsage()); 501 MainFileSymbols.profile(MT.child("main_file").child("slabs")); 502 MT.child("main_file") 503 .child("index") 504 .addUsage(MainFileIndex.estimateMemoryUsage()); 505 } 506 } // namespace clangd 507 } // namespace clang 508