1 //===-- Background.cpp - Build an index in a background thread ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "index/Background.h" 10 #include "Compiler.h" 11 #include "Config.h" 12 #include "Headers.h" 13 #include "SourceCode.h" 14 #include "URI.h" 15 #include "index/BackgroundIndexLoader.h" 16 #include "index/FileIndex.h" 17 #include "index/Index.h" 18 #include "index/IndexAction.h" 19 #include "index/MemIndex.h" 20 #include "index/Ref.h" 21 #include "index/Relation.h" 22 #include "index/Serialization.h" 23 #include "index/Symbol.h" 24 #include "index/SymbolCollector.h" 25 #include "support/Context.h" 26 #include "support/Logger.h" 27 #include "support/Path.h" 28 #include "support/Threading.h" 29 #include "support/ThreadsafeFS.h" 30 #include "support/Trace.h" 31 #include "clang/Basic/SourceLocation.h" 32 #include "clang/Basic/SourceManager.h" 33 #include "clang/Basic/Stack.h" 34 #include "clang/Frontend/FrontendAction.h" 35 #include "llvm/ADT/ArrayRef.h" 36 #include "llvm/ADT/DenseSet.h" 37 #include "llvm/ADT/STLExtras.h" 38 #include "llvm/ADT/StringMap.h" 39 #include "llvm/ADT/StringRef.h" 40 #include "llvm/Support/Error.h" 41 #include "llvm/Support/Path.h" 42 #include "llvm/Support/Threading.h" 43 #include "llvm/Support/xxhash.h" 44 45 #include <algorithm> 46 #include <atomic> 47 #include <chrono> 48 #include <condition_variable> 49 #include <cstddef> 50 #include <memory> 51 #include <mutex> 52 #include <numeric> 53 #include <optional> 54 #include <queue> 55 #include <random> 56 #include <string> 57 #include <thread> 58 #include <utility> 59 #include <vector> 60 61 namespace clang { 62 namespace clangd { 63 namespace { 64 65 // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or 66 // relative to Cmd.Directory, which might not be the same as current working 67 // directory. 68 llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { 69 llvm::SmallString<128> AbsolutePath; 70 if (llvm::sys::path::is_absolute(Cmd.Filename)) { 71 AbsolutePath = Cmd.Filename; 72 } else { 73 AbsolutePath = Cmd.Directory; 74 llvm::sys::path::append(AbsolutePath, Cmd.Filename); 75 llvm::sys::path::remove_dots(AbsolutePath, true); 76 } 77 return AbsolutePath; 78 } 79 80 bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) { 81 auto Buf = FS->getBufferForFile(LS.AbsolutePath); 82 if (!Buf) { 83 vlog("Background-index: Couldn't read {0} to validate stored index: {1}", 84 LS.AbsolutePath, Buf.getError().message()); 85 // There is no point in indexing an unreadable file. 86 return false; 87 } 88 return digest(Buf->get()->getBuffer()) != LS.Digest; 89 } 90 91 } // namespace 92 93 BackgroundIndex::BackgroundIndex( 94 const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB, 95 BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts) 96 : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB), 97 IndexingPriority(Opts.IndexingPriority), 98 ContextProvider(std::move(Opts.ContextProvider)), 99 IndexedSymbols(IndexContents::All, Opts.SupportContainedRefs), 100 Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), 101 IndexStorageFactory(std::move(IndexStorageFactory)), 102 Queue(std::move(Opts.OnProgress)), 103 CommandsChanged( 104 CDB.watch([&](const std::vector<std::string> &ChangedFiles) { 105 enqueue(ChangedFiles); 106 })) { 107 assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero."); 108 assert(this->IndexStorageFactory && "Storage factory can not be null!"); 109 for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) { 110 ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1), 111 [this, Ctx(Context::current().clone())]() mutable { 112 clang::noteBottomOfStack(); 113 WithContext BGContext(std::move(Ctx)); 114 Queue.work([&] { Rebuilder.idle(); }); 115 }); 116 } 117 } 118 119 BackgroundIndex::~BackgroundIndex() { 120 stop(); 121 ThreadPool.wait(); 122 } 123 124 BackgroundQueue::Task BackgroundIndex::changedFilesTask( 125 const std::vector<std::string> &ChangedFiles) { 126 BackgroundQueue::Task T([this, ChangedFiles] { 127 trace::Span Tracer("BackgroundIndexEnqueue"); 128 129 std::optional<WithContext> WithProvidedContext; 130 if (ContextProvider) 131 WithProvidedContext.emplace(ContextProvider(/*Path=*/"")); 132 133 // We're doing this asynchronously, because we'll read shards here too. 134 log("Enqueueing {0} commands for indexing", ChangedFiles.size()); 135 SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size())); 136 137 auto NeedsReIndexing = loadProject(std::move(ChangedFiles)); 138 // Run indexing for files that need to be updated. 139 std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(), 140 std::mt19937(std::random_device{}())); 141 std::vector<BackgroundQueue::Task> Tasks; 142 Tasks.reserve(NeedsReIndexing.size()); 143 for (const auto &File : NeedsReIndexing) 144 Tasks.push_back(indexFileTask(std::move(File))); 145 Queue.append(std::move(Tasks)); 146 }); 147 148 T.QueuePri = LoadShards; 149 T.ThreadPri = llvm::ThreadPriority::Default; 150 return T; 151 } 152 153 static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) { 154 Path = llvm::sys::path::filename(Path); 155 return Path.drop_back(llvm::sys::path::extension(Path).size()); 156 } 157 158 BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) { 159 std::string Tag = filenameWithoutExtension(Path).str(); 160 uint64_t Key = llvm::xxh3_64bits(Path); 161 BackgroundQueue::Task T([this, Path(std::move(Path))] { 162 std::optional<WithContext> WithProvidedContext; 163 if (ContextProvider) 164 WithProvidedContext.emplace(ContextProvider(Path)); 165 auto Cmd = CDB.getCompileCommand(Path); 166 if (!Cmd) 167 return; 168 if (auto Error = index(std::move(*Cmd))) 169 elog("Indexing {0} failed: {1}", Path, std::move(Error)); 170 }); 171 T.QueuePri = IndexFile; 172 T.ThreadPri = IndexingPriority; 173 T.Tag = std::move(Tag); 174 T.Key = Key; 175 return T; 176 } 177 178 void BackgroundIndex::boostRelated(llvm::StringRef Path) { 179 if (isHeaderFile(Path)) 180 Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile); 181 } 182 183 /// Given index results from a TU, only update symbols coming from files that 184 /// are different or missing from than \p ShardVersionsSnapshot. Also stores new 185 /// index information on IndexStorage. 186 void BackgroundIndex::update( 187 llvm::StringRef MainFile, IndexFileIn Index, 188 const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, 189 bool HadErrors) { 190 // Keys are URIs. 191 llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate; 192 // Note that sources do not contain any information regarding missing headers, 193 // since we don't even know what absolute path they should fall in. 194 for (const auto &IndexIt : *Index.Sources) { 195 const auto &IGN = IndexIt.getValue(); 196 auto AbsPath = URI::resolve(IGN.URI, MainFile); 197 if (!AbsPath) { 198 elog("Failed to resolve URI: {0}", AbsPath.takeError()); 199 continue; 200 } 201 const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath); 202 // File has different contents, or indexing was successful this time. 203 if (DigestIt == ShardVersionsSnapshot.end() || 204 DigestIt->getValue().Digest != IGN.Digest || 205 (DigestIt->getValue().HadErrors && !HadErrors)) 206 FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; 207 } 208 209 // Shard slabs into files. 210 FileShardedIndex ShardedIndex(std::move(Index)); 211 212 // Build and store new slabs for each updated file. 213 for (const auto &FileIt : FilesToUpdate) { 214 auto Uri = FileIt.first(); 215 auto IF = ShardedIndex.getShard(Uri); 216 assert(IF && "no shard for file in Index.Sources?"); 217 PathRef Path = FileIt.getValue().first; 218 219 // Only store command line hash for main files of the TU, since our 220 // current model keeps only one version of a header file. 221 if (Path != MainFile) 222 IF->Cmd.reset(); 223 224 // We need to store shards before updating the index, since the latter 225 // consumes slabs. 226 // FIXME: Also skip serializing the shard if it is already up-to-date. 227 if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF)) 228 elog("Failed to write background-index shard for file {0}: {1}", Path, 229 std::move(Error)); 230 231 { 232 std::lock_guard<std::mutex> Lock(ShardVersionsMu); 233 const auto &Hash = FileIt.getValue().second; 234 auto DigestIt = ShardVersions.try_emplace(Path); 235 ShardVersion &SV = DigestIt.first->second; 236 // Skip if file is already up to date, unless previous index was broken 237 // and this one is not. 238 if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors) 239 continue; 240 SV.Digest = Hash; 241 SV.HadErrors = HadErrors; 242 243 // This can override a newer version that is added in another thread, if 244 // this thread sees the older version but finishes later. This should be 245 // rare in practice. 246 IndexedSymbols.update( 247 Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)), 248 std::make_unique<RefSlab>(std::move(*IF->Refs)), 249 std::make_unique<RelationSlab>(std::move(*IF->Relations)), 250 Path == MainFile); 251 } 252 } 253 } 254 255 llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { 256 trace::Span Tracer("BackgroundIndex"); 257 SPAN_ATTACH(Tracer, "file", Cmd.Filename); 258 auto AbsolutePath = getAbsolutePath(Cmd); 259 260 auto FS = TFS.view(Cmd.Directory); 261 auto Buf = FS->getBufferForFile(AbsolutePath); 262 if (!Buf) 263 return llvm::errorCodeToError(Buf.getError()); 264 auto Hash = digest(Buf->get()->getBuffer()); 265 266 // Take a snapshot of the versions to avoid locking for each file in the TU. 267 llvm::StringMap<ShardVersion> ShardVersionsSnapshot; 268 { 269 std::lock_guard<std::mutex> Lock(ShardVersionsMu); 270 ShardVersionsSnapshot = ShardVersions; 271 } 272 273 vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash)); 274 ParseInputs Inputs; 275 Inputs.TFS = &TFS; 276 Inputs.CompileCommand = std::move(Cmd); 277 IgnoreDiagnostics IgnoreDiags; 278 auto CI = buildCompilerInvocation(Inputs, IgnoreDiags); 279 if (!CI) 280 return error("Couldn't build compiler invocation"); 281 282 auto Clang = 283 prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr, 284 std::move(*Buf), std::move(FS), IgnoreDiags); 285 if (!Clang) 286 return error("Couldn't build compiler instance"); 287 288 SymbolCollector::Options IndexOpts; 289 // Creates a filter to not collect index results from files with unchanged 290 // digests. 291 IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM, 292 FileID FID) { 293 const auto F = SM.getFileEntryRefForID(FID); 294 if (!F) 295 return false; // Skip invalid files. 296 auto AbsPath = getCanonicalPath(*F, SM.getFileManager()); 297 if (!AbsPath) 298 return false; // Skip files without absolute path. 299 auto Digest = digestFile(SM, FID); 300 if (!Digest) 301 return false; 302 auto D = ShardVersionsSnapshot.find(*AbsPath); 303 if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest && 304 !D->second.HadErrors) 305 return false; // Skip files that haven't changed, without errors. 306 return true; 307 }; 308 IndexOpts.CollectMainFileRefs = true; 309 310 IndexFileIn Index; 311 auto Action = createStaticIndexingAction( 312 IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); }, 313 [&](RefSlab R) { Index.Refs = std::move(R); }, 314 [&](RelationSlab R) { Index.Relations = std::move(R); }, 315 [&](IncludeGraph IG) { Index.Sources = std::move(IG); }); 316 317 // We're going to run clang here, and it could potentially crash. 318 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal, 319 // but the leaky "recovery" is pretty scary too in a long-running process. 320 // If crashes are a real problem, maybe we should fork a child process. 321 322 const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front(); 323 if (!Action->BeginSourceFile(*Clang, Input)) 324 return error("BeginSourceFile() failed"); 325 if (llvm::Error Err = Action->Execute()) 326 return Err; 327 328 Action->EndSourceFile(); 329 330 Index.Cmd = Inputs.CompileCommand; 331 assert(Index.Symbols && Index.Refs && Index.Sources && 332 "Symbols, Refs and Sources must be set."); 333 334 log("Indexed {0} ({1} symbols, {2} refs, {3} files)", 335 Inputs.CompileCommand.Filename, Index.Symbols->size(), 336 Index.Refs->numRefs(), Index.Sources->size()); 337 SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size())); 338 SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs())); 339 SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size())); 340 341 bool HadErrors = Clang->hasDiagnostics() && 342 Clang->getDiagnostics().hasUncompilableErrorOccurred(); 343 if (HadErrors) { 344 log("Failed to compile {0}, index may be incomplete", AbsolutePath); 345 for (auto &It : *Index.Sources) 346 It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors; 347 } 348 update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors); 349 350 Rebuilder.indexedTU(); 351 return llvm::Error::success(); 352 } 353 354 // Restores shards for \p MainFiles from index storage. Then checks staleness of 355 // those shards and returns a list of TUs that needs to be indexed to update 356 // staleness. 357 std::vector<std::string> 358 BackgroundIndex::loadProject(std::vector<std::string> MainFiles) { 359 // Drop files where background indexing is disabled in config. 360 if (ContextProvider) 361 llvm::erase_if(MainFiles, [&](const std::string &TU) { 362 // Load the config for each TU, as indexing may be selectively enabled. 363 WithContext WithProvidedContext(ContextProvider(TU)); 364 return Config::current().Index.Background == 365 Config::BackgroundPolicy::Skip; 366 }); 367 Rebuilder.startLoading(); 368 // Load shards for all of the mainfiles. 369 const std::vector<LoadedShard> Result = 370 loadIndexShards(MainFiles, IndexStorageFactory, CDB); 371 size_t LoadedShards = 0; 372 { 373 // Update in-memory state. 374 std::lock_guard<std::mutex> Lock(ShardVersionsMu); 375 for (auto &LS : Result) { 376 if (!LS.Shard) 377 continue; 378 auto SS = 379 LS.Shard->Symbols 380 ? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols)) 381 : nullptr; 382 auto RS = LS.Shard->Refs 383 ? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs)) 384 : nullptr; 385 auto RelS = 386 LS.Shard->Relations 387 ? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations)) 388 : nullptr; 389 ShardVersion &SV = ShardVersions[LS.AbsolutePath]; 390 SV.Digest = LS.Digest; 391 SV.HadErrors = LS.HadErrors; 392 ++LoadedShards; 393 394 IndexedSymbols.update(URI::create(LS.AbsolutePath).toString(), 395 std::move(SS), std::move(RS), std::move(RelS), 396 LS.CountReferences); 397 } 398 } 399 Rebuilder.loadedShard(LoadedShards); 400 Rebuilder.doneLoading(); 401 402 auto FS = TFS.view(/*CWD=*/std::nullopt); 403 llvm::DenseSet<PathRef> TUsToIndex; 404 // We'll accept data from stale shards, but ensure the files get reindexed 405 // soon. 406 for (auto &LS : Result) { 407 if (!shardIsStale(LS, FS.get())) 408 continue; 409 PathRef TUForFile = LS.DependentTU; 410 assert(!TUForFile.empty() && "File without a TU!"); 411 412 // FIXME: Currently, we simply schedule indexing on a TU whenever any of 413 // its dependencies needs re-indexing. We might do it smarter by figuring 414 // out a minimal set of TUs that will cover all the stale dependencies. 415 // FIXME: Try looking at other TUs if no compile commands are available 416 // for this TU, i.e TU was deleted after we performed indexing. 417 TUsToIndex.insert(TUForFile); 418 } 419 420 return {TUsToIndex.begin(), TUsToIndex.end()}; 421 } 422 423 void BackgroundIndex::profile(MemoryTree &MT) const { 424 IndexedSymbols.profile(MT.child("slabs")); 425 // We don't want to mix memory used by index and symbols, so call base class. 426 MT.child("index").addUsage(SwapIndex::estimateMemoryUsage()); 427 } 428 } // namespace clangd 429 } // namespace clang 430