xref: /llvm-project/clang-tools-extra/clangd/index/Background.h (revision 61fe67a4017375fd675f75652e857e837f77fa51)
1 //===--- Background.h - Build an index in a background thread ----*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H
11 
12 #include "GlobalCompilationDatabase.h"
13 #include "SourceCode.h"
14 #include "index/BackgroundRebuild.h"
15 #include "index/FileIndex.h"
16 #include "index/Index.h"
17 #include "index/Serialization.h"
18 #include "support/Context.h"
19 #include "support/MemoryTree.h"
20 #include "support/Path.h"
21 #include "support/Threading.h"
22 #include "support/ThreadsafeFS.h"
23 #include "clang/Tooling/CompilationDatabase.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/Support/Threading.h"
26 #include <atomic>
27 #include <condition_variable>
28 #include <deque>
29 #include <mutex>
30 #include <optional>
31 #include <queue>
32 #include <string>
33 #include <thread>
34 #include <vector>
35 
36 namespace clang {
37 namespace clangd {
38 
39 // Handles storage and retrieval of index shards. Both store and load
40 // operations can be called from multiple-threads concurrently.
41 class BackgroundIndexStorage {
42 public:
43   virtual ~BackgroundIndexStorage() = default;
44 
45   // Shards of the index are stored and retrieved independently, keyed by shard
46   // identifier - in practice this is a source file name
47   virtual llvm::Error storeShard(llvm::StringRef ShardIdentifier,
48                                  IndexFileOut Shard) const = 0;
49 
50   // Tries to load shard with given identifier, returns nullptr if shard
51   // couldn't be loaded.
52   virtual std::unique_ptr<IndexFileIn>
53   loadShard(llvm::StringRef ShardIdentifier) const = 0;
54 
55   // The factory provides storage for each File.
56   // It keeps ownership of the storage instances, and should manage caching
57   // itself. Factory must be threadsafe and never returns nullptr.
58   using Factory = llvm::unique_function<BackgroundIndexStorage *(PathRef)>;
59 
60   // Creates an Index Storage that saves shards into disk. Index storage uses
61   // CDBDirectory + ".cache/clangd/index/" as the folder to save shards.
62   // CDBDirectory is the first directory containing a CDB in parent directories
63   // of a file, or user cache directory if none was found, e.g. stdlib headers.
64   static Factory createDiskBackedStorageFactory(
65       std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo);
66 };
67 
68 // A priority queue of tasks which can be run on (external) worker threads.
69 class BackgroundQueue {
70 public:
71   /// A work item on the thread pool's queue.
72   struct Task {
73     explicit Task(std::function<void()> Run) : Run(std::move(Run)) {}
74 
75     std::function<void()> Run;
76     llvm::ThreadPriority ThreadPri = llvm::ThreadPriority::Low;
77     unsigned QueuePri = 0; // Higher-priority tasks will run first.
78     std::string Tag;       // Allows priority to be boosted later.
79     uint64_t Key = 0;      // If the key matches a previous task, drop this one.
80                            // (in practice this means we never reindex a file).
81 
82     bool operator<(const Task &O) const { return QueuePri < O.QueuePri; }
83   };
84 
85   // Describes the number of tasks processed by the queue.
86   struct Stats {
87     unsigned Enqueued = 0;  // Total number of tasks ever enqueued.
88     unsigned Active = 0;    // Tasks being currently processed by a worker.
89     unsigned Completed = 0; // Tasks that have been finished.
90     unsigned LastIdle = 0;  // Number of completed tasks when last empty.
91   };
92 
93   BackgroundQueue(std::function<void(Stats)> OnProgress = nullptr)
94       : OnProgress(OnProgress) {}
95 
96   // Add tasks to the queue.
97   void push(Task);
98   void append(std::vector<Task>);
99   // Boost priority of current and new tasks with matching Tag, if they are
100   // lower priority.
101   // Reducing the boost of a tag affects future tasks but not current ones.
102   void boost(llvm::StringRef Tag, unsigned NewPriority);
103 
104   // Process items on the queue until the queue is stopped.
105   // If the queue becomes empty, OnIdle will be called (on one worker).
106   void work(std::function<void()> OnIdle = nullptr);
107 
108   // Stop processing new tasks, allowing all work() calls to return soon.
109   void stop();
110 
111   // Disables thread priority lowering to ensure progress on loaded systems.
112   // Only affects tasks that run after the call.
113   static void preventThreadStarvationInTests();
114   [[nodiscard]] bool
115   blockUntilIdleForTest(std::optional<double> TimeoutSeconds);
116 
117 private:
118   void notifyProgress() const; // Requires lock Mu
119   bool adjust(Task &T);
120 
121   std::mutex Mu;
122   Stats Stat;
123   std::condition_variable CV;
124   bool ShouldStop = false;
125   std::vector<Task> Queue; // max-heap
126   llvm::StringMap<unsigned> Boosts;
127   std::function<void(Stats)> OnProgress;
128   llvm::DenseSet<uint64_t> SeenKeys;
129 };
130 
131 // Builds an in-memory index by by running the static indexer action over
132 // all commands in a compilation database. Indexing happens in the background.
133 // FIXME: it should watch for changes to files on disk.
134 class BackgroundIndex : public SwapIndex {
135 public:
136   struct Options {
137     // Arbitrary value to ensure some concurrency in tests.
138     // In production an explicit value is specified.
139     size_t ThreadPoolSize = 4;
140     // Thread priority when indexing files.
141     llvm::ThreadPriority IndexingPriority = llvm::ThreadPriority::Low;
142     // Callback that provides notifications as indexing makes progress.
143     std::function<void(BackgroundQueue::Stats)> OnProgress = nullptr;
144     // Function called to obtain the Context to use while indexing the specified
145     // file. Called with the empty string for other tasks.
146     // (When called, the context from BackgroundIndex construction is active).
147     std::function<Context(PathRef)> ContextProvider = nullptr;
148     // Whether the index needs to support the containedRefs() operation.
149     // May use extra memory.
150     bool SupportContainedRefs = true;
151   };
152 
153   /// Creates a new background index and starts its threads.
154   /// The current Context will be propagated to each worker thread.
155   BackgroundIndex(const ThreadsafeFS &, const GlobalCompilationDatabase &CDB,
156                   BackgroundIndexStorage::Factory IndexStorageFactory,
157                   Options Opts);
158   ~BackgroundIndex(); // Blocks while the current task finishes.
159 
160   // Enqueue translation units for indexing.
161   // The indexing happens in a background thread, so the symbols will be
162   // available sometime later.
163   void enqueue(const std::vector<std::string> &ChangedFiles) {
164     Queue.push(changedFilesTask(ChangedFiles));
165   }
166 
167   /// Boosts priority of indexing related to Path.
168   /// Typically used to index TUs when headers are opened.
169   void boostRelated(llvm::StringRef Path);
170 
171   // Cause background threads to stop after ther current task, any remaining
172   // tasks will be discarded.
173   void stop() {
174     Rebuilder.shutdown();
175     Queue.stop();
176   }
177 
178   // Wait until the queue is empty, to allow deterministic testing.
179   [[nodiscard]] bool
180   blockUntilIdleForTest(std::optional<double> TimeoutSeconds = 10) {
181     return Queue.blockUntilIdleForTest(TimeoutSeconds);
182   }
183 
184   void profile(MemoryTree &MT) const;
185 
186 private:
187   /// Represents the state of a single file when indexing was performed.
188   struct ShardVersion {
189     FileDigest Digest{{0}};
190     bool HadErrors = false;
191   };
192 
193   /// Given index results from a TU, only update symbols coming from files with
194   /// different digests than \p ShardVersionsSnapshot. Also stores new index
195   /// information on IndexStorage.
196   void update(llvm::StringRef MainFile, IndexFileIn Index,
197               const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
198               bool HadErrors);
199 
200   // configuration
201   const ThreadsafeFS &TFS;
202   const GlobalCompilationDatabase &CDB;
203   llvm::ThreadPriority IndexingPriority;
204   std::function<Context(PathRef)> ContextProvider;
205 
206   llvm::Error index(tooling::CompileCommand);
207 
208   FileSymbols IndexedSymbols;
209   BackgroundIndexRebuilder Rebuilder;
210   llvm::StringMap<ShardVersion> ShardVersions; // Key is absolute file path.
211   std::mutex ShardVersionsMu;
212 
213   BackgroundIndexStorage::Factory IndexStorageFactory;
214   // Tries to load shards for the MainFiles and their dependencies.
215   std::vector<std::string> loadProject(std::vector<std::string> MainFiles);
216 
217   BackgroundQueue::Task
218   changedFilesTask(const std::vector<std::string> &ChangedFiles);
219   BackgroundQueue::Task indexFileTask(std::string Path);
220 
221   // from lowest to highest priority
222   enum QueuePriority {
223     IndexFile,
224     IndexBoostedFile,
225     LoadShards,
226   };
227   BackgroundQueue Queue;
228   AsyncTaskRunner ThreadPool;
229   GlobalCompilationDatabase::CommandChanged::Subscription CommandsChanged;
230 };
231 
232 } // namespace clangd
233 } // namespace clang
234 
235 #endif
236