xref: /llvm-project/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp (revision a2063ba7ffdbbb4faf5da5f32739ab761c2e4289)
1 //===--- GlobalCompilationDatabase.cpp ---------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GlobalCompilationDatabase.h"
10 #include "Config.h"
11 #include "FS.h"
12 #include "ProjectModules.h"
13 #include "ScanningProjectModules.h"
14 #include "SourceCode.h"
15 #include "support/Logger.h"
16 #include "support/Path.h"
17 #include "support/Threading.h"
18 #include "support/ThreadsafeFS.h"
19 #include "clang/Tooling/ArgumentsAdjusters.h"
20 #include "clang/Tooling/CompilationDatabase.h"
21 #include "clang/Tooling/CompilationDatabasePluginRegistry.h"
22 #include "clang/Tooling/JSONCompilationDatabase.h"
23 #include "clang/Tooling/Tooling.h"
24 #include "llvm/ADT/PointerIntPair.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/ScopeExit.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/StringMap.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/VirtualFileSystem.h"
31 #include "llvm/TargetParser/Host.h"
32 #include <atomic>
33 #include <chrono>
34 #include <condition_variable>
35 #include <deque>
36 #include <mutex>
37 #include <optional>
38 #include <string>
39 #include <tuple>
40 #include <vector>
41 
42 namespace clang {
43 namespace clangd {
44 namespace {
45 
46 // Runs the given action on all parent directories of filename, starting from
47 // deepest directory and going up to root. Stops whenever action succeeds.
48 void actOnAllParentDirectories(PathRef FileName,
49                                llvm::function_ref<bool(PathRef)> Action) {
50   for (auto Path = absoluteParent(FileName); !Path.empty() && !Action(Path);
51        Path = absoluteParent(Path))
52     ;
53 }
54 
55 } // namespace
56 
57 tooling::CompileCommand
58 GlobalCompilationDatabase::getFallbackCommand(PathRef File) const {
59   std::vector<std::string> Argv = {"clang"};
60   // Clang treats .h files as C by default and files without extension as linker
61   // input, resulting in unhelpful diagnostics.
62   // Parsing as Objective C++ is friendly to more cases.
63   auto FileExtension = llvm::sys::path::extension(File);
64   if (FileExtension.empty() || FileExtension == ".h")
65     Argv.push_back("-xobjective-c++-header");
66   Argv.push_back(std::string(File));
67   tooling::CompileCommand Cmd(llvm::sys::path::parent_path(File),
68                               llvm::sys::path::filename(File), std::move(Argv),
69                               /*Output=*/"");
70   Cmd.Heuristic = "clangd fallback";
71   return Cmd;
72 }
73 
// Loads and caches the CDB from a single directory.
//
// This class is threadsafe, which is to say we have independent locks for each
// directory we're searching for a CDB.
// Loading is deferred until first access.
//
// The DirectoryBasedCDB keeps a map from path => DirectoryCache.
// Typical usage is to:
//  - 1) determine all the paths that might be searched
//  - 2) acquire the map lock and get-or-create all the DirectoryCache entries
//  - 3) release the map lock and query the caches as desired
class DirectoryBasedGlobalCompilationDatabase::DirectoryCache {
  using stopwatch = std::chrono::steady_clock;

  // CachedFile is used to read a CDB file on disk (e.g. compile_commands.json).
  // It specializes in being able to quickly bail out if the file is unchanged,
  // which is the common case.
  // Internally, it stores file metadata so a stat() can verify it's unchanged.
  // We don't actually cache the content as it's not needed - if the file is
  // unchanged then the previous CDB is valid.
  struct CachedFile {
    // Builds the tracked file's path by appending Rel to Parent.
    CachedFile(llvm::StringRef Parent, llvm::StringRef Rel) {
      llvm::SmallString<256> Path = Parent;
      llvm::sys::path::append(Path, Rel);
      this->Path = Path.str().str();
    }
    // Path of the file being tracked.
    std::string Path;
    // Metadata recorded by load() and compared against on later loads.
    // Size is NoFileCached initially, and after load() fails to find the file.
    size_t Size = NoFileCached;
    llvm::sys::TimePoint<> ModifiedTime;
    FileDigest ContentHash;

    // Sentinel for Size: -1 converted to size_t, i.e. its maximum value.
    static constexpr size_t NoFileCached = -1;

    // Outcome of a load() call.
    struct LoadResult {
      enum {
        FileNotFound,   // No regular file at Path.
        TransientError, // File exists but could not be read consistently.
        FoundSameData,  // File matches what was previously loaded.
        FoundNewData,   // File was read and differs; see Buffer.
      } Result;
      std::unique_ptr<llvm::MemoryBuffer> Buffer; // Set only if FoundNewData
    };

    // Checks the file against the recorded metadata and reads it if needed.
    // HasOldData says whether the recorded metadata may be trusted; when it is
    // false the result is never FoundSameData.
    LoadResult load(llvm::vfs::FileSystem &FS, bool HasOldData);
  };

  // If we've looked for a CDB here and found none, the time when that happened.
  // (Atomics make it possible for get() to return without taking a lock)
  // Stored as a raw tick count of `stopwatch`; time_point::min() means "unset".
  std::atomic<stopwatch::rep> NoCDBAt = {
      stopwatch::time_point::min().time_since_epoch().count()};

  // Guards the following cache state.
  std::mutex Mu;
  // When was the cache last known to be in sync with disk state?
  stopwatch::time_point CachePopulatedAt = stopwatch::time_point::min();
  // Whether a new CDB has been loaded but not broadcast yet.
  bool NeedsBroadcast = false;
  // Last loaded CDB, meaningful if CachePopulatedAt was ever set.
  // shared_ptr so we can overwrite this when callers are still using the CDB.
  std::shared_ptr<tooling::CompilationDatabase> CDB;
  // File metadata for the CDB files we support tracking directly.
  CachedFile CompileCommandsJson;
  CachedFile BuildCompileCommandsJson;
  CachedFile CompileFlagsTxt;
  // CachedFile member corresponding to CDB.
  //   CDB  | ACF  | Scenario
  //   null | null | no CDB found, or initial empty cache
  //   set  | null | CDB was loaded via generic plugin interface
  //   null | set  | found known CDB file, but parsing it failed
  //   set  | set  | CDB was parsed from a known file
  CachedFile *ActiveCachedFile = nullptr;

public:
  // Creates an (unloaded) cache for the directory Path, which must be
  // absolute. The tracked files are compile_commands.json,
  // build/compile_commands.json and compile_flags.txt beneath it.
  DirectoryCache(llvm::StringRef Path)
      : CompileCommandsJson(Path, "compile_commands.json"),
        BuildCompileCommandsJson(Path, "build/compile_commands.json"),
        CompileFlagsTxt(Path, "compile_flags.txt"), Path(Path) {
    assert(llvm::sys::path::is_absolute(Path));
  }

  // Absolute canonical path that we're the cache for. (Not case-folded).
  const std::string Path;

  // Get the CDB associated with this directory.
  // ShouldBroadcast:
  //  - as input, signals whether the caller is willing to broadcast a
  //    newly-discovered CDB. (e.g. to trigger background indexing)
  //  - as output, signals whether the caller should do so.
  // (If a new CDB is discovered and ShouldBroadcast is false, we mark the
  // CDB as needing broadcast, and broadcast it next time we can).
  // FreshTime: cached state populated after this time is served as-is.
  // FreshTimeMissing: a "no CDB here" result recorded after this time is
  // served as-is, without taking the lock.
  // Returns null if no CDB is available for this directory.
  std::shared_ptr<const tooling::CompilationDatabase>
  get(const ThreadsafeFS &TFS, bool &ShouldBroadcast,
      stopwatch::time_point FreshTime, stopwatch::time_point FreshTimeMissing) {
    // Fast path for common case without taking lock.
    if (stopwatch::time_point(stopwatch::duration(NoCDBAt.load())) >
        FreshTimeMissing) {
      ShouldBroadcast = false;
      return nullptr;
    }

    std::lock_guard<std::mutex> Lock(Mu);
    // Runs on every exit path below, while Mu is still held (it is declared
    // after Lock, so it is destroyed first). OldCDB snapshots the CDB pointer
    // on entry so that a replacement can be detected.
    auto RequestBroadcast = llvm::make_scope_exit([&, OldCDB(CDB.get())] {
      // If we loaded a new CDB, it should be broadcast at some point.
      if (CDB != nullptr && CDB.get() != OldCDB)
        NeedsBroadcast = true;
      else if (CDB == nullptr) // nothing to broadcast anymore!
        NeedsBroadcast = false;
      // If we have something to broadcast, then do so iff allowed.
      if (!ShouldBroadcast)
        return;
      ShouldBroadcast = NeedsBroadcast;
      NeedsBroadcast = false;
    });

    // If our cache is valid, serve from it.
    if (CachePopulatedAt > FreshTime)
      return CDB;

    if (/*MayCache=*/load(*TFS.view(/*CWD=*/std::nullopt))) {
      // Use new timestamp, as loading may be slow.
      CachePopulatedAt = stopwatch::now();
      // Publish a negative result for the lock-free fast path; if a CDB was
      // found, reset the marker so that the fast path does not fire.
      NoCDBAt.store((CDB ? stopwatch::time_point::min() : CachePopulatedAt)
                        .time_since_epoch()
                        .count());
    }

    return CDB;
  }

private:
  // Updates `CDB` from disk state. Returns false on failure.
  bool load(llvm::vfs::FileSystem &FS);
};
207 
208 DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::LoadResult
209 DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::load(
210     llvm::vfs::FileSystem &FS, bool HasOldData) {
211   auto Stat = FS.status(Path);
212   if (!Stat || !Stat->isRegularFile()) {
213     Size = NoFileCached;
214     ContentHash = {};
215     return {LoadResult::FileNotFound, nullptr};
216   }
217   // If both the size and mtime match, presume unchanged without reading.
218   if (HasOldData && Stat->getLastModificationTime() == ModifiedTime &&
219       Stat->getSize() == Size)
220     return {LoadResult::FoundSameData, nullptr};
221   auto Buf = FS.getBufferForFile(Path);
222   if (!Buf || (*Buf)->getBufferSize() != Stat->getSize()) {
223     // Don't clear the cache - possible we're seeing inconsistent size as the
224     // file is being recreated. If it ends up identical later, great!
225     //
226     // This isn't a complete solution: if we see a partial file but stat/read
227     // agree on its size, we're ultimately going to have spurious CDB reloads.
228     // May be worth fixing if generators don't write atomically (CMake does).
229     elog("Failed to read {0}: {1}", Path,
230          Buf ? "size changed" : Buf.getError().message());
231     return {LoadResult::TransientError, nullptr};
232   }
233 
234   FileDigest NewContentHash = digest((*Buf)->getBuffer());
235   if (HasOldData && NewContentHash == ContentHash) {
236     // mtime changed but data is the same: avoid rebuilding the CDB.
237     ModifiedTime = Stat->getLastModificationTime();
238     return {LoadResult::FoundSameData, nullptr};
239   }
240 
241   Size = (*Buf)->getBufferSize();
242   ModifiedTime = Stat->getLastModificationTime();
243   ContentHash = NewContentHash;
244   return {LoadResult::FoundNewData, std::move(*Buf)};
245 }
246 
247 // Adapt CDB-loading functions to a common interface for DirectoryCache::load().
248 static std::unique_ptr<tooling::CompilationDatabase>
249 parseJSON(PathRef Path, llvm::StringRef Data, std::string &Error) {
250   if (auto CDB = tooling::JSONCompilationDatabase::loadFromBuffer(
251           Data, Error, tooling::JSONCommandLineSyntax::AutoDetect)) {
252     // FS used for expanding response files.
253     // FIXME: ExpandResponseFilesDatabase appears not to provide the usual
254     // thread-safety guarantees, as the access to FS is not locked!
255     // For now, use the real FS, which is known to be threadsafe (if we don't
256     // use/change working directory, which ExpandResponseFilesDatabase doesn't).
257     // NOTE: response files have to be expanded before inference because
258     // inference needs full command line to check/fix driver mode and file type.
259     auto FS = llvm::vfs::getRealFileSystem();
260     return tooling::inferMissingCompileCommands(
261         expandResponseFiles(std::move(CDB), std::move(FS)));
262   }
263   return nullptr;
264 }
265 static std::unique_ptr<tooling::CompilationDatabase>
266 parseFixed(PathRef Path, llvm::StringRef Data, std::string &Error) {
267   return tooling::FixedCompilationDatabase::loadFromBuffer(
268       llvm::sys::path::parent_path(Path), Data, Error);
269 }
270 
// Refreshes `CDB` (and `ActiveCachedFile`) from the state of this directory.
//
// The directly-tracked files are probed in a fixed order; the first one that
// exists decides the outcome. Only if none exists, and only on the very first
// scan, are the generic compilation database plugins tried.
//
// Returns true if the caller may treat the cache as in sync with disk, and
// false if a transient read error means the load should be retried next time.
bool DirectoryBasedGlobalCompilationDatabase::DirectoryCache::load(
    llvm::vfs::FileSystem &FS) {
  dlog("Probing directory {0}", Path);
  std::string Error;

  // Load from the specially-supported compilation databases (JSON + Fixed).
  // For these, we know the files they read and cache their metadata so we can
  // cheaply validate whether they've changed, and hot-reload if they have.
  // (As a bonus, these are also VFS-clean)!
  struct CDBFile {
    CachedFile *File;
    // Wrapper for {Fixed,JSON}CompilationDatabase::loadFromBuffer.
    std::unique_ptr<tooling::CompilationDatabase> (*Parser)(
        PathRef,
        /*Data*/ llvm::StringRef,
        /*ErrorMsg*/ std::string &);
  };
  // The order of this list is the priority order of the candidate files.
  for (const auto &Entry : {CDBFile{&CompileCommandsJson, parseJSON},
                            CDBFile{&BuildCompileCommandsJson, parseJSON},
                            CDBFile{&CompileFlagsTxt, parseFixed}}) {
    // Only the file that produced the current CDB has metadata worth trusting.
    bool Active = ActiveCachedFile == Entry.File;
    auto Loaded = Entry.File->load(FS, Active);
    switch (Loaded.Result) {
    case CachedFile::LoadResult::FileNotFound:
      if (Active) {
        // The file our CDB came from has disappeared: drop the CDB.
        log("Unloaded compilation database from {0}", Entry.File->Path);
        ActiveCachedFile = nullptr;
        CDB = nullptr;
      }
      // Continue looking at other candidates.
      break;
    case CachedFile::LoadResult::TransientError:
      // File existed but we couldn't read it. Reuse the cache, retry later.
      return false; // Load again next time.
    case CachedFile::LoadResult::FoundSameData:
      assert(Active && "CachedFile may not return 'same data' if !HasOldData");
      // This is the critical file, and it hasn't changed.
      return true;
    case CachedFile::LoadResult::FoundNewData:
      // We have a new CDB!
      CDB = Entry.Parser(Entry.File->Path, Loaded.Buffer->getBuffer(), Error);
      if (CDB)
        log("{0} compilation database from {1}", Active ? "Reloaded" : "Loaded",
            Entry.File->Path);
      else
        elog("Failed to load compilation database from {0}: {1}",
             Entry.File->Path, Error);
      // Recorded even if parsing failed (CDB is null in that case), so the
      // file keeps being tracked for changes.
      ActiveCachedFile = Entry.File;
      return true;
    }
  }

  // Fall back to generic handling of compilation databases.
  // We don't know what files they read, so can't efficiently check whether
  // they need to be reloaded. So we never do that.
  // FIXME: the interface doesn't provide a way to virtualize FS access.

  // Don't try these more than once. If we've scanned before, we're done.
  if (CachePopulatedAt > stopwatch::time_point::min())
    return true;
  for (const auto &Entry :
       tooling::CompilationDatabasePluginRegistry::entries()) {
    // Avoid duplicating the special cases handled above.
    if (Entry.getName() == "fixed-compilation-database" ||
        Entry.getName() == "json-compilation-database")
      continue;
    auto Plugin = Entry.instantiate();
    if (auto CDB = Plugin->loadFromDirectory(Path, Error)) {
      log("Loaded compilation database from {0} with plugin {1}", Path,
          Entry.getName());
      // The local CDB shadows the member, hence the explicit this->.
      this->CDB = std::move(CDB);
      return true;
    }
    // Don't log Error here, it's usually just "couldn't find <file>".
  }
  dlog("No compilation database at {0}", Path);
  return true;
}
349 
350 DirectoryBasedGlobalCompilationDatabase::
351     DirectoryBasedGlobalCompilationDatabase(const Options &Opts)
352     : Opts(Opts), Broadcaster(std::make_unique<BroadcastThread>(*this)) {
353   if (!this->Opts.ContextProvider)
354     this->Opts.ContextProvider = [](llvm::StringRef) {
355       return Context::current().clone();
356     };
357 }
358 
359 DirectoryBasedGlobalCompilationDatabase::
360     ~DirectoryBasedGlobalCompilationDatabase() = default;
361 
362 std::optional<tooling::CompileCommand>
363 DirectoryBasedGlobalCompilationDatabase::getCompileCommand(PathRef File) const {
364   CDBLookupRequest Req;
365   Req.FileName = File;
366   Req.ShouldBroadcast = true;
367   auto Now = std::chrono::steady_clock::now();
368   Req.FreshTime = Now - Opts.RevalidateAfter;
369   Req.FreshTimeMissing = Now - Opts.RevalidateMissingAfter;
370 
371   auto Res = lookupCDB(Req);
372   if (!Res) {
373     log("Failed to find compilation database for {0}", File);
374     return std::nullopt;
375   }
376 
377   auto Candidates = Res->CDB->getCompileCommands(File);
378   if (!Candidates.empty())
379     return std::move(Candidates.front());
380 
381   return std::nullopt;
382 }
383 
384 std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
385 DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
386     llvm::ArrayRef<llvm::StringRef> Dirs) const {
387   std::vector<std::string> FoldedDirs;
388   FoldedDirs.reserve(Dirs.size());
389   for (const auto &Dir : Dirs) {
390 #ifndef NDEBUG
391     if (!llvm::sys::path::is_absolute(Dir))
392       elog("Trying to cache CDB for relative {0}");
393 #endif
394     FoldedDirs.push_back(maybeCaseFoldPath(Dir));
395   }
396 
397   std::vector<DirectoryCache *> Ret;
398   Ret.reserve(Dirs.size());
399 
400   std::lock_guard<std::mutex> Lock(DirCachesMutex);
401   for (unsigned I = 0; I < Dirs.size(); ++I)
402     Ret.push_back(&DirCaches.try_emplace(FoldedDirs[I], Dirs[I]).first->second);
403   return Ret;
404 }
405 
406 std::optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult>
407 DirectoryBasedGlobalCompilationDatabase::lookupCDB(
408     CDBLookupRequest Request) const {
409   assert(llvm::sys::path::is_absolute(Request.FileName) &&
410          "path must be absolute");
411 
412   std::string Storage;
413   std::vector<llvm::StringRef> SearchDirs;
414   if (Opts.CompileCommandsDir) // FIXME: unify this case with config.
415     SearchDirs = {*Opts.CompileCommandsDir};
416   else {
417     WithContext WithProvidedContext(Opts.ContextProvider(Request.FileName));
418     const auto &Spec = Config::current().CompileFlags.CDBSearch;
419     switch (Spec.Policy) {
420     case Config::CDBSearchSpec::NoCDBSearch:
421       return std::nullopt;
422     case Config::CDBSearchSpec::FixedDir:
423       Storage = *Spec.FixedCDBPath;
424       SearchDirs = {Storage};
425       break;
426     case Config::CDBSearchSpec::Ancestors:
427       // Traverse the canonical version to prevent false positives. i.e.:
428       // src/build/../a.cc can detect a CDB in /src/build if not
429       // canonicalized.
430       Storage = removeDots(Request.FileName);
431       actOnAllParentDirectories(Storage, [&](llvm::StringRef Dir) {
432         SearchDirs.push_back(Dir);
433         return false;
434       });
435     }
436   }
437 
438   std::shared_ptr<const tooling::CompilationDatabase> CDB = nullptr;
439   bool ShouldBroadcast = false;
440   DirectoryCache *DirCache = nullptr;
441   for (DirectoryCache *Candidate : getDirectoryCaches(SearchDirs)) {
442     bool CandidateShouldBroadcast = Request.ShouldBroadcast;
443     if ((CDB = Candidate->get(Opts.TFS, CandidateShouldBroadcast,
444                               Request.FreshTime, Request.FreshTimeMissing))) {
445       DirCache = Candidate;
446       ShouldBroadcast = CandidateShouldBroadcast;
447       break;
448     }
449   }
450 
451   if (!CDB)
452     return std::nullopt;
453 
454   CDBLookupResult Result;
455   Result.CDB = std::move(CDB);
456   Result.PI.SourceRoot = DirCache->Path;
457 
458   if (ShouldBroadcast)
459     broadcastCDB(Result);
460   return Result;
461 }
462 
// The broadcast thread announces files with new compile commands to the world.
// Primarily this is used to enqueue them for background indexing.
//
// It's on a separate thread because:
//  - otherwise it would block the first parse of the initial file
//  - we need to enumerate all files in the CDB, of which there are many
//  - we (will) have to evaluate config for every file in the CDB, which is slow
class DirectoryBasedGlobalCompilationDatabase::BroadcastThread {
  class Filter;
  DirectoryBasedGlobalCompilationDatabase &Parent;

  // Guards Queue and ActiveTask; CV is signalled when either changes, and on
  // shutdown.
  std::mutex Mu;
  std::condition_variable CV;
  // Shutdown flag (CV is notified after writing).
  // This is atomic so that broadcasts can also observe it and abort early.
  std::atomic<bool> ShouldStop = {false};
  // A CDB to broadcast, plus the Context captured when it was enqueued.
  struct Task {
    CDBLookupResult Lookup;
    Context Ctx;
  };
  // Tasks waiting to be processed, oldest first.
  std::deque<Task> Queue;
  // The task currently being processed, if any.
  std::optional<Task> ActiveTask;
  std::thread Thread; // Must be last member.

  // Thread body: this is just the basic queue processing boilerplate.
  void run() {
    std::unique_lock<std::mutex> Lock(Mu);
    while (true) {
      bool Stopping = false;
      // Sleep until there is work to do, or we're asked to shut down.
      CV.wait(Lock, [&] {
        return (Stopping = ShouldStop.load(std::memory_order_acquire)) ||
               !Queue.empty();
      });
      if (Stopping) {
        // Drop pending work and wake anyone waiting in blockUntilIdle().
        Queue.clear();
        CV.notify_all();
        return;
      }
      ActiveTask = std::move(Queue.front());
      Queue.pop_front();

      // Process without holding the lock, so enqueue() is not blocked.
      Lock.unlock();
      {
        // Run under the Context that was current when the task was enqueued.
        WithContext WithCtx(std::move(ActiveTask->Ctx));
        process(ActiveTask->Lookup);
      }
      Lock.lock();
      ActiveTask.reset();
      CV.notify_all();
    }
  }

  // Inspects a new CDB and broadcasts the files it owns.
  void process(const CDBLookupResult &T);

public:
  // Starts the worker thread immediately.
  BroadcastThread(DirectoryBasedGlobalCompilationDatabase &Parent)
      : Parent(Parent), Thread([this] { run(); }) {}

  // Schedules a broadcast for the CDB in Lookup, capturing the current
  // Context. Lookup.PI.SourceRoot must be non-empty.
  void enqueue(CDBLookupResult Lookup) {
    {
      assert(!Lookup.PI.SourceRoot.empty());
      std::lock_guard<std::mutex> Lock(Mu);
      // New CDB takes precedence over any queued one for the same directory.
      llvm::erase_if(Queue, [&](const Task &T) {
        return T.Lookup.PI.SourceRoot == Lookup.PI.SourceRoot;
      });
      Queue.push_back({std::move(Lookup), Context::current().clone()});
    }
    CV.notify_all();
  }

  // Waits until the queue is empty and no task is being processed, or until
  // Timeout. Returns the result of the underlying wait().
  bool blockUntilIdle(Deadline Timeout) {
    std::unique_lock<std::mutex> Lock(Mu);
    return wait(Lock, CV, Timeout,
                [&] { return Queue.empty() && !ActiveTask; });
  }

  // Requests shutdown, wakes the worker, and waits for it to finish.
  ~BroadcastThread() {
    {
      std::lock_guard<std::mutex> Lock(Mu);
      ShouldStop.store(true, std::memory_order_release);
    }
    CV.notify_all();
    Thread.join();
  }
};
550 
// The DirBasedCDB associates each file with a specific CDB.
// When a CDB is discovered, it may claim to describe files that we associate
// with a different CDB. We do not want to broadcast discovery of these, and
// trigger background indexing of them.
//
// We must filter the list, and check whether they are associated with this CDB.
// This class attempts to do so efficiently.
//
// Roughly, it:
//  - loads the config for each file, and determines the relevant search path
//  - gathers all directories that are part of any search path
//  - (lazily) checks for a CDB in each such directory at most once
//  - walks the search path for each file and determines whether to include it.
class DirectoryBasedGlobalCompilationDatabase::BroadcastThread::Filter {
  // Directory of the CDB being broadcast.
  llvm::StringRef ThisDir;
  DirectoryBasedGlobalCompilationDatabase &Parent;

  // Keep track of all directories we might check for CDBs.
  struct DirInfo {
    // Cache entry for this directory; filled in by grabCaches().
    DirectoryCache *Cache = nullptr;
    // Unknown:   not checked yet.
    // Missing:   checked, no CDB here.
    // TargetCDB: this is the directory of the CDB being broadcast.
    // OtherCDB:  checked, a CDB was found here.
    enum { Unknown, Missing, TargetCDB, OtherCDB } State = Unknown;
    // Parent directory's entry; set by addParents(), otherwise null.
    DirInfo *Parent = nullptr;
  };
  llvm::StringMap<DirInfo> Dirs;

  // A search path starts at a directory, and either includes ancestors or not.
  // The integer bit is 1 if ancestors are included.
  using SearchPath = llvm::PointerIntPair<DirInfo *, 1>;

  // Add all ancestor directories of FilePath to the tracked set.
  // Returns the immediate parent of the file.
  // (Null if the file has no parent directory to visit.)
  DirInfo *addParents(llvm::StringRef FilePath) {
    DirInfo *Leaf = nullptr;
    DirInfo *Child = nullptr;
    actOnAllParentDirectories(FilePath, [&](llvm::StringRef Dir) {
      // Looks up the entry for Dir, inserting a default one if absent.
      auto &Info = Dirs[Dir];
      // If this is the first iteration, then this node is the overall result.
      if (!Leaf)
        Leaf = &Info;
      // Fill in the parent link from the previous iteration to this parent.
      if (Child)
        Child->Parent = &Info;
      // Keep walking, whether we inserted or not, if parent link is missing.
      // (If it's present, parent links must be present up to the root, so stop)
      Child = &Info;
      return Info.Parent != nullptr;
    });
    return Leaf;
  }

  // Populates DirInfo::Cache (and State, if it is TargetCDB).
  void grabCaches() {
    // Fast path out if there were no files, or CDB loading is off.
    if (Dirs.empty())
      return;

    // Parallel arrays: DirKeys[I] is the path whose DirInfo is DirValues[I].
    std::vector<llvm::StringRef> DirKeys;
    std::vector<DirInfo *> DirValues;
    DirKeys.reserve(Dirs.size() + 1);
    DirValues.reserve(Dirs.size());
    for (auto &E : Dirs) {
      DirKeys.push_back(E.first());
      DirValues.push_back(&E.second);
    }

    // Also look up the cache entry for the CDB we're broadcasting.
    // Comparing DirectoryCache pointers is more robust than checking string
    // equality, e.g. reuses the case-sensitivity handling.
    DirKeys.push_back(ThisDir);
    auto DirCaches = Parent.getDirectoryCaches(DirKeys);
    const DirectoryCache *ThisCache = DirCaches.back();
    // Drop the extra entry again so both arrays line up with DirValues.
    DirCaches.pop_back();
    DirKeys.pop_back();

    for (unsigned I = 0; I < DirKeys.size(); ++I) {
      DirValues[I]->Cache = DirCaches[I];
      if (DirCaches[I] == ThisCache)
        DirValues[I]->State = DirInfo::TargetCDB;
    }
  }

  // Should we include a file from this search path?
  // Resolves directory states lazily, and recurses up the parent links when
  // the search path includes ancestors.
  bool shouldInclude(SearchPath P) {
    DirInfo *Info = P.getPointer();
    // No search directory at all (e.g. CDB search disabled for this file).
    if (!Info)
      return false;
    if (Info->State == DirInfo::Unknown) {
      assert(Info->Cache && "grabCaches() should have filled this");
      // Given that we know that CDBs have been moved/generated, don't trust
      // caches. (This should be rare, so it's OK to add a little latency).
      constexpr auto IgnoreCache = std::chrono::steady_clock::time_point::max();
      // Don't broadcast CDBs discovered while broadcasting!
      bool ShouldBroadcast = false;
      bool Exists =
          nullptr != Info->Cache->get(Parent.Opts.TFS, ShouldBroadcast,
                                      /*FreshTime=*/IgnoreCache,
                                      /*FreshTimeMissing=*/IgnoreCache);
      // A directory in TargetCDB state never reaches here, so any CDB found
      // is some other CDB.
      Info->State = Exists ? DirInfo::OtherCDB : DirInfo::Missing;
    }
    // If we have a CDB, include the file if it's the target CDB only.
    if (Info->State != DirInfo::Missing)
      return Info->State == DirInfo::TargetCDB;
    // If we have no CDB and no relevant parent, don't include the file.
    if (!P.getInt() || !Info->Parent)
      return false;
    // Walk up to the next parent.
    return shouldInclude(SearchPath(Info->Parent, 1));
  }

public:
  // ThisDir is stored as a reference to the caller's string data, so that
  // data must outlive the Filter.
  Filter(llvm::StringRef ThisDir,
         DirectoryBasedGlobalCompilationDatabase &Parent)
      : ThisDir(ThisDir), Parent(Parent) {}

  // Returns the subset of AllFiles whose search path resolves to the CDB in
  // ThisDir, in their original order.
  // ShouldStop is polled during the slow phases; once it is set, an empty
  // list is returned.
  std::vector<std::string> filter(std::vector<std::string> AllFiles,
                                  std::atomic<bool> &ShouldStop) {
    std::vector<std::string> Filtered;
    // Allow for clean early-exit of the slow parts.
    auto ExitEarly = [&] {
      if (ShouldStop.load(std::memory_order_acquire)) {
        log("Giving up on broadcasting CDB, as we're shutting down");
        Filtered.clear();
        return true;
      }
      return false;
    };
    // Compute search path for each file.
    // (Entries start out as a null pointer with the ancestors bit clear.)
    std::vector<SearchPath> SearchPaths(AllFiles.size());
    for (unsigned I = 0; I < AllFiles.size(); ++I) {
      if (Parent.Opts.CompileCommandsDir) { // FIXME: unify with config
        SearchPaths[I].setPointer(&Dirs[*Parent.Opts.CompileCommandsDir]);
        continue;
      }
      if (ExitEarly()) // loading config may be slow
        return Filtered;
      WithContext WithProvidedContent(Parent.Opts.ContextProvider(AllFiles[I]));
      const Config::CDBSearchSpec &Spec =
          Config::current().CompileFlags.CDBSearch;
      switch (Spec.Policy) {
      case Config::CDBSearchSpec::NoCDBSearch:
        // Leave the search path null: the file will be excluded.
        break;
      case Config::CDBSearchSpec::Ancestors:
        SearchPaths[I].setInt(/*Recursive=*/1);
        SearchPaths[I].setPointer(addParents(AllFiles[I]));
        break;
      case Config::CDBSearchSpec::FixedDir:
        SearchPaths[I].setPointer(&Dirs[*Spec.FixedCDBPath]);
        break;
      }
    }
    // Get the CDB cache for each dir on the search path, but don't load yet.
    grabCaches();
    // Now work out which files we want to keep, loading CDBs where needed.
    for (unsigned I = 0; I < AllFiles.size(); ++I) {
      if (ExitEarly()) // loading CDBs may be slow
        return Filtered;
      if (shouldInclude(SearchPaths[I]))
        Filtered.push_back(std::move(AllFiles[I]));
    }
    return Filtered;
  }
};
712 
713 void DirectoryBasedGlobalCompilationDatabase::BroadcastThread::process(
714     const CDBLookupResult &T) {
715   vlog("Broadcasting compilation database from {0}", T.PI.SourceRoot);
716   std::vector<std::string> GovernedFiles =
717       Filter(T.PI.SourceRoot, Parent).filter(T.CDB->getAllFiles(), ShouldStop);
718   if (!GovernedFiles.empty())
719     Parent.OnCommandChanged.broadcast(std::move(GovernedFiles));
720 }
721 
722 void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
723     CDBLookupResult Result) const {
724   assert(Result.CDB && "Trying to broadcast an invalid CDB!");
725   Broadcaster->enqueue(Result);
726 }
727 
// Waits for the broadcast thread to run out of work, or for Timeout.
// Simply forwards to BroadcastThread::blockUntilIdle() and returns its result.
bool DirectoryBasedGlobalCompilationDatabase::blockUntilIdle(
    Deadline Timeout) const {
  return Broadcaster->blockUntilIdle(Timeout);
}
732 
733 std::optional<ProjectInfo>
734 DirectoryBasedGlobalCompilationDatabase::getProjectInfo(PathRef File) const {
735   CDBLookupRequest Req;
736   Req.FileName = File;
737   Req.ShouldBroadcast = false;
738   Req.FreshTime = Req.FreshTimeMissing =
739       std::chrono::steady_clock::time_point::min();
740   auto Res = lookupCDB(Req);
741   if (!Res)
742     return std::nullopt;
743   return Res->PI;
744 }
745 
746 std::unique_ptr<ProjectModules>
747 DirectoryBasedGlobalCompilationDatabase::getProjectModules(PathRef File) const {
748   CDBLookupRequest Req;
749   Req.FileName = File;
750   Req.ShouldBroadcast = false;
751   Req.FreshTime = Req.FreshTimeMissing =
752       std::chrono::steady_clock::time_point::min();
753   auto Res = lookupCDB(Req);
754   if (!Res)
755     return {};
756 
757   return scanningProjectModules(Res->CDB, Opts.TFS);
758 }
759 
// Wraps Base (via DelegatingCDB), taking ownership of the mangler and of the
// flags to append to fallback commands.
OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base,
                       std::vector<std::string> FallbackFlags,
                       CommandMangler Mangler)
    : DelegatingCDB(Base), Mangler(std::move(Mangler)),
      FallbackFlags(std::move(FallbackFlags)) {}
765 
766 std::optional<tooling::CompileCommand>
767 OverlayCDB::getCompileCommand(PathRef File) const {
768   std::optional<tooling::CompileCommand> Cmd;
769   {
770     std::lock_guard<std::mutex> Lock(Mutex);
771     auto It = Commands.find(removeDots(File));
772     if (It != Commands.end())
773       Cmd = It->second;
774   }
775   if (Cmd) {
776     // FS used for expanding response files.
777     // FIXME: ExpandResponseFiles appears not to provide the usual
778     // thread-safety guarantees, as the access to FS is not locked!
779     // For now, use the real FS, which is known to be threadsafe (if we don't
780     // use/change working directory, which ExpandResponseFiles doesn't).
781     auto FS = llvm::vfs::getRealFileSystem();
782     auto Tokenizer = llvm::Triple(llvm::sys::getProcessTriple()).isOSWindows()
783                          ? llvm::cl::TokenizeWindowsCommandLine
784                          : llvm::cl::TokenizeGNUCommandLine;
785     // Compile command pushed via LSP protocol may have response files that need
786     // to be expanded before further processing. For CDB for files it happens in
787     // the main CDB when reading it from the JSON file.
788     tooling::addExpandedResponseFiles(Cmd->CommandLine, Cmd->Directory,
789                                       Tokenizer, *FS);
790   }
791   if (!Cmd)
792     Cmd = DelegatingCDB::getCompileCommand(File);
793   if (!Cmd)
794     return std::nullopt;
795   if (Mangler)
796     Mangler(*Cmd, File);
797   return Cmd;
798 }
799 
800 tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const {
801   auto Cmd = DelegatingCDB::getFallbackCommand(File);
802   std::lock_guard<std::mutex> Lock(Mutex);
803   Cmd.CommandLine.insert(Cmd.CommandLine.end(), FallbackFlags.begin(),
804                          FallbackFlags.end());
805   if (Mangler)
806     Mangler(Cmd, File);
807   return Cmd;
808 }
809 
810 bool OverlayCDB::setCompileCommand(PathRef File,
811                                    std::optional<tooling::CompileCommand> Cmd) {
812   // We store a canonical version internally to prevent mismatches between set
813   // and get compile commands. Also it assures clients listening to broadcasts
814   // doesn't receive different names for the same file.
815   std::string CanonPath = removeDots(File);
816   {
817     std::unique_lock<std::mutex> Lock(Mutex);
818     if (Cmd) {
819       if (auto [It, Inserted] =
820               Commands.try_emplace(CanonPath, std::move(*Cmd));
821           !Inserted) {
822         if (It->second == *Cmd)
823           return false;
824         It->second = *Cmd;
825       }
826     } else
827       Commands.erase(CanonPath);
828   }
829   OnCommandChanged.broadcast({CanonPath});
830   return true;
831 }
832 
833 std::unique_ptr<ProjectModules>
834 OverlayCDB::getProjectModules(PathRef File) const {
835   auto MDB = DelegatingCDB::getProjectModules(File);
836   MDB->setCommandMangler([&Mangler = Mangler](tooling::CompileCommand &Command,
837                                               PathRef CommandPath) {
838     Mangler(Command, CommandPath);
839   });
840   return MDB;
841 }
842 
843 DelegatingCDB::DelegatingCDB(const GlobalCompilationDatabase *Base)
844     : Base(Base) {
845   if (Base)
846     BaseChanged = Base->watch([this](const std::vector<std::string> Changes) {
847       OnCommandChanged.broadcast(Changes);
848     });
849 }
850 
851 DelegatingCDB::DelegatingCDB(std::unique_ptr<GlobalCompilationDatabase> Base)
852     : DelegatingCDB(Base.get()) {
853   BaseOwner = std::move(Base);
854 }
855 
856 std::optional<tooling::CompileCommand>
857 DelegatingCDB::getCompileCommand(PathRef File) const {
858   if (!Base)
859     return std::nullopt;
860   return Base->getCompileCommand(File);
861 }
862 
863 std::optional<ProjectInfo> DelegatingCDB::getProjectInfo(PathRef File) const {
864   if (!Base)
865     return std::nullopt;
866   return Base->getProjectInfo(File);
867 }
868 
869 std::unique_ptr<ProjectModules>
870 DelegatingCDB::getProjectModules(PathRef File) const {
871   if (!Base)
872     return nullptr;
873   return Base->getProjectModules(File);
874 }
875 
876 tooling::CompileCommand DelegatingCDB::getFallbackCommand(PathRef File) const {
877   if (!Base)
878     return GlobalCompilationDatabase::getFallbackCommand(File);
879   return Base->getFallbackCommand(File);
880 }
881 
882 bool DelegatingCDB::blockUntilIdle(Deadline D) const {
883   if (!Base)
884     return true;
885   return Base->blockUntilIdle(D);
886 }
887 
888 } // namespace clangd
889 } // namespace clang
890