xref: /llvm-project/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h (revision efdb3ae23247850d3886e3708400f0d991ed59e1)
1e1f4c4aaSAlex Lorenz //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2e1f4c4aaSAlex Lorenz //
3e1f4c4aaSAlex Lorenz // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e1f4c4aaSAlex Lorenz // See https://llvm.org/LICENSE.txt for license information.
5e1f4c4aaSAlex Lorenz // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e1f4c4aaSAlex Lorenz //
7e1f4c4aaSAlex Lorenz //===----------------------------------------------------------------------===//
8e1f4c4aaSAlex Lorenz 
9b4682816SKazu Hirata #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10b4682816SKazu Hirata #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11e1f4c4aaSAlex Lorenz 
12e1f4c4aaSAlex Lorenz #include "clang/Basic/LLVM.h"
13b4c83a13SArgyrios Kyrtzidis #include "clang/Lex/DependencyDirectivesScanner.h"
1478a05b92SKazu Hirata #include "llvm/ADT/DenseMap.h"
15e1f4c4aaSAlex Lorenz #include "llvm/ADT/StringMap.h"
16e1f4c4aaSAlex Lorenz #include "llvm/Support/Allocator.h"
17e1f4c4aaSAlex Lorenz #include "llvm/Support/ErrorOr.h"
18e1f4c4aaSAlex Lorenz #include "llvm/Support/VirtualFileSystem.h"
19e1f4c4aaSAlex Lorenz #include <mutex>
20a1580d7bSKazu Hirata #include <optional>
21e1f4c4aaSAlex Lorenz 
22e1f4c4aaSAlex Lorenz namespace clang {
23e1f4c4aaSAlex Lorenz namespace tooling {
24e1f4c4aaSAlex Lorenz namespace dependencies {
25e1f4c4aaSAlex Lorenz 
26b4c83a13SArgyrios Kyrtzidis using DependencyDirectivesTy =
27b4c83a13SArgyrios Kyrtzidis     SmallVector<dependency_directives_scan::Directive, 20>;
28b4c83a13SArgyrios Kyrtzidis 
29b4c83a13SArgyrios Kyrtzidis /// Contents and directive tokens of a cached file entry. Single instance can
305daeada3SJan Svoboda /// be shared between multiple entries.
315daeada3SJan Svoboda struct CachedFileContents {
32b4c83a13SArgyrios Kyrtzidis   CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
33b4c83a13SArgyrios Kyrtzidis       : Original(std::move(Contents)), DepDirectives(nullptr) {}
345daeada3SJan Svoboda 
35b58a420fSArgyrios Kyrtzidis   /// Owning storage for the original contents.
365daeada3SJan Svoboda   std::unique_ptr<llvm::MemoryBuffer> Original;
375daeada3SJan Svoboda 
38b58a420fSArgyrios Kyrtzidis   /// The mutex that must be locked before mutating directive tokens.
395daeada3SJan Svoboda   std::mutex ValueLock;
40b4c83a13SArgyrios Kyrtzidis   SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
41b58a420fSArgyrios Kyrtzidis   /// Accessor to the directive tokens that's atomic to avoid data races.
42b4c83a13SArgyrios Kyrtzidis   /// \p CachedFileContents has ownership of the pointer.
436ad0788cSKazu Hirata   std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
44b4c83a13SArgyrios Kyrtzidis 
45b4c83a13SArgyrios Kyrtzidis   ~CachedFileContents() { delete DepDirectives.load(); }
465daeada3SJan Svoboda };
475daeada3SJan Svoboda 
48e1f4c4aaSAlex Lorenz /// An in-memory representation of a file system entity that is of interest to
49e1f4c4aaSAlex Lorenz /// the dependency scanning filesystem.
50e1f4c4aaSAlex Lorenz ///
51e1f4c4aaSAlex Lorenz /// It represents one of the following:
52b58a420fSArgyrios Kyrtzidis /// - opened file with contents and a stat value,
53b58a420fSArgyrios Kyrtzidis /// - opened file with contents, directive tokens and a stat value,
54f6680345SJan Svoboda /// - directory entry with its stat value,
555daeada3SJan Svoboda /// - filesystem error.
565daeada3SJan Svoboda ///
575daeada3SJan Svoboda /// Single instance of this class can be shared across different filenames (e.g.
585daeada3SJan Svoboda /// a regular file and a symlink). For this reason the status filename is empty
595daeada3SJan Svoboda /// and is only materialized by \c EntryRef that knows the requested filename.
60e1f4c4aaSAlex Lorenz class CachedFileSystemEntry {
61e1f4c4aaSAlex Lorenz public:
625daeada3SJan Svoboda   /// Creates an entry without contents: either a filesystem error or
635daeada3SJan Svoboda   /// a directory with stat value.
645daeada3SJan Svoboda   CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
655daeada3SJan Svoboda       : MaybeStat(std::move(Stat)), Contents(nullptr) {
665daeada3SJan Svoboda     clearStatName();
675daeada3SJan Svoboda   }
68e1f4c4aaSAlex Lorenz 
695daeada3SJan Svoboda   /// Creates an entry representing a file with contents.
705daeada3SJan Svoboda   CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
715daeada3SJan Svoboda                         CachedFileContents *Contents)
725daeada3SJan Svoboda       : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
735daeada3SJan Svoboda     clearStatName();
743031fd71SJan Svoboda   }
75e1f4c4aaSAlex Lorenz 
76ced077e1SJan Svoboda   /// \returns True if the entry is a filesystem error.
77ced077e1SJan Svoboda   bool isError() const { return !MaybeStat; }
78ee30b0ecSAlex Lorenz 
795daeada3SJan Svoboda   /// \returns True if the current entry represents a directory.
80ced077e1SJan Svoboda   bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
81ced077e1SJan Svoboda 
82ced077e1SJan Svoboda   /// \returns Original contents of the file.
83ced077e1SJan Svoboda   StringRef getOriginalContents() const {
84ced077e1SJan Svoboda     assert(!isError() && "error");
85ced077e1SJan Svoboda     assert(!MaybeStat->isDirectory() && "not a file");
865daeada3SJan Svoboda     assert(Contents && "contents not initialized");
875daeada3SJan Svoboda     return Contents->Original->getBuffer();
88f6680345SJan Svoboda   }
89f6680345SJan Svoboda 
90b58a420fSArgyrios Kyrtzidis   /// \returns The scanned preprocessor directive tokens of the file that are
91b58a420fSArgyrios Kyrtzidis   /// used to speed up preprocessing, if available.
926ad0788cSKazu Hirata   std::optional<ArrayRef<dependency_directives_scan::Directive>>
93b4c83a13SArgyrios Kyrtzidis   getDirectiveTokens() const {
94ced077e1SJan Svoboda     assert(!isError() && "error");
95b4c83a13SArgyrios Kyrtzidis     assert(!isDirectory() && "not a file");
965daeada3SJan Svoboda     assert(Contents && "contents not initialized");
97b4c83a13SArgyrios Kyrtzidis     if (auto *Directives = Contents->DepDirectives.load()) {
9853daa177SKazu Hirata       if (Directives->has_value())
99f4b90773SFangrui Song         return ArrayRef<dependency_directives_scan::Directive>(**Directives);
100b4c83a13SArgyrios Kyrtzidis     }
1015891420eSKazu Hirata     return std::nullopt;
102f6680345SJan Svoboda   }
103f6680345SJan Svoboda 
104ced077e1SJan Svoboda   /// \returns The error.
1055daeada3SJan Svoboda   std::error_code getError() const { return MaybeStat.getError(); }
106ced077e1SJan Svoboda 
1075daeada3SJan Svoboda   /// \returns The entry status with empty filename.
108ced077e1SJan Svoboda   llvm::vfs::Status getStatus() const {
109ced077e1SJan Svoboda     assert(!isError() && "error");
1105daeada3SJan Svoboda     assert(MaybeStat->getName().empty() && "stat name must be empty");
111ced077e1SJan Svoboda     return *MaybeStat;
112e1f4c4aaSAlex Lorenz   }
113e1f4c4aaSAlex Lorenz 
1145daeada3SJan Svoboda   /// \returns The unique ID of the entry.
1155daeada3SJan Svoboda   llvm::sys::fs::UniqueID getUniqueID() const {
116ced077e1SJan Svoboda     assert(!isError() && "error");
1175daeada3SJan Svoboda     return MaybeStat->getUniqueID();
118e1f4c4aaSAlex Lorenz   }
119e1f4c4aaSAlex Lorenz 
120b58a420fSArgyrios Kyrtzidis   /// \returns The data structure holding both contents and directive tokens.
121b58a420fSArgyrios Kyrtzidis   CachedFileContents *getCachedContents() const {
1225daeada3SJan Svoboda     assert(!isError() && "error");
1235daeada3SJan Svoboda     assert(!isDirectory() && "not a file");
1245daeada3SJan Svoboda     return Contents;
125ca6e6097SAlex Lorenz   }
126ca6e6097SAlex Lorenz 
127e1f4c4aaSAlex Lorenz private:
1285daeada3SJan Svoboda   void clearStatName() {
1295daeada3SJan Svoboda     if (MaybeStat)
1305daeada3SJan Svoboda       MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
1315daeada3SJan Svoboda   }
1325daeada3SJan Svoboda 
1335daeada3SJan Svoboda   /// Either the filesystem error or status of the entry.
1345daeada3SJan Svoboda   /// The filename is empty and only materialized by \c EntryRef.
135e1f4c4aaSAlex Lorenz   llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
136f6680345SJan Svoboda 
1375daeada3SJan Svoboda   /// Non-owning pointer to the file contents.
1385daeada3SJan Svoboda   ///
1395daeada3SJan Svoboda   /// We're using pointer here to keep the size of this class small. Instances
1405daeada3SJan Svoboda   /// representing directories and filesystem errors don't hold any contents
1415daeada3SJan Svoboda   /// anyway.
1425daeada3SJan Svoboda   CachedFileContents *Contents;
143e1f4c4aaSAlex Lorenz };
144e1f4c4aaSAlex Lorenz 
145a11a4324SJan Svoboda using CachedRealPath = llvm::ErrorOr<std::string>;
146a11a4324SJan Svoboda 
147e1f4c4aaSAlex Lorenz /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
148b58a420fSArgyrios Kyrtzidis /// underlying real file system, and the scanned preprocessor directives of
149bc1a2979SJan Svoboda /// files.
150e1f4c4aaSAlex Lorenz ///
151e1f4c4aaSAlex Lorenz /// It is sharded based on the hash of the key to reduce the lock contention for
152e1f4c4aaSAlex Lorenz /// the worker threads.
153e1f4c4aaSAlex Lorenz class DependencyScanningFilesystemSharedCache {
154e1f4c4aaSAlex Lorenz public:
1555daeada3SJan Svoboda   struct CacheShard {
1565daeada3SJan Svoboda     /// The mutex that needs to be locked before mutation of any member.
1575daeada3SJan Svoboda     mutable std::mutex CacheLock;
1585daeada3SJan Svoboda 
159a11a4324SJan Svoboda     /// Map from filenames to cached entries and real paths.
160a11a4324SJan Svoboda     llvm::StringMap<
161a11a4324SJan Svoboda         std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
162a11a4324SJan Svoboda         llvm::BumpPtrAllocator>
163a11a4324SJan Svoboda         CacheByFilename;
1645daeada3SJan Svoboda 
1655daeada3SJan Svoboda     /// Map from unique IDs to cached entries.
1665daeada3SJan Svoboda     llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
1675daeada3SJan Svoboda         EntriesByUID;
1685daeada3SJan Svoboda 
1695daeada3SJan Svoboda     /// The backing storage for cached entries.
1705daeada3SJan Svoboda     llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
1715daeada3SJan Svoboda 
1725daeada3SJan Svoboda     /// The backing storage for cached contents.
1735daeada3SJan Svoboda     llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
1745daeada3SJan Svoboda 
175a11a4324SJan Svoboda     /// The backing storage for cached real paths.
176a11a4324SJan Svoboda     llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
177a11a4324SJan Svoboda 
1785daeada3SJan Svoboda     /// Returns entry associated with the filename or nullptr if none is found.
1795daeada3SJan Svoboda     const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
1805daeada3SJan Svoboda 
1815daeada3SJan Svoboda     /// Returns entry associated with the unique ID or nullptr if none is found.
1825daeada3SJan Svoboda     const CachedFileSystemEntry *
1835daeada3SJan Svoboda     findEntryByUID(llvm::sys::fs::UniqueID UID) const;
1845daeada3SJan Svoboda 
1855daeada3SJan Svoboda     /// Returns entry associated with the filename if there is some. Otherwise,
1865daeada3SJan Svoboda     /// constructs new one with the given status, associates it with the
1875daeada3SJan Svoboda     /// filename and returns the result.
1885daeada3SJan Svoboda     const CachedFileSystemEntry &
1895daeada3SJan Svoboda     getOrEmplaceEntryForFilename(StringRef Filename,
1905daeada3SJan Svoboda                                  llvm::ErrorOr<llvm::vfs::Status> Stat);
1915daeada3SJan Svoboda 
1925daeada3SJan Svoboda     /// Returns entry associated with the unique ID if there is some. Otherwise,
1935daeada3SJan Svoboda     /// constructs new one with the given status and contents, associates it
1945daeada3SJan Svoboda     /// with the unique ID and returns the result.
1955daeada3SJan Svoboda     const CachedFileSystemEntry &
1965daeada3SJan Svoboda     getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
1975daeada3SJan Svoboda                             std::unique_ptr<llvm::MemoryBuffer> Contents);
1985daeada3SJan Svoboda 
1995daeada3SJan Svoboda     /// Returns entry associated with the filename if there is some. Otherwise,
2005daeada3SJan Svoboda     /// associates the given entry with the filename and returns it.
2015daeada3SJan Svoboda     const CachedFileSystemEntry &
2025daeada3SJan Svoboda     getOrInsertEntryForFilename(StringRef Filename,
2035daeada3SJan Svoboda                                 const CachedFileSystemEntry &Entry);
204a11a4324SJan Svoboda 
205a11a4324SJan Svoboda     /// Returns the real path associated with the filename or nullptr if none is
206a11a4324SJan Svoboda     /// found.
207a11a4324SJan Svoboda     const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
208a11a4324SJan Svoboda 
209a11a4324SJan Svoboda     /// Returns the real path associated with the filename if there is some.
210a11a4324SJan Svoboda     /// Otherwise, constructs new one with the given one, associates it with the
211a11a4324SJan Svoboda     /// filename and returns the result.
212a11a4324SJan Svoboda     const CachedRealPath &
213a11a4324SJan Svoboda     getOrEmplaceRealPathForFilename(StringRef Filename,
214a11a4324SJan Svoboda                                     llvm::ErrorOr<StringRef> RealPath);
215e1f4c4aaSAlex Lorenz   };
216e1f4c4aaSAlex Lorenz 
217f6680345SJan Svoboda   DependencyScanningFilesystemSharedCache();
218f6680345SJan Svoboda 
2195daeada3SJan Svoboda   /// Returns shard for the given key.
2205daeada3SJan Svoboda   CacheShard &getShardForFilename(StringRef Filename) const;
2215daeada3SJan Svoboda   CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
222e1f4c4aaSAlex Lorenz 
223e1f4c4aaSAlex Lorenz private:
224e1f4c4aaSAlex Lorenz   std::unique_ptr<CacheShard[]> CacheShards;
225e1f4c4aaSAlex Lorenz   unsigned NumShards;
226e1f4c4aaSAlex Lorenz };
227e1f4c4aaSAlex Lorenz 
228bc1a2979SJan Svoboda /// This class is a local cache, that caches the 'stat' and 'open' calls to the
229b58a420fSArgyrios Kyrtzidis /// underlying real file system.
230bc1a2979SJan Svoboda class DependencyScanningFilesystemLocalCache {
231a11a4324SJan Svoboda   llvm::StringMap<
232a11a4324SJan Svoboda       std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
233a11a4324SJan Svoboda       llvm::BumpPtrAllocator>
234a11a4324SJan Svoboda       Cache;
235bc1a2979SJan Svoboda 
236bc1a2979SJan Svoboda public:
2375daeada3SJan Svoboda   /// Returns entry associated with the filename or nullptr if none is found.
2385daeada3SJan Svoboda   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
23936b37c77SArgyrios Kyrtzidis     assert(llvm::sys::path::is_absolute_gnu(Filename));
2405daeada3SJan Svoboda     auto It = Cache.find(Filename);
241a11a4324SJan Svoboda     return It == Cache.end() ? nullptr : It->getValue().first;
2425daeada3SJan Svoboda   }
2435daeada3SJan Svoboda 
2445daeada3SJan Svoboda   /// Associates the given entry with the filename and returns the given entry
2455daeada3SJan Svoboda   /// pointer (for convenience).
2465daeada3SJan Svoboda   const CachedFileSystemEntry &
2475daeada3SJan Svoboda   insertEntryForFilename(StringRef Filename,
2485daeada3SJan Svoboda                          const CachedFileSystemEntry &Entry) {
24936b37c77SArgyrios Kyrtzidis     assert(llvm::sys::path::is_absolute_gnu(Filename));
250a11a4324SJan Svoboda     auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
251a11a4324SJan Svoboda     auto &[CachedEntry, CachedRealPath] = It->getValue();
252a11a4324SJan Svoboda     if (!Inserted) {
253a11a4324SJan Svoboda       // The file is already present in the local cache. If we got here, it only
254a11a4324SJan Svoboda       // contains the real path. Let's make sure the entry is populated too.
255a11a4324SJan Svoboda       assert((!CachedEntry && CachedRealPath) && "entry already present");
256a11a4324SJan Svoboda       CachedEntry = &Entry;
257a11a4324SJan Svoboda     }
258a11a4324SJan Svoboda     return *CachedEntry;
259a11a4324SJan Svoboda   }
260a11a4324SJan Svoboda 
261a11a4324SJan Svoboda   /// Returns real path associated with the filename or nullptr if none is
262a11a4324SJan Svoboda   /// found.
263a11a4324SJan Svoboda   const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
264a11a4324SJan Svoboda     assert(llvm::sys::path::is_absolute_gnu(Filename));
265a11a4324SJan Svoboda     auto It = Cache.find(Filename);
266a11a4324SJan Svoboda     return It == Cache.end() ? nullptr : It->getValue().second;
267a11a4324SJan Svoboda   }
268a11a4324SJan Svoboda 
269a11a4324SJan Svoboda   /// Associates the given real path with the filename and returns the given
270a11a4324SJan Svoboda   /// entry pointer (for convenience).
271a11a4324SJan Svoboda   const CachedRealPath &
272a11a4324SJan Svoboda   insertRealPathForFilename(StringRef Filename,
273a11a4324SJan Svoboda                             const CachedRealPath &RealPath) {
274a11a4324SJan Svoboda     assert(llvm::sys::path::is_absolute_gnu(Filename));
275a11a4324SJan Svoboda     auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
276a11a4324SJan Svoboda     auto &[CachedEntry, CachedRealPath] = It->getValue();
277a11a4324SJan Svoboda     if (!Inserted) {
278a11a4324SJan Svoboda       // The file is already present in the local cache. If we got here, it only
279a11a4324SJan Svoboda       // contains the entry. Let's make sure the real path is populated too.
280a11a4324SJan Svoboda       assert((!CachedRealPath && CachedEntry) && "real path already present");
281a11a4324SJan Svoboda       CachedRealPath = &RealPath;
282a11a4324SJan Svoboda     }
283a11a4324SJan Svoboda     return *CachedRealPath;
284f6680345SJan Svoboda   }
285f6680345SJan Svoboda };
286f6680345SJan Svoboda 
287f6680345SJan Svoboda /// Reference to a CachedFileSystemEntry.
288b58a420fSArgyrios Kyrtzidis /// If the underlying entry is an opened file, this wrapper returns the file
289b58a420fSArgyrios Kyrtzidis /// contents and the scanned preprocessor directives.
290f6680345SJan Svoboda class EntryRef {
2915daeada3SJan Svoboda   /// The filename used to access this entry.
2925daeada3SJan Svoboda   std::string Filename;
2935daeada3SJan Svoboda 
294f6680345SJan Svoboda   /// The underlying cached entry.
295bcdf7f5eSJan Svoboda   const CachedFileSystemEntry &Entry;
296f6680345SJan Svoboda 
297b768a8c1SJan Svoboda   friend class DependencyScanningWorkerFilesystem;
298b768a8c1SJan Svoboda 
299f6680345SJan Svoboda public:
300b4c83a13SArgyrios Kyrtzidis   EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
301b4c83a13SArgyrios Kyrtzidis       : Filename(Name), Entry(Entry) {}
302f6680345SJan Svoboda 
303ced077e1SJan Svoboda   llvm::vfs::Status getStatus() const {
304ced077e1SJan Svoboda     llvm::vfs::Status Stat = Entry.getStatus();
3055daeada3SJan Svoboda     if (!Stat.isDirectory())
3065daeada3SJan Svoboda       Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
3075daeada3SJan Svoboda     return llvm::vfs::Status::copyWithNewName(Stat, Filename);
308bc1a2979SJan Svoboda   }
309bc1a2979SJan Svoboda 
310ced077e1SJan Svoboda   bool isError() const { return Entry.isError(); }
311bcdf7f5eSJan Svoboda   bool isDirectory() const { return Entry.isDirectory(); }
312f6680345SJan Svoboda 
313ced077e1SJan Svoboda   /// If the cached entry represents an error, promotes it into `ErrorOr`.
314ced077e1SJan Svoboda   llvm::ErrorOr<EntryRef> unwrapError() const {
315ced077e1SJan Svoboda     if (isError())
316ced077e1SJan Svoboda       return Entry.getError();
317ced077e1SJan Svoboda     return *this;
318ced077e1SJan Svoboda   }
319ced077e1SJan Svoboda 
320b4c83a13SArgyrios Kyrtzidis   StringRef getContents() const { return Entry.getOriginalContents(); }
321f6680345SJan Svoboda 
3226ad0788cSKazu Hirata   std::optional<ArrayRef<dependency_directives_scan::Directive>>
323b4c83a13SArgyrios Kyrtzidis   getDirectiveTokens() const {
324b4c83a13SArgyrios Kyrtzidis     return Entry.getDirectiveTokens();
325bc1a2979SJan Svoboda   }
326bc1a2979SJan Svoboda };
327bc1a2979SJan Svoboda 
328e1f4c4aaSAlex Lorenz /// A virtual file system optimized for the dependency discovery.
329e1f4c4aaSAlex Lorenz ///
33094738a5aSRageking8 /// It is primarily designed to work with source files whose contents was
331e1f4c4aaSAlex Lorenz /// preprocessed to remove any tokens that are unlikely to affect the dependency
332e1f4c4aaSAlex Lorenz /// computation.
333e1f4c4aaSAlex Lorenz ///
334e1f4c4aaSAlex Lorenz /// This is not a thread safe VFS. A single instance is meant to be used only in
335e1f4c4aaSAlex Lorenz /// one thread. Multiple instances are allowed to service multiple threads
336e1f4c4aaSAlex Lorenz /// running in parallel.
3377847e445SMichael Spencer class DependencyScanningWorkerFilesystem
3387847e445SMichael Spencer     : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
3397847e445SMichael Spencer                                llvm::vfs::ProxyFileSystem> {
340e1f4c4aaSAlex Lorenz public:
3417847e445SMichael Spencer   static const char ID;
3427847e445SMichael Spencer 
343e1f4c4aaSAlex Lorenz   DependencyScanningWorkerFilesystem(
344e1f4c4aaSAlex Lorenz       DependencyScanningFilesystemSharedCache &SharedCache,
34536b37c77SArgyrios Kyrtzidis       IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
346e1f4c4aaSAlex Lorenz 
347e1f4c4aaSAlex Lorenz   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
348e1f4c4aaSAlex Lorenz   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
349*efdb3ae2SAbhina Sreeskantharajan   openFileForRead(const Twine &Path) override;
350e1f4c4aaSAlex Lorenz 
351a11a4324SJan Svoboda   std::error_code getRealPath(const Twine &Path,
352a11a4324SJan Svoboda                               SmallVectorImpl<char> &Output) override;
353a11a4324SJan Svoboda 
35436b37c77SArgyrios Kyrtzidis   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
35536b37c77SArgyrios Kyrtzidis 
35655323ca6SJan Svoboda   /// Make it so that no paths bypass this VFS.
35755323ca6SJan Svoboda   void resetBypassedPathPrefix() { BypassedPathPrefix.reset(); }
35855323ca6SJan Svoboda   /// Set the prefix for paths that should bypass this VFS and go straight to
35955323ca6SJan Svoboda   /// the underlying VFS.
36055323ca6SJan Svoboda   void setBypassedPathPrefix(StringRef Prefix) { BypassedPathPrefix = Prefix; }
36155323ca6SJan Svoboda 
3625daeada3SJan Svoboda   /// Returns entry for the given filename.
3635daeada3SJan Svoboda   ///
3645daeada3SJan Svoboda   /// Attempts to use the local and shared caches first, then falls back to
3655daeada3SJan Svoboda   /// using the underlying filesystem.
366b768a8c1SJan Svoboda   llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
367b768a8c1SJan Svoboda 
368b768a8c1SJan Svoboda   /// Ensure the directive tokens are populated for this file entry.
369b768a8c1SJan Svoboda   ///
370b768a8c1SJan Svoboda   /// Returns true if the directive tokens are populated for this file entry,
371b768a8c1SJan Svoboda   /// false if not (i.e. this entry is not a file or its scan fails).
3720559eaffSNishith Kumar M Shah   bool ensureDirectiveTokensArePopulated(EntryRef Entry);
3734abac533SKousik Kumar 
374779ba604SArtem Chikin   /// Check whether \p Path exists. By default checks cached result of \c
375779ba604SArtem Chikin   /// status(), and falls back on FS if unable to do so.
376779ba604SArtem Chikin   bool exists(const Twine &Path) override;
377779ba604SArtem Chikin 
378b4c83a13SArgyrios Kyrtzidis private:
3795daeada3SJan Svoboda   /// For a filename that's not yet associated with any entry in the caches,
3805daeada3SJan Svoboda   /// uses the underlying filesystem to either look up the entry based in the
3815daeada3SJan Svoboda   /// shared cache indexed by unique ID, or creates new entry from scratch.
38236b37c77SArgyrios Kyrtzidis   /// \p FilenameForLookup will always be an absolute path, and different than
38336b37c77SArgyrios Kyrtzidis   /// \p OriginalFilename if \p OriginalFilename is relative.
3845daeada3SJan Svoboda   llvm::ErrorOr<const CachedFileSystemEntry &>
38536b37c77SArgyrios Kyrtzidis   computeAndStoreResult(StringRef OriginalFilename,
38636b37c77SArgyrios Kyrtzidis                         StringRef FilenameForLookup);
3875daeada3SJan Svoboda 
3885daeada3SJan Svoboda   /// Represents a filesystem entry that has been stat-ed (and potentially read)
3895daeada3SJan Svoboda   /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
3905daeada3SJan Svoboda   struct TentativeEntry {
3915daeada3SJan Svoboda     llvm::vfs::Status Status;
3925daeada3SJan Svoboda     std::unique_ptr<llvm::MemoryBuffer> Contents;
3935daeada3SJan Svoboda 
3945daeada3SJan Svoboda     TentativeEntry(llvm::vfs::Status Status,
3955daeada3SJan Svoboda                    std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
3965daeada3SJan Svoboda         : Status(std::move(Status)), Contents(std::move(Contents)) {}
3975daeada3SJan Svoboda   };
3985daeada3SJan Svoboda 
3995daeada3SJan Svoboda   /// Reads file at the given path. Enforces consistency between the file size
4005daeada3SJan Svoboda   /// in status and size of read contents.
4015daeada3SJan Svoboda   llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
4025daeada3SJan Svoboda 
4035daeada3SJan Svoboda   /// Returns entry associated with the unique ID of the given tentative entry
4045daeada3SJan Svoboda   /// if there is some in the shared cache. Otherwise, constructs new one,
4055daeada3SJan Svoboda   /// associates it with the unique ID and returns the result.
4065daeada3SJan Svoboda   const CachedFileSystemEntry &
4075daeada3SJan Svoboda   getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
4085daeada3SJan Svoboda 
4095daeada3SJan Svoboda   /// Returns entry associated with the filename or nullptr if none is found.
4105daeada3SJan Svoboda   ///
4115daeada3SJan Svoboda   /// Returns entry from local cache if there is some. Otherwise, if the entry
4125daeada3SJan Svoboda   /// is found in the shared cache, writes it through the local cache and
4135daeada3SJan Svoboda   /// returns it. Otherwise returns nullptr.
4145daeada3SJan Svoboda   const CachedFileSystemEntry *
4155daeada3SJan Svoboda   findEntryByFilenameWithWriteThrough(StringRef Filename);
4165daeada3SJan Svoboda 
4175daeada3SJan Svoboda   /// Returns entry associated with the unique ID in the shared cache or nullptr
4185daeada3SJan Svoboda   /// if none is found.
4195daeada3SJan Svoboda   const CachedFileSystemEntry *
4205daeada3SJan Svoboda   findSharedEntryByUID(llvm::vfs::Status Stat) const {
4215daeada3SJan Svoboda     return SharedCache.getShardForUID(Stat.getUniqueID())
4225daeada3SJan Svoboda         .findEntryByUID(Stat.getUniqueID());
4235daeada3SJan Svoboda   }
4245daeada3SJan Svoboda 
4255daeada3SJan Svoboda   /// Associates the given entry with the filename in the local cache and
4265daeada3SJan Svoboda   /// returns it.
4275daeada3SJan Svoboda   const CachedFileSystemEntry &
4285daeada3SJan Svoboda   insertLocalEntryForFilename(StringRef Filename,
4295daeada3SJan Svoboda                               const CachedFileSystemEntry &Entry) {
4305daeada3SJan Svoboda     return LocalCache.insertEntryForFilename(Filename, Entry);
4315daeada3SJan Svoboda   }
4325daeada3SJan Svoboda 
4335daeada3SJan Svoboda   /// Returns entry associated with the filename in the shared cache if there is
4345daeada3SJan Svoboda   /// some. Otherwise, constructs new one with the given error code, associates
4355daeada3SJan Svoboda   /// it with the filename and returns the result.
4365daeada3SJan Svoboda   const CachedFileSystemEntry &
4375daeada3SJan Svoboda   getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
4385daeada3SJan Svoboda     return SharedCache.getShardForFilename(Filename)
4395daeada3SJan Svoboda         .getOrEmplaceEntryForFilename(Filename, EC);
4405daeada3SJan Svoboda   }
4415daeada3SJan Svoboda 
4425daeada3SJan Svoboda   /// Returns entry associated with the filename in the shared cache if there is
4435daeada3SJan Svoboda   /// some. Otherwise, associates the given entry with the filename and returns
4445daeada3SJan Svoboda   /// it.
4455daeada3SJan Svoboda   const CachedFileSystemEntry &
4465daeada3SJan Svoboda   getOrInsertSharedEntryForFilename(StringRef Filename,
4475daeada3SJan Svoboda                                     const CachedFileSystemEntry &Entry) {
4485daeada3SJan Svoboda     return SharedCache.getShardForFilename(Filename)
4495daeada3SJan Svoboda         .getOrInsertEntryForFilename(Filename, Entry);
4505daeada3SJan Svoboda   }
4515daeada3SJan Svoboda 
45264435396SJan Svoboda   void printImpl(raw_ostream &OS, PrintType Type,
45364435396SJan Svoboda                  unsigned IndentLevel) const override {
45464435396SJan Svoboda     printIndent(OS, IndentLevel);
45564435396SJan Svoboda     OS << "DependencyScanningFilesystem\n";
45664435396SJan Svoboda     getUnderlyingFS().print(OS, Type, IndentLevel + 1);
45764435396SJan Svoboda   }
45864435396SJan Svoboda 
45955323ca6SJan Svoboda   /// Whether this path should bypass this VFS and go straight to the underlying
46055323ca6SJan Svoboda   /// VFS.
46155323ca6SJan Svoboda   bool shouldBypass(StringRef Path) const;
46255323ca6SJan Svoboda 
463bc1a2979SJan Svoboda   /// The global cache shared between worker threads.
464e1f4c4aaSAlex Lorenz   DependencyScanningFilesystemSharedCache &SharedCache;
465e1f4c4aaSAlex Lorenz   /// The local cache is used by the worker thread to cache file system queries
466e1f4c4aaSAlex Lorenz   /// locally instead of querying the global cache every time.
467195a5294SJan Svoboda   DependencyScanningFilesystemLocalCache LocalCache;
46836b37c77SArgyrios Kyrtzidis 
46955323ca6SJan Svoboda   /// Prefix of paths that should go straight to the underlying VFS.
47055323ca6SJan Svoboda   std::optional<std::string> BypassedPathPrefix;
47155323ca6SJan Svoboda 
47236b37c77SArgyrios Kyrtzidis   /// The working directory to use for making relative paths absolute before
47336b37c77SArgyrios Kyrtzidis   /// using them for cache lookups.
47436b37c77SArgyrios Kyrtzidis   llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
47536b37c77SArgyrios Kyrtzidis 
47636b37c77SArgyrios Kyrtzidis   void updateWorkingDirForCacheLookup();
477fe59cb25SJan Svoboda 
478fe59cb25SJan Svoboda   llvm::ErrorOr<StringRef>
479fe59cb25SJan Svoboda   tryGetFilenameForLookup(StringRef OriginalFilename,
480fe59cb25SJan Svoboda                           llvm::SmallVectorImpl<char> &PathBuf) const;
481e1f4c4aaSAlex Lorenz };
482e1f4c4aaSAlex Lorenz 
483e1f4c4aaSAlex Lorenz } // end namespace dependencies
484e1f4c4aaSAlex Lorenz } // end namespace tooling
485e1f4c4aaSAlex Lorenz } // end namespace clang
486e1f4c4aaSAlex Lorenz 
487b4682816SKazu Hirata #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
488