xref: /openbsd-src/gnu/llvm/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1e5dd7070Spatrick //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick 
9e5dd7070Spatrick #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10e5dd7070Spatrick #include "llvm/Support/MemoryBuffer.h"
11*12c85518Srobert #include "llvm/Support/SmallVectorMemoryBuffer.h"
12e5dd7070Spatrick #include "llvm/Support/Threading.h"
13*12c85518Srobert #include <optional>
14e5dd7070Spatrick 
15e5dd7070Spatrick using namespace clang;
16e5dd7070Spatrick using namespace tooling;
17e5dd7070Spatrick using namespace dependencies;
18e5dd7070Spatrick 
19*12c85518Srobert llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
readFile(StringRef Filename)20*12c85518Srobert DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21e5dd7070Spatrick   // Load the file and its content from the file system.
22*12c85518Srobert   auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23e5dd7070Spatrick   if (!MaybeFile)
24e5dd7070Spatrick     return MaybeFile.getError();
25*12c85518Srobert   auto File = std::move(*MaybeFile);
26e5dd7070Spatrick 
27*12c85518Srobert   auto MaybeStat = File->status();
28*12c85518Srobert   if (!MaybeStat)
29*12c85518Srobert     return MaybeStat.getError();
30*12c85518Srobert   auto Stat = std::move(*MaybeStat);
31*12c85518Srobert 
32*12c85518Srobert   auto MaybeBuffer = File->getBuffer(Stat.getName());
33e5dd7070Spatrick   if (!MaybeBuffer)
34e5dd7070Spatrick     return MaybeBuffer.getError();
35*12c85518Srobert   auto Buffer = std::move(*MaybeBuffer);
36e5dd7070Spatrick 
37*12c85518Srobert   // If the file size changed between read and stat, pretend it didn't.
38*12c85518Srobert   if (Stat.getSize() != Buffer->getBufferSize())
39*12c85518Srobert     Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40*12c85518Srobert 
41*12c85518Srobert   return TentativeEntry(Stat, std::move(Buffer));
42e5dd7070Spatrick }
43e5dd7070Spatrick 
scanForDirectivesIfNecessary(const CachedFileSystemEntry & Entry,StringRef Filename,bool Disable)44*12c85518Srobert EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
45*12c85518Srobert     const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
46*12c85518Srobert   if (Entry.isError() || Entry.isDirectory() || Disable ||
47*12c85518Srobert       !shouldScanForDirectives(Filename))
48*12c85518Srobert     return EntryRef(Filename, Entry);
49e5dd7070Spatrick 
50*12c85518Srobert   CachedFileContents *Contents = Entry.getCachedContents();
51*12c85518Srobert   assert(Contents && "contents not initialized");
52e5dd7070Spatrick 
53*12c85518Srobert   // Double-checked locking.
54*12c85518Srobert   if (Contents->DepDirectives.load())
55*12c85518Srobert     return EntryRef(Filename, Entry);
56*12c85518Srobert 
57*12c85518Srobert   std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
58*12c85518Srobert 
59*12c85518Srobert   // Double-checked locking.
60*12c85518Srobert   if (Contents->DepDirectives.load())
61*12c85518Srobert     return EntryRef(Filename, Entry);
62*12c85518Srobert 
63*12c85518Srobert   SmallVector<dependency_directives_scan::Directive, 64> Directives;
64*12c85518Srobert   // Scan the file for preprocessor directives that might affect the
65*12c85518Srobert   // dependencies.
66*12c85518Srobert   if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
67*12c85518Srobert                                         Contents->DepDirectiveTokens,
68*12c85518Srobert                                         Directives)) {
69*12c85518Srobert     Contents->DepDirectiveTokens.clear();
70*12c85518Srobert     // FIXME: Propagate the diagnostic if desired by the client.
71*12c85518Srobert     Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
72*12c85518Srobert     return EntryRef(Filename, Entry);
73e5dd7070Spatrick   }
74e5dd7070Spatrick 
75*12c85518Srobert   // This function performed double-checked locking using `DepDirectives`.
76*12c85518Srobert   // Assigning it must be the last thing this function does, otherwise other
77*12c85518Srobert   // threads may skip the
78*12c85518Srobert   // critical section (`DepDirectives != nullptr`), leading to a data race.
79*12c85518Srobert   Contents->DepDirectives.store(
80*12c85518Srobert       new std::optional<DependencyDirectivesTy>(std::move(Directives)));
81*12c85518Srobert   return EntryRef(Filename, Entry);
82e5dd7070Spatrick }
83e5dd7070Spatrick 
84*12c85518Srobert DependencyScanningFilesystemSharedCache::
DependencyScanningFilesystemSharedCache()85*12c85518Srobert     DependencyScanningFilesystemSharedCache() {
86e5dd7070Spatrick   // This heuristic was chosen using a empirical testing on a
87e5dd7070Spatrick   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88e5dd7070Spatrick   // sharding gives a performance edge by reducing the lock contention.
89e5dd7070Spatrick   // FIXME: A better heuristic might also consider the OS to account for
90e5dd7070Spatrick   // the different cost of lock contention on different OSes.
91ec727ea7Spatrick   NumShards =
92ec727ea7Spatrick       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93e5dd7070Spatrick   CacheShards = std::make_unique<CacheShard[]>(NumShards);
94e5dd7070Spatrick }
95e5dd7070Spatrick 
96*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard &
getShardForFilename(StringRef Filename) const97*12c85518Srobert DependencyScanningFilesystemSharedCache::getShardForFilename(
98*12c85518Srobert     StringRef Filename) const {
99*12c85518Srobert   return CacheShards[llvm::hash_value(Filename) % NumShards];
100e5dd7070Spatrick }
101e5dd7070Spatrick 
102*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard &
getShardForUID(llvm::sys::fs::UniqueID UID) const103*12c85518Srobert DependencyScanningFilesystemSharedCache::getShardForUID(
104*12c85518Srobert     llvm::sys::fs::UniqueID UID) const {
105*12c85518Srobert   auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
106*12c85518Srobert   return CacheShards[Hash % NumShards];
107*12c85518Srobert }
108*12c85518Srobert 
109*12c85518Srobert const CachedFileSystemEntry *
findEntryByFilename(StringRef Filename) const110*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
111*12c85518Srobert     StringRef Filename) const {
112*12c85518Srobert   std::lock_guard<std::mutex> LockGuard(CacheLock);
113*12c85518Srobert   auto It = EntriesByFilename.find(Filename);
114*12c85518Srobert   return It == EntriesByFilename.end() ? nullptr : It->getValue();
115*12c85518Srobert }
116*12c85518Srobert 
117*12c85518Srobert const CachedFileSystemEntry *
findEntryByUID(llvm::sys::fs::UniqueID UID) const118*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
119*12c85518Srobert     llvm::sys::fs::UniqueID UID) const {
120*12c85518Srobert   std::lock_guard<std::mutex> LockGuard(CacheLock);
121*12c85518Srobert   auto It = EntriesByUID.find(UID);
122*12c85518Srobert   return It == EntriesByUID.end() ? nullptr : It->getSecond();
123*12c85518Srobert }
124*12c85518Srobert 
125*12c85518Srobert const CachedFileSystemEntry &
126*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::
getOrEmplaceEntryForFilename(StringRef Filename,llvm::ErrorOr<llvm::vfs::Status> Stat)127*12c85518Srobert     getOrEmplaceEntryForFilename(StringRef Filename,
128*12c85518Srobert                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
129*12c85518Srobert   std::lock_guard<std::mutex> LockGuard(CacheLock);
130*12c85518Srobert   auto Insertion = EntriesByFilename.insert({Filename, nullptr});
131*12c85518Srobert   if (Insertion.second)
132*12c85518Srobert     Insertion.first->second =
133*12c85518Srobert         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
134*12c85518Srobert   return *Insertion.first->second;
135*12c85518Srobert }
136*12c85518Srobert 
137*12c85518Srobert const CachedFileSystemEntry &
getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID,llvm::vfs::Status Stat,std::unique_ptr<llvm::MemoryBuffer> Contents)138*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
139*12c85518Srobert     llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
140*12c85518Srobert     std::unique_ptr<llvm::MemoryBuffer> Contents) {
141*12c85518Srobert   std::lock_guard<std::mutex> LockGuard(CacheLock);
142*12c85518Srobert   auto Insertion = EntriesByUID.insert({UID, nullptr});
143*12c85518Srobert   if (Insertion.second) {
144*12c85518Srobert     CachedFileContents *StoredContents = nullptr;
145*12c85518Srobert     if (Contents)
146*12c85518Srobert       StoredContents = new (ContentsStorage.Allocate())
147*12c85518Srobert           CachedFileContents(std::move(Contents));
148*12c85518Srobert     Insertion.first->second = new (EntryStorage.Allocate())
149*12c85518Srobert         CachedFileSystemEntry(std::move(Stat), StoredContents);
150*12c85518Srobert   }
151*12c85518Srobert   return *Insertion.first->second;
152*12c85518Srobert }
153*12c85518Srobert 
154*12c85518Srobert const CachedFileSystemEntry &
155*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::
getOrInsertEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)156*12c85518Srobert     getOrInsertEntryForFilename(StringRef Filename,
157*12c85518Srobert                                 const CachedFileSystemEntry &Entry) {
158*12c85518Srobert   std::lock_guard<std::mutex> LockGuard(CacheLock);
159*12c85518Srobert   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
160a9ac8606Spatrick }
161a9ac8606Spatrick 
162e5dd7070Spatrick /// Whitelist file extensions that should be minimized, treating no extension as
163e5dd7070Spatrick /// a source file that should be minimized.
164e5dd7070Spatrick ///
165e5dd7070Spatrick /// This is kinda hacky, it would be better if we knew what kind of file Clang
166e5dd7070Spatrick /// was expecting instead.
shouldScanForDirectivesBasedOnExtension(StringRef Filename)167*12c85518Srobert static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
168e5dd7070Spatrick   StringRef Ext = llvm::sys::path::extension(Filename);
169e5dd7070Spatrick   if (Ext.empty())
170e5dd7070Spatrick     return true; // C++ standard library
171e5dd7070Spatrick   return llvm::StringSwitch<bool>(Ext)
172e5dd7070Spatrick       .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
173e5dd7070Spatrick       .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
174e5dd7070Spatrick       .CasesLower(".m", ".mm", true)
175e5dd7070Spatrick       .CasesLower(".i", ".ii", ".mi", ".mmi", true)
176e5dd7070Spatrick       .CasesLower(".def", ".inc", true)
177e5dd7070Spatrick       .Default(false);
178e5dd7070Spatrick }
179e5dd7070Spatrick 
shouldCacheStatFailures(StringRef Filename)180e5dd7070Spatrick static bool shouldCacheStatFailures(StringRef Filename) {
181e5dd7070Spatrick   StringRef Ext = llvm::sys::path::extension(Filename);
182e5dd7070Spatrick   if (Ext.empty())
183e5dd7070Spatrick     return false; // This may be the module cache directory.
184*12c85518Srobert   // Only cache stat failures on source files.
185*12c85518Srobert   return shouldScanForDirectivesBasedOnExtension(Filename);
186e5dd7070Spatrick }
187e5dd7070Spatrick 
shouldScanForDirectives(StringRef Filename)188*12c85518Srobert bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
189*12c85518Srobert     StringRef Filename) {
190*12c85518Srobert   return shouldScanForDirectivesBasedOnExtension(Filename);
191a9ac8606Spatrick }
192a9ac8606Spatrick 
193*12c85518Srobert const CachedFileSystemEntry &
getOrEmplaceSharedEntryForUID(TentativeEntry TEntry)194*12c85518Srobert DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
195*12c85518Srobert     TentativeEntry TEntry) {
196*12c85518Srobert   auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
197*12c85518Srobert   return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
198*12c85518Srobert                                        std::move(TEntry.Status),
199*12c85518Srobert                                        std::move(TEntry.Contents));
200a9ac8606Spatrick }
201a9ac8606Spatrick 
202*12c85518Srobert const CachedFileSystemEntry *
findEntryByFilenameWithWriteThrough(StringRef Filename)203*12c85518Srobert DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
204*12c85518Srobert     StringRef Filename) {
205*12c85518Srobert   if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
206e5dd7070Spatrick     return Entry;
207*12c85518Srobert   auto &Shard = SharedCache.getShardForFilename(Filename);
208*12c85518Srobert   if (const auto *Entry = Shard.findEntryByFilename(Filename))
209*12c85518Srobert     return &LocalCache.insertEntryForFilename(Filename, *Entry);
210*12c85518Srobert   return nullptr;
211*12c85518Srobert }
212e5dd7070Spatrick 
213*12c85518Srobert llvm::ErrorOr<const CachedFileSystemEntry &>
computeAndStoreResult(StringRef Filename)214*12c85518Srobert DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
215*12c85518Srobert   llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
216*12c85518Srobert   if (!Stat) {
217e5dd7070Spatrick     if (!shouldCacheStatFailures(Filename))
218*12c85518Srobert       return Stat.getError();
219*12c85518Srobert     const auto &Entry =
220*12c85518Srobert         getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
221*12c85518Srobert     return insertLocalEntryForFilename(Filename, Entry);
222e5dd7070Spatrick   }
223e5dd7070Spatrick 
224*12c85518Srobert   if (const auto *Entry = findSharedEntryByUID(*Stat))
225*12c85518Srobert     return insertLocalEntryForFilename(Filename, *Entry);
226*12c85518Srobert 
227*12c85518Srobert   auto TEntry =
228*12c85518Srobert       Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
229*12c85518Srobert 
230*12c85518Srobert   const CachedFileSystemEntry *SharedEntry = [&]() {
231*12c85518Srobert     if (TEntry) {
232*12c85518Srobert       const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
233*12c85518Srobert       return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
234*12c85518Srobert     }
235*12c85518Srobert     return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
236*12c85518Srobert   }();
237*12c85518Srobert 
238*12c85518Srobert   return insertLocalEntryForFilename(Filename, *SharedEntry);
239e5dd7070Spatrick }
240e5dd7070Spatrick 
241*12c85518Srobert llvm::ErrorOr<EntryRef>
getOrCreateFileSystemEntry(StringRef Filename,bool DisableDirectivesScanning)242*12c85518Srobert DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
243*12c85518Srobert     StringRef Filename, bool DisableDirectivesScanning) {
244*12c85518Srobert   if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
245*12c85518Srobert     return scanForDirectivesIfNecessary(*Entry, Filename,
246*12c85518Srobert                                         DisableDirectivesScanning)
247*12c85518Srobert         .unwrapError();
248*12c85518Srobert   auto MaybeEntry = computeAndStoreResult(Filename);
249*12c85518Srobert   if (!MaybeEntry)
250*12c85518Srobert     return MaybeEntry.getError();
251*12c85518Srobert   return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
252*12c85518Srobert                                       DisableDirectivesScanning)
253*12c85518Srobert       .unwrapError();
254e5dd7070Spatrick }
255e5dd7070Spatrick 
256e5dd7070Spatrick llvm::ErrorOr<llvm::vfs::Status>
status(const Twine & Path)257e5dd7070Spatrick DependencyScanningWorkerFilesystem::status(const Twine &Path) {
258e5dd7070Spatrick   SmallString<256> OwnedFilename;
259e5dd7070Spatrick   StringRef Filename = Path.toStringRef(OwnedFilename);
260*12c85518Srobert 
261*12c85518Srobert   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
262e5dd7070Spatrick   if (!Result)
263e5dd7070Spatrick     return Result.getError();
264*12c85518Srobert   return Result->getStatus();
265e5dd7070Spatrick }
266e5dd7070Spatrick 
267e5dd7070Spatrick namespace {
268e5dd7070Spatrick 
269e5dd7070Spatrick /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
270e5dd7070Spatrick /// this subclass.
271*12c85518Srobert class DepScanFile final : public llvm::vfs::File {
272e5dd7070Spatrick public:
DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,llvm::vfs::Status Stat)273*12c85518Srobert   DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
274e5dd7070Spatrick               llvm::vfs::Status Stat)
275e5dd7070Spatrick       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
276e5dd7070Spatrick 
277*12c85518Srobert   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
278e5dd7070Spatrick 
status()279a9ac8606Spatrick   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
280e5dd7070Spatrick 
281e5dd7070Spatrick   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBuffer(const Twine & Name,int64_t FileSize,bool RequiresNullTerminator,bool IsVolatile)282e5dd7070Spatrick   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
283e5dd7070Spatrick             bool IsVolatile) override {
284e5dd7070Spatrick     return std::move(Buffer);
285e5dd7070Spatrick   }
286e5dd7070Spatrick 
close()287e5dd7070Spatrick   std::error_code close() override { return {}; }
288e5dd7070Spatrick 
289e5dd7070Spatrick private:
290e5dd7070Spatrick   std::unique_ptr<llvm::MemoryBuffer> Buffer;
291e5dd7070Spatrick   llvm::vfs::Status Stat;
292e5dd7070Spatrick };
293e5dd7070Spatrick 
294a9ac8606Spatrick } // end anonymous namespace
295a9ac8606Spatrick 
296*12c85518Srobert llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
create(EntryRef Entry)297*12c85518Srobert DepScanFile::create(EntryRef Entry) {
298*12c85518Srobert   assert(!Entry.isError() && "error");
299*12c85518Srobert 
300*12c85518Srobert   if (Entry.isDirectory())
301*12c85518Srobert     return std::make_error_code(std::errc::is_a_directory);
302*12c85518Srobert 
303*12c85518Srobert   auto Result = std::make_unique<DepScanFile>(
304*12c85518Srobert       llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
305*12c85518Srobert                                        Entry.getStatus().getName(),
306e5dd7070Spatrick                                        /*RequiresNullTerminator=*/false),
307*12c85518Srobert       Entry.getStatus());
308*12c85518Srobert 
309e5dd7070Spatrick   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
310e5dd7070Spatrick       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
311e5dd7070Spatrick }
312e5dd7070Spatrick 
313e5dd7070Spatrick llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine & Path)314e5dd7070Spatrick DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
315e5dd7070Spatrick   SmallString<256> OwnedFilename;
316e5dd7070Spatrick   StringRef Filename = Path.toStringRef(OwnedFilename);
317e5dd7070Spatrick 
318*12c85518Srobert   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
319e5dd7070Spatrick   if (!Result)
320e5dd7070Spatrick     return Result.getError();
321*12c85518Srobert   return DepScanFile::create(Result.get());
322e5dd7070Spatrick }
323