//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8e5dd7070Spatrick
9e5dd7070Spatrick #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10e5dd7070Spatrick #include "llvm/Support/MemoryBuffer.h"
11*12c85518Srobert #include "llvm/Support/SmallVectorMemoryBuffer.h"
12e5dd7070Spatrick #include "llvm/Support/Threading.h"
13*12c85518Srobert #include <optional>
14e5dd7070Spatrick
15e5dd7070Spatrick using namespace clang;
16e5dd7070Spatrick using namespace tooling;
17e5dd7070Spatrick using namespace dependencies;
18e5dd7070Spatrick
19*12c85518Srobert llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
readFile(StringRef Filename)20*12c85518Srobert DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21e5dd7070Spatrick // Load the file and its content from the file system.
22*12c85518Srobert auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23e5dd7070Spatrick if (!MaybeFile)
24e5dd7070Spatrick return MaybeFile.getError();
25*12c85518Srobert auto File = std::move(*MaybeFile);
26e5dd7070Spatrick
27*12c85518Srobert auto MaybeStat = File->status();
28*12c85518Srobert if (!MaybeStat)
29*12c85518Srobert return MaybeStat.getError();
30*12c85518Srobert auto Stat = std::move(*MaybeStat);
31*12c85518Srobert
32*12c85518Srobert auto MaybeBuffer = File->getBuffer(Stat.getName());
33e5dd7070Spatrick if (!MaybeBuffer)
34e5dd7070Spatrick return MaybeBuffer.getError();
35*12c85518Srobert auto Buffer = std::move(*MaybeBuffer);
36e5dd7070Spatrick
37*12c85518Srobert // If the file size changed between read and stat, pretend it didn't.
38*12c85518Srobert if (Stat.getSize() != Buffer->getBufferSize())
39*12c85518Srobert Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40*12c85518Srobert
41*12c85518Srobert return TentativeEntry(Stat, std::move(Buffer));
42e5dd7070Spatrick }
43e5dd7070Spatrick
scanForDirectivesIfNecessary(const CachedFileSystemEntry & Entry,StringRef Filename,bool Disable)44*12c85518Srobert EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
45*12c85518Srobert const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
46*12c85518Srobert if (Entry.isError() || Entry.isDirectory() || Disable ||
47*12c85518Srobert !shouldScanForDirectives(Filename))
48*12c85518Srobert return EntryRef(Filename, Entry);
49e5dd7070Spatrick
50*12c85518Srobert CachedFileContents *Contents = Entry.getCachedContents();
51*12c85518Srobert assert(Contents && "contents not initialized");
52e5dd7070Spatrick
53*12c85518Srobert // Double-checked locking.
54*12c85518Srobert if (Contents->DepDirectives.load())
55*12c85518Srobert return EntryRef(Filename, Entry);
56*12c85518Srobert
57*12c85518Srobert std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
58*12c85518Srobert
59*12c85518Srobert // Double-checked locking.
60*12c85518Srobert if (Contents->DepDirectives.load())
61*12c85518Srobert return EntryRef(Filename, Entry);
62*12c85518Srobert
63*12c85518Srobert SmallVector<dependency_directives_scan::Directive, 64> Directives;
64*12c85518Srobert // Scan the file for preprocessor directives that might affect the
65*12c85518Srobert // dependencies.
66*12c85518Srobert if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
67*12c85518Srobert Contents->DepDirectiveTokens,
68*12c85518Srobert Directives)) {
69*12c85518Srobert Contents->DepDirectiveTokens.clear();
70*12c85518Srobert // FIXME: Propagate the diagnostic if desired by the client.
71*12c85518Srobert Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
72*12c85518Srobert return EntryRef(Filename, Entry);
73e5dd7070Spatrick }
74e5dd7070Spatrick
75*12c85518Srobert // This function performed double-checked locking using `DepDirectives`.
76*12c85518Srobert // Assigning it must be the last thing this function does, otherwise other
77*12c85518Srobert // threads may skip the
78*12c85518Srobert // critical section (`DepDirectives != nullptr`), leading to a data race.
79*12c85518Srobert Contents->DepDirectives.store(
80*12c85518Srobert new std::optional<DependencyDirectivesTy>(std::move(Directives)));
81*12c85518Srobert return EntryRef(Filename, Entry);
82e5dd7070Spatrick }
83e5dd7070Spatrick
84*12c85518Srobert DependencyScanningFilesystemSharedCache::
DependencyScanningFilesystemSharedCache()85*12c85518Srobert DependencyScanningFilesystemSharedCache() {
86e5dd7070Spatrick // This heuristic was chosen using a empirical testing on a
87e5dd7070Spatrick // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88e5dd7070Spatrick // sharding gives a performance edge by reducing the lock contention.
89e5dd7070Spatrick // FIXME: A better heuristic might also consider the OS to account for
90e5dd7070Spatrick // the different cost of lock contention on different OSes.
91ec727ea7Spatrick NumShards =
92ec727ea7Spatrick std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93e5dd7070Spatrick CacheShards = std::make_unique<CacheShard[]>(NumShards);
94e5dd7070Spatrick }
95e5dd7070Spatrick
96*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard &
getShardForFilename(StringRef Filename) const97*12c85518Srobert DependencyScanningFilesystemSharedCache::getShardForFilename(
98*12c85518Srobert StringRef Filename) const {
99*12c85518Srobert return CacheShards[llvm::hash_value(Filename) % NumShards];
100e5dd7070Spatrick }
101e5dd7070Spatrick
102*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard &
getShardForUID(llvm::sys::fs::UniqueID UID) const103*12c85518Srobert DependencyScanningFilesystemSharedCache::getShardForUID(
104*12c85518Srobert llvm::sys::fs::UniqueID UID) const {
105*12c85518Srobert auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
106*12c85518Srobert return CacheShards[Hash % NumShards];
107*12c85518Srobert }
108*12c85518Srobert
109*12c85518Srobert const CachedFileSystemEntry *
findEntryByFilename(StringRef Filename) const110*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
111*12c85518Srobert StringRef Filename) const {
112*12c85518Srobert std::lock_guard<std::mutex> LockGuard(CacheLock);
113*12c85518Srobert auto It = EntriesByFilename.find(Filename);
114*12c85518Srobert return It == EntriesByFilename.end() ? nullptr : It->getValue();
115*12c85518Srobert }
116*12c85518Srobert
117*12c85518Srobert const CachedFileSystemEntry *
findEntryByUID(llvm::sys::fs::UniqueID UID) const118*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
119*12c85518Srobert llvm::sys::fs::UniqueID UID) const {
120*12c85518Srobert std::lock_guard<std::mutex> LockGuard(CacheLock);
121*12c85518Srobert auto It = EntriesByUID.find(UID);
122*12c85518Srobert return It == EntriesByUID.end() ? nullptr : It->getSecond();
123*12c85518Srobert }
124*12c85518Srobert
125*12c85518Srobert const CachedFileSystemEntry &
126*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::
getOrEmplaceEntryForFilename(StringRef Filename,llvm::ErrorOr<llvm::vfs::Status> Stat)127*12c85518Srobert getOrEmplaceEntryForFilename(StringRef Filename,
128*12c85518Srobert llvm::ErrorOr<llvm::vfs::Status> Stat) {
129*12c85518Srobert std::lock_guard<std::mutex> LockGuard(CacheLock);
130*12c85518Srobert auto Insertion = EntriesByFilename.insert({Filename, nullptr});
131*12c85518Srobert if (Insertion.second)
132*12c85518Srobert Insertion.first->second =
133*12c85518Srobert new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
134*12c85518Srobert return *Insertion.first->second;
135*12c85518Srobert }
136*12c85518Srobert
137*12c85518Srobert const CachedFileSystemEntry &
getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID,llvm::vfs::Status Stat,std::unique_ptr<llvm::MemoryBuffer> Contents)138*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
139*12c85518Srobert llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
140*12c85518Srobert std::unique_ptr<llvm::MemoryBuffer> Contents) {
141*12c85518Srobert std::lock_guard<std::mutex> LockGuard(CacheLock);
142*12c85518Srobert auto Insertion = EntriesByUID.insert({UID, nullptr});
143*12c85518Srobert if (Insertion.second) {
144*12c85518Srobert CachedFileContents *StoredContents = nullptr;
145*12c85518Srobert if (Contents)
146*12c85518Srobert StoredContents = new (ContentsStorage.Allocate())
147*12c85518Srobert CachedFileContents(std::move(Contents));
148*12c85518Srobert Insertion.first->second = new (EntryStorage.Allocate())
149*12c85518Srobert CachedFileSystemEntry(std::move(Stat), StoredContents);
150*12c85518Srobert }
151*12c85518Srobert return *Insertion.first->second;
152*12c85518Srobert }
153*12c85518Srobert
154*12c85518Srobert const CachedFileSystemEntry &
155*12c85518Srobert DependencyScanningFilesystemSharedCache::CacheShard::
getOrInsertEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)156*12c85518Srobert getOrInsertEntryForFilename(StringRef Filename,
157*12c85518Srobert const CachedFileSystemEntry &Entry) {
158*12c85518Srobert std::lock_guard<std::mutex> LockGuard(CacheLock);
159*12c85518Srobert return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
160a9ac8606Spatrick }
161a9ac8606Spatrick
162e5dd7070Spatrick /// Whitelist file extensions that should be minimized, treating no extension as
163e5dd7070Spatrick /// a source file that should be minimized.
164e5dd7070Spatrick ///
165e5dd7070Spatrick /// This is kinda hacky, it would be better if we knew what kind of file Clang
166e5dd7070Spatrick /// was expecting instead.
shouldScanForDirectivesBasedOnExtension(StringRef Filename)167*12c85518Srobert static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
168e5dd7070Spatrick StringRef Ext = llvm::sys::path::extension(Filename);
169e5dd7070Spatrick if (Ext.empty())
170e5dd7070Spatrick return true; // C++ standard library
171e5dd7070Spatrick return llvm::StringSwitch<bool>(Ext)
172e5dd7070Spatrick .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
173e5dd7070Spatrick .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
174e5dd7070Spatrick .CasesLower(".m", ".mm", true)
175e5dd7070Spatrick .CasesLower(".i", ".ii", ".mi", ".mmi", true)
176e5dd7070Spatrick .CasesLower(".def", ".inc", true)
177e5dd7070Spatrick .Default(false);
178e5dd7070Spatrick }
179e5dd7070Spatrick
shouldCacheStatFailures(StringRef Filename)180e5dd7070Spatrick static bool shouldCacheStatFailures(StringRef Filename) {
181e5dd7070Spatrick StringRef Ext = llvm::sys::path::extension(Filename);
182e5dd7070Spatrick if (Ext.empty())
183e5dd7070Spatrick return false; // This may be the module cache directory.
184*12c85518Srobert // Only cache stat failures on source files.
185*12c85518Srobert return shouldScanForDirectivesBasedOnExtension(Filename);
186e5dd7070Spatrick }
187e5dd7070Spatrick
shouldScanForDirectives(StringRef Filename)188*12c85518Srobert bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
189*12c85518Srobert StringRef Filename) {
190*12c85518Srobert return shouldScanForDirectivesBasedOnExtension(Filename);
191a9ac8606Spatrick }
192a9ac8606Spatrick
193*12c85518Srobert const CachedFileSystemEntry &
getOrEmplaceSharedEntryForUID(TentativeEntry TEntry)194*12c85518Srobert DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
195*12c85518Srobert TentativeEntry TEntry) {
196*12c85518Srobert auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
197*12c85518Srobert return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
198*12c85518Srobert std::move(TEntry.Status),
199*12c85518Srobert std::move(TEntry.Contents));
200a9ac8606Spatrick }
201a9ac8606Spatrick
202*12c85518Srobert const CachedFileSystemEntry *
findEntryByFilenameWithWriteThrough(StringRef Filename)203*12c85518Srobert DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
204*12c85518Srobert StringRef Filename) {
205*12c85518Srobert if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
206e5dd7070Spatrick return Entry;
207*12c85518Srobert auto &Shard = SharedCache.getShardForFilename(Filename);
208*12c85518Srobert if (const auto *Entry = Shard.findEntryByFilename(Filename))
209*12c85518Srobert return &LocalCache.insertEntryForFilename(Filename, *Entry);
210*12c85518Srobert return nullptr;
211*12c85518Srobert }
212e5dd7070Spatrick
213*12c85518Srobert llvm::ErrorOr<const CachedFileSystemEntry &>
computeAndStoreResult(StringRef Filename)214*12c85518Srobert DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
215*12c85518Srobert llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
216*12c85518Srobert if (!Stat) {
217e5dd7070Spatrick if (!shouldCacheStatFailures(Filename))
218*12c85518Srobert return Stat.getError();
219*12c85518Srobert const auto &Entry =
220*12c85518Srobert getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
221*12c85518Srobert return insertLocalEntryForFilename(Filename, Entry);
222e5dd7070Spatrick }
223e5dd7070Spatrick
224*12c85518Srobert if (const auto *Entry = findSharedEntryByUID(*Stat))
225*12c85518Srobert return insertLocalEntryForFilename(Filename, *Entry);
226*12c85518Srobert
227*12c85518Srobert auto TEntry =
228*12c85518Srobert Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
229*12c85518Srobert
230*12c85518Srobert const CachedFileSystemEntry *SharedEntry = [&]() {
231*12c85518Srobert if (TEntry) {
232*12c85518Srobert const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
233*12c85518Srobert return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
234*12c85518Srobert }
235*12c85518Srobert return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
236*12c85518Srobert }();
237*12c85518Srobert
238*12c85518Srobert return insertLocalEntryForFilename(Filename, *SharedEntry);
239e5dd7070Spatrick }
240e5dd7070Spatrick
241*12c85518Srobert llvm::ErrorOr<EntryRef>
getOrCreateFileSystemEntry(StringRef Filename,bool DisableDirectivesScanning)242*12c85518Srobert DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
243*12c85518Srobert StringRef Filename, bool DisableDirectivesScanning) {
244*12c85518Srobert if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
245*12c85518Srobert return scanForDirectivesIfNecessary(*Entry, Filename,
246*12c85518Srobert DisableDirectivesScanning)
247*12c85518Srobert .unwrapError();
248*12c85518Srobert auto MaybeEntry = computeAndStoreResult(Filename);
249*12c85518Srobert if (!MaybeEntry)
250*12c85518Srobert return MaybeEntry.getError();
251*12c85518Srobert return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
252*12c85518Srobert DisableDirectivesScanning)
253*12c85518Srobert .unwrapError();
254e5dd7070Spatrick }
255e5dd7070Spatrick
256e5dd7070Spatrick llvm::ErrorOr<llvm::vfs::Status>
status(const Twine & Path)257e5dd7070Spatrick DependencyScanningWorkerFilesystem::status(const Twine &Path) {
258e5dd7070Spatrick SmallString<256> OwnedFilename;
259e5dd7070Spatrick StringRef Filename = Path.toStringRef(OwnedFilename);
260*12c85518Srobert
261*12c85518Srobert llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
262e5dd7070Spatrick if (!Result)
263e5dd7070Spatrick return Result.getError();
264*12c85518Srobert return Result->getStatus();
265e5dd7070Spatrick }
266e5dd7070Spatrick
267e5dd7070Spatrick namespace {
268e5dd7070Spatrick
269e5dd7070Spatrick /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
270e5dd7070Spatrick /// this subclass.
271*12c85518Srobert class DepScanFile final : public llvm::vfs::File {
272e5dd7070Spatrick public:
DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,llvm::vfs::Status Stat)273*12c85518Srobert DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
274e5dd7070Spatrick llvm::vfs::Status Stat)
275e5dd7070Spatrick : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
276e5dd7070Spatrick
277*12c85518Srobert static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
278e5dd7070Spatrick
status()279a9ac8606Spatrick llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
280e5dd7070Spatrick
281e5dd7070Spatrick llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBuffer(const Twine & Name,int64_t FileSize,bool RequiresNullTerminator,bool IsVolatile)282e5dd7070Spatrick getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
283e5dd7070Spatrick bool IsVolatile) override {
284e5dd7070Spatrick return std::move(Buffer);
285e5dd7070Spatrick }
286e5dd7070Spatrick
close()287e5dd7070Spatrick std::error_code close() override { return {}; }
288e5dd7070Spatrick
289e5dd7070Spatrick private:
290e5dd7070Spatrick std::unique_ptr<llvm::MemoryBuffer> Buffer;
291e5dd7070Spatrick llvm::vfs::Status Stat;
292e5dd7070Spatrick };
293e5dd7070Spatrick
294a9ac8606Spatrick } // end anonymous namespace
295a9ac8606Spatrick
296*12c85518Srobert llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
create(EntryRef Entry)297*12c85518Srobert DepScanFile::create(EntryRef Entry) {
298*12c85518Srobert assert(!Entry.isError() && "error");
299*12c85518Srobert
300*12c85518Srobert if (Entry.isDirectory())
301*12c85518Srobert return std::make_error_code(std::errc::is_a_directory);
302*12c85518Srobert
303*12c85518Srobert auto Result = std::make_unique<DepScanFile>(
304*12c85518Srobert llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
305*12c85518Srobert Entry.getStatus().getName(),
306e5dd7070Spatrick /*RequiresNullTerminator=*/false),
307*12c85518Srobert Entry.getStatus());
308*12c85518Srobert
309e5dd7070Spatrick return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
310e5dd7070Spatrick std::unique_ptr<llvm::vfs::File>(std::move(Result)));
311e5dd7070Spatrick }
312e5dd7070Spatrick
313e5dd7070Spatrick llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine & Path)314e5dd7070Spatrick DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
315e5dd7070Spatrick SmallString<256> OwnedFilename;
316e5dd7070Spatrick StringRef Filename = Path.toStringRef(OwnedFilename);
317e5dd7070Spatrick
318*12c85518Srobert llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
319e5dd7070Spatrick if (!Result)
320e5dd7070Spatrick return Result.getError();
321*12c85518Srobert return DepScanFile::create(Result.get());
322e5dd7070Spatrick }
323