1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "llvm/Support/MemoryBuffer.h"
11 #include "llvm/Support/SmallVectorMemoryBuffer.h"
12 #include "llvm/Support/Threading.h"
13 #include <optional>
14
15 using namespace clang;
16 using namespace tooling;
17 using namespace dependencies;
18
19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
readFile(StringRef Filename)20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21 // Load the file and its content from the file system.
22 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23 if (!MaybeFile)
24 return MaybeFile.getError();
25 auto File = std::move(*MaybeFile);
26
27 auto MaybeStat = File->status();
28 if (!MaybeStat)
29 return MaybeStat.getError();
30 auto Stat = std::move(*MaybeStat);
31
32 auto MaybeBuffer = File->getBuffer(Stat.getName());
33 if (!MaybeBuffer)
34 return MaybeBuffer.getError();
35 auto Buffer = std::move(*MaybeBuffer);
36
37 // If the file size changed between read and stat, pretend it didn't.
38 if (Stat.getSize() != Buffer->getBufferSize())
39 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40
41 return TentativeEntry(Stat, std::move(Buffer));
42 }
43
scanForDirectivesIfNecessary(const CachedFileSystemEntry & Entry,StringRef Filename,bool Disable)44 EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
45 const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
46 if (Entry.isError() || Entry.isDirectory() || Disable ||
47 !shouldScanForDirectives(Filename))
48 return EntryRef(Filename, Entry);
49
50 CachedFileContents *Contents = Entry.getCachedContents();
51 assert(Contents && "contents not initialized");
52
53 // Double-checked locking.
54 if (Contents->DepDirectives.load())
55 return EntryRef(Filename, Entry);
56
57 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
58
59 // Double-checked locking.
60 if (Contents->DepDirectives.load())
61 return EntryRef(Filename, Entry);
62
63 SmallVector<dependency_directives_scan::Directive, 64> Directives;
64 // Scan the file for preprocessor directives that might affect the
65 // dependencies.
66 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
67 Contents->DepDirectiveTokens,
68 Directives)) {
69 Contents->DepDirectiveTokens.clear();
70 // FIXME: Propagate the diagnostic if desired by the client.
71 Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
72 return EntryRef(Filename, Entry);
73 }
74
75 // This function performed double-checked locking using `DepDirectives`.
76 // Assigning it must be the last thing this function does, otherwise other
77 // threads may skip the
78 // critical section (`DepDirectives != nullptr`), leading to a data race.
79 Contents->DepDirectives.store(
80 new std::optional<DependencyDirectivesTy>(std::move(Directives)));
81 return EntryRef(Filename, Entry);
82 }
83
84 DependencyScanningFilesystemSharedCache::
DependencyScanningFilesystemSharedCache()85 DependencyScanningFilesystemSharedCache() {
86 // This heuristic was chosen using a empirical testing on a
87 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88 // sharding gives a performance edge by reducing the lock contention.
89 // FIXME: A better heuristic might also consider the OS to account for
90 // the different cost of lock contention on different OSes.
91 NumShards =
92 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93 CacheShards = std::make_unique<CacheShard[]>(NumShards);
94 }
95
96 DependencyScanningFilesystemSharedCache::CacheShard &
getShardForFilename(StringRef Filename) const97 DependencyScanningFilesystemSharedCache::getShardForFilename(
98 StringRef Filename) const {
99 return CacheShards[llvm::hash_value(Filename) % NumShards];
100 }
101
102 DependencyScanningFilesystemSharedCache::CacheShard &
getShardForUID(llvm::sys::fs::UniqueID UID) const103 DependencyScanningFilesystemSharedCache::getShardForUID(
104 llvm::sys::fs::UniqueID UID) const {
105 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
106 return CacheShards[Hash % NumShards];
107 }
108
109 const CachedFileSystemEntry *
findEntryByFilename(StringRef Filename) const110 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
111 StringRef Filename) const {
112 std::lock_guard<std::mutex> LockGuard(CacheLock);
113 auto It = EntriesByFilename.find(Filename);
114 return It == EntriesByFilename.end() ? nullptr : It->getValue();
115 }
116
117 const CachedFileSystemEntry *
findEntryByUID(llvm::sys::fs::UniqueID UID) const118 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
119 llvm::sys::fs::UniqueID UID) const {
120 std::lock_guard<std::mutex> LockGuard(CacheLock);
121 auto It = EntriesByUID.find(UID);
122 return It == EntriesByUID.end() ? nullptr : It->getSecond();
123 }
124
125 const CachedFileSystemEntry &
126 DependencyScanningFilesystemSharedCache::CacheShard::
getOrEmplaceEntryForFilename(StringRef Filename,llvm::ErrorOr<llvm::vfs::Status> Stat)127 getOrEmplaceEntryForFilename(StringRef Filename,
128 llvm::ErrorOr<llvm::vfs::Status> Stat) {
129 std::lock_guard<std::mutex> LockGuard(CacheLock);
130 auto Insertion = EntriesByFilename.insert({Filename, nullptr});
131 if (Insertion.second)
132 Insertion.first->second =
133 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
134 return *Insertion.first->second;
135 }
136
137 const CachedFileSystemEntry &
getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID,llvm::vfs::Status Stat,std::unique_ptr<llvm::MemoryBuffer> Contents)138 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
139 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
140 std::unique_ptr<llvm::MemoryBuffer> Contents) {
141 std::lock_guard<std::mutex> LockGuard(CacheLock);
142 auto Insertion = EntriesByUID.insert({UID, nullptr});
143 if (Insertion.second) {
144 CachedFileContents *StoredContents = nullptr;
145 if (Contents)
146 StoredContents = new (ContentsStorage.Allocate())
147 CachedFileContents(std::move(Contents));
148 Insertion.first->second = new (EntryStorage.Allocate())
149 CachedFileSystemEntry(std::move(Stat), StoredContents);
150 }
151 return *Insertion.first->second;
152 }
153
154 const CachedFileSystemEntry &
155 DependencyScanningFilesystemSharedCache::CacheShard::
getOrInsertEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)156 getOrInsertEntryForFilename(StringRef Filename,
157 const CachedFileSystemEntry &Entry) {
158 std::lock_guard<std::mutex> LockGuard(CacheLock);
159 return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
160 }
161
162 /// Whitelist file extensions that should be minimized, treating no extension as
163 /// a source file that should be minimized.
164 ///
165 /// This is kinda hacky, it would be better if we knew what kind of file Clang
166 /// was expecting instead.
shouldScanForDirectivesBasedOnExtension(StringRef Filename)167 static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
168 StringRef Ext = llvm::sys::path::extension(Filename);
169 if (Ext.empty())
170 return true; // C++ standard library
171 return llvm::StringSwitch<bool>(Ext)
172 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
173 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
174 .CasesLower(".m", ".mm", true)
175 .CasesLower(".i", ".ii", ".mi", ".mmi", true)
176 .CasesLower(".def", ".inc", true)
177 .Default(false);
178 }
179
shouldCacheStatFailures(StringRef Filename)180 static bool shouldCacheStatFailures(StringRef Filename) {
181 StringRef Ext = llvm::sys::path::extension(Filename);
182 if (Ext.empty())
183 return false; // This may be the module cache directory.
184 // Only cache stat failures on source files.
185 return shouldScanForDirectivesBasedOnExtension(Filename);
186 }
187
shouldScanForDirectives(StringRef Filename)188 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
189 StringRef Filename) {
190 return shouldScanForDirectivesBasedOnExtension(Filename);
191 }
192
193 const CachedFileSystemEntry &
getOrEmplaceSharedEntryForUID(TentativeEntry TEntry)194 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
195 TentativeEntry TEntry) {
196 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
197 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
198 std::move(TEntry.Status),
199 std::move(TEntry.Contents));
200 }
201
202 const CachedFileSystemEntry *
findEntryByFilenameWithWriteThrough(StringRef Filename)203 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
204 StringRef Filename) {
205 if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
206 return Entry;
207 auto &Shard = SharedCache.getShardForFilename(Filename);
208 if (const auto *Entry = Shard.findEntryByFilename(Filename))
209 return &LocalCache.insertEntryForFilename(Filename, *Entry);
210 return nullptr;
211 }
212
213 llvm::ErrorOr<const CachedFileSystemEntry &>
computeAndStoreResult(StringRef Filename)214 DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
215 llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
216 if (!Stat) {
217 if (!shouldCacheStatFailures(Filename))
218 return Stat.getError();
219 const auto &Entry =
220 getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
221 return insertLocalEntryForFilename(Filename, Entry);
222 }
223
224 if (const auto *Entry = findSharedEntryByUID(*Stat))
225 return insertLocalEntryForFilename(Filename, *Entry);
226
227 auto TEntry =
228 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
229
230 const CachedFileSystemEntry *SharedEntry = [&]() {
231 if (TEntry) {
232 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
233 return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
234 }
235 return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
236 }();
237
238 return insertLocalEntryForFilename(Filename, *SharedEntry);
239 }
240
241 llvm::ErrorOr<EntryRef>
getOrCreateFileSystemEntry(StringRef Filename,bool DisableDirectivesScanning)242 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
243 StringRef Filename, bool DisableDirectivesScanning) {
244 if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
245 return scanForDirectivesIfNecessary(*Entry, Filename,
246 DisableDirectivesScanning)
247 .unwrapError();
248 auto MaybeEntry = computeAndStoreResult(Filename);
249 if (!MaybeEntry)
250 return MaybeEntry.getError();
251 return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
252 DisableDirectivesScanning)
253 .unwrapError();
254 }
255
256 llvm::ErrorOr<llvm::vfs::Status>
status(const Twine & Path)257 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
258 SmallString<256> OwnedFilename;
259 StringRef Filename = Path.toStringRef(OwnedFilename);
260
261 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
262 if (!Result)
263 return Result.getError();
264 return Result->getStatus();
265 }
266
267 namespace {
268
269 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
270 /// this subclass.
271 class DepScanFile final : public llvm::vfs::File {
272 public:
DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,llvm::vfs::Status Stat)273 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
274 llvm::vfs::Status Stat)
275 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
276
277 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
278
status()279 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
280
281 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBuffer(const Twine & Name,int64_t FileSize,bool RequiresNullTerminator,bool IsVolatile)282 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
283 bool IsVolatile) override {
284 return std::move(Buffer);
285 }
286
close()287 std::error_code close() override { return {}; }
288
289 private:
290 std::unique_ptr<llvm::MemoryBuffer> Buffer;
291 llvm::vfs::Status Stat;
292 };
293
294 } // end anonymous namespace
295
296 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
create(EntryRef Entry)297 DepScanFile::create(EntryRef Entry) {
298 assert(!Entry.isError() && "error");
299
300 if (Entry.isDirectory())
301 return std::make_error_code(std::errc::is_a_directory);
302
303 auto Result = std::make_unique<DepScanFile>(
304 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
305 Entry.getStatus().getName(),
306 /*RequiresNullTerminator=*/false),
307 Entry.getStatus());
308
309 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
310 std::unique_ptr<llvm::vfs::File>(std::move(Result)));
311 }
312
313 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine & Path)314 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
315 SmallString<256> OwnedFilename;
316 StringRef Filename = Path.toStringRef(OwnedFilename);
317
318 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
319 if (!Result)
320 return Result.getError();
321 return DepScanFile::create(Result.get());
322 }
323