1a7dea167SDimitry Andric //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2a7dea167SDimitry Andric //
3a7dea167SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a7dea167SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5a7dea167SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a7dea167SDimitry Andric //
7a7dea167SDimitry Andric //===----------------------------------------------------------------------===//
8a7dea167SDimitry Andric 
9a7dea167SDimitry Andric #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10a7dea167SDimitry Andric #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
11a7dea167SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
12*0eae32dcSDimitry Andric #include "llvm/Support/SmallVectorMemoryBuffer.h"
13a7dea167SDimitry Andric #include "llvm/Support/Threading.h"
14a7dea167SDimitry Andric 
15a7dea167SDimitry Andric using namespace clang;
16a7dea167SDimitry Andric using namespace tooling;
17a7dea167SDimitry Andric using namespace dependencies;
18a7dea167SDimitry Andric 
19*0eae32dcSDimitry Andric llvm::ErrorOr<llvm::vfs::Status>
20*0eae32dcSDimitry Andric CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
21a7dea167SDimitry Andric   // Load the file and its content from the file system.
22*0eae32dcSDimitry Andric   auto MaybeFile = FS.openFileForRead(Filename);
23a7dea167SDimitry Andric   if (!MaybeFile)
24a7dea167SDimitry Andric     return MaybeFile.getError();
25*0eae32dcSDimitry Andric   auto File = std::move(*MaybeFile);
26a7dea167SDimitry Andric 
27*0eae32dcSDimitry Andric   auto MaybeStat = File->status();
28*0eae32dcSDimitry Andric   if (!MaybeStat)
29*0eae32dcSDimitry Andric     return MaybeStat.getError();
30*0eae32dcSDimitry Andric   auto Stat = std::move(*MaybeStat);
31*0eae32dcSDimitry Andric 
32*0eae32dcSDimitry Andric   auto MaybeBuffer = File->getBuffer(Stat.getName());
33a7dea167SDimitry Andric   if (!MaybeBuffer)
34a7dea167SDimitry Andric     return MaybeBuffer.getError();
35*0eae32dcSDimitry Andric   auto Buffer = std::move(*MaybeBuffer);
36*0eae32dcSDimitry Andric 
37*0eae32dcSDimitry Andric   OriginalContents = std::move(Buffer);
38*0eae32dcSDimitry Andric   return Stat;
39*0eae32dcSDimitry Andric }
40*0eae32dcSDimitry Andric 
41*0eae32dcSDimitry Andric void CachedFileSystemEntry::minimizeFile() {
42*0eae32dcSDimitry Andric   assert(OriginalContents && "minimizing missing contents");
43a7dea167SDimitry Andric 
44a7dea167SDimitry Andric   llvm::SmallString<1024> MinimizedFileContents;
45a7dea167SDimitry Andric   // Minimize the file down to directives that might affect the dependencies.
46a7dea167SDimitry Andric   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
47*0eae32dcSDimitry Andric   if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(),
48*0eae32dcSDimitry Andric                                            MinimizedFileContents, Tokens)) {
49*0eae32dcSDimitry Andric     // FIXME: Propagate the diagnostic if desired by the client.
50*0eae32dcSDimitry Andric     // Use the original file if the minimization failed.
51*0eae32dcSDimitry Andric     MinimizedContentsStorage =
52*0eae32dcSDimitry Andric         llvm::MemoryBuffer::getMemBuffer(*OriginalContents);
53*0eae32dcSDimitry Andric     MinimizedContentsAccess.store(MinimizedContentsStorage.get());
54*0eae32dcSDimitry Andric     return;
55a7dea167SDimitry Andric   }
56a7dea167SDimitry Andric 
57a7dea167SDimitry Andric   // The contents produced by the minimizer must be null terminated.
58a7dea167SDimitry Andric   assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
59a7dea167SDimitry Andric          "not null terminated contents");
60a7dea167SDimitry Andric 
61a7dea167SDimitry Andric   // Compute the skipped PP ranges that speedup skipping over inactive
62a7dea167SDimitry Andric   // preprocessor blocks.
63a7dea167SDimitry Andric   llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32>
64a7dea167SDimitry Andric       SkippedRanges;
65a7dea167SDimitry Andric   minimize_source_to_dependency_directives::computeSkippedRanges(Tokens,
66a7dea167SDimitry Andric                                                                  SkippedRanges);
67a7dea167SDimitry Andric   PreprocessorSkippedRangeMapping Mapping;
68a7dea167SDimitry Andric   for (const auto &Range : SkippedRanges) {
69a7dea167SDimitry Andric     if (Range.Length < 16) {
70a7dea167SDimitry Andric       // Ignore small ranges as non-profitable.
71a7dea167SDimitry Andric       // FIXME: This is a heuristic, its worth investigating the tradeoffs
72a7dea167SDimitry Andric       // when it should be applied.
73a7dea167SDimitry Andric       continue;
74a7dea167SDimitry Andric     }
75a7dea167SDimitry Andric     Mapping[Range.Offset] = Range.Length;
76a7dea167SDimitry Andric   }
77*0eae32dcSDimitry Andric   PPSkippedRangeMapping = std::move(Mapping);
78a7dea167SDimitry Andric 
79*0eae32dcSDimitry Andric   MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
80*0eae32dcSDimitry Andric       std::move(MinimizedFileContents));
81*0eae32dcSDimitry Andric   // The algorithm in `getOrCreateFileSystemEntry` uses the presence of
82*0eae32dcSDimitry Andric   // minimized contents to decide whether an entry is up-to-date or not.
83*0eae32dcSDimitry Andric   // If it is up-to-date, the skipped range mappings must be already computed.
84*0eae32dcSDimitry Andric   // This is why we need to store the minimized contents **after** storing the
85*0eae32dcSDimitry Andric   // skipped range mappings. Failing to do so would lead to a data race.
86*0eae32dcSDimitry Andric   MinimizedContentsAccess.store(MinimizedContentsStorage.get());
87a7dea167SDimitry Andric }
88a7dea167SDimitry Andric 
89*0eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache::
90*0eae32dcSDimitry Andric     DependencyScanningFilesystemSharedCache() {
91a7dea167SDimitry Andric   // This heuristic was chosen using a empirical testing on a
92a7dea167SDimitry Andric   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
93a7dea167SDimitry Andric   // sharding gives a performance edge by reducing the lock contention.
94a7dea167SDimitry Andric   // FIXME: A better heuristic might also consider the OS to account for
95a7dea167SDimitry Andric   // the different cost of lock contention on different OSes.
965ffd83dbSDimitry Andric   NumShards =
975ffd83dbSDimitry Andric       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
98a7dea167SDimitry Andric   CacheShards = std::make_unique<CacheShard[]>(NumShards);
99a7dea167SDimitry Andric }
100a7dea167SDimitry Andric 
101a7dea167SDimitry Andric DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
102*0eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache::get(StringRef Key) {
103a7dea167SDimitry Andric   CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
104*0eae32dcSDimitry Andric   std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
105a7dea167SDimitry Andric   auto It = Shard.Cache.try_emplace(Key);
106a7dea167SDimitry Andric   return It.first->getValue();
107a7dea167SDimitry Andric }
108a7dea167SDimitry Andric 
109480093f4SDimitry Andric /// Whitelist file extensions that should be minimized, treating no extension as
110480093f4SDimitry Andric /// a source file that should be minimized.
111480093f4SDimitry Andric ///
112480093f4SDimitry Andric /// This is kinda hacky, it would be better if we knew what kind of file Clang
113480093f4SDimitry Andric /// was expecting instead.
1144824e7fdSDimitry Andric static bool shouldMinimizeBasedOnExtension(StringRef Filename) {
115480093f4SDimitry Andric   StringRef Ext = llvm::sys::path::extension(Filename);
116480093f4SDimitry Andric   if (Ext.empty())
117480093f4SDimitry Andric     return true; // C++ standard library
118480093f4SDimitry Andric   return llvm::StringSwitch<bool>(Ext)
119480093f4SDimitry Andric       .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
120480093f4SDimitry Andric       .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
121480093f4SDimitry Andric       .CasesLower(".m", ".mm", true)
122480093f4SDimitry Andric       .CasesLower(".i", ".ii", ".mi", ".mmi", true)
123480093f4SDimitry Andric       .CasesLower(".def", ".inc", true)
124480093f4SDimitry Andric       .Default(false);
125480093f4SDimitry Andric }
126480093f4SDimitry Andric 
127480093f4SDimitry Andric static bool shouldCacheStatFailures(StringRef Filename) {
128480093f4SDimitry Andric   StringRef Ext = llvm::sys::path::extension(Filename);
129480093f4SDimitry Andric   if (Ext.empty())
130480093f4SDimitry Andric     return false; // This may be the module cache directory.
1314824e7fdSDimitry Andric   // Only cache stat failures on source files.
1324824e7fdSDimitry Andric   return shouldMinimizeBasedOnExtension(Filename);
133480093f4SDimitry Andric }
134480093f4SDimitry Andric 
1354824e7fdSDimitry Andric void DependencyScanningWorkerFilesystem::disableMinimization(
136fe6060f1SDimitry Andric     StringRef RawFilename) {
137fe6060f1SDimitry Andric   llvm::SmallString<256> Filename;
138fe6060f1SDimitry Andric   llvm::sys::path::native(RawFilename, Filename);
1394824e7fdSDimitry Andric   NotToBeMinimized.insert(Filename);
1404824e7fdSDimitry Andric }
1414824e7fdSDimitry Andric 
1424824e7fdSDimitry Andric bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
1434824e7fdSDimitry Andric   if (!shouldMinimizeBasedOnExtension(RawFilename))
1444824e7fdSDimitry Andric     return false;
1454824e7fdSDimitry Andric 
1464824e7fdSDimitry Andric   llvm::SmallString<256> Filename;
1474824e7fdSDimitry Andric   llvm::sys::path::native(RawFilename, Filename);
1484824e7fdSDimitry Andric   return !NotToBeMinimized.contains(Filename);
1494824e7fdSDimitry Andric }
1504824e7fdSDimitry Andric 
151*0eae32dcSDimitry Andric void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
152*0eae32dcSDimitry Andric                                  StringRef Filename,
153*0eae32dcSDimitry Andric                                  llvm::vfs::FileSystem &FS) {
154*0eae32dcSDimitry Andric   if (!MaybeStatus || MaybeStatus->isDirectory())
155*0eae32dcSDimitry Andric     MaybeStat = std::move(MaybeStatus);
156*0eae32dcSDimitry Andric   else
157*0eae32dcSDimitry Andric     MaybeStat = initFile(Filename, FS);
158fe6060f1SDimitry Andric }
159fe6060f1SDimitry Andric 
160*0eae32dcSDimitry Andric llvm::ErrorOr<EntryRef>
161a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
162*0eae32dcSDimitry Andric     StringRef Filename) {
163*0eae32dcSDimitry Andric   bool ShouldBeMinimized = shouldMinimize(Filename);
164fe6060f1SDimitry Andric 
165*0eae32dcSDimitry Andric   const auto *Entry = LocalCache.getCachedEntry(Filename);
166*0eae32dcSDimitry Andric   if (Entry && !Entry->needsUpdate(ShouldBeMinimized))
167*0eae32dcSDimitry Andric     return EntryRef(ShouldBeMinimized, *Entry);
168a7dea167SDimitry Andric 
169a7dea167SDimitry Andric   // FIXME: Handle PCM/PCH files.
170a7dea167SDimitry Andric   // FIXME: Handle module map files.
171a7dea167SDimitry Andric 
172*0eae32dcSDimitry Andric   auto &SharedCacheEntry = SharedCache.get(Filename);
173a7dea167SDimitry Andric   {
174*0eae32dcSDimitry Andric     std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
175a7dea167SDimitry Andric     CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
176a7dea167SDimitry Andric 
177*0eae32dcSDimitry Andric     if (!CacheEntry.isInitialized()) {
1784824e7fdSDimitry Andric       auto MaybeStatus = getUnderlyingFS().status(Filename);
1794824e7fdSDimitry Andric       if (!MaybeStatus && !shouldCacheStatFailures(Filename))
180480093f4SDimitry Andric         // HACK: We need to always restat non source files if the stat fails.
181480093f4SDimitry Andric         //   This is because Clang first looks up the module cache and module
182480093f4SDimitry Andric         //   files before building them, and then looks for them again. If we
183480093f4SDimitry Andric         //   cache the stat failure, it won't see them the second time.
184480093f4SDimitry Andric         return MaybeStatus.getError();
185*0eae32dcSDimitry Andric       CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS());
186a7dea167SDimitry Andric     }
187a7dea167SDimitry Andric 
188*0eae32dcSDimitry Andric     // Checking `needsUpdate` verifies the entry represents an opened file.
189*0eae32dcSDimitry Andric     // Only checking `needsMinimization` could lead to minimization of files
190*0eae32dcSDimitry Andric     // that we failed to load (such files don't have `OriginalContents`).
191*0eae32dcSDimitry Andric     if (CacheEntry.needsUpdate(ShouldBeMinimized))
192*0eae32dcSDimitry Andric       CacheEntry.minimizeFile();
193a7dea167SDimitry Andric   }
194a7dea167SDimitry Andric 
195a7dea167SDimitry Andric   // Store the result in the local cache.
196*0eae32dcSDimitry Andric   Entry = &SharedCacheEntry.Value;
197*0eae32dcSDimitry Andric   return EntryRef(ShouldBeMinimized, *Entry);
198a7dea167SDimitry Andric }
199a7dea167SDimitry Andric 
200a7dea167SDimitry Andric llvm::ErrorOr<llvm::vfs::Status>
201a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::status(const Twine &Path) {
202a7dea167SDimitry Andric   SmallString<256> OwnedFilename;
203a7dea167SDimitry Andric   StringRef Filename = Path.toStringRef(OwnedFilename);
204*0eae32dcSDimitry Andric 
205*0eae32dcSDimitry Andric   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
206a7dea167SDimitry Andric   if (!Result)
207a7dea167SDimitry Andric     return Result.getError();
208*0eae32dcSDimitry Andric   return Result->getStatus();
209a7dea167SDimitry Andric }
210a7dea167SDimitry Andric 
211a7dea167SDimitry Andric namespace {
212a7dea167SDimitry Andric 
213a7dea167SDimitry Andric /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
214a7dea167SDimitry Andric /// this subclass.
215a7dea167SDimitry Andric class MinimizedVFSFile final : public llvm::vfs::File {
216a7dea167SDimitry Andric public:
217a7dea167SDimitry Andric   MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
218a7dea167SDimitry Andric                    llvm::vfs::Status Stat)
219a7dea167SDimitry Andric       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
220a7dea167SDimitry Andric 
221e8d8bef9SDimitry Andric   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
222*0eae32dcSDimitry Andric   create(EntryRef Entry,
223e8d8bef9SDimitry Andric          ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings);
224a7dea167SDimitry Andric 
225e8d8bef9SDimitry Andric   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
226a7dea167SDimitry Andric 
227a7dea167SDimitry Andric   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
228a7dea167SDimitry Andric   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
229a7dea167SDimitry Andric             bool IsVolatile) override {
230a7dea167SDimitry Andric     return std::move(Buffer);
231a7dea167SDimitry Andric   }
232a7dea167SDimitry Andric 
233a7dea167SDimitry Andric   std::error_code close() override { return {}; }
234a7dea167SDimitry Andric 
235a7dea167SDimitry Andric private:
236a7dea167SDimitry Andric   std::unique_ptr<llvm::MemoryBuffer> Buffer;
237a7dea167SDimitry Andric   llvm::vfs::Status Stat;
238a7dea167SDimitry Andric };
239a7dea167SDimitry Andric 
240e8d8bef9SDimitry Andric } // end anonymous namespace
241e8d8bef9SDimitry Andric 
242e8d8bef9SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
243*0eae32dcSDimitry Andric     EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
244*0eae32dcSDimitry Andric   if (Entry.isDirectory())
245*0eae32dcSDimitry Andric     return std::make_error_code(std::errc::is_a_directory);
246*0eae32dcSDimitry Andric 
247*0eae32dcSDimitry Andric   llvm::ErrorOr<StringRef> Contents = Entry.getContents();
248a7dea167SDimitry Andric   if (!Contents)
249a7dea167SDimitry Andric     return Contents.getError();
250a7dea167SDimitry Andric   auto Result = std::make_unique<MinimizedVFSFile>(
251*0eae32dcSDimitry Andric       llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(),
252a7dea167SDimitry Andric                                        /*RequiresNullTerminator=*/false),
253*0eae32dcSDimitry Andric       *Entry.getStatus());
254*0eae32dcSDimitry Andric 
255*0eae32dcSDimitry Andric   const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping();
256*0eae32dcSDimitry Andric   if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings)
257*0eae32dcSDimitry Andric     (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings;
258*0eae32dcSDimitry Andric 
259a7dea167SDimitry Andric   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
260a7dea167SDimitry Andric       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
261a7dea167SDimitry Andric }
262a7dea167SDimitry Andric 
263a7dea167SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
264a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
265a7dea167SDimitry Andric   SmallString<256> OwnedFilename;
266a7dea167SDimitry Andric   StringRef Filename = Path.toStringRef(OwnedFilename);
267a7dea167SDimitry Andric 
268*0eae32dcSDimitry Andric   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
269a7dea167SDimitry Andric   if (!Result)
270a7dea167SDimitry Andric     return Result.getError();
271e8d8bef9SDimitry Andric   return MinimizedVFSFile::create(Result.get(), PPSkipMappings);
272a7dea167SDimitry Andric }
273