1a7dea167SDimitry Andric //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2a7dea167SDimitry Andric // 3a7dea167SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a7dea167SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5a7dea167SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a7dea167SDimitry Andric // 7a7dea167SDimitry Andric //===----------------------------------------------------------------------===// 8a7dea167SDimitry Andric 9a7dea167SDimitry Andric #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10a7dea167SDimitry Andric #include "clang/Lex/DependencyDirectivesSourceMinimizer.h" 11a7dea167SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 12*0eae32dcSDimitry Andric #include "llvm/Support/SmallVectorMemoryBuffer.h" 13a7dea167SDimitry Andric #include "llvm/Support/Threading.h" 14a7dea167SDimitry Andric 15a7dea167SDimitry Andric using namespace clang; 16a7dea167SDimitry Andric using namespace tooling; 17a7dea167SDimitry Andric using namespace dependencies; 18a7dea167SDimitry Andric 19*0eae32dcSDimitry Andric llvm::ErrorOr<llvm::vfs::Status> 20*0eae32dcSDimitry Andric CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) { 21a7dea167SDimitry Andric // Load the file and its content from the file system. 22*0eae32dcSDimitry Andric auto MaybeFile = FS.openFileForRead(Filename); 23a7dea167SDimitry Andric if (!MaybeFile) 24a7dea167SDimitry Andric return MaybeFile.getError(); 25*0eae32dcSDimitry Andric auto File = std::move(*MaybeFile); 26a7dea167SDimitry Andric 27*0eae32dcSDimitry Andric auto MaybeStat = File->status(); 28*0eae32dcSDimitry Andric if (!MaybeStat) 29*0eae32dcSDimitry Andric return MaybeStat.getError(); 30*0eae32dcSDimitry Andric auto Stat = std::move(*MaybeStat); 31*0eae32dcSDimitry Andric 32*0eae32dcSDimitry Andric auto MaybeBuffer = File->getBuffer(Stat.getName()); 33a7dea167SDimitry Andric if (!MaybeBuffer) 34a7dea167SDimitry Andric return MaybeBuffer.getError(); 35*0eae32dcSDimitry Andric auto Buffer = std::move(*MaybeBuffer); 36*0eae32dcSDimitry Andric 37*0eae32dcSDimitry Andric OriginalContents = std::move(Buffer); 38*0eae32dcSDimitry Andric return Stat; 39*0eae32dcSDimitry Andric } 40*0eae32dcSDimitry Andric 41*0eae32dcSDimitry Andric void CachedFileSystemEntry::minimizeFile() { 42*0eae32dcSDimitry Andric assert(OriginalContents && "minimizing missing contents"); 43a7dea167SDimitry Andric 44a7dea167SDimitry Andric llvm::SmallString<1024> MinimizedFileContents; 45a7dea167SDimitry Andric // Minimize the file down to directives that might affect the dependencies. 46a7dea167SDimitry Andric SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens; 47*0eae32dcSDimitry Andric if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(), 48*0eae32dcSDimitry Andric MinimizedFileContents, Tokens)) { 49*0eae32dcSDimitry Andric // FIXME: Propagate the diagnostic if desired by the client. 50*0eae32dcSDimitry Andric // Use the original file if the minimization failed. 51*0eae32dcSDimitry Andric MinimizedContentsStorage = 52*0eae32dcSDimitry Andric llvm::MemoryBuffer::getMemBuffer(*OriginalContents); 53*0eae32dcSDimitry Andric MinimizedContentsAccess.store(MinimizedContentsStorage.get()); 54*0eae32dcSDimitry Andric return; 55a7dea167SDimitry Andric } 56a7dea167SDimitry Andric 57a7dea167SDimitry Andric // The contents produced by the minimizer must be null terminated. 58a7dea167SDimitry Andric assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && 59a7dea167SDimitry Andric "not null terminated contents"); 60a7dea167SDimitry Andric 61a7dea167SDimitry Andric // Compute the skipped PP ranges that speedup skipping over inactive 62a7dea167SDimitry Andric // preprocessor blocks. 63a7dea167SDimitry Andric llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32> 64a7dea167SDimitry Andric SkippedRanges; 65a7dea167SDimitry Andric minimize_source_to_dependency_directives::computeSkippedRanges(Tokens, 66a7dea167SDimitry Andric SkippedRanges); 67a7dea167SDimitry Andric PreprocessorSkippedRangeMapping Mapping; 68a7dea167SDimitry Andric for (const auto &Range : SkippedRanges) { 69a7dea167SDimitry Andric if (Range.Length < 16) { 70a7dea167SDimitry Andric // Ignore small ranges as non-profitable. 71a7dea167SDimitry Andric // FIXME: This is a heuristic, its worth investigating the tradeoffs 72a7dea167SDimitry Andric // when it should be applied. 73a7dea167SDimitry Andric continue; 74a7dea167SDimitry Andric } 75a7dea167SDimitry Andric Mapping[Range.Offset] = Range.Length; 76a7dea167SDimitry Andric } 77*0eae32dcSDimitry Andric PPSkippedRangeMapping = std::move(Mapping); 78a7dea167SDimitry Andric 79*0eae32dcSDimitry Andric MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>( 80*0eae32dcSDimitry Andric std::move(MinimizedFileContents)); 81*0eae32dcSDimitry Andric // The algorithm in `getOrCreateFileSystemEntry` uses the presence of 82*0eae32dcSDimitry Andric // minimized contents to decide whether an entry is up-to-date or not. 83*0eae32dcSDimitry Andric // If it is up-to-date, the skipped range mappings must be already computed. 84*0eae32dcSDimitry Andric // This is why we need to store the minimized contents **after** storing the 85*0eae32dcSDimitry Andric // skipped range mappings. Failing to do so would lead to a data race. 86*0eae32dcSDimitry Andric MinimizedContentsAccess.store(MinimizedContentsStorage.get()); 87a7dea167SDimitry Andric } 88a7dea167SDimitry Andric 89*0eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache:: 90*0eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache() { 91a7dea167SDimitry Andric // This heuristic was chosen using a empirical testing on a 92a7dea167SDimitry Andric // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 93a7dea167SDimitry Andric // sharding gives a performance edge by reducing the lock contention. 94a7dea167SDimitry Andric // FIXME: A better heuristic might also consider the OS to account for 95a7dea167SDimitry Andric // the different cost of lock contention on different OSes. 965ffd83dbSDimitry Andric NumShards = 975ffd83dbSDimitry Andric std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 98a7dea167SDimitry Andric CacheShards = std::make_unique<CacheShard[]>(NumShards); 99a7dea167SDimitry Andric } 100a7dea167SDimitry Andric 101a7dea167SDimitry Andric DependencyScanningFilesystemSharedCache::SharedFileSystemEntry & 102*0eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache::get(StringRef Key) { 103a7dea167SDimitry Andric CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards]; 104*0eae32dcSDimitry Andric std::lock_guard<std::mutex> LockGuard(Shard.CacheLock); 105a7dea167SDimitry Andric auto It = Shard.Cache.try_emplace(Key); 106a7dea167SDimitry Andric return It.first->getValue(); 107a7dea167SDimitry Andric } 108a7dea167SDimitry Andric 109480093f4SDimitry Andric /// Whitelist file extensions that should be minimized, treating no extension as 110480093f4SDimitry Andric /// a source file that should be minimized. 111480093f4SDimitry Andric /// 112480093f4SDimitry Andric /// This is kinda hacky, it would be better if we knew what kind of file Clang 113480093f4SDimitry Andric /// was expecting instead. 1144824e7fdSDimitry Andric static bool shouldMinimizeBasedOnExtension(StringRef Filename) { 115480093f4SDimitry Andric StringRef Ext = llvm::sys::path::extension(Filename); 116480093f4SDimitry Andric if (Ext.empty()) 117480093f4SDimitry Andric return true; // C++ standard library 118480093f4SDimitry Andric return llvm::StringSwitch<bool>(Ext) 119480093f4SDimitry Andric .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) 120480093f4SDimitry Andric .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) 121480093f4SDimitry Andric .CasesLower(".m", ".mm", true) 122480093f4SDimitry Andric .CasesLower(".i", ".ii", ".mi", ".mmi", true) 123480093f4SDimitry Andric .CasesLower(".def", ".inc", true) 124480093f4SDimitry Andric .Default(false); 125480093f4SDimitry Andric } 126480093f4SDimitry Andric 127480093f4SDimitry Andric static bool shouldCacheStatFailures(StringRef Filename) { 128480093f4SDimitry Andric StringRef Ext = llvm::sys::path::extension(Filename); 129480093f4SDimitry Andric if (Ext.empty()) 130480093f4SDimitry Andric return false; // This may be the module cache directory. 1314824e7fdSDimitry Andric // Only cache stat failures on source files. 1324824e7fdSDimitry Andric return shouldMinimizeBasedOnExtension(Filename); 133480093f4SDimitry Andric } 134480093f4SDimitry Andric 1354824e7fdSDimitry Andric void DependencyScanningWorkerFilesystem::disableMinimization( 136fe6060f1SDimitry Andric StringRef RawFilename) { 137fe6060f1SDimitry Andric llvm::SmallString<256> Filename; 138fe6060f1SDimitry Andric llvm::sys::path::native(RawFilename, Filename); 1394824e7fdSDimitry Andric NotToBeMinimized.insert(Filename); 1404824e7fdSDimitry Andric } 1414824e7fdSDimitry Andric 1424824e7fdSDimitry Andric bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) { 1434824e7fdSDimitry Andric if (!shouldMinimizeBasedOnExtension(RawFilename)) 1444824e7fdSDimitry Andric return false; 1454824e7fdSDimitry Andric 1464824e7fdSDimitry Andric llvm::SmallString<256> Filename; 1474824e7fdSDimitry Andric llvm::sys::path::native(RawFilename, Filename); 1484824e7fdSDimitry Andric return !NotToBeMinimized.contains(Filename); 1494824e7fdSDimitry Andric } 1504824e7fdSDimitry Andric 151*0eae32dcSDimitry Andric void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, 152*0eae32dcSDimitry Andric StringRef Filename, 153*0eae32dcSDimitry Andric llvm::vfs::FileSystem &FS) { 154*0eae32dcSDimitry Andric if (!MaybeStatus || MaybeStatus->isDirectory()) 155*0eae32dcSDimitry Andric MaybeStat = std::move(MaybeStatus); 156*0eae32dcSDimitry Andric else 157*0eae32dcSDimitry Andric MaybeStat = initFile(Filename, FS); 158fe6060f1SDimitry Andric } 159fe6060f1SDimitry Andric 160*0eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> 161a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 162*0eae32dcSDimitry Andric StringRef Filename) { 163*0eae32dcSDimitry Andric bool ShouldBeMinimized = shouldMinimize(Filename); 164fe6060f1SDimitry Andric 165*0eae32dcSDimitry Andric const auto *Entry = LocalCache.getCachedEntry(Filename); 166*0eae32dcSDimitry Andric if (Entry && !Entry->needsUpdate(ShouldBeMinimized)) 167*0eae32dcSDimitry Andric return EntryRef(ShouldBeMinimized, *Entry); 168a7dea167SDimitry Andric 169a7dea167SDimitry Andric // FIXME: Handle PCM/PCH files. 170a7dea167SDimitry Andric // FIXME: Handle module map files. 171a7dea167SDimitry Andric 172*0eae32dcSDimitry Andric auto &SharedCacheEntry = SharedCache.get(Filename); 173a7dea167SDimitry Andric { 174*0eae32dcSDimitry Andric std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock); 175a7dea167SDimitry Andric CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; 176a7dea167SDimitry Andric 177*0eae32dcSDimitry Andric if (!CacheEntry.isInitialized()) { 1784824e7fdSDimitry Andric auto MaybeStatus = getUnderlyingFS().status(Filename); 1794824e7fdSDimitry Andric if (!MaybeStatus && !shouldCacheStatFailures(Filename)) 180480093f4SDimitry Andric // HACK: We need to always restat non source files if the stat fails. 181480093f4SDimitry Andric // This is because Clang first looks up the module cache and module 182480093f4SDimitry Andric // files before building them, and then looks for them again. If we 183480093f4SDimitry Andric // cache the stat failure, it won't see them the second time. 184480093f4SDimitry Andric return MaybeStatus.getError(); 185*0eae32dcSDimitry Andric CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS()); 186a7dea167SDimitry Andric } 187a7dea167SDimitry Andric 188*0eae32dcSDimitry Andric // Checking `needsUpdate` verifies the entry represents an opened file. 189*0eae32dcSDimitry Andric // Only checking `needsMinimization` could lead to minimization of files 190*0eae32dcSDimitry Andric // that we failed to load (such files don't have `OriginalContents`). 191*0eae32dcSDimitry Andric if (CacheEntry.needsUpdate(ShouldBeMinimized)) 192*0eae32dcSDimitry Andric CacheEntry.minimizeFile(); 193a7dea167SDimitry Andric } 194a7dea167SDimitry Andric 195a7dea167SDimitry Andric // Store the result in the local cache. 196*0eae32dcSDimitry Andric Entry = &SharedCacheEntry.Value; 197*0eae32dcSDimitry Andric return EntryRef(ShouldBeMinimized, *Entry); 198a7dea167SDimitry Andric } 199a7dea167SDimitry Andric 200a7dea167SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> 201a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::status(const Twine &Path) { 202a7dea167SDimitry Andric SmallString<256> OwnedFilename; 203a7dea167SDimitry Andric StringRef Filename = Path.toStringRef(OwnedFilename); 204*0eae32dcSDimitry Andric 205*0eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 206a7dea167SDimitry Andric if (!Result) 207a7dea167SDimitry Andric return Result.getError(); 208*0eae32dcSDimitry Andric return Result->getStatus(); 209a7dea167SDimitry Andric } 210a7dea167SDimitry Andric 211a7dea167SDimitry Andric namespace { 212a7dea167SDimitry Andric 213a7dea167SDimitry Andric /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 214a7dea167SDimitry Andric /// this subclass. 215a7dea167SDimitry Andric class MinimizedVFSFile final : public llvm::vfs::File { 216a7dea167SDimitry Andric public: 217a7dea167SDimitry Andric MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 218a7dea167SDimitry Andric llvm::vfs::Status Stat) 219a7dea167SDimitry Andric : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 220a7dea167SDimitry Andric 221e8d8bef9SDimitry Andric static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 222*0eae32dcSDimitry Andric create(EntryRef Entry, 223e8d8bef9SDimitry Andric ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings); 224a7dea167SDimitry Andric 225e8d8bef9SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 226a7dea167SDimitry Andric 227a7dea167SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 228a7dea167SDimitry Andric getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 229a7dea167SDimitry Andric bool IsVolatile) override { 230a7dea167SDimitry Andric return std::move(Buffer); 231a7dea167SDimitry Andric } 232a7dea167SDimitry Andric 233a7dea167SDimitry Andric std::error_code close() override { return {}; } 234a7dea167SDimitry Andric 235a7dea167SDimitry Andric private: 236a7dea167SDimitry Andric std::unique_ptr<llvm::MemoryBuffer> Buffer; 237a7dea167SDimitry Andric llvm::vfs::Status Stat; 238a7dea167SDimitry Andric }; 239a7dea167SDimitry Andric 240e8d8bef9SDimitry Andric } // end anonymous namespace 241e8d8bef9SDimitry Andric 242e8d8bef9SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create( 243*0eae32dcSDimitry Andric EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) { 244*0eae32dcSDimitry Andric if (Entry.isDirectory()) 245*0eae32dcSDimitry Andric return std::make_error_code(std::errc::is_a_directory); 246*0eae32dcSDimitry Andric 247*0eae32dcSDimitry Andric llvm::ErrorOr<StringRef> Contents = Entry.getContents(); 248a7dea167SDimitry Andric if (!Contents) 249a7dea167SDimitry Andric return Contents.getError(); 250a7dea167SDimitry Andric auto Result = std::make_unique<MinimizedVFSFile>( 251*0eae32dcSDimitry Andric llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(), 252a7dea167SDimitry Andric /*RequiresNullTerminator=*/false), 253*0eae32dcSDimitry Andric *Entry.getStatus()); 254*0eae32dcSDimitry Andric 255*0eae32dcSDimitry Andric const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); 256*0eae32dcSDimitry Andric if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings) 257*0eae32dcSDimitry Andric (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings; 258*0eae32dcSDimitry Andric 259a7dea167SDimitry Andric return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 260a7dea167SDimitry Andric std::unique_ptr<llvm::vfs::File>(std::move(Result))); 261a7dea167SDimitry Andric } 262a7dea167SDimitry Andric 263a7dea167SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 264a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 265a7dea167SDimitry Andric SmallString<256> OwnedFilename; 266a7dea167SDimitry Andric StringRef Filename = Path.toStringRef(OwnedFilename); 267a7dea167SDimitry Andric 268*0eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 269a7dea167SDimitry Andric if (!Result) 270a7dea167SDimitry Andric return Result.getError(); 271e8d8bef9SDimitry Andric return MinimizedVFSFile::create(Result.get(), PPSkipMappings); 272a7dea167SDimitry Andric } 273