1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10 #include "llvm/Support/MemoryBuffer.h" 11 #include "llvm/Support/SmallVectorMemoryBuffer.h" 12 #include "llvm/Support/Threading.h" 13 #include <optional> 14 15 using namespace clang; 16 using namespace tooling; 17 using namespace dependencies; 18 19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> 20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { 21 // Load the file and its content from the file system. 22 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); 23 if (!MaybeFile) 24 return MaybeFile.getError(); 25 auto File = std::move(*MaybeFile); 26 27 auto MaybeStat = File->status(); 28 if (!MaybeStat) 29 return MaybeStat.getError(); 30 auto Stat = std::move(*MaybeStat); 31 32 auto MaybeBuffer = File->getBuffer(Stat.getName()); 33 if (!MaybeBuffer) 34 return MaybeBuffer.getError(); 35 auto Buffer = std::move(*MaybeBuffer); 36 37 // If the file size changed between read and stat, pretend it didn't. 38 if (Stat.getSize() != Buffer->getBufferSize()) 39 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); 40 41 return TentativeEntry(Stat, std::move(Buffer)); 42 } 43 44 bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated( 45 EntryRef Ref) { 46 auto &Entry = Ref.Entry; 47 48 if (Entry.isError() || Entry.isDirectory()) 49 return false; 50 51 CachedFileContents *Contents = Entry.getCachedContents(); 52 assert(Contents && "contents not initialized"); 53 54 // Double-checked locking. 55 if (Contents->DepDirectives.load()) 56 return true; 57 58 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); 59 60 // Double-checked locking. 61 if (Contents->DepDirectives.load()) 62 return true; 63 64 SmallVector<dependency_directives_scan::Directive, 64> Directives; 65 // Scan the file for preprocessor directives that might affect the 66 // dependencies. 67 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), 68 Contents->DepDirectiveTokens, 69 Directives)) { 70 Contents->DepDirectiveTokens.clear(); 71 // FIXME: Propagate the diagnostic if desired by the client. 72 Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>()); 73 return false; 74 } 75 76 // This function performed double-checked locking using `DepDirectives`. 77 // Assigning it must be the last thing this function does, otherwise other 78 // threads may skip the critical section (`DepDirectives != nullptr`), leading 79 // to a data race. 80 Contents->DepDirectives.store( 81 new std::optional<DependencyDirectivesTy>(std::move(Directives))); 82 return true; 83 } 84 85 DependencyScanningFilesystemSharedCache:: 86 DependencyScanningFilesystemSharedCache() { 87 // This heuristic was chosen using a empirical testing on a 88 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 89 // sharding gives a performance edge by reducing the lock contention. 90 // FIXME: A better heuristic might also consider the OS to account for 91 // the different cost of lock contention on different OSes. 92 NumShards = 93 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 94 CacheShards = std::make_unique<CacheShard[]>(NumShards); 95 } 96 97 DependencyScanningFilesystemSharedCache::CacheShard & 98 DependencyScanningFilesystemSharedCache::getShardForFilename( 99 StringRef Filename) const { 100 assert(llvm::sys::path::is_absolute_gnu(Filename)); 101 return CacheShards[llvm::hash_value(Filename) % NumShards]; 102 } 103 104 DependencyScanningFilesystemSharedCache::CacheShard & 105 DependencyScanningFilesystemSharedCache::getShardForUID( 106 llvm::sys::fs::UniqueID UID) const { 107 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); 108 return CacheShards[Hash % NumShards]; 109 } 110 111 const CachedFileSystemEntry * 112 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( 113 StringRef Filename) const { 114 assert(llvm::sys::path::is_absolute_gnu(Filename)); 115 std::lock_guard<std::mutex> LockGuard(CacheLock); 116 auto It = CacheByFilename.find(Filename); 117 return It == CacheByFilename.end() ? nullptr : It->getValue().first; 118 } 119 120 const CachedFileSystemEntry * 121 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( 122 llvm::sys::fs::UniqueID UID) const { 123 std::lock_guard<std::mutex> LockGuard(CacheLock); 124 auto It = EntriesByUID.find(UID); 125 return It == EntriesByUID.end() ? nullptr : It->getSecond(); 126 } 127 128 const CachedFileSystemEntry & 129 DependencyScanningFilesystemSharedCache::CacheShard:: 130 getOrEmplaceEntryForFilename(StringRef Filename, 131 llvm::ErrorOr<llvm::vfs::Status> Stat) { 132 std::lock_guard<std::mutex> LockGuard(CacheLock); 133 auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}}); 134 auto &[CachedEntry, CachedRealPath] = It->getValue(); 135 if (!CachedEntry) { 136 // The entry is not present in the shared cache. Either the cache doesn't 137 // know about the file at all, or it only knows about its real path. 138 assert((Inserted || CachedRealPath) && "existing file with empty pair"); 139 CachedEntry = 140 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); 141 } 142 return *CachedEntry; 143 } 144 145 const CachedFileSystemEntry & 146 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( 147 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 148 std::unique_ptr<llvm::MemoryBuffer> Contents) { 149 std::lock_guard<std::mutex> LockGuard(CacheLock); 150 auto [It, Inserted] = EntriesByUID.insert({UID, nullptr}); 151 auto &CachedEntry = It->getSecond(); 152 if (Inserted) { 153 CachedFileContents *StoredContents = nullptr; 154 if (Contents) 155 StoredContents = new (ContentsStorage.Allocate()) 156 CachedFileContents(std::move(Contents)); 157 CachedEntry = new (EntryStorage.Allocate()) 158 CachedFileSystemEntry(std::move(Stat), StoredContents); 159 } 160 return *CachedEntry; 161 } 162 163 const CachedFileSystemEntry & 164 DependencyScanningFilesystemSharedCache::CacheShard:: 165 getOrInsertEntryForFilename(StringRef Filename, 166 const CachedFileSystemEntry &Entry) { 167 std::lock_guard<std::mutex> LockGuard(CacheLock); 168 auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}}); 169 auto &[CachedEntry, CachedRealPath] = It->getValue(); 170 if (!Inserted || !CachedEntry) 171 CachedEntry = &Entry; 172 return *CachedEntry; 173 } 174 175 const CachedRealPath * 176 DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename( 177 StringRef Filename) const { 178 assert(llvm::sys::path::is_absolute_gnu(Filename)); 179 std::lock_guard<std::mutex> LockGuard(CacheLock); 180 auto It = CacheByFilename.find(Filename); 181 return It == CacheByFilename.end() ? nullptr : It->getValue().second; 182 } 183 184 const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: 185 getOrEmplaceRealPathForFilename(StringRef Filename, 186 llvm::ErrorOr<llvm::StringRef> RealPath) { 187 std::lock_guard<std::mutex> LockGuard(CacheLock); 188 189 const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second; 190 if (!StoredRealPath) { 191 auto OwnedRealPath = [&]() -> CachedRealPath { 192 if (!RealPath) 193 return RealPath.getError(); 194 return RealPath->str(); 195 }(); 196 197 StoredRealPath = new (RealPathStorage.Allocate()) 198 CachedRealPath(std::move(OwnedRealPath)); 199 } 200 201 return *StoredRealPath; 202 } 203 204 bool DependencyScanningWorkerFilesystem::shouldBypass(StringRef Path) const { 205 return BypassedPathPrefix && Path.starts_with(*BypassedPathPrefix); 206 } 207 208 DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( 209 DependencyScanningFilesystemSharedCache &SharedCache, 210 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) 211 : llvm::RTTIExtends<DependencyScanningWorkerFilesystem, 212 llvm::vfs::ProxyFileSystem>(std::move(FS)), 213 SharedCache(SharedCache), 214 WorkingDirForCacheLookup(llvm::errc::invalid_argument) { 215 updateWorkingDirForCacheLookup(); 216 } 217 218 const CachedFileSystemEntry & 219 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( 220 TentativeEntry TEntry) { 221 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); 222 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), 223 std::move(TEntry.Status), 224 std::move(TEntry.Contents)); 225 } 226 227 const CachedFileSystemEntry * 228 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( 229 StringRef Filename) { 230 if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) 231 return Entry; 232 auto &Shard = SharedCache.getShardForFilename(Filename); 233 if (const auto *Entry = Shard.findEntryByFilename(Filename)) 234 return &LocalCache.insertEntryForFilename(Filename, *Entry); 235 return nullptr; 236 } 237 238 llvm::ErrorOr<const CachedFileSystemEntry &> 239 DependencyScanningWorkerFilesystem::computeAndStoreResult( 240 StringRef OriginalFilename, StringRef FilenameForLookup) { 241 llvm::ErrorOr<llvm::vfs::Status> Stat = 242 getUnderlyingFS().status(OriginalFilename); 243 if (!Stat) { 244 const auto &Entry = 245 getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); 246 return insertLocalEntryForFilename(FilenameForLookup, Entry); 247 } 248 249 if (const auto *Entry = findSharedEntryByUID(*Stat)) 250 return insertLocalEntryForFilename(FilenameForLookup, *Entry); 251 252 auto TEntry = 253 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename); 254 255 const CachedFileSystemEntry *SharedEntry = [&]() { 256 if (TEntry) { 257 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); 258 return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); 259 } 260 return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, 261 TEntry.getError()); 262 }(); 263 264 return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); 265 } 266 267 llvm::ErrorOr<EntryRef> 268 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 269 StringRef OriginalFilename) { 270 SmallString<256> PathBuf; 271 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); 272 if (!FilenameForLookup) 273 return FilenameForLookup.getError(); 274 275 if (const auto *Entry = 276 findEntryByFilenameWithWriteThrough(*FilenameForLookup)) 277 return EntryRef(OriginalFilename, *Entry).unwrapError(); 278 auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup); 279 if (!MaybeEntry) 280 return MaybeEntry.getError(); 281 return EntryRef(OriginalFilename, *MaybeEntry).unwrapError(); 282 } 283 284 llvm::ErrorOr<llvm::vfs::Status> 285 DependencyScanningWorkerFilesystem::status(const Twine &Path) { 286 SmallString<256> OwnedFilename; 287 StringRef Filename = Path.toStringRef(OwnedFilename); 288 289 if (shouldBypass(Filename)) 290 return getUnderlyingFS().status(Path); 291 292 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 293 if (!Result) 294 return Result.getError(); 295 return Result->getStatus(); 296 } 297 298 bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) { 299 // While some VFS overlay filesystems may implement more-efficient 300 // mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem` 301 // typically wraps `RealFileSystem` which does not specialize `exists`, 302 // so it is not likely to benefit from such optimizations. Instead, 303 // it is more-valuable to have this query go through the 304 // cached-`status` code-path of the `DependencyScanningWorkerFilesystem`. 305 llvm::ErrorOr<llvm::vfs::Status> Status = status(Path); 306 return Status && Status->exists(); 307 } 308 309 namespace { 310 311 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 312 /// this subclass. 313 class DepScanFile final : public llvm::vfs::File { 314 public: 315 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 316 llvm::vfs::Status Stat) 317 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 318 319 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry); 320 321 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 322 323 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 324 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 325 bool IsVolatile) override { 326 return std::move(Buffer); 327 } 328 329 std::error_code close() override { return {}; } 330 331 private: 332 std::unique_ptr<llvm::MemoryBuffer> Buffer; 333 llvm::vfs::Status Stat; 334 }; 335 336 } // end anonymous namespace 337 338 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 339 DepScanFile::create(EntryRef Entry) { 340 assert(!Entry.isError() && "error"); 341 342 if (Entry.isDirectory()) 343 return std::make_error_code(std::errc::is_a_directory); 344 345 auto Result = std::make_unique<DepScanFile>( 346 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), 347 Entry.getStatus().getName(), 348 /*RequiresNullTerminator=*/false), 349 Entry.getStatus()); 350 351 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 352 std::unique_ptr<llvm::vfs::File>(std::move(Result))); 353 } 354 355 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 356 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 357 SmallString<256> OwnedFilename; 358 StringRef Filename = Path.toStringRef(OwnedFilename); 359 360 if (shouldBypass(Filename)) 361 return getUnderlyingFS().openFileForRead(Path); 362 363 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 364 if (!Result) 365 return Result.getError(); 366 return DepScanFile::create(Result.get()); 367 } 368 369 std::error_code 370 DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path, 371 SmallVectorImpl<char> &Output) { 372 SmallString<256> OwnedFilename; 373 StringRef OriginalFilename = Path.toStringRef(OwnedFilename); 374 375 if (shouldBypass(OriginalFilename)) 376 return getUnderlyingFS().getRealPath(Path, Output); 377 378 SmallString<256> PathBuf; 379 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); 380 if (!FilenameForLookup) 381 return FilenameForLookup.getError(); 382 383 auto HandleCachedRealPath = 384 [&Output](const CachedRealPath &RealPath) -> std::error_code { 385 if (!RealPath) 386 return RealPath.getError(); 387 Output.assign(RealPath->begin(), RealPath->end()); 388 return {}; 389 }; 390 391 // If we already have the result in local cache, no work required. 392 if (const auto *RealPath = 393 LocalCache.findRealPathByFilename(*FilenameForLookup)) 394 return HandleCachedRealPath(*RealPath); 395 396 // If we have the result in the shared cache, cache it locally. 397 auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup); 398 if (const auto *ShardRealPath = 399 Shard.findRealPathByFilename(*FilenameForLookup)) { 400 const auto &RealPath = LocalCache.insertRealPathForFilename( 401 *FilenameForLookup, *ShardRealPath); 402 return HandleCachedRealPath(RealPath); 403 } 404 405 // If we don't know the real path, compute it... 406 std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output); 407 llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC; 408 if (!EC) 409 ComputedRealPath = StringRef{Output.data(), Output.size()}; 410 411 // ...and try to write it into the shared cache. In case some other thread won 412 // this race and already wrote its own result there, just adopt it. Write 413 // whatever is in the shared cache into the local one. 414 const auto &RealPath = Shard.getOrEmplaceRealPathForFilename( 415 *FilenameForLookup, ComputedRealPath); 416 return HandleCachedRealPath( 417 LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath)); 418 } 419 420 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( 421 const Twine &Path) { 422 std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path); 423 updateWorkingDirForCacheLookup(); 424 return EC; 425 } 426 427 void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { 428 llvm::ErrorOr<std::string> CWD = 429 getUnderlyingFS().getCurrentWorkingDirectory(); 430 if (!CWD) { 431 WorkingDirForCacheLookup = CWD.getError(); 432 } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) { 433 WorkingDirForCacheLookup = llvm::errc::invalid_argument; 434 } else { 435 WorkingDirForCacheLookup = *CWD; 436 } 437 assert(!WorkingDirForCacheLookup || 438 llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); 439 } 440 441 llvm::ErrorOr<StringRef> 442 DependencyScanningWorkerFilesystem::tryGetFilenameForLookup( 443 StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const { 444 StringRef FilenameForLookup; 445 if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) { 446 FilenameForLookup = OriginalFilename; 447 } else if (!WorkingDirForCacheLookup) { 448 return WorkingDirForCacheLookup.getError(); 449 } else { 450 StringRef RelFilename = OriginalFilename; 451 RelFilename.consume_front("./"); 452 PathBuf.assign(WorkingDirForCacheLookup->begin(), 453 WorkingDirForCacheLookup->end()); 454 llvm::sys::path::append(PathBuf, RelFilename); 455 FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()}; 456 } 457 assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup)); 458 return FilenameForLookup; 459 } 460 461 const char DependencyScanningWorkerFilesystem::ID = 0; 462