1 //===--- FileManager.cpp - File System Probing and Caching ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the FileManager interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // TODO: This should index all interesting directories with dirent calls. 15 // getdirentries ? 16 // opendir/readdir_r/closedir ? 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "clang/Basic/FileManager.h" 21 #include "clang/Basic/FileSystemStatCache.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/Config/llvm-config.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <string> 30 31 using namespace clang; 32 33 /// NON_EXISTENT_DIR - A special value distinct from null that is used to 34 /// represent a dir name that doesn't exist on the disk. 35 #define NON_EXISTENT_DIR reinterpret_cast<DirectoryEntry*>((intptr_t)-1) 36 37 /// NON_EXISTENT_FILE - A special value distinct from null that is used to 38 /// represent a filename that doesn't exist on the disk. 39 #define NON_EXISTENT_FILE reinterpret_cast<FileEntry*>((intptr_t)-1) 40 41 //===----------------------------------------------------------------------===// 42 // Common logic. 43 //===----------------------------------------------------------------------===// 44 45 FileManager::FileManager(const FileSystemOptions &FSO, 46 IntrusiveRefCntPtr<vfs::FileSystem> FS) 47 : FS(FS), FileSystemOpts(FSO), 48 SeenDirEntries(64), SeenFileEntries(64), NextFileUID(0) { 49 NumDirLookups = NumFileLookups = 0; 50 NumDirCacheMisses = NumFileCacheMisses = 0; 51 52 // If the caller doesn't provide a virtual file system, just grab the real 53 // file system. 54 if (!FS) 55 this->FS = vfs::getRealFileSystem(); 56 } 57 58 FileManager::~FileManager() = default; 59 60 void FileManager::addStatCache(std::unique_ptr<FileSystemStatCache> statCache, 61 bool AtBeginning) { 62 assert(statCache && "No stat cache provided?"); 63 if (AtBeginning || !StatCache.get()) { 64 statCache->setNextStatCache(std::move(StatCache)); 65 StatCache = std::move(statCache); 66 return; 67 } 68 69 FileSystemStatCache *LastCache = StatCache.get(); 70 while (LastCache->getNextStatCache()) 71 LastCache = LastCache->getNextStatCache(); 72 73 LastCache->setNextStatCache(std::move(statCache)); 74 } 75 76 void FileManager::removeStatCache(FileSystemStatCache *statCache) { 77 if (!statCache) 78 return; 79 80 if (StatCache.get() == statCache) { 81 // This is the first stat cache. 82 StatCache = StatCache->takeNextStatCache(); 83 return; 84 } 85 86 // Find the stat cache in the list. 87 FileSystemStatCache *PrevCache = StatCache.get(); 88 while (PrevCache && PrevCache->getNextStatCache() != statCache) 89 PrevCache = PrevCache->getNextStatCache(); 90 91 assert(PrevCache && "Stat cache not found for removal"); 92 PrevCache->setNextStatCache(statCache->takeNextStatCache()); 93 } 94 95 void FileManager::clearStatCaches() { 96 StatCache.reset(); 97 } 98 99 /// \brief Retrieve the directory that the given file name resides in. 100 /// Filename can point to either a real file or a virtual file. 101 static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr, 102 StringRef Filename, 103 bool CacheFailure) { 104 if (Filename.empty()) 105 return nullptr; 106 107 if (llvm::sys::path::is_separator(Filename[Filename.size() - 1])) 108 return nullptr; // If Filename is a directory. 109 110 StringRef DirName = llvm::sys::path::parent_path(Filename); 111 // Use the current directory if file has no path component. 112 if (DirName.empty()) 113 DirName = "."; 114 115 return FileMgr.getDirectory(DirName, CacheFailure); 116 } 117 118 /// Add all ancestors of the given path (pointing to either a file or 119 /// a directory) as virtual directories. 120 void FileManager::addAncestorsAsVirtualDirs(StringRef Path) { 121 StringRef DirName = llvm::sys::path::parent_path(Path); 122 if (DirName.empty()) 123 DirName = "."; 124 125 auto &NamedDirEnt = 126 *SeenDirEntries.insert(std::make_pair(DirName, nullptr)).first; 127 128 // When caching a virtual directory, we always cache its ancestors 129 // at the same time. Therefore, if DirName is already in the cache, 130 // we don't need to recurse as its ancestors must also already be in 131 // the cache. 132 if (NamedDirEnt.second && NamedDirEnt.second != NON_EXISTENT_DIR) 133 return; 134 135 // Add the virtual directory to the cache. 136 auto UDE = llvm::make_unique<DirectoryEntry>(); 137 UDE->Name = NamedDirEnt.first().data(); 138 NamedDirEnt.second = UDE.get(); 139 VirtualDirectoryEntries.push_back(std::move(UDE)); 140 141 // Recursively add the other ancestors. 142 addAncestorsAsVirtualDirs(DirName); 143 } 144 145 const DirectoryEntry *FileManager::getDirectory(StringRef DirName, 146 bool CacheFailure) { 147 // stat doesn't like trailing separators except for root directory. 148 // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'. 149 // (though it can strip '\\') 150 if (DirName.size() > 1 && 151 DirName != llvm::sys::path::root_path(DirName) && 152 llvm::sys::path::is_separator(DirName.back())) 153 DirName = DirName.substr(0, DirName.size()-1); 154 #ifdef LLVM_ON_WIN32 155 // Fixing a problem with "clang C:test.c" on Windows. 156 // Stat("C:") does not recognize "C:" as a valid directory 157 std::string DirNameStr; 158 if (DirName.size() > 1 && DirName.back() == ':' && 159 DirName.equals_lower(llvm::sys::path::root_name(DirName))) { 160 DirNameStr = DirName.str() + '.'; 161 DirName = DirNameStr; 162 } 163 #endif 164 165 ++NumDirLookups; 166 auto &NamedDirEnt = 167 *SeenDirEntries.insert(std::make_pair(DirName, nullptr)).first; 168 169 // See if there was already an entry in the map. Note that the map 170 // contains both virtual and real directories. 171 if (NamedDirEnt.second) 172 return NamedDirEnt.second == NON_EXISTENT_DIR ? nullptr 173 : NamedDirEnt.second; 174 175 ++NumDirCacheMisses; 176 177 // By default, initialize it to invalid. 178 NamedDirEnt.second = NON_EXISTENT_DIR; 179 180 // Get the null-terminated directory name as stored as the key of the 181 // SeenDirEntries map. 182 const char *InterndDirName = NamedDirEnt.first().data(); 183 184 // Check to see if the directory exists. 185 FileData Data; 186 if (getStatValue(InterndDirName, Data, false, nullptr /*directory lookup*/)) { 187 // There's no real directory at the given path. 188 if (!CacheFailure) 189 SeenDirEntries.erase(DirName); 190 return nullptr; 191 } 192 193 // It exists. See if we have already opened a directory with the 194 // same inode (this occurs on Unix-like systems when one dir is 195 // symlinked to another, for example) or the same path (on 196 // Windows). 197 DirectoryEntry &UDE = UniqueRealDirs[Data.UniqueID]; 198 199 NamedDirEnt.second = &UDE; 200 if (!UDE.getName()) { 201 // We don't have this directory yet, add it. We use the string 202 // key from the SeenDirEntries map as the string. 203 UDE.Name = InterndDirName; 204 } 205 206 return &UDE; 207 } 208 209 const FileEntry *FileManager::getFile(StringRef Filename, bool openFile, 210 bool CacheFailure) { 211 ++NumFileLookups; 212 213 // See if there is already an entry in the map. 214 auto &NamedFileEnt = 215 *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first; 216 217 // See if there is already an entry in the map. 218 if (NamedFileEnt.second) 219 return NamedFileEnt.second == NON_EXISTENT_FILE ? nullptr 220 : NamedFileEnt.second; 221 222 ++NumFileCacheMisses; 223 224 // By default, initialize it to invalid. 225 NamedFileEnt.second = NON_EXISTENT_FILE; 226 227 // Get the null-terminated file name as stored as the key of the 228 // SeenFileEntries map. 229 const char *InterndFileName = NamedFileEnt.first().data(); 230 231 // Look up the directory for the file. When looking up something like 232 // sys/foo.h we'll discover all of the search directories that have a 'sys' 233 // subdirectory. This will let us avoid having to waste time on known-to-fail 234 // searches when we go to find sys/bar.h, because all the search directories 235 // without a 'sys' subdir will get a cached failure result. 236 const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename, 237 CacheFailure); 238 if (DirInfo == nullptr) { // Directory doesn't exist, file can't exist. 239 if (!CacheFailure) 240 SeenFileEntries.erase(Filename); 241 242 return nullptr; 243 } 244 245 // FIXME: Use the directory info to prune this, before doing the stat syscall. 246 // FIXME: This will reduce the # syscalls. 247 248 // Nope, there isn't. Check to see if the file exists. 249 std::unique_ptr<vfs::File> F; 250 FileData Data; 251 if (getStatValue(InterndFileName, Data, true, openFile ? &F : nullptr)) { 252 // There's no real file at the given path. 253 if (!CacheFailure) 254 SeenFileEntries.erase(Filename); 255 256 return nullptr; 257 } 258 259 assert((openFile || !F) && "undesired open file"); 260 261 // It exists. See if we have already opened a file with the same inode. 262 // This occurs when one dir is symlinked to another, for example. 263 FileEntry &UFE = UniqueRealFiles[Data.UniqueID]; 264 265 NamedFileEnt.second = &UFE; 266 267 // If the name returned by getStatValue is different than Filename, re-intern 268 // the name. 269 if (Data.Name != Filename) { 270 auto &NamedFileEnt = 271 *SeenFileEntries.insert(std::make_pair(Data.Name, nullptr)).first; 272 if (!NamedFileEnt.second) 273 NamedFileEnt.second = &UFE; 274 else 275 assert(NamedFileEnt.second == &UFE && 276 "filename from getStatValue() refers to wrong file"); 277 InterndFileName = NamedFileEnt.first().data(); 278 } 279 280 if (UFE.isValid()) { // Already have an entry with this inode, return it. 281 282 // FIXME: this hack ensures that if we look up a file by a virtual path in 283 // the VFS that the getDir() will have the virtual path, even if we found 284 // the file by a 'real' path first. This is required in order to find a 285 // module's structure when its headers/module map are mapped in the VFS. 286 // We should remove this as soon as we can properly support a file having 287 // multiple names. 288 if (DirInfo != UFE.Dir && Data.IsVFSMapped) 289 UFE.Dir = DirInfo; 290 291 // Always update the name to use the last name by which a file was accessed. 292 // FIXME: Neither this nor always using the first name is correct; we want 293 // to switch towards a design where we return a FileName object that 294 // encapsulates both the name by which the file was accessed and the 295 // corresponding FileEntry. 296 UFE.Name = InterndFileName; 297 298 return &UFE; 299 } 300 301 // Otherwise, we don't have this file yet, add it. 302 UFE.Name = InterndFileName; 303 UFE.Size = Data.Size; 304 UFE.ModTime = Data.ModTime; 305 UFE.Dir = DirInfo; 306 UFE.UID = NextFileUID++; 307 UFE.UniqueID = Data.UniqueID; 308 UFE.IsNamedPipe = Data.IsNamedPipe; 309 UFE.InPCH = Data.InPCH; 310 UFE.File = std::move(F); 311 UFE.IsValid = true; 312 if (UFE.File) 313 if (auto RealPathName = UFE.File->getName()) 314 UFE.RealPathName = *RealPathName; 315 return &UFE; 316 } 317 318 const FileEntry * 319 FileManager::getVirtualFile(StringRef Filename, off_t Size, 320 time_t ModificationTime) { 321 ++NumFileLookups; 322 323 // See if there is already an entry in the map. 324 auto &NamedFileEnt = 325 *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first; 326 327 // See if there is already an entry in the map. 328 if (NamedFileEnt.second && NamedFileEnt.second != NON_EXISTENT_FILE) 329 return NamedFileEnt.second; 330 331 ++NumFileCacheMisses; 332 333 // By default, initialize it to invalid. 334 NamedFileEnt.second = NON_EXISTENT_FILE; 335 336 addAncestorsAsVirtualDirs(Filename); 337 FileEntry *UFE = nullptr; 338 339 // Now that all ancestors of Filename are in the cache, the 340 // following call is guaranteed to find the DirectoryEntry from the 341 // cache. 342 const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename, 343 /*CacheFailure=*/true); 344 assert(DirInfo && 345 "The directory of a virtual file should already be in the cache."); 346 347 // Check to see if the file exists. If so, drop the virtual file 348 FileData Data; 349 const char *InterndFileName = NamedFileEnt.first().data(); 350 if (getStatValue(InterndFileName, Data, true, nullptr) == 0) { 351 Data.Size = Size; 352 Data.ModTime = ModificationTime; 353 UFE = &UniqueRealFiles[Data.UniqueID]; 354 355 NamedFileEnt.second = UFE; 356 357 // If we had already opened this file, close it now so we don't 358 // leak the descriptor. We're not going to use the file 359 // descriptor anyway, since this is a virtual file. 360 if (UFE->File) 361 UFE->closeFile(); 362 363 // If we already have an entry with this inode, return it. 364 if (UFE->isValid()) 365 return UFE; 366 367 UFE->UniqueID = Data.UniqueID; 368 UFE->IsNamedPipe = Data.IsNamedPipe; 369 UFE->InPCH = Data.InPCH; 370 } 371 372 if (!UFE) { 373 VirtualFileEntries.push_back(llvm::make_unique<FileEntry>()); 374 UFE = VirtualFileEntries.back().get(); 375 NamedFileEnt.second = UFE; 376 } 377 378 UFE->Name = InterndFileName; 379 UFE->Size = Size; 380 UFE->ModTime = ModificationTime; 381 UFE->Dir = DirInfo; 382 UFE->UID = NextFileUID++; 383 UFE->File.reset(); 384 return UFE; 385 } 386 387 bool FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const { 388 StringRef pathRef(path.data(), path.size()); 389 390 if (FileSystemOpts.WorkingDir.empty() 391 || llvm::sys::path::is_absolute(pathRef)) 392 return false; 393 394 SmallString<128> NewPath(FileSystemOpts.WorkingDir); 395 llvm::sys::path::append(NewPath, pathRef); 396 path = NewPath; 397 return true; 398 } 399 400 bool FileManager::makeAbsolutePath(SmallVectorImpl<char> &Path) const { 401 bool Changed = FixupRelativePath(Path); 402 403 if (!llvm::sys::path::is_absolute(StringRef(Path.data(), Path.size()))) { 404 llvm::sys::fs::make_absolute(Path); 405 Changed = true; 406 } 407 408 return Changed; 409 } 410 411 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 412 FileManager::getBufferForFile(const FileEntry *Entry, bool isVolatile, 413 bool ShouldCloseOpenFile) { 414 uint64_t FileSize = Entry->getSize(); 415 // If there's a high enough chance that the file have changed since we 416 // got its size, force a stat before opening it. 417 if (isVolatile) 418 FileSize = -1; 419 420 const char *Filename = Entry->getName(); 421 // If the file is already open, use the open file descriptor. 422 if (Entry->File) { 423 auto Result = 424 Entry->File->getBuffer(Filename, FileSize, 425 /*RequiresNullTerminator=*/true, isVolatile); 426 // FIXME: we need a set of APIs that can make guarantees about whether a 427 // FileEntry is open or not. 428 if (ShouldCloseOpenFile) 429 Entry->closeFile(); 430 return Result; 431 } 432 433 // Otherwise, open the file. 434 435 if (FileSystemOpts.WorkingDir.empty()) 436 return FS->getBufferForFile(Filename, FileSize, 437 /*RequiresNullTerminator=*/true, isVolatile); 438 439 SmallString<128> FilePath(Entry->getName()); 440 FixupRelativePath(FilePath); 441 return FS->getBufferForFile(FilePath, FileSize, 442 /*RequiresNullTerminator=*/true, isVolatile); 443 } 444 445 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 446 FileManager::getBufferForFile(StringRef Filename) { 447 if (FileSystemOpts.WorkingDir.empty()) 448 return FS->getBufferForFile(Filename); 449 450 SmallString<128> FilePath(Filename); 451 FixupRelativePath(FilePath); 452 return FS->getBufferForFile(FilePath.c_str()); 453 } 454 455 /// getStatValue - Get the 'stat' information for the specified path, 456 /// using the cache to accelerate it if possible. This returns true 457 /// if the path points to a virtual file or does not exist, or returns 458 /// false if it's an existent real file. If FileDescriptor is NULL, 459 /// do directory look-up instead of file look-up. 460 bool FileManager::getStatValue(const char *Path, FileData &Data, bool isFile, 461 std::unique_ptr<vfs::File> *F) { 462 // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be 463 // absolute! 464 if (FileSystemOpts.WorkingDir.empty()) 465 return FileSystemStatCache::get(Path, Data, isFile, F,StatCache.get(), *FS); 466 467 SmallString<128> FilePath(Path); 468 FixupRelativePath(FilePath); 469 470 return FileSystemStatCache::get(FilePath.c_str(), Data, isFile, F, 471 StatCache.get(), *FS); 472 } 473 474 bool FileManager::getNoncachedStatValue(StringRef Path, 475 vfs::Status &Result) { 476 SmallString<128> FilePath(Path); 477 FixupRelativePath(FilePath); 478 479 llvm::ErrorOr<vfs::Status> S = FS->status(FilePath.c_str()); 480 if (!S) 481 return true; 482 Result = *S; 483 return false; 484 } 485 486 void FileManager::invalidateCache(const FileEntry *Entry) { 487 assert(Entry && "Cannot invalidate a NULL FileEntry"); 488 489 SeenFileEntries.erase(Entry->getName()); 490 491 // FileEntry invalidation should not block future optimizations in the file 492 // caches. Possible alternatives are cache truncation (invalidate last N) or 493 // invalidation of the whole cache. 494 UniqueRealFiles.erase(Entry->getUniqueID()); 495 } 496 497 498 void FileManager::GetUniqueIDMapping( 499 SmallVectorImpl<const FileEntry *> &UIDToFiles) const { 500 UIDToFiles.clear(); 501 UIDToFiles.resize(NextFileUID); 502 503 // Map file entries 504 for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator 505 FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end(); 506 FE != FEEnd; ++FE) 507 if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE) 508 UIDToFiles[FE->getValue()->getUID()] = FE->getValue(); 509 510 // Map virtual file entries 511 for (const auto &VFE : VirtualFileEntries) 512 if (VFE && VFE.get() != NON_EXISTENT_FILE) 513 UIDToFiles[VFE->getUID()] = VFE.get(); 514 } 515 516 void FileManager::modifyFileEntry(FileEntry *File, 517 off_t Size, time_t ModificationTime) { 518 File->Size = Size; 519 File->ModTime = ModificationTime; 520 } 521 522 StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) { 523 // FIXME: use llvm::sys::fs::canonical() when it gets implemented 524 llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known 525 = CanonicalDirNames.find(Dir); 526 if (Known != CanonicalDirNames.end()) 527 return Known->second; 528 529 StringRef CanonicalName(Dir->getName()); 530 531 #ifdef LLVM_ON_UNIX 532 char CanonicalNameBuf[PATH_MAX]; 533 if (realpath(Dir->getName(), CanonicalNameBuf)) 534 CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage); 535 #else 536 SmallString<256> CanonicalNameBuf(CanonicalName); 537 llvm::sys::fs::make_absolute(CanonicalNameBuf); 538 llvm::sys::path::native(CanonicalNameBuf); 539 // We've run into needing to remove '..' here in the wild though, so 540 // remove it. 541 // On Windows, symlinks are significantly less prevalent, so removing 542 // '..' is pretty safe. 543 // Ideally we'd have an equivalent of `realpath` and could implement 544 // sys::fs::canonical across all the platforms. 545 llvm::sys::path::remove_dots(CanonicalNameBuf, /* remove_dot_dot */ true); 546 CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage); 547 #endif 548 549 CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName)); 550 return CanonicalName; 551 } 552 553 void FileManager::PrintStats() const { 554 llvm::errs() << "\n*** File Manager Stats:\n"; 555 llvm::errs() << UniqueRealFiles.size() << " real files found, " 556 << UniqueRealDirs.size() << " real dirs found.\n"; 557 llvm::errs() << VirtualFileEntries.size() << " virtual files found, " 558 << VirtualDirectoryEntries.size() << " virtual dirs found.\n"; 559 llvm::errs() << NumDirLookups << " dir lookups, " 560 << NumDirCacheMisses << " dir cache misses.\n"; 561 llvm::errs() << NumFileLookups << " file lookups, " 562 << NumFileCacheMisses << " file cache misses.\n"; 563 564 //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups; 565 } 566