xref: /llvm-project/clang/lib/Basic/FileManager.cpp (revision d92b1ae1d769bbb4175e9de3dbe8b14cdd889975)
1 //===--- FileManager.cpp - File System Probing and Caching ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the FileManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // TODO: This should index all interesting directories with dirent calls.
15 //  getdirentries ?
16 //  opendir/readdir_r/closedir ?
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "clang/Basic/FileManager.h"
21 #include "clang/Basic/FileSystemStatCache.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <climits>
32 #include <cstdint>
33 #include <cstdlib>
34 #include <string>
35 #include <utility>
36 
37 using namespace clang;
38 
/// NON_EXISTENT_DIR - Sentinel DirectoryEntry pointer, distinct from null,
/// that the directory cache stores for a name that doesn't exist on the disk.
#define NON_EXISTENT_DIR                                                       \
  reinterpret_cast<DirectoryEntry *>(static_cast<intptr_t>(-1))
42 
/// NON_EXISTENT_FILE - Sentinel FileEntry pointer, distinct from null, that
/// the file cache stores for a filename that doesn't exist on the disk.
#define NON_EXISTENT_FILE                                                      \
  reinterpret_cast<FileEntry *>(static_cast<intptr_t>(-1))
46 
47 //===----------------------------------------------------------------------===//
48 // Common logic.
49 //===----------------------------------------------------------------------===//
50 
51 FileManager::FileManager(const FileSystemOptions &FSO,
52                          IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
53     : FS(std::move(FS)), FileSystemOpts(FSO), SeenDirEntries(64),
54       SeenFileEntries(64), NextFileUID(0) {
55   NumDirLookups = NumFileLookups = 0;
56   NumDirCacheMisses = NumFileCacheMisses = 0;
57 
58   // If the caller doesn't provide a virtual file system, just grab the real
59   // file system.
60   if (!this->FS)
61     this->FS = llvm::vfs::getRealFileSystem();
62 }
63 
64 FileManager::~FileManager() = default;
65 
66 void FileManager::setStatCache(std::unique_ptr<FileSystemStatCache> statCache) {
67   assert(statCache && "No stat cache provided?");
68   StatCache = std::move(statCache);
69 }
70 
71 void FileManager::clearStatCache() { StatCache.reset(); }
72 
73 /// Retrieve the directory that the given file name resides in.
74 /// Filename can point to either a real file or a virtual file.
75 static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr,
76                                                   StringRef Filename,
77                                                   bool CacheFailure) {
78   if (Filename.empty())
79     return nullptr;
80 
81   if (llvm::sys::path::is_separator(Filename[Filename.size() - 1]))
82     return nullptr; // If Filename is a directory.
83 
84   StringRef DirName = llvm::sys::path::parent_path(Filename);
85   // Use the current directory if file has no path component.
86   if (DirName.empty())
87     DirName = ".";
88 
89   return FileMgr.getDirectory(DirName, CacheFailure);
90 }
91 
92 /// Add all ancestors of the given path (pointing to either a file or
93 /// a directory) as virtual directories.
94 void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
95   StringRef DirName = llvm::sys::path::parent_path(Path);
96   if (DirName.empty())
97     DirName = ".";
98 
99   auto &NamedDirEnt =
100       *SeenDirEntries.insert(std::make_pair(DirName, nullptr)).first;
101 
102   // When caching a virtual directory, we always cache its ancestors
103   // at the same time.  Therefore, if DirName is already in the cache,
104   // we don't need to recurse as its ancestors must also already be in
105   // the cache.
106   if (NamedDirEnt.second && NamedDirEnt.second != NON_EXISTENT_DIR)
107     return;
108 
109   // Add the virtual directory to the cache.
110   auto UDE = llvm::make_unique<DirectoryEntry>();
111   UDE->Name = NamedDirEnt.first();
112   NamedDirEnt.second = UDE.get();
113   VirtualDirectoryEntries.push_back(std::move(UDE));
114 
115   // Recursively add the other ancestors.
116   addAncestorsAsVirtualDirs(DirName);
117 }
118 
119 const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
120                                                 bool CacheFailure) {
121   // stat doesn't like trailing separators except for root directory.
122   // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'.
123   // (though it can strip '\\')
124   if (DirName.size() > 1 &&
125       DirName != llvm::sys::path::root_path(DirName) &&
126       llvm::sys::path::is_separator(DirName.back()))
127     DirName = DirName.substr(0, DirName.size()-1);
128 #ifdef _WIN32
129   // Fixing a problem with "clang C:test.c" on Windows.
130   // Stat("C:") does not recognize "C:" as a valid directory
131   std::string DirNameStr;
132   if (DirName.size() > 1 && DirName.back() == ':' &&
133       DirName.equals_lower(llvm::sys::path::root_name(DirName))) {
134     DirNameStr = DirName.str() + '.';
135     DirName = DirNameStr;
136   }
137 #endif
138 
139   ++NumDirLookups;
140   auto &NamedDirEnt =
141       *SeenDirEntries.insert(std::make_pair(DirName, nullptr)).first;
142 
143   // See if there was already an entry in the map.  Note that the map
144   // contains both virtual and real directories.
145   if (NamedDirEnt.second)
146     return NamedDirEnt.second == NON_EXISTENT_DIR ? nullptr
147                                                   : NamedDirEnt.second;
148 
149   ++NumDirCacheMisses;
150 
151   // By default, initialize it to invalid.
152   NamedDirEnt.second = NON_EXISTENT_DIR;
153 
154   // Get the null-terminated directory name as stored as the key of the
155   // SeenDirEntries map.
156   StringRef InterndDirName = NamedDirEnt.first();
157 
158   // Check to see if the directory exists.
159   FileData Data;
160   if (getStatValue(InterndDirName, Data, false, nullptr /*directory lookup*/)) {
161     // There's no real directory at the given path.
162     if (!CacheFailure)
163       SeenDirEntries.erase(DirName);
164     return nullptr;
165   }
166 
167   // It exists.  See if we have already opened a directory with the
168   // same inode (this occurs on Unix-like systems when one dir is
169   // symlinked to another, for example) or the same path (on
170   // Windows).
171   DirectoryEntry &UDE = UniqueRealDirs[Data.UniqueID];
172 
173   NamedDirEnt.second = &UDE;
174   if (UDE.getName().empty()) {
175     // We don't have this directory yet, add it.  We use the string
176     // key from the SeenDirEntries map as the string.
177     UDE.Name  = InterndDirName;
178   }
179 
180   return &UDE;
181 }
182 
183 const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
184                                       bool CacheFailure) {
185   ++NumFileLookups;
186 
187   // See if there is already an entry in the map.
188   auto &NamedFileEnt =
189       *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first;
190 
191   // See if there is already an entry in the map.
192   if (NamedFileEnt.second) {
193     if (NamedFileEnt.second == NON_EXISTENT_FILE)
194       return nullptr;
195     // Entry exists: return it *unless* it wasn't opened and open is requested.
196     if (!(NamedFileEnt.second->DeferredOpen && openFile))
197       return NamedFileEnt.second;
198     // We previously stat()ed the file, but didn't open it: do that below.
199     // FIXME: the below does other redundant work too (stats the dir and file).
200   } else {
201     // By default, initialize it to invalid.
202     NamedFileEnt.second = NON_EXISTENT_FILE;
203   }
204 
205   ++NumFileCacheMisses;
206 
207   // Get the null-terminated file name as stored as the key of the
208   // SeenFileEntries map.
209   StringRef InterndFileName = NamedFileEnt.first();
210 
211   // Look up the directory for the file.  When looking up something like
212   // sys/foo.h we'll discover all of the search directories that have a 'sys'
213   // subdirectory.  This will let us avoid having to waste time on known-to-fail
214   // searches when we go to find sys/bar.h, because all the search directories
215   // without a 'sys' subdir will get a cached failure result.
216   const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
217                                                        CacheFailure);
218   if (DirInfo == nullptr) { // Directory doesn't exist, file can't exist.
219     if (!CacheFailure)
220       SeenFileEntries.erase(Filename);
221 
222     return nullptr;
223   }
224 
225   // FIXME: Use the directory info to prune this, before doing the stat syscall.
226   // FIXME: This will reduce the # syscalls.
227 
228   // Nope, there isn't.  Check to see if the file exists.
229   std::unique_ptr<llvm::vfs::File> F;
230   FileData Data;
231   if (getStatValue(InterndFileName, Data, true, openFile ? &F : nullptr)) {
232     // There's no real file at the given path.
233     if (!CacheFailure)
234       SeenFileEntries.erase(Filename);
235 
236     return nullptr;
237   }
238 
239   assert((openFile || !F) && "undesired open file");
240 
241   // It exists.  See if we have already opened a file with the same inode.
242   // This occurs when one dir is symlinked to another, for example.
243   FileEntry &UFE = UniqueRealFiles[Data.UniqueID];
244   UFE.DeferredOpen = !openFile;
245 
246   NamedFileEnt.second = &UFE;
247 
248   // If the name returned by getStatValue is different than Filename, re-intern
249   // the name.
250   if (Data.Name != Filename) {
251     auto &NamedFileEnt =
252         *SeenFileEntries.insert(std::make_pair(Data.Name, nullptr)).first;
253     if (!NamedFileEnt.second)
254       NamedFileEnt.second = &UFE;
255     else
256       assert(NamedFileEnt.second == &UFE &&
257              "filename from getStatValue() refers to wrong file");
258     InterndFileName = NamedFileEnt.first().data();
259   }
260 
261   // If we opened the file for the first time, record the resulting info.
262   // Do this even if the cache entry was valid, maybe we didn't previously open.
263   if (F && !UFE.File) {
264     if (auto PathName = F->getName())
265       fillRealPathName(&UFE, *PathName);
266     UFE.File = std::move(F);
267     assert(!UFE.DeferredOpen && "we just opened it!");
268   }
269 
270   if (UFE.isValid()) { // Already have an entry with this inode, return it.
271 
272     // FIXME: this hack ensures that if we look up a file by a virtual path in
273     // the VFS that the getDir() will have the virtual path, even if we found
274     // the file by a 'real' path first. This is required in order to find a
275     // module's structure when its headers/module map are mapped in the VFS.
276     // We should remove this as soon as we can properly support a file having
277     // multiple names.
278     if (DirInfo != UFE.Dir && Data.IsVFSMapped)
279       UFE.Dir = DirInfo;
280 
281     // Always update the name to use the last name by which a file was accessed.
282     // FIXME: Neither this nor always using the first name is correct; we want
283     // to switch towards a design where we return a FileName object that
284     // encapsulates both the name by which the file was accessed and the
285     // corresponding FileEntry.
286     UFE.Name = InterndFileName;
287 
288     return &UFE;
289   }
290 
291   // Otherwise, we don't have this file yet, add it.
292   UFE.Name    = InterndFileName;
293   UFE.Size = Data.Size;
294   UFE.ModTime = Data.ModTime;
295   UFE.Dir     = DirInfo;
296   UFE.UID     = NextFileUID++;
297   UFE.UniqueID = Data.UniqueID;
298   UFE.IsNamedPipe = Data.IsNamedPipe;
299   UFE.InPCH = Data.InPCH;
300   UFE.IsValid = true;
301   // Note File and DeferredOpen were initialized above.
302 
303   return &UFE;
304 }
305 
306 const FileEntry *
307 FileManager::getVirtualFile(StringRef Filename, off_t Size,
308                             time_t ModificationTime) {
309   ++NumFileLookups;
310 
311   // See if there is already an entry in the map.
312   auto &NamedFileEnt =
313       *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first;
314 
315   // See if there is already an entry in the map.
316   if (NamedFileEnt.second && NamedFileEnt.second != NON_EXISTENT_FILE)
317     return NamedFileEnt.second;
318 
319   ++NumFileCacheMisses;
320 
321   // By default, initialize it to invalid.
322   NamedFileEnt.second = NON_EXISTENT_FILE;
323 
324   addAncestorsAsVirtualDirs(Filename);
325   FileEntry *UFE = nullptr;
326 
327   // Now that all ancestors of Filename are in the cache, the
328   // following call is guaranteed to find the DirectoryEntry from the
329   // cache.
330   const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
331                                                        /*CacheFailure=*/true);
332   assert(DirInfo &&
333          "The directory of a virtual file should already be in the cache.");
334 
335   // Check to see if the file exists. If so, drop the virtual file
336   FileData Data;
337   const char *InterndFileName = NamedFileEnt.first().data();
338   if (getStatValue(InterndFileName, Data, true, nullptr) == 0) {
339     Data.Size = Size;
340     Data.ModTime = ModificationTime;
341     UFE = &UniqueRealFiles[Data.UniqueID];
342 
343     NamedFileEnt.second = UFE;
344 
345     // If we had already opened this file, close it now so we don't
346     // leak the descriptor. We're not going to use the file
347     // descriptor anyway, since this is a virtual file.
348     if (UFE->File)
349       UFE->closeFile();
350 
351     // If we already have an entry with this inode, return it.
352     if (UFE->isValid())
353       return UFE;
354 
355     UFE->UniqueID = Data.UniqueID;
356     UFE->IsNamedPipe = Data.IsNamedPipe;
357     UFE->InPCH = Data.InPCH;
358     fillRealPathName(UFE, Data.Name);
359   }
360 
361   if (!UFE) {
362     VirtualFileEntries.push_back(llvm::make_unique<FileEntry>());
363     UFE = VirtualFileEntries.back().get();
364     NamedFileEnt.second = UFE;
365   }
366 
367   UFE->Name    = InterndFileName;
368   UFE->Size    = Size;
369   UFE->ModTime = ModificationTime;
370   UFE->Dir     = DirInfo;
371   UFE->UID     = NextFileUID++;
372   UFE->IsValid = true;
373   UFE->File.reset();
374   UFE->DeferredOpen = false;
375   return UFE;
376 }
377 
378 bool FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const {
379   StringRef pathRef(path.data(), path.size());
380 
381   if (FileSystemOpts.WorkingDir.empty()
382       || llvm::sys::path::is_absolute(pathRef))
383     return false;
384 
385   SmallString<128> NewPath(FileSystemOpts.WorkingDir);
386   llvm::sys::path::append(NewPath, pathRef);
387   path = NewPath;
388   return true;
389 }
390 
391 bool FileManager::makeAbsolutePath(SmallVectorImpl<char> &Path) const {
392   bool Changed = FixupRelativePath(Path);
393 
394   if (!llvm::sys::path::is_absolute(StringRef(Path.data(), Path.size()))) {
395     FS->makeAbsolute(Path);
396     Changed = true;
397   }
398 
399   return Changed;
400 }
401 
402 void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
403   llvm::SmallString<128> AbsPath(FileName);
404   // This is not the same as `VFS::getRealPath()`, which resolves symlinks
405   // but can be very expensive on real file systems.
406   // FIXME: the semantic of RealPathName is unclear, and the name might be
407   // misleading. We need to clean up the interface here.
408   makeAbsolutePath(AbsPath);
409   llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
410   UFE->RealPathName = AbsPath.str();
411 }
412 
413 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
414 FileManager::getBufferForFile(const FileEntry *Entry, bool isVolatile,
415                               bool ShouldCloseOpenFile) {
416   uint64_t FileSize = Entry->getSize();
417   // If there's a high enough chance that the file have changed since we
418   // got its size, force a stat before opening it.
419   if (isVolatile)
420     FileSize = -1;
421 
422   StringRef Filename = Entry->getName();
423   // If the file is already open, use the open file descriptor.
424   if (Entry->File) {
425     auto Result =
426         Entry->File->getBuffer(Filename, FileSize,
427                                /*RequiresNullTerminator=*/true, isVolatile);
428     // FIXME: we need a set of APIs that can make guarantees about whether a
429     // FileEntry is open or not.
430     if (ShouldCloseOpenFile)
431       Entry->closeFile();
432     return Result;
433   }
434 
435   // Otherwise, open the file.
436 
437   if (FileSystemOpts.WorkingDir.empty())
438     return FS->getBufferForFile(Filename, FileSize,
439                                 /*RequiresNullTerminator=*/true, isVolatile);
440 
441   SmallString<128> FilePath(Entry->getName());
442   FixupRelativePath(FilePath);
443   return FS->getBufferForFile(FilePath, FileSize,
444                               /*RequiresNullTerminator=*/true, isVolatile);
445 }
446 
447 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
448 FileManager::getBufferForFile(StringRef Filename, bool isVolatile) {
449   if (FileSystemOpts.WorkingDir.empty())
450     return FS->getBufferForFile(Filename, -1, true, isVolatile);
451 
452   SmallString<128> FilePath(Filename);
453   FixupRelativePath(FilePath);
454   return FS->getBufferForFile(FilePath.c_str(), -1, true, isVolatile);
455 }
456 
457 /// getStatValue - Get the 'stat' information for the specified path,
458 /// using the cache to accelerate it if possible.  This returns true
459 /// if the path points to a virtual file or does not exist, or returns
460 /// false if it's an existent real file.  If FileDescriptor is NULL,
461 /// do directory look-up instead of file look-up.
462 bool FileManager::getStatValue(StringRef Path, FileData &Data, bool isFile,
463                                std::unique_ptr<llvm::vfs::File> *F) {
464   // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
465   // absolute!
466   if (FileSystemOpts.WorkingDir.empty())
467     return FileSystemStatCache::get(Path, Data, isFile, F,StatCache.get(), *FS);
468 
469   SmallString<128> FilePath(Path);
470   FixupRelativePath(FilePath);
471 
472   return FileSystemStatCache::get(FilePath.c_str(), Data, isFile, F,
473                                   StatCache.get(), *FS);
474 }
475 
476 bool FileManager::getNoncachedStatValue(StringRef Path,
477                                         llvm::vfs::Status &Result) {
478   SmallString<128> FilePath(Path);
479   FixupRelativePath(FilePath);
480 
481   llvm::ErrorOr<llvm::vfs::Status> S = FS->status(FilePath.c_str());
482   if (!S)
483     return true;
484   Result = *S;
485   return false;
486 }
487 
488 void FileManager::invalidateCache(const FileEntry *Entry) {
489   assert(Entry && "Cannot invalidate a NULL FileEntry");
490 
491   SeenFileEntries.erase(Entry->getName());
492 
493   // FileEntry invalidation should not block future optimizations in the file
494   // caches. Possible alternatives are cache truncation (invalidate last N) or
495   // invalidation of the whole cache.
496   UniqueRealFiles.erase(Entry->getUniqueID());
497 }
498 
499 void FileManager::GetUniqueIDMapping(
500                    SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
501   UIDToFiles.clear();
502   UIDToFiles.resize(NextFileUID);
503 
504   // Map file entries
505   for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
506          FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end();
507        FE != FEEnd; ++FE)
508     if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE)
509       UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
510 
511   // Map virtual file entries
512   for (const auto &VFE : VirtualFileEntries)
513     if (VFE && VFE.get() != NON_EXISTENT_FILE)
514       UIDToFiles[VFE->getUID()] = VFE.get();
515 }
516 
517 void FileManager::modifyFileEntry(FileEntry *File,
518                                   off_t Size, time_t ModificationTime) {
519   File->Size = Size;
520   File->ModTime = ModificationTime;
521 }
522 
523 StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) {
524   // FIXME: use llvm::sys::fs::canonical() when it gets implemented
525   llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known
526     = CanonicalDirNames.find(Dir);
527   if (Known != CanonicalDirNames.end())
528     return Known->second;
529 
530   StringRef CanonicalName(Dir->getName());
531 
532   SmallString<4096> CanonicalNameBuf;
533   if (!FS->getRealPath(Dir->getName(), CanonicalNameBuf))
534     CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage);
535 
536   CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName));
537   return CanonicalName;
538 }
539 
540 void FileManager::PrintStats() const {
541   llvm::errs() << "\n*** File Manager Stats:\n";
542   llvm::errs() << UniqueRealFiles.size() << " real files found, "
543                << UniqueRealDirs.size() << " real dirs found.\n";
544   llvm::errs() << VirtualFileEntries.size() << " virtual files found, "
545                << VirtualDirectoryEntries.size() << " virtual dirs found.\n";
546   llvm::errs() << NumDirLookups << " dir lookups, "
547                << NumDirCacheMisses << " dir cache misses.\n";
548   llvm::errs() << NumFileLookups << " file lookups, "
549                << NumFileCacheMisses << " file cache misses.\n";
550 
551   //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
552 }
553