xref: /llvm-project/clang/lib/Basic/FileManager.cpp (revision 09b6989ef0589d979c017eca2f3763a4170743f8)
1 //===--- FileManager.cpp - File System Probing and Caching ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the FileManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // TODO: This should index all interesting directories with dirent calls.
15 //  getdirentries ?
16 //  opendir/readdir_r/closedir ?
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "clang/Basic/FileManager.h"
21 #include "clang/Basic/FileSystemStatCache.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/system_error.h"
29 #include "llvm/Config/config.h"
30 #include <map>
31 #include <set>
32 #include <string>
33 
34 // FIXME: This is terrible, we need this for ::close.
35 #if !defined(_MSC_VER) && !defined(__MINGW32__)
36 #include <unistd.h>
37 #include <sys/uio.h>
38 #else
39 #include <io.h>
40 #endif
41 using namespace clang;
42 
43 // FIXME: Enhance libsystem to support inode and other fields.
44 #include <sys/stat.h>
45 
/// NON_EXISTENT_DIR - Sentinel DirectoryEntry pointer, distinct from null,
/// that is recorded for a directory name that does not exist on the disk.
#define NON_EXISTENT_DIR \
  reinterpret_cast<DirectoryEntry*>(static_cast<intptr_t>(-1))
49 
/// NON_EXISTENT_FILE - Sentinel FileEntry pointer, distinct from null, that
/// is recorded for a filename that does not exist on the disk.
#define NON_EXISTENT_FILE \
  reinterpret_cast<FileEntry*>(static_cast<intptr_t>(-1))
53 
54 
55 FileEntry::~FileEntry() {
56   // If this FileEntry owns an open file descriptor that never got used, close
57   // it.
58   if (FD != -1) ::close(FD);
59 }
60 
61 //===----------------------------------------------------------------------===//
62 // Windows.
63 //===----------------------------------------------------------------------===//
64 
65 #ifdef LLVM_ON_WIN32
66 
/// IS_DIR_SEPARATOR_CHAR - True if \p x is a directory separator.  In this
/// (Windows) configuration both '/' and '\\' are accepted.
#define IS_DIR_SEPARATOR_CHAR(x) ('/' == (x) || '\\' == (x))
68 
69 namespace {
70   static std::string GetFullPath(const char *relPath) {
71     char *absPathStrPtr = _fullpath(NULL, relPath, 0);
72     assert(absPathStrPtr && "_fullpath() returned NULL!");
73 
74     std::string absPath(absPathStrPtr);
75 
76     free(absPathStrPtr);
77     return absPath;
78   }
79 }
80 
81 class FileManager::UniqueDirContainer {
82   /// UniqueDirs - Cache from full path to existing directories/files.
83   ///
84   llvm::StringMap<DirectoryEntry> UniqueDirs;
85 
86 public:
87   DirectoryEntry &getDirectory(const char *Name, struct stat &StatBuf) {
88     std::string FullPath(GetFullPath(Name));
89     return UniqueDirs.GetOrCreateValue(FullPath).getValue();
90   }
91 
92   size_t size() const { return UniqueDirs.size(); }
93 };
94 
95 class FileManager::UniqueFileContainer {
96   /// UniqueFiles - Cache from full path to existing directories/files.
97   ///
98   llvm::StringMap<FileEntry, llvm::BumpPtrAllocator> UniqueFiles;
99 
100 public:
101   FileEntry &getFile(const char *Name, struct stat &StatBuf) {
102     std::string FullPath(GetFullPath(Name));
103 
104     // LowercaseString because Windows filesystem is case insensitive.
105     FullPath = llvm::LowercaseString(FullPath);
106     return UniqueFiles.GetOrCreateValue(FullPath).getValue();
107   }
108 
109   size_t size() const { return UniqueFiles.size(); }
110 };
111 
112 //===----------------------------------------------------------------------===//
113 // Unix-like Systems.
114 //===----------------------------------------------------------------------===//
115 
116 #else
117 
/// IS_DIR_SEPARATOR_CHAR - True if \p x is a directory separator.  In this
/// (non-Windows) configuration only '/' is accepted.
#define IS_DIR_SEPARATOR_CHAR(x) ('/' == (x))
119 
120 class FileManager::UniqueDirContainer {
121   /// UniqueDirs - Cache from ID's to existing directories/files.
122   std::map<std::pair<dev_t, ino_t>, DirectoryEntry> UniqueDirs;
123 
124 public:
125   DirectoryEntry &getDirectory(const char *Name, struct stat &StatBuf) {
126     return UniqueDirs[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)];
127   }
128 
129   size_t size() const { return UniqueDirs.size(); }
130 };
131 
132 class FileManager::UniqueFileContainer {
133   /// UniqueFiles - Cache from ID's to existing directories/files.
134   std::set<FileEntry> UniqueFiles;
135 
136 public:
137   FileEntry &getFile(const char *Name, struct stat &StatBuf) {
138     return
139       const_cast<FileEntry&>(
140                     *UniqueFiles.insert(FileEntry(StatBuf.st_dev,
141                                                   StatBuf.st_ino,
142                                                   StatBuf.st_mode)).first);
143   }
144 
145   size_t size() const { return UniqueFiles.size(); }
146 };
147 
148 #endif
149 
150 //===----------------------------------------------------------------------===//
151 // Common logic.
152 //===----------------------------------------------------------------------===//
153 
154 FileManager::FileManager(const FileSystemOptions &FSO)
155   : FileSystemOpts(FSO),
156     UniqueDirs(*new UniqueDirContainer()),
157     UniqueFiles(*new UniqueFileContainer()),
158     DirEntries(64), FileEntries(64), NextFileUID(0) {
159   NumDirLookups = NumFileLookups = 0;
160   NumDirCacheMisses = NumFileCacheMisses = 0;
161 }
162 
163 FileManager::~FileManager() {
164   delete &UniqueDirs;
165   delete &UniqueFiles;
166   for (unsigned i = 0, e = VirtualFileEntries.size(); i != e; ++i)
167     delete VirtualFileEntries[i];
168 }
169 
170 void FileManager::addStatCache(FileSystemStatCache *statCache,
171                                bool AtBeginning) {
172   assert(statCache && "No stat cache provided?");
173   if (AtBeginning || StatCache.get() == 0) {
174     statCache->setNextStatCache(StatCache.take());
175     StatCache.reset(statCache);
176     return;
177   }
178 
179   FileSystemStatCache *LastCache = StatCache.get();
180   while (LastCache->getNextStatCache())
181     LastCache = LastCache->getNextStatCache();
182 
183   LastCache->setNextStatCache(statCache);
184 }
185 
186 void FileManager::removeStatCache(FileSystemStatCache *statCache) {
187   if (!statCache)
188     return;
189 
190   if (StatCache.get() == statCache) {
191     // This is the first stat cache.
192     StatCache.reset(StatCache->takeNextStatCache());
193     return;
194   }
195 
196   // Find the stat cache in the list.
197   FileSystemStatCache *PrevCache = StatCache.get();
198   while (PrevCache && PrevCache->getNextStatCache() != statCache)
199     PrevCache = PrevCache->getNextStatCache();
200 
201   assert(PrevCache && "Stat cache not found for removal");
202   PrevCache->setNextStatCache(statCache->getNextStatCache());
203 }
204 
205 /// \brief Retrieve the directory that the given file name resides in.
206 static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr,
207                                                   llvm::StringRef Filename) {
208   // Figure out what directory it is in.   If the string contains a / in it,
209   // strip off everything after it.
210   // FIXME: this logic should be in sys::Path.
211   size_t SlashPos = Filename.size();
212   while (SlashPos != 0 && !IS_DIR_SEPARATOR_CHAR(Filename[SlashPos-1]))
213     --SlashPos;
214 
215   // Use the current directory if file has no path component.
216   if (SlashPos == 0)
217     return FileMgr.getDirectory(".");
218 
219   if (SlashPos == Filename.size()-1)
220     return 0;       // If filename ends with a /, it's a directory.
221 
222   // Ignore repeated //'s.
223   while (SlashPos != 0 && IS_DIR_SEPARATOR_CHAR(Filename[SlashPos-1]))
224     --SlashPos;
225 
226   return FileMgr.getDirectory(Filename.substr(0, SlashPos));
227 }
228 
229 /// getDirectory - Lookup, cache, and verify the specified directory.  This
230 /// returns null if the directory doesn't exist.
231 ///
232 const DirectoryEntry *FileManager::getDirectory(llvm::StringRef Filename) {
233   // stat doesn't like trailing separators (at least on Windows).
234   if (Filename.size() > 1 && IS_DIR_SEPARATOR_CHAR(Filename.back()))
235     Filename = Filename.substr(0, Filename.size()-1);
236 
237   ++NumDirLookups;
238   llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
239     DirEntries.GetOrCreateValue(Filename);
240 
241   // See if there is already an entry in the map.
242   if (NamedDirEnt.getValue())
243     return NamedDirEnt.getValue() == NON_EXISTENT_DIR
244               ? 0 : NamedDirEnt.getValue();
245 
246   ++NumDirCacheMisses;
247 
248   // By default, initialize it to invalid.
249   NamedDirEnt.setValue(NON_EXISTENT_DIR);
250 
251   // Get the null-terminated directory name as stored as the key of the
252   // DirEntries map.
253   const char *InterndDirName = NamedDirEnt.getKeyData();
254 
255   // Check to see if the directory exists.
256   struct stat StatBuf;
257   if (getStatValue(InterndDirName, StatBuf, 0/*directory lookup*/))
258     return 0;
259 
260   // It exists.  See if we have already opened a directory with the same inode.
261   // This occurs when one dir is symlinked to another, for example.
262   DirectoryEntry &UDE = UniqueDirs.getDirectory(InterndDirName, StatBuf);
263 
264   NamedDirEnt.setValue(&UDE);
265   if (UDE.getName()) // Already have an entry with this inode, return it.
266     return &UDE;
267 
268   // Otherwise, we don't have this directory yet, add it.  We use the string
269   // key from the DirEntries map as the string.
270   UDE.Name  = InterndDirName;
271   return &UDE;
272 }
273 
274 /// getFile - Lookup, cache, and verify the specified file.  This returns null
275 /// if the file doesn't exist.
276 ///
277 const FileEntry *FileManager::getFile(llvm::StringRef Filename) {
278   ++NumFileLookups;
279 
280   // See if there is already an entry in the map.
281   llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
282     FileEntries.GetOrCreateValue(Filename);
283 
284   // See if there is already an entry in the map.
285   if (NamedFileEnt.getValue())
286     return NamedFileEnt.getValue() == NON_EXISTENT_FILE
287                  ? 0 : NamedFileEnt.getValue();
288 
289   ++NumFileCacheMisses;
290 
291   // By default, initialize it to invalid.
292   NamedFileEnt.setValue(NON_EXISTENT_FILE);
293 
294 
295   // Get the null-terminated file name as stored as the key of the
296   // FileEntries map.
297   const char *InterndFileName = NamedFileEnt.getKeyData();
298 
299 
300   // Look up the directory for the file.  When looking up something like
301   // sys/foo.h we'll discover all of the search directories that have a 'sys'
302   // subdirectory.  This will let us avoid having to waste time on known-to-fail
303   // searches when we go to find sys/bar.h, because all the search directories
304   // without a 'sys' subdir will get a cached failure result.
305   const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename);
306   if (DirInfo == 0)  // Directory doesn't exist, file can't exist.
307     return 0;
308 
309   // FIXME: Use the directory info to prune this, before doing the stat syscall.
310   // FIXME: This will reduce the # syscalls.
311 
312   // Nope, there isn't.  Check to see if the file exists.
313   int FileDescriptor = -1;
314   struct stat StatBuf;
315   if (getStatValue(InterndFileName, StatBuf, &FileDescriptor))
316     return 0;
317 
318   // It exists.  See if we have already opened a file with the same inode.
319   // This occurs when one dir is symlinked to another, for example.
320   FileEntry &UFE = UniqueFiles.getFile(InterndFileName, StatBuf);
321 
322   NamedFileEnt.setValue(&UFE);
323   if (UFE.getName()) { // Already have an entry with this inode, return it.
324     // If the stat process opened the file, close it to avoid a FD leak.
325     if (FileDescriptor != -1)
326       close(FileDescriptor);
327 
328     return &UFE;
329   }
330 
331   // Otherwise, we don't have this directory yet, add it.
332   // FIXME: Change the name to be a char* that points back to the 'FileEntries'
333   // key.
334   UFE.Name    = InterndFileName;
335   UFE.Size    = StatBuf.st_size;
336   UFE.ModTime = StatBuf.st_mtime;
337   UFE.Dir     = DirInfo;
338   UFE.UID     = NextFileUID++;
339   UFE.FD      = FileDescriptor;
340   return &UFE;
341 }
342 
343 const FileEntry *
344 FileManager::getVirtualFile(llvm::StringRef Filename, off_t Size,
345                             time_t ModificationTime) {
346   ++NumFileLookups;
347 
348   // See if there is already an entry in the map.
349   llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
350     FileEntries.GetOrCreateValue(Filename);
351 
352   // See if there is already an entry in the map.
353   if (NamedFileEnt.getValue() && NamedFileEnt.getValue() != NON_EXISTENT_FILE)
354     return NamedFileEnt.getValue();
355 
356   ++NumFileCacheMisses;
357 
358   // By default, initialize it to invalid.
359   NamedFileEnt.setValue(NON_EXISTENT_FILE);
360 
361   // We allow the directory to not exist. If it does exist we store it.
362   FileEntry *UFE = 0;
363   const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename);
364   if (DirInfo) {
365     // Check to see if the file exists. If so, drop the virtual file
366     int FileDescriptor = -1;
367     struct stat StatBuf;
368     const char *InterndFileName = NamedFileEnt.getKeyData();
369     if (getStatValue(InterndFileName, StatBuf, &FileDescriptor) == 0) {
370       // If the stat process opened the file, close it to avoid a FD leak.
371       if (FileDescriptor != -1)
372         close(FileDescriptor);
373 
374       StatBuf.st_size = Size;
375       StatBuf.st_mtime = ModificationTime;
376       UFE = &UniqueFiles.getFile(InterndFileName, StatBuf);
377 
378       NamedFileEnt.setValue(UFE);
379 
380       // If we had already opened this file, close it now so we don't
381       // leak the descriptor. We're not going to use the file
382       // descriptor anyway, since this is a virtual file.
383       if (UFE->FD != -1) {
384         close(UFE->FD);
385         UFE->FD = -1;
386       }
387 
388       // If we already have an entry with this inode, return it.
389       if (UFE->getName())
390         return UFE;
391     }
392   }
393 
394   if (!UFE) {
395     UFE = new FileEntry();
396     VirtualFileEntries.push_back(UFE);
397     NamedFileEnt.setValue(UFE);
398   }
399 
400   // Get the null-terminated file name as stored as the key of the
401   // FileEntries map.
402   const char *InterndFileName = NamedFileEnt.getKeyData();
403 
404   UFE->Name    = InterndFileName;
405   UFE->Size    = Size;
406   UFE->ModTime = ModificationTime;
407   UFE->Dir     = DirInfo;
408   UFE->UID     = NextFileUID++;
409   UFE->FD      = -1;
410   return UFE;
411 }
412 
413 void FileManager::FixupRelativePath(llvm::sys::Path &path,
414                                     const FileSystemOptions &FSOpts) {
415   if (FSOpts.WorkingDir.empty() || llvm::sys::path::is_absolute(path.str()))
416     return;
417 
418   llvm::SmallString<128> NewPath(FSOpts.WorkingDir);
419   llvm::sys::path::append(NewPath, path.str());
420   path = NewPath;
421 }
422 
423 llvm::MemoryBuffer *FileManager::
424 getBufferForFile(const FileEntry *Entry, std::string *ErrorStr) {
425   llvm::OwningPtr<llvm::MemoryBuffer> Result;
426   llvm::error_code ec;
427   if (FileSystemOpts.WorkingDir.empty()) {
428     const char *Filename = Entry->getName();
429     // If the file is already open, use the open file descriptor.
430     if (Entry->FD != -1) {
431       ec = llvm::MemoryBuffer::getOpenFile(Entry->FD, Filename, Result,
432                                            Entry->getSize());
433       if (ErrorStr)
434         *ErrorStr = ec.message();
435 
436       close(Entry->FD);
437       Entry->FD = -1;
438       return Result.take();
439     }
440 
441     // Otherwise, open the file.
442     ec = llvm::MemoryBuffer::getFile(Filename, Result, Entry->getSize());
443     if (ec && ErrorStr)
444       *ErrorStr = ec.message();
445     return Result.take();
446   }
447 
448   llvm::sys::Path FilePath(Entry->getName());
449   FixupRelativePath(FilePath, FileSystemOpts);
450   ec = llvm::MemoryBuffer::getFile(FilePath.c_str(), Result, Entry->getSize());
451   if (ec && ErrorStr)
452     *ErrorStr = ec.message();
453   return Result.take();
454 }
455 
456 llvm::MemoryBuffer *FileManager::
457 getBufferForFile(llvm::StringRef Filename, std::string *ErrorStr) {
458   llvm::OwningPtr<llvm::MemoryBuffer> Result;
459   llvm::error_code ec;
460   if (FileSystemOpts.WorkingDir.empty()) {
461     ec = llvm::MemoryBuffer::getFile(Filename, Result);
462     if (ec && ErrorStr)
463       *ErrorStr = ec.message();
464     return Result.take();
465   }
466 
467   llvm::sys::Path FilePath(Filename);
468   FixupRelativePath(FilePath, FileSystemOpts);
469   ec = llvm::MemoryBuffer::getFile(FilePath.c_str(), Result);
470   if (ec && ErrorStr)
471     *ErrorStr = ec.message();
472   return Result.take();
473 }
474 
475 /// getStatValue - Get the 'stat' information for the specified path, using the
476 /// cache to accelerate it if possible.  This returns true if the path does not
477 /// exist or false if it exists.
478 ///
479 /// The isForDir member indicates whether this is a directory lookup or not.
480 /// This will return failure if the lookup isn't the expected kind.
481 bool FileManager::getStatValue(const char *Path, struct stat &StatBuf,
482                                int *FileDescriptor) {
483   // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
484   // absolute!
485   if (FileSystemOpts.WorkingDir.empty())
486     return FileSystemStatCache::get(Path, StatBuf, FileDescriptor,
487                                     StatCache.get());
488 
489   llvm::sys::Path FilePath(Path);
490   FixupRelativePath(FilePath, FileSystemOpts);
491 
492   return FileSystemStatCache::get(FilePath.c_str(), StatBuf, FileDescriptor,
493                                   StatCache.get());
494 }
495 
496 void FileManager::GetUniqueIDMapping(
497                    llvm::SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
498   UIDToFiles.clear();
499   UIDToFiles.resize(NextFileUID);
500 
501   // Map file entries
502   for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
503          FE = FileEntries.begin(), FEEnd = FileEntries.end();
504        FE != FEEnd; ++FE)
505     if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE)
506       UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
507 
508   // Map virtual file entries
509   for (llvm::SmallVector<FileEntry*, 4>::const_iterator
510          VFE = VirtualFileEntries.begin(), VFEEnd = VirtualFileEntries.end();
511        VFE != VFEEnd; ++VFE)
512     if (*VFE && *VFE != NON_EXISTENT_FILE)
513       UIDToFiles[(*VFE)->getUID()] = *VFE;
514 }
515 
516 
517 void FileManager::PrintStats() const {
518   llvm::errs() << "\n*** File Manager Stats:\n";
519   llvm::errs() << UniqueFiles.size() << " files found, "
520                << UniqueDirs.size() << " dirs found.\n";
521   llvm::errs() << NumDirLookups << " dir lookups, "
522                << NumDirCacheMisses << " dir cache misses.\n";
523   llvm::errs() << NumFileLookups << " file lookups, "
524                << NumFileCacheMisses << " file cache misses.\n";
525 
526   //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
527 }
528 
529