1 //===-- FileCollector.h -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_SUPPORT_FILECOLLECTOR_H 10 #define LLVM_SUPPORT_FILECOLLECTOR_H 11 12 #include "llvm/ADT/SmallVector.h" 13 #include "llvm/ADT/StringMap.h" 14 #include "llvm/ADT/StringSet.h" 15 #include "llvm/Support/VirtualFileSystem.h" 16 #include <mutex> 17 #include <string> 18 19 namespace llvm { 20 class FileCollectorFileSystem; 21 class Twine; 22 23 class FileCollectorBase { 24 public: 25 FileCollectorBase(); 26 virtual ~FileCollectorBase(); 27 28 void addFile(const Twine &file); 29 void addDirectory(const Twine &Dir); 30 31 protected: markAsSeen(StringRef Path)32 bool markAsSeen(StringRef Path) { 33 if (Path.empty()) 34 return false; 35 return Seen.insert(Path).second; 36 } 37 38 virtual void addFileImpl(StringRef SrcPath) = 0; 39 40 virtual llvm::vfs::directory_iterator 41 addDirectoryImpl(const llvm::Twine &Dir, 42 IntrusiveRefCntPtr<vfs::FileSystem> FS, 43 std::error_code &EC) = 0; 44 45 /// Synchronizes access to internal data structures. 46 std::mutex Mutex; 47 48 /// Tracks already seen files so they can be skipped. 49 StringSet<> Seen; 50 }; 51 52 /// Captures file system interaction and generates data to be later replayed 53 /// with the RedirectingFileSystem. 54 /// 55 /// For any file that gets accessed we eventually create: 56 /// - a copy of the file inside Root 57 /// - a record in RedirectingFileSystem mapping that maps: 58 /// current real path -> path to the copy in Root 59 /// 60 /// That intent is that later when the mapping is used by RedirectingFileSystem 61 /// it simulates the state of FS that we collected. 62 /// 63 /// We generate file copies and mapping lazily - see writeMapping and copyFiles. 64 /// We don't try to capture the state of the file at the exact time when it's 65 /// accessed. Files might get changed, deleted ... we record only the "final" 66 /// state. 67 /// 68 /// In order to preserve the relative topology of files we use their real paths 69 /// as relative paths inside of the Root. 70 class FileCollector : public FileCollectorBase { 71 public: 72 /// Helper utility that encapsulates the logic for canonicalizing a virtual 73 /// path and a path to copy from. 74 class PathCanonicalizer { 75 public: 76 struct PathStorage { 77 SmallString<256> CopyFrom; 78 SmallString<256> VirtualPath; 79 }; 80 81 /// Canonicalize a pair of virtual and real paths. 82 PathStorage canonicalize(StringRef SrcPath); 83 84 private: 85 /// Replace with a (mostly) real path, or don't modify. Resolves symlinks 86 /// in the directory, using \a CachedDirs to avoid redundant lookups, but 87 /// leaves the filename as a possible symlink. 88 void updateWithRealPath(SmallVectorImpl<char> &Path); 89 90 StringMap<std::string> CachedDirs; 91 }; 92 93 /// \p Root is the directory where collected files are will be stored. 94 /// \p OverlayRoot is VFS mapping root. 95 /// \p Root directory gets created in copyFiles unless it already exists. 96 FileCollector(std::string Root, std::string OverlayRoot); 97 98 /// Write the yaml mapping (for the VFS) to the given file. 99 std::error_code writeMapping(StringRef MappingFile); 100 101 /// Copy the files into the root directory. 102 /// 103 /// When StopOnError is true (the default) we abort as soon as one file 104 /// cannot be copied. This is relatively common, for example when a file was 105 /// removed after it was added to the mapping. 106 std::error_code copyFiles(bool StopOnError = true); 107 108 /// Create a VFS that uses \p Collector to collect files accessed via \p 109 /// BaseFS. 110 static IntrusiveRefCntPtr<vfs::FileSystem> 111 createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS, 112 std::shared_ptr<FileCollector> Collector); 113 114 private: 115 friend FileCollectorFileSystem; 116 addFileToMapping(StringRef VirtualPath,StringRef RealPath)117 void addFileToMapping(StringRef VirtualPath, StringRef RealPath) { 118 if (sys::fs::is_directory(VirtualPath)) 119 VFSWriter.addDirectoryMapping(VirtualPath, RealPath); 120 else 121 VFSWriter.addFileMapping(VirtualPath, RealPath); 122 } 123 124 protected: 125 void addFileImpl(StringRef SrcPath) override; 126 127 llvm::vfs::directory_iterator 128 addDirectoryImpl(const llvm::Twine &Dir, 129 IntrusiveRefCntPtr<vfs::FileSystem> FS, 130 std::error_code &EC) override; 131 132 /// The directory where collected files are copied to in copyFiles(). 133 const std::string Root; 134 135 /// The root directory where the VFS overlay lives. 136 const std::string OverlayRoot; 137 138 /// The yaml mapping writer. 139 vfs::YAMLVFSWriter VFSWriter; 140 141 /// Helper utility for canonicalizing paths. 142 PathCanonicalizer Canonicalizer; 143 }; 144 145 } // end namespace llvm 146 147 #endif // LLVM_SUPPORT_FILECOLLECTOR_H 148