18bcb0991SDimitry Andric //===-- FileCollector.cpp ---------------------------------------*- C++ -*-===// 28bcb0991SDimitry Andric // 38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68bcb0991SDimitry Andric // 78bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 88bcb0991SDimitry Andric 98bcb0991SDimitry Andric #include "llvm/Support/FileCollector.h" 108bcb0991SDimitry Andric #include "llvm/ADT/SmallString.h" 115ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h" 128bcb0991SDimitry Andric #include "llvm/Support/FileSystem.h" 138bcb0991SDimitry Andric #include "llvm/Support/Path.h" 148bcb0991SDimitry Andric #include "llvm/Support/Process.h" 158bcb0991SDimitry Andric 168bcb0991SDimitry Andric using namespace llvm; 178bcb0991SDimitry Andric 18e8d8bef9SDimitry Andric FileCollectorBase::FileCollectorBase() = default; 19e8d8bef9SDimitry Andric FileCollectorBase::~FileCollectorBase() = default; 20e8d8bef9SDimitry Andric 21e8d8bef9SDimitry Andric void FileCollectorBase::addFile(const Twine &File) { 22e8d8bef9SDimitry Andric std::lock_guard<std::mutex> lock(Mutex); 23e8d8bef9SDimitry Andric std::string FileStr = File.str(); 24e8d8bef9SDimitry Andric if (markAsSeen(FileStr)) 25e8d8bef9SDimitry Andric addFileImpl(FileStr); 26e8d8bef9SDimitry Andric } 27e8d8bef9SDimitry Andric 28e8d8bef9SDimitry Andric void FileCollectorBase::addDirectory(const Twine &Dir) { 29e8d8bef9SDimitry Andric assert(sys::fs::is_directory(Dir)); 30e8d8bef9SDimitry Andric std::error_code EC; 31e8d8bef9SDimitry Andric addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC); 32e8d8bef9SDimitry Andric } 33e8d8bef9SDimitry Andric 348bcb0991SDimitry Andric static bool isCaseSensitivePath(StringRef Path) { 358bcb0991SDimitry Andric SmallString<256> TmpDest = Path, UpperDest, RealDest; 368bcb0991SDimitry Andric 378bcb0991SDimitry Andric // Remove component traversals, links, etc. 38fe6060f1SDimitry Andric if (sys::fs::real_path(Path, TmpDest)) 398bcb0991SDimitry Andric return true; // Current default value in vfs.yaml 408bcb0991SDimitry Andric Path = TmpDest; 418bcb0991SDimitry Andric 428bcb0991SDimitry Andric // Change path to all upper case and ask for its real path, if the latter 438bcb0991SDimitry Andric // exists and is equal to path, it's not case sensitive. Default to case 448bcb0991SDimitry Andric // sensitive in the absence of real_path, since this is the YAMLVFSWriter 458bcb0991SDimitry Andric // default. 468bcb0991SDimitry Andric UpperDest = Path.upper(); 47*0fca6ea1SDimitry Andric if (!sys::fs::real_path(UpperDest, RealDest) && Path == RealDest) 488bcb0991SDimitry Andric return false; 498bcb0991SDimitry Andric return true; 508bcb0991SDimitry Andric } 518bcb0991SDimitry Andric 528bcb0991SDimitry Andric FileCollector::FileCollector(std::string Root, std::string OverlayRoot) 53bdd1243dSDimitry Andric : Root(Root), OverlayRoot(OverlayRoot) { 54bdd1243dSDimitry Andric assert(sys::path::is_absolute(Root) && "Root not absolute"); 55bdd1243dSDimitry Andric assert(sys::path::is_absolute(OverlayRoot) && "OverlayRoot not absolute"); 568bcb0991SDimitry Andric } 578bcb0991SDimitry Andric 58e8d8bef9SDimitry Andric void FileCollector::PathCanonicalizer::updateWithRealPath( 59e8d8bef9SDimitry Andric SmallVectorImpl<char> &Path) { 60e8d8bef9SDimitry Andric StringRef SrcPath(Path.begin(), Path.size()); 61e8d8bef9SDimitry Andric StringRef Filename = sys::path::filename(SrcPath); 62e8d8bef9SDimitry Andric StringRef Directory = sys::path::parent_path(SrcPath); 638bcb0991SDimitry Andric 64e8d8bef9SDimitry Andric // Use real_path to fix any symbolic link component present in the directory 65e8d8bef9SDimitry Andric // part of the path, caching the search because computing the real path is 66e8d8bef9SDimitry Andric // expensive. 67e8d8bef9SDimitry Andric SmallString<256> RealPath; 68e8d8bef9SDimitry Andric auto DirWithSymlink = CachedDirs.find(Directory); 69e8d8bef9SDimitry Andric if (DirWithSymlink == CachedDirs.end()) { 70e8d8bef9SDimitry Andric // FIXME: Should this be a call to FileSystem::getRealpath(), in some 71e8d8bef9SDimitry Andric // cases? What if there is nothing on disk? 72e8d8bef9SDimitry Andric if (sys::fs::real_path(Directory, RealPath)) 73e8d8bef9SDimitry Andric return; 747a6dacacSDimitry Andric CachedDirs[Directory] = std::string(RealPath); 758bcb0991SDimitry Andric } else { 768bcb0991SDimitry Andric RealPath = DirWithSymlink->second; 778bcb0991SDimitry Andric } 788bcb0991SDimitry Andric 79e8d8bef9SDimitry Andric // Finish recreating the path by appending the original filename, since we 80e8d8bef9SDimitry Andric // don't need to resolve symlinks in the filename. 81e8d8bef9SDimitry Andric // 82e8d8bef9SDimitry Andric // FIXME: If we can cope with this, maybe we can cope without calling 83e8d8bef9SDimitry Andric // getRealPath() at all when there's no ".." component. 84e8d8bef9SDimitry Andric sys::path::append(RealPath, Filename); 85e8d8bef9SDimitry Andric 86e8d8bef9SDimitry Andric // Swap to create the output. 87e8d8bef9SDimitry Andric Path.swap(RealPath); 888bcb0991SDimitry Andric } 898bcb0991SDimitry Andric 90e8d8bef9SDimitry Andric /// Make Path absolute. 91e8d8bef9SDimitry Andric static void makeAbsolute(SmallVectorImpl<char> &Path) { 928bcb0991SDimitry Andric // We need an absolute src path to append to the root. 93e8d8bef9SDimitry Andric sys::fs::make_absolute(Path); 948bcb0991SDimitry Andric 958bcb0991SDimitry Andric // Canonicalize src to a native path to avoid mixed separator styles. 96e8d8bef9SDimitry Andric sys::path::native(Path); 978bcb0991SDimitry Andric 988bcb0991SDimitry Andric // Remove redundant leading "./" pieces and consecutive separators. 99e8d8bef9SDimitry Andric Path.erase(Path.begin(), sys::path::remove_leading_dotslash( 100e8d8bef9SDimitry Andric StringRef(Path.begin(), Path.size())) 101e8d8bef9SDimitry Andric .begin()); 102e8d8bef9SDimitry Andric } 1038bcb0991SDimitry Andric 104e8d8bef9SDimitry Andric FileCollector::PathCanonicalizer::PathStorage 105e8d8bef9SDimitry Andric FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) { 106e8d8bef9SDimitry Andric PathStorage Paths; 107e8d8bef9SDimitry Andric Paths.VirtualPath = SrcPath; 108e8d8bef9SDimitry Andric makeAbsolute(Paths.VirtualPath); 1098bcb0991SDimitry Andric 1108bcb0991SDimitry Andric // If a ".." component is present after a symlink component, remove_dots may 1118bcb0991SDimitry Andric // lead to the wrong real destination path. Let the source be canonicalized 1128bcb0991SDimitry Andric // like that but make sure we always use the real path for the destination. 113e8d8bef9SDimitry Andric Paths.CopyFrom = Paths.VirtualPath; 114e8d8bef9SDimitry Andric updateWithRealPath(Paths.CopyFrom); 115e8d8bef9SDimitry Andric 116e8d8bef9SDimitry Andric // Canonicalize the virtual path by removing "..", "." components. 117e8d8bef9SDimitry Andric sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true); 118e8d8bef9SDimitry Andric 119e8d8bef9SDimitry Andric return Paths; 120e8d8bef9SDimitry Andric } 121e8d8bef9SDimitry Andric 122e8d8bef9SDimitry Andric void FileCollector::addFileImpl(StringRef SrcPath) { 123e8d8bef9SDimitry Andric PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath); 1248bcb0991SDimitry Andric 1258bcb0991SDimitry Andric SmallString<256> DstPath = StringRef(Root); 126e8d8bef9SDimitry Andric sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom)); 1278bcb0991SDimitry Andric 1288bcb0991SDimitry Andric // Always map a canonical src path to its real path into the YAML, by doing 1298bcb0991SDimitry Andric // this we map different virtual src paths to the same entry in the VFS 1308bcb0991SDimitry Andric // overlay, which is a way to emulate symlink inside the VFS; this is also 1318bcb0991SDimitry Andric // needed for correctness, not doing that can lead to module redefinition 1328bcb0991SDimitry Andric // errors. 133e8d8bef9SDimitry Andric addFileToMapping(Paths.VirtualPath, DstPath); 1348bcb0991SDimitry Andric } 1358bcb0991SDimitry Andric 1365ffd83dbSDimitry Andric llvm::vfs::directory_iterator 1375ffd83dbSDimitry Andric FileCollector::addDirectoryImpl(const llvm::Twine &Dir, 1385ffd83dbSDimitry Andric IntrusiveRefCntPtr<vfs::FileSystem> FS, 1395ffd83dbSDimitry Andric std::error_code &EC) { 1405ffd83dbSDimitry Andric auto It = FS->dir_begin(Dir, EC); 1415ffd83dbSDimitry Andric if (EC) 1425ffd83dbSDimitry Andric return It; 1435ffd83dbSDimitry Andric addFile(Dir); 1445ffd83dbSDimitry Andric for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) { 1455ffd83dbSDimitry Andric if (It->type() == sys::fs::file_type::regular_file || 1465ffd83dbSDimitry Andric It->type() == sys::fs::file_type::directory_file || 1475ffd83dbSDimitry Andric It->type() == sys::fs::file_type::symlink_file) { 1485ffd83dbSDimitry Andric addFile(It->path()); 1495ffd83dbSDimitry Andric } 1505ffd83dbSDimitry Andric } 1515ffd83dbSDimitry Andric if (EC) 1525ffd83dbSDimitry Andric return It; 1535ffd83dbSDimitry Andric // Return a new iterator. 1545ffd83dbSDimitry Andric return FS->dir_begin(Dir, EC); 1555ffd83dbSDimitry Andric } 1565ffd83dbSDimitry Andric 1578bcb0991SDimitry Andric /// Set the access and modification time for the given file from the given 1588bcb0991SDimitry Andric /// status object. 1598bcb0991SDimitry Andric static std::error_code 1608bcb0991SDimitry Andric copyAccessAndModificationTime(StringRef Filename, 1618bcb0991SDimitry Andric const sys::fs::file_status &Stat) { 1628bcb0991SDimitry Andric int FD; 1638bcb0991SDimitry Andric 1648bcb0991SDimitry Andric if (auto EC = 1658bcb0991SDimitry Andric sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting)) 1668bcb0991SDimitry Andric return EC; 1678bcb0991SDimitry Andric 1688bcb0991SDimitry Andric if (auto EC = sys::fs::setLastAccessAndModificationTime( 1698bcb0991SDimitry Andric FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime())) 1708bcb0991SDimitry Andric return EC; 1718bcb0991SDimitry Andric 1728bcb0991SDimitry Andric if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) 1738bcb0991SDimitry Andric return EC; 1748bcb0991SDimitry Andric 1758bcb0991SDimitry Andric return {}; 1768bcb0991SDimitry Andric } 1778bcb0991SDimitry Andric 1788bcb0991SDimitry Andric std::error_code FileCollector::copyFiles(bool StopOnError) { 1795ffd83dbSDimitry Andric auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true); 1805ffd83dbSDimitry Andric if (Err) { 1815ffd83dbSDimitry Andric return Err; 1825ffd83dbSDimitry Andric } 1835ffd83dbSDimitry Andric 1845ffd83dbSDimitry Andric std::lock_guard<std::mutex> lock(Mutex); 1855ffd83dbSDimitry Andric 1868bcb0991SDimitry Andric for (auto &entry : VFSWriter.getMappings()) { 1878bcb0991SDimitry Andric // Get the status of the original file/directory. 1888bcb0991SDimitry Andric sys::fs::file_status Stat; 1898bcb0991SDimitry Andric if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) { 1908bcb0991SDimitry Andric if (StopOnError) 1918bcb0991SDimitry Andric return EC; 1928bcb0991SDimitry Andric continue; 1938bcb0991SDimitry Andric } 1948bcb0991SDimitry Andric 195e8d8bef9SDimitry Andric // Continue if the file doesn't exist. 196e8d8bef9SDimitry Andric if (Stat.type() == sys::fs::file_type::file_not_found) 197e8d8bef9SDimitry Andric continue; 198e8d8bef9SDimitry Andric 199e8d8bef9SDimitry Andric // Create directory tree. 200e8d8bef9SDimitry Andric if (std::error_code EC = 201e8d8bef9SDimitry Andric sys::fs::create_directories(sys::path::parent_path(entry.RPath), 202e8d8bef9SDimitry Andric /*IgnoreExisting=*/true)) { 203e8d8bef9SDimitry Andric if (StopOnError) 204e8d8bef9SDimitry Andric return EC; 205e8d8bef9SDimitry Andric } 206e8d8bef9SDimitry Andric 2078bcb0991SDimitry Andric if (Stat.type() == sys::fs::file_type::directory_file) { 2088bcb0991SDimitry Andric // Construct a directory when it's just a directory entry. 2098bcb0991SDimitry Andric if (std::error_code EC = 2108bcb0991SDimitry Andric sys::fs::create_directories(entry.RPath, 2118bcb0991SDimitry Andric /*IgnoreExisting=*/true)) { 2128bcb0991SDimitry Andric if (StopOnError) 2138bcb0991SDimitry Andric return EC; 2148bcb0991SDimitry Andric } 2158bcb0991SDimitry Andric continue; 2168bcb0991SDimitry Andric } 2178bcb0991SDimitry Andric 2188bcb0991SDimitry Andric // Copy file over. 2198bcb0991SDimitry Andric if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) { 2208bcb0991SDimitry Andric if (StopOnError) 2218bcb0991SDimitry Andric return EC; 2228bcb0991SDimitry Andric } 2238bcb0991SDimitry Andric 2248bcb0991SDimitry Andric // Copy over permissions. 2258bcb0991SDimitry Andric if (auto perms = sys::fs::getPermissions(entry.VPath)) { 2268bcb0991SDimitry Andric if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) { 2278bcb0991SDimitry Andric if (StopOnError) 2288bcb0991SDimitry Andric return EC; 2298bcb0991SDimitry Andric } 2308bcb0991SDimitry Andric } 2318bcb0991SDimitry Andric 2328bcb0991SDimitry Andric // Copy over modification time. 2338bcb0991SDimitry Andric copyAccessAndModificationTime(entry.RPath, Stat); 2348bcb0991SDimitry Andric } 2358bcb0991SDimitry Andric return {}; 2368bcb0991SDimitry Andric } 2378bcb0991SDimitry Andric 2385ffd83dbSDimitry Andric std::error_code FileCollector::writeMapping(StringRef MappingFile) { 2398bcb0991SDimitry Andric std::lock_guard<std::mutex> lock(Mutex); 2408bcb0991SDimitry Andric 2418bcb0991SDimitry Andric VFSWriter.setOverlayDir(OverlayRoot); 2428bcb0991SDimitry Andric VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot)); 2438bcb0991SDimitry Andric VFSWriter.setUseExternalNames(false); 2448bcb0991SDimitry Andric 2458bcb0991SDimitry Andric std::error_code EC; 246fe6060f1SDimitry Andric raw_fd_ostream os(MappingFile, EC, sys::fs::OF_TextWithCRLF); 2478bcb0991SDimitry Andric if (EC) 2488bcb0991SDimitry Andric return EC; 2498bcb0991SDimitry Andric 2508bcb0991SDimitry Andric VFSWriter.write(os); 2518bcb0991SDimitry Andric 2528bcb0991SDimitry Andric return {}; 2538bcb0991SDimitry Andric } 2548bcb0991SDimitry Andric 2555ffd83dbSDimitry Andric namespace llvm { 2568bcb0991SDimitry Andric 2578bcb0991SDimitry Andric class FileCollectorFileSystem : public vfs::FileSystem { 2588bcb0991SDimitry Andric public: 2598bcb0991SDimitry Andric explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS, 2608bcb0991SDimitry Andric std::shared_ptr<FileCollector> Collector) 2618bcb0991SDimitry Andric : FS(std::move(FS)), Collector(std::move(Collector)) {} 2628bcb0991SDimitry Andric 2638bcb0991SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override { 2648bcb0991SDimitry Andric auto Result = FS->status(Path); 2658bcb0991SDimitry Andric if (Result && Result->exists()) 2668bcb0991SDimitry Andric Collector->addFile(Path); 2678bcb0991SDimitry Andric return Result; 2688bcb0991SDimitry Andric } 2698bcb0991SDimitry Andric 2708bcb0991SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 2718bcb0991SDimitry Andric openFileForRead(const Twine &Path) override { 2728bcb0991SDimitry Andric auto Result = FS->openFileForRead(Path); 2738bcb0991SDimitry Andric if (Result && *Result) 2748bcb0991SDimitry Andric Collector->addFile(Path); 2758bcb0991SDimitry Andric return Result; 2768bcb0991SDimitry Andric } 2778bcb0991SDimitry Andric 2788bcb0991SDimitry Andric llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir, 2798bcb0991SDimitry Andric std::error_code &EC) override { 2805ffd83dbSDimitry Andric return Collector->addDirectoryImpl(Dir, FS, EC); 2818bcb0991SDimitry Andric } 2828bcb0991SDimitry Andric 2838bcb0991SDimitry Andric std::error_code getRealPath(const Twine &Path, 284*0fca6ea1SDimitry Andric SmallVectorImpl<char> &Output) override { 2858bcb0991SDimitry Andric auto EC = FS->getRealPath(Path, Output); 2868bcb0991SDimitry Andric if (!EC) { 2878bcb0991SDimitry Andric Collector->addFile(Path); 2888bcb0991SDimitry Andric if (Output.size() > 0) 2898bcb0991SDimitry Andric Collector->addFile(Output); 2908bcb0991SDimitry Andric } 2918bcb0991SDimitry Andric return EC; 2928bcb0991SDimitry Andric } 2938bcb0991SDimitry Andric 2948bcb0991SDimitry Andric std::error_code isLocal(const Twine &Path, bool &Result) override { 2958bcb0991SDimitry Andric return FS->isLocal(Path, Result); 2968bcb0991SDimitry Andric } 2978bcb0991SDimitry Andric 2988bcb0991SDimitry Andric llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { 2998bcb0991SDimitry Andric return FS->getCurrentWorkingDirectory(); 3008bcb0991SDimitry Andric } 3018bcb0991SDimitry Andric 3028bcb0991SDimitry Andric std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override { 3038bcb0991SDimitry Andric return FS->setCurrentWorkingDirectory(Path); 3048bcb0991SDimitry Andric } 3058bcb0991SDimitry Andric 3068bcb0991SDimitry Andric private: 3078bcb0991SDimitry Andric IntrusiveRefCntPtr<vfs::FileSystem> FS; 3088bcb0991SDimitry Andric std::shared_ptr<FileCollector> Collector; 3098bcb0991SDimitry Andric }; 3108bcb0991SDimitry Andric 3115ffd83dbSDimitry Andric } // namespace llvm 3128bcb0991SDimitry Andric 3138bcb0991SDimitry Andric IntrusiveRefCntPtr<vfs::FileSystem> 3148bcb0991SDimitry Andric FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS, 3158bcb0991SDimitry Andric std::shared_ptr<FileCollector> Collector) { 3168bcb0991SDimitry Andric return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector)); 3178bcb0991SDimitry Andric } 318