xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/FileCollector.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
18bcb0991SDimitry Andric //===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
28bcb0991SDimitry Andric //
38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68bcb0991SDimitry Andric //
78bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
88bcb0991SDimitry Andric 
98bcb0991SDimitry Andric #include "llvm/Support/FileCollector.h"
108bcb0991SDimitry Andric #include "llvm/ADT/SmallString.h"
115ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h"
128bcb0991SDimitry Andric #include "llvm/Support/FileSystem.h"
138bcb0991SDimitry Andric #include "llvm/Support/Path.h"
148bcb0991SDimitry Andric #include "llvm/Support/Process.h"
158bcb0991SDimitry Andric 
168bcb0991SDimitry Andric using namespace llvm;
178bcb0991SDimitry Andric 
18e8d8bef9SDimitry Andric FileCollectorBase::FileCollectorBase() = default;
19e8d8bef9SDimitry Andric FileCollectorBase::~FileCollectorBase() = default;
20e8d8bef9SDimitry Andric 
21e8d8bef9SDimitry Andric void FileCollectorBase::addFile(const Twine &File) {
22e8d8bef9SDimitry Andric   std::lock_guard<std::mutex> lock(Mutex);
23e8d8bef9SDimitry Andric   std::string FileStr = File.str();
24e8d8bef9SDimitry Andric   if (markAsSeen(FileStr))
25e8d8bef9SDimitry Andric     addFileImpl(FileStr);
26e8d8bef9SDimitry Andric }
27e8d8bef9SDimitry Andric 
28e8d8bef9SDimitry Andric void FileCollectorBase::addDirectory(const Twine &Dir) {
29e8d8bef9SDimitry Andric   assert(sys::fs::is_directory(Dir));
30e8d8bef9SDimitry Andric   std::error_code EC;
31e8d8bef9SDimitry Andric   addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
32e8d8bef9SDimitry Andric }
33e8d8bef9SDimitry Andric 
348bcb0991SDimitry Andric static bool isCaseSensitivePath(StringRef Path) {
358bcb0991SDimitry Andric   SmallString<256> TmpDest = Path, UpperDest, RealDest;
368bcb0991SDimitry Andric 
378bcb0991SDimitry Andric   // Remove component traversals, links, etc.
38fe6060f1SDimitry Andric   if (sys::fs::real_path(Path, TmpDest))
398bcb0991SDimitry Andric     return true; // Current default value in vfs.yaml
408bcb0991SDimitry Andric   Path = TmpDest;
418bcb0991SDimitry Andric 
428bcb0991SDimitry Andric   // Change path to all upper case and ask for its real path, if the latter
438bcb0991SDimitry Andric   // exists and is equal to path, it's not case sensitive. Default to case
448bcb0991SDimitry Andric   // sensitive in the absence of real_path, since this is the YAMLVFSWriter
458bcb0991SDimitry Andric   // default.
468bcb0991SDimitry Andric   UpperDest = Path.upper();
47*0fca6ea1SDimitry Andric   if (!sys::fs::real_path(UpperDest, RealDest) && Path == RealDest)
488bcb0991SDimitry Andric     return false;
498bcb0991SDimitry Andric   return true;
508bcb0991SDimitry Andric }
518bcb0991SDimitry Andric 
528bcb0991SDimitry Andric FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
53bdd1243dSDimitry Andric     : Root(Root), OverlayRoot(OverlayRoot) {
54bdd1243dSDimitry Andric   assert(sys::path::is_absolute(Root) && "Root not absolute");
55bdd1243dSDimitry Andric   assert(sys::path::is_absolute(OverlayRoot) && "OverlayRoot not absolute");
568bcb0991SDimitry Andric }
578bcb0991SDimitry Andric 
58e8d8bef9SDimitry Andric void FileCollector::PathCanonicalizer::updateWithRealPath(
59e8d8bef9SDimitry Andric     SmallVectorImpl<char> &Path) {
60e8d8bef9SDimitry Andric   StringRef SrcPath(Path.begin(), Path.size());
61e8d8bef9SDimitry Andric   StringRef Filename = sys::path::filename(SrcPath);
62e8d8bef9SDimitry Andric   StringRef Directory = sys::path::parent_path(SrcPath);
638bcb0991SDimitry Andric 
64e8d8bef9SDimitry Andric   // Use real_path to fix any symbolic link component present in the directory
65e8d8bef9SDimitry Andric   // part of the path, caching the search because computing the real path is
66e8d8bef9SDimitry Andric   // expensive.
67e8d8bef9SDimitry Andric   SmallString<256> RealPath;
68e8d8bef9SDimitry Andric   auto DirWithSymlink = CachedDirs.find(Directory);
69e8d8bef9SDimitry Andric   if (DirWithSymlink == CachedDirs.end()) {
70e8d8bef9SDimitry Andric     // FIXME: Should this be a call to FileSystem::getRealpath(), in some
71e8d8bef9SDimitry Andric     // cases? What if there is nothing on disk?
72e8d8bef9SDimitry Andric     if (sys::fs::real_path(Directory, RealPath))
73e8d8bef9SDimitry Andric       return;
747a6dacacSDimitry Andric     CachedDirs[Directory] = std::string(RealPath);
758bcb0991SDimitry Andric   } else {
768bcb0991SDimitry Andric     RealPath = DirWithSymlink->second;
778bcb0991SDimitry Andric   }
788bcb0991SDimitry Andric 
79e8d8bef9SDimitry Andric   // Finish recreating the path by appending the original filename, since we
80e8d8bef9SDimitry Andric   // don't need to resolve symlinks in the filename.
81e8d8bef9SDimitry Andric   //
82e8d8bef9SDimitry Andric   // FIXME: If we can cope with this, maybe we can cope without calling
83e8d8bef9SDimitry Andric   // getRealPath() at all when there's no ".." component.
84e8d8bef9SDimitry Andric   sys::path::append(RealPath, Filename);
85e8d8bef9SDimitry Andric 
86e8d8bef9SDimitry Andric   // Swap to create the output.
87e8d8bef9SDimitry Andric   Path.swap(RealPath);
888bcb0991SDimitry Andric }
898bcb0991SDimitry Andric 
90e8d8bef9SDimitry Andric /// Make Path absolute.
91e8d8bef9SDimitry Andric static void makeAbsolute(SmallVectorImpl<char> &Path) {
928bcb0991SDimitry Andric   // We need an absolute src path to append to the root.
93e8d8bef9SDimitry Andric   sys::fs::make_absolute(Path);
948bcb0991SDimitry Andric 
958bcb0991SDimitry Andric   // Canonicalize src to a native path to avoid mixed separator styles.
96e8d8bef9SDimitry Andric   sys::path::native(Path);
978bcb0991SDimitry Andric 
988bcb0991SDimitry Andric   // Remove redundant leading "./" pieces and consecutive separators.
99e8d8bef9SDimitry Andric   Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
100e8d8bef9SDimitry Andric                                StringRef(Path.begin(), Path.size()))
101e8d8bef9SDimitry Andric                                .begin());
102e8d8bef9SDimitry Andric }
1038bcb0991SDimitry Andric 
104e8d8bef9SDimitry Andric FileCollector::PathCanonicalizer::PathStorage
105e8d8bef9SDimitry Andric FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
106e8d8bef9SDimitry Andric   PathStorage Paths;
107e8d8bef9SDimitry Andric   Paths.VirtualPath = SrcPath;
108e8d8bef9SDimitry Andric   makeAbsolute(Paths.VirtualPath);
1098bcb0991SDimitry Andric 
1108bcb0991SDimitry Andric   // If a ".." component is present after a symlink component, remove_dots may
1118bcb0991SDimitry Andric   // lead to the wrong real destination path. Let the source be canonicalized
1128bcb0991SDimitry Andric   // like that but make sure we always use the real path for the destination.
113e8d8bef9SDimitry Andric   Paths.CopyFrom = Paths.VirtualPath;
114e8d8bef9SDimitry Andric   updateWithRealPath(Paths.CopyFrom);
115e8d8bef9SDimitry Andric 
116e8d8bef9SDimitry Andric   // Canonicalize the virtual path by removing "..", "." components.
117e8d8bef9SDimitry Andric   sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
118e8d8bef9SDimitry Andric 
119e8d8bef9SDimitry Andric   return Paths;
120e8d8bef9SDimitry Andric }
121e8d8bef9SDimitry Andric 
122e8d8bef9SDimitry Andric void FileCollector::addFileImpl(StringRef SrcPath) {
123e8d8bef9SDimitry Andric   PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
1248bcb0991SDimitry Andric 
1258bcb0991SDimitry Andric   SmallString<256> DstPath = StringRef(Root);
126e8d8bef9SDimitry Andric   sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
1278bcb0991SDimitry Andric 
1288bcb0991SDimitry Andric   // Always map a canonical src path to its real path into the YAML, by doing
1298bcb0991SDimitry Andric   // this we map different virtual src paths to the same entry in the VFS
1308bcb0991SDimitry Andric   // overlay, which is a way to emulate symlink inside the VFS; this is also
1318bcb0991SDimitry Andric   // needed for correctness, not doing that can lead to module redefinition
1328bcb0991SDimitry Andric   // errors.
133e8d8bef9SDimitry Andric   addFileToMapping(Paths.VirtualPath, DstPath);
1348bcb0991SDimitry Andric }
1358bcb0991SDimitry Andric 
1365ffd83dbSDimitry Andric llvm::vfs::directory_iterator
1375ffd83dbSDimitry Andric FileCollector::addDirectoryImpl(const llvm::Twine &Dir,
1385ffd83dbSDimitry Andric                                 IntrusiveRefCntPtr<vfs::FileSystem> FS,
1395ffd83dbSDimitry Andric                                 std::error_code &EC) {
1405ffd83dbSDimitry Andric   auto It = FS->dir_begin(Dir, EC);
1415ffd83dbSDimitry Andric   if (EC)
1425ffd83dbSDimitry Andric     return It;
1435ffd83dbSDimitry Andric   addFile(Dir);
1445ffd83dbSDimitry Andric   for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) {
1455ffd83dbSDimitry Andric     if (It->type() == sys::fs::file_type::regular_file ||
1465ffd83dbSDimitry Andric         It->type() == sys::fs::file_type::directory_file ||
1475ffd83dbSDimitry Andric         It->type() == sys::fs::file_type::symlink_file) {
1485ffd83dbSDimitry Andric       addFile(It->path());
1495ffd83dbSDimitry Andric     }
1505ffd83dbSDimitry Andric   }
1515ffd83dbSDimitry Andric   if (EC)
1525ffd83dbSDimitry Andric     return It;
1535ffd83dbSDimitry Andric   // Return a new iterator.
1545ffd83dbSDimitry Andric   return FS->dir_begin(Dir, EC);
1555ffd83dbSDimitry Andric }
1565ffd83dbSDimitry Andric 
1578bcb0991SDimitry Andric /// Set the access and modification time for the given file from the given
1588bcb0991SDimitry Andric /// status object.
1598bcb0991SDimitry Andric static std::error_code
1608bcb0991SDimitry Andric copyAccessAndModificationTime(StringRef Filename,
1618bcb0991SDimitry Andric                               const sys::fs::file_status &Stat) {
1628bcb0991SDimitry Andric   int FD;
1638bcb0991SDimitry Andric 
1648bcb0991SDimitry Andric   if (auto EC =
1658bcb0991SDimitry Andric           sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
1668bcb0991SDimitry Andric     return EC;
1678bcb0991SDimitry Andric 
1688bcb0991SDimitry Andric   if (auto EC = sys::fs::setLastAccessAndModificationTime(
1698bcb0991SDimitry Andric           FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
1708bcb0991SDimitry Andric     return EC;
1718bcb0991SDimitry Andric 
1728bcb0991SDimitry Andric   if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
1738bcb0991SDimitry Andric     return EC;
1748bcb0991SDimitry Andric 
1758bcb0991SDimitry Andric   return {};
1768bcb0991SDimitry Andric }
1778bcb0991SDimitry Andric 
1788bcb0991SDimitry Andric std::error_code FileCollector::copyFiles(bool StopOnError) {
1795ffd83dbSDimitry Andric   auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true);
1805ffd83dbSDimitry Andric   if (Err) {
1815ffd83dbSDimitry Andric     return Err;
1825ffd83dbSDimitry Andric   }
1835ffd83dbSDimitry Andric 
1845ffd83dbSDimitry Andric   std::lock_guard<std::mutex> lock(Mutex);
1855ffd83dbSDimitry Andric 
1868bcb0991SDimitry Andric   for (auto &entry : VFSWriter.getMappings()) {
1878bcb0991SDimitry Andric     // Get the status of the original file/directory.
1888bcb0991SDimitry Andric     sys::fs::file_status Stat;
1898bcb0991SDimitry Andric     if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
1908bcb0991SDimitry Andric       if (StopOnError)
1918bcb0991SDimitry Andric         return EC;
1928bcb0991SDimitry Andric       continue;
1938bcb0991SDimitry Andric     }
1948bcb0991SDimitry Andric 
195e8d8bef9SDimitry Andric     // Continue if the file doesn't exist.
196e8d8bef9SDimitry Andric     if (Stat.type() == sys::fs::file_type::file_not_found)
197e8d8bef9SDimitry Andric       continue;
198e8d8bef9SDimitry Andric 
199e8d8bef9SDimitry Andric     // Create directory tree.
200e8d8bef9SDimitry Andric     if (std::error_code EC =
201e8d8bef9SDimitry Andric             sys::fs::create_directories(sys::path::parent_path(entry.RPath),
202e8d8bef9SDimitry Andric                                         /*IgnoreExisting=*/true)) {
203e8d8bef9SDimitry Andric       if (StopOnError)
204e8d8bef9SDimitry Andric         return EC;
205e8d8bef9SDimitry Andric     }
206e8d8bef9SDimitry Andric 
2078bcb0991SDimitry Andric     if (Stat.type() == sys::fs::file_type::directory_file) {
2088bcb0991SDimitry Andric       // Construct a directory when it's just a directory entry.
2098bcb0991SDimitry Andric       if (std::error_code EC =
2108bcb0991SDimitry Andric               sys::fs::create_directories(entry.RPath,
2118bcb0991SDimitry Andric                                           /*IgnoreExisting=*/true)) {
2128bcb0991SDimitry Andric         if (StopOnError)
2138bcb0991SDimitry Andric           return EC;
2148bcb0991SDimitry Andric       }
2158bcb0991SDimitry Andric       continue;
2168bcb0991SDimitry Andric     }
2178bcb0991SDimitry Andric 
2188bcb0991SDimitry Andric     // Copy file over.
2198bcb0991SDimitry Andric     if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) {
2208bcb0991SDimitry Andric       if (StopOnError)
2218bcb0991SDimitry Andric         return EC;
2228bcb0991SDimitry Andric     }
2238bcb0991SDimitry Andric 
2248bcb0991SDimitry Andric     // Copy over permissions.
2258bcb0991SDimitry Andric     if (auto perms = sys::fs::getPermissions(entry.VPath)) {
2268bcb0991SDimitry Andric       if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) {
2278bcb0991SDimitry Andric         if (StopOnError)
2288bcb0991SDimitry Andric           return EC;
2298bcb0991SDimitry Andric       }
2308bcb0991SDimitry Andric     }
2318bcb0991SDimitry Andric 
2328bcb0991SDimitry Andric     // Copy over modification time.
2338bcb0991SDimitry Andric     copyAccessAndModificationTime(entry.RPath, Stat);
2348bcb0991SDimitry Andric   }
2358bcb0991SDimitry Andric   return {};
2368bcb0991SDimitry Andric }
2378bcb0991SDimitry Andric 
2385ffd83dbSDimitry Andric std::error_code FileCollector::writeMapping(StringRef MappingFile) {
2398bcb0991SDimitry Andric   std::lock_guard<std::mutex> lock(Mutex);
2408bcb0991SDimitry Andric 
2418bcb0991SDimitry Andric   VFSWriter.setOverlayDir(OverlayRoot);
2428bcb0991SDimitry Andric   VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot));
2438bcb0991SDimitry Andric   VFSWriter.setUseExternalNames(false);
2448bcb0991SDimitry Andric 
2458bcb0991SDimitry Andric   std::error_code EC;
246fe6060f1SDimitry Andric   raw_fd_ostream os(MappingFile, EC, sys::fs::OF_TextWithCRLF);
2478bcb0991SDimitry Andric   if (EC)
2488bcb0991SDimitry Andric     return EC;
2498bcb0991SDimitry Andric 
2508bcb0991SDimitry Andric   VFSWriter.write(os);
2518bcb0991SDimitry Andric 
2528bcb0991SDimitry Andric   return {};
2538bcb0991SDimitry Andric }
2548bcb0991SDimitry Andric 
2555ffd83dbSDimitry Andric namespace llvm {
2568bcb0991SDimitry Andric 
2578bcb0991SDimitry Andric class FileCollectorFileSystem : public vfs::FileSystem {
2588bcb0991SDimitry Andric public:
2598bcb0991SDimitry Andric   explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,
2608bcb0991SDimitry Andric                                    std::shared_ptr<FileCollector> Collector)
2618bcb0991SDimitry Andric       : FS(std::move(FS)), Collector(std::move(Collector)) {}
2628bcb0991SDimitry Andric 
2638bcb0991SDimitry Andric   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
2648bcb0991SDimitry Andric     auto Result = FS->status(Path);
2658bcb0991SDimitry Andric     if (Result && Result->exists())
2668bcb0991SDimitry Andric       Collector->addFile(Path);
2678bcb0991SDimitry Andric     return Result;
2688bcb0991SDimitry Andric   }
2698bcb0991SDimitry Andric 
2708bcb0991SDimitry Andric   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
2718bcb0991SDimitry Andric   openFileForRead(const Twine &Path) override {
2728bcb0991SDimitry Andric     auto Result = FS->openFileForRead(Path);
2738bcb0991SDimitry Andric     if (Result && *Result)
2748bcb0991SDimitry Andric       Collector->addFile(Path);
2758bcb0991SDimitry Andric     return Result;
2768bcb0991SDimitry Andric   }
2778bcb0991SDimitry Andric 
2788bcb0991SDimitry Andric   llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir,
2798bcb0991SDimitry Andric                                           std::error_code &EC) override {
2805ffd83dbSDimitry Andric     return Collector->addDirectoryImpl(Dir, FS, EC);
2818bcb0991SDimitry Andric   }
2828bcb0991SDimitry Andric 
2838bcb0991SDimitry Andric   std::error_code getRealPath(const Twine &Path,
284*0fca6ea1SDimitry Andric                               SmallVectorImpl<char> &Output) override {
2858bcb0991SDimitry Andric     auto EC = FS->getRealPath(Path, Output);
2868bcb0991SDimitry Andric     if (!EC) {
2878bcb0991SDimitry Andric       Collector->addFile(Path);
2888bcb0991SDimitry Andric       if (Output.size() > 0)
2898bcb0991SDimitry Andric         Collector->addFile(Output);
2908bcb0991SDimitry Andric     }
2918bcb0991SDimitry Andric     return EC;
2928bcb0991SDimitry Andric   }
2938bcb0991SDimitry Andric 
2948bcb0991SDimitry Andric   std::error_code isLocal(const Twine &Path, bool &Result) override {
2958bcb0991SDimitry Andric     return FS->isLocal(Path, Result);
2968bcb0991SDimitry Andric   }
2978bcb0991SDimitry Andric 
2988bcb0991SDimitry Andric   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
2998bcb0991SDimitry Andric     return FS->getCurrentWorkingDirectory();
3008bcb0991SDimitry Andric   }
3018bcb0991SDimitry Andric 
3028bcb0991SDimitry Andric   std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override {
3038bcb0991SDimitry Andric     return FS->setCurrentWorkingDirectory(Path);
3048bcb0991SDimitry Andric   }
3058bcb0991SDimitry Andric 
3068bcb0991SDimitry Andric private:
3078bcb0991SDimitry Andric   IntrusiveRefCntPtr<vfs::FileSystem> FS;
3088bcb0991SDimitry Andric   std::shared_ptr<FileCollector> Collector;
3098bcb0991SDimitry Andric };
3108bcb0991SDimitry Andric 
3115ffd83dbSDimitry Andric } // namespace llvm
3128bcb0991SDimitry Andric 
3138bcb0991SDimitry Andric IntrusiveRefCntPtr<vfs::FileSystem>
3148bcb0991SDimitry Andric FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
3158bcb0991SDimitry Andric                                   std::shared_ptr<FileCollector> Collector) {
3168bcb0991SDimitry Andric   return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector));
3178bcb0991SDimitry Andric }
318