1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Frontend/CompilerInvocation.h" 15 #include "clang/Frontend/Utils.h" 16 #include "clang/Lex/HeaderSearch.h" 17 #include "clang/Lex/PPCallbacks.h" 18 #include "clang/Serialization/ASTReader.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/StringSet.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <string> 23 #include <unordered_map> 24 25 namespace clang { 26 namespace tooling { 27 namespace dependencies { 28 29 class DependencyConsumer; 30 31 /// Modular dependency that has already been built prior to the dependency scan. 32 struct PrebuiltModuleDep { 33 std::string ModuleName; 34 std::string PCMFile; 35 std::string ModuleMapFile; 36 37 explicit PrebuiltModuleDep(const Module *M) 38 : ModuleName(M->getTopLevelModuleName()), 39 PCMFile(M->getASTFile()->getName()), 40 ModuleMapFile(M->PresumedModuleMapFile) {} 41 }; 42 43 /// This is used to identify a specific module. 44 struct ModuleID { 45 /// The name of the module. This may include `:` for C++20 module partitions, 46 /// or a header-name for C++20 header units. 47 std::string ModuleName; 48 49 /// The context hash of a module represents the set of compiler options that 50 /// may make one version of a module incompatible with another. This includes 51 /// things like language mode, predefined macros, header search paths, etc... 52 /// 53 /// Modules with the same name but a different \c ContextHash should be 54 /// treated as separate modules for the purpose of a build. 55 std::string ContextHash; 56 57 bool operator==(const ModuleID &Other) const { 58 return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash; 59 } 60 }; 61 62 struct ModuleIDHasher { 63 std::size_t operator()(const ModuleID &MID) const { 64 return llvm::hash_combine(MID.ModuleName, MID.ContextHash); 65 } 66 }; 67 68 struct ModuleDeps { 69 /// The identifier of the module. 70 ModuleID ID; 71 72 /// Whether this is a "system" module. 73 bool IsSystem; 74 75 /// The path to the modulemap file which defines this module. 76 /// 77 /// This can be used to explicitly build this module. This file will 78 /// additionally appear in \c FileDeps as a dependency. 79 std::string ClangModuleMapFile; 80 81 /// The path to where an implicit build would put the PCM for this module. 82 std::string ImplicitModulePCMPath; 83 84 /// A collection of absolute paths to files that this module directly depends 85 /// on, not including transitive dependencies. 86 llvm::StringSet<> FileDeps; 87 88 /// A collection of prebuilt modular dependencies this module directly depends 89 /// on, not including transitive dependencies. 90 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 91 92 /// A list of module identifiers this module directly depends on, not 93 /// including transitive dependencies. 94 /// 95 /// This may include modules with a different context hash when it can be 96 /// determined that the differences are benign for this compilation. 97 std::vector<ModuleID> ClangModuleDeps; 98 99 // Used to track which modules that were discovered were directly imported by 100 // the primary TU. 101 bool ImportedByMainFile = false; 102 103 /// Compiler invocation that can be used to build this module (without paths). 104 CompilerInvocation BuildInvocation; 105 106 /// Gets the canonical command line suitable for passing to clang. 107 /// 108 /// \param LookupPCMPath This function is called to fill in "-fmodule-file=" 109 /// arguments and the "-o" argument. It needs to return 110 /// a path for where the PCM for the given module is to 111 /// be located. 112 /// \param LookupModuleDeps This function is called to collect the full 113 /// transitive set of dependencies for this 114 /// compilation and fill in "-fmodule-map-file=" 115 /// arguments. 116 std::vector<std::string> getCanonicalCommandLine( 117 std::function<StringRef(ModuleID)> LookupPCMPath, 118 std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps) const; 119 120 /// Gets the canonical command line suitable for passing to clang, excluding 121 /// arguments containing modules-related paths: "-fmodule-file=", "-o", 122 /// "-fmodule-map-file=". 123 std::vector<std::string> getCanonicalCommandLineWithoutModulePaths() const; 124 }; 125 126 namespace detail { 127 /// Collect the paths of PCM and module map files for the modules in \c Modules 128 /// transitively. 129 void collectPCMAndModuleMapPaths( 130 llvm::ArrayRef<ModuleID> Modules, 131 std::function<StringRef(ModuleID)> LookupPCMPath, 132 std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps, 133 std::vector<std::string> &PCMPaths, std::vector<std::string> &ModMapPaths); 134 } // namespace detail 135 136 class ModuleDepCollector; 137 138 /// Callback that records textual includes and direct modular includes/imports 139 /// during preprocessing. At the end of the main file, it also collects 140 /// transitive modular dependencies and passes everything to the 141 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 142 class ModuleDepCollectorPP final : public PPCallbacks { 143 public: 144 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} 145 146 void FileChanged(SourceLocation Loc, FileChangeReason Reason, 147 SrcMgr::CharacteristicKind FileType, 148 FileID PrevFID) override; 149 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 150 StringRef FileName, bool IsAngled, 151 CharSourceRange FilenameRange, const FileEntry *File, 152 StringRef SearchPath, StringRef RelativePath, 153 const Module *Imported, 154 SrcMgr::CharacteristicKind FileType) override; 155 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, 156 const Module *Imported) override; 157 158 void EndOfMainFile() override; 159 160 private: 161 /// The parent dependency collector. 162 ModuleDepCollector &MDC; 163 /// Working set of direct modular dependencies. 164 llvm::DenseSet<const Module *> DirectModularDeps; 165 /// Working set of direct modular dependencies that have already been built. 166 llvm::DenseSet<const Module *> DirectPrebuiltModularDeps; 167 168 void handleImport(const Module *Imported); 169 170 /// Adds direct modular dependencies that have already been built to the 171 /// ModuleDeps instance. 172 void 173 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, 174 llvm::DenseSet<const Module *> &SeenSubmodules); 175 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, 176 llvm::DenseSet<const Module *> &SeenSubmodules); 177 178 /// Traverses the previously collected direct modular dependencies to discover 179 /// transitive modular dependencies and fills the parent \c ModuleDepCollector 180 /// with both. 181 ModuleID handleTopLevelModule(const Module *M); 182 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, 183 llvm::DenseSet<const Module *> &AddedModules); 184 void addModuleDep(const Module *M, ModuleDeps &MD, 185 llvm::DenseSet<const Module *> &AddedModules); 186 }; 187 188 /// Collects modular and non-modular dependencies of the main file by attaching 189 /// \c ModuleDepCollectorPP to the preprocessor. 190 class ModuleDepCollector final : public DependencyCollector { 191 public: 192 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, 193 CompilerInstance &ScanInstance, DependencyConsumer &C, 194 CompilerInvocation &&OriginalCI, bool OptimizeArgs); 195 196 void attachToPreprocessor(Preprocessor &PP) override; 197 void attachToASTReader(ASTReader &R) override; 198 199 private: 200 friend ModuleDepCollectorPP; 201 202 /// The compiler instance for scanning the current translation unit. 203 CompilerInstance &ScanInstance; 204 /// The consumer of collected dependency information. 205 DependencyConsumer &Consumer; 206 /// Path to the main source file. 207 std::string MainFile; 208 /// Hash identifying the compilation conditions of the current TU. 209 std::string ContextHash; 210 /// Non-modular file dependencies. This includes the main source file and 211 /// textually included header files. 212 std::vector<std::string> FileDeps; 213 /// Direct and transitive modular dependencies of the main source file. 214 std::unordered_map<const Module *, ModuleDeps> ModularDeps; 215 /// Options that control the dependency output generation. 216 std::unique_ptr<DependencyOutputOptions> Opts; 217 /// The original Clang invocation passed to dependency scanner. 218 CompilerInvocation OriginalInvocation; 219 /// Whether to optimize the modules' command-line arguments. 220 bool OptimizeArgs; 221 222 /// Checks whether the module is known as being prebuilt. 223 bool isPrebuiltModule(const Module *M); 224 225 /// Constructs a CompilerInvocation that can be used to build the given 226 /// module, excluding paths to discovered modular dependencies that are yet to 227 /// be built. 228 CompilerInvocation makeInvocationForModuleBuildWithoutPaths( 229 const ModuleDeps &Deps, 230 llvm::function_ref<void(CompilerInvocation &)> Optimize) const; 231 }; 232 233 } // end namespace dependencies 234 } // end namespace tooling 235 } // end namespace clang 236 237 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 238