1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Frontend/CompilerInvocation.h" 15 #include "clang/Frontend/Utils.h" 16 #include "clang/Lex/HeaderSearch.h" 17 #include "clang/Lex/PPCallbacks.h" 18 #include "clang/Serialization/ASTReader.h" 19 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/Hashing.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include <optional> 25 #include <string> 26 #include <unordered_map> 27 #include <variant> 28 29 namespace clang { 30 namespace tooling { 31 namespace dependencies { 32 33 class DependencyActionController; 34 class DependencyConsumer; 35 36 /// Modular dependency that has already been built prior to the dependency scan. 37 struct PrebuiltModuleDep { 38 std::string ModuleName; 39 std::string PCMFile; 40 std::string ModuleMapFile; 41 42 explicit PrebuiltModuleDep(const Module *M) 43 : ModuleName(M->getTopLevelModuleName()), 44 PCMFile(M->getASTFile()->getName()), 45 ModuleMapFile(M->PresumedModuleMapFile) {} 46 }; 47 48 /// This is used to identify a specific module. 49 struct ModuleID { 50 /// The name of the module. This may include `:` for C++20 module partitions, 51 /// or a header-name for C++20 header units. 52 std::string ModuleName; 53 54 /// The context hash of a module represents the compiler options that affect 55 /// the resulting command-line invocation. 56 /// 57 /// Modules with the same name and ContextHash but different invocations could 58 /// cause non-deterministic build results. 59 /// 60 /// Modules with the same name but a different \c ContextHash should be 61 /// treated as separate modules for the purpose of a build. 62 std::string ContextHash; 63 64 bool operator==(const ModuleID &Other) const { 65 return std::tie(ModuleName, ContextHash) == 66 std::tie(Other.ModuleName, Other.ContextHash); 67 } 68 69 bool operator<(const ModuleID& Other) const { 70 return std::tie(ModuleName, ContextHash) < 71 std::tie(Other.ModuleName, Other.ContextHash); 72 } 73 }; 74 75 /// P1689ModuleInfo - Represents the needed information of standard C++20 76 /// modules for P1689 format. 77 struct P1689ModuleInfo { 78 /// The name of the module. This may include `:` for partitions. 79 std::string ModuleName; 80 81 /// Optional. The source path to the module. 82 std::string SourcePath; 83 84 /// If this module is a standard c++ interface unit. 85 bool IsStdCXXModuleInterface = true; 86 87 enum class ModuleType { 88 NamedCXXModule 89 // To be supported 90 // AngleHeaderUnit, 91 // QuoteHeaderUnit 92 }; 93 ModuleType Type = ModuleType::NamedCXXModule; 94 }; 95 96 /// An output from a module compilation, such as the path of the module file. 97 enum class ModuleOutputKind { 98 /// The module file (.pcm). Required. 99 ModuleFile, 100 /// The path of the dependency file (.d), if any. 101 DependencyFile, 102 /// The null-separated list of names to use as the targets in the dependency 103 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver. 104 DependencyTargets, 105 /// The path of the serialized diagnostic file (.dia), if any. 106 DiagnosticSerializationFile, 107 }; 108 109 struct ModuleDeps { 110 /// The identifier of the module. 111 ModuleID ID; 112 113 /// Whether this is a "system" module. 114 bool IsSystem; 115 116 /// The path to the modulemap file which defines this module. 117 /// 118 /// This can be used to explicitly build this module. This file will 119 /// additionally appear in \c FileDeps as a dependency. 120 std::string ClangModuleMapFile; 121 122 /// A collection of absolute paths to files that this module directly depends 123 /// on, not including transitive dependencies. 124 llvm::StringSet<> FileDeps; 125 126 /// A collection of absolute paths to module map files that this module needs 127 /// to know about. The ordering is significant. 128 std::vector<std::string> ModuleMapFileDeps; 129 130 /// A collection of prebuilt modular dependencies this module directly depends 131 /// on, not including transitive dependencies. 132 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 133 134 /// A list of module identifiers this module directly depends on, not 135 /// including transitive dependencies. 136 /// 137 /// This may include modules with a different context hash when it can be 138 /// determined that the differences are benign for this compilation. 139 std::vector<ModuleID> ClangModuleDeps; 140 141 /// Get (or compute) the compiler invocation that can be used to build this 142 /// module. Does not include argv[0]. 143 const std::vector<std::string> &getBuildArguments(); 144 145 private: 146 friend class ModuleDepCollectorPP; 147 148 std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>> 149 BuildInfo; 150 }; 151 152 class ModuleDepCollector; 153 154 /// Callback that records textual includes and direct modular includes/imports 155 /// during preprocessing. At the end of the main file, it also collects 156 /// transitive modular dependencies and passes everything to the 157 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 158 class ModuleDepCollectorPP final : public PPCallbacks { 159 public: 160 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} 161 162 void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, 163 SrcMgr::CharacteristicKind FileType, FileID PrevFID, 164 SourceLocation Loc) override; 165 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 166 StringRef FileName, bool IsAngled, 167 CharSourceRange FilenameRange, 168 OptionalFileEntryRef File, StringRef SearchPath, 169 StringRef RelativePath, const Module *Imported, 170 SrcMgr::CharacteristicKind FileType) override; 171 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, 172 const Module *Imported) override; 173 174 void EndOfMainFile() override; 175 176 private: 177 /// The parent dependency collector. 178 ModuleDepCollector &MDC; 179 180 void handleImport(const Module *Imported); 181 182 /// Adds direct modular dependencies that have already been built to the 183 /// ModuleDeps instance. 184 void 185 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, 186 llvm::DenseSet<const Module *> &SeenSubmodules); 187 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, 188 llvm::DenseSet<const Module *> &SeenSubmodules); 189 190 /// Traverses the previously collected direct modular dependencies to discover 191 /// transitive modular dependencies and fills the parent \c ModuleDepCollector 192 /// with both. 193 /// Returns the ID or nothing if the dependency is spurious and is ignored. 194 std::optional<ModuleID> handleTopLevelModule(const Module *M); 195 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, 196 llvm::DenseSet<const Module *> &AddedModules); 197 void addModuleDep(const Module *M, ModuleDeps &MD, 198 llvm::DenseSet<const Module *> &AddedModules); 199 200 /// Traverses the affecting modules and updates \c MD with references to the 201 /// parent \c ModuleDepCollector info. 202 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD, 203 llvm::DenseSet<const Module *> &AddedModules); 204 void addAffectingClangModule(const Module *M, ModuleDeps &MD, 205 llvm::DenseSet<const Module *> &AddedModules); 206 }; 207 208 /// Collects modular and non-modular dependencies of the main file by attaching 209 /// \c ModuleDepCollectorPP to the preprocessor. 210 class ModuleDepCollector final : public DependencyCollector { 211 public: 212 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, 213 CompilerInstance &ScanInstance, DependencyConsumer &C, 214 DependencyActionController &Controller, 215 CompilerInvocation OriginalCI, 216 ScanningOptimizations OptimizeArgs, bool EagerLoadModules, 217 bool IsStdModuleP1689Format); 218 219 void attachToPreprocessor(Preprocessor &PP) override; 220 void attachToASTReader(ASTReader &R) override; 221 222 /// Apply any changes implied by the discovered dependencies to the given 223 /// invocation, (e.g. disable implicit modules, add explicit module paths). 224 void applyDiscoveredDependencies(CompilerInvocation &CI); 225 226 private: 227 friend ModuleDepCollectorPP; 228 229 /// The compiler instance for scanning the current translation unit. 230 CompilerInstance &ScanInstance; 231 /// The consumer of collected dependency information. 232 DependencyConsumer &Consumer; 233 /// Callbacks for computing dependency information. 234 DependencyActionController &Controller; 235 /// Path to the main source file. 236 std::string MainFile; 237 /// Hash identifying the compilation conditions of the current TU. 238 std::string ContextHash; 239 /// Non-modular file dependencies. This includes the main source file and 240 /// textually included header files. 241 std::vector<std::string> FileDeps; 242 /// Direct and transitive modular dependencies of the main source file. 243 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; 244 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without 245 /// a preprocessor. Storage owned by \c ModularDeps. 246 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID; 247 /// Direct modular dependencies that have already been built. 248 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps; 249 /// Working set of direct modular dependencies. 250 llvm::SetVector<const Module *> DirectModularDeps; 251 /// Options that control the dependency output generation. 252 std::unique_ptr<DependencyOutputOptions> Opts; 253 /// A Clang invocation that's based on the original TU invocation and that has 254 /// been partially transformed into one that can perform explicit build of 255 /// a discovered modular dependency. Note that this still needs to be adjusted 256 /// for each individual module. 257 CowCompilerInvocation CommonInvocation; 258 /// Whether to optimize the modules' command-line arguments. 259 ScanningOptimizations OptimizeArgs; 260 /// Whether to set up command-lines to load PCM files eagerly. 261 bool EagerLoadModules; 262 /// If we're generating dependency output in P1689 format 263 /// for standard C++ modules. 264 bool IsStdModuleP1689Format; 265 266 std::optional<P1689ModuleInfo> ProvidedStdCXXModule; 267 std::vector<P1689ModuleInfo> RequiredStdCXXModules; 268 269 /// Checks whether the module is known as being prebuilt. 270 bool isPrebuiltModule(const Module *M); 271 272 /// Adds \p Path to \c FileDeps, making it absolute if necessary. 273 void addFileDep(StringRef Path); 274 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary. 275 void addFileDep(ModuleDeps &MD, StringRef Path); 276 277 /// Get a Clang invocation adjusted to build the given modular dependency. 278 /// This excludes paths that are yet-to-be-provided by the build system. 279 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs( 280 const ModuleDeps &Deps, 281 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const; 282 283 /// Collect module map files for given modules. 284 llvm::DenseSet<const FileEntry *> 285 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const; 286 287 /// Add module map files to the invocation, if needed. 288 void addModuleMapFiles(CompilerInvocation &CI, 289 ArrayRef<ModuleID> ClangModuleDeps) const; 290 /// Add module files (pcm) to the invocation, if needed. 291 void addModuleFiles(CompilerInvocation &CI, 292 ArrayRef<ModuleID> ClangModuleDeps) const; 293 void addModuleFiles(CowCompilerInvocation &CI, 294 ArrayRef<ModuleID> ClangModuleDeps) const; 295 296 /// Add paths that require looking up outputs to the given dependencies. 297 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps); 298 299 /// Compute the context hash for \p Deps, and create the mapping 300 /// \c ModuleDepsByID[Deps.ID] = &Deps. 301 void associateWithContextHash(const CowCompilerInvocation &CI, 302 ModuleDeps &Deps); 303 }; 304 305 } // end namespace dependencies 306 } // end namespace tooling 307 } // end namespace clang 308 309 namespace llvm { 310 inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) { 311 return hash_combine(ID.ModuleName, ID.ContextHash); 312 } 313 314 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> { 315 using ModuleID = clang::tooling::dependencies::ModuleID; 316 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; } 317 static inline ModuleID getTombstoneKey() { 318 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash 319 } 320 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); } 321 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) { 322 return LHS == RHS; 323 } 324 }; 325 } // namespace llvm 326 327 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 328