1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/Module.h" 14 #include "clang/Basic/SourceManager.h" 15 #include "clang/Frontend/CompilerInvocation.h" 16 #include "clang/Frontend/Utils.h" 17 #include "clang/Lex/HeaderSearch.h" 18 #include "clang/Lex/PPCallbacks.h" 19 #include "clang/Serialization/ASTReader.h" 20 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/Hashing.h" 23 #include "llvm/ADT/StringSet.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <optional> 26 #include <string> 27 #include <unordered_map> 28 #include <variant> 29 30 namespace clang { 31 namespace tooling { 32 namespace dependencies { 33 34 class DependencyActionController; 35 class DependencyConsumer; 36 37 /// Modular dependency that has already been built prior to the dependency scan. 38 struct PrebuiltModuleDep { 39 std::string ModuleName; 40 std::string PCMFile; 41 std::string ModuleMapFile; 42 43 explicit PrebuiltModuleDep(const Module *M) 44 : ModuleName(M->getTopLevelModuleName()), 45 PCMFile(M->getASTFile()->getName()), 46 ModuleMapFile(M->PresumedModuleMapFile) {} 47 }; 48 49 /// This is used to identify a specific module. 50 struct ModuleID { 51 /// The name of the module. This may include `:` for C++20 module partitions, 52 /// or a header-name for C++20 header units. 53 std::string ModuleName; 54 55 /// The context hash of a module represents the compiler options that affect 56 /// the resulting command-line invocation. 57 /// 58 /// Modules with the same name and ContextHash but different invocations could 59 /// cause non-deterministic build results. 60 /// 61 /// Modules with the same name but a different \c ContextHash should be 62 /// treated as separate modules for the purpose of a build. 63 std::string ContextHash; 64 65 bool operator==(const ModuleID &Other) const { 66 return std::tie(ModuleName, ContextHash) == 67 std::tie(Other.ModuleName, Other.ContextHash); 68 } 69 70 bool operator<(const ModuleID& Other) const { 71 return std::tie(ModuleName, ContextHash) < 72 std::tie(Other.ModuleName, Other.ContextHash); 73 } 74 }; 75 76 /// P1689ModuleInfo - Represents the needed information of standard C++20 77 /// modules for P1689 format. 78 struct P1689ModuleInfo { 79 /// The name of the module. This may include `:` for partitions. 80 std::string ModuleName; 81 82 /// Optional. The source path to the module. 83 std::string SourcePath; 84 85 /// If this module is a standard c++ interface unit. 86 bool IsStdCXXModuleInterface = true; 87 88 enum class ModuleType { 89 NamedCXXModule 90 // To be supported 91 // AngleHeaderUnit, 92 // QuoteHeaderUnit 93 }; 94 ModuleType Type = ModuleType::NamedCXXModule; 95 }; 96 97 /// An output from a module compilation, such as the path of the module file. 98 enum class ModuleOutputKind { 99 /// The module file (.pcm). Required. 100 ModuleFile, 101 /// The path of the dependency file (.d), if any. 102 DependencyFile, 103 /// The null-separated list of names to use as the targets in the dependency 104 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver. 105 DependencyTargets, 106 /// The path of the serialized diagnostic file (.dia), if any. 107 DiagnosticSerializationFile, 108 }; 109 110 struct ModuleDeps { 111 /// The identifier of the module. 112 ModuleID ID; 113 114 /// Whether this is a "system" module. 115 bool IsSystem; 116 117 /// The path to the modulemap file which defines this module. 118 /// 119 /// This can be used to explicitly build this module. This file will 120 /// additionally appear in \c FileDeps as a dependency. 121 std::string ClangModuleMapFile; 122 123 /// A collection of absolute paths to files that this module directly depends 124 /// on, not including transitive dependencies. 125 llvm::StringSet<> FileDeps; 126 127 /// A collection of absolute paths to module map files that this module needs 128 /// to know about. The ordering is significant. 129 std::vector<std::string> ModuleMapFileDeps; 130 131 /// A collection of prebuilt modular dependencies this module directly depends 132 /// on, not including transitive dependencies. 133 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 134 135 /// A list of module identifiers this module directly depends on, not 136 /// including transitive dependencies. 137 /// 138 /// This may include modules with a different context hash when it can be 139 /// determined that the differences are benign for this compilation. 140 std::vector<ModuleID> ClangModuleDeps; 141 142 /// The set of libraries or frameworks to link against when 143 /// an entity from this module is used. 144 llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries; 145 146 /// Get (or compute) the compiler invocation that can be used to build this 147 /// module. Does not include argv[0]. 148 const std::vector<std::string> &getBuildArguments(); 149 150 private: 151 friend class ModuleDepCollectorPP; 152 153 std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>> 154 BuildInfo; 155 }; 156 157 using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>; 158 159 class ModuleDepCollector; 160 161 /// Callback that records textual includes and direct modular includes/imports 162 /// during preprocessing. At the end of the main file, it also collects 163 /// transitive modular dependencies and passes everything to the 164 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 165 class ModuleDepCollectorPP final : public PPCallbacks { 166 public: 167 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} 168 169 void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, 170 SrcMgr::CharacteristicKind FileType, FileID PrevFID, 171 SourceLocation Loc) override; 172 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 173 StringRef FileName, bool IsAngled, 174 CharSourceRange FilenameRange, 175 OptionalFileEntryRef File, StringRef SearchPath, 176 StringRef RelativePath, const Module *SuggestedModule, 177 bool ModuleImported, 178 SrcMgr::CharacteristicKind FileType) override; 179 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, 180 const Module *Imported) override; 181 182 void EndOfMainFile() override; 183 184 private: 185 /// The parent dependency collector. 186 ModuleDepCollector &MDC; 187 188 void handleImport(const Module *Imported); 189 190 /// Adds direct modular dependencies that have already been built to the 191 /// ModuleDeps instance. 192 void 193 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, 194 llvm::DenseSet<const Module *> &SeenSubmodules); 195 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, 196 llvm::DenseSet<const Module *> &SeenSubmodules); 197 198 /// Traverses the previously collected direct modular dependencies to discover 199 /// transitive modular dependencies and fills the parent \c ModuleDepCollector 200 /// with both. 201 /// Returns the ID or nothing if the dependency is spurious and is ignored. 202 std::optional<ModuleID> handleTopLevelModule(const Module *M); 203 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, 204 llvm::DenseSet<const Module *> &AddedModules); 205 void addModuleDep(const Module *M, ModuleDeps &MD, 206 llvm::DenseSet<const Module *> &AddedModules); 207 208 /// Traverses the affecting modules and updates \c MD with references to the 209 /// parent \c ModuleDepCollector info. 210 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD, 211 llvm::DenseSet<const Module *> &AddedModules); 212 void addAffectingClangModule(const Module *M, ModuleDeps &MD, 213 llvm::DenseSet<const Module *> &AddedModules); 214 }; 215 216 /// Collects modular and non-modular dependencies of the main file by attaching 217 /// \c ModuleDepCollectorPP to the preprocessor. 218 class ModuleDepCollector final : public DependencyCollector { 219 public: 220 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, 221 CompilerInstance &ScanInstance, DependencyConsumer &C, 222 DependencyActionController &Controller, 223 CompilerInvocation OriginalCI, 224 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap, 225 ScanningOptimizations OptimizeArgs, bool EagerLoadModules, 226 bool IsStdModuleP1689Format); 227 228 void attachToPreprocessor(Preprocessor &PP) override; 229 void attachToASTReader(ASTReader &R) override; 230 231 /// Apply any changes implied by the discovered dependencies to the given 232 /// invocation, (e.g. disable implicit modules, add explicit module paths). 233 void applyDiscoveredDependencies(CompilerInvocation &CI); 234 235 private: 236 friend ModuleDepCollectorPP; 237 238 /// The compiler instance for scanning the current translation unit. 239 CompilerInstance &ScanInstance; 240 /// The consumer of collected dependency information. 241 DependencyConsumer &Consumer; 242 /// Callbacks for computing dependency information. 243 DependencyActionController &Controller; 244 /// Mapping from prebuilt AST files to their sorted list of VFS overlay files. 245 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap; 246 /// Path to the main source file. 247 std::string MainFile; 248 /// Hash identifying the compilation conditions of the current TU. 249 std::string ContextHash; 250 /// Non-modular file dependencies. This includes the main source file and 251 /// textually included header files. 252 std::vector<std::string> FileDeps; 253 /// Direct and transitive modular dependencies of the main source file. 254 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; 255 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without 256 /// a preprocessor. Storage owned by \c ModularDeps. 257 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID; 258 /// Direct modular dependencies that have already been built. 259 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps; 260 /// Working set of direct modular dependencies. 261 llvm::SetVector<const Module *> DirectModularDeps; 262 /// Options that control the dependency output generation. 263 std::unique_ptr<DependencyOutputOptions> Opts; 264 /// A Clang invocation that's based on the original TU invocation and that has 265 /// been partially transformed into one that can perform explicit build of 266 /// a discovered modular dependency. Note that this still needs to be adjusted 267 /// for each individual module. 268 CowCompilerInvocation CommonInvocation; 269 /// Whether to optimize the modules' command-line arguments. 270 ScanningOptimizations OptimizeArgs; 271 /// Whether to set up command-lines to load PCM files eagerly. 272 bool EagerLoadModules; 273 /// If we're generating dependency output in P1689 format 274 /// for standard C++ modules. 275 bool IsStdModuleP1689Format; 276 277 std::optional<P1689ModuleInfo> ProvidedStdCXXModule; 278 std::vector<P1689ModuleInfo> RequiredStdCXXModules; 279 280 /// Checks whether the module is known as being prebuilt. 281 bool isPrebuiltModule(const Module *M); 282 283 /// Adds \p Path to \c FileDeps, making it absolute if necessary. 284 void addFileDep(StringRef Path); 285 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary. 286 void addFileDep(ModuleDeps &MD, StringRef Path); 287 288 /// Get a Clang invocation adjusted to build the given modular dependency. 289 /// This excludes paths that are yet-to-be-provided by the build system. 290 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs( 291 const ModuleDeps &Deps, 292 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const; 293 294 /// Collect module map files for given modules. 295 llvm::DenseSet<const FileEntry *> 296 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const; 297 298 /// Add module map files to the invocation, if needed. 299 void addModuleMapFiles(CompilerInvocation &CI, 300 ArrayRef<ModuleID> ClangModuleDeps) const; 301 /// Add module files (pcm) to the invocation, if needed. 302 void addModuleFiles(CompilerInvocation &CI, 303 ArrayRef<ModuleID> ClangModuleDeps) const; 304 void addModuleFiles(CowCompilerInvocation &CI, 305 ArrayRef<ModuleID> ClangModuleDeps) const; 306 307 /// Add paths that require looking up outputs to the given dependencies. 308 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps); 309 310 /// Compute the context hash for \p Deps, and create the mapping 311 /// \c ModuleDepsByID[Deps.ID] = &Deps. 312 void associateWithContextHash(const CowCompilerInvocation &CI, 313 ModuleDeps &Deps); 314 }; 315 316 /// Resets codegen options that don't affect modules/PCH. 317 void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, 318 const LangOptions &LangOpts, 319 CodeGenOptions &CGOpts); 320 321 } // end namespace dependencies 322 } // end namespace tooling 323 } // end namespace clang 324 325 namespace llvm { 326 inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) { 327 return hash_combine(ID.ModuleName, ID.ContextHash); 328 } 329 330 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> { 331 using ModuleID = clang::tooling::dependencies::ModuleID; 332 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; } 333 static inline ModuleID getTombstoneKey() { 334 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash 335 } 336 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); } 337 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) { 338 return LHS == RHS; 339 } 340 }; 341 } // namespace llvm 342 343 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 344