1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/Module.h" 14 #include "clang/Basic/SourceManager.h" 15 #include "clang/Frontend/CompilerInvocation.h" 16 #include "clang/Frontend/Utils.h" 17 #include "clang/Lex/HeaderSearch.h" 18 #include "clang/Lex/PPCallbacks.h" 19 #include "clang/Serialization/ASTReader.h" 20 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/Hashing.h" 23 #include "llvm/ADT/StringSet.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <optional> 26 #include <string> 27 #include <unordered_map> 28 #include <variant> 29 30 namespace clang { 31 namespace tooling { 32 namespace dependencies { 33 34 class DependencyActionController; 35 class DependencyConsumer; 36 37 /// Modular dependency that has already been built prior to the dependency scan. 38 struct PrebuiltModuleDep { 39 std::string ModuleName; 40 std::string PCMFile; 41 std::string ModuleMapFile; 42 43 explicit PrebuiltModuleDep(const Module *M) 44 : ModuleName(M->getTopLevelModuleName()), 45 PCMFile(M->getASTFile()->getName()), 46 ModuleMapFile(M->PresumedModuleMapFile) {} 47 }; 48 49 /// This is used to identify a specific module. 50 struct ModuleID { 51 /// The name of the module. This may include `:` for C++20 module partitions, 52 /// or a header-name for C++20 header units. 53 std::string ModuleName; 54 55 /// The context hash of a module represents the compiler options that affect 56 /// the resulting command-line invocation. 57 /// 58 /// Modules with the same name and ContextHash but different invocations could 59 /// cause non-deterministic build results. 60 /// 61 /// Modules with the same name but a different \c ContextHash should be 62 /// treated as separate modules for the purpose of a build. 63 std::string ContextHash; 64 65 bool operator==(const ModuleID &Other) const { 66 return std::tie(ModuleName, ContextHash) == 67 std::tie(Other.ModuleName, Other.ContextHash); 68 } 69 70 bool operator<(const ModuleID& Other) const { 71 return std::tie(ModuleName, ContextHash) < 72 std::tie(Other.ModuleName, Other.ContextHash); 73 } 74 }; 75 76 /// P1689ModuleInfo - Represents the needed information of standard C++20 77 /// modules for P1689 format. 78 struct P1689ModuleInfo { 79 /// The name of the module. This may include `:` for partitions. 80 std::string ModuleName; 81 82 /// Optional. The source path to the module. 83 std::string SourcePath; 84 85 /// If this module is a standard c++ interface unit. 86 bool IsStdCXXModuleInterface = true; 87 88 enum class ModuleType { 89 NamedCXXModule 90 // To be supported 91 // AngleHeaderUnit, 92 // QuoteHeaderUnit 93 }; 94 ModuleType Type = ModuleType::NamedCXXModule; 95 }; 96 97 /// An output from a module compilation, such as the path of the module file. 98 enum class ModuleOutputKind { 99 /// The module file (.pcm). Required. 100 ModuleFile, 101 /// The path of the dependency file (.d), if any. 102 DependencyFile, 103 /// The null-separated list of names to use as the targets in the dependency 104 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver. 105 DependencyTargets, 106 /// The path of the serialized diagnostic file (.dia), if any. 107 DiagnosticSerializationFile, 108 }; 109 110 struct ModuleDeps { 111 /// The identifier of the module. 112 ModuleID ID; 113 114 /// Whether this is a "system" module. 115 bool IsSystem; 116 117 /// The path to the modulemap file which defines this module. 118 /// 119 /// This can be used to explicitly build this module. This file will 120 /// additionally appear in \c FileDeps as a dependency. 121 std::string ClangModuleMapFile; 122 123 /// A collection of absolute paths to module map files that this module needs 124 /// to know about. The ordering is significant. 125 std::vector<std::string> ModuleMapFileDeps; 126 127 /// A collection of prebuilt modular dependencies this module directly depends 128 /// on, not including transitive dependencies. 129 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 130 131 /// A list of module identifiers this module directly depends on, not 132 /// including transitive dependencies. 133 /// 134 /// This may include modules with a different context hash when it can be 135 /// determined that the differences are benign for this compilation. 136 std::vector<ModuleID> ClangModuleDeps; 137 138 /// The set of libraries or frameworks to link against when 139 /// an entity from this module is used. 140 llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries; 141 142 /// Invokes \c Cb for all file dependencies of this module. Each provided 143 /// \c StringRef is only valid within the individual callback invocation. 144 void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const; 145 146 /// Get (or compute) the compiler invocation that can be used to build this 147 /// module. Does not include argv[0]. 148 const std::vector<std::string> &getBuildArguments(); 149 150 private: 151 friend class ModuleDepCollector; 152 friend class ModuleDepCollectorPP; 153 154 /// The base directory for relative paths in \c FileDeps. 155 std::string FileDepsBaseDir; 156 157 /// A collection of paths to files that this module directly depends on, not 158 /// including transitive dependencies. 159 std::vector<std::string> FileDeps; 160 161 std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>> 162 BuildInfo; 163 }; 164 165 using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>; 166 167 class ModuleDepCollector; 168 169 /// Callback that records textual includes and direct modular includes/imports 170 /// during preprocessing. At the end of the main file, it also collects 171 /// transitive modular dependencies and passes everything to the 172 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 173 class ModuleDepCollectorPP final : public PPCallbacks { 174 public: 175 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} 176 177 void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, 178 SrcMgr::CharacteristicKind FileType, FileID PrevFID, 179 SourceLocation Loc) override; 180 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 181 StringRef FileName, bool IsAngled, 182 CharSourceRange FilenameRange, 183 OptionalFileEntryRef File, StringRef SearchPath, 184 StringRef RelativePath, const Module *SuggestedModule, 185 bool ModuleImported, 186 SrcMgr::CharacteristicKind FileType) override; 187 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, 188 const Module *Imported) override; 189 190 void EndOfMainFile() override; 191 192 private: 193 /// The parent dependency collector. 194 ModuleDepCollector &MDC; 195 196 void handleImport(const Module *Imported); 197 198 /// Adds direct modular dependencies that have already been built to the 199 /// ModuleDeps instance. 200 void 201 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, 202 llvm::DenseSet<const Module *> &SeenSubmodules); 203 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, 204 llvm::DenseSet<const Module *> &SeenSubmodules); 205 206 /// Traverses the previously collected direct modular dependencies to discover 207 /// transitive modular dependencies and fills the parent \c ModuleDepCollector 208 /// with both. 209 /// Returns the ID or nothing if the dependency is spurious and is ignored. 210 std::optional<ModuleID> handleTopLevelModule(const Module *M); 211 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, 212 llvm::DenseSet<const Module *> &AddedModules); 213 void addModuleDep(const Module *M, ModuleDeps &MD, 214 llvm::DenseSet<const Module *> &AddedModules); 215 216 /// Traverses the affecting modules and updates \c MD with references to the 217 /// parent \c ModuleDepCollector info. 218 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD, 219 llvm::DenseSet<const Module *> &AddedModules); 220 void addAffectingClangModule(const Module *M, ModuleDeps &MD, 221 llvm::DenseSet<const Module *> &AddedModules); 222 }; 223 224 /// Collects modular and non-modular dependencies of the main file by attaching 225 /// \c ModuleDepCollectorPP to the preprocessor. 226 class ModuleDepCollector final : public DependencyCollector { 227 public: 228 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, 229 CompilerInstance &ScanInstance, DependencyConsumer &C, 230 DependencyActionController &Controller, 231 CompilerInvocation OriginalCI, 232 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap, 233 ScanningOptimizations OptimizeArgs, bool EagerLoadModules, 234 bool IsStdModuleP1689Format); 235 236 void attachToPreprocessor(Preprocessor &PP) override; 237 void attachToASTReader(ASTReader &R) override; 238 239 /// Apply any changes implied by the discovered dependencies to the given 240 /// invocation, (e.g. disable implicit modules, add explicit module paths). 241 void applyDiscoveredDependencies(CompilerInvocation &CI); 242 243 private: 244 friend ModuleDepCollectorPP; 245 246 /// The compiler instance for scanning the current translation unit. 247 CompilerInstance &ScanInstance; 248 /// The consumer of collected dependency information. 249 DependencyConsumer &Consumer; 250 /// Callbacks for computing dependency information. 251 DependencyActionController &Controller; 252 /// Mapping from prebuilt AST files to their sorted list of VFS overlay files. 253 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap; 254 /// Path to the main source file. 255 std::string MainFile; 256 /// Hash identifying the compilation conditions of the current TU. 257 std::string ContextHash; 258 /// Non-modular file dependencies. This includes the main source file and 259 /// textually included header files. 260 std::vector<std::string> FileDeps; 261 /// Direct and transitive modular dependencies of the main source file. 262 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; 263 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without 264 /// a preprocessor. Storage owned by \c ModularDeps. 265 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID; 266 /// Direct modular dependencies that have already been built. 267 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps; 268 /// Working set of direct modular dependencies. 269 llvm::SetVector<const Module *> DirectModularDeps; 270 /// Options that control the dependency output generation. 271 std::unique_ptr<DependencyOutputOptions> Opts; 272 /// A Clang invocation that's based on the original TU invocation and that has 273 /// been partially transformed into one that can perform explicit build of 274 /// a discovered modular dependency. Note that this still needs to be adjusted 275 /// for each individual module. 276 CowCompilerInvocation CommonInvocation; 277 /// Whether to optimize the modules' command-line arguments. 278 ScanningOptimizations OptimizeArgs; 279 /// Whether to set up command-lines to load PCM files eagerly. 280 bool EagerLoadModules; 281 /// If we're generating dependency output in P1689 format 282 /// for standard C++ modules. 283 bool IsStdModuleP1689Format; 284 285 std::optional<P1689ModuleInfo> ProvidedStdCXXModule; 286 std::vector<P1689ModuleInfo> RequiredStdCXXModules; 287 288 /// Checks whether the module is known as being prebuilt. 289 bool isPrebuiltModule(const Module *M); 290 291 /// Adds \p Path to \c FileDeps, making it absolute if necessary. 292 void addFileDep(StringRef Path); 293 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary. 294 void addFileDep(ModuleDeps &MD, StringRef Path); 295 296 /// Get a Clang invocation adjusted to build the given modular dependency. 297 /// This excludes paths that are yet-to-be-provided by the build system. 298 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs( 299 const ModuleDeps &Deps, 300 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const; 301 302 /// Collect module map files for given modules. 303 llvm::DenseSet<const FileEntry *> 304 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const; 305 306 /// Add module map files to the invocation, if needed. 307 void addModuleMapFiles(CompilerInvocation &CI, 308 ArrayRef<ModuleID> ClangModuleDeps) const; 309 /// Add module files (pcm) to the invocation, if needed. 310 void addModuleFiles(CompilerInvocation &CI, 311 ArrayRef<ModuleID> ClangModuleDeps) const; 312 void addModuleFiles(CowCompilerInvocation &CI, 313 ArrayRef<ModuleID> ClangModuleDeps) const; 314 315 /// Add paths that require looking up outputs to the given dependencies. 316 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps); 317 318 /// Compute the context hash for \p Deps, and create the mapping 319 /// \c ModuleDepsByID[Deps.ID] = &Deps. 320 void associateWithContextHash(const CowCompilerInvocation &CI, 321 ModuleDeps &Deps); 322 }; 323 324 /// Resets codegen options that don't affect modules/PCH. 325 void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, 326 const LangOptions &LangOpts, 327 CodeGenOptions &CGOpts); 328 329 } // end namespace dependencies 330 } // end namespace tooling 331 } // end namespace clang 332 333 namespace llvm { 334 inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) { 335 return hash_combine(ID.ModuleName, ID.ContextHash); 336 } 337 338 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> { 339 using ModuleID = clang::tooling::dependencies::ModuleID; 340 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; } 341 static inline ModuleID getTombstoneKey() { 342 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash 343 } 344 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); } 345 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) { 346 return LHS == RHS; 347 } 348 }; 349 } // namespace llvm 350 351 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 352