xref: /llvm-project/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h (revision 9d4837f47c48c634d4a0ac799188e1f5332495ef)
1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/Module.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Frontend/CompilerInvocation.h"
16 #include "clang/Frontend/Utils.h"
17 #include "clang/Lex/HeaderSearch.h"
18 #include "clang/Lex/PPCallbacks.h"
19 #include "clang/Serialization/ASTReader.h"
20 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/Hashing.h"
23 #include "llvm/ADT/StringSet.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <optional>
26 #include <string>
27 #include <unordered_map>
28 #include <variant>
29 
30 namespace clang {
31 namespace tooling {
32 namespace dependencies {
33 
34 class DependencyActionController;
35 class DependencyConsumer;
36 
/// Modular dependency that has already been built prior to the dependency scan.
///
/// All three fields are \c std::string copies made at construction time; no
/// pointer to the originating \c Module is retained.
struct PrebuiltModuleDep {
  /// Top-level module name, as returned by \c Module::getTopLevelModuleName().
  std::string ModuleName;
  /// Name of the module's AST file, taken from \c Module::getASTFile().
  std::string PCMFile;
  /// Copied from \c Module::PresumedModuleMapFile.
  std::string ModuleMapFile;

  /// Captures the name, AST file name and presumed module map file of \p M.
  ///
  /// NOTE(review): both \p M and the result of \c M->getASTFile() are
  /// dereferenced unconditionally here. Callers presumably only pass modules
  /// that are known to have an AST file -- confirm at the call sites.
  explicit PrebuiltModuleDep(const Module *M)
      : ModuleName(M->getTopLevelModuleName()),
        PCMFile(M->getASTFile()->getName()),
        ModuleMapFile(M->PresumedModuleMapFile) {}
};
48 
/// Identifies one specific module: its name paired with the hash of the
/// compiler options it is built with.
struct ModuleID {
  /// The name of the module. This may include `:` for C++20 module partitions,
  /// or a header-name for C++20 header units.
  std::string ModuleName;

  /// The context hash of a module represents the compiler options that affect
  /// the resulting command-line invocation.
  ///
  /// Modules with the same name and ContextHash but different invocations could
  /// cause non-deterministic build results.
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// Two IDs are equal only when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
  }

  /// Lexicographic ordering: by module name first, with the context hash
  /// acting as the tie-breaker for equal names.
  bool operator<(const ModuleID &Other) const {
    if (const int NameOrder = ModuleName.compare(Other.ModuleName))
      return NameOrder < 0;
    return ContextHash < Other.ContextHash;
  }
};
75 
/// P1689ModuleInfo - Describes a standard C++20 module with the information
/// required by the P1689 dependency format.
struct P1689ModuleInfo {
  /// The kinds of module that can be described.
  enum class ModuleType {
    NamedCXXModule
    // To be supported
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };

  /// The name of the module. This may include `:` for partitions.
  std::string ModuleName;

  /// Optional. The source path to the module.
  std::string SourcePath;

  /// Whether this module is a standard C++ interface unit. Defaults to true.
  bool IsStdCXXModuleInterface{true};

  /// The kind of this module. Defaults to \c NamedCXXModule, the only kind
  /// defined so far.
  ModuleType Type{ModuleType::NamedCXXModule};
};
96 
/// Names the individual outputs a module compilation can produce (for
/// example, the path of the module file).
enum class ModuleOutputKind {
  /// Path of the module file (.pcm). This output is required.
  ModuleFile,
  /// Path of the dependency file (.d); may be absent.
  DependencyFile,
  /// Null-separated list of target names to write into the dependency file;
  /// may be absent. Defaults to the value of \c ModuleFile, as in the driver.
  DependencyTargets,
  /// Path of the serialized diagnostic file (.dia); may be absent.
  DiagnosticSerializationFile,
};
109 
110 struct ModuleDeps {
111   /// The identifier of the module.
112   ModuleID ID;
113 
114   /// Whether this is a "system" module.
115   bool IsSystem;
116 
117   /// The path to the modulemap file which defines this module.
118   ///
119   /// This can be used to explicitly build this module. This file will
120   /// additionally appear in \c FileDeps as a dependency.
121   std::string ClangModuleMapFile;
122 
123   /// A collection of absolute paths to module map files that this module needs
124   /// to know about. The ordering is significant.
125   std::vector<std::string> ModuleMapFileDeps;
126 
127   /// A collection of prebuilt modular dependencies this module directly depends
128   /// on, not including transitive dependencies.
129   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
130 
131   /// A list of module identifiers this module directly depends on, not
132   /// including transitive dependencies.
133   ///
134   /// This may include modules with a different context hash when it can be
135   /// determined that the differences are benign for this compilation.
136   std::vector<ModuleID> ClangModuleDeps;
137 
138   /// The set of libraries or frameworks to link against when
139   /// an entity from this module is used.
140   llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries;
141 
142   /// Invokes \c Cb for all file dependencies of this module. Each provided
143   /// \c StringRef is only valid within the individual callback invocation.
144   void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
145 
146   /// Get (or compute) the compiler invocation that can be used to build this
147   /// module. Does not include argv[0].
148   const std::vector<std::string> &getBuildArguments();
149 
150 private:
151   friend class ModuleDepCollector;
152   friend class ModuleDepCollectorPP;
153 
154   /// The base directory for relative paths in \c FileDeps.
155   std::string FileDepsBaseDir;
156 
157   /// A collection of paths to files that this module directly depends on, not
158   /// including transitive dependencies.
159   std::vector<std::string> FileDeps;
160 
161   std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
162       BuildInfo;
163 };
164 
/// Maps a string key to a set of strings. \c ModuleDepCollector keeps one of
/// these to associate prebuilt AST files with their VFS overlay files.
using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>;
166 
167 class ModuleDepCollector;
168 
/// Callback that records textual includes and direct modular includes/imports
/// during preprocessing. At the end of the main file, it also collects
/// transitive modular dependencies and passes everything to the
/// \c DependencyConsumer of the parent \c ModuleDepCollector.
///
/// All member functions are defined out of line; this header only fixes the
/// interface.
class ModuleDepCollectorPP final : public PPCallbacks {
public:
  /// Binds this callback object to its parent collector \p MDC. Only a
  /// reference is stored, so \p MDC must outlive this object.
  ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}

  // Preprocessor hooks overridden from \c PPCallbacks. Per the class comment,
  // these are where textual includes and direct modular includes/imports get
  // recorded.
  void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
                        SrcMgr::CharacteristicKind FileType, FileID PrevFID,
                        SourceLocation Loc) override;
  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                          StringRef FileName, bool IsAngled,
                          CharSourceRange FilenameRange,
                          OptionalFileEntryRef File, StringRef SearchPath,
                          StringRef RelativePath, const Module *SuggestedModule,
                          bool ModuleImported,
                          SrcMgr::CharacteristicKind FileType) override;
  void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
                    const Module *Imported) override;

  /// \c PPCallbacks hook for the end of the main file. Per the class comment,
  /// this is the point at which transitive modular dependencies are collected
  /// and everything is handed to the consumer.
  void EndOfMainFile() override;

private:
  /// The parent dependency collector.
  ModuleDepCollector &MDC;

  /// Common handling for an imported module.
  /// NOTE(review): presumably shared by \c InclusionDirective and
  /// \c moduleImport -- confirm in the implementation.
  void handleImport(const Module *Imported);

  /// Adds direct modular dependencies that have already been built to the
  /// ModuleDeps instance.
  ///
  /// NOTE(review): \p SeenSubmodules presumably tracks already-visited
  /// submodules so each is processed once -- confirm in the implementation.
  void
  addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
                              llvm::DenseSet<const Module *> &SeenSubmodules);
  void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
                             llvm::DenseSet<const Module *> &SeenSubmodules);

  /// Traverses the previously collected direct modular dependencies to discover
  /// transitive modular dependencies and fills the parent \c ModuleDepCollector
  /// with both.
  /// Returns the ID or nothing if the dependency is spurious and is ignored.
  std::optional<ModuleID> handleTopLevelModule(const Module *M);
  void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
                           llvm::DenseSet<const Module *> &AddedModules);
  void addModuleDep(const Module *M, ModuleDeps &MD,
                    llvm::DenseSet<const Module *> &AddedModules);

  /// Traverses the affecting modules and updates \c MD with references to the
  /// parent \c ModuleDepCollector info.
  void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
                              llvm::DenseSet<const Module *> &AddedModules);
  void addAffectingClangModule(const Module *M, ModuleDeps &MD,
                          llvm::DenseSet<const Module *> &AddedModules);
};
223 
/// Collects modular and non-modular dependencies of the main file by attaching
/// \c ModuleDepCollectorPP to the preprocessor.
///
/// All member functions are defined out of line; this header only fixes the
/// interface and the data layout.
class ModuleDepCollector final : public DependencyCollector {
public:
  /// Creates a collector.
  ///
  /// NOTE(review): the constructor body is not visible here. The parameters
  /// presumably initialize the like-named members below, with \p OriginalCI
  /// serving as the basis for \c CommonInvocation -- confirm in the
  /// implementation.
  ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
                     CompilerInstance &ScanInstance, DependencyConsumer &C,
                     DependencyActionController &Controller,
                     CompilerInvocation OriginalCI,
                     PrebuiltModuleVFSMapT PrebuiltModuleVFSMap,
                     ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
                     bool IsStdModuleP1689Format);

  // \c DependencyCollector overrides. Per the class comment, attaching to the
  // preprocessor is how \c ModuleDepCollectorPP gets installed.
  void attachToPreprocessor(Preprocessor &PP) override;
  void attachToASTReader(ASTReader &R) override;

  /// Apply any changes implied by the discovered dependencies to the given
  /// invocation, (e.g. disable implicit modules, add explicit module paths).
  void applyDiscoveredDependencies(CompilerInvocation &CI);

private:
  // The preprocessor callback object reads and fills the private state below.
  friend ModuleDepCollectorPP;

  /// The compiler instance for scanning the current translation unit.
  CompilerInstance &ScanInstance;
  /// The consumer of collected dependency information.
  DependencyConsumer &Consumer;
  /// Callbacks for computing dependency information.
  DependencyActionController &Controller;
  /// Mapping from prebuilt AST files to their set of VFS overlay files (each
  /// set is an \c llvm::StringSet, see \c PrebuiltModuleVFSMapT).
  PrebuiltModuleVFSMapT PrebuiltModuleVFSMap;
  /// Path to the main source file.
  std::string MainFile;
  /// Hash identifying the compilation conditions of the current TU.
  std::string ContextHash;
  /// Non-modular file dependencies. This includes the main source file and
  /// textually included header files.
  std::vector<std::string> FileDeps;
  /// Direct and transitive modular dependencies of the main source file.
  llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
  /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
  /// a preprocessor. Storage owned by \c ModularDeps.
  llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
  /// Direct modular dependencies that have already been built.
  llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
  /// Working set of direct modular dependencies.
  llvm::SetVector<const Module *> DirectModularDeps;
  /// Options that control the dependency output generation.
  std::unique_ptr<DependencyOutputOptions> Opts;
  /// A Clang invocation that's based on the original TU invocation and that has
  /// been partially transformed into one that can perform explicit build of
  /// a discovered modular dependency. Note that this still needs to be adjusted
  /// for each individual module.
  CowCompilerInvocation CommonInvocation;
  /// Whether to optimize the modules' command-line arguments.
  ScanningOptimizations OptimizeArgs;
  /// Whether to set up command-lines to load PCM files eagerly.
  bool EagerLoadModules;
  /// Whether we're generating dependency output in P1689 format
  /// for standard C++ modules.
  bool IsStdModuleP1689Format;

  /// NOTE(review): presumably the standard C++ module provided by the current
  /// TU (if any) and the standard C++ modules it requires, as reported in
  /// P1689 output -- confirm in the implementation.
  std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
  std::vector<P1689ModuleInfo> RequiredStdCXXModules;

  /// Checks whether the module is known as being prebuilt.
  bool isPrebuiltModule(const Module *M);

  /// Adds \p Path to \c FileDeps, making it absolute if necessary.
  void addFileDep(StringRef Path);
  /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
  void addFileDep(ModuleDeps &MD, StringRef Path);

  /// Get a Clang invocation adjusted to build the given modular dependency.
  /// This excludes paths that are yet-to-be-provided by the build system.
  ///
  /// \p Optimize is a caller-supplied callback that receives the invocation
  /// by mutable reference.
  CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
      const ModuleDeps &Deps,
      llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;

  /// Collect module map files for given modules.
  llvm::DenseSet<const FileEntry *>
  collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;

  /// Add module map files to the invocation, if needed.
  void addModuleMapFiles(CompilerInvocation &CI,
                         ArrayRef<ModuleID> ClangModuleDeps) const;
  /// Add module files (pcm) to the invocation, if needed. Overloaded for both
  /// the plain and the copy-on-write invocation types.
  void addModuleFiles(CompilerInvocation &CI,
                      ArrayRef<ModuleID> ClangModuleDeps) const;
  void addModuleFiles(CowCompilerInvocation &CI,
                      ArrayRef<ModuleID> ClangModuleDeps) const;

  /// Add paths that require looking up outputs to the given dependencies.
  void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);

  /// Compute the context hash for \p Deps, and create the mapping
  /// \c ModuleDepsByID[Deps.ID] = &Deps.
  void associateWithContextHash(const CowCompilerInvocation &CI,
                                ModuleDeps &Deps);
};
323 
/// Resets codegen options that don't affect modules/PCH.
///
/// \p CGOpts is taken by non-const reference and is modified in place.
/// NOTE(review): the definition is out of line; how \p ProgramAction and
/// \p LangOpts influence which options are reset is not visible from this
/// header.
void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction,
                               const LangOptions &LangOpts,
                               CodeGenOptions &CGOpts);
328 
329 } // end namespace dependencies
330 } // end namespace tooling
331 } // end namespace clang
332 
333 namespace llvm {
334 inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) {
335   return hash_combine(ID.ModuleName, ID.ContextHash);
336 }
337 
338 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
339   using ModuleID = clang::tooling::dependencies::ModuleID;
340   static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
341   static inline ModuleID getTombstoneKey() {
342     return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
343   }
344   static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
345   static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
346     return LHS == RHS;
347   }
348 };
349 } // namespace llvm
350 
351 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
352