xref: /freebsd-src/contrib/llvm-project/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h (revision aa1a8ff2d6dbc51ef058f46f3db5a8bb77967145)
1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Frontend/CompilerInvocation.h"
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Lex/PPCallbacks.h"
18 #include "clang/Serialization/ASTReader.h"
19 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/Hashing.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <optional>
25 #include <string>
26 #include <unordered_map>
27 #include <variant>
28 
29 namespace clang {
30 namespace tooling {
31 namespace dependencies {
32 
// Forward declarations. Within this header both types are only ever used
// through references, so their full definitions are not required here.
class DependencyActionController;
class DependencyConsumer;
35 
36 /// Modular dependency that has already been built prior to the dependency scan.
37 struct PrebuiltModuleDep {
38   std::string ModuleName;
39   std::string PCMFile;
40   std::string ModuleMapFile;
41 
42   explicit PrebuiltModuleDep(const Module *M)
43       : ModuleName(M->getTopLevelModuleName()),
44         PCMFile(M->getASTFile()->getName()),
45         ModuleMapFile(M->PresumedModuleMapFile) {}
46 };
47 
/// Identifies one specific module.
struct ModuleID {
  /// The module's name. For C++20 module partitions this may contain `:`;
  /// for C++20 header units it may be a header-name.
  std::string ModuleName;

  /// Hash of the compiler options that affect the resulting command-line
  /// invocation.
  ///
  /// Two modules that share both name and \c ContextHash but are built from
  /// different invocations could make the build non-deterministic.
  ///
  /// Two modules that share a name but differ in \c ContextHash should be
  /// treated as distinct modules for the purpose of a build.
  std::string ContextHash;

  /// IDs are equal when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
  }

  /// Lexicographic ordering: by name first, then by context hash.
  bool operator<(const ModuleID &Other) const {
    if (const int NameOrder = ModuleName.compare(Other.ModuleName))
      return NameOrder < 0;
    return ContextHash < Other.ContextHash;
  }
};
74 
/// P1689ModuleInfo - The information about a standard C++20 module that is
/// needed for the P1689 format.
struct P1689ModuleInfo {
  /// The kinds of module that can be described.
  enum class ModuleType {
    NamedCXXModule
    // To be supported
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };

  /// The module's name. For partitions this may contain `:`.
  std::string ModuleName;

  /// Optional. The source path to the module.
  std::string SourcePath;

  /// Whether this module is a standard C++ interface unit.
  bool IsStdCXXModuleInterface = true;

  /// Kind of this module; currently always a named C++ module.
  ModuleType Type = ModuleType::NamedCXXModule;
};
95 
/// The kinds of output a module compilation can produce, e.g. the path of the
/// module file.
enum class ModuleOutputKind {
  /// Required: the module file (.pcm).
  ModuleFile,
  /// The dependency file path (.d), if there is one.
  DependencyFile,
  /// The names, separated by null characters, to use as targets in the
  /// dependency file, if any. As in the driver, this defaults to the value of
  /// \c ModuleFile.
  DependencyTargets,
  /// The serialized diagnostic file path (.dia), if there is one.
  DiagnosticSerializationFile,
};
108 
109 struct ModuleDeps {
110   /// The identifier of the module.
111   ModuleID ID;
112 
113   /// Whether this is a "system" module.
114   bool IsSystem;
115 
116   /// The path to the modulemap file which defines this module.
117   ///
118   /// This can be used to explicitly build this module. This file will
119   /// additionally appear in \c FileDeps as a dependency.
120   std::string ClangModuleMapFile;
121 
122   /// A collection of absolute paths to files that this module directly depends
123   /// on, not including transitive dependencies.
124   llvm::StringSet<> FileDeps;
125 
126   /// A collection of absolute paths to module map files that this module needs
127   /// to know about. The ordering is significant.
128   std::vector<std::string> ModuleMapFileDeps;
129 
130   /// A collection of prebuilt modular dependencies this module directly depends
131   /// on, not including transitive dependencies.
132   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
133 
134   /// A list of module identifiers this module directly depends on, not
135   /// including transitive dependencies.
136   ///
137   /// This may include modules with a different context hash when it can be
138   /// determined that the differences are benign for this compilation.
139   std::vector<ModuleID> ClangModuleDeps;
140 
141   /// Get (or compute) the compiler invocation that can be used to build this
142   /// module. Does not include argv[0].
143   const std::vector<std::string> &getBuildArguments();
144 
145 private:
146   friend class ModuleDepCollectorPP;
147 
148   std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
149       BuildInfo;
150 };
151 
152 class ModuleDepCollector;
153 
154 /// Callback that records textual includes and direct modular includes/imports
155 /// during preprocessing. At the end of the main file, it also collects
156 /// transitive modular dependencies and passes everything to the
157 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
158 class ModuleDepCollectorPP final : public PPCallbacks {
159 public:
160   ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
161 
162   void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
163                         SrcMgr::CharacteristicKind FileType, FileID PrevFID,
164                         SourceLocation Loc) override;
165   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
166                           StringRef FileName, bool IsAngled,
167                           CharSourceRange FilenameRange,
168                           OptionalFileEntryRef File, StringRef SearchPath,
169                           StringRef RelativePath, const Module *Imported,
170                           SrcMgr::CharacteristicKind FileType) override;
171   void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
172                     const Module *Imported) override;
173 
174   void EndOfMainFile() override;
175 
176 private:
177   /// The parent dependency collector.
178   ModuleDepCollector &MDC;
179 
180   void handleImport(const Module *Imported);
181 
182   /// Adds direct modular dependencies that have already been built to the
183   /// ModuleDeps instance.
184   void
185   addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
186                               llvm::DenseSet<const Module *> &SeenSubmodules);
187   void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
188                              llvm::DenseSet<const Module *> &SeenSubmodules);
189 
190   /// Traverses the previously collected direct modular dependencies to discover
191   /// transitive modular dependencies and fills the parent \c ModuleDepCollector
192   /// with both.
193   /// Returns the ID or nothing if the dependency is spurious and is ignored.
194   std::optional<ModuleID> handleTopLevelModule(const Module *M);
195   void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
196                            llvm::DenseSet<const Module *> &AddedModules);
197   void addModuleDep(const Module *M, ModuleDeps &MD,
198                     llvm::DenseSet<const Module *> &AddedModules);
199 
200   /// Traverses the affecting modules and updates \c MD with references to the
201   /// parent \c ModuleDepCollector info.
202   void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
203                               llvm::DenseSet<const Module *> &AddedModules);
204   void addAffectingClangModule(const Module *M, ModuleDeps &MD,
205                           llvm::DenseSet<const Module *> &AddedModules);
206 };
207 
208 /// Collects modular and non-modular dependencies of the main file by attaching
209 /// \c ModuleDepCollectorPP to the preprocessor.
210 class ModuleDepCollector final : public DependencyCollector {
211 public:
212   ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
213                      CompilerInstance &ScanInstance, DependencyConsumer &C,
214                      DependencyActionController &Controller,
215                      CompilerInvocation OriginalCI,
216                      ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
217                      bool IsStdModuleP1689Format);
218 
219   void attachToPreprocessor(Preprocessor &PP) override;
220   void attachToASTReader(ASTReader &R) override;
221 
222   /// Apply any changes implied by the discovered dependencies to the given
223   /// invocation, (e.g. disable implicit modules, add explicit module paths).
224   void applyDiscoveredDependencies(CompilerInvocation &CI);
225 
226 private:
227   friend ModuleDepCollectorPP;
228 
229   /// The compiler instance for scanning the current translation unit.
230   CompilerInstance &ScanInstance;
231   /// The consumer of collected dependency information.
232   DependencyConsumer &Consumer;
233   /// Callbacks for computing dependency information.
234   DependencyActionController &Controller;
235   /// Path to the main source file.
236   std::string MainFile;
237   /// Hash identifying the compilation conditions of the current TU.
238   std::string ContextHash;
239   /// Non-modular file dependencies. This includes the main source file and
240   /// textually included header files.
241   std::vector<std::string> FileDeps;
242   /// Direct and transitive modular dependencies of the main source file.
243   llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
244   /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
245   /// a preprocessor. Storage owned by \c ModularDeps.
246   llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
247   /// Direct modular dependencies that have already been built.
248   llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
249   /// Working set of direct modular dependencies.
250   llvm::SetVector<const Module *> DirectModularDeps;
251   /// Options that control the dependency output generation.
252   std::unique_ptr<DependencyOutputOptions> Opts;
253   /// A Clang invocation that's based on the original TU invocation and that has
254   /// been partially transformed into one that can perform explicit build of
255   /// a discovered modular dependency. Note that this still needs to be adjusted
256   /// for each individual module.
257   CowCompilerInvocation CommonInvocation;
258   /// Whether to optimize the modules' command-line arguments.
259   ScanningOptimizations OptimizeArgs;
260   /// Whether to set up command-lines to load PCM files eagerly.
261   bool EagerLoadModules;
262   /// If we're generating dependency output in P1689 format
263   /// for standard C++ modules.
264   bool IsStdModuleP1689Format;
265 
266   std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
267   std::vector<P1689ModuleInfo> RequiredStdCXXModules;
268 
269   /// Checks whether the module is known as being prebuilt.
270   bool isPrebuiltModule(const Module *M);
271 
272   /// Adds \p Path to \c FileDeps, making it absolute if necessary.
273   void addFileDep(StringRef Path);
274   /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
275   void addFileDep(ModuleDeps &MD, StringRef Path);
276 
277   /// Get a Clang invocation adjusted to build the given modular dependency.
278   /// This excludes paths that are yet-to-be-provided by the build system.
279   CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
280       const ModuleDeps &Deps,
281       llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
282 
283   /// Collect module map files for given modules.
284   llvm::DenseSet<const FileEntry *>
285   collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
286 
287   /// Add module map files to the invocation, if needed.
288   void addModuleMapFiles(CompilerInvocation &CI,
289                          ArrayRef<ModuleID> ClangModuleDeps) const;
290   /// Add module files (pcm) to the invocation, if needed.
291   void addModuleFiles(CompilerInvocation &CI,
292                       ArrayRef<ModuleID> ClangModuleDeps) const;
293   void addModuleFiles(CowCompilerInvocation &CI,
294                       ArrayRef<ModuleID> ClangModuleDeps) const;
295 
296   /// Add paths that require looking up outputs to the given dependencies.
297   void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
298 
299   /// Compute the context hash for \p Deps, and create the mapping
300   /// \c ModuleDepsByID[Deps.ID] = &Deps.
301   void associateWithContextHash(const CowCompilerInvocation &CI,
302                                 ModuleDeps &Deps);
303 };
304 
305 } // end namespace dependencies
306 } // end namespace tooling
307 } // end namespace clang
308 
309 namespace llvm {
310 inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) {
311   return hash_combine(ID.ModuleName, ID.ContextHash);
312 }
313 
314 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
315   using ModuleID = clang::tooling::dependencies::ModuleID;
316   static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
317   static inline ModuleID getTombstoneKey() {
318     return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
319   }
320   static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
321   static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
322     return LHS == RHS;
323   }
324 };
325 } // namespace llvm
326 
327 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
328