xref: /llvm-project/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp (revision 9d4837f47c48c634d4a0ac799188e1f5332495ef)
1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Basic/MakeSupport.h"
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Lex/Preprocessor.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/Support/BLAKE3.h"
17 #include "llvm/Support/StringSaver.h"
18 #include <optional>
19 
20 using namespace clang;
21 using namespace tooling;
22 using namespace dependencies;
23 
24 void ModuleDeps::forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const {
25   SmallString<0> PathBuf;
26   PathBuf.reserve(256);
27   for (StringRef FileDep : FileDeps) {
28     auto ResolvedFileDep =
29         ASTReader::ResolveImportedPath(PathBuf, FileDep, FileDepsBaseDir);
30     Cb(*ResolvedFileDep);
31   }
32 }
33 
34 const std::vector<std::string> &ModuleDeps::getBuildArguments() {
35   assert(!std::holds_alternative<std::monostate>(BuildInfo) &&
36          "Using uninitialized ModuleDeps");
37   if (const auto *CI = std::get_if<CowCompilerInvocation>(&BuildInfo))
38     BuildInfo = CI->getCC1CommandLine();
39   return std::get<std::vector<std::string>>(BuildInfo);
40 }
41 
42 static void
43 optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, ASTReader &Reader,
44                          const serialization::ModuleFile &MF,
45                          const PrebuiltModuleVFSMapT &PrebuiltModuleVFSMap,
46                          ScanningOptimizations OptimizeArgs) {
47   if (any(OptimizeArgs & ScanningOptimizations::HeaderSearch)) {
48     // Only preserve search paths that were used during the dependency scan.
49     std::vector<HeaderSearchOptions::Entry> Entries;
50     std::swap(Opts.UserEntries, Entries);
51 
52     llvm::BitVector SearchPathUsage(Entries.size());
53     llvm::DenseSet<const serialization::ModuleFile *> Visited;
54     std::function<void(const serialization::ModuleFile *)> VisitMF =
55         [&](const serialization::ModuleFile *MF) {
56           SearchPathUsage |= MF->SearchPathUsage;
57           Visited.insert(MF);
58           for (const serialization::ModuleFile *Import : MF->Imports)
59             if (!Visited.contains(Import))
60               VisitMF(Import);
61         };
62     VisitMF(&MF);
63 
64     if (SearchPathUsage.size() != Entries.size())
65       llvm::report_fatal_error(
66           "Inconsistent search path options between modules detected");
67 
68     for (auto Idx : SearchPathUsage.set_bits())
69       Opts.UserEntries.push_back(std::move(Entries[Idx]));
70   }
71   if (any(OptimizeArgs & ScanningOptimizations::VFS)) {
72     std::vector<std::string> VFSOverlayFiles;
73     std::swap(Opts.VFSOverlayFiles, VFSOverlayFiles);
74 
75     llvm::BitVector VFSUsage(VFSOverlayFiles.size());
76     llvm::DenseSet<const serialization::ModuleFile *> Visited;
77     std::function<void(const serialization::ModuleFile *)> VisitMF =
78         [&](const serialization::ModuleFile *MF) {
79           Visited.insert(MF);
80           if (MF->Kind == serialization::MK_ImplicitModule) {
81             VFSUsage |= MF->VFSUsage;
82             // We only need to recurse into implicit modules. Other module types
83             // will have the correct set of VFSs for anything they depend on.
84             for (const serialization::ModuleFile *Import : MF->Imports)
85               if (!Visited.contains(Import))
86                 VisitMF(Import);
87           } else {
88             // This is not an implicitly built module, so it may have different
89             // VFS options. Fall back to a string comparison instead.
90             auto VFSMap = PrebuiltModuleVFSMap.find(MF->FileName);
91             if (VFSMap == PrebuiltModuleVFSMap.end())
92               return;
93             for (std::size_t I = 0, E = VFSOverlayFiles.size(); I != E; ++I) {
94               if (VFSMap->second.contains(VFSOverlayFiles[I]))
95                 VFSUsage[I] = true;
96             }
97           }
98         };
99     VisitMF(&MF);
100 
101     if (VFSUsage.size() != VFSOverlayFiles.size())
102       llvm::report_fatal_error(
103           "Inconsistent -ivfsoverlay options between modules detected");
104 
105     for (auto Idx : VFSUsage.set_bits())
106       Opts.VFSOverlayFiles.push_back(std::move(VFSOverlayFiles[Idx]));
107   }
108 }
109 
110 static void optimizeDiagnosticOpts(DiagnosticOptions &Opts,
111                                    bool IsSystemModule) {
112   // If this is not a system module or -Wsystem-headers was passed, don't
113   // optimize.
114   if (!IsSystemModule)
115     return;
116   bool Wsystem_headers = false;
117   for (StringRef Opt : Opts.Warnings) {
118     bool isPositive = !Opt.consume_front("no-");
119     if (Opt == "system-headers")
120       Wsystem_headers = isPositive;
121   }
122   if (Wsystem_headers)
123     return;
124 
125   // Remove all warning flags. System modules suppress most, but not all,
126   // warnings.
127   Opts.Warnings.clear();
128   Opts.UndefPrefixes.clear();
129   Opts.Remarks.clear();
130 }
131 
132 static std::vector<std::string> splitString(std::string S, char Separator) {
133   SmallVector<StringRef> Segments;
134   StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false);
135   std::vector<std::string> Result;
136   Result.reserve(Segments.size());
137   for (StringRef Segment : Segments)
138     Result.push_back(Segment.str());
139   return Result;
140 }
141 
142 void ModuleDepCollector::addOutputPaths(CowCompilerInvocation &CI,
143                                         ModuleDeps &Deps) {
144   CI.getMutFrontendOpts().OutputFile =
145       Controller.lookupModuleOutput(Deps.ID, ModuleOutputKind::ModuleFile);
146   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
147     CI.getMutDiagnosticOpts().DiagnosticSerializationFile =
148         Controller.lookupModuleOutput(
149             Deps.ID, ModuleOutputKind::DiagnosticSerializationFile);
150   if (!CI.getDependencyOutputOpts().OutputFile.empty()) {
151     CI.getMutDependencyOutputOpts().OutputFile = Controller.lookupModuleOutput(
152         Deps.ID, ModuleOutputKind::DependencyFile);
153     CI.getMutDependencyOutputOpts().Targets =
154         splitString(Controller.lookupModuleOutput(
155                         Deps.ID, ModuleOutputKind::DependencyTargets),
156                     '\0');
157     if (!CI.getDependencyOutputOpts().OutputFile.empty() &&
158         CI.getDependencyOutputOpts().Targets.empty()) {
159       // Fallback to -o as dependency target, as in the driver.
160       SmallString<128> Target;
161       quoteMakeTarget(CI.getFrontendOpts().OutputFile, Target);
162       CI.getMutDependencyOutputOpts().Targets.push_back(std::string(Target));
163     }
164   }
165 }
166 
167 void dependencies::resetBenignCodeGenOptions(frontend::ActionKind ProgramAction,
168                                              const LangOptions &LangOpts,
169                                              CodeGenOptions &CGOpts) {
170   // TODO: Figure out better way to set options to their default value.
171   if (ProgramAction == frontend::GenerateModule) {
172     CGOpts.MainFileName.clear();
173     CGOpts.DwarfDebugFlags.clear();
174   }
175   if (ProgramAction == frontend::GeneratePCH ||
176       (ProgramAction == frontend::GenerateModule && !LangOpts.ModulesCodegen)) {
177     CGOpts.DebugCompilationDir.clear();
178     CGOpts.CoverageCompilationDir.clear();
179     CGOpts.CoverageDataFile.clear();
180     CGOpts.CoverageNotesFile.clear();
181     CGOpts.ProfileInstrumentUsePath.clear();
182     CGOpts.SampleProfileFile.clear();
183     CGOpts.ProfileRemappingFile.clear();
184   }
185 }
186 
187 static CowCompilerInvocation
188 makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
189   CI.resetNonModularOptions();
190   CI.clearImplicitModuleBuildOptions();
191 
192   // The scanner takes care to avoid passing non-affecting module maps to the
193   // explicit compiles. No need to do extra work just to find out there are no
194   // module map files to prune.
195   CI.getHeaderSearchOpts().ModulesPruneNonAffectingModuleMaps = false;
196 
197   // Remove options incompatible with explicit module build or are likely to
198   // differ between identical modules discovered from different translation
199   // units.
200   CI.getFrontendOpts().Inputs.clear();
201   CI.getFrontendOpts().OutputFile.clear();
202   // LLVM options are not going to affect the AST
203   CI.getFrontendOpts().LLVMArgs.clear();
204 
205   resetBenignCodeGenOptions(frontend::GenerateModule, CI.getLangOpts(),
206                             CI.getCodeGenOpts());
207 
208   // Map output paths that affect behaviour to "-" so their existence is in the
209   // context hash. The final path will be computed in addOutputPaths.
210   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
211     CI.getDiagnosticOpts().DiagnosticSerializationFile = "-";
212   if (!CI.getDependencyOutputOpts().OutputFile.empty())
213     CI.getDependencyOutputOpts().OutputFile = "-";
214   CI.getDependencyOutputOpts().Targets.clear();
215 
216   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
217   CI.getFrontendOpts().ARCMTAction = FrontendOptions::ARCMT_None;
218   CI.getFrontendOpts().ObjCMTAction = FrontendOptions::ObjCMT_None;
219   CI.getFrontendOpts().MTMigrateDir.clear();
220   CI.getLangOpts().ModuleName.clear();
221 
222   // Remove any macro definitions that are explicitly ignored.
223   if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
224     llvm::erase_if(
225         CI.getPreprocessorOpts().Macros,
226         [&CI](const std::pair<std::string, bool> &Def) {
227           StringRef MacroDef = Def.first;
228           return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
229               llvm::CachedHashString(MacroDef.split('=').first));
230         });
231     // Remove the now unused option.
232     CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
233   }
234 
235   return CI;
236 }
237 
238 CowCompilerInvocation
239 ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs(
240     const ModuleDeps &Deps,
241     llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const {
242   CowCompilerInvocation CI = CommonInvocation;
243 
244   CI.getMutLangOpts().ModuleName = Deps.ID.ModuleName;
245   CI.getMutFrontendOpts().IsSystemModule = Deps.IsSystem;
246 
247   // Inputs
248   InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(),
249                                InputKind::Format::ModuleMap);
250   CI.getMutFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile,
251                                               ModuleMapInputKind);
252 
253   auto CurrentModuleMapEntry =
254       ScanInstance.getFileManager().getOptionalFileRef(Deps.ClangModuleMapFile);
255   assert(CurrentModuleMapEntry && "module map file entry not found");
256 
257   // Remove directly passed modulemap files. They will get added back if they
258   // were actually used.
259   CI.getMutFrontendOpts().ModuleMapFiles.clear();
260 
261   auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps);
262   for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) {
263     // TODO: Track these as `FileEntryRef` to simplify the equality check below.
264     auto ModuleMapEntry =
265         ScanInstance.getFileManager().getOptionalFileRef(ModuleMapFile);
266     assert(ModuleMapEntry && "module map file entry not found");
267 
268     // Don't report module maps describing eagerly-loaded dependency. This
269     // information will be deserialized from the PCM.
270     // TODO: Verify this works fine when modulemap for module A is eagerly
271     // loaded from A.pcm, and module map passed on the command line contains
272     // definition of a submodule: "explicit module A.Private { ... }".
273     if (EagerLoadModules && DepModuleMapFiles.contains(*ModuleMapEntry))
274       continue;
275 
276     // Don't report module map file of the current module unless it also
277     // describes a dependency (for symmetry).
278     if (*ModuleMapEntry == *CurrentModuleMapEntry &&
279         !DepModuleMapFiles.contains(*ModuleMapEntry))
280       continue;
281 
282     CI.getMutFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
283   }
284 
285   // Report the prebuilt modules this module uses.
286   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
287     CI.getMutFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
288 
289   // Add module file inputs from dependencies.
290   addModuleFiles(CI, Deps.ClangModuleDeps);
291 
292   if (!CI.getDiagnosticOpts().SystemHeaderWarningsModules.empty()) {
293     // Apply -Wsystem-headers-in-module for the current module.
294     if (llvm::is_contained(CI.getDiagnosticOpts().SystemHeaderWarningsModules,
295                            Deps.ID.ModuleName))
296       CI.getMutDiagnosticOpts().Warnings.push_back("system-headers");
297     // Remove the now unused option(s).
298     CI.getMutDiagnosticOpts().SystemHeaderWarningsModules.clear();
299   }
300 
301   Optimize(CI);
302 
303   return CI;
304 }
305 
306 llvm::DenseSet<const FileEntry *> ModuleDepCollector::collectModuleMapFiles(
307     ArrayRef<ModuleID> ClangModuleDeps) const {
308   llvm::DenseSet<const FileEntry *> ModuleMapFiles;
309   for (const ModuleID &MID : ClangModuleDeps) {
310     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
311     assert(MD && "Inconsistent dependency info");
312     // TODO: Track ClangModuleMapFile as `FileEntryRef`.
313     auto FE = ScanInstance.getFileManager().getOptionalFileRef(
314         MD->ClangModuleMapFile);
315     assert(FE && "Missing module map file that was previously found");
316     ModuleMapFiles.insert(*FE);
317   }
318   return ModuleMapFiles;
319 }
320 
321 void ModuleDepCollector::addModuleMapFiles(
322     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
323   if (EagerLoadModules)
324     return; // Only pcm is needed for eager load.
325 
326   for (const ModuleID &MID : ClangModuleDeps) {
327     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
328     assert(MD && "Inconsistent dependency info");
329     CI.getFrontendOpts().ModuleMapFiles.push_back(MD->ClangModuleMapFile);
330   }
331 }
332 
333 void ModuleDepCollector::addModuleFiles(
334     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
335   for (const ModuleID &MID : ClangModuleDeps) {
336     std::string PCMPath =
337         Controller.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
338     if (EagerLoadModules)
339       CI.getFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
340     else
341       CI.getHeaderSearchOpts().PrebuiltModuleFiles.insert(
342           {MID.ModuleName, std::move(PCMPath)});
343   }
344 }
345 
346 void ModuleDepCollector::addModuleFiles(
347     CowCompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
348   for (const ModuleID &MID : ClangModuleDeps) {
349     std::string PCMPath =
350         Controller.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
351     if (EagerLoadModules)
352       CI.getMutFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
353     else
354       CI.getMutHeaderSearchOpts().PrebuiltModuleFiles.insert(
355           {MID.ModuleName, std::move(PCMPath)});
356   }
357 }
358 
359 static bool needsModules(FrontendInputFile FIF) {
360   switch (FIF.getKind().getLanguage()) {
361   case Language::Unknown:
362   case Language::Asm:
363   case Language::LLVM_IR:
364     return false;
365   default:
366     return true;
367   }
368 }
369 
370 void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
371   CI.clearImplicitModuleBuildOptions();
372   resetBenignCodeGenOptions(CI.getFrontendOpts().ProgramAction,
373                             CI.getLangOpts(), CI.getCodeGenOpts());
374 
375   if (llvm::any_of(CI.getFrontendOpts().Inputs, needsModules)) {
376     Preprocessor &PP = ScanInstance.getPreprocessor();
377     if (Module *CurrentModule = PP.getCurrentModuleImplementation())
378       if (OptionalFileEntryRef CurrentModuleMap =
379               PP.getHeaderSearchInfo()
380                   .getModuleMap()
381                   .getModuleMapFileForUniquing(CurrentModule))
382         CI.getFrontendOpts().ModuleMapFiles.emplace_back(
383             CurrentModuleMap->getNameAsRequested());
384 
385     SmallVector<ModuleID> DirectDeps;
386     for (const auto &KV : ModularDeps)
387       if (DirectModularDeps.contains(KV.first))
388         DirectDeps.push_back(KV.second->ID);
389 
390     // TODO: Report module maps the same way it's done for modular dependencies.
391     addModuleMapFiles(CI, DirectDeps);
392 
393     addModuleFiles(CI, DirectDeps);
394 
395     for (const auto &KV : DirectPrebuiltModularDeps)
396       CI.getFrontendOpts().ModuleFiles.push_back(KV.second.PCMFile);
397   }
398 }
399 
400 static std::string getModuleContextHash(const ModuleDeps &MD,
401                                         const CowCompilerInvocation &CI,
402                                         bool EagerLoadModules,
403                                         llvm::vfs::FileSystem &VFS) {
404   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
405       HashBuilder;
406   SmallString<32> Scratch;
407 
408   // Hash the compiler version and serialization version to ensure the module
409   // will be readable.
410   HashBuilder.add(getClangFullRepositoryVersion());
411   HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
412   llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
413   if (CWD)
414     HashBuilder.add(*CWD);
415 
416   // Hash the BuildInvocation without any input files.
417   SmallString<0> ArgVec;
418   ArgVec.reserve(4096);
419   CI.generateCC1CommandLine([&](const Twine &Arg) {
420     Arg.toVector(ArgVec);
421     ArgVec.push_back('\0');
422   });
423   HashBuilder.add(ArgVec);
424 
425   // Hash the module dependencies. These paths may differ even if the invocation
426   // is identical if they depend on the contents of the files in the TU -- for
427   // example, case-insensitive paths to modulemap files. Usually such a case
428   // would indicate a missed optimization to canonicalize, but it may be
429   // difficult to canonicalize all cases when there is a VFS.
430   for (const auto &ID : MD.ClangModuleDeps) {
431     HashBuilder.add(ID.ModuleName);
432     HashBuilder.add(ID.ContextHash);
433   }
434 
435   HashBuilder.add(EagerLoadModules);
436 
437   llvm::BLAKE3Result<16> Hash = HashBuilder.final();
438   std::array<uint64_t, 2> Words;
439   static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
440   std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
441   return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
442 }
443 
444 void ModuleDepCollector::associateWithContextHash(
445     const CowCompilerInvocation &CI, ModuleDeps &Deps) {
446   Deps.ID.ContextHash = getModuleContextHash(
447       Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
448   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
449   (void)Inserted;
450   assert(Inserted && "duplicate module mapping");
451 }
452 
453 void ModuleDepCollectorPP::LexedFileChanged(FileID FID,
454                                             LexedFileChangeReason Reason,
455                                             SrcMgr::CharacteristicKind FileType,
456                                             FileID PrevFID,
457                                             SourceLocation Loc) {
458   if (Reason != LexedFileChangeReason::EnterFile)
459     return;
460 
461   // This has to be delayed as the context hash can change at the start of
462   // `CompilerInstance::ExecuteAction`.
463   if (MDC.ContextHash.empty()) {
464     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
465     MDC.Consumer.handleContextHash(MDC.ContextHash);
466   }
467 
468   SourceManager &SM = MDC.ScanInstance.getSourceManager();
469 
470   // Dependency generation really does want to go all the way to the
471   // file entry for a source location to find out what is depended on.
472   // We do not want #line markers to affect dependency generation!
473   if (std::optional<StringRef> Filename = SM.getNonBuiltinFilenameForID(FID))
474     MDC.addFileDep(llvm::sys::path::remove_leading_dotslash(*Filename));
475 }
476 
477 void ModuleDepCollectorPP::InclusionDirective(
478     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
479     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
480     StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule,
481     bool ModuleImported, SrcMgr::CharacteristicKind FileType) {
482   if (!File && !ModuleImported) {
483     // This is a non-modular include that HeaderSearch failed to find. Add it
484     // here as `FileChanged` will never see it.
485     MDC.addFileDep(FileName);
486   }
487   handleImport(SuggestedModule);
488 }
489 
490 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
491                                         ModuleIdPath Path,
492                                         const Module *Imported) {
493   if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) {
494     P1689ModuleInfo RequiredModule;
495     RequiredModule.ModuleName = Path[0].first->getName().str();
496     RequiredModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
497     MDC.RequiredStdCXXModules.push_back(RequiredModule);
498     return;
499   }
500 
501   handleImport(Imported);
502 }
503 
504 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
505   if (!Imported)
506     return;
507 
508   const Module *TopLevelModule = Imported->getTopLevelModule();
509 
510   if (MDC.isPrebuiltModule(TopLevelModule))
511     MDC.DirectPrebuiltModularDeps.insert(
512         {TopLevelModule, PrebuiltModuleDep{TopLevelModule}});
513   else
514     MDC.DirectModularDeps.insert(TopLevelModule);
515 }
516 
517 void ModuleDepCollectorPP::EndOfMainFile() {
518   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
519   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
520                                  .getFileEntryRefForID(MainFileID)
521                                  ->getName());
522 
523   auto &PP = MDC.ScanInstance.getPreprocessor();
524   if (PP.isInNamedModule()) {
525     P1689ModuleInfo ProvidedModule;
526     ProvidedModule.ModuleName = PP.getNamedModuleName();
527     ProvidedModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
528     ProvidedModule.IsStdCXXModuleInterface = PP.isInNamedInterfaceUnit();
529     // Don't put implementation (non partition) unit as Provide.
530     // Put the module as required instead. Since the implementation
531     // unit will import the primary module implicitly.
532     if (PP.isInImplementationUnit())
533       MDC.RequiredStdCXXModules.push_back(ProvidedModule);
534     else
535       MDC.ProvidedStdCXXModule = ProvidedModule;
536   }
537 
538   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
539     MDC.addFileDep(MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
540 
541   for (const Module *M :
542        MDC.ScanInstance.getPreprocessor().getAffectingClangModules())
543     if (!MDC.isPrebuiltModule(M))
544       MDC.DirectModularDeps.insert(M);
545 
546   for (const Module *M : MDC.DirectModularDeps)
547     handleTopLevelModule(M);
548 
549   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
550 
551   if (MDC.IsStdModuleP1689Format)
552     MDC.Consumer.handleProvidedAndRequiredStdCXXModules(
553         MDC.ProvidedStdCXXModule, MDC.RequiredStdCXXModules);
554 
555   for (auto &&I : MDC.ModularDeps)
556     MDC.Consumer.handleModuleDependency(*I.second);
557 
558   for (const Module *M : MDC.DirectModularDeps) {
559     auto It = MDC.ModularDeps.find(M);
560     // Only report direct dependencies that were successfully handled.
561     if (It != MDC.ModularDeps.end())
562       MDC.Consumer.handleDirectModuleDependency(It->second->ID);
563   }
564 
565   for (auto &&I : MDC.FileDeps)
566     MDC.Consumer.handleFileDependency(I);
567 
568   for (auto &&I : MDC.DirectPrebuiltModularDeps)
569     MDC.Consumer.handlePrebuiltModuleDependency(I.second);
570 }
571 
572 std::optional<ModuleID>
573 ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
574   assert(M == M->getTopLevelModule() && "Expected top level module!");
575 
576   // A top-level module might not be actually imported as a module when
577   // -fmodule-name is used to compile a translation unit that imports this
578   // module. In that case it can be skipped. The appropriate header
579   // dependencies will still be reported as expected.
580   if (!M->getASTFile())
581     return {};
582 
583   // If this module has been handled already, just return its ID.
584   if (auto ModI = MDC.ModularDeps.find(M); ModI != MDC.ModularDeps.end())
585     return ModI->second->ID;
586 
587   auto OwnedMD = std::make_unique<ModuleDeps>();
588   ModuleDeps &MD = *OwnedMD;
589 
590   MD.ID.ModuleName = M->getFullModuleName();
591   MD.IsSystem = M->IsSystem;
592   // For modules which use export_as link name, the linked product that of the
593   // corresponding export_as-named module.
594   if (!M->UseExportAsModuleLinkName)
595     MD.LinkLibraries = M->LinkLibraries;
596 
597   ModuleMap &ModMapInfo =
598       MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
599 
600   if (auto ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M)) {
601     SmallString<128> Path = ModuleMap->getNameAsRequested();
602     ModMapInfo.canonicalizeModuleMapPath(Path);
603     MD.ClangModuleMapFile = std::string(Path);
604   }
605 
606   serialization::ModuleFile *MF =
607       MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
608           *M->getASTFile());
609   MD.FileDepsBaseDir = MF->BaseDirectory;
610   MDC.ScanInstance.getASTReader()->visitInputFileInfos(
611       *MF, /*IncludeSystem=*/true,
612       [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
613         // The __inferred_module.map file is an insignificant implementation
614         // detail of implicitly-built modules. The PCM will also report the
615         // actual on-disk module map file that allowed inferring the module,
616         // which is what we need for building the module explicitly
617         // Let's ignore this file.
618         if (IFI.UnresolvedImportedFilename.ends_with("__inferred_module.map"))
619           return;
620         MDC.addFileDep(MD, IFI.UnresolvedImportedFilename);
621       });
622 
623   llvm::DenseSet<const Module *> SeenDeps;
624   addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
625   addAllSubmoduleDeps(M, MD, SeenDeps);
626   addAllAffectingClangModules(M, MD, SeenDeps);
627 
628   SmallString<0> PathBuf;
629   PathBuf.reserve(256);
630   MDC.ScanInstance.getASTReader()->visitInputFileInfos(
631       *MF, /*IncludeSystem=*/true,
632       [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
633         if (!(IFI.TopLevel && IFI.ModuleMap))
634           return;
635         if (IFI.UnresolvedImportedFilenameAsRequested.ends_with(
636                 "__inferred_module.map"))
637           return;
638         auto ResolvedFilenameAsRequested = ASTReader::ResolveImportedPath(
639             PathBuf, IFI.UnresolvedImportedFilenameAsRequested,
640             MF->BaseDirectory);
641         MD.ModuleMapFileDeps.emplace_back(*ResolvedFilenameAsRequested);
642       });
643 
644   CowCompilerInvocation CI =
645       MDC.getInvocationAdjustedForModuleBuildWithoutOutputs(
646           MD, [&](CowCompilerInvocation &BuildInvocation) {
647             if (any(MDC.OptimizeArgs & (ScanningOptimizations::HeaderSearch |
648                                         ScanningOptimizations::VFS)))
649               optimizeHeaderSearchOpts(BuildInvocation.getMutHeaderSearchOpts(),
650                                        *MDC.ScanInstance.getASTReader(), *MF,
651                                        MDC.PrebuiltModuleVFSMap,
652                                        MDC.OptimizeArgs);
653             if (any(MDC.OptimizeArgs & ScanningOptimizations::SystemWarnings))
654               optimizeDiagnosticOpts(
655                   BuildInvocation.getMutDiagnosticOpts(),
656                   BuildInvocation.getFrontendOpts().IsSystemModule);
657           });
658 
659   MDC.associateWithContextHash(CI, MD);
660 
661   // Finish the compiler invocation. Requires dependencies and the context hash.
662   MDC.addOutputPaths(CI, MD);
663 
664   MD.BuildInfo = std::move(CI);
665 
666   MDC.ModularDeps.insert({M, std::move(OwnedMD)});
667 
668   return MD.ID;
669 }
670 
671 static void forEachSubmoduleSorted(const Module *M,
672                                    llvm::function_ref<void(const Module *)> F) {
673   // Submodule order depends on order of header includes for inferred submodules
674   // we don't care about the exact order, so sort so that it's consistent across
675   // TUs to improve sharing.
676   SmallVector<const Module *> Submodules(M->submodules());
677   llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
678     return A->Name < B->Name;
679   });
680   for (const Module *SubM : Submodules)
681     F(SubM);
682 }
683 
684 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
685     const Module *M, ModuleDeps &MD,
686     llvm::DenseSet<const Module *> &SeenSubmodules) {
687   addModulePrebuiltDeps(M, MD, SeenSubmodules);
688 
689   forEachSubmoduleSorted(M, [&](const Module *SubM) {
690     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
691   });
692 }
693 
694 void ModuleDepCollectorPP::addModulePrebuiltDeps(
695     const Module *M, ModuleDeps &MD,
696     llvm::DenseSet<const Module *> &SeenSubmodules) {
697   for (const Module *Import : M->Imports)
698     if (Import->getTopLevelModule() != M->getTopLevelModule())
699       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
700         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
701           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
702 }
703 
704 void ModuleDepCollectorPP::addAllSubmoduleDeps(
705     const Module *M, ModuleDeps &MD,
706     llvm::DenseSet<const Module *> &AddedModules) {
707   addModuleDep(M, MD, AddedModules);
708 
709   forEachSubmoduleSorted(M, [&](const Module *SubM) {
710     addAllSubmoduleDeps(SubM, MD, AddedModules);
711   });
712 }
713 
714 void ModuleDepCollectorPP::addModuleDep(
715     const Module *M, ModuleDeps &MD,
716     llvm::DenseSet<const Module *> &AddedModules) {
717   for (const Module *Import : M->Imports) {
718     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
719         !MDC.isPrebuiltModule(Import)) {
720       if (auto ImportID = handleTopLevelModule(Import->getTopLevelModule()))
721         if (AddedModules.insert(Import->getTopLevelModule()).second)
722           MD.ClangModuleDeps.push_back(*ImportID);
723     }
724   }
725 }
726 
727 void ModuleDepCollectorPP::addAllAffectingClangModules(
728     const Module *M, ModuleDeps &MD,
729     llvm::DenseSet<const Module *> &AddedModules) {
730   addAffectingClangModule(M, MD, AddedModules);
731 
732   for (const Module *SubM : M->submodules())
733     addAllAffectingClangModules(SubM, MD, AddedModules);
734 }
735 
736 void ModuleDepCollectorPP::addAffectingClangModule(
737     const Module *M, ModuleDeps &MD,
738     llvm::DenseSet<const Module *> &AddedModules) {
739   for (const Module *Affecting : M->AffectingClangModules) {
740     assert(Affecting == Affecting->getTopLevelModule() &&
741            "Not quite import not top-level module");
742     if (Affecting != M->getTopLevelModule() &&
743         !MDC.isPrebuiltModule(Affecting)) {
744       if (auto ImportID = handleTopLevelModule(Affecting))
745         if (AddedModules.insert(Affecting).second)
746           MD.ClangModuleDeps.push_back(*ImportID);
747     }
748   }
749 }
750 
751 ModuleDepCollector::ModuleDepCollector(
752     std::unique_ptr<DependencyOutputOptions> Opts,
753     CompilerInstance &ScanInstance, DependencyConsumer &C,
754     DependencyActionController &Controller, CompilerInvocation OriginalCI,
755     PrebuiltModuleVFSMapT PrebuiltModuleVFSMap,
756     ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
757     bool IsStdModuleP1689Format)
758     : ScanInstance(ScanInstance), Consumer(C), Controller(Controller),
759       PrebuiltModuleVFSMap(std::move(PrebuiltModuleVFSMap)),
760       Opts(std::move(Opts)),
761       CommonInvocation(
762           makeCommonInvocationForModuleBuild(std::move(OriginalCI))),
763       OptimizeArgs(OptimizeArgs), EagerLoadModules(EagerLoadModules),
764       IsStdModuleP1689Format(IsStdModuleP1689Format) {}
765 
766 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
767   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
768 }
769 
770 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
771 
772 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
773   std::string Name(M->getTopLevelModuleName());
774   const auto &PrebuiltModuleFiles =
775       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
776   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
777   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
778     return false;
779   assert("Prebuilt module came from the expected AST file" &&
780          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
781   return true;
782 }
783 
784 static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path,
785                                           SmallVectorImpl<char> &Storage) {
786   if (llvm::sys::path::is_absolute(Path) &&
787       !llvm::sys::path::is_style_windows(llvm::sys::path::Style::native))
788     return Path;
789   Storage.assign(Path.begin(), Path.end());
790   CI.getFileManager().makeAbsolutePath(Storage);
791   llvm::sys::path::make_preferred(Storage);
792   return StringRef(Storage.data(), Storage.size());
793 }
794 
795 void ModuleDepCollector::addFileDep(StringRef Path) {
796   if (IsStdModuleP1689Format) {
797     // Within P1689 format, we don't want all the paths to be absolute path
798     // since it may violate the traditional make style dependencies info.
799     FileDeps.emplace_back(Path);
800     return;
801   }
802 
803   llvm::SmallString<256> Storage;
804   Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
805   FileDeps.emplace_back(Path);
806 }
807 
808 void ModuleDepCollector::addFileDep(ModuleDeps &MD, StringRef Path) {
809   MD.FileDeps.emplace_back(Path);
810 }
811