xref: /llvm-project/clang/tools/clang-scan-deps/ClangScanDeps.cpp (revision fdd7cafb9078e146634a3fbb72a8949108dca425)
1 //===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Driver/Compilation.h"
10 #include "clang/Driver/Driver.h"
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Frontend/TextDiagnosticPrinter.h"
13 #include "clang/Tooling/CommonOptionsParser.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
15 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
16 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
17 #include "clang/Tooling/JSONCompilationDatabase.h"
18 #include "clang/Tooling/Tooling.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/Format.h"
24 #include "llvm/Support/JSON.h"
25 #include "llvm/Support/LLVMDriver.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/TargetSelect.h"
29 #include "llvm/Support/ThreadPool.h"
30 #include "llvm/Support/Threading.h"
31 #include "llvm/Support/Timer.h"
32 #include "llvm/Support/VirtualFileSystem.h"
33 #include "llvm/TargetParser/Host.h"
34 #include <mutex>
35 #include <optional>
36 #include <thread>
37 
38 #include "Opts.inc"
39 
40 using namespace clang;
41 using namespace tooling::dependencies;
42 
43 namespace {
44 
45 using namespace llvm::opt;
/// Identifiers for the tool's command-line options (the OPT_* constants used
/// throughout this file). Apart from OPT_INVALID, the enumerators are produced
/// by expanding OPTION once per record in the generated Opts.inc.
enum ID {
  OPT_INVALID = 0, // This is not an option ID.
#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};
52 
// Pull the option string table out of the generated Opts.inc.
#define OPTTABLE_STR_TABLE_CODE
#include "Opts.inc"
#undef OPTTABLE_STR_TABLE_CODE

// Pull the option prefixes table out of the generated Opts.inc.
#define OPTTABLE_PREFIXES_TABLE_CODE
#include "Opts.inc"
#undef OPTTABLE_PREFIXES_TABLE_CODE

// One Info record per OPTION entry in Opts.inc; handed to ScanDepsOptTable
// together with the two tables above.
const llvm::opt::OptTable::Info InfoTable[] = {
#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};
66 
67 class ScanDepsOptTable : public llvm::opt::GenericOptTable {
68 public:
69   ScanDepsOptTable()
70       : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {
71     setGroupedShortOptions(true);
72   }
73 };
74 
/// Value of the --resource-dir-recipe option.
/// NOTE(review): presumably selects how the resource directory is determined
/// for a compile command; the consumer is outside this part of the file.
enum ResourceDirRecipeKind {
  /// Spelled "modify-compiler-path" on the command line.
  RDRK_ModifyCompilerPath = 0,
  /// Spelled "invoke-compiler" on the command line.
  RDRK_InvokeCompiler = 1,
};
79 
80 static std::string OutputFileName = "-";
81 static ScanningMode ScanMode = ScanningMode::DependencyDirectivesScan;
82 static ScanningOutputFormat Format = ScanningOutputFormat::Make;
83 static ScanningOptimizations OptimizeArgs;
84 static std::string ModuleFilesDir;
85 static bool EagerLoadModules;
86 static unsigned NumThreads = 0;
87 static std::string CompilationDB;
88 static std::string ModuleName;
89 static std::vector<std::string> ModuleDepTargets;
90 static bool DeprecatedDriverCommand;
91 static ResourceDirRecipeKind ResourceDirRecipe;
92 static bool Verbose;
93 static bool PrintTiming;
94 static llvm::BumpPtrAllocator Alloc;
95 static llvm::StringSaver Saver{Alloc};
96 static std::vector<const char *> CommandLine;
97 
// Initial value of RoundTripArgs: true when NDEBUG is not defined, false
// otherwise.
static constexpr bool DoRoundTripDefault =
#ifndef NDEBUG
    true;
#else
    false;
#endif

// --round-trip-args. Note that ParseArgs assigns this unconditionally from the
// presence of the flag, replacing the initial value above.
static bool RoundTripArgs = DoRoundTripDefault;
105 
106 static void ParseArgs(int argc, char **argv) {
107   ScanDepsOptTable Tbl;
108   llvm::StringRef ToolName = argv[0];
109   llvm::opt::InputArgList Args =
110       Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
111         llvm::errs() << Msg << '\n';
112         std::exit(1);
113       });
114 
115   if (Args.hasArg(OPT_help)) {
116     Tbl.printHelp(llvm::outs(), "clang-scan-deps [options]", "clang-scan-deps");
117     std::exit(0);
118   }
119   if (Args.hasArg(OPT_version)) {
120     llvm::outs() << ToolName << '\n';
121     llvm::cl::PrintVersionMessage();
122     std::exit(0);
123   }
124   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_mode_EQ)) {
125     auto ModeType =
126         llvm::StringSwitch<std::optional<ScanningMode>>(A->getValue())
127             .Case("preprocess-dependency-directives",
128                   ScanningMode::DependencyDirectivesScan)
129             .Case("preprocess", ScanningMode::CanonicalPreprocessing)
130             .Default(std::nullopt);
131     if (!ModeType) {
132       llvm::errs() << ToolName
133                    << ": for the --mode option: Cannot find option named '"
134                    << A->getValue() << "'\n";
135       std::exit(1);
136     }
137     ScanMode = *ModeType;
138   }
139 
140   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_format_EQ)) {
141     auto FormatType =
142         llvm::StringSwitch<std::optional<ScanningOutputFormat>>(A->getValue())
143             .Case("make", ScanningOutputFormat::Make)
144             .Case("p1689", ScanningOutputFormat::P1689)
145             .Case("experimental-full", ScanningOutputFormat::Full)
146             .Default(std::nullopt);
147     if (!FormatType) {
148       llvm::errs() << ToolName
149                    << ": for the --format option: Cannot find option named '"
150                    << A->getValue() << "'\n";
151       std::exit(1);
152     }
153     Format = *FormatType;
154   }
155 
156   std::vector<std::string> OptimizationFlags =
157       Args.getAllArgValues(OPT_optimize_args_EQ);
158   OptimizeArgs = ScanningOptimizations::None;
159   for (const auto &Arg : OptimizationFlags) {
160     auto Optimization =
161         llvm::StringSwitch<std::optional<ScanningOptimizations>>(Arg)
162             .Case("none", ScanningOptimizations::None)
163             .Case("header-search", ScanningOptimizations::HeaderSearch)
164             .Case("system-warnings", ScanningOptimizations::SystemWarnings)
165             .Case("vfs", ScanningOptimizations::VFS)
166             .Case("canonicalize-macros", ScanningOptimizations::Macros)
167             .Case("all", ScanningOptimizations::All)
168             .Default(std::nullopt);
169     if (!Optimization) {
170       llvm::errs()
171           << ToolName
172           << ": for the --optimize-args option: Cannot find option named '"
173           << Arg << "'\n";
174       std::exit(1);
175     }
176     OptimizeArgs |= *Optimization;
177   }
178   if (OptimizationFlags.empty())
179     OptimizeArgs = ScanningOptimizations::Default;
180 
181   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_files_dir_EQ))
182     ModuleFilesDir = A->getValue();
183 
184   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_o))
185     OutputFileName = A->getValue();
186 
187   EagerLoadModules = Args.hasArg(OPT_eager_load_pcm);
188 
189   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_j)) {
190     StringRef S{A->getValue()};
191     if (!llvm::to_integer(S, NumThreads, 0)) {
192       llvm::errs() << ToolName << ": for the -j option: '" << S
193                    << "' value invalid for uint argument!\n";
194       std::exit(1);
195     }
196   }
197 
198   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_compilation_database_EQ))
199     CompilationDB = A->getValue();
200 
201   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_name_EQ))
202     ModuleName = A->getValue();
203 
204   for (const llvm::opt::Arg *A : Args.filtered(OPT_dependency_target_EQ))
205     ModuleDepTargets.emplace_back(A->getValue());
206 
207   DeprecatedDriverCommand = Args.hasArg(OPT_deprecated_driver_command);
208 
209   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_resource_dir_recipe_EQ)) {
210     auto Kind =
211         llvm::StringSwitch<std::optional<ResourceDirRecipeKind>>(A->getValue())
212             .Case("modify-compiler-path", RDRK_ModifyCompilerPath)
213             .Case("invoke-compiler", RDRK_InvokeCompiler)
214             .Default(std::nullopt);
215     if (!Kind) {
216       llvm::errs() << ToolName
217                    << ": for the --resource-dir-recipe option: Cannot find "
218                       "option named '"
219                    << A->getValue() << "'\n";
220       std::exit(1);
221     }
222     ResourceDirRecipe = *Kind;
223   }
224 
225   PrintTiming = Args.hasArg(OPT_print_timing);
226 
227   Verbose = Args.hasArg(OPT_verbose);
228 
229   RoundTripArgs = Args.hasArg(OPT_round_trip_args);
230 
231   if (const llvm::opt::Arg *A = Args.getLastArgNoClaim(OPT_DASH_DASH))
232     CommandLine.assign(A->getValues().begin(), A->getValues().end());
233 }
234 
235 class SharedStream {
236 public:
237   SharedStream(raw_ostream &OS) : OS(OS) {}
238   void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) {
239     std::unique_lock<std::mutex> LockGuard(Lock);
240     Fn(OS);
241     OS.flush();
242   }
243 
244 private:
245   std::mutex Lock;
246   raw_ostream &OS;
247 };
248 
249 class ResourceDirectoryCache {
250 public:
251   /// findResourceDir finds the resource directory relative to the clang
252   /// compiler being used in Args, by running it with "-print-resource-dir"
253   /// option and cache the results for reuse. \returns resource directory path
254   /// associated with the given invocation command or empty string if the
255   /// compiler path is NOT an absolute path.
256   StringRef findResourceDir(const tooling::CommandLineArguments &Args,
257                             bool ClangCLMode) {
258     if (Args.size() < 1)
259       return "";
260 
261     const std::string &ClangBinaryPath = Args[0];
262     if (!llvm::sys::path::is_absolute(ClangBinaryPath))
263       return "";
264 
265     const std::string &ClangBinaryName =
266         std::string(llvm::sys::path::filename(ClangBinaryPath));
267 
268     std::unique_lock<std::mutex> LockGuard(CacheLock);
269     const auto &CachedResourceDir = Cache.find(ClangBinaryPath);
270     if (CachedResourceDir != Cache.end())
271       return CachedResourceDir->second;
272 
273     std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName};
274     if (ClangCLMode)
275       PrintResourceDirArgs.push_back("/clang:-print-resource-dir");
276     else
277       PrintResourceDirArgs.push_back("-print-resource-dir");
278 
279     llvm::SmallString<64> OutputFile, ErrorFile;
280     llvm::sys::fs::createTemporaryFile("print-resource-dir-output",
281                                        "" /*no-suffix*/, OutputFile);
282     llvm::sys::fs::createTemporaryFile("print-resource-dir-error",
283                                        "" /*no-suffix*/, ErrorFile);
284     llvm::FileRemover OutputRemover(OutputFile.c_str());
285     llvm::FileRemover ErrorRemover(ErrorFile.c_str());
286     std::optional<StringRef> Redirects[] = {
287         {""}, // Stdin
288         OutputFile.str(),
289         ErrorFile.str(),
290     };
291     if (llvm::sys::ExecuteAndWait(ClangBinaryPath, PrintResourceDirArgs, {},
292                                   Redirects)) {
293       auto ErrorBuf = llvm::MemoryBuffer::getFile(ErrorFile.c_str());
294       llvm::errs() << ErrorBuf.get()->getBuffer();
295       return "";
296     }
297 
298     auto OutputBuf = llvm::MemoryBuffer::getFile(OutputFile.c_str());
299     if (!OutputBuf)
300       return "";
301     StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n');
302 
303     Cache[ClangBinaryPath] = Output.str();
304     return Cache[ClangBinaryPath];
305   }
306 
307 private:
308   std::map<std::string, std::string> Cache;
309   std::mutex CacheLock;
310 };
311 
312 } // end anonymous namespace
313 
314 /// Takes the result of a dependency scan and prints error / dependency files
315 /// based on the result.
316 ///
317 /// \returns True on error.
318 static bool
319 handleMakeDependencyToolResult(const std::string &Input,
320                                llvm::Expected<std::string> &MaybeFile,
321                                SharedStream &OS, SharedStream &Errs) {
322   if (!MaybeFile) {
323     llvm::handleAllErrors(
324         MaybeFile.takeError(), [&Input, &Errs](llvm::StringError &Err) {
325           Errs.applyLocked([&](raw_ostream &OS) {
326             OS << "Error while scanning dependencies for " << Input << ":\n";
327             OS << Err.getMessage();
328           });
329         });
330     return true;
331   }
332   OS.applyLocked([&](raw_ostream &OS) { OS << *MaybeFile; });
333   return false;
334 }
335 
336 template <typename Container>
337 static auto toJSONStrings(llvm::json::OStream &JOS, Container &&Strings) {
338   return [&JOS, Strings = std::forward<Container>(Strings)] {
339     for (StringRef Str : Strings)
340       JOS.value(Str);
341   };
342 }
343 
344 // Technically, we don't need to sort the dependency list to get determinism.
345 // Leaving these be will simply preserve the import order.
346 static auto toJSONSorted(llvm::json::OStream &JOS, std::vector<ModuleID> V) {
347   llvm::sort(V);
348   return [&JOS, V = std::move(V)] {
349     for (const ModuleID &MID : V)
350       JOS.object([&] {
351         JOS.attribute("context-hash", StringRef(MID.ContextHash));
352         JOS.attribute("module-name", StringRef(MID.ModuleName));
353       });
354   };
355 }
356 
357 static auto toJSONSorted(llvm::json::OStream &JOS,
358                          SmallVector<Module::LinkLibrary, 2> LinkLibs) {
359   llvm::sort(LinkLibs, [](const auto &LHS, const auto &RHS) {
360     return LHS.Library < RHS.Library;
361   });
362   return [&JOS, LinkLibs = std::move(LinkLibs)] {
363     for (const auto &LL : LinkLibs)
364       JOS.object([&] {
365         JOS.attribute("isFramework", LL.IsFramework);
366         JOS.attribute("link-name", StringRef(LL.Library));
367       });
368   };
369 }
370 
371 // Thread safe.
372 class FullDeps {
373 public:
374   FullDeps(size_t NumInputs) : Inputs(NumInputs) {}
375 
376   void mergeDeps(StringRef Input, TranslationUnitDeps TUDeps,
377                  size_t InputIndex) {
378     mergeDeps(std::move(TUDeps.ModuleGraph), InputIndex);
379 
380     InputDeps ID;
381     ID.FileName = std::string(Input);
382     ID.ContextHash = std::move(TUDeps.ID.ContextHash);
383     ID.FileDeps = std::move(TUDeps.FileDeps);
384     ID.ModuleDeps = std::move(TUDeps.ClangModuleDeps);
385     ID.DriverCommandLine = std::move(TUDeps.DriverCommandLine);
386     ID.Commands = std::move(TUDeps.Commands);
387 
388     assert(InputIndex < Inputs.size() && "Input index out of bounds");
389     assert(Inputs[InputIndex].FileName.empty() && "Result already populated");
390     Inputs[InputIndex] = std::move(ID);
391   }
392 
393   void mergeDeps(ModuleDepsGraph Graph, size_t InputIndex) {
394     std::vector<ModuleDeps *> NewMDs;
395     {
396       std::unique_lock<std::mutex> ul(Lock);
397       for (const ModuleDeps &MD : Graph) {
398         auto I = Modules.find({MD.ID, 0});
399         if (I != Modules.end()) {
400           I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
401           continue;
402         }
403         auto Res = Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)});
404         NewMDs.push_back(&Res->second);
405       }
406       // First call to \c getBuildArguments is somewhat expensive. Let's call it
407       // on the current thread (instead of the main one), and outside the
408       // critical section.
409       for (ModuleDeps *MD : NewMDs)
410         (void)MD->getBuildArguments();
411     }
412   }
413 
414   bool roundTripCommand(ArrayRef<std::string> ArgStrs,
415                         DiagnosticsEngine &Diags) {
416     if (ArgStrs.empty() || ArgStrs[0] != "-cc1")
417       return false;
418     SmallVector<const char *> Args;
419     for (const std::string &Arg : ArgStrs)
420       Args.push_back(Arg.c_str());
421     return !CompilerInvocation::checkCC1RoundTrip(Args, Diags);
422   }
423 
424   // Returns \c true if any command lines fail to round-trip. We expect
425   // commands already be canonical when output by the scanner.
426   bool roundTripCommands(raw_ostream &ErrOS) {
427     IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions{};
428     TextDiagnosticPrinter DiagConsumer(ErrOS, &*DiagOpts);
429     IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
430         CompilerInstance::createDiagnostics(*llvm::vfs::getRealFileSystem(),
431                                             &*DiagOpts, &DiagConsumer,
432                                             /*ShouldOwnClient=*/false);
433 
434     for (auto &&M : Modules)
435       if (roundTripCommand(M.second.getBuildArguments(), *Diags))
436         return true;
437 
438     for (auto &&I : Inputs)
439       for (const auto &Cmd : I.Commands)
440         if (roundTripCommand(Cmd.Arguments, *Diags))
441           return true;
442 
443     return false;
444   }
445 
446   void printFullOutput(raw_ostream &OS) {
447     // Skip sorting modules and constructing the JSON object if the output
448     // cannot be observed anyway. This makes timings less noisy.
449     if (&OS == &llvm::nulls())
450       return;
451 
452     // Sort the modules by name to get a deterministic order.
453     std::vector<IndexedModuleID> ModuleIDs;
454     for (auto &&M : Modules)
455       ModuleIDs.push_back(M.first);
456     llvm::sort(ModuleIDs);
457 
458     llvm::json::OStream JOS(OS, /*IndentSize=*/2);
459 
460     JOS.object([&] {
461       JOS.attributeArray("modules", [&] {
462         for (auto &&ModID : ModuleIDs) {
463           auto &MD = Modules[ModID];
464           JOS.object([&] {
465             JOS.attributeArray("clang-module-deps",
466                                toJSONSorted(JOS, MD.ClangModuleDeps));
467             JOS.attribute("clang-modulemap-file",
468                           StringRef(MD.ClangModuleMapFile));
469             JOS.attributeArray("command-line",
470                                toJSONStrings(JOS, MD.getBuildArguments()));
471             JOS.attribute("context-hash", StringRef(MD.ID.ContextHash));
472             JOS.attributeArray("file-deps", [&] {
473               MD.forEachFileDep([&](StringRef FileDep) { JOS.value(FileDep); });
474             });
475             JOS.attributeArray("link-libraries",
476                                toJSONSorted(JOS, MD.LinkLibraries));
477             JOS.attribute("name", StringRef(MD.ID.ModuleName));
478           });
479         }
480       });
481 
482       JOS.attributeArray("translation-units", [&] {
483         for (auto &&I : Inputs) {
484           JOS.object([&] {
485             JOS.attributeArray("commands", [&] {
486               if (I.DriverCommandLine.empty()) {
487                 for (const auto &Cmd : I.Commands) {
488                   JOS.object([&] {
489                     JOS.attribute("clang-context-hash",
490                                   StringRef(I.ContextHash));
491                     JOS.attributeArray("clang-module-deps",
492                                        toJSONSorted(JOS, I.ModuleDeps));
493                     JOS.attributeArray("command-line",
494                                        toJSONStrings(JOS, Cmd.Arguments));
495                     JOS.attribute("executable", StringRef(Cmd.Executable));
496                     JOS.attributeArray("file-deps",
497                                        toJSONStrings(JOS, I.FileDeps));
498                     JOS.attribute("input-file", StringRef(I.FileName));
499                   });
500                 }
501               } else {
502                 JOS.object([&] {
503                   JOS.attribute("clang-context-hash", StringRef(I.ContextHash));
504                   JOS.attributeArray("clang-module-deps",
505                                      toJSONSorted(JOS, I.ModuleDeps));
506                   JOS.attributeArray("command-line",
507                                      toJSONStrings(JOS, I.DriverCommandLine));
508                   JOS.attribute("executable", "clang");
509                   JOS.attributeArray("file-deps",
510                                      toJSONStrings(JOS, I.FileDeps));
511                   JOS.attribute("input-file", StringRef(I.FileName));
512                 });
513               }
514             });
515           });
516         }
517       });
518     });
519   }
520 
521 private:
522   struct IndexedModuleID {
523     ModuleID ID;
524 
525     // FIXME: This is mutable so that it can still be updated after insertion
526     //  into an unordered associative container. This is "fine", since this
527     //  field doesn't contribute to the hash, but it's a brittle hack.
528     mutable size_t InputIndex;
529 
530     bool operator==(const IndexedModuleID &Other) const {
531       return ID == Other.ID;
532     }
533 
534     bool operator<(const IndexedModuleID &Other) const {
535       /// We need the output of clang-scan-deps to be deterministic. However,
536       /// the dependency graph may contain two modules with the same name. How
537       /// do we decide which one to print first? If we made that decision based
538       /// on the context hash, the ordering would be deterministic, but
539       /// different across machines. This can happen for example when the inputs
540       /// or the SDKs (which both contribute to the "context" hash) live in
541       /// different absolute locations. We solve that by tracking the index of
542       /// the first input TU that (transitively) imports the dependency, which
543       /// is always the same for the same input, resulting in deterministic
544       /// sorting that's also reproducible across machines.
545       return std::tie(ID.ModuleName, InputIndex) <
546              std::tie(Other.ID.ModuleName, Other.InputIndex);
547     }
548 
549     struct Hasher {
550       std::size_t operator()(const IndexedModuleID &IMID) const {
551         return llvm::hash_value(IMID.ID);
552       }
553     };
554   };
555 
556   struct InputDeps {
557     std::string FileName;
558     std::string ContextHash;
559     std::vector<std::string> FileDeps;
560     std::vector<ModuleID> ModuleDeps;
561     std::vector<std::string> DriverCommandLine;
562     std::vector<Command> Commands;
563   };
564 
565   std::mutex Lock;
566   std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleID::Hasher>
567       Modules;
568   std::vector<InputDeps> Inputs;
569 };
570 
571 static bool handleTranslationUnitResult(
572     StringRef Input, llvm::Expected<TranslationUnitDeps> &MaybeTUDeps,
573     FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
574   if (!MaybeTUDeps) {
575     llvm::handleAllErrors(
576         MaybeTUDeps.takeError(), [&Input, &Errs](llvm::StringError &Err) {
577           Errs.applyLocked([&](raw_ostream &OS) {
578             OS << "Error while scanning dependencies for " << Input << ":\n";
579             OS << Err.getMessage();
580           });
581         });
582     return true;
583   }
584   FD.mergeDeps(Input, std::move(*MaybeTUDeps), InputIndex);
585   return false;
586 }
587 
588 static bool handleModuleResult(
589     StringRef ModuleName, llvm::Expected<ModuleDepsGraph> &MaybeModuleGraph,
590     FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
591   if (!MaybeModuleGraph) {
592     llvm::handleAllErrors(MaybeModuleGraph.takeError(),
593                           [&ModuleName, &Errs](llvm::StringError &Err) {
594                             Errs.applyLocked([&](raw_ostream &OS) {
595                               OS << "Error while scanning dependencies for "
596                                  << ModuleName << ":\n";
597                               OS << Err.getMessage();
598                             });
599                           });
600     return true;
601   }
602   FD.mergeDeps(std::move(*MaybeModuleGraph), InputIndex);
603   return false;
604 }
605 
606 class P1689Deps {
607 public:
608   void printDependencies(raw_ostream &OS) {
609     addSourcePathsToRequires();
610     // Sort the modules by name to get a deterministic order.
611     llvm::sort(Rules, [](const P1689Rule &A, const P1689Rule &B) {
612       return A.PrimaryOutput < B.PrimaryOutput;
613     });
614 
615     using namespace llvm::json;
616     Array OutputRules;
617     for (const P1689Rule &R : Rules) {
618       Object O{{"primary-output", R.PrimaryOutput}};
619 
620       if (R.Provides) {
621         Array Provides;
622         Object Provided{{"logical-name", R.Provides->ModuleName},
623                         {"source-path", R.Provides->SourcePath},
624                         {"is-interface", R.Provides->IsStdCXXModuleInterface}};
625         Provides.push_back(std::move(Provided));
626         O.insert({"provides", std::move(Provides)});
627       }
628 
629       Array Requires;
630       for (const P1689ModuleInfo &Info : R.Requires) {
631         Object RequiredInfo{{"logical-name", Info.ModuleName}};
632         if (!Info.SourcePath.empty())
633           RequiredInfo.insert({"source-path", Info.SourcePath});
634         Requires.push_back(std::move(RequiredInfo));
635       }
636 
637       if (!Requires.empty())
638         O.insert({"requires", std::move(Requires)});
639 
640       OutputRules.push_back(std::move(O));
641     }
642 
643     Object Output{
644         {"version", 1}, {"revision", 0}, {"rules", std::move(OutputRules)}};
645 
646     OS << llvm::formatv("{0:2}\n", Value(std::move(Output)));
647   }
648 
649   void addRules(P1689Rule &Rule) {
650     std::unique_lock<std::mutex> LockGuard(Lock);
651     Rules.push_back(Rule);
652   }
653 
654 private:
655   void addSourcePathsToRequires() {
656     llvm::DenseMap<StringRef, StringRef> ModuleSourceMapper;
657     for (const P1689Rule &R : Rules)
658       if (R.Provides && !R.Provides->SourcePath.empty())
659         ModuleSourceMapper[R.Provides->ModuleName] = R.Provides->SourcePath;
660 
661     for (P1689Rule &R : Rules) {
662       for (P1689ModuleInfo &Info : R.Requires) {
663         auto Iter = ModuleSourceMapper.find(Info.ModuleName);
664         if (Iter != ModuleSourceMapper.end())
665           Info.SourcePath = Iter->second;
666       }
667     }
668   }
669 
670   std::mutex Lock;
671   std::vector<P1689Rule> Rules;
672 };
673 
674 static bool
675 handleP1689DependencyToolResult(const std::string &Input,
676                                 llvm::Expected<P1689Rule> &MaybeRule,
677                                 P1689Deps &PD, SharedStream &Errs) {
678   if (!MaybeRule) {
679     llvm::handleAllErrors(
680         MaybeRule.takeError(), [&Input, &Errs](llvm::StringError &Err) {
681           Errs.applyLocked([&](raw_ostream &OS) {
682             OS << "Error while scanning dependencies for " << Input << ":\n";
683             OS << Err.getMessage();
684           });
685         });
686     return true;
687   }
688   PD.addRules(*MaybeRule);
689   return false;
690 }
691 
692 /// Construct a path for the explicitly built PCM.
693 static std::string constructPCMPath(ModuleID MID, StringRef OutputDir) {
694   SmallString<256> ExplicitPCMPath(OutputDir);
695   llvm::sys::path::append(ExplicitPCMPath, MID.ContextHash,
696                           MID.ModuleName + "-" + MID.ContextHash + ".pcm");
697   return std::string(ExplicitPCMPath);
698 }
699 
700 static std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK,
701                                       StringRef OutputDir) {
702   std::string PCMPath = constructPCMPath(MID, OutputDir);
703   switch (MOK) {
704   case ModuleOutputKind::ModuleFile:
705     return PCMPath;
706   case ModuleOutputKind::DependencyFile:
707     return PCMPath + ".d";
708   case ModuleOutputKind::DependencyTargets:
709     // Null-separate the list of targets.
710     return join(ModuleDepTargets, StringRef("\0", 1));
711   case ModuleOutputKind::DiagnosticSerializationFile:
712     return PCMPath + ".diag";
713   }
714   llvm_unreachable("Fully covered switch above!");
715 }
716 
717 static std::string getModuleCachePath(ArrayRef<std::string> Args) {
718   for (StringRef Arg : llvm::reverse(Args)) {
719     Arg.consume_front("/clang:");
720     if (Arg.consume_front("-fmodules-cache-path="))
721       return std::string(Arg);
722   }
723   SmallString<128> Path;
724   driver::Driver::getDefaultModuleCachePath(Path);
725   return std::string(Path);
726 }
727 
728 /// Attempts to construct the compilation database from '-compilation-database'
729 /// or from the arguments following the positional '--'.
730 static std::unique_ptr<tooling::CompilationDatabase>
731 getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) {
732   ParseArgs(argc, argv);
733 
734   if (!(CommandLine.empty() ^ CompilationDB.empty())) {
735     llvm::errs() << "The compilation command line must be provided either via "
736                     "'-compilation-database' or after '--'.";
737     return nullptr;
738   }
739 
740   if (!CompilationDB.empty())
741     return tooling::JSONCompilationDatabase::loadFromFile(
742         CompilationDB, ErrorMessage,
743         tooling::JSONCommandLineSyntax::AutoDetect);
744 
745   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
746       CompilerInstance::createDiagnostics(*llvm::vfs::getRealFileSystem(),
747                                           new DiagnosticOptions);
748   driver::Driver TheDriver(CommandLine[0], llvm::sys::getDefaultTargetTriple(),
749                            *Diags);
750   TheDriver.setCheckInputsExist(false);
751   std::unique_ptr<driver::Compilation> C(
752       TheDriver.BuildCompilation(CommandLine));
753   if (!C || C->getJobs().empty())
754     return nullptr;
755 
756   auto Cmd = C->getJobs().begin();
757   auto CI = std::make_unique<CompilerInvocation>();
758   CompilerInvocation::CreateFromArgs(*CI, Cmd->getArguments(), *Diags,
759                                      CommandLine[0]);
760   if (!CI)
761     return nullptr;
762 
763   FrontendOptions &FEOpts = CI->getFrontendOpts();
764   if (FEOpts.Inputs.size() != 1) {
765     llvm::errs()
766         << "Exactly one input file is required in the per-file mode ('--').\n";
767     return nullptr;
768   }
769 
770   // There might be multiple jobs for a compilation. Extract the specified
771   // output filename from the last job.
772   auto LastCmd = C->getJobs().end();
773   LastCmd--;
774   if (LastCmd->getOutputFilenames().size() != 1) {
775     llvm::errs()
776         << "Exactly one output file is required in the per-file mode ('--').\n";
777     return nullptr;
778   }
779   StringRef OutputFile = LastCmd->getOutputFilenames().front();
780 
781   class InplaceCompilationDatabase : public tooling::CompilationDatabase {
782   public:
783     InplaceCompilationDatabase(StringRef InputFile, StringRef OutputFile,
784                                ArrayRef<const char *> CommandLine)
785         : Command(".", InputFile, {}, OutputFile) {
786       for (auto *C : CommandLine)
787         Command.CommandLine.push_back(C);
788     }
789 
790     std::vector<tooling::CompileCommand>
791     getCompileCommands(StringRef FilePath) const override {
792       if (FilePath != Command.Filename)
793         return {};
794       return {Command};
795     }
796 
797     std::vector<std::string> getAllFiles() const override {
798       return {Command.Filename};
799     }
800 
801     std::vector<tooling::CompileCommand>
802     getAllCompileCommands() const override {
803       return {Command};
804     }
805 
806   private:
807     tooling::CompileCommand Command;
808   };
809 
810   return std::make_unique<InplaceCompilationDatabase>(
811       FEOpts.Inputs[0].getFile(), OutputFile, CommandLine);
812 }
813 
814 int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
815   llvm::InitializeAllTargetInfos();
816   std::string ErrorMessage;
817   std::unique_ptr<tooling::CompilationDatabase> Compilations =
818       getCompilationDatabase(argc, argv, ErrorMessage);
819   if (!Compilations) {
820     llvm::errs() << ErrorMessage << "\n";
821     return 1;
822   }
823 
824   llvm::cl::PrintOptionValues();
825 
826   // Expand response files in advance, so that we can "see" all the arguments
827   // when adjusting below.
828   Compilations = expandResponseFiles(std::move(Compilations),
829                                      llvm::vfs::getRealFileSystem());
830 
831   Compilations = inferTargetAndDriverMode(std::move(Compilations));
832 
833   Compilations = inferToolLocation(std::move(Compilations));
834 
835   // The command options are rewritten to run Clang in preprocessor only mode.
836   auto AdjustingCompilations =
837       std::make_unique<tooling::ArgumentsAdjustingCompilations>(
838           std::move(Compilations));
839   ResourceDirectoryCache ResourceDirCache;
840 
841   AdjustingCompilations->appendArgumentsAdjuster(
842       [&ResourceDirCache](const tooling::CommandLineArguments &Args,
843                           StringRef FileName) {
844         std::string LastO;
845         bool HasResourceDir = false;
846         bool ClangCLMode = false;
847         auto FlagsEnd = llvm::find(Args, "--");
848         if (FlagsEnd != Args.begin()) {
849           ClangCLMode =
850               llvm::sys::path::stem(Args[0]).contains_insensitive("clang-cl") ||
851               llvm::is_contained(Args, "--driver-mode=cl");
852 
853           // Reverse scan, starting at the end or at the element before "--".
854           auto R = std::make_reverse_iterator(FlagsEnd);
855           auto E = Args.rend();
856           // Don't include Args[0] in the iteration; that's the executable, not
857           // an option.
858           if (E != R)
859             E--;
860           for (auto I = R; I != E; ++I) {
861             StringRef Arg = *I;
862             if (ClangCLMode) {
863               // Ignore arguments that are preceded by "-Xclang".
864               if ((I + 1) != E && I[1] == "-Xclang")
865                 continue;
866               if (LastO.empty()) {
867                 // With clang-cl, the output obj file can be specified with
868                 // "/opath", "/o path", "/Fopath", and the dash counterparts.
869                 // Also, clang-cl adds ".obj" extension if none is found.
870                 if ((Arg == "-o" || Arg == "/o") && I != R)
871                   LastO = I[-1]; // Next argument (reverse iterator)
872                 else if (Arg.starts_with("/Fo") || Arg.starts_with("-Fo"))
873                   LastO = Arg.drop_front(3).str();
874                 else if (Arg.starts_with("/o") || Arg.starts_with("-o"))
875                   LastO = Arg.drop_front(2).str();
876 
877                 if (!LastO.empty() && !llvm::sys::path::has_extension(LastO))
878                   LastO.append(".obj");
879               }
880             }
881             if (Arg == "-resource-dir")
882               HasResourceDir = true;
883           }
884         }
885         tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd);
886         // The clang-cl driver passes "-o -" to the frontend. Inject the real
887         // file here to ensure "-MT" can be deduced if need be.
888         if (ClangCLMode && !LastO.empty()) {
889           AdjustedArgs.push_back("/clang:-o");
890           AdjustedArgs.push_back("/clang:" + LastO);
891         }
892 
893         if (!HasResourceDir && ResourceDirRecipe == RDRK_InvokeCompiler) {
894           StringRef ResourceDir =
895               ResourceDirCache.findResourceDir(Args, ClangCLMode);
896           if (!ResourceDir.empty()) {
897             AdjustedArgs.push_back("-resource-dir");
898             AdjustedArgs.push_back(std::string(ResourceDir));
899           }
900         }
901         AdjustedArgs.insert(AdjustedArgs.end(), FlagsEnd, Args.end());
902         return AdjustedArgs;
903       });
904 
905   SharedStream Errs(llvm::errs());
906 
907   std::optional<llvm::raw_fd_ostream> FileOS;
908   llvm::raw_ostream &ThreadUnsafeDependencyOS = [&]() -> llvm::raw_ostream & {
909     if (OutputFileName == "-")
910       return llvm::outs();
911 
912     if (OutputFileName == "/dev/null")
913       return llvm::nulls();
914 
915     std::error_code EC;
916     FileOS.emplace(OutputFileName, EC, llvm::sys::fs::OF_Text);
917     if (EC) {
918       llvm::errs() << "Failed to open output file '" << OutputFileName
919                    << "': " << llvm::errorCodeToError(EC) << '\n';
920       std::exit(1);
921     }
922     return *FileOS;
923   }();
924   SharedStream DependencyOS(ThreadUnsafeDependencyOS);
925 
926   std::vector<tooling::CompileCommand> Inputs =
927       AdjustingCompilations->getAllCompileCommands();
928 
929   std::atomic<bool> HadErrors(false);
930   std::optional<FullDeps> FD;
931   P1689Deps PD;
932 
933   std::mutex Lock;
934   size_t Index = 0;
935   auto GetNextInputIndex = [&]() -> std::optional<size_t> {
936     std::unique_lock<std::mutex> LockGuard(Lock);
937     if (Index < Inputs.size())
938       return Index++;
939     return {};
940   };
941 
942   if (Format == ScanningOutputFormat::Full)
943     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
944 
945   std::atomic<size_t> NumStatusCalls = 0;
946   std::atomic<size_t> NumOpenFileForReadCalls = 0;
947   std::atomic<size_t> NumDirBeginCalls = 0;
948   std::atomic<size_t> NumGetRealPathCalls = 0;
949   std::atomic<size_t> NumExistsCalls = 0;
950   std::atomic<size_t> NumIsLocalCalls = 0;
951 
952   auto ScanningTask = [&](DependencyScanningService &Service) {
953     DependencyScanningTool WorkerTool(Service);
954 
955     llvm::DenseSet<ModuleID> AlreadySeenModules;
956     while (auto MaybeInputIndex = GetNextInputIndex()) {
957       size_t LocalIndex = *MaybeInputIndex;
958       const tooling::CompileCommand *Input = &Inputs[LocalIndex];
959       std::string Filename = std::move(Input->Filename);
960       std::string CWD = std::move(Input->Directory);
961 
962       std::optional<StringRef> MaybeModuleName;
963       if (!ModuleName.empty())
964         MaybeModuleName = ModuleName;
965 
966       std::string OutputDir(ModuleFilesDir);
967       if (OutputDir.empty())
968         OutputDir = getModuleCachePath(Input->CommandLine);
969       auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
970         return ::lookupModuleOutput(MID, MOK, OutputDir);
971       };
972 
973       // Run the tool on it.
974       if (Format == ScanningOutputFormat::Make) {
975         auto MaybeFile = WorkerTool.getDependencyFile(Input->CommandLine, CWD);
976         if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
977                                            Errs))
978           HadErrors = true;
979       } else if (Format == ScanningOutputFormat::P1689) {
980         // It is useful to generate the make-format dependency output during
981         // the scanning for P1689. Otherwise the users need to scan again for
982         // it. We will generate the make-format dependency output if we find
983         // `-MF` in the command lines.
984         std::string MakeformatOutputPath;
985         std::string MakeformatOutput;
986 
987         auto MaybeRule = WorkerTool.getP1689ModuleDependencyFile(
988             *Input, CWD, MakeformatOutput, MakeformatOutputPath);
989 
990         if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
991           HadErrors = true;
992 
993         if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
994             !HadErrors) {
995           static std::mutex Lock;
996           // With compilation database, we may open different files
997           // concurrently or we may write the same file concurrently. So we
998           // use a map here to allow multiple compile commands to write to the
999           // same file. Also we need a lock here to avoid data race.
1000           static llvm::StringMap<llvm::raw_fd_ostream> OSs;
1001           std::unique_lock<std::mutex> LockGuard(Lock);
1002 
1003           auto OSIter = OSs.find(MakeformatOutputPath);
1004           if (OSIter == OSs.end()) {
1005             std::error_code EC;
1006             OSIter = OSs.try_emplace(MakeformatOutputPath, MakeformatOutputPath,
1007                                      EC, llvm::sys::fs::OF_Text)
1008                          .first;
1009             if (EC)
1010               llvm::errs() << "Failed to open P1689 make format output file \""
1011                            << MakeformatOutputPath << "\" for " << EC.message()
1012                            << "\n";
1013           }
1014 
1015           SharedStream MakeformatOS(OSIter->second);
1016           llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
1017           if (handleMakeDependencyToolResult(Filename, MaybeOutput,
1018                                              MakeformatOS, Errs))
1019             HadErrors = true;
1020         }
1021       } else if (MaybeModuleName) {
1022         auto MaybeModuleDepsGraph = WorkerTool.getModuleDependencies(
1023             *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
1024             LookupOutput);
1025         if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
1026                                LocalIndex, DependencyOS, Errs))
1027           HadErrors = true;
1028       } else {
1029         auto MaybeTUDeps = WorkerTool.getTranslationUnitDependencies(
1030             Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
1031         if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
1032                                         DependencyOS, Errs))
1033           HadErrors = true;
1034       }
1035     }
1036 
1037     WorkerTool.getWorkerVFS().visit([&](llvm::vfs::FileSystem &VFS) {
1038       if (auto *T = dyn_cast_or_null<llvm::vfs::TracingFileSystem>(&VFS)) {
1039         NumStatusCalls += T->NumStatusCalls;
1040         NumOpenFileForReadCalls += T->NumOpenFileForReadCalls;
1041         NumDirBeginCalls += T->NumDirBeginCalls;
1042         NumGetRealPathCalls += T->NumGetRealPathCalls;
1043         NumExistsCalls += T->NumExistsCalls;
1044         NumIsLocalCalls += T->NumIsLocalCalls;
1045       }
1046     });
1047   };
1048 
1049   DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
1050                                     EagerLoadModules, /*TraceVFS=*/Verbose);
1051 
1052   llvm::Timer T;
1053   T.startTimer();
1054 
1055   if (Inputs.size() == 1) {
1056     ScanningTask(Service);
1057   } else {
1058     llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
1059 
1060     if (Verbose) {
1061       llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
1062                    << " files using " << Pool.getMaxConcurrency()
1063                    << " workers\n";
1064     }
1065 
1066     for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
1067       Pool.async([ScanningTask, &Service]() { ScanningTask(Service); });
1068 
1069     Pool.wait();
1070   }
1071 
1072   T.stopTimer();
1073 
1074   if (Verbose)
1075     llvm::errs() << "\n*** Virtual File System Stats:\n"
1076                  << NumStatusCalls << " status() calls\n"
1077                  << NumOpenFileForReadCalls << " openFileForRead() calls\n"
1078                  << NumDirBeginCalls << " dir_begin() calls\n"
1079                  << NumGetRealPathCalls << " getRealPath() calls\n"
1080                  << NumExistsCalls << " exists() calls\n"
1081                  << NumIsLocalCalls << " isLocal() calls\n";
1082 
1083   if (PrintTiming) {
1084     llvm::errs() << "wall time [s]\t"
1085                  << "process time [s]\t"
1086                  << "instruction count\n";
1087     const llvm::TimeRecord &R = T.getTotalTime();
1088     llvm::errs() << llvm::format("%0.4f", R.getWallTime()) << "\t"
1089                  << llvm::format("%0.4f", R.getProcessTime()) << "\t"
1090                  << llvm::format("%llu", R.getInstructionsExecuted()) << "\n";
1091   }
1092 
1093   if (RoundTripArgs)
1094     if (FD && FD->roundTripCommands(llvm::errs()))
1095       HadErrors = true;
1096 
1097   if (Format == ScanningOutputFormat::Full)
1098     FD->printFullOutput(ThreadUnsafeDependencyOS);
1099   else if (Format == ScanningOutputFormat::P1689)
1100     PD.printDependencies(ThreadUnsafeDependencyOS);
1101 
1102   return HadErrors;
1103 }
1104