xref: /llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision 4312075efa02ad861db0a19a0db8e6003aa06965)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalAlias.h"
24 #include "llvm/IR/GlobalObject.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Metadata.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/IR/ModuleSummaryIndex.h"
30 #include "llvm/IRReader/IRReader.h"
31 #include "llvm/Linker/IRMover.h"
32 #include "llvm/ProfileData/PGOCtxProfReader.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <string>
50 #include <system_error>
51 #include <tuple>
52 #include <utility>
53 
54 using namespace llvm;
55 
#define DEBUG_TYPE "function-import"

// Statistic counters for this file (see llvm/ADT/Statistic.h). Per their
// descriptions, the "*ThinLink" counters track decisions taken while the thin
// link computes import lists, the plain "NumImported*" counters track what the
// backend actually imported, and the last two describe symbol liveness in the
// index.
STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
72 
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import functions with less than N instructions"));

/// Upper bound on how many functions get imported; a negative value (the
/// default, -1) disables the cutoff according to the option description.
static cl::opt<int> ImportCutoff(
    "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import first N functions if N>=0 (default -1)"));

/// When set, selectCallee() skips both its size-threshold rejection and its
/// noinline rejection, i.e. the option is broader than its description
/// suggests.
static cl::opt<bool>
    ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                   cl::desc("Import functions with noinline attribute"));

/// Decay factor applied to the instruction limit when moving on to newly
/// imported functions.
static cl::opt<float>
    ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
                      cl::Hidden, cl::value_desc("x"),
                      cl::desc("As we import functions, multiply the "
                               "`import-instr-limit` threshold by this factor "
                               "before processing newly imported functions"));

/// Same as `import-instr-evolution-factor`, but for functions that were
/// imported through a hot callsite.
static cl::opt<float> ImportHotInstrFactor(
    "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc("As we import functions called from hot callsite, multiply the "
             "`import-instr-limit` threshold by this factor "
             "before processing newly imported functions"));

/// Scaling of the instruction limit for hot callsites.
static cl::opt<float> ImportHotMultiplier(
    "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
    cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));

/// Scaling of the instruction limit for critical callsites.
static cl::opt<float> ImportCriticalMultiplier(
    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc(
        "Multiply the `import-instr-limit` threshold for critical callsites"));

/// Scaling of the instruction limit for cold callsites. The default of 0
/// yields a zero threshold for such callsites.
// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
    "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
    cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));

/// Diagnostic switch: print the functions that get imported.
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                  cl::desc("Print imported functions"));

/// Diagnostic switch: print details about functions rejected for importing.
static cl::opt<bool> PrintImportFailures(
    "print-import-failures", cl::init(false), cl::Hidden,
    cl::desc("Print information for functions rejected for importing"));

/// Controls the computation of dead symbols; enabled by default.
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
                                 cl::desc("Compute dead symbols"));

/// Controls emission of import-related metadata ('thinlto_src_module',
/// 'thinlto_src_file'); disabled by default.
static cl::opt<bool> EnableImportMetadata(
    "enable-import-metadata", cl::init(false), cl::Hidden,
    cl::desc("Enable import metadata like 'thinlto_src_module' and "
             "'thinlto_src_file'"));

/// Summary file to use for function importing when using -function-import from
/// the command line.
static cl::opt<std::string>
    SummaryFile("summary-file",
                cl::desc("The summary file to use for function importing."));

/// Used when testing importing from distributed indexes via opt
/// -function-import.
static cl::opt<bool>
    ImportAllIndex("import-all-index",
                   cl::desc("Import all external functions in index."));

/// This is a test-only option.
/// If this option is enabled, the ThinLTO indexing step will import each
/// function declaration as a fallback. In a real build this may increase RAM
/// usage of the indexing step unnecessarily.
/// TODO: Implement selective import (based on combined summary analysis) to
/// ensure the imported function has a use case in the postlink pipeline.
static cl::opt<bool> ImportDeclaration(
    "import-declaration", cl::init(false), cl::Hidden,
    cl::desc("If true, import function declaration as fallback if the function "
             "definition is not imported."));

/// Pass a workload description file - an example of workload would be the
/// functions executed to satisfy a RPC request. A workload is defined by a root
/// function and the list of functions that are (frequently) needed to satisfy
/// it. The module that defines the root will have all those functions imported.
/// The file contains a JSON dictionary. The keys are root functions, the values
/// are lists of functions to import in the module defining the root. It is
/// assumed -funique-internal-linkage-names was used, thus ensuring function
/// names are unique even for local linkage ones.
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists of "
             "functions to import in the module defining the root. It is "
             "assumed -funique-internal-linkage-names was used, to ensure "
             "local linkage functions have unique names. For example: \n"
             "{\n"
             "  \"rootFunction_1\": [\"function_to_import_1\", "
             "\"function_to_import_2\"], \n"
             "  \"rootFunction_2\": [\"function_to_import_3\", "
             "\"function_to_import_4\"] \n"
             "}"),
    cl::Hidden);
175 
// Command-line options that are only declared here; their definitions live in
// other translation units (not visible from this file).
extern cl::opt<std::string> UseCtxProfile;

namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
181 
182 // Load lazily a module from \p FileName in \p Context.
183 static std::unique_ptr<Module> loadFile(const std::string &FileName,
184                                         LLVMContext &Context) {
185   SMDiagnostic Err;
186   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
187   // Metadata isn't loaded until functions are imported, to minimize
188   // the memory overhead.
189   std::unique_ptr<Module> Result =
190       getLazyIRFileModule(FileName, Err, Context,
191                           /* ShouldLazyLoadMetadata = */ true);
192   if (!Result) {
193     Err.print("function-import", errs());
194     report_fatal_error("Abort");
195   }
196 
197   return Result;
198 }
199 
200 static bool shouldSkipLocalInAnotherModule(const GlobalValueSummary *RefSummary,
201                                            size_t NumDefs,
202                                            StringRef ImporterModule) {
203   // We can import a local when there is one definition.
204   if (NumDefs == 1)
205     return false;
206   // In other cases, make sure we import the copy in the caller's module if the
207   // referenced value has local linkage. The only time a local variable can
208   // share an entry in the index is if there is a local with the same name in
209   // another module that had the same source file name (in a different
210   // directory), where each was compiled in their own directory so there was not
211   // distinguishing path.
212   return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
213          RefSummary->modulePath() != ImporterModule;
214 }
215 
216 /// Given a list of possible callee implementation for a call site, qualify the
217 /// legality of importing each. The return is a range of pairs. Each pair
218 /// corresponds to a candidate. The first value is the ImportFailureReason for
219 /// that candidate, the second is the candidate.
220 static auto qualifyCalleeCandidates(
221     const ModuleSummaryIndex &Index,
222     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
223     StringRef CallerModulePath) {
224   return llvm::map_range(
225       CalleeSummaryList,
226       [&Index, CalleeSummaryList,
227        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
228           -> std::pair<FunctionImporter::ImportFailureReason,
229                        const GlobalValueSummary *> {
230         auto *GVSummary = SummaryPtr.get();
231         if (!Index.isGlobalValueLive(GVSummary))
232           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
233 
234         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
235           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
236                   GVSummary};
237 
238         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
239 
240         // Ignore any callees that aren't actually functions. This could happen
241         // in the case of GUID hash collisions. It could also happen in theory
242         // for SamplePGO profiles collected on old versions of the code after
243         // renaming, since we synthesize edges to any inlined callees appearing
244         // in the profile.
245         if (!Summary)
246           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
247 
248         // If this is a local function, make sure we import the copy in the
249         // caller's module. The only time a local function can share an entry in
250         // the index is if there is a local with the same name in another module
251         // that had the same source file name (in a different directory), where
252         // each was compiled in their own directory so there was not
253         // distinguishing path.
254         // If the local function is from another module, it must be a reference
255         // due to indirect call profile data since a function pointer can point
256         // to a local in another module. Do the import from another module if
257         // there is only one entry in the list or when all files in the program
258         // are compiled with full path - in both cases the local function has
259         // unique PGO name and GUID.
260         if (shouldSkipLocalInAnotherModule(Summary, CalleeSummaryList.size(),
261                                            CallerModulePath))
262           return {
263               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
264               GVSummary};
265 
266         // Skip if it isn't legal to import (e.g. may reference unpromotable
267         // locals).
268         if (Summary->notEligibleToImport())
269           return {FunctionImporter::ImportFailureReason::NotEligible,
270                   GVSummary};
271 
272         return {FunctionImporter::ImportFailureReason::None, GVSummary};
273       });
274 }
275 
276 /// Given a list of possible callee implementation for a call site, select one
277 /// that fits the \p Threshold for function definition import. If none are
278 /// found, the Reason will give the last reason for the failure (last, in the
279 /// order of CalleeSummaryList entries). While looking for a callee definition,
280 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
281 /// candidate; other modules may want to know the function summary or
282 /// declaration even if a definition is not needed.
283 ///
284 /// FIXME: select "best" instead of first that fits. But what is "best"?
285 /// - The smallest: more likely to be inlined.
286 /// - The one with the least outgoing edges (already well optimized).
287 /// - One from a module already being imported from in order to reduce the
288 ///   number of source modules parsed/linked.
289 /// - One that has PGO data attached.
290 /// - [insert you fancy metric here]
291 static const GlobalValueSummary *
292 selectCallee(const ModuleSummaryIndex &Index,
293              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
294              unsigned Threshold, StringRef CallerModulePath,
295              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
296              FunctionImporter::ImportFailureReason &Reason) {
297   // Records the last summary with reason noinline or too-large.
298   TooLargeOrNoInlineSummary = nullptr;
299   auto QualifiedCandidates =
300       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
301   for (auto QualifiedValue : QualifiedCandidates) {
302     Reason = QualifiedValue.first;
303     // Skip a summary if its import is not (proved to be) legal.
304     if (Reason != FunctionImporter::ImportFailureReason::None)
305       continue;
306     auto *Summary =
307         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
308 
309     // Don't bother importing the definition if the chance of inlining it is
310     // not high enough (except under `--force-import-all`).
311     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
312         !ForceImportAll) {
313       TooLargeOrNoInlineSummary = Summary;
314       Reason = FunctionImporter::ImportFailureReason::TooLarge;
315       continue;
316     }
317 
318     // Don't bother importing the definition if we can't inline it anyway.
319     if (Summary->fflags().NoInline && !ForceImportAll) {
320       TooLargeOrNoInlineSummary = Summary;
321       Reason = FunctionImporter::ImportFailureReason::NoInline;
322       continue;
323     }
324 
325     return Summary;
326   }
327   return nullptr;
328 }
329 
namespace {

/// Worklist element pairing a function summary with a threshold value.
/// NOTE(review): presumably the threshold is the import threshold to apply
/// when processing that function's outgoing calls - confirm at the use sites,
/// which are outside the visible part of the file.
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;

} // anonymous namespace
335 
336 FunctionImporter::ImportMapTy::AddDefinitionStatus
337 FunctionImporter::ImportMapTy::addDefinition(StringRef FromModule,
338                                              GlobalValue::GUID GUID) {
339   auto [Def, Decl] = IDs.createImportIDs(FromModule, GUID);
340   if (!Imports.insert(Def).second)
341     // Already there.
342     return AddDefinitionStatus::NoChange;
343 
344   // Remove Decl in case it's there.  Note that a definition takes precedence
345   // over a declaration for a given GUID.
346   return Imports.erase(Decl) ? AddDefinitionStatus::ChangedToDefinition
347                              : AddDefinitionStatus::Inserted;
348 }
349 
350 void FunctionImporter::ImportMapTy::maybeAddDeclaration(
351     StringRef FromModule, GlobalValue::GUID GUID) {
352   auto [Def, Decl] = IDs.createImportIDs(FromModule, GUID);
353   // Insert Decl only if Def is not present.  Note that a definition takes
354   // precedence over a declaration for a given GUID.
355   if (!Imports.contains(Def))
356     Imports.insert(Decl);
357 }
358 
359 SmallVector<StringRef, 0>
360 FunctionImporter::ImportMapTy::getSourceModules() const {
361   SetVector<StringRef> ModuleSet;
362   for (const auto &[SrcMod, GUID, ImportType] : *this)
363     ModuleSet.insert(SrcMod);
364   SmallVector<StringRef, 0> Modules = ModuleSet.takeVector();
365   llvm::sort(Modules);
366   return Modules;
367 }
368 
369 std::optional<GlobalValueSummary::ImportKind>
370 FunctionImporter::ImportMapTy::getImportType(StringRef FromModule,
371                                              GlobalValue::GUID GUID) const {
372   if (auto IDPair = IDs.getImportIDs(FromModule, GUID)) {
373     auto [Def, Decl] = *IDPair;
374     if (Imports.contains(Def))
375       return GlobalValueSummary::Definition;
376     if (Imports.contains(Decl))
377       return GlobalValueSummary::Declaration;
378   }
379   return std::nullopt;
380 }
381 
382 /// Import globals referenced by a function or other globals that are being
383 /// imported, if importing such global is possible.
384 class GlobalsImporter final {
385   const ModuleSummaryIndex &Index;
386   const GVSummaryMapTy &DefinedGVSummaries;
387   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
388       IsPrevailing;
389   FunctionImporter::ImportMapTy &ImportList;
390   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
391 
392   bool shouldImportGlobal(const ValueInfo &VI) {
393     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
394     if (GVS == DefinedGVSummaries.end())
395       return true;
396     // We should not skip import if the module contains a non-prevailing
397     // definition with interposable linkage type. This is required for
398     // correctness in the situation where there is a prevailing def available
399     // for import and marked read-only. In this case, the non-prevailing def
400     // will be converted to a declaration, while the prevailing one becomes
401     // internal, thus no definitions will be available for linking. In order to
402     // prevent undefined symbol link error, the prevailing definition must be
403     // imported.
404     // FIXME: Consider adding a check that the suitable prevailing definition
405     // exists and marked read-only.
406     if (VI.getSummaryList().size() > 1 &&
407         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
408         !IsPrevailing(VI.getGUID(), GVS->second))
409       return true;
410 
411     return false;
412   }
413 
414   void
415   onImportingSummaryImpl(const GlobalValueSummary &Summary,
416                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
417     for (const auto &VI : Summary.refs()) {
418       if (!shouldImportGlobal(VI)) {
419         LLVM_DEBUG(
420             dbgs() << "Ref ignored! Target already in destination module.\n");
421         continue;
422       }
423 
424       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
425 
426       for (const auto &RefSummary : VI.getSummaryList()) {
427         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
428         // Functions could be referenced by global vars - e.g. a vtable; but we
429         // don't currently imagine a reason those would be imported here, rather
430         // than as part of the logic deciding which functions to import (i.e.
431         // based on profile information). Should we decide to handle them here,
432         // we can refactor accordingly at that time.
433         bool CanImportDecl = false;
434         if (!GVS ||
435             shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
436                                            Summary.modulePath()) ||
437             !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true,
438                                       CanImportDecl)) {
439           if (ImportDeclaration && CanImportDecl)
440             ImportList.maybeAddDeclaration(RefSummary->modulePath(),
441                                            VI.getGUID());
442 
443           continue;
444         }
445 
446         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
447         // Otherwise, definition should take precedence over declaration.
448         if (ImportList.addDefinition(RefSummary->modulePath(), VI.getGUID()) !=
449             FunctionImporter::ImportMapTy::AddDefinitionStatus::Inserted)
450           break;
451 
452         // Only update stat and exports if we haven't already imported this
453         // variable.
454         NumImportedGlobalVarsThinLink++;
455         // Any references made by this variable will be marked exported
456         // later, in ComputeCrossModuleImport, after import decisions are
457         // complete, which is more efficient than adding them here.
458         if (ExportLists)
459           (*ExportLists)[RefSummary->modulePath()].insert(VI);
460 
461         // If variable is not writeonly we attempt to recursively analyze
462         // its references in order to import referenced constants.
463         if (!Index.isWriteOnly(GVS))
464           Worklist.emplace_back(GVS);
465         break;
466       }
467     }
468   }
469 
470 public:
471   GlobalsImporter(
472       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
473       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
474           IsPrevailing,
475       FunctionImporter::ImportMapTy &ImportList,
476       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
477       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
478         IsPrevailing(IsPrevailing), ImportList(ImportList),
479         ExportLists(ExportLists) {}
480 
481   void onImportingSummary(const GlobalValueSummary &Summary) {
482     SmallVector<const GlobalVarSummary *, 128> Worklist;
483     onImportingSummaryImpl(Summary, Worklist);
484     while (!Worklist.empty())
485       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
486   }
487 };
488 
// Forward declaration: used for debug output further down, ahead of the
// definition (which is outside the visible part of this file). Returns a
// C string naming \p Reason.
static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
490 
/// Determine the list of imports and exports for each module.
///
/// The constructor is protected: instances are meant to be obtained through
/// the static create() factory, which may hand out a derived manager.
class ModuleImportsManager {
protected:
  /// Callback telling whether a given summary is the prevailing copy for a
  /// GUID.
  function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
      IsPrevailing;
  /// The summary index import decisions are based on.
  const ModuleSummaryIndex &Index;
  /// Optional per-module export sets to update; may be null, in which case no
  /// exports are recorded.
  DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;

  ModuleImportsManager(
      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          IsPrevailing,
      const ModuleSummaryIndex &Index,
      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
      : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}

public:
  virtual ~ModuleImportsManager() = default;

  /// Given the list of globals defined in a module, compute the list of imports
  /// as well as the list of "exports", i.e. the list of symbols referenced from
  /// another module (that may require promotion).
  virtual void
  computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
                         StringRef ModName,
                         FunctionImporter::ImportMapTy &ImportList);

  /// Factory for import managers; defined out of line (not visible here).
  /// NOTE(review): presumably returns a WorkloadImportsManager when a
  /// workload definition was passed on the command line - confirm against the
  /// definition.
  static std::unique_ptr<ModuleImportsManager>
  create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
             IsPrevailing,
         const ModuleSummaryIndex &Index,
         DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
             nullptr);
};
524 
525 /// A ModuleImportsManager that operates based on a workload definition (see
526 /// -thinlto-workload-def). For modules that do not define workload roots, it
527 /// applies the base ModuleImportsManager import policy.
528 class WorkloadImportsManager : public ModuleImportsManager {
529   // Keep a module name -> value infos to import association. We use it to
530   // determine if a module's import list should be done by the base
531   // ModuleImportsManager or by us.
532   StringMap<DenseSet<ValueInfo>> Workloads;
533 
534   void
535   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
536                          StringRef ModName,
537                          FunctionImporter::ImportMapTy &ImportList) override {
538     auto SetIter = Workloads.find(ModName);
539     if (SetIter == Workloads.end()) {
540       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
541                         << " does not contain the root of any context.\n");
542       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
543                                                           ModName, ImportList);
544     }
545     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
546                       << " contains the root(s) of context(s).\n");
547 
548     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
549                         ExportLists);
550     auto &ValueInfos = SetIter->second;
551     SmallVector<EdgeInfo, 128> GlobWorklist;
552     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
553       auto It = DefinedGVSummaries.find(VI.getGUID());
554       if (It != DefinedGVSummaries.end() &&
555           IsPrevailing(VI.getGUID(), It->second)) {
556         LLVM_DEBUG(
557             dbgs() << "[Workload] " << VI.name()
558                    << " has the prevailing variant already in the module "
559                    << ModName << ". No need to import\n");
560         continue;
561       }
562       auto Candidates =
563           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
564 
565       const GlobalValueSummary *GVS = nullptr;
566       auto PotentialCandidates = llvm::map_range(
567           llvm::make_filter_range(
568               Candidates,
569               [&](const auto &Candidate) {
570                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
571                                   << " from " << Candidate.second->modulePath()
572                                   << " ImportFailureReason: "
573                                   << getFailureName(Candidate.first) << "\n");
574                 return Candidate.first ==
575                         FunctionImporter::ImportFailureReason::None;
576               }),
577           [](const auto &Candidate) { return Candidate.second; });
578       if (PotentialCandidates.empty()) {
579         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
580                           << " because can't find eligible Callee. Guid is: "
581                           << Function::getGUID(VI.name()) << "\n");
582         continue;
583       }
584       /// We will prefer importing the prevailing candidate, if not, we'll
585       /// still pick the first available candidate. The reason we want to make
586       /// sure we do import the prevailing candidate is because the goal of
587       /// workload-awareness is to enable optimizations specializing the call
588       /// graph of that workload. Suppose a function is already defined in the
589       /// module, but it's not the prevailing variant. Suppose also we do not
590       /// inline it (in fact, if it were interposable, we can't inline it),
591       /// but we could specialize it to the workload in other ways. However,
592       /// the linker would drop it in the favor of the prevailing copy.
593       /// Instead, by importing the prevailing variant (assuming also the use
594       /// of `-avail-extern-to-local`), we keep the specialization. We could
595       /// alteranatively make the non-prevailing variant local, but the
596       /// prevailing one is also the one for which we would have previously
597       /// collected profiles, making it preferrable.
598       auto PrevailingCandidates = llvm::make_filter_range(
599           PotentialCandidates, [&](const auto *Candidate) {
600             return IsPrevailing(VI.getGUID(), Candidate);
601           });
602       if (PrevailingCandidates.empty()) {
603         GVS = *PotentialCandidates.begin();
604         if (!llvm::hasSingleElement(PotentialCandidates) &&
605             GlobalValue::isLocalLinkage(GVS->linkage()))
606           LLVM_DEBUG(
607               dbgs()
608               << "[Workload] Found multiple non-prevailing candidates for "
609               << VI.name()
610               << ". This is unexpected. Are module paths passed to the "
611                  "compiler unique for the modules passed to the linker?");
612         // We could in theory have multiple (interposable) copies of a symbol
613         // when there is no prevailing candidate, if say the prevailing copy was
614         // in a native object being linked in. However, we should in theory be
615         // marking all of these non-prevailing IR copies dead in that case, in
616         // which case they won't be candidates.
617         assert(GVS->isLive());
618       } else {
619         assert(llvm::hasSingleElement(PrevailingCandidates));
620         GVS = *PrevailingCandidates.begin();
621       }
622 
623       auto ExportingModule = GVS->modulePath();
624       // We checked that for the prevailing case, but if we happen to have for
625       // example an internal that's defined in this module, it'd have no
626       // PrevailingCandidates.
627       if (ExportingModule == ModName) {
628         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
629                           << " because its defining module is the same as the "
630                              "current module\n");
631         continue;
632       }
633       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
634                         << ExportingModule << " : "
635                         << Function::getGUID(VI.name()) << "\n");
636       ImportList.addDefinition(ExportingModule, VI.getGUID());
637       GVI.onImportingSummary(*GVS);
638       if (ExportLists)
639         (*ExportLists)[ExportingModule].insert(VI);
640     }
641     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
642   }
643 
644   void loadFromJson() {
645     // Since the workload def uses names, we need a quick lookup
646     // name->ValueInfo.
647     StringMap<ValueInfo> NameToValueInfo;
648     StringSet<> AmbiguousNames;
649     for (auto &I : Index) {
650       ValueInfo VI = Index.getValueInfo(I);
651       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
652         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
653     }
654     auto DbgReportIfAmbiguous = [&](StringRef Name) {
655       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
656         dbgs() << "[Workload] Function name " << Name
657                << " present in the workload definition is ambiguous. Consider "
658                   "compiling with -funique-internal-linkage-names.";
659       });
660     };
661     std::error_code EC;
662     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
663     if (std::error_code EC = BufferOrErr.getError()) {
664       report_fatal_error("Failed to open context file");
665       return;
666     }
667     auto Buffer = std::move(BufferOrErr.get());
668     std::map<std::string, std::vector<std::string>> WorkloadDefs;
669     json::Path::Root NullRoot;
670     // The JSON is supposed to contain a dictionary matching the type of
671     // WorkloadDefs. For example:
672     // {
673     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
674     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
675     // }
676     auto Parsed = json::parse(Buffer->getBuffer());
677     if (!Parsed)
678       report_fatal_error(Parsed.takeError());
679     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
680       report_fatal_error("Invalid thinlto contextual profile format.");
681     for (const auto &Workload : WorkloadDefs) {
682       const auto &Root = Workload.first;
683       DbgReportIfAmbiguous(Root);
684       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
685       const auto &AllCallees = Workload.second;
686       auto RootIt = NameToValueInfo.find(Root);
687       if (RootIt == NameToValueInfo.end()) {
688         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
689                           << " not found in this linkage unit.\n");
690         continue;
691       }
692       auto RootVI = RootIt->second;
693       if (RootVI.getSummaryList().size() != 1) {
694         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
695                           << " should have exactly one summary, but has "
696                           << RootVI.getSummaryList().size() << ". Skipping.\n");
697         continue;
698       }
699       StringRef RootDefiningModule =
700           RootVI.getSummaryList().front()->modulePath();
701       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
702                         << " is : " << RootDefiningModule << "\n");
703       auto &Set = Workloads[RootDefiningModule];
704       for (const auto &Callee : AllCallees) {
705         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
706         DbgReportIfAmbiguous(Callee);
707         auto ElemIt = NameToValueInfo.find(Callee);
708         if (ElemIt == NameToValueInfo.end()) {
709           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
710           continue;
711         }
712         Set.insert(ElemIt->second);
713       }
714     }
715   }
716 
717   void loadFromCtxProf() {
718     std::error_code EC;
719     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(UseCtxProfile);
720     if (std::error_code EC = BufferOrErr.getError()) {
721       report_fatal_error("Failed to open contextual profile file");
722       return;
723     }
724     auto Buffer = std::move(BufferOrErr.get());
725 
726     PGOCtxProfileReader Reader(Buffer->getBuffer());
727     auto Ctx = Reader.loadContexts();
728     if (!Ctx) {
729       report_fatal_error("Failed to parse contextual profiles");
730       return;
731     }
732     const auto &CtxMap = *Ctx;
733     SetVector<GlobalValue::GUID> ContainedGUIDs;
734     for (const auto &[RootGuid, Root] : CtxMap) {
735       // Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for
736       // subsequent roots, but clear its contents.
737       ContainedGUIDs.clear();
738 
739       auto RootVI = Index.getValueInfo(RootGuid);
740       if (!RootVI) {
741         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
742                           << " not found in this linkage unit.\n");
743         continue;
744       }
745       if (RootVI.getSummaryList().size() != 1) {
746         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
747                           << " should have exactly one summary, but has "
748                           << RootVI.getSummaryList().size() << ". Skipping.\n");
749         continue;
750       }
751       StringRef RootDefiningModule =
752           RootVI.getSummaryList().front()->modulePath();
753       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
754                         << " is : " << RootDefiningModule << "\n");
755       auto &Set = Workloads[RootDefiningModule];
756       Root.getContainedGuids(ContainedGUIDs);
757       for (auto Guid : ContainedGUIDs)
758         if (auto VI = Index.getValueInfo(Guid))
759           Set.insert(VI);
760     }
761   }
762 
763 public:
764   WorkloadImportsManager(
765       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
766           IsPrevailing,
767       const ModuleSummaryIndex &Index,
768       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
769       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
770     if (UseCtxProfile.empty() == WorkloadDefinitions.empty()) {
771       report_fatal_error(
772           "Pass only one of: -thinlto-pgo-ctx-prof or -thinlto-workload-def");
773       return;
774     }
775     if (!UseCtxProfile.empty())
776       loadFromCtxProf();
777     else
778       loadFromJson();
779     LLVM_DEBUG({
780       for (const auto &[Root, Set] : Workloads) {
781         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
782                << " distinct callees.\n";
783         for (const auto &VI : Set) {
784           dbgs() << "[Workload] Root: " << Root
785                  << " Would include: " << VI.getGUID() << "\n";
786         }
787       }
788     });
789   }
790 };
791 
792 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
793     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
794         IsPrevailing,
795     const ModuleSummaryIndex &Index,
796     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
797   if (WorkloadDefinitions.empty() && UseCtxProfile.empty()) {
798     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
799     return std::unique_ptr<ModuleImportsManager>(
800         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
801   }
802   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
803   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
804                                                   ExportLists);
805 }
806 
807 static const char *
808 getFailureName(FunctionImporter::ImportFailureReason Reason) {
809   switch (Reason) {
810   case FunctionImporter::ImportFailureReason::None:
811     return "None";
812   case FunctionImporter::ImportFailureReason::GlobalVar:
813     return "GlobalVar";
814   case FunctionImporter::ImportFailureReason::NotLive:
815     return "NotLive";
816   case FunctionImporter::ImportFailureReason::TooLarge:
817     return "TooLarge";
818   case FunctionImporter::ImportFailureReason::InterposableLinkage:
819     return "InterposableLinkage";
820   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
821     return "LocalLinkageNotInModule";
822   case FunctionImporter::ImportFailureReason::NotEligible:
823     return "NotEligible";
824   case FunctionImporter::ImportFailureReason::NoInline:
825     return "NoInline";
826   }
827   llvm_unreachable("invalid reason");
828 }
829 
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
///
/// For every call edge of \p Summary this either skips the callee (cutoff
/// reached, already defined in the destination module, already processed at an
/// equal or higher threshold, or no qualifying summary found) or records the
/// callee as a definition import in \p ImportList, adds it to \p ExportLists
/// (when non-null) and pushes it on \p Worklist so that its own callees get
/// visited by the caller of this function.
///
/// \p Threshold is the base threshold for this caller; it is scaled per edge
/// by a hotness-dependent multiplier before being handed to selectCallee().
/// \p ImportThresholds remembers, per callee GUID, the highest threshold
/// processed so far together with the selected summary (if any) and the
/// failure info (only populated when PrintImportFailures is set).
/// \p isPrevailing is not referenced in the body of this function.
static void computeImportForFunction(
    const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
    const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
    FunctionImporter::ImportMapTy &ImportList,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
    FunctionImporter::ImportThresholdsTy &ImportThresholds) {
  GVImporter.onImportingSummary(Summary);
  // Function-local static: the count persists across all calls to this
  // function and is what ImportCutoff is compared against.
  static int ImportCount = 0;
  for (const auto &Edge : Summary.calls()) {
    ValueInfo VI = Edge.first;
    LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
                      << "\n");

    // A negative ImportCutoff disables the cutoff.
    if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
      LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
                        << " reached.\n");
      continue;
    }

    if (DefinedGVSummaries.count(VI.getGUID())) {
      // FIXME: Consider not skipping import if the module contains
      // a non-prevailing def with interposable linkage. The prevailing copy
      // can safely be imported (see shouldImportGlobal()).
      LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
      continue;
    }

    // Scale factor applied to the threshold depending on the call site
    // hotness; edges that are neither hot, cold nor critical use 1.0.
    auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
      if (Hotness == CalleeInfo::HotnessType::Hot)
        return ImportHotMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Cold)
        return ImportColdMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Critical)
        return ImportCriticalMultiplier;
      return 1.0;
    };

    const auto NewThreshold =
        Threshold * GetBonusMultiplier(Edge.second.getHotness());

    // Insert a fresh entry for this callee, or fetch the existing one. On a
    // fresh insertion the stored threshold is NewThreshold and both the
    // callee summary and the failure info are null.
    auto IT = ImportThresholds.insert(std::make_pair(
        VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
    bool PreviouslyVisited = !IT.second;
    auto &ProcessedThreshold = std::get<0>(IT.first->second);
    auto &CalleeSummary = std::get<1>(IT.first->second);
    auto &FailureInfo = std::get<2>(IT.first->second);

    bool IsHotCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
    bool IsCriticalCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;

    const FunctionSummary *ResolvedCalleeSummary = nullptr;
    if (CalleeSummary) {
      assert(PreviouslyVisited);
      // Since the traversal of the call graph is DFS, we can revisit a function
      // a second time with a higher threshold. In this case, it is added back
      // to the worklist with the new threshold (so that its own callee chains
      // can be considered with the higher threshold).
      if (NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already imported with Threshold "
                   << ProcessedThreshold << "\n");
        continue;
      }
      // Update with new larger threshold.
      ProcessedThreshold = NewThreshold;
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
    } else {
      // If we already rejected importing a callee at the same or higher
      // threshold, don't waste time calling selectCallee.
      if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already rejected with Threshold "
            << ProcessedThreshold << "\n");
        if (PrintImportFailures) {
          assert(FailureInfo &&
                 "Expected FailureInfo for previously rejected candidate");
          FailureInfo->Attempts++;
        }
        continue;
      }

      FunctionImporter::ImportFailureReason Reason{};

      // `SummaryForDeclImport` is a summary eligible for declaration import.
      const GlobalValueSummary *SummaryForDeclImport = nullptr;
      CalleeSummary =
          selectCallee(Index, VI.getSummaryList(), NewThreshold,
                       Summary.modulePath(), SummaryForDeclImport, Reason);
      if (!CalleeSummary) {
        // There isn't a callee for definition import but one for declaration
        // import.
        if (ImportDeclaration && SummaryForDeclImport) {
          StringRef DeclSourceModule = SummaryForDeclImport->modulePath();

          // Note `ExportLists` only keeps track of exports due to imported
          // definitions.
          ImportList.maybeAddDeclaration(DeclSourceModule, VI.getGUID());
        }
        // Update with new larger threshold if this was a retry (otherwise
        // we would have already inserted with NewThreshold above). Also
        // update failure info if requested.
        if (PreviouslyVisited) {
          ProcessedThreshold = NewThreshold;
          if (PrintImportFailures) {
            assert(FailureInfo &&
                   "Expected FailureInfo for previously rejected candidate");
            FailureInfo->Reason = Reason;
            FailureInfo->Attempts++;
            FailureInfo->MaxHotness =
                std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
          }
        } else if (PrintImportFailures) {
          assert(!FailureInfo &&
                 "Expected no FailureInfo for newly rejected candidate");
          FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
              VI, Edge.second.getHotness(), Reason, 1);
        }
        if (ForceImportAll) {
          // With ForceImportAll a failed import is reported on errs() and the
          // remaining call edges of this caller are not processed.
          std::string Msg = std::string("Failed to import function ") +
                            VI.name().str() + " due to " +
                            getFailureName(Reason);
          auto Error = make_error<StringError>(
              Msg, make_error_code(errc::not_supported));
          logAllUnhandledErrors(std::move(Error), errs(),
                                "Error importing module: ");
          break;
        } else {
          LLVM_DEBUG(dbgs()
                     << "ignored! No qualifying callee with summary found.\n");
          continue;
        }
      }

      // "Resolve" the summary
      CalleeSummary = CalleeSummary->getBaseObject();
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);

      assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
              (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
             "selectCallee() didn't honor the threshold");

      auto ExportModulePath = ResolvedCalleeSummary->modulePath();

      // Try emplace the definition entry, and update stats based on insertion
      // status.
      if (ImportList.addDefinition(ExportModulePath, VI.getGUID()) !=
          FunctionImporter::ImportMapTy::AddDefinitionStatus::NoChange) {
        NumImportedFunctionsThinLink++;
        if (IsHotCallsite)
          NumImportedHotFunctionsThinLink++;
        if (IsCriticalCallsite)
          NumImportedCriticalFunctionsThinLink++;
      }

      // Any calls/references made by this function will be marked exported
      // later, in ComputeCrossModuleImport, after import decisions are
      // complete, which is more efficient than adding them here.
      if (ExportLists)
        (*ExportLists)[ExportModulePath].insert(VI);
    }

    auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
      // Adjust the threshold for next level of imported functions.
      // The threshold is different for hot callsites because we can then
      // inline chains of hot calls.
      if (IsHotCallsite)
        return Threshold * ImportHotInstrFactor;
      return Threshold * ImportInstrFactor;
    };

    // Note: derived from the incoming Threshold, not from NewThreshold.
    const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);

    ImportCount++;

    // Insert the newly imported function to the worklist.
    Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
  }
}
1016 
1017 void ModuleImportsManager::computeImportForModule(
1018     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
1019     FunctionImporter::ImportMapTy &ImportList) {
1020   // Worklist contains the list of function imported in this module, for which
1021   // we will analyse the callees and may import further down the callgraph.
1022   SmallVector<EdgeInfo, 128> Worklist;
1023   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
1024                       ExportLists);
1025   FunctionImporter::ImportThresholdsTy ImportThresholds;
1026 
1027   // Populate the worklist with the import for the functions in the current
1028   // module
1029   for (const auto &GVSummary : DefinedGVSummaries) {
1030 #ifndef NDEBUG
1031     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
1032     // so this map look up (and possibly others) can be avoided.
1033     auto VI = Index.getValueInfo(GVSummary.first);
1034 #endif
1035     if (!Index.isGlobalValueLive(GVSummary.second)) {
1036       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
1037       continue;
1038     }
1039     auto *FuncSummary =
1040         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
1041     if (!FuncSummary)
1042       // Skip import for global variables
1043       continue;
1044     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
1045     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
1046                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
1047                              ImportList, ExportLists, ImportThresholds);
1048   }
1049 
1050   // Process the newly imported functions and add callees to the worklist.
1051   while (!Worklist.empty()) {
1052     auto GVInfo = Worklist.pop_back_val();
1053     auto *Summary = std::get<0>(GVInfo);
1054     auto Threshold = std::get<1>(GVInfo);
1055 
1056     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
1057       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
1058                                IsPrevailing, Worklist, GVI, ImportList,
1059                                ExportLists, ImportThresholds);
1060   }
1061 
1062   // Print stats about functions considered but rejected for importing
1063   // when requested.
1064   if (PrintImportFailures) {
1065     dbgs() << "Missed imports into module " << ModName << "\n";
1066     for (auto &I : ImportThresholds) {
1067       auto &ProcessedThreshold = std::get<0>(I.second);
1068       auto &CalleeSummary = std::get<1>(I.second);
1069       auto &FailureInfo = std::get<2>(I.second);
1070       if (CalleeSummary)
1071         continue; // We are going to import.
1072       assert(FailureInfo);
1073       FunctionSummary *FS = nullptr;
1074       if (!FailureInfo->VI.getSummaryList().empty())
1075         FS = dyn_cast<FunctionSummary>(
1076             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
1077       dbgs() << FailureInfo->VI
1078              << ": Reason = " << getFailureName(FailureInfo->Reason)
1079              << ", Threshold = " << ProcessedThreshold
1080              << ", Size = " << (FS ? (int)FS->instCount() : -1)
1081              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
1082              << ", Attempts = " << FailureInfo->Attempts << "\n";
1083     }
1084   }
1085 }
1086 
1087 #ifndef NDEBUG
1088 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
1089   auto SL = VI.getSummaryList();
1090   return SL.empty()
1091              ? false
1092              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
1093 }
1094 
1095 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
1096                                GlobalValue::GUID G) {
1097   if (const auto &VI = Index.getValueInfo(G))
1098     return isGlobalVarSummary(Index, VI);
1099   return false;
1100 }
1101 
1102 // Return the number of global variable summaries in ExportSet.
1103 static unsigned
1104 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1105                       FunctionImporter::ExportSetTy &ExportSet) {
1106   unsigned NumGVS = 0;
1107   for (auto &VI : ExportSet)
1108     if (isGlobalVarSummary(Index, VI.getGUID()))
1109       ++NumGVS;
1110   return NumGVS;
1111 }
1112 
1113 struct ImportStatistics {
1114   unsigned NumGVS = 0;
1115   unsigned DefinedFS = 0;
1116   unsigned Count = 0;
1117 };
1118 
1119 // Compute import statistics for each source module in ImportList.
1120 static DenseMap<StringRef, ImportStatistics>
1121 collectImportStatistics(const ModuleSummaryIndex &Index,
1122                         const FunctionImporter::ImportMapTy &ImportList) {
1123   DenseMap<StringRef, ImportStatistics> Histogram;
1124 
1125   for (const auto &[FromModule, GUID, Type] : ImportList) {
1126     ImportStatistics &Entry = Histogram[FromModule];
1127     ++Entry.Count;
1128     if (isGlobalVarSummary(Index, GUID))
1129       ++Entry.NumGVS;
1130     else if (Type == GlobalValueSummary::Definition)
1131       ++Entry.DefinedFS;
1132   }
1133   return Histogram;
1134 }
1135 #endif
1136 
1137 #ifndef NDEBUG
1138 static bool checkVariableImport(
1139     const ModuleSummaryIndex &Index,
1140     FunctionImporter::ImportListsTy &ImportLists,
1141     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1142   DenseSet<GlobalValue::GUID> FlattenedImports;
1143 
1144   for (const auto &ImportPerModule : ImportLists)
1145     for (const auto &[FromModule, GUID, ImportType] : ImportPerModule.second)
1146       FlattenedImports.insert(GUID);
1147 
1148   // Checks that all GUIDs of read/writeonly vars we see in export lists
1149   // are also in the import lists. Otherwise we my face linker undefs,
1150   // because readonly and writeonly vars are internalized in their
1151   // source modules. The exception would be if it has a linkage type indicating
1152   // that there may have been a copy existing in the importing module (e.g.
1153   // linkonce_odr). In that case we cannot accurately do this checking.
1154   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1155                                                   const ValueInfo &VI) {
1156     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1157         Index.findSummaryInModule(VI, ModulePath));
1158     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1159            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1160              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1161              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1162   };
1163 
1164   for (auto &ExportPerModule : ExportLists)
1165     for (auto &VI : ExportPerModule.second)
1166       if (!FlattenedImports.count(VI.getGUID()) &&
1167           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1168         return false;
1169 
1170   return true;
1171 }
1172 #endif
1173 
1174 /// Compute all the import and export for every module using the Index.
1175 void llvm::ComputeCrossModuleImport(
1176     const ModuleSummaryIndex &Index,
1177     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1178     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1179         isPrevailing,
1180     FunctionImporter::ImportListsTy &ImportLists,
1181     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1182   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1183   // For each module that has function defined, compute the import/export lists.
1184   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1185     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1186     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1187                       << DefinedGVSummaries.first << "'\n");
1188     MIS->computeImportForModule(DefinedGVSummaries.second,
1189                                 DefinedGVSummaries.first, ImportList);
1190   }
1191 
1192   // When computing imports we only added the variables and functions being
1193   // imported to the export list. We also need to mark any references and calls
1194   // they make as exported as well. We do this here, as it is more efficient
1195   // since we may import the same values multiple times into different modules
1196   // during the import computation.
1197   for (auto &ELI : ExportLists) {
1198     // `NewExports` tracks the VI that gets exported because the full definition
1199     // of its user/referencer gets exported.
1200     FunctionImporter::ExportSetTy NewExports;
1201     const auto &DefinedGVSummaries =
1202         ModuleToDefinedGVSummaries.lookup(ELI.first);
1203     for (auto &EI : ELI.second) {
1204       // Find the copy defined in the exporting module so that we can mark the
1205       // values it references in that specific definition as exported.
1206       // Below we will add all references and called values, without regard to
1207       // whether they are also defined in this module. We subsequently prune the
1208       // list to only include those defined in the exporting module, see comment
1209       // there as to why.
1210       auto DS = DefinedGVSummaries.find(EI.getGUID());
1211       // Anything marked exported during the import computation must have been
1212       // defined in the exporting module.
1213       assert(DS != DefinedGVSummaries.end());
1214       auto *S = DS->getSecond();
1215       S = S->getBaseObject();
1216       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1217         // Export referenced functions and variables. We don't export/promote
1218         // objects referenced by writeonly variable initializer, because
1219         // we convert such variables initializers to "zeroinitializer".
1220         // See processGlobalForThinLTO.
1221         if (!Index.isWriteOnly(GVS))
1222           for (const auto &VI : GVS->refs())
1223             NewExports.insert(VI);
1224       } else {
1225         auto *FS = cast<FunctionSummary>(S);
1226         for (const auto &Edge : FS->calls())
1227           NewExports.insert(Edge.first);
1228         for (const auto &Ref : FS->refs())
1229           NewExports.insert(Ref);
1230       }
1231     }
1232     // Prune list computed above to only include values defined in the
1233     // exporting module. We do this after the above insertion since we may hit
1234     // the same ref/call target multiple times in above loop, and it is more
1235     // efficient to avoid a set lookup each time.
1236     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1237       if (!DefinedGVSummaries.count(EI->getGUID()))
1238         NewExports.erase(EI++);
1239       else
1240         ++EI;
1241     }
1242     ELI.second.insert(NewExports.begin(), NewExports.end());
1243   }
1244 
1245   assert(checkVariableImport(Index, ImportLists, ExportLists));
1246 #ifndef NDEBUG
1247   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1248                     << " modules:\n");
1249   for (const auto &ModuleImports : ImportLists) {
1250     auto ModName = ModuleImports.first;
1251     auto &Exports = ExportLists[ModName];
1252     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1253     DenseMap<StringRef, ImportStatistics> Histogram =
1254         collectImportStatistics(Index, ModuleImports.second);
1255     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1256                       << Exports.size() - NumGVS << " functions and " << NumGVS
1257                       << " vars. Imports from " << Histogram.size()
1258                       << " modules.\n");
1259     for (const auto &[SrcModName, Stats] : Histogram) {
1260       LLVM_DEBUG(dbgs() << " - " << Stats.DefinedFS
1261                         << " function definitions and "
1262                         << Stats.Count - Stats.NumGVS - Stats.DefinedFS
1263                         << " function declarations imported from " << SrcModName
1264                         << "\n");
1265       LLVM_DEBUG(dbgs() << " - " << Stats.NumGVS
1266                         << " global vars imported from " << SrcModName << "\n");
1267     }
1268   }
1269 #endif
1270 }
1271 
1272 #ifndef NDEBUG
1273 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1274                                     StringRef ModulePath,
1275                                     FunctionImporter::ImportMapTy &ImportList) {
1276   DenseMap<StringRef, ImportStatistics> Histogram =
1277       collectImportStatistics(Index, ImportList);
1278   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1279                     << Histogram.size() << " modules.\n");
1280   for (const auto &[SrcModName, Stats] : Histogram) {
1281     LLVM_DEBUG(dbgs() << " - " << Stats.DefinedFS
1282                       << " function definitions and "
1283                       << Stats.Count - Stats.DefinedFS - Stats.NumGVS
1284                       << " function declarations imported from " << SrcModName
1285                       << "\n");
1286     LLVM_DEBUG(dbgs() << " - " << Stats.NumGVS << " vars imported from "
1287                       << SrcModName << "\n");
1288   }
1289 }
1290 #endif
1291 
1292 /// Compute all the imports for the given module using the Index.
1293 ///
1294 /// \p isPrevailing is a callback that will be called with a global value's GUID
1295 /// and summary and should return whether the module corresponding to the
1296 /// summary contains the linker-prevailing copy of that value.
1297 ///
1298 /// \p ImportList will be populated with a map that can be passed to
1299 /// FunctionImporter::importFunctions() above (see description there).
1300 static void ComputeCrossModuleImportForModuleForTest(
1301     StringRef ModulePath,
1302     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1303         isPrevailing,
1304     const ModuleSummaryIndex &Index,
1305     FunctionImporter::ImportMapTy &ImportList) {
1306   // Collect the list of functions this module defines.
1307   // GUID -> Summary
1308   GVSummaryMapTy FunctionSummaryMap;
1309   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1310 
1311   // Compute the import list for this module.
1312   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1313   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1314   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1315 
1316 #ifndef NDEBUG
1317   dumpImportListForModule(Index, ModulePath, ImportList);
1318 #endif
1319 }
1320 
1321 /// Mark all external summaries in \p Index for import into the given module.
1322 /// Used for testing the case of distributed builds using a distributed index.
1323 ///
1324 /// \p ImportList will be populated with a map that can be passed to
1325 /// FunctionImporter::importFunctions() above (see description there).
1326 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1327     StringRef ModulePath, const ModuleSummaryIndex &Index,
1328     FunctionImporter::ImportMapTy &ImportList) {
1329   for (const auto &GlobalList : Index) {
1330     // Ignore entries for undefined references.
1331     if (GlobalList.second.SummaryList.empty())
1332       continue;
1333 
1334     auto GUID = GlobalList.first;
1335     assert(GlobalList.second.SummaryList.size() == 1 &&
1336            "Expected individual combined index to have one summary per GUID");
1337     auto &Summary = GlobalList.second.SummaryList[0];
1338     // Skip the summaries for the importing module. These are included to
1339     // e.g. record required linkage changes.
1340     if (Summary->modulePath() == ModulePath)
1341       continue;
1342     // Add an entry to provoke importing by thinBackend.
1343     ImportList.addGUID(Summary->modulePath(), GUID, Summary->importType());
1344   }
1345 #ifndef NDEBUG
1346   dumpImportListForModule(Index, ModulePath, ImportList);
1347 #endif
1348 }
1349 
1350 // For SamplePGO, the indirect call targets for local functions will
1351 // have its original name annotated in profile. We try to find the
1352 // corresponding PGOFuncName as the GUID, and fix up the edges
1353 // accordingly.
1354 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1355                                      FunctionSummary *FS) {
1356   for (auto &EI : FS->mutableCalls()) {
1357     if (!EI.first.getSummaryList().empty())
1358       continue;
1359     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1360     if (GUID == 0)
1361       continue;
1362     // Update the edge to point directly to the correct GUID.
1363     auto VI = Index.getValueInfo(GUID);
1364     if (llvm::any_of(
1365             VI.getSummaryList(),
1366             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1367               // The mapping from OriginalId to GUID may return a GUID
1368               // that corresponds to a static variable. Filter it out here.
1369               // This can happen when
1370               // 1) There is a call to a library function which is not defined
1371               // in the index.
1372               // 2) There is a static variable with the  OriginalGUID identical
1373               // to the GUID of the library function in 1);
1374               // When this happens the static variable in 2) will be found,
1375               // which needs to be filtered out.
1376               return SummaryPtr->getSummaryKind() ==
1377                      GlobalValueSummary::GlobalVarKind;
1378             }))
1379       continue;
1380     EI.first = VI;
1381   }
1382 }
1383 
1384 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1385   for (const auto &Entry : Index) {
1386     for (const auto &S : Entry.second.SummaryList) {
1387       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1388         updateValueInfoForIndirectCalls(Index, FS);
1389     }
1390   }
1391 }
1392 
1393 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1394     ModuleSummaryIndex &Index,
1395     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1396     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1397   assert(!Index.withGlobalValueDeadStripping());
1398   if (!ComputeDead ||
1399       // Don't do anything when nothing is live, this is friendly with tests.
1400       GUIDPreservedSymbols.empty()) {
1401     // Still need to update indirect calls.
1402     updateIndirectCalls(Index);
1403     return;
1404   }
1405   unsigned LiveSymbols = 0;
1406   SmallVector<ValueInfo, 128> Worklist;
1407   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1408   for (auto GUID : GUIDPreservedSymbols) {
1409     ValueInfo VI = Index.getValueInfo(GUID);
1410     if (!VI)
1411       continue;
1412     for (const auto &S : VI.getSummaryList())
1413       S->setLive(true);
1414   }
1415 
1416   // Add values flagged in the index as live roots to the worklist.
1417   for (const auto &Entry : Index) {
1418     auto VI = Index.getValueInfo(Entry);
1419     for (const auto &S : Entry.second.SummaryList) {
1420       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1421         updateValueInfoForIndirectCalls(Index, FS);
1422       if (S->isLive()) {
1423         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1424         Worklist.push_back(VI);
1425         ++LiveSymbols;
1426         break;
1427       }
1428     }
1429   }
1430 
1431   // Make value live and add it to the worklist if it was not live before.
1432   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1433     // FIXME: If we knew which edges were created for indirect call profiles,
1434     // we could skip them here. Any that are live should be reached via
1435     // other edges, e.g. reference edges. Otherwise, using a profile collected
1436     // on a slightly different binary might provoke preserving, importing
1437     // and ultimately promoting calls to functions not linked into this
1438     // binary, which increases the binary size unnecessarily. Note that
1439     // if this code changes, the importer needs to change so that edges
1440     // to functions marked dead are skipped.
1441 
1442     if (llvm::any_of(VI.getSummaryList(),
1443                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1444                        return S->isLive();
1445                      }))
1446       return;
1447 
1448     // We only keep live symbols that are known to be non-prevailing if any are
1449     // available_externally, linkonceodr, weakodr. Those symbols are discarded
1450     // later in the EliminateAvailableExternally pass and setting them to
1451     // not-live could break downstreams users of liveness information (PR36483)
1452     // or limit optimization opportunities.
1453     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1454       bool KeepAliveLinkage = false;
1455       bool Interposable = false;
1456       for (const auto &S : VI.getSummaryList()) {
1457         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1458             S->linkage() == GlobalValue::WeakODRLinkage ||
1459             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1460           KeepAliveLinkage = true;
1461         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1462           Interposable = true;
1463       }
1464 
1465       if (!IsAliasee) {
1466         if (!KeepAliveLinkage)
1467           return;
1468 
1469         if (Interposable)
1470           report_fatal_error(
1471               "Interposable and available_externally/linkonce_odr/weak_odr "
1472               "symbol");
1473       }
1474     }
1475 
1476     for (const auto &S : VI.getSummaryList())
1477       S->setLive(true);
1478     ++LiveSymbols;
1479     Worklist.push_back(VI);
1480   };
1481 
1482   while (!Worklist.empty()) {
1483     auto VI = Worklist.pop_back_val();
1484     for (const auto &Summary : VI.getSummaryList()) {
1485       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1486         // If this is an alias, visit the aliasee VI to ensure that all copies
1487         // are marked live and it is added to the worklist for further
1488         // processing of its references.
1489         visit(AS->getAliaseeVI(), true);
1490         continue;
1491       }
1492       for (auto Ref : Summary->refs())
1493         visit(Ref, false);
1494       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1495         for (auto Call : FS->calls())
1496           visit(Call.first, false);
1497     }
1498   }
1499   Index.setWithGlobalValueDeadStripping();
1500 
1501   unsigned DeadSymbols = Index.size() - LiveSymbols;
1502   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1503                     << " symbols Dead \n");
1504   NumDeadSymbols += DeadSymbols;
1505   NumLiveSymbols += LiveSymbols;
1506 }
1507 
1508 // Compute dead symbols and propagate constants in combined index.
1509 void llvm::computeDeadSymbolsWithConstProp(
1510     ModuleSummaryIndex &Index,
1511     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1512     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1513     bool ImportEnabled) {
1514   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1515                                            isPrevailing);
1516   if (ImportEnabled)
1517     Index.propagateAttributes(GUIDPreservedSymbols);
1518 }
1519 
1520 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1521 /// \p ModulePath.
1522 void llvm::gatherImportedSummariesForModule(
1523     StringRef ModulePath,
1524     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1525     const FunctionImporter::ImportMapTy &ImportList,
1526     ModuleToSummariesForIndexTy &ModuleToSummariesForIndex,
1527     GVSummaryPtrSet &DecSummaries) {
1528   // Include all summaries from the importing module.
1529   ModuleToSummariesForIndex[std::string(ModulePath)] =
1530       ModuleToDefinedGVSummaries.lookup(ModulePath);
1531 
1532   // Forward port the heterogeneous std::map::operator[]() from C++26, which
1533   // lets us look up the map without allocating an instance of std::string when
1534   // the key-value pair exists in the map.
1535   // TODO: Remove this in favor of the heterogenous std::map::operator[]() from
1536   // C++26 when it becomes available for our codebase.
1537   auto LookupOrCreate = [](ModuleToSummariesForIndexTy &Map,
1538                            StringRef Key) -> GVSummaryMapTy & {
1539     auto It = Map.find(Key);
1540     if (It == Map.end())
1541       std::tie(It, std::ignore) =
1542           Map.try_emplace(std::string(Key), GVSummaryMapTy());
1543     return It->second;
1544   };
1545 
1546   // Include summaries for imports.
1547   for (const auto &[FromModule, GUID, ImportType] : ImportList) {
1548     auto &SummariesForIndex =
1549         LookupOrCreate(ModuleToSummariesForIndex, FromModule);
1550 
1551     const auto &DefinedGVSummaries = ModuleToDefinedGVSummaries.at(FromModule);
1552     const auto &DS = DefinedGVSummaries.find(GUID);
1553     assert(DS != DefinedGVSummaries.end() &&
1554            "Expected a defined summary for imported global value");
1555     if (ImportType == GlobalValueSummary::Declaration)
1556       DecSummaries.insert(DS->second);
1557 
1558     SummariesForIndex[GUID] = DS->second;
1559   }
1560 }
1561 
1562 /// Emit the files \p ModulePath will import from into \p OutputFilename.
1563 Error llvm::EmitImportsFiles(
1564     StringRef ModulePath, StringRef OutputFilename,
1565     const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex) {
1566   std::error_code EC;
1567   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1568   if (EC)
1569     return createFileError("cannot open " + OutputFilename,
1570                            errorCodeToError(EC));
1571   for (const auto &ILI : ModuleToSummariesForIndex)
1572     // The ModuleToSummariesForIndex map includes an entry for the current
1573     // Module (needed for writing out the index files). We don't want to
1574     // include it in the imports file, however, so filter it out.
1575     if (ILI.first != ModulePath)
1576       ImportsOS << ILI.first << "\n";
1577   return Error::success();
1578 }
1579 
1580 bool llvm::convertToDeclaration(GlobalValue &GV) {
1581   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1582                     << "\n");
1583   if (Function *F = dyn_cast<Function>(&GV)) {
1584     F->deleteBody();
1585     F->clearMetadata();
1586     F->setComdat(nullptr);
1587   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1588     V->setInitializer(nullptr);
1589     V->setLinkage(GlobalValue::ExternalLinkage);
1590     V->clearMetadata();
1591     V->setComdat(nullptr);
1592   } else {
1593     GlobalValue *NewGV;
1594     if (GV.getValueType()->isFunctionTy())
1595       NewGV =
1596           Function::Create(cast<FunctionType>(GV.getValueType()),
1597                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1598                            "", GV.getParent());
1599     else
1600       NewGV =
1601           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1602                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1603                              /*init*/ nullptr, "",
1604                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1605                              GV.getType()->getAddressSpace());
1606     NewGV->takeName(&GV);
1607     GV.replaceAllUsesWith(NewGV);
1608     return false;
1609   }
1610   if (!GV.isImplicitDSOLocal())
1611     GV.setDSOLocal(false);
1612   return true;
1613 }
1614 
1615 void llvm::thinLTOFinalizeInModule(Module &TheModule,
1616                                    const GVSummaryMapTy &DefinedGlobals,
1617                                    bool PropagateAttrs) {
1618   DenseSet<Comdat *> NonPrevailingComdats;
1619   auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
1620     // See if the global summary analysis computed a new resolved linkage.
1621     const auto &GS = DefinedGlobals.find(GV.getGUID());
1622     if (GS == DefinedGlobals.end())
1623       return;
1624 
1625     if (Propagate)
1626       if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
1627         if (Function *F = dyn_cast<Function>(&GV)) {
1628           // TODO: propagate ReadNone and ReadOnly.
1629           if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
1630             F->setDoesNotAccessMemory();
1631 
1632           if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
1633             F->setOnlyReadsMemory();
1634 
1635           if (FS->fflags().NoRecurse && !F->doesNotRecurse())
1636             F->setDoesNotRecurse();
1637 
1638           if (FS->fflags().NoUnwind && !F->doesNotThrow())
1639             F->setDoesNotThrow();
1640         }
1641       }
1642 
1643     auto NewLinkage = GS->second->linkage();
1644     if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
1645         // Don't internalize anything here, because the code below
1646         // lacks necessary correctness checks. Leave this job to
1647         // LLVM 'internalize' pass.
1648         GlobalValue::isLocalLinkage(NewLinkage) ||
1649         // In case it was dead and already converted to declaration.
1650         GV.isDeclaration())
1651       return;
1652 
1653     // Set the potentially more constraining visibility computed from summaries.
1654     // The DefaultVisibility condition is because older GlobalValueSummary does
1655     // not record DefaultVisibility and we don't want to change protected/hidden
1656     // to default.
1657     if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
1658       GV.setVisibility(GS->second->getVisibility());
1659 
1660     if (NewLinkage == GV.getLinkage())
1661       return;
1662 
1663     // Check for a non-prevailing def that has interposable linkage
1664     // (e.g. non-odr weak or linkonce). In that case we can't simply
1665     // convert to available_externally, since it would lose the
1666     // interposable property and possibly get inlined. Simply drop
1667     // the definition in that case.
1668     if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
1669         GlobalValue::isInterposableLinkage(GV.getLinkage())) {
1670       if (!convertToDeclaration(GV))
1671         // FIXME: Change this to collect replaced GVs and later erase
1672         // them from the parent module once thinLTOResolvePrevailingGUID is
1673         // changed to enable this for aliases.
1674         llvm_unreachable("Expected GV to be converted");
1675     } else {
1676       // If all copies of the original symbol had global unnamed addr and
1677       // linkonce_odr linkage, or if all of them had local unnamed addr linkage
1678       // and are constants, then it should be an auto hide symbol. In that case
1679       // the thin link would have marked it as CanAutoHide. Add hidden
1680       // visibility to the symbol to preserve the property.
1681       if (NewLinkage == GlobalValue::WeakODRLinkage &&
1682           GS->second->canAutoHide()) {
1683         assert(GV.canBeOmittedFromSymbolTable());
1684         GV.setVisibility(GlobalValue::HiddenVisibility);
1685       }
1686 
1687       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
1688                         << "` from " << GV.getLinkage() << " to " << NewLinkage
1689                         << "\n");
1690       GV.setLinkage(NewLinkage);
1691     }
1692     // Remove declarations from comdats, including available_externally
1693     // as this is a declaration for the linker, and will be dropped eventually.
1694     // It is illegal for comdats to contain declarations.
1695     auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
1696     if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
1697       if (GO->getComdat()->getName() == GO->getName())
1698         NonPrevailingComdats.insert(GO->getComdat());
1699       GO->setComdat(nullptr);
1700     }
1701   };
1702 
1703   // Process functions and global now
1704   for (auto &GV : TheModule)
1705     FinalizeInModule(GV, PropagateAttrs);
1706   for (auto &GV : TheModule.globals())
1707     FinalizeInModule(GV);
1708   for (auto &GV : TheModule.aliases())
1709     FinalizeInModule(GV);
1710 
1711   // For a non-prevailing comdat, all its members must be available_externally.
1712   // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
1713   // local linkage GlobalValues.
1714   if (NonPrevailingComdats.empty())
1715     return;
1716   for (auto &GO : TheModule.global_objects()) {
1717     if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
1718       GO.setComdat(nullptr);
1719       GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
1720     }
1721   }
1722   bool Changed;
1723   do {
1724     Changed = false;
1725     // If an alias references a GlobalValue in a non-prevailing comdat, change
1726     // it to available_externally. For simplicity we only handle GlobalValue and
1727     // ConstantExpr with a base object. ConstantExpr without a base object is
1728     // unlikely used in a COMDAT.
1729     for (auto &GA : TheModule.aliases()) {
1730       if (GA.hasAvailableExternallyLinkage())
1731         continue;
1732       GlobalObject *Obj = GA.getAliaseeObject();
1733       assert(Obj && "aliasee without an base object is unimplemented");
1734       if (Obj->hasAvailableExternallyLinkage()) {
1735         GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
1736         Changed = true;
1737       }
1738     }
1739   } while (Changed);
1740 }
1741 
1742 /// Run internalization on \p TheModule based on symmary analysis.
1743 void llvm::thinLTOInternalizeModule(Module &TheModule,
1744                                     const GVSummaryMapTy &DefinedGlobals) {
1745   // Declare a callback for the internalize pass that will ask for every
1746   // candidate GlobalValue if it can be internalized or not.
1747   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1748     // It may be the case that GV is on a chain of an ifunc, its alias and
1749     // subsequent aliases. In this case, the summary for the value is not
1750     // available.
1751     if (isa<GlobalIFunc>(&GV) ||
1752         (isa<GlobalAlias>(&GV) &&
1753          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1754       return true;
1755 
1756     // Lookup the linkage recorded in the summaries during global analysis.
1757     auto GS = DefinedGlobals.find(GV.getGUID());
1758     if (GS == DefinedGlobals.end()) {
1759       // Must have been promoted (possibly conservatively). Find original
1760       // name so that we can access the correct summary and see if it can
1761       // be internalized again.
1762       // FIXME: Eventually we should control promotion instead of promoting
1763       // and internalizing again.
1764       StringRef OrigName =
1765           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1766       std::string OrigId = GlobalValue::getGlobalIdentifier(
1767           OrigName, GlobalValue::InternalLinkage,
1768           TheModule.getSourceFileName());
1769       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1770       if (GS == DefinedGlobals.end()) {
1771         // Also check the original non-promoted non-globalized name. In some
1772         // cases a preempted weak value is linked in as a local copy because
1773         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1774         // In that case, since it was originally not a local value, it was
1775         // recorded in the index using the original name.
1776         // FIXME: This may not be needed once PR27866 is fixed.
1777         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1778         assert(GS != DefinedGlobals.end());
1779       }
1780     }
1781     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1782   };
1783 
1784   // FIXME: See if we can just internalize directly here via linkage changes
1785   // based on the index, rather than invoking internalizeModule.
1786   internalizeModule(TheModule, MustPreserveGV);
1787 }
1788 
1789 /// Make alias a clone of its aliasee.
1790 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1791   Function *Fn = cast<Function>(GA->getAliaseeObject());
1792 
1793   ValueToValueMapTy VMap;
1794   Function *NewFn = CloneFunction(Fn, VMap);
1795   // Clone should use the original alias's linkage, visibility and name, and we
1796   // ensure all uses of alias instead use the new clone (casted if necessary).
1797   NewFn->setLinkage(GA->getLinkage());
1798   NewFn->setVisibility(GA->getVisibility());
1799   GA->replaceAllUsesWith(NewFn);
1800   NewFn->takeName(GA);
1801   return NewFn;
1802 }
1803 
1804 // Internalize values that we marked with specific attribute
1805 // in processGlobalForThinLTO.
1806 static void internalizeGVsAfterImport(Module &M) {
1807   for (auto &GV : M.globals())
1808     // Skip GVs which have been converted to declarations
1809     // by dropDeadSymbols.
1810     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1811       GV.setLinkage(GlobalValue::InternalLinkage);
1812       GV.setVisibility(GlobalValue::DefaultVisibility);
1813     }
1814 }
1815 
1816 // Automatically import functions in Module \p DestModule based on the summaries
1817 // index.
1818 Expected<bool> FunctionImporter::importFunctions(
1819     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
1820   LLVM_DEBUG(dbgs() << "Starting import for Module "
1821                     << DestModule.getModuleIdentifier() << "\n");
1822   unsigned ImportedCount = 0, ImportedGVCount = 0;
1823 
1824   IRMover Mover(DestModule);
1825 
1826   // Do the actual import of functions now, one Module at a time
1827   for (const auto &ModName : ImportList.getSourceModules()) {
1828     // Get the module for the import
1829     Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(ModName);
1830     if (!SrcModuleOrErr)
1831       return SrcModuleOrErr.takeError();
1832     std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
1833     assert(&DestModule.getContext() == &SrcModule->getContext() &&
1834            "Context mismatch");
1835 
1836     // If modules were created with lazy metadata loading, materialize it
1837     // now, before linking it (otherwise this will be a noop).
1838     if (Error Err = SrcModule->materializeMetadata())
1839       return std::move(Err);
1840 
1841     // Find the globals to import
1842     SetVector<GlobalValue *> GlobalsToImport;
1843     for (Function &F : *SrcModule) {
1844       if (!F.hasName())
1845         continue;
1846       auto GUID = F.getGUID();
1847       auto MaybeImportType = ImportList.getImportType(ModName, GUID);
1848       bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
1849 
1850       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1851                         << " importing function"
1852                         << (ImportDefinition
1853                                 ? " definition "
1854                                 : (MaybeImportType ? " declaration " : " "))
1855                         << GUID << " " << F.getName() << " from "
1856                         << SrcModule->getSourceFileName() << "\n");
1857       if (ImportDefinition) {
1858         if (Error Err = F.materialize())
1859           return std::move(Err);
1860         // MemProf should match function's definition and summary,
1861         // 'thinlto_src_module' is needed.
1862         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1863           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1864           // statistics and debugging.
1865           F.setMetadata(
1866               "thinlto_src_module",
1867               MDNode::get(DestModule.getContext(),
1868                           {MDString::get(DestModule.getContext(),
1869                                          SrcModule->getModuleIdentifier())}));
1870           F.setMetadata(
1871               "thinlto_src_file",
1872               MDNode::get(DestModule.getContext(),
1873                           {MDString::get(DestModule.getContext(),
1874                                          SrcModule->getSourceFileName())}));
1875         }
1876         GlobalsToImport.insert(&F);
1877       }
1878     }
1879     for (GlobalVariable &GV : SrcModule->globals()) {
1880       if (!GV.hasName())
1881         continue;
1882       auto GUID = GV.getGUID();
1883       auto MaybeImportType = ImportList.getImportType(ModName, GUID);
1884       bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
1885 
1886       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1887                         << " importing global"
1888                         << (ImportDefinition
1889                                 ? " definition "
1890                                 : (MaybeImportType ? " declaration " : " "))
1891                         << GUID << " " << GV.getName() << " from "
1892                         << SrcModule->getSourceFileName() << "\n");
1893       if (ImportDefinition) {
1894         if (Error Err = GV.materialize())
1895           return std::move(Err);
1896         ImportedGVCount += GlobalsToImport.insert(&GV);
1897       }
1898     }
1899     for (GlobalAlias &GA : SrcModule->aliases()) {
1900       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
1901         continue;
1902       auto GUID = GA.getGUID();
1903       auto MaybeImportType = ImportList.getImportType(ModName, GUID);
1904       bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
1905 
1906       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1907                         << " importing alias"
1908                         << (ImportDefinition
1909                                 ? " definition "
1910                                 : (MaybeImportType ? " declaration " : " "))
1911                         << GUID << " " << GA.getName() << " from "
1912                         << SrcModule->getSourceFileName() << "\n");
1913       if (ImportDefinition) {
1914         if (Error Err = GA.materialize())
1915           return std::move(Err);
1916         // Import alias as a copy of its aliasee.
1917         GlobalObject *GO = GA.getAliaseeObject();
1918         if (Error Err = GO->materialize())
1919           return std::move(Err);
1920         auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
1921         LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
1922                           << GO->getName() << " from "
1923                           << SrcModule->getSourceFileName() << "\n");
1924         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1925           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1926           // statistics and debugging.
1927           Fn->setMetadata(
1928               "thinlto_src_module",
1929               MDNode::get(DestModule.getContext(),
1930                           {MDString::get(DestModule.getContext(),
1931                                          SrcModule->getModuleIdentifier())}));
1932           Fn->setMetadata(
1933               "thinlto_src_file",
1934               MDNode::get(DestModule.getContext(),
1935                           {MDString::get(DestModule.getContext(),
1936                                          SrcModule->getSourceFileName())}));
1937         }
1938         GlobalsToImport.insert(Fn);
1939       }
1940     }
1941 
1942     // Upgrade debug info after we're done materializing all the globals and we
1943     // have loaded all the required metadata!
1944     UpgradeDebugInfo(*SrcModule);
1945 
1946     // Set the partial sample profile ratio in the profile summary module flag
1947     // of the imported source module, if applicable, so that the profile summary
1948     // module flag will match with that of the destination module when it's
1949     // imported.
1950     SrcModule->setPartialSampleProfileRatio(Index);
1951 
1952     // Link in the specified functions.
1953     renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
1954                            &GlobalsToImport);
1955 
1956     if (PrintImports) {
1957       for (const auto *GV : GlobalsToImport)
1958         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
1959                << " from " << SrcModule->getSourceFileName() << "\n";
1960     }
1961 
1962     if (Error Err = Mover.move(std::move(SrcModule),
1963                                GlobalsToImport.getArrayRef(), nullptr,
1964                                /*IsPerformingImport=*/true))
1965       return createStringError(errc::invalid_argument,
1966                                Twine("Function Import: link error: ") +
1967                                    toString(std::move(Err)));
1968 
1969     ImportedCount += GlobalsToImport.size();
1970     NumImportedModules++;
1971   }
1972 
1973   internalizeGVsAfterImport(DestModule);
1974 
1975   NumImportedFunctions += (ImportedCount - ImportedGVCount);
1976   NumImportedGlobalVars += ImportedGVCount;
1977 
1978   // TODO: Print counters for definitions and declarations in the debugging log.
1979   LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
1980                     << " functions for Module "
1981                     << DestModule.getModuleIdentifier() << "\n");
1982   LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
1983                     << " global variables for Module "
1984                     << DestModule.getModuleIdentifier() << "\n");
1985   return ImportedCount;
1986 }
1987 
1988 static bool doImportingForModuleForTest(
1989     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1990                    isPrevailing) {
1991   if (SummaryFile.empty())
1992     report_fatal_error("error: -function-import requires -summary-file\n");
1993   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1994       getModuleSummaryIndexForFile(SummaryFile);
1995   if (!IndexPtrOrErr) {
1996     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1997                           "Error loading file '" + SummaryFile + "': ");
1998     return false;
1999   }
2000   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
2001 
2002   // First step is collecting the import list.
2003   FunctionImporter::ImportIDTable ImportIDs;
2004   FunctionImporter::ImportMapTy ImportList(ImportIDs);
2005   // If requested, simply import all functions in the index. This is used
2006   // when testing distributed backend handling via the opt tool, when
2007   // we have distributed indexes containing exactly the summaries to import.
2008   if (ImportAllIndex)
2009     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
2010                                                       *Index, ImportList);
2011   else
2012     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
2013                                              isPrevailing, *Index, ImportList);
2014 
2015   // Conservatively mark all internal values as promoted. This interface is
2016   // only used when doing importing via the function importing pass. The pass
2017   // is only enabled when testing importing via the 'opt' tool, which does
2018   // not do the ThinLink that would normally determine what values to promote.
2019   for (auto &I : *Index) {
2020     for (auto &S : I.second.SummaryList) {
2021       if (GlobalValue::isLocalLinkage(S->linkage()))
2022         S->setLinkage(GlobalValue::ExternalLinkage);
2023     }
2024   }
2025 
2026   // Next we need to promote to global scope and rename any local values that
2027   // are potentially exported to other modules.
2028   renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
2029                          /*GlobalsToImport=*/nullptr);
2030 
2031   // Perform the import now.
2032   auto ModuleLoader = [&M](StringRef Identifier) {
2033     return loadFile(std::string(Identifier), M.getContext());
2034   };
2035   FunctionImporter Importer(*Index, ModuleLoader,
2036                             /*ClearDSOLocalOnDeclarations=*/false);
2037   Expected<bool> Result = Importer.importFunctions(M, ImportList);
2038 
2039   // FIXME: Probably need to propagate Errors through the pass manager.
2040   if (!Result) {
2041     logAllUnhandledErrors(Result.takeError(), errs(),
2042                           "Error importing module: ");
2043     return true;
2044   }
2045 
2046   return true;
2047 }
2048 
2049 PreservedAnalyses FunctionImportPass::run(Module &M,
2050                                           ModuleAnalysisManager &AM) {
2051   // This is only used for testing the function import pass via opt, where we
2052   // don't have prevailing information from the LTO context available, so just
2053   // conservatively assume everything is prevailing (which is fine for the very
2054   // limited use of prevailing checking in this pass).
2055   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
2056     return true;
2057   };
2058   if (!doImportingForModuleForTest(M, isPrevailing))
2059     return PreservedAnalyses::all();
2060 
2061   return PreservedAnalyses::none();
2062 }
2063