xref: /llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision dbd7ce0ccd3a88f2c1d6e47d31da63a48cafdc8f)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalAlias.h"
24 #include "llvm/IR/GlobalObject.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Metadata.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/IR/ModuleSummaryIndex.h"
30 #include "llvm/IRReader/IRReader.h"
31 #include "llvm/Linker/IRMover.h"
32 #include "llvm/ProfileData/PGOCtxProfReader.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
// Statistics for this file. Counters with the "ThinLink" suffix record what
// the thin link decided to import; the remaining ones record what was imported
// in the backend and the liveness of symbols in the index, as stated in each
// description string.
STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
/// Limit on instruction count of imported functions. Defaults to 100; the
/// multipliers and evolution factors below scale this base threshold.
static cl::opt<unsigned> ImportInstrLimit(
    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import functions with less than N instructions"));

/// Optional cap on how many functions get imported. Per the option
/// description the cap only applies when N >= 0; the default of -1 leaves it
/// disabled.
static cl::opt<int> ImportCutoff(
    "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import first N functions if N>=0 (default -1)"));

/// When set, selectCallee() no longer rejects a candidate for exceeding the
/// instruction-count threshold or for being marked noinline.
static cl::opt<bool>
    ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                   cl::desc("Import functions with noinline attribute"));

/// Factor (default 0.7) applied to the threshold each time we move on to the
/// callees of a newly imported function.
static cl::opt<float>
    ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
                      cl::Hidden, cl::value_desc("x"),
                      cl::desc("As we import functions, multiply the "
                               "`import-instr-limit` threshold by this factor "
                               "before processing newly imported functions"));

/// Same as ImportInstrFactor, but for functions reached through a hot
/// callsite. The default of 1.0 leaves the threshold unchanged.
static cl::opt<float> ImportHotInstrFactor(
    "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc("As we import functions called from hot callsite, multiply the "
             "`import-instr-limit` threshold by this factor "
             "before processing newly imported functions"));

/// Threshold multiplier for hot callsites (default 10.0).
static cl::opt<float> ImportHotMultiplier(
    "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
    cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));

/// Threshold multiplier for critical callsites (default 100.0).
static cl::opt<float> ImportCriticalMultiplier(
    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc(
        "Multiply the `import-instr-limit` threshold for critical callsites"));

/// Threshold multiplier for cold callsites. The default of 0 reduces the
/// threshold for cold callsites to zero.
// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
    "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
    cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));

/// Debugging aid: print the functions that get imported.
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                  cl::desc("Print imported functions"));

/// Debugging aid: print information about functions rejected for importing.
static cl::opt<bool> PrintImportFailures(
    "print-import-failures", cl::init(false), cl::Hidden,
    cl::desc("Print information for functions rejected for importing"));

/// Controls whether dead symbols are computed; enabled by default.
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
                                 cl::desc("Compute dead symbols"));

/// Opt-in switch for attaching import metadata ('thinlto_src_module' and
/// 'thinlto_src_file'); disabled by default.
static cl::opt<bool> EnableImportMetadata(
    "enable-import-metadata", cl::init(false), cl::Hidden,
    cl::desc("Enable import metadata like 'thinlto_src_module' and "
             "'thinlto_src_file'"));
130 
/// Summary file to use for function importing when using -function-import from
/// the command line.
static cl::opt<std::string>
    SummaryFile("summary-file",
                cl::desc("The summary file to use for function importing."));

/// Used when testing importing from distributed indexes via opt
/// -function-import.
static cl::opt<bool>
    ImportAllIndex("import-all-index",
                   cl::desc("Import all external functions in index."));

/// This is a test-only option.
/// If this option is enabled, the ThinLTO indexing step will import each
/// function declaration as a fallback. In a real build this may increase RAM
/// usage of the indexing step unnecessarily.
/// TODO: Implement selective import (based on combined summary analysis) to
/// ensure the imported function has a use case in the postlink pipeline.
static cl::opt<bool> ImportDeclaration(
    "import-declaration", cl::init(false), cl::Hidden,
    cl::desc("If true, import function declaration as fallback if the function "
             "definition is not imported."));

/// Pass a workload description file - an example of a workload would be the
/// functions executed to satisfy an RPC request. A workload is defined by a
/// root function and the list of functions that are (frequently) needed to
/// satisfy it. The module that defines the root will have all those functions
/// imported. The file contains a JSON dictionary. The keys are root functions,
/// the values are lists of functions to import in the module defining the
/// root. It is assumed -funique-internal-linkage-names was used, thus ensuring
/// function names are unique even for local linkage ones.
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists of "
             "functions to import in the module defining the root. It is "
             "assumed -funique-internal-linkage-names was used, to ensure "
             "local linkage functions have unique names. For example: \n"
             "{\n"
             "  \"rootFunction_1\": [\"function_to_import_1\", "
             "\"function_to_import_2\"], \n"
             "  \"rootFunction_2\": [\"function_to_import_3\", "
             "\"function_to_import_4\"] \n"
             "}"),
    cl::Hidden);

/// Defined in another translation unit. In this file it is read as the path
/// of the contextual profile to open (see
/// WorkloadImportsManager::loadFromCtxProf).
extern cl::opt<std::string> UseCtxProfile;

namespace llvm {
/// Defined in another translation unit.
/// NOTE(review): not referenced in the portion of the file reviewed here;
/// confirm where it is consumed.
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
182 
183 // Load lazily a module from \p FileName in \p Context.
184 static std::unique_ptr<Module> loadFile(const std::string &FileName,
185                                         LLVMContext &Context) {
186   SMDiagnostic Err;
187   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
188   // Metadata isn't loaded until functions are imported, to minimize
189   // the memory overhead.
190   std::unique_ptr<Module> Result =
191       getLazyIRFileModule(FileName, Err, Context,
192                           /* ShouldLazyLoadMetadata = */ true);
193   if (!Result) {
194     Err.print("function-import", errs());
195     report_fatal_error("Abort");
196   }
197 
198   return Result;
199 }
200 
201 static bool shouldSkipLocalInAnotherModule(const GlobalValueSummary *RefSummary,
202                                            size_t NumDefs,
203                                            StringRef ImporterModule) {
204   // We can import a local when there is one definition.
205   if (NumDefs == 1)
206     return false;
207   // In other cases, make sure we import the copy in the caller's module if the
208   // referenced value has local linkage. The only time a local variable can
209   // share an entry in the index is if there is a local with the same name in
210   // another module that had the same source file name (in a different
211   // directory), where each was compiled in their own directory so there was not
212   // distinguishing path.
213   return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
214          RefSummary->modulePath() != ImporterModule;
215 }
216 
217 /// Given a list of possible callee implementation for a call site, qualify the
218 /// legality of importing each. The return is a range of pairs. Each pair
219 /// corresponds to a candidate. The first value is the ImportFailureReason for
220 /// that candidate, the second is the candidate.
221 static auto qualifyCalleeCandidates(
222     const ModuleSummaryIndex &Index,
223     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
224     StringRef CallerModulePath) {
225   return llvm::map_range(
226       CalleeSummaryList,
227       [&Index, CalleeSummaryList,
228        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
229           -> std::pair<FunctionImporter::ImportFailureReason,
230                        const GlobalValueSummary *> {
231         auto *GVSummary = SummaryPtr.get();
232         if (!Index.isGlobalValueLive(GVSummary))
233           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
234 
235         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
236           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
237                   GVSummary};
238 
239         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
240 
241         // Ignore any callees that aren't actually functions. This could happen
242         // in the case of GUID hash collisions. It could also happen in theory
243         // for SamplePGO profiles collected on old versions of the code after
244         // renaming, since we synthesize edges to any inlined callees appearing
245         // in the profile.
246         if (!Summary)
247           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
248 
249         // If this is a local function, make sure we import the copy in the
250         // caller's module. The only time a local function can share an entry in
251         // the index is if there is a local with the same name in another module
252         // that had the same source file name (in a different directory), where
253         // each was compiled in their own directory so there was not
254         // distinguishing path.
255         // If the local function is from another module, it must be a reference
256         // due to indirect call profile data since a function pointer can point
257         // to a local in another module. Do the import from another module if
258         // there is only one entry in the list or when all files in the program
259         // are compiled with full path - in both cases the local function has
260         // unique PGO name and GUID.
261         if (shouldSkipLocalInAnotherModule(Summary, CalleeSummaryList.size(),
262                                            CallerModulePath))
263           return {
264               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
265               GVSummary};
266 
267         // Skip if it isn't legal to import (e.g. may reference unpromotable
268         // locals).
269         if (Summary->notEligibleToImport())
270           return {FunctionImporter::ImportFailureReason::NotEligible,
271                   GVSummary};
272 
273         return {FunctionImporter::ImportFailureReason::None, GVSummary};
274       });
275 }
276 
277 /// Given a list of possible callee implementation for a call site, select one
278 /// that fits the \p Threshold for function definition import. If none are
279 /// found, the Reason will give the last reason for the failure (last, in the
280 /// order of CalleeSummaryList entries). While looking for a callee definition,
281 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
282 /// candidate; other modules may want to know the function summary or
283 /// declaration even if a definition is not needed.
284 ///
285 /// FIXME: select "best" instead of first that fits. But what is "best"?
286 /// - The smallest: more likely to be inlined.
287 /// - The one with the least outgoing edges (already well optimized).
288 /// - One from a module already being imported from in order to reduce the
289 ///   number of source modules parsed/linked.
290 /// - One that has PGO data attached.
291 /// - [insert you fancy metric here]
292 static const GlobalValueSummary *
293 selectCallee(const ModuleSummaryIndex &Index,
294              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
295              unsigned Threshold, StringRef CallerModulePath,
296              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
297              FunctionImporter::ImportFailureReason &Reason) {
298   // Records the last summary with reason noinline or too-large.
299   TooLargeOrNoInlineSummary = nullptr;
300   auto QualifiedCandidates =
301       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
302   for (auto QualifiedValue : QualifiedCandidates) {
303     Reason = QualifiedValue.first;
304     // Skip a summary if its import is not (proved to be) legal.
305     if (Reason != FunctionImporter::ImportFailureReason::None)
306       continue;
307     auto *Summary =
308         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
309 
310     // Don't bother importing the definition if the chance of inlining it is
311     // not high enough (except under `--force-import-all`).
312     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
313         !ForceImportAll) {
314       TooLargeOrNoInlineSummary = Summary;
315       Reason = FunctionImporter::ImportFailureReason::TooLarge;
316       continue;
317     }
318 
319     // Don't bother importing the definition if we can't inline it anyway.
320     if (Summary->fflags().NoInline && !ForceImportAll) {
321       TooLargeOrNoInlineSummary = Summary;
322       Reason = FunctionImporter::ImportFailureReason::NoInline;
323       continue;
324     }
325 
326     return Summary;
327   }
328   return nullptr;
329 }
330 
namespace {

/// A function summary paired with an unsigned import threshold.
/// NOTE(review): presumably the element type of the import worklist; the code
/// that fills and drains it is outside the part of the file reviewed here -
/// confirm.
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;

} // anonymous namespace
336 
337 FunctionImporter::ImportMapTy::AddDefinitionStatus
338 FunctionImporter::ImportMapTy::addDefinition(StringRef FromModule,
339                                              GlobalValue::GUID GUID) {
340   auto [It, Inserted] =
341       ImportMap[FromModule].try_emplace(GUID, GlobalValueSummary::Definition);
342   if (Inserted)
343     return AddDefinitionStatus::Inserted;
344   if (It->second == GlobalValueSummary::Definition)
345     return AddDefinitionStatus::NoChange;
346   It->second = GlobalValueSummary::Definition;
347   return AddDefinitionStatus::ChangedToDefinition;
348 }
349 
350 void FunctionImporter::ImportMapTy::maybeAddDeclaration(
351     StringRef FromModule, GlobalValue::GUID GUID) {
352   ImportMap[FromModule].try_emplace(GUID, GlobalValueSummary::Declaration);
353 }
354 
355 /// Import globals referenced by a function or other globals that are being
356 /// imported, if importing such global is possible.
357 class GlobalsImporter final {
358   const ModuleSummaryIndex &Index;
359   const GVSummaryMapTy &DefinedGVSummaries;
360   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
361       IsPrevailing;
362   FunctionImporter::ImportMapTy &ImportList;
363   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
364 
365   bool shouldImportGlobal(const ValueInfo &VI) {
366     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
367     if (GVS == DefinedGVSummaries.end())
368       return true;
369     // We should not skip import if the module contains a non-prevailing
370     // definition with interposable linkage type. This is required for
371     // correctness in the situation where there is a prevailing def available
372     // for import and marked read-only. In this case, the non-prevailing def
373     // will be converted to a declaration, while the prevailing one becomes
374     // internal, thus no definitions will be available for linking. In order to
375     // prevent undefined symbol link error, the prevailing definition must be
376     // imported.
377     // FIXME: Consider adding a check that the suitable prevailing definition
378     // exists and marked read-only.
379     if (VI.getSummaryList().size() > 1 &&
380         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
381         !IsPrevailing(VI.getGUID(), GVS->second))
382       return true;
383 
384     return false;
385   }
386 
387   void
388   onImportingSummaryImpl(const GlobalValueSummary &Summary,
389                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
390     for (const auto &VI : Summary.refs()) {
391       if (!shouldImportGlobal(VI)) {
392         LLVM_DEBUG(
393             dbgs() << "Ref ignored! Target already in destination module.\n");
394         continue;
395       }
396 
397       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
398 
399       for (const auto &RefSummary : VI.getSummaryList()) {
400         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
401         // Functions could be referenced by global vars - e.g. a vtable; but we
402         // don't currently imagine a reason those would be imported here, rather
403         // than as part of the logic deciding which functions to import (i.e.
404         // based on profile information). Should we decide to handle them here,
405         // we can refactor accordingly at that time.
406         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
407             shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
408                                            Summary.modulePath()))
409           continue;
410 
411         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
412         // Otherwise, definition should take precedence over declaration.
413         if (ImportList.addDefinition(RefSummary->modulePath(), VI.getGUID()) !=
414             FunctionImporter::ImportMapTy::AddDefinitionStatus::Inserted)
415           break;
416 
417         // Only update stat and exports if we haven't already imported this
418         // variable.
419         NumImportedGlobalVarsThinLink++;
420         // Any references made by this variable will be marked exported
421         // later, in ComputeCrossModuleImport, after import decisions are
422         // complete, which is more efficient than adding them here.
423         if (ExportLists)
424           (*ExportLists)[RefSummary->modulePath()].insert(VI);
425 
426         // If variable is not writeonly we attempt to recursively analyze
427         // its references in order to import referenced constants.
428         if (!Index.isWriteOnly(GVS))
429           Worklist.emplace_back(GVS);
430         break;
431       }
432     }
433   }
434 
435 public:
436   GlobalsImporter(
437       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
438       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
439           IsPrevailing,
440       FunctionImporter::ImportMapTy &ImportList,
441       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
442       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
443         IsPrevailing(IsPrevailing), ImportList(ImportList),
444         ExportLists(ExportLists) {}
445 
446   void onImportingSummary(const GlobalValueSummary &Summary) {
447     SmallVector<const GlobalVarSummary *, 128> Worklist;
448     onImportingSummaryImpl(Summary, Worklist);
449     while (!Worklist.empty())
450       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
451   }
452 };
453 
454 static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
455 
/// Determine the list of imports and exports for each module.
///
/// The constructor is protected; the public way to obtain an instance is the
/// static create() factory declared below. The class has a virtual destructor
/// and a virtual computeImportForModule(), so subclasses can replace the
/// import policy.
class ModuleImportsManager {
protected:
  /// Callback taking a GUID and one of its summaries; used to ask whether
  /// that summary is the prevailing copy.
  function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
      IsPrevailing;
  /// The summary index that import decisions are based on.
  const ModuleSummaryIndex &Index;
  /// Optional map from module name to the set of symbols exported from that
  /// module; may be null (it is the constructor's default).
  DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;

  ModuleImportsManager(
      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          IsPrevailing,
      const ModuleSummaryIndex &Index,
      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
      : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}

public:
  virtual ~ModuleImportsManager() = default;

  /// Given the list of globals defined in a module, compute the list of imports
  /// as well as the list of "exports", i.e. the list of symbols referenced from
  /// another module (that may require promotion).
  virtual void
  computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
                         StringRef ModName,
                         FunctionImporter::ImportMapTy &ImportList);

  /// Factory for a ModuleImportsManager; defined out of line.
  /// NOTE(review): presumably selects between this class and a subclass such
  /// as WorkloadImportsManager - confirm against the definition.
  static std::unique_ptr<ModuleImportsManager>
  create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
             IsPrevailing,
         const ModuleSummaryIndex &Index,
         DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
             nullptr);
};
489 
490 /// A ModuleImportsManager that operates based on a workload definition (see
491 /// -thinlto-workload-def). For modules that do not define workload roots, it
492 /// applies the base ModuleImportsManager import policy.
493 class WorkloadImportsManager : public ModuleImportsManager {
494   // Keep a module name -> value infos to import association. We use it to
495   // determine if a module's import list should be done by the base
496   // ModuleImportsManager or by us.
497   StringMap<DenseSet<ValueInfo>> Workloads;
498 
499   void
500   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
501                          StringRef ModName,
502                          FunctionImporter::ImportMapTy &ImportList) override {
503     auto SetIter = Workloads.find(ModName);
504     if (SetIter == Workloads.end()) {
505       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
506                         << " does not contain the root of any context.\n");
507       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
508                                                           ModName, ImportList);
509     }
510     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
511                       << " contains the root(s) of context(s).\n");
512 
513     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
514                         ExportLists);
515     auto &ValueInfos = SetIter->second;
516     SmallVector<EdgeInfo, 128> GlobWorklist;
517     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
518       auto It = DefinedGVSummaries.find(VI.getGUID());
519       if (It != DefinedGVSummaries.end() &&
520           IsPrevailing(VI.getGUID(), It->second)) {
521         LLVM_DEBUG(
522             dbgs() << "[Workload] " << VI.name()
523                    << " has the prevailing variant already in the module "
524                    << ModName << ". No need to import\n");
525         continue;
526       }
527       auto Candidates =
528           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
529 
530       const GlobalValueSummary *GVS = nullptr;
531       auto PotentialCandidates = llvm::map_range(
532           llvm::make_filter_range(
533               Candidates,
534               [&](const auto &Candidate) {
535                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
536                                   << " from " << Candidate.second->modulePath()
537                                   << " ImportFailureReason: "
538                                   << getFailureName(Candidate.first) << "\n");
539                 return Candidate.first ==
540                         FunctionImporter::ImportFailureReason::None;
541               }),
542           [](const auto &Candidate) { return Candidate.second; });
543       if (PotentialCandidates.empty()) {
544         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
545                           << " because can't find eligible Callee. Guid is: "
546                           << Function::getGUID(VI.name()) << "\n");
547         continue;
548       }
549       /// We will prefer importing the prevailing candidate, if not, we'll
550       /// still pick the first available candidate. The reason we want to make
551       /// sure we do import the prevailing candidate is because the goal of
552       /// workload-awareness is to enable optimizations specializing the call
553       /// graph of that workload. Suppose a function is already defined in the
554       /// module, but it's not the prevailing variant. Suppose also we do not
555       /// inline it (in fact, if it were interposable, we can't inline it),
556       /// but we could specialize it to the workload in other ways. However,
557       /// the linker would drop it in the favor of the prevailing copy.
558       /// Instead, by importing the prevailing variant (assuming also the use
559       /// of `-avail-extern-to-local`), we keep the specialization. We could
560       /// alteranatively make the non-prevailing variant local, but the
561       /// prevailing one is also the one for which we would have previously
562       /// collected profiles, making it preferrable.
563       auto PrevailingCandidates = llvm::make_filter_range(
564           PotentialCandidates, [&](const auto *Candidate) {
565             return IsPrevailing(VI.getGUID(), Candidate);
566           });
567       if (PrevailingCandidates.empty()) {
568         GVS = *PotentialCandidates.begin();
569         if (!llvm::hasSingleElement(PotentialCandidates) &&
570             GlobalValue::isLocalLinkage(GVS->linkage()))
571           LLVM_DEBUG(
572               dbgs()
573               << "[Workload] Found multiple non-prevailing candidates for "
574               << VI.name()
575               << ". This is unexpected. Are module paths passed to the "
576                  "compiler unique for the modules passed to the linker?");
577         // We could in theory have multiple (interposable) copies of a symbol
578         // when there is no prevailing candidate, if say the prevailing copy was
579         // in a native object being linked in. However, we should in theory be
580         // marking all of these non-prevailing IR copies dead in that case, in
581         // which case they won't be candidates.
582         assert(GVS->isLive());
583       } else {
584         assert(llvm::hasSingleElement(PrevailingCandidates));
585         GVS = *PrevailingCandidates.begin();
586       }
587 
588       auto ExportingModule = GVS->modulePath();
589       // We checked that for the prevailing case, but if we happen to have for
590       // example an internal that's defined in this module, it'd have no
591       // PrevailingCandidates.
592       if (ExportingModule == ModName) {
593         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
594                           << " because its defining module is the same as the "
595                              "current module\n");
596         continue;
597       }
598       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
599                         << ExportingModule << " : "
600                         << Function::getGUID(VI.name()) << "\n");
601       ImportList.addDefinition(ExportingModule, VI.getGUID());
602       GVI.onImportingSummary(*GVS);
603       if (ExportLists)
604         (*ExportLists)[ExportingModule].insert(VI);
605     }
606     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
607   }
608 
609   void loadFromJson() {
610     // Since the workload def uses names, we need a quick lookup
611     // name->ValueInfo.
612     StringMap<ValueInfo> NameToValueInfo;
613     StringSet<> AmbiguousNames;
614     for (auto &I : Index) {
615       ValueInfo VI = Index.getValueInfo(I);
616       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
617         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
618     }
619     auto DbgReportIfAmbiguous = [&](StringRef Name) {
620       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
621         dbgs() << "[Workload] Function name " << Name
622                << " present in the workload definition is ambiguous. Consider "
623                   "compiling with -funique-internal-linkage-names.";
624       });
625     };
626     std::error_code EC;
627     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
628     if (std::error_code EC = BufferOrErr.getError()) {
629       report_fatal_error("Failed to open context file");
630       return;
631     }
632     auto Buffer = std::move(BufferOrErr.get());
633     std::map<std::string, std::vector<std::string>> WorkloadDefs;
634     json::Path::Root NullRoot;
635     // The JSON is supposed to contain a dictionary matching the type of
636     // WorkloadDefs. For example:
637     // {
638     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
639     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
640     // }
641     auto Parsed = json::parse(Buffer->getBuffer());
642     if (!Parsed)
643       report_fatal_error(Parsed.takeError());
644     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
645       report_fatal_error("Invalid thinlto contextual profile format.");
646     for (const auto &Workload : WorkloadDefs) {
647       const auto &Root = Workload.first;
648       DbgReportIfAmbiguous(Root);
649       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
650       const auto &AllCallees = Workload.second;
651       auto RootIt = NameToValueInfo.find(Root);
652       if (RootIt == NameToValueInfo.end()) {
653         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
654                           << " not found in this linkage unit.\n");
655         continue;
656       }
657       auto RootVI = RootIt->second;
658       if (RootVI.getSummaryList().size() != 1) {
659         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
660                           << " should have exactly one summary, but has "
661                           << RootVI.getSummaryList().size() << ". Skipping.\n");
662         continue;
663       }
664       StringRef RootDefiningModule =
665           RootVI.getSummaryList().front()->modulePath();
666       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
667                         << " is : " << RootDefiningModule << "\n");
668       auto &Set = Workloads[RootDefiningModule];
669       for (const auto &Callee : AllCallees) {
670         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
671         DbgReportIfAmbiguous(Callee);
672         auto ElemIt = NameToValueInfo.find(Callee);
673         if (ElemIt == NameToValueInfo.end()) {
674           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
675           continue;
676         }
677         Set.insert(ElemIt->second);
678       }
679     }
680   }
681 
682   void loadFromCtxProf() {
683     std::error_code EC;
684     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(UseCtxProfile);
685     if (std::error_code EC = BufferOrErr.getError()) {
686       report_fatal_error("Failed to open contextual profile file");
687       return;
688     }
689     auto Buffer = std::move(BufferOrErr.get());
690 
691     PGOCtxProfileReader Reader(Buffer->getBuffer());
692     auto Ctx = Reader.loadContexts();
693     if (!Ctx) {
694       report_fatal_error("Failed to parse contextual profiles");
695       return;
696     }
697     const auto &CtxMap = *Ctx;
698     DenseSet<GlobalValue::GUID> ContainedGUIDs;
699     for (const auto &[RootGuid, Root] : CtxMap) {
700       // Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for
701       // subsequent roots, but clear its contents.
702       ContainedGUIDs.clear();
703 
704       auto RootVI = Index.getValueInfo(RootGuid);
705       if (!RootVI) {
706         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
707                           << " not found in this linkage unit.\n");
708         continue;
709       }
710       if (RootVI.getSummaryList().size() != 1) {
711         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
712                           << " should have exactly one summary, but has "
713                           << RootVI.getSummaryList().size() << ". Skipping.\n");
714         continue;
715       }
716       StringRef RootDefiningModule =
717           RootVI.getSummaryList().front()->modulePath();
718       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
719                         << " is : " << RootDefiningModule << "\n");
720       auto &Set = Workloads[RootDefiningModule];
721       Root.getContainedGuids(ContainedGUIDs);
722       for (auto Guid : ContainedGUIDs)
723         if (auto VI = Index.getValueInfo(Guid))
724           Set.insert(VI);
725     }
726   }
727 
728 public:
729   WorkloadImportsManager(
730       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
731           IsPrevailing,
732       const ModuleSummaryIndex &Index,
733       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
734       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
735     if (UseCtxProfile.empty() == WorkloadDefinitions.empty()) {
736       report_fatal_error(
737           "Pass only one of: -thinlto-pgo-ctx-prof or -thinlto-workload-def");
738       return;
739     }
740     if (!UseCtxProfile.empty())
741       loadFromCtxProf();
742     else
743       loadFromJson();
744     LLVM_DEBUG({
745       for (const auto &[Root, Set] : Workloads) {
746         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
747                << " distinct callees.\n";
748         for (const auto &VI : Set) {
749           dbgs() << "[Workload] Root: " << Root
750                  << " Would include: " << VI.getGUID() << "\n";
751         }
752       }
753     });
754   }
755 };
756 
757 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
758     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
759         IsPrevailing,
760     const ModuleSummaryIndex &Index,
761     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
762   if (WorkloadDefinitions.empty() && UseCtxProfile.empty()) {
763     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
764     return std::unique_ptr<ModuleImportsManager>(
765         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
766   }
767   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
768   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
769                                                   ExportLists);
770 }
771 
772 static const char *
773 getFailureName(FunctionImporter::ImportFailureReason Reason) {
774   switch (Reason) {
775   case FunctionImporter::ImportFailureReason::None:
776     return "None";
777   case FunctionImporter::ImportFailureReason::GlobalVar:
778     return "GlobalVar";
779   case FunctionImporter::ImportFailureReason::NotLive:
780     return "NotLive";
781   case FunctionImporter::ImportFailureReason::TooLarge:
782     return "TooLarge";
783   case FunctionImporter::ImportFailureReason::InterposableLinkage:
784     return "InterposableLinkage";
785   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
786     return "LocalLinkageNotInModule";
787   case FunctionImporter::ImportFailureReason::NotEligible:
788     return "NotEligible";
789   case FunctionImporter::ImportFailureReason::NoInline:
790     return "NoInline";
791   }
792   llvm_unreachable("invalid reason");
793 }
794 
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
///
/// Walks every call edge of \p Summary. For each callee not already defined in
/// the destination module, a candidate summary is selected via selectCallee()
/// using \p Threshold scaled by the edge's hotness multiplier. Selected
/// callees are recorded in \p ImportList, added to \p ExportLists (when
/// non-null) under their defining module, and pushed onto \p Worklist with an
/// adjusted threshold so their own callees get considered by the caller of
/// this function.
///
/// \p ImportThresholds caches, per callee GUID, the largest threshold
/// processed so far, the selected summary (null if rejected), and optional
/// failure info; it is used to avoid re-evaluating a callee at the same or a
/// lower threshold.
///
/// Note: only the imported function itself is added to \p ExportLists here;
/// per the comment near the end of the loop body, the values it calls or
/// references are marked exported later, in ComputeCrossModuleImport.
/// \p isPrevailing is not referenced in this function's body.
static void computeImportForFunction(
    const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
    const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
    FunctionImporter::ImportMapTy &ImportList,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
    FunctionImporter::ImportThresholdsTy &ImportThresholds) {
  GVImporter.onImportingSummary(Summary);
  // Function-local static: this counter persists across every call to this
  // function, so the ImportCutoff check below is not reset per caller or per
  // module.
  static int ImportCount = 0;
  for (const auto &Edge : Summary.calls()) {
    ValueInfo VI = Edge.first;
    LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
                      << "\n");

    // A negative ImportCutoff disables the cutoff.
    if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
      LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
                        << " reached.\n");
      continue;
    }

    if (DefinedGVSummaries.count(VI.getGUID())) {
      // FIXME: Consider not skipping import if the module contains
      // a non-prevailing def with interposable linkage. The prevailing copy
      // can safely be imported (see shouldImportGlobal()).
      LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
      continue;
    }

    // Scale factor applied to the threshold based on the call edge's hotness;
    // hotness values other than Hot/Cold/Critical leave it unchanged.
    auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
      if (Hotness == CalleeInfo::HotnessType::Hot)
        return ImportHotMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Cold)
        return ImportColdMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Critical)
        return ImportCriticalMultiplier;
      return 1.0;
    };

    const auto NewThreshold =
        Threshold * GetBonusMultiplier(Edge.second.getHotness());

    // Insert a fresh (NewThreshold, null, null) record for this callee, or
    // find the existing one. The three references below alias the stored
    // tuple, so assignments through them update the cache in place.
    auto IT = ImportThresholds.insert(std::make_pair(
        VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
    bool PreviouslyVisited = !IT.second;
    auto &ProcessedThreshold = std::get<0>(IT.first->second);
    auto &CalleeSummary = std::get<1>(IT.first->second);
    auto &FailureInfo = std::get<2>(IT.first->second);

    bool IsHotCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
    bool IsCriticalCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;

    const FunctionSummary *ResolvedCalleeSummary = nullptr;
    if (CalleeSummary) {
      // A summary was already selected for this callee on an earlier visit.
      assert(PreviouslyVisited);
      // Since the traversal of the call graph is DFS, we can revisit a function
      // a second time with a higher threshold. In this case, it is added back
      // to the worklist with the new threshold (so that its own callee chains
      // can be considered with the higher threshold).
      if (NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already imported with Threshold "
                   << ProcessedThreshold << "\n");
        continue;
      }
      // Update with new larger threshold.
      ProcessedThreshold = NewThreshold;
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
    } else {
      // If we already rejected importing a callee at the same or higher
      // threshold, don't waste time calling selectCallee.
      if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already rejected with Threshold "
            << ProcessedThreshold << "\n");
        if (PrintImportFailures) {
          assert(FailureInfo &&
                 "Expected FailureInfo for previously rejected candidate");
          FailureInfo->Attempts++;
        }
        continue;
      }

      FunctionImporter::ImportFailureReason Reason{};

      // `SummaryForDeclImport` is a summary eligible for declaration import.
      const GlobalValueSummary *SummaryForDeclImport = nullptr;
      CalleeSummary =
          selectCallee(Index, VI.getSummaryList(), NewThreshold,
                       Summary.modulePath(), SummaryForDeclImport, Reason);
      if (!CalleeSummary) {
        // There isn't a callee for definition import but one for declaration
        // import.
        if (ImportDeclaration && SummaryForDeclImport) {
          StringRef DeclSourceModule = SummaryForDeclImport->modulePath();

          // Note `ExportLists` only keeps track of exports due to imported
          // definitions.
          ImportList.maybeAddDeclaration(DeclSourceModule, VI.getGUID());
        }
        // Update with new larger threshold if this was a retry (otherwise
        // we would have already inserted with NewThreshold above). Also
        // update failure info if requested.
        if (PreviouslyVisited) {
          ProcessedThreshold = NewThreshold;
          if (PrintImportFailures) {
            assert(FailureInfo &&
                   "Expected FailureInfo for previously rejected candidate");
            FailureInfo->Reason = Reason;
            FailureInfo->Attempts++;
            FailureInfo->MaxHotness =
                std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
          }
        } else if (PrintImportFailures) {
          assert(!FailureInfo &&
                 "Expected no FailureInfo for newly rejected candidate");
          FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
              VI, Edge.second.getHotness(), Reason, 1);
        }
        if (ForceImportAll) {
          // Under ForceImportAll a failed selection is logged to errs() and
          // the remaining call edges of this caller are not processed.
          std::string Msg = std::string("Failed to import function ") +
                            VI.name().str() + " due to " +
                            getFailureName(Reason);
          auto Error = make_error<StringError>(
              Msg, make_error_code(errc::not_supported));
          logAllUnhandledErrors(std::move(Error), errs(),
                                "Error importing module: ");
          break;
        } else {
          LLVM_DEBUG(dbgs()
                     << "ignored! No qualifying callee with summary found.\n");
          continue;
        }
      }

      // "Resolve" the summary
      CalleeSummary = CalleeSummary->getBaseObject();
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);

      assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
              (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
             "selectCallee() didn't honor the threshold");

      auto ExportModulePath = ResolvedCalleeSummary->modulePath();

      // Try emplace the definition entry, and update stats based on insertion
      // status.
      if (ImportList.addDefinition(ExportModulePath, VI.getGUID()) !=
          FunctionImporter::ImportMapTy::AddDefinitionStatus::NoChange) {
        NumImportedFunctionsThinLink++;
        if (IsHotCallsite)
          NumImportedHotFunctionsThinLink++;
        if (IsCriticalCallsite)
          NumImportedCriticalFunctionsThinLink++;
      }

      // Any calls/references made by this function will be marked exported
      // later, in ComputeCrossModuleImport, after import decisions are
      // complete, which is more efficient than adding them here.
      if (ExportLists)
        (*ExportLists)[ExportModulePath].insert(VI);
    }

    auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
      // Adjust the threshold for next level of imported functions.
      // The threshold is different for hot callsites because we can then
      // inline chains of hot calls.
      if (IsHotCallsite)
        return Threshold * ImportHotInstrFactor;
      return Threshold * ImportInstrFactor;
    };

    // Note: the adjustment is applied to the caller's incoming Threshold, not
    // to the hotness-scaled NewThreshold computed above.
    const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);

    // Counted both for first-time imports and for revisits with a higher
    // threshold, since both paths reach this point.
    ImportCount++;

    // Insert the newly imported function to the worklist.
    Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
  }
}
981 
982 void ModuleImportsManager::computeImportForModule(
983     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
984     FunctionImporter::ImportMapTy &ImportList) {
985   // Worklist contains the list of function imported in this module, for which
986   // we will analyse the callees and may import further down the callgraph.
987   SmallVector<EdgeInfo, 128> Worklist;
988   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
989                       ExportLists);
990   FunctionImporter::ImportThresholdsTy ImportThresholds;
991 
992   // Populate the worklist with the import for the functions in the current
993   // module
994   for (const auto &GVSummary : DefinedGVSummaries) {
995 #ifndef NDEBUG
996     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
997     // so this map look up (and possibly others) can be avoided.
998     auto VI = Index.getValueInfo(GVSummary.first);
999 #endif
1000     if (!Index.isGlobalValueLive(GVSummary.second)) {
1001       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
1002       continue;
1003     }
1004     auto *FuncSummary =
1005         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
1006     if (!FuncSummary)
1007       // Skip import for global variables
1008       continue;
1009     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
1010     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
1011                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
1012                              ImportList, ExportLists, ImportThresholds);
1013   }
1014 
1015   // Process the newly imported functions and add callees to the worklist.
1016   while (!Worklist.empty()) {
1017     auto GVInfo = Worklist.pop_back_val();
1018     auto *Summary = std::get<0>(GVInfo);
1019     auto Threshold = std::get<1>(GVInfo);
1020 
1021     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
1022       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
1023                                IsPrevailing, Worklist, GVI, ImportList,
1024                                ExportLists, ImportThresholds);
1025   }
1026 
1027   // Print stats about functions considered but rejected for importing
1028   // when requested.
1029   if (PrintImportFailures) {
1030     dbgs() << "Missed imports into module " << ModName << "\n";
1031     for (auto &I : ImportThresholds) {
1032       auto &ProcessedThreshold = std::get<0>(I.second);
1033       auto &CalleeSummary = std::get<1>(I.second);
1034       auto &FailureInfo = std::get<2>(I.second);
1035       if (CalleeSummary)
1036         continue; // We are going to import.
1037       assert(FailureInfo);
1038       FunctionSummary *FS = nullptr;
1039       if (!FailureInfo->VI.getSummaryList().empty())
1040         FS = dyn_cast<FunctionSummary>(
1041             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
1042       dbgs() << FailureInfo->VI
1043              << ": Reason = " << getFailureName(FailureInfo->Reason)
1044              << ", Threshold = " << ProcessedThreshold
1045              << ", Size = " << (FS ? (int)FS->instCount() : -1)
1046              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
1047              << ", Attempts = " << FailureInfo->Attempts << "\n";
1048     }
1049   }
1050 }
1051 
1052 #ifndef NDEBUG
1053 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
1054   auto SL = VI.getSummaryList();
1055   return SL.empty()
1056              ? false
1057              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
1058 }
1059 
1060 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
1061                                GlobalValue::GUID G) {
1062   if (const auto &VI = Index.getValueInfo(G))
1063     return isGlobalVarSummary(Index, VI);
1064   return false;
1065 }
1066 
1067 // Return the number of global variable summaries in ExportSet.
1068 static unsigned
1069 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1070                       FunctionImporter::ExportSetTy &ExportSet) {
1071   unsigned NumGVS = 0;
1072   for (auto &VI : ExportSet)
1073     if (isGlobalVarSummary(Index, VI.getGUID()))
1074       ++NumGVS;
1075   return NumGVS;
1076 }
1077 
1078 // Given ImportMap, return the number of global variable summaries and record
1079 // the number of defined function summaries as output parameter.
1080 static unsigned
1081 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1082                       const FunctionImporter::FunctionsToImportTy &ImportMap,
1083                       unsigned &DefinedFS) {
1084   unsigned NumGVS = 0;
1085   DefinedFS = 0;
1086   for (auto &[GUID, Type] : ImportMap) {
1087     if (isGlobalVarSummary(Index, GUID))
1088       ++NumGVS;
1089     else if (Type == GlobalValueSummary::Definition)
1090       ++DefinedFS;
1091   }
1092   return NumGVS;
1093 }
1094 #endif
1095 
1096 #ifndef NDEBUG
1097 static bool checkVariableImport(
1098     const ModuleSummaryIndex &Index,
1099     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1100     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1101   DenseSet<GlobalValue::GUID> FlattenedImports;
1102 
1103   for (const auto &ImportPerModule : ImportLists)
1104     for (const auto &ExportPerModule : ImportPerModule.second.getImportMap())
1105       for (const auto &[GUID, Type] : ExportPerModule.second)
1106         FlattenedImports.insert(GUID);
1107 
1108   // Checks that all GUIDs of read/writeonly vars we see in export lists
1109   // are also in the import lists. Otherwise we my face linker undefs,
1110   // because readonly and writeonly vars are internalized in their
1111   // source modules. The exception would be if it has a linkage type indicating
1112   // that there may have been a copy existing in the importing module (e.g.
1113   // linkonce_odr). In that case we cannot accurately do this checking.
1114   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1115                                                   const ValueInfo &VI) {
1116     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1117         Index.findSummaryInModule(VI, ModulePath));
1118     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1119            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1120              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1121              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1122   };
1123 
1124   for (auto &ExportPerModule : ExportLists)
1125     for (auto &VI : ExportPerModule.second)
1126       if (!FlattenedImports.count(VI.getGUID()) &&
1127           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1128         return false;
1129 
1130   return true;
1131 }
1132 #endif
1133 
1134 /// Compute all the import and export for every module using the Index.
1135 void llvm::ComputeCrossModuleImport(
1136     const ModuleSummaryIndex &Index,
1137     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1138     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1139         isPrevailing,
1140     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1141     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1142   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1143   // For each module that has function defined, compute the import/export lists.
1144   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1145     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1146     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1147                       << DefinedGVSummaries.first << "'\n");
1148     MIS->computeImportForModule(DefinedGVSummaries.second,
1149                                 DefinedGVSummaries.first, ImportList);
1150   }
1151 
1152   // When computing imports we only added the variables and functions being
1153   // imported to the export list. We also need to mark any references and calls
1154   // they make as exported as well. We do this here, as it is more efficient
1155   // since we may import the same values multiple times into different modules
1156   // during the import computation.
1157   for (auto &ELI : ExportLists) {
1158     // `NewExports` tracks the VI that gets exported because the full definition
1159     // of its user/referencer gets exported.
1160     FunctionImporter::ExportSetTy NewExports;
1161     const auto &DefinedGVSummaries =
1162         ModuleToDefinedGVSummaries.lookup(ELI.first);
1163     for (auto &EI : ELI.second) {
1164       // Find the copy defined in the exporting module so that we can mark the
1165       // values it references in that specific definition as exported.
1166       // Below we will add all references and called values, without regard to
1167       // whether they are also defined in this module. We subsequently prune the
1168       // list to only include those defined in the exporting module, see comment
1169       // there as to why.
1170       auto DS = DefinedGVSummaries.find(EI.getGUID());
1171       // Anything marked exported during the import computation must have been
1172       // defined in the exporting module.
1173       assert(DS != DefinedGVSummaries.end());
1174       auto *S = DS->getSecond();
1175       S = S->getBaseObject();
1176       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1177         // Export referenced functions and variables. We don't export/promote
1178         // objects referenced by writeonly variable initializer, because
1179         // we convert such variables initializers to "zeroinitializer".
1180         // See processGlobalForThinLTO.
1181         if (!Index.isWriteOnly(GVS))
1182           for (const auto &VI : GVS->refs())
1183             NewExports.insert(VI);
1184       } else {
1185         auto *FS = cast<FunctionSummary>(S);
1186         for (const auto &Edge : FS->calls())
1187           NewExports.insert(Edge.first);
1188         for (const auto &Ref : FS->refs())
1189           NewExports.insert(Ref);
1190       }
1191     }
1192     // Prune list computed above to only include values defined in the
1193     // exporting module. We do this after the above insertion since we may hit
1194     // the same ref/call target multiple times in above loop, and it is more
1195     // efficient to avoid a set lookup each time.
1196     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1197       if (!DefinedGVSummaries.count(EI->getGUID()))
1198         NewExports.erase(EI++);
1199       else
1200         ++EI;
1201     }
1202     ELI.second.insert(NewExports.begin(), NewExports.end());
1203   }
1204 
1205   assert(checkVariableImport(Index, ImportLists, ExportLists));
1206 #ifndef NDEBUG
1207   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1208                     << " modules:\n");
1209   for (auto &ModuleImports : ImportLists) {
1210     auto ModName = ModuleImports.first;
1211     auto &Exports = ExportLists[ModName];
1212     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1213     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1214                       << Exports.size() - NumGVS << " functions and " << NumGVS
1215                       << " vars. Imports from "
1216                       << ModuleImports.second.getImportMap().size()
1217                       << " modules.\n");
1218     for (const auto &Src : ModuleImports.second.getImportMap()) {
1219       auto SrcModName = Src.first;
1220       unsigned DefinedFS = 0;
1221       unsigned NumGVSPerMod =
1222           numGlobalVarSummaries(Index, Src.second, DefinedFS);
1223       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1224                         << Src.second.size() - NumGVSPerMod - DefinedFS
1225                         << " function declarations imported from " << SrcModName
1226                         << "\n");
1227       LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1228                         << " global vars imported from " << SrcModName << "\n");
1229     }
1230   }
1231 #endif
1232 }
1233 
1234 #ifndef NDEBUG
1235 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1236                                     StringRef ModulePath,
1237                                     FunctionImporter::ImportMapTy &ImportList) {
1238   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1239                     << ImportList.getImportMap().size() << " modules.\n");
1240   for (const auto &Src : ImportList.getImportMap()) {
1241     auto SrcModName = Src.first;
1242     unsigned DefinedFS = 0;
1243     unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
1244     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1245                       << Src.second.size() - DefinedFS - NumGVSPerMod
1246                       << " function declarations imported from " << SrcModName
1247                       << "\n");
1248     LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1249                       << SrcModName << "\n");
1250   }
1251 }
1252 #endif
1253 
1254 /// Compute all the imports for the given module using the Index.
1255 ///
1256 /// \p isPrevailing is a callback that will be called with a global value's GUID
1257 /// and summary and should return whether the module corresponding to the
1258 /// summary contains the linker-prevailing copy of that value.
1259 ///
1260 /// \p ImportList will be populated with a map that can be passed to
1261 /// FunctionImporter::importFunctions() above (see description there).
1262 static void ComputeCrossModuleImportForModuleForTest(
1263     StringRef ModulePath,
1264     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1265         isPrevailing,
1266     const ModuleSummaryIndex &Index,
1267     FunctionImporter::ImportMapTy &ImportList) {
1268   // Collect the list of functions this module defines.
1269   // GUID -> Summary
1270   GVSummaryMapTy FunctionSummaryMap;
1271   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1272 
1273   // Compute the import list for this module.
1274   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1275   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1276   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1277 
1278 #ifndef NDEBUG
1279   dumpImportListForModule(Index, ModulePath, ImportList);
1280 #endif
1281 }
1282 
1283 /// Mark all external summaries in \p Index for import into the given module.
1284 /// Used for testing the case of distributed builds using a distributed index.
1285 ///
1286 /// \p ImportList will be populated with a map that can be passed to
1287 /// FunctionImporter::importFunctions() above (see description there).
1288 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1289     StringRef ModulePath, const ModuleSummaryIndex &Index,
1290     FunctionImporter::ImportMapTy &ImportList) {
1291   for (const auto &GlobalList : Index) {
1292     // Ignore entries for undefined references.
1293     if (GlobalList.second.SummaryList.empty())
1294       continue;
1295 
1296     auto GUID = GlobalList.first;
1297     assert(GlobalList.second.SummaryList.size() == 1 &&
1298            "Expected individual combined index to have one summary per GUID");
1299     auto &Summary = GlobalList.second.SummaryList[0];
1300     // Skip the summaries for the importing module. These are included to
1301     // e.g. record required linkage changes.
1302     if (Summary->modulePath() == ModulePath)
1303       continue;
1304     // Add an entry to provoke importing by thinBackend.
1305     ImportList.addGUID(Summary->modulePath(), GUID, Summary->importType());
1306   }
1307 #ifndef NDEBUG
1308   dumpImportListForModule(Index, ModulePath, ImportList);
1309 #endif
1310 }
1311 
1312 // For SamplePGO, the indirect call targets for local functions will
1313 // have its original name annotated in profile. We try to find the
1314 // corresponding PGOFuncName as the GUID, and fix up the edges
1315 // accordingly.
1316 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1317                                      FunctionSummary *FS) {
1318   for (auto &EI : FS->mutableCalls()) {
1319     if (!EI.first.getSummaryList().empty())
1320       continue;
1321     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1322     if (GUID == 0)
1323       continue;
1324     // Update the edge to point directly to the correct GUID.
1325     auto VI = Index.getValueInfo(GUID);
1326     if (llvm::any_of(
1327             VI.getSummaryList(),
1328             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1329               // The mapping from OriginalId to GUID may return a GUID
1330               // that corresponds to a static variable. Filter it out here.
1331               // This can happen when
1332               // 1) There is a call to a library function which is not defined
1333               // in the index.
1334               // 2) There is a static variable with the  OriginalGUID identical
1335               // to the GUID of the library function in 1);
1336               // When this happens the static variable in 2) will be found,
1337               // which needs to be filtered out.
1338               return SummaryPtr->getSummaryKind() ==
1339                      GlobalValueSummary::GlobalVarKind;
1340             }))
1341       continue;
1342     EI.first = VI;
1343   }
1344 }
1345 
1346 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1347   for (const auto &Entry : Index) {
1348     for (const auto &S : Entry.second.SummaryList) {
1349       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1350         updateValueInfoForIndirectCalls(Index, FS);
1351     }
1352   }
1353 }
1354 
1355 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1356     ModuleSummaryIndex &Index,
1357     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1358     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1359   assert(!Index.withGlobalValueDeadStripping());
1360   if (!ComputeDead ||
1361       // Don't do anything when nothing is live, this is friendly with tests.
1362       GUIDPreservedSymbols.empty()) {
1363     // Still need to update indirect calls.
1364     updateIndirectCalls(Index);
1365     return;
1366   }
1367   unsigned LiveSymbols = 0;
1368   SmallVector<ValueInfo, 128> Worklist;
1369   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1370   for (auto GUID : GUIDPreservedSymbols) {
1371     ValueInfo VI = Index.getValueInfo(GUID);
1372     if (!VI)
1373       continue;
1374     for (const auto &S : VI.getSummaryList())
1375       S->setLive(true);
1376   }
1377 
1378   // Add values flagged in the index as live roots to the worklist.
1379   for (const auto &Entry : Index) {
1380     auto VI = Index.getValueInfo(Entry);
1381     for (const auto &S : Entry.second.SummaryList) {
1382       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1383         updateValueInfoForIndirectCalls(Index, FS);
1384       if (S->isLive()) {
1385         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1386         Worklist.push_back(VI);
1387         ++LiveSymbols;
1388         break;
1389       }
1390     }
1391   }
1392 
1393   // Make value live and add it to the worklist if it was not live before.
1394   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1395     // FIXME: If we knew which edges were created for indirect call profiles,
1396     // we could skip them here. Any that are live should be reached via
1397     // other edges, e.g. reference edges. Otherwise, using a profile collected
1398     // on a slightly different binary might provoke preserving, importing
1399     // and ultimately promoting calls to functions not linked into this
1400     // binary, which increases the binary size unnecessarily. Note that
1401     // if this code changes, the importer needs to change so that edges
1402     // to functions marked dead are skipped.
1403 
1404     if (llvm::any_of(VI.getSummaryList(),
1405                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1406                        return S->isLive();
1407                      }))
1408       return;
1409 
1410     // We only keep live symbols that are known to be non-prevailing if any are
1411     // available_externally, linkonceodr, weakodr. Those symbols are discarded
1412     // later in the EliminateAvailableExternally pass and setting them to
1413     // not-live could break downstreams users of liveness information (PR36483)
1414     // or limit optimization opportunities.
1415     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1416       bool KeepAliveLinkage = false;
1417       bool Interposable = false;
1418       for (const auto &S : VI.getSummaryList()) {
1419         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1420             S->linkage() == GlobalValue::WeakODRLinkage ||
1421             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1422           KeepAliveLinkage = true;
1423         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1424           Interposable = true;
1425       }
1426 
1427       if (!IsAliasee) {
1428         if (!KeepAliveLinkage)
1429           return;
1430 
1431         if (Interposable)
1432           report_fatal_error(
1433               "Interposable and available_externally/linkonce_odr/weak_odr "
1434               "symbol");
1435       }
1436     }
1437 
1438     for (const auto &S : VI.getSummaryList())
1439       S->setLive(true);
1440     ++LiveSymbols;
1441     Worklist.push_back(VI);
1442   };
1443 
1444   while (!Worklist.empty()) {
1445     auto VI = Worklist.pop_back_val();
1446     for (const auto &Summary : VI.getSummaryList()) {
1447       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1448         // If this is an alias, visit the aliasee VI to ensure that all copies
1449         // are marked live and it is added to the worklist for further
1450         // processing of its references.
1451         visit(AS->getAliaseeVI(), true);
1452         continue;
1453       }
1454       for (auto Ref : Summary->refs())
1455         visit(Ref, false);
1456       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1457         for (auto Call : FS->calls())
1458           visit(Call.first, false);
1459     }
1460   }
1461   Index.setWithGlobalValueDeadStripping();
1462 
1463   unsigned DeadSymbols = Index.size() - LiveSymbols;
1464   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1465                     << " symbols Dead \n");
1466   NumDeadSymbols += DeadSymbols;
1467   NumLiveSymbols += LiveSymbols;
1468 }
1469 
1470 // Compute dead symbols and propagate constants in combined index.
1471 void llvm::computeDeadSymbolsWithConstProp(
1472     ModuleSummaryIndex &Index,
1473     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1474     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1475     bool ImportEnabled) {
1476   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1477                                            isPrevailing);
1478   if (ImportEnabled)
1479     Index.propagateAttributes(GUIDPreservedSymbols);
1480 }
1481 
1482 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1483 /// \p ModulePath.
1484 void llvm::gatherImportedSummariesForModule(
1485     StringRef ModulePath,
1486     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1487     const FunctionImporter::ImportMapTy &ImportList,
1488     ModuleToSummariesForIndexTy &ModuleToSummariesForIndex,
1489     GVSummaryPtrSet &DecSummaries) {
1490   // Include all summaries from the importing module.
1491   ModuleToSummariesForIndex[std::string(ModulePath)] =
1492       ModuleToDefinedGVSummaries.lookup(ModulePath);
1493   // Include summaries for imports.
1494   for (const auto &ILI : ImportList.getImportMap()) {
1495     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1496 
1497     const auto &DefinedGVSummaries =
1498         ModuleToDefinedGVSummaries.lookup(ILI.first);
1499     for (const auto &[GUID, Type] : ILI.second) {
1500       const auto &DS = DefinedGVSummaries.find(GUID);
1501       assert(DS != DefinedGVSummaries.end() &&
1502              "Expected a defined summary for imported global value");
1503       if (Type == GlobalValueSummary::Declaration)
1504         DecSummaries.insert(DS->second);
1505 
1506       SummariesForIndex[GUID] = DS->second;
1507     }
1508   }
1509 }
1510 
1511 /// Emit the files \p ModulePath will import from into \p OutputFilename.
1512 std::error_code llvm::EmitImportsFiles(
1513     StringRef ModulePath, StringRef OutputFilename,
1514     const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex) {
1515   std::error_code EC;
1516   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1517   if (EC)
1518     return EC;
1519   for (const auto &ILI : ModuleToSummariesForIndex)
1520     // The ModuleToSummariesForIndex map includes an entry for the current
1521     // Module (needed for writing out the index files). We don't want to
1522     // include it in the imports file, however, so filter it out.
1523     if (ILI.first != ModulePath)
1524       ImportsOS << ILI.first << "\n";
1525   return std::error_code();
1526 }
1527 
1528 bool llvm::convertToDeclaration(GlobalValue &GV) {
1529   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1530                     << "\n");
1531   if (Function *F = dyn_cast<Function>(&GV)) {
1532     F->deleteBody();
1533     F->clearMetadata();
1534     F->setComdat(nullptr);
1535   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1536     V->setInitializer(nullptr);
1537     V->setLinkage(GlobalValue::ExternalLinkage);
1538     V->clearMetadata();
1539     V->setComdat(nullptr);
1540   } else {
1541     GlobalValue *NewGV;
1542     if (GV.getValueType()->isFunctionTy())
1543       NewGV =
1544           Function::Create(cast<FunctionType>(GV.getValueType()),
1545                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1546                            "", GV.getParent());
1547     else
1548       NewGV =
1549           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1550                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1551                              /*init*/ nullptr, "",
1552                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1553                              GV.getType()->getAddressSpace());
1554     NewGV->takeName(&GV);
1555     GV.replaceAllUsesWith(NewGV);
1556     return false;
1557   }
1558   if (!GV.isImplicitDSOLocal())
1559     GV.setDSOLocal(false);
1560   return true;
1561 }
1562 
1563 void llvm::thinLTOFinalizeInModule(Module &TheModule,
1564                                    const GVSummaryMapTy &DefinedGlobals,
1565                                    bool PropagateAttrs) {
1566   DenseSet<Comdat *> NonPrevailingComdats;
1567   auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
1568     // See if the global summary analysis computed a new resolved linkage.
1569     const auto &GS = DefinedGlobals.find(GV.getGUID());
1570     if (GS == DefinedGlobals.end())
1571       return;
1572 
1573     if (Propagate)
1574       if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
1575         if (Function *F = dyn_cast<Function>(&GV)) {
1576           // TODO: propagate ReadNone and ReadOnly.
1577           if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
1578             F->setDoesNotAccessMemory();
1579 
1580           if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
1581             F->setOnlyReadsMemory();
1582 
1583           if (FS->fflags().NoRecurse && !F->doesNotRecurse())
1584             F->setDoesNotRecurse();
1585 
1586           if (FS->fflags().NoUnwind && !F->doesNotThrow())
1587             F->setDoesNotThrow();
1588         }
1589       }
1590 
1591     auto NewLinkage = GS->second->linkage();
1592     if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
1593         // Don't internalize anything here, because the code below
1594         // lacks necessary correctness checks. Leave this job to
1595         // LLVM 'internalize' pass.
1596         GlobalValue::isLocalLinkage(NewLinkage) ||
1597         // In case it was dead and already converted to declaration.
1598         GV.isDeclaration())
1599       return;
1600 
1601     // Set the potentially more constraining visibility computed from summaries.
1602     // The DefaultVisibility condition is because older GlobalValueSummary does
1603     // not record DefaultVisibility and we don't want to change protected/hidden
1604     // to default.
1605     if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
1606       GV.setVisibility(GS->second->getVisibility());
1607 
1608     if (NewLinkage == GV.getLinkage())
1609       return;
1610 
1611     // Check for a non-prevailing def that has interposable linkage
1612     // (e.g. non-odr weak or linkonce). In that case we can't simply
1613     // convert to available_externally, since it would lose the
1614     // interposable property and possibly get inlined. Simply drop
1615     // the definition in that case.
1616     if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
1617         GlobalValue::isInterposableLinkage(GV.getLinkage())) {
1618       if (!convertToDeclaration(GV))
1619         // FIXME: Change this to collect replaced GVs and later erase
1620         // them from the parent module once thinLTOResolvePrevailingGUID is
1621         // changed to enable this for aliases.
1622         llvm_unreachable("Expected GV to be converted");
1623     } else {
1624       // If all copies of the original symbol had global unnamed addr and
1625       // linkonce_odr linkage, or if all of them had local unnamed addr linkage
1626       // and are constants, then it should be an auto hide symbol. In that case
1627       // the thin link would have marked it as CanAutoHide. Add hidden
1628       // visibility to the symbol to preserve the property.
1629       if (NewLinkage == GlobalValue::WeakODRLinkage &&
1630           GS->second->canAutoHide()) {
1631         assert(GV.canBeOmittedFromSymbolTable());
1632         GV.setVisibility(GlobalValue::HiddenVisibility);
1633       }
1634 
1635       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
1636                         << "` from " << GV.getLinkage() << " to " << NewLinkage
1637                         << "\n");
1638       GV.setLinkage(NewLinkage);
1639     }
1640     // Remove declarations from comdats, including available_externally
1641     // as this is a declaration for the linker, and will be dropped eventually.
1642     // It is illegal for comdats to contain declarations.
1643     auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
1644     if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
1645       if (GO->getComdat()->getName() == GO->getName())
1646         NonPrevailingComdats.insert(GO->getComdat());
1647       GO->setComdat(nullptr);
1648     }
1649   };
1650 
1651   // Process functions and global now
1652   for (auto &GV : TheModule)
1653     FinalizeInModule(GV, PropagateAttrs);
1654   for (auto &GV : TheModule.globals())
1655     FinalizeInModule(GV);
1656   for (auto &GV : TheModule.aliases())
1657     FinalizeInModule(GV);
1658 
1659   // For a non-prevailing comdat, all its members must be available_externally.
1660   // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
1661   // local linkage GlobalValues.
1662   if (NonPrevailingComdats.empty())
1663     return;
1664   for (auto &GO : TheModule.global_objects()) {
1665     if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
1666       GO.setComdat(nullptr);
1667       GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
1668     }
1669   }
1670   bool Changed;
1671   do {
1672     Changed = false;
1673     // If an alias references a GlobalValue in a non-prevailing comdat, change
1674     // it to available_externally. For simplicity we only handle GlobalValue and
1675     // ConstantExpr with a base object. ConstantExpr without a base object is
1676     // unlikely used in a COMDAT.
1677     for (auto &GA : TheModule.aliases()) {
1678       if (GA.hasAvailableExternallyLinkage())
1679         continue;
1680       GlobalObject *Obj = GA.getAliaseeObject();
1681       assert(Obj && "aliasee without an base object is unimplemented");
1682       if (Obj->hasAvailableExternallyLinkage()) {
1683         GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
1684         Changed = true;
1685       }
1686     }
1687   } while (Changed);
1688 }
1689 
1690 /// Run internalization on \p TheModule based on symmary analysis.
1691 void llvm::thinLTOInternalizeModule(Module &TheModule,
1692                                     const GVSummaryMapTy &DefinedGlobals) {
1693   // Declare a callback for the internalize pass that will ask for every
1694   // candidate GlobalValue if it can be internalized or not.
1695   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1696     // It may be the case that GV is on a chain of an ifunc, its alias and
1697     // subsequent aliases. In this case, the summary for the value is not
1698     // available.
1699     if (isa<GlobalIFunc>(&GV) ||
1700         (isa<GlobalAlias>(&GV) &&
1701          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1702       return true;
1703 
1704     // Lookup the linkage recorded in the summaries during global analysis.
1705     auto GS = DefinedGlobals.find(GV.getGUID());
1706     if (GS == DefinedGlobals.end()) {
1707       // Must have been promoted (possibly conservatively). Find original
1708       // name so that we can access the correct summary and see if it can
1709       // be internalized again.
1710       // FIXME: Eventually we should control promotion instead of promoting
1711       // and internalizing again.
1712       StringRef OrigName =
1713           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1714       std::string OrigId = GlobalValue::getGlobalIdentifier(
1715           OrigName, GlobalValue::InternalLinkage,
1716           TheModule.getSourceFileName());
1717       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1718       if (GS == DefinedGlobals.end()) {
1719         // Also check the original non-promoted non-globalized name. In some
1720         // cases a preempted weak value is linked in as a local copy because
1721         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1722         // In that case, since it was originally not a local value, it was
1723         // recorded in the index using the original name.
1724         // FIXME: This may not be needed once PR27866 is fixed.
1725         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1726         assert(GS != DefinedGlobals.end());
1727       }
1728     }
1729     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1730   };
1731 
1732   // FIXME: See if we can just internalize directly here via linkage changes
1733   // based on the index, rather than invoking internalizeModule.
1734   internalizeModule(TheModule, MustPreserveGV);
1735 }
1736 
1737 /// Make alias a clone of its aliasee.
1738 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1739   Function *Fn = cast<Function>(GA->getAliaseeObject());
1740 
1741   ValueToValueMapTy VMap;
1742   Function *NewFn = CloneFunction(Fn, VMap);
1743   // Clone should use the original alias's linkage, visibility and name, and we
1744   // ensure all uses of alias instead use the new clone (casted if necessary).
1745   NewFn->setLinkage(GA->getLinkage());
1746   NewFn->setVisibility(GA->getVisibility());
1747   GA->replaceAllUsesWith(NewFn);
1748   NewFn->takeName(GA);
1749   return NewFn;
1750 }
1751 
1752 // Internalize values that we marked with specific attribute
1753 // in processGlobalForThinLTO.
1754 static void internalizeGVsAfterImport(Module &M) {
1755   for (auto &GV : M.globals())
1756     // Skip GVs which have been converted to declarations
1757     // by dropDeadSymbols.
1758     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1759       GV.setLinkage(GlobalValue::InternalLinkage);
1760       GV.setVisibility(GlobalValue::DefaultVisibility);
1761     }
1762 }
1763 
1764 // Automatically import functions in Module \p DestModule based on the summaries
1765 // index.
1766 Expected<bool> FunctionImporter::importFunctions(
1767     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
1768   LLVM_DEBUG(dbgs() << "Starting import for Module "
1769                     << DestModule.getModuleIdentifier() << "\n");
1770   unsigned ImportedCount = 0, ImportedGVCount = 0;
1771 
1772   IRMover Mover(DestModule);
1773   // Do the actual import of functions now, one Module at a time
1774   std::set<StringRef> ModuleNameOrderedList;
1775   for (const auto &FunctionsToImportPerModule : ImportList.getImportMap()) {
1776     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
1777   }
1778 
1779   auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
1780                            GlobalValue::GUID GUID)
1781       -> std::optional<GlobalValueSummary::ImportKind> {
1782     auto Iter = GUIDToImportType.find(GUID);
1783     if (Iter == GUIDToImportType.end())
1784       return std::nullopt;
1785     return Iter->second;
1786   };
1787 
1788   for (const auto &Name : ModuleNameOrderedList) {
1789     // Get the module for the import
1790     const auto &FunctionsToImportPerModule =
1791         ImportList.getImportMap().find(Name);
1792     assert(FunctionsToImportPerModule != ImportList.getImportMap().end());
1793     Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
1794     if (!SrcModuleOrErr)
1795       return SrcModuleOrErr.takeError();
1796     std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
1797     assert(&DestModule.getContext() == &SrcModule->getContext() &&
1798            "Context mismatch");
1799 
1800     // If modules were created with lazy metadata loading, materialize it
1801     // now, before linking it (otherwise this will be a noop).
1802     if (Error Err = SrcModule->materializeMetadata())
1803       return std::move(Err);
1804 
1805     auto &ImportGUIDs = FunctionsToImportPerModule->second;
1806 
1807     // Find the globals to import
1808     SetVector<GlobalValue *> GlobalsToImport;
1809     for (Function &F : *SrcModule) {
1810       if (!F.hasName())
1811         continue;
1812       auto GUID = F.getGUID();
1813       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1814       bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
1815 
1816       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1817                         << " importing function"
1818                         << (ImportDefinition
1819                                 ? " definition "
1820                                 : (MaybeImportType ? " declaration " : " "))
1821                         << GUID << " " << F.getName() << " from "
1822                         << SrcModule->getSourceFileName() << "\n");
1823       if (ImportDefinition) {
1824         if (Error Err = F.materialize())
1825           return std::move(Err);
1826         // MemProf should match function's definition and summary,
1827         // 'thinlto_src_module' is needed.
1828         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1829           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1830           // statistics and debugging.
1831           F.setMetadata(
1832               "thinlto_src_module",
1833               MDNode::get(DestModule.getContext(),
1834                           {MDString::get(DestModule.getContext(),
1835                                          SrcModule->getModuleIdentifier())}));
1836           F.setMetadata(
1837               "thinlto_src_file",
1838               MDNode::get(DestModule.getContext(),
1839                           {MDString::get(DestModule.getContext(),
1840                                          SrcModule->getSourceFileName())}));
1841         }
1842         GlobalsToImport.insert(&F);
1843       }
1844     }
1845     for (GlobalVariable &GV : SrcModule->globals()) {
1846       if (!GV.hasName())
1847         continue;
1848       auto GUID = GV.getGUID();
1849       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1850       bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
1851 
1852       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1853                         << " importing global"
1854                         << (ImportDefinition
1855                                 ? " definition "
1856                                 : (MaybeImportType ? " declaration " : " "))
1857                         << GUID << " " << GV.getName() << " from "
1858                         << SrcModule->getSourceFileName() << "\n");
1859       if (ImportDefinition) {
1860         if (Error Err = GV.materialize())
1861           return std::move(Err);
1862         ImportedGVCount += GlobalsToImport.insert(&GV);
1863       }
1864     }
1865     for (GlobalAlias &GA : SrcModule->aliases()) {
1866       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
1867         continue;
1868       auto GUID = GA.getGUID();
1869       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1870       bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
1871 
1872       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1873                         << " importing alias"
1874                         << (ImportDefinition
1875                                 ? " definition "
1876                                 : (MaybeImportType ? " declaration " : " "))
1877                         << GUID << " " << GA.getName() << " from "
1878                         << SrcModule->getSourceFileName() << "\n");
1879       if (ImportDefinition) {
1880         if (Error Err = GA.materialize())
1881           return std::move(Err);
1882         // Import alias as a copy of its aliasee.
1883         GlobalObject *GO = GA.getAliaseeObject();
1884         if (Error Err = GO->materialize())
1885           return std::move(Err);
1886         auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
1887         LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
1888                           << GO->getName() << " from "
1889                           << SrcModule->getSourceFileName() << "\n");
1890         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1891           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1892           // statistics and debugging.
1893           Fn->setMetadata(
1894               "thinlto_src_module",
1895               MDNode::get(DestModule.getContext(),
1896                           {MDString::get(DestModule.getContext(),
1897                                          SrcModule->getModuleIdentifier())}));
1898           Fn->setMetadata(
1899               "thinlto_src_file",
1900               MDNode::get(DestModule.getContext(),
1901                           {MDString::get(DestModule.getContext(),
1902                                          SrcModule->getSourceFileName())}));
1903         }
1904         GlobalsToImport.insert(Fn);
1905       }
1906     }
1907 
1908     // Upgrade debug info after we're done materializing all the globals and we
1909     // have loaded all the required metadata!
1910     UpgradeDebugInfo(*SrcModule);
1911 
1912     // Set the partial sample profile ratio in the profile summary module flag
1913     // of the imported source module, if applicable, so that the profile summary
1914     // module flag will match with that of the destination module when it's
1915     // imported.
1916     SrcModule->setPartialSampleProfileRatio(Index);
1917 
1918     // Link in the specified functions.
1919     if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
1920                                &GlobalsToImport))
1921       return true;
1922 
1923     if (PrintImports) {
1924       for (const auto *GV : GlobalsToImport)
1925         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
1926                << " from " << SrcModule->getSourceFileName() << "\n";
1927     }
1928 
1929     if (Error Err = Mover.move(std::move(SrcModule),
1930                                GlobalsToImport.getArrayRef(), nullptr,
1931                                /*IsPerformingImport=*/true))
1932       return createStringError(errc::invalid_argument,
1933                                Twine("Function Import: link error: ") +
1934                                    toString(std::move(Err)));
1935 
1936     ImportedCount += GlobalsToImport.size();
1937     NumImportedModules++;
1938   }
1939 
1940   internalizeGVsAfterImport(DestModule);
1941 
1942   NumImportedFunctions += (ImportedCount - ImportedGVCount);
1943   NumImportedGlobalVars += ImportedGVCount;
1944 
1945   // TODO: Print counters for definitions and declarations in the debugging log.
1946   LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
1947                     << " functions for Module "
1948                     << DestModule.getModuleIdentifier() << "\n");
1949   LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
1950                     << " global variables for Module "
1951                     << DestModule.getModuleIdentifier() << "\n");
1952   return ImportedCount;
1953 }
1954 
1955 static bool doImportingForModuleForTest(
1956     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1957                    isPrevailing) {
1958   if (SummaryFile.empty())
1959     report_fatal_error("error: -function-import requires -summary-file\n");
1960   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1961       getModuleSummaryIndexForFile(SummaryFile);
1962   if (!IndexPtrOrErr) {
1963     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1964                           "Error loading file '" + SummaryFile + "': ");
1965     return false;
1966   }
1967   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1968 
1969   // First step is collecting the import list.
1970   FunctionImporter::ImportMapTy ImportList;
1971   // If requested, simply import all functions in the index. This is used
1972   // when testing distributed backend handling via the opt tool, when
1973   // we have distributed indexes containing exactly the summaries to import.
1974   if (ImportAllIndex)
1975     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1976                                                       *Index, ImportList);
1977   else
1978     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1979                                              isPrevailing, *Index, ImportList);
1980 
1981   // Conservatively mark all internal values as promoted. This interface is
1982   // only used when doing importing via the function importing pass. The pass
1983   // is only enabled when testing importing via the 'opt' tool, which does
1984   // not do the ThinLink that would normally determine what values to promote.
1985   for (auto &I : *Index) {
1986     for (auto &S : I.second.SummaryList) {
1987       if (GlobalValue::isLocalLinkage(S->linkage()))
1988         S->setLinkage(GlobalValue::ExternalLinkage);
1989     }
1990   }
1991 
1992   // Next we need to promote to global scope and rename any local values that
1993   // are potentially exported to other modules.
1994   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
1995                              /*GlobalsToImport=*/nullptr)) {
1996     errs() << "Error renaming module\n";
1997     return true;
1998   }
1999 
2000   // Perform the import now.
2001   auto ModuleLoader = [&M](StringRef Identifier) {
2002     return loadFile(std::string(Identifier), M.getContext());
2003   };
2004   FunctionImporter Importer(*Index, ModuleLoader,
2005                             /*ClearDSOLocalOnDeclarations=*/false);
2006   Expected<bool> Result = Importer.importFunctions(M, ImportList);
2007 
2008   // FIXME: Probably need to propagate Errors through the pass manager.
2009   if (!Result) {
2010     logAllUnhandledErrors(Result.takeError(), errs(),
2011                           "Error importing module: ");
2012     return true;
2013   }
2014 
2015   return true;
2016 }
2017 
2018 PreservedAnalyses FunctionImportPass::run(Module &M,
2019                                           ModuleAnalysisManager &AM) {
2020   // This is only used for testing the function import pass via opt, where we
2021   // don't have prevailing information from the LTO context available, so just
2022   // conservatively assume everything is prevailing (which is fine for the very
2023   // limited use of prevailing checking in this pass).
2024   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
2025     return true;
2026   };
2027   if (!doImportingForModuleForTest(M, isPrevailing))
2028     return PreservedAnalyses::all();
2029 
2030   return PreservedAnalyses::none();
2031 }
2032