xref: /llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision 8d9db947b725fefbb02905c5d6be05e09a306f6b)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalAlias.h"
25 #include "llvm/IR/GlobalObject.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/Metadata.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/ModuleSummaryIndex.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/Linker/IRMover.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
59 STATISTIC(NumImportedFunctionsThinLink,
60           "Number of functions thin link decided to import");
61 STATISTIC(NumImportedHotFunctionsThinLink,
62           "Number of hot functions thin link decided to import");
63 STATISTIC(NumImportedCriticalFunctionsThinLink,
64           "Number of critical functions thin link decided to import");
65 STATISTIC(NumImportedGlobalVarsThinLink,
66           "Number of global variables thin link decided to import");
67 STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
68 STATISTIC(NumImportedGlobalVars,
69           "Number of global variables imported in backend");
70 STATISTIC(NumImportedModules, "Number of modules imported from");
71 STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
72 STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
74 /// Limit on instruction count of imported functions.
75 static cl::opt<unsigned> ImportInstrLimit(
76     "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
77     cl::desc("Only import functions with less than N instructions"));
78 
79 static cl::opt<int> ImportCutoff(
80     "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
81     cl::desc("Only import first N functions if N>=0 (default -1)"));
82 
83 static cl::opt<bool>
84     ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
85                    cl::desc("Import functions with noinline attribute"));
86 
87 static cl::opt<float>
88     ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
89                       cl::Hidden, cl::value_desc("x"),
90                       cl::desc("As we import functions, multiply the "
91                                "`import-instr-limit` threshold by this factor "
92                                "before processing newly imported functions"));
93 
94 static cl::opt<float> ImportHotInstrFactor(
95     "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
96     cl::value_desc("x"),
97     cl::desc("As we import functions called from hot callsite, multiply the "
98              "`import-instr-limit` threshold by this factor "
99              "before processing newly imported functions"));
100 
101 static cl::opt<float> ImportHotMultiplier(
102     "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
103     cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
104 
105 static cl::opt<float> ImportCriticalMultiplier(
106     "import-critical-multiplier", cl::init(100.0), cl::Hidden,
107     cl::value_desc("x"),
108     cl::desc(
109         "Multiply the `import-instr-limit` threshold for critical callsites"));
110 
111 // FIXME: This multiplier was not really tuned up.
112 static cl::opt<float> ImportColdMultiplier(
113     "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
114     cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));
115 
116 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
117                                   cl::desc("Print imported functions"));
118 
119 static cl::opt<bool> PrintImportFailures(
120     "print-import-failures", cl::init(false), cl::Hidden,
121     cl::desc("Print information for functions rejected for importing"));
122 
123 static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
124                                  cl::desc("Compute dead symbols"));
125 
126 static cl::opt<bool> EnableImportMetadata(
127     "enable-import-metadata", cl::init(false), cl::Hidden,
128     cl::desc("Enable import metadata like 'thinlto_src_module' and "
129              "'thinlto_src_file'"));
130 
131 /// Summary file to use for function importing when using -function-import from
132 /// the command line.
133 static cl::opt<std::string>
134     SummaryFile("summary-file",
135                 cl::desc("The summary file to use for function importing."));
136 
137 /// Used when testing importing from distributed indexes via opt
138 // -function-import.
139 static cl::opt<bool>
140     ImportAllIndex("import-all-index",
141                    cl::desc("Import all external functions in index."));
142 
143 /// This is a test-only option.
144 /// If this option is enabled, the ThinLTO indexing step will import each
145 /// function declaration as a fallback. In a real build this may increase ram
146 /// usage of the indexing step unnecessarily.
147 /// TODO: Implement selective import (based on combined summary analysis) to
148 /// ensure the imported function has a use case in the postlink pipeline.
149 static cl::opt<bool> ImportDeclaration(
150     "import-declaration", cl::init(false), cl::Hidden,
151     cl::desc("If true, import function declaration as fallback if the function "
152              "definition is not imported."));
153 
154 /// Pass a workload description file - an example of workload would be the
155 /// functions executed to satisfy a RPC request. A workload is defined by a root
156 /// function and the list of functions that are (frequently) needed to satisfy
157 /// it. The module that defines the root will have all those functions imported.
158 /// The file contains a JSON dictionary. The keys are root functions, the values
159 /// are lists of functions to import in the module defining the root. It is
160 /// assumed -funique-internal-linkage-names was used, thus ensuring function
161 /// names are unique even for local linkage ones.
162 static cl::opt<std::string> WorkloadDefinitions(
163     "thinlto-workload-def",
164     cl::desc("Pass a workload definition. This is a file containing a JSON "
165              "dictionary. The keys are root functions, the values are lists of "
166              "functions to import in the module defining the root. It is "
167              "assumed -funique-internal-linkage-names was used, to ensure "
168              "local linkage functions have unique names. For example: \n"
169              "{\n"
170              "  \"rootFunction_1\": [\"function_to_import_1\", "
171              "\"function_to_import_2\"], \n"
172              "  \"rootFunction_2\": [\"function_to_import_3\", "
173              "\"function_to_import_4\"] \n"
174              "}"),
175     cl::Hidden);
176 
177 namespace llvm {
178 extern cl::opt<bool> EnableMemProfContextDisambiguation;
179 }
180 
181 // Load lazily a module from \p FileName in \p Context.
182 static std::unique_ptr<Module> loadFile(const std::string &FileName,
183                                         LLVMContext &Context) {
184   SMDiagnostic Err;
185   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
186   // Metadata isn't loaded until functions are imported, to minimize
187   // the memory overhead.
188   std::unique_ptr<Module> Result =
189       getLazyIRFileModule(FileName, Err, Context,
190                           /* ShouldLazyLoadMetadata = */ true);
191   if (!Result) {
192     Err.print("function-import", errs());
193     report_fatal_error("Abort");
194   }
195 
196   return Result;
197 }
198 
199 /// Given a list of possible callee implementation for a call site, qualify the
200 /// legality of importing each. The return is a range of pairs. Each pair
201 /// corresponds to a candidate. The first value is the ImportFailureReason for
202 /// that candidate, the second is the candidate.
203 static auto qualifyCalleeCandidates(
204     const ModuleSummaryIndex &Index,
205     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
206     StringRef CallerModulePath) {
207   return llvm::map_range(
208       CalleeSummaryList,
209       [&Index, CalleeSummaryList,
210        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
211           -> std::pair<FunctionImporter::ImportFailureReason,
212                        const GlobalValueSummary *> {
213         auto *GVSummary = SummaryPtr.get();
214         if (!Index.isGlobalValueLive(GVSummary))
215           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
216 
217         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
218           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
219                   GVSummary};
220 
221         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
222 
223         // Ignore any callees that aren't actually functions. This could happen
224         // in the case of GUID hash collisions. It could also happen in theory
225         // for SamplePGO profiles collected on old versions of the code after
226         // renaming, since we synthesize edges to any inlined callees appearing
227         // in the profile.
228         if (!Summary)
229           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
230 
231         // If this is a local function, make sure we import the copy
232         // in the caller's module. The only time a local function can
233         // share an entry in the index is if there is a local with the same name
234         // in another module that had the same source file name (in a different
235         // directory), where each was compiled in their own directory so there
236         // was not distinguishing path.
237         // However, do the import from another module if there is only one
238         // entry in the list - in that case this must be a reference due
239         // to indirect call profile data, since a function pointer can point to
240         // a local in another module.
241         if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
242             CalleeSummaryList.size() > 1 &&
243             Summary->modulePath() != CallerModulePath)
244           return {
245               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
246               GVSummary};
247 
248         // Skip if it isn't legal to import (e.g. may reference unpromotable
249         // locals).
250         if (Summary->notEligibleToImport())
251           return {FunctionImporter::ImportFailureReason::NotEligible,
252                   GVSummary};
253 
254         return {FunctionImporter::ImportFailureReason::None, GVSummary};
255       });
256 }
257 
258 /// Given a list of possible callee implementation for a call site, select one
259 /// that fits the \p Threshold for function definition import. If none are
260 /// found, the Reason will give the last reason for the failure (last, in the
261 /// order of CalleeSummaryList entries). While looking for a callee definition,
262 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
263 /// candidate; other modules may want to know the function summary or
264 /// declaration even if a definition is not needed.
265 ///
266 /// FIXME: select "best" instead of first that fits. But what is "best"?
267 /// - The smallest: more likely to be inlined.
268 /// - The one with the least outgoing edges (already well optimized).
269 /// - One from a module already being imported from in order to reduce the
270 ///   number of source modules parsed/linked.
271 /// - One that has PGO data attached.
272 /// - [insert you fancy metric here]
273 static const GlobalValueSummary *
274 selectCallee(const ModuleSummaryIndex &Index,
275              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
276              unsigned Threshold, StringRef CallerModulePath,
277              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
278              FunctionImporter::ImportFailureReason &Reason) {
279   // Records the last summary with reason noinline or too-large.
280   TooLargeOrNoInlineSummary = nullptr;
281   auto QualifiedCandidates =
282       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
283   for (auto QualifiedValue : QualifiedCandidates) {
284     Reason = QualifiedValue.first;
285     // Skip a summary if its import is not (proved to be) legal.
286     if (Reason != FunctionImporter::ImportFailureReason::None)
287       continue;
288     auto *Summary =
289         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
290 
291     // Don't bother importing the definition if the chance of inlining it is
292     // not high enough (except under `--force-import-all`).
293     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
294         !ForceImportAll) {
295       TooLargeOrNoInlineSummary = Summary;
296       Reason = FunctionImporter::ImportFailureReason::TooLarge;
297       continue;
298     }
299 
300     // Don't bother importing the definition if we can't inline it anyway.
301     if (Summary->fflags().NoInline && !ForceImportAll) {
302       TooLargeOrNoInlineSummary = Summary;
303       Reason = FunctionImporter::ImportFailureReason::NoInline;
304       continue;
305     }
306 
307     return Summary;
308   }
309   return nullptr;
310 }
311 
312 namespace {
313 
314 using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;
315 
316 } // anonymous namespace
317 
318 /// Import globals referenced by a function or other globals that are being
319 /// imported, if importing such global is possible.
320 class GlobalsImporter final {
321   const ModuleSummaryIndex &Index;
322   const GVSummaryMapTy &DefinedGVSummaries;
323   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
324       IsPrevailing;
325   FunctionImporter::ImportMapTy &ImportList;
326   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
327 
328   bool shouldImportGlobal(const ValueInfo &VI) {
329     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
330     if (GVS == DefinedGVSummaries.end())
331       return true;
332     // We should not skip import if the module contains a non-prevailing
333     // definition with interposable linkage type. This is required for
334     // correctness in the situation where there is a prevailing def available
335     // for import and marked read-only. In this case, the non-prevailing def
336     // will be converted to a declaration, while the prevailing one becomes
337     // internal, thus no definitions will be available for linking. In order to
338     // prevent undefined symbol link error, the prevailing definition must be
339     // imported.
340     // FIXME: Consider adding a check that the suitable prevailing definition
341     // exists and marked read-only.
342     if (VI.getSummaryList().size() > 1 &&
343         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
344         !IsPrevailing(VI.getGUID(), GVS->second))
345       return true;
346 
347     return false;
348   }
349 
350   void
351   onImportingSummaryImpl(const GlobalValueSummary &Summary,
352                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
353     for (const auto &VI : Summary.refs()) {
354       if (!shouldImportGlobal(VI)) {
355         LLVM_DEBUG(
356             dbgs() << "Ref ignored! Target already in destination module.\n");
357         continue;
358       }
359 
360       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
361 
362       // If this is a local variable, make sure we import the copy
363       // in the caller's module. The only time a local variable can
364       // share an entry in the index is if there is a local with the same name
365       // in another module that had the same source file name (in a different
366       // directory), where each was compiled in their own directory so there
367       // was not distinguishing path.
368       auto LocalNotInModule =
369           [&](const GlobalValueSummary *RefSummary) -> bool {
370         return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
371                RefSummary->modulePath() != Summary.modulePath();
372       };
373 
374       for (const auto &RefSummary : VI.getSummaryList()) {
375         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
376         // Functions could be referenced by global vars - e.g. a vtable; but we
377         // don't currently imagine a reason those would be imported here, rather
378         // than as part of the logic deciding which functions to import (i.e.
379         // based on profile information). Should we decide to handle them here,
380         // we can refactor accordingly at that time.
381         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
382             LocalNotInModule(GVS))
383           continue;
384 
385         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
386         // Otherwise, definition should take precedence over declaration.
387         auto [Iter, Inserted] =
388             ImportList[RefSummary->modulePath()].try_emplace(
389                 VI.getGUID(), GlobalValueSummary::Definition);
390         // Only update stat and exports if we haven't already imported this
391         // variable.
392         if (!Inserted) {
393           // Set the value to 'std::min(existing-value, new-value)' to make
394           // sure a definition takes precedence over a declaration.
395           Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
396           break;
397         }
398         NumImportedGlobalVarsThinLink++;
399         // Any references made by this variable will be marked exported
400         // later, in ComputeCrossModuleImport, after import decisions are
401         // complete, which is more efficient than adding them here.
402         if (ExportLists)
403           (*ExportLists)[RefSummary->modulePath()][VI] =
404               GlobalValueSummary::Definition;
405 
406         // If variable is not writeonly we attempt to recursively analyze
407         // its references in order to import referenced constants.
408         if (!Index.isWriteOnly(GVS))
409           Worklist.emplace_back(GVS);
410         break;
411       }
412     }
413   }
414 
415 public:
416   GlobalsImporter(
417       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
418       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
419           IsPrevailing,
420       FunctionImporter::ImportMapTy &ImportList,
421       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
422       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
423         IsPrevailing(IsPrevailing), ImportList(ImportList),
424         ExportLists(ExportLists) {}
425 
426   void onImportingSummary(const GlobalValueSummary &Summary) {
427     SmallVector<const GlobalVarSummary *, 128> Worklist;
428     onImportingSummaryImpl(Summary, Worklist);
429     while (!Worklist.empty())
430       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
431   }
432 };
433 
434 static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
435 
436 /// Determine the list of imports and exports for each module.
437 class ModuleImportsManager {
438 protected:
439   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
440       IsPrevailing;
441   const ModuleSummaryIndex &Index;
442   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
443 
444   ModuleImportsManager(
445       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
446           IsPrevailing,
447       const ModuleSummaryIndex &Index,
448       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
449       : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}
450 
451 public:
452   virtual ~ModuleImportsManager() = default;
453 
454   /// Given the list of globals defined in a module, compute the list of imports
455   /// as well as the list of "exports", i.e. the list of symbols referenced from
456   /// another module (that may require promotion).
457   virtual void
458   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
459                          StringRef ModName,
460                          FunctionImporter::ImportMapTy &ImportList);
461 
462   static std::unique_ptr<ModuleImportsManager>
463   create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
464              IsPrevailing,
465          const ModuleSummaryIndex &Index,
466          DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
467              nullptr);
468 };
469 
470 /// A ModuleImportsManager that operates based on a workload definition (see
471 /// -thinlto-workload-def). For modules that do not define workload roots, it
472 /// applies the base ModuleImportsManager import policy.
473 class WorkloadImportsManager : public ModuleImportsManager {
474   // Keep a module name -> value infos to import association. We use it to
475   // determine if a module's import list should be done by the base
476   // ModuleImportsManager or by us.
477   StringMap<DenseSet<ValueInfo>> Workloads;
478 
479   void
480   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
481                          StringRef ModName,
482                          FunctionImporter::ImportMapTy &ImportList) override {
483     auto SetIter = Workloads.find(ModName);
484     if (SetIter == Workloads.end()) {
485       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
486                         << " does not contain the root of any context.\n");
487       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
488                                                           ModName, ImportList);
489     }
490     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
491                       << " contains the root(s) of context(s).\n");
492 
493     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
494                         ExportLists);
495     auto &ValueInfos = SetIter->second;
496     SmallVector<EdgeInfo, 128> GlobWorklist;
497     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
498       auto It = DefinedGVSummaries.find(VI.getGUID());
499       if (It != DefinedGVSummaries.end() &&
500           IsPrevailing(VI.getGUID(), It->second)) {
501         LLVM_DEBUG(
502             dbgs() << "[Workload] " << VI.name()
503                    << " has the prevailing variant already in the module "
504                    << ModName << ". No need to import\n");
505         continue;
506       }
507       auto Candidates =
508           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
509 
510       const GlobalValueSummary *GVS = nullptr;
511       auto PotentialCandidates = llvm::map_range(
512           llvm::make_filter_range(
513               Candidates,
514               [&](const auto &Candidate) {
515                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
516                                   << " from " << Candidate.second->modulePath()
517                                   << " ImportFailureReason: "
518                                   << getFailureName(Candidate.first) << "\n");
519                 return Candidate.first ==
520                         FunctionImporter::ImportFailureReason::None;
521               }),
522           [](const auto &Candidate) { return Candidate.second; });
523       if (PotentialCandidates.empty()) {
524         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
525                           << " because can't find eligible Callee. Guid is: "
526                           << Function::getGUID(VI.name()) << "\n");
527         continue;
528       }
529       /// We will prefer importing the prevailing candidate, if not, we'll
530       /// still pick the first available candidate. The reason we want to make
531       /// sure we do import the prevailing candidate is because the goal of
532       /// workload-awareness is to enable optimizations specializing the call
533       /// graph of that workload. Suppose a function is already defined in the
534       /// module, but it's not the prevailing variant. Suppose also we do not
535       /// inline it (in fact, if it were interposable, we can't inline it),
536       /// but we could specialize it to the workload in other ways. However,
537       /// the linker would drop it in the favor of the prevailing copy.
538       /// Instead, by importing the prevailing variant (assuming also the use
539       /// of `-avail-extern-to-local`), we keep the specialization. We could
540       /// alteranatively make the non-prevailing variant local, but the
541       /// prevailing one is also the one for which we would have previously
542       /// collected profiles, making it preferrable.
543       auto PrevailingCandidates = llvm::make_filter_range(
544           PotentialCandidates, [&](const auto *Candidate) {
545             return IsPrevailing(VI.getGUID(), Candidate);
546           });
547       if (PrevailingCandidates.empty()) {
548         GVS = *PotentialCandidates.begin();
549         if (!llvm::hasSingleElement(PotentialCandidates) &&
550             GlobalValue::isLocalLinkage(GVS->linkage()))
551           LLVM_DEBUG(
552               dbgs()
553               << "[Workload] Found multiple non-prevailing candidates for "
554               << VI.name()
555               << ". This is unexpected. Are module paths passed to the "
556                  "compiler unique for the modules passed to the linker?");
557         // We could in theory have multiple (interposable) copies of a symbol
558         // when there is no prevailing candidate, if say the prevailing copy was
559         // in a native object being linked in. However, we should in theory be
560         // marking all of these non-prevailing IR copies dead in that case, in
561         // which case they won't be candidates.
562         assert(GVS->isLive());
563       } else {
564         assert(llvm::hasSingleElement(PrevailingCandidates));
565         GVS = *PrevailingCandidates.begin();
566       }
567 
568       auto ExportingModule = GVS->modulePath();
569       // We checked that for the prevailing case, but if we happen to have for
570       // example an internal that's defined in this module, it'd have no
571       // PrevailingCandidates.
572       if (ExportingModule == ModName) {
573         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
574                           << " because its defining module is the same as the "
575                              "current module\n");
576         continue;
577       }
578       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
579                         << ExportingModule << " : "
580                         << Function::getGUID(VI.name()) << "\n");
581       ImportList[ExportingModule][VI.getGUID()] =
582           GlobalValueSummary::Definition;
583       GVI.onImportingSummary(*GVS);
584       if (ExportLists)
585         (*ExportLists)[ExportingModule][VI] = GlobalValueSummary::Definition;
586     }
587     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
588   }
589 
590 public:
591   WorkloadImportsManager(
592       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
593           IsPrevailing,
594       const ModuleSummaryIndex &Index,
595       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
596       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
597     // Since the workload def uses names, we need a quick lookup
598     // name->ValueInfo.
599     StringMap<ValueInfo> NameToValueInfo;
600     StringSet<> AmbiguousNames;
601     for (auto &I : Index) {
602       ValueInfo VI = Index.getValueInfo(I);
603       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
604         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
605     }
606     auto DbgReportIfAmbiguous = [&](StringRef Name) {
607       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
608         dbgs() << "[Workload] Function name " << Name
609                << " present in the workload definition is ambiguous. Consider "
610                   "compiling with -funique-internal-linkage-names.";
611       });
612     };
613     std::error_code EC;
614     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
615     if (std::error_code EC = BufferOrErr.getError()) {
616       report_fatal_error("Failed to open context file");
617       return;
618     }
619     auto Buffer = std::move(BufferOrErr.get());
620     std::map<std::string, std::vector<std::string>> WorkloadDefs;
621     json::Path::Root NullRoot;
622     // The JSON is supposed to contain a dictionary matching the type of
623     // WorkloadDefs. For example:
624     // {
625     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
626     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
627     // }
628     auto Parsed = json::parse(Buffer->getBuffer());
629     if (!Parsed)
630       report_fatal_error(Parsed.takeError());
631     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
632       report_fatal_error("Invalid thinlto contextual profile format.");
633     for (const auto &Workload : WorkloadDefs) {
634       const auto &Root = Workload.first;
635       DbgReportIfAmbiguous(Root);
636       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
637       const auto &AllCallees = Workload.second;
638       auto RootIt = NameToValueInfo.find(Root);
639       if (RootIt == NameToValueInfo.end()) {
640         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
641                           << " not found in this linkage unit.\n");
642         continue;
643       }
644       auto RootVI = RootIt->second;
645       if (RootVI.getSummaryList().size() != 1) {
646         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
647                           << " should have exactly one summary, but has "
648                           << RootVI.getSummaryList().size() << ". Skipping.\n");
649         continue;
650       }
651       StringRef RootDefiningModule =
652           RootVI.getSummaryList().front()->modulePath();
653       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
654                         << " is : " << RootDefiningModule << "\n");
655       auto &Set = Workloads[RootDefiningModule];
656       for (const auto &Callee : AllCallees) {
657         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
658         DbgReportIfAmbiguous(Callee);
659         auto ElemIt = NameToValueInfo.find(Callee);
660         if (ElemIt == NameToValueInfo.end()) {
661           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
662           continue;
663         }
664         Set.insert(ElemIt->second);
665       }
666       LLVM_DEBUG({
667         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
668                << " distinct callees.\n";
669         for (const auto &VI : Set) {
670           dbgs() << "[Workload] Root: " << Root
671                  << " Would include: " << VI.getGUID() << "\n";
672         }
673       });
674     }
675   }
676 };
677 
678 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
679     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
680         IsPrevailing,
681     const ModuleSummaryIndex &Index,
682     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
683   if (WorkloadDefinitions.empty()) {
684     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
685     return std::unique_ptr<ModuleImportsManager>(
686         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
687   }
688   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
689   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
690                                                   ExportLists);
691 }
692 
693 static const char *
694 getFailureName(FunctionImporter::ImportFailureReason Reason) {
695   switch (Reason) {
696   case FunctionImporter::ImportFailureReason::None:
697     return "None";
698   case FunctionImporter::ImportFailureReason::GlobalVar:
699     return "GlobalVar";
700   case FunctionImporter::ImportFailureReason::NotLive:
701     return "NotLive";
702   case FunctionImporter::ImportFailureReason::TooLarge:
703     return "TooLarge";
704   case FunctionImporter::ImportFailureReason::InterposableLinkage:
705     return "InterposableLinkage";
706   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
707     return "LocalLinkageNotInModule";
708   case FunctionImporter::ImportFailureReason::NotEligible:
709     return "NotEligible";
710   case FunctionImporter::ImportFailureReason::NoInline:
711     return "NoInline";
712   }
713   llvm_unreachable("invalid reason");
714 }
715 
716 /// Compute the list of functions to import for a given caller. Mark these
717 /// imported functions and the symbols they reference in their source module as
718 /// exported from their source module.
719 static void computeImportForFunction(
720     const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
721     const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
722     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
723         isPrevailing,
724     SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
725     FunctionImporter::ImportMapTy &ImportList,
726     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
727     FunctionImporter::ImportThresholdsTy &ImportThresholds) {
728   GVImporter.onImportingSummary(Summary);
729   static int ImportCount = 0;
730   for (const auto &Edge : Summary.calls()) {
731     ValueInfo VI = Edge.first;
732     LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
733                       << "\n");
734 
735     if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
736       LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
737                         << " reached.\n");
738       continue;
739     }
740 
741     if (DefinedGVSummaries.count(VI.getGUID())) {
742       // FIXME: Consider not skipping import if the module contains
743       // a non-prevailing def with interposable linkage. The prevailing copy
744       // can safely be imported (see shouldImportGlobal()).
745       LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
746       continue;
747     }
748 
749     auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
750       if (Hotness == CalleeInfo::HotnessType::Hot)
751         return ImportHotMultiplier;
752       if (Hotness == CalleeInfo::HotnessType::Cold)
753         return ImportColdMultiplier;
754       if (Hotness == CalleeInfo::HotnessType::Critical)
755         return ImportCriticalMultiplier;
756       return 1.0;
757     };
758 
759     const auto NewThreshold =
760         Threshold * GetBonusMultiplier(Edge.second.getHotness());
761 
762     auto IT = ImportThresholds.insert(std::make_pair(
763         VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
764     bool PreviouslyVisited = !IT.second;
765     auto &ProcessedThreshold = std::get<0>(IT.first->second);
766     auto &CalleeSummary = std::get<1>(IT.first->second);
767     auto &FailureInfo = std::get<2>(IT.first->second);
768 
769     bool IsHotCallsite =
770         Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
771     bool IsCriticalCallsite =
772         Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;
773 
774     const FunctionSummary *ResolvedCalleeSummary = nullptr;
775     if (CalleeSummary) {
776       assert(PreviouslyVisited);
777       // Since the traversal of the call graph is DFS, we can revisit a function
778       // a second time with a higher threshold. In this case, it is added back
779       // to the worklist with the new threshold (so that its own callee chains
780       // can be considered with the higher threshold).
781       if (NewThreshold <= ProcessedThreshold) {
782         LLVM_DEBUG(
783             dbgs() << "ignored! Target was already imported with Threshold "
784                    << ProcessedThreshold << "\n");
785         continue;
786       }
787       // Update with new larger threshold.
788       ProcessedThreshold = NewThreshold;
789       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
790     } else {
791       // If we already rejected importing a callee at the same or higher
792       // threshold, don't waste time calling selectCallee.
793       if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
794         LLVM_DEBUG(
795             dbgs() << "ignored! Target was already rejected with Threshold "
796             << ProcessedThreshold << "\n");
797         if (PrintImportFailures) {
798           assert(FailureInfo &&
799                  "Expected FailureInfo for previously rejected candidate");
800           FailureInfo->Attempts++;
801         }
802         continue;
803       }
804 
805       FunctionImporter::ImportFailureReason Reason{};
806 
807       // `SummaryForDeclImport` is an summary eligible for declaration import.
808       const GlobalValueSummary *SummaryForDeclImport = nullptr;
809       CalleeSummary =
810           selectCallee(Index, VI.getSummaryList(), NewThreshold,
811                        Summary.modulePath(), SummaryForDeclImport, Reason);
812       if (!CalleeSummary) {
813         // There isn't a callee for definition import but one for declaration
814         // import.
815         if (ImportDeclaration && SummaryForDeclImport) {
816           StringRef DeclSourceModule = SummaryForDeclImport->modulePath();
817 
818           // Since definition takes precedence over declaration for the same VI,
819           // try emplace <VI, declaration> pair without checking insert result.
820           // If insert doesn't happen, there must be an existing entry keyed by
821           // VI.
822           if (ExportLists)
823             (*ExportLists)[DeclSourceModule].try_emplace(
824                 VI, GlobalValueSummary::Declaration);
825           ImportList[DeclSourceModule].try_emplace(
826               VI.getGUID(), GlobalValueSummary::Declaration);
827         }
828         // Update with new larger threshold if this was a retry (otherwise
829         // we would have already inserted with NewThreshold above). Also
830         // update failure info if requested.
831         if (PreviouslyVisited) {
832           ProcessedThreshold = NewThreshold;
833           if (PrintImportFailures) {
834             assert(FailureInfo &&
835                    "Expected FailureInfo for previously rejected candidate");
836             FailureInfo->Reason = Reason;
837             FailureInfo->Attempts++;
838             FailureInfo->MaxHotness =
839                 std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
840           }
841         } else if (PrintImportFailures) {
842           assert(!FailureInfo &&
843                  "Expected no FailureInfo for newly rejected candidate");
844           FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
845               VI, Edge.second.getHotness(), Reason, 1);
846         }
847         if (ForceImportAll) {
848           std::string Msg = std::string("Failed to import function ") +
849                             VI.name().str() + " due to " +
850                             getFailureName(Reason);
851           auto Error = make_error<StringError>(
852               Msg, make_error_code(errc::not_supported));
853           logAllUnhandledErrors(std::move(Error), errs(),
854                                 "Error importing module: ");
855           break;
856         } else {
857           LLVM_DEBUG(dbgs()
858                      << "ignored! No qualifying callee with summary found.\n");
859           continue;
860         }
861       }
862 
863       // "Resolve" the summary
864       CalleeSummary = CalleeSummary->getBaseObject();
865       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
866 
867       assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
868               (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
869              "selectCallee() didn't honor the threshold");
870 
871       auto ExportModulePath = ResolvedCalleeSummary->modulePath();
872 
873       // Try emplace the definition entry, and update stats based on insertion
874       // status.
875       auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
876           VI.getGUID(), GlobalValueSummary::Definition);
877 
878       // We previously decided to import this GUID definition if it was already
879       // inserted in the set of imports from the exporting module.
880       if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
881         NumImportedFunctionsThinLink++;
882         if (IsHotCallsite)
883           NumImportedHotFunctionsThinLink++;
884         if (IsCriticalCallsite)
885           NumImportedCriticalFunctionsThinLink++;
886       }
887 
888       if (Iter->second == GlobalValueSummary::Declaration)
889         Iter->second = GlobalValueSummary::Definition;
890 
891       // Any calls/references made by this function will be marked exported
892       // later, in ComputeCrossModuleImport, after import decisions are
893       // complete, which is more efficient than adding them here.
894       if (ExportLists)
895         (*ExportLists)[ExportModulePath][VI] = GlobalValueSummary::Definition;
896     }
897 
898     auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
899       // Adjust the threshold for next level of imported functions.
900       // The threshold is different for hot callsites because we can then
901       // inline chains of hot calls.
902       if (IsHotCallsite)
903         return Threshold * ImportHotInstrFactor;
904       return Threshold * ImportInstrFactor;
905     };
906 
907     const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
908 
909     ImportCount++;
910 
911     // Insert the newly imported function to the worklist.
912     Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
913   }
914 }
915 
916 void ModuleImportsManager::computeImportForModule(
917     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
918     FunctionImporter::ImportMapTy &ImportList) {
919   // Worklist contains the list of function imported in this module, for which
920   // we will analyse the callees and may import further down the callgraph.
921   SmallVector<EdgeInfo, 128> Worklist;
922   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
923                       ExportLists);
924   FunctionImporter::ImportThresholdsTy ImportThresholds;
925 
926   // Populate the worklist with the import for the functions in the current
927   // module
928   for (const auto &GVSummary : DefinedGVSummaries) {
929 #ifndef NDEBUG
930     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
931     // so this map look up (and possibly others) can be avoided.
932     auto VI = Index.getValueInfo(GVSummary.first);
933 #endif
934     if (!Index.isGlobalValueLive(GVSummary.second)) {
935       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
936       continue;
937     }
938     auto *FuncSummary =
939         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
940     if (!FuncSummary)
941       // Skip import for global variables
942       continue;
943     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
944     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
945                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
946                              ImportList, ExportLists, ImportThresholds);
947   }
948 
949   // Process the newly imported functions and add callees to the worklist.
950   while (!Worklist.empty()) {
951     auto GVInfo = Worklist.pop_back_val();
952     auto *Summary = std::get<0>(GVInfo);
953     auto Threshold = std::get<1>(GVInfo);
954 
955     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
956       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
957                                IsPrevailing, Worklist, GVI, ImportList,
958                                ExportLists, ImportThresholds);
959   }
960 
961   // Print stats about functions considered but rejected for importing
962   // when requested.
963   if (PrintImportFailures) {
964     dbgs() << "Missed imports into module " << ModName << "\n";
965     for (auto &I : ImportThresholds) {
966       auto &ProcessedThreshold = std::get<0>(I.second);
967       auto &CalleeSummary = std::get<1>(I.second);
968       auto &FailureInfo = std::get<2>(I.second);
969       if (CalleeSummary)
970         continue; // We are going to import.
971       assert(FailureInfo);
972       FunctionSummary *FS = nullptr;
973       if (!FailureInfo->VI.getSummaryList().empty())
974         FS = dyn_cast<FunctionSummary>(
975             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
976       dbgs() << FailureInfo->VI
977              << ": Reason = " << getFailureName(FailureInfo->Reason)
978              << ", Threshold = " << ProcessedThreshold
979              << ", Size = " << (FS ? (int)FS->instCount() : -1)
980              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
981              << ", Attempts = " << FailureInfo->Attempts << "\n";
982     }
983   }
984 }
985 
986 #ifndef NDEBUG
987 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
988   auto SL = VI.getSummaryList();
989   return SL.empty()
990              ? false
991              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
992 }
993 
994 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
995                                GlobalValue::GUID G) {
996   if (const auto &VI = Index.getValueInfo(G))
997     return isGlobalVarSummary(Index, VI);
998   return false;
999 }
1000 
1001 template <class T>
1002 static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index, T &Cont,
1003                                       unsigned &DefinedGVS,
1004                                       unsigned &DefinedFS) {
1005   unsigned NumGVS = 0;
1006   DefinedGVS = 0;
1007   DefinedFS = 0;
1008   for (auto &[GUID, Type] : Cont) {
1009     if (isGlobalVarSummary(Index, GUID)) {
1010       if (Type == GlobalValueSummary::Definition)
1011         ++DefinedGVS;
1012       ++NumGVS;
1013     } else if (Type == GlobalValueSummary::Definition)
1014       ++DefinedFS;
1015   }
1016   return NumGVS;
1017 }
1018 #endif
1019 
1020 #ifndef NDEBUG
1021 static bool checkVariableImport(
1022     const ModuleSummaryIndex &Index,
1023     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1024     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1025   DenseSet<GlobalValue::GUID> FlattenedImports;
1026 
1027   for (auto &ImportPerModule : ImportLists)
1028     for (auto &ExportPerModule : ImportPerModule.second)
1029       for (auto &[GUID, Type] : ExportPerModule.second)
1030         FlattenedImports.insert(GUID);
1031 
1032   // Checks that all GUIDs of read/writeonly vars we see in export lists
1033   // are also in the import lists. Otherwise we my face linker undefs,
1034   // because readonly and writeonly vars are internalized in their
1035   // source modules. The exception would be if it has a linkage type indicating
1036   // that there may have been a copy existing in the importing module (e.g.
1037   // linkonce_odr). In that case we cannot accurately do this checking.
1038   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1039                                                   const ValueInfo &VI) {
1040     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1041         Index.findSummaryInModule(VI, ModulePath));
1042     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1043            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1044              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1045              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1046   };
1047 
1048   for (auto &ExportPerModule : ExportLists)
1049     for (auto &[VI, Unused] : ExportPerModule.second)
1050       if (!FlattenedImports.count(VI.getGUID()) &&
1051           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1052         return false;
1053 
1054   return true;
1055 }
1056 #endif
1057 
1058 /// Compute all the import and export for every module using the Index.
1059 void llvm::ComputeCrossModuleImport(
1060     const ModuleSummaryIndex &Index,
1061     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1062     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1063         isPrevailing,
1064     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1065     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1066   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1067   // For each module that has function defined, compute the import/export lists.
1068   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1069     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1070     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1071                       << DefinedGVSummaries.first << "'\n");
1072     MIS->computeImportForModule(DefinedGVSummaries.second,
1073                                 DefinedGVSummaries.first, ImportList);
1074   }
1075 
1076   // When computing imports we only added the variables and functions being
1077   // imported to the export list. We also need to mark any references and calls
1078   // they make as exported as well. We do this here, as it is more efficient
1079   // since we may import the same values multiple times into different modules
1080   // during the import computation.
1081   for (auto &ELI : ExportLists) {
1082     FunctionImporter::ExportSetTy NewExports;
1083     const auto &DefinedGVSummaries =
1084         ModuleToDefinedGVSummaries.lookup(ELI.first);
1085     for (auto &[EI, Type] : ELI.second) {
1086       // If a variable is exported as a declaration, its 'refs' and 'calls' are
1087       // not further exported.
1088       if (Type == GlobalValueSummary::Declaration)
1089         continue;
1090       // Find the copy defined in the exporting module so that we can mark the
1091       // values it references in that specific definition as exported.
1092       // Below we will add all references and called values, without regard to
1093       // whether they are also defined in this module. We subsequently prune the
1094       // list to only include those defined in the exporting module, see comment
1095       // there as to why.
1096       auto DS = DefinedGVSummaries.find(EI.getGUID());
1097       // Anything marked exported during the import computation must have been
1098       // defined in the exporting module.
1099       assert(DS != DefinedGVSummaries.end());
1100       auto *S = DS->getSecond();
1101       S = S->getBaseObject();
1102       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1103         // Export referenced functions and variables. We don't export/promote
1104         // objects referenced by writeonly variable initializer, because
1105         // we convert such variables initializers to "zeroinitializer".
1106         // See processGlobalForThinLTO.
1107         if (!Index.isWriteOnly(GVS))
1108           for (const auto &VI : GVS->refs()) {
1109             // Try to emplace the declaration entry. If a definition entry
1110             // already exists for key `VI`, this is a no-op.
1111             NewExports.try_emplace(VI, GlobalValueSummary::Declaration);
1112           }
1113       } else {
1114         auto *FS = cast<FunctionSummary>(S);
1115         for (const auto &Edge : FS->calls()) {
1116           // Try to emplace the declaration entry. If a definition entry
1117           // already exists for key `VI`, this is a no-op.
1118           NewExports.try_emplace(Edge.first, GlobalValueSummary::Declaration);
1119         }
1120         for (const auto &Ref : FS->refs()) {
1121           // Try to emplace the declaration entry. If a definition entry
1122           // already exists for key `VI`, this is a no-op.
1123           NewExports.try_emplace(Ref, GlobalValueSummary::Declaration);
1124         }
1125       }
1126     }
1127     // Prune list computed above to only include values defined in the
1128     // exporting module. We do this after the above insertion since we may hit
1129     // the same ref/call target multiple times in above loop, and it is more
1130     // efficient to avoid a set lookup each time.
1131     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1132       if (!DefinedGVSummaries.count(EI->first.getGUID()))
1133         NewExports.erase(EI++);
1134       else
1135         ++EI;
1136     }
1137     ELI.second.insert(NewExports.begin(), NewExports.end());
1138   }
1139 
1140   assert(checkVariableImport(Index, ImportLists, ExportLists));
1141 #ifndef NDEBUG
1142   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1143                     << " modules:\n");
1144   for (auto &ModuleImports : ImportLists) {
1145     auto ModName = ModuleImports.first;
1146     auto &Exports = ExportLists[ModName];
1147     unsigned DefinedGVS = 0, DefinedFS = 0;
1148     unsigned NumGVS =
1149         numGlobalVarSummaries(Index, Exports, DefinedGVS, DefinedFS);
1150     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports " << DefinedFS
1151                       << " function as definitions, "
1152                       << Exports.size() - NumGVS - DefinedFS
1153                       << " functions as declarations, " << DefinedGVS
1154                       << " var definitions and " << NumGVS - DefinedGVS
1155                       << " var declarations. Imports from "
1156                       << ModuleImports.second.size() << " modules.\n");
1157     for (auto &Src : ModuleImports.second) {
1158       auto SrcModName = Src.first;
1159       unsigned DefinedGVS = 0, DefinedFS = 0;
1160       unsigned NumGVSPerMod =
1161           numGlobalVarSummaries(Index, Src.second, DefinedGVS, DefinedFS);
1162       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1163                         << Src.second.size() - NumGVSPerMod - DefinedFS
1164                         << " function declarations imported from " << SrcModName
1165                         << "\n");
1166       LLVM_DEBUG(dbgs() << " - " << DefinedGVS << " global vars definition and "
1167                         << NumGVSPerMod - DefinedGVS
1168                         << " global vars declaration imported from "
1169                         << SrcModName << "\n");
1170     }
1171   }
1172 #endif
1173 }
1174 
1175 #ifndef NDEBUG
1176 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1177                                     StringRef ModulePath,
1178                                     FunctionImporter::ImportMapTy &ImportList) {
1179   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1180                     << ImportList.size() << " modules.\n");
1181   for (auto &Src : ImportList) {
1182     auto SrcModName = Src.first;
1183     unsigned DefinedGVS = 0, DefinedFS = 0;
1184     unsigned NumGVSPerMod =
1185         numGlobalVarSummaries(Index, Src.second, DefinedGVS, DefinedFS);
1186     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1187                       << Src.second.size() - DefinedFS - NumGVSPerMod
1188                       << " function declarations imported from " << SrcModName
1189                       << "\n");
1190     LLVM_DEBUG(dbgs() << " - " << DefinedGVS << " var definitions and "
1191                       << NumGVSPerMod - DefinedGVS
1192                       << " var declarations imported from " << SrcModName
1193                       << "\n");
1194   }
1195 }
1196 #endif
1197 
1198 /// Compute all the imports for the given module using the Index.
1199 ///
1200 /// \p isPrevailing is a callback that will be called with a global value's GUID
1201 /// and summary and should return whether the module corresponding to the
1202 /// summary contains the linker-prevailing copy of that value.
1203 ///
1204 /// \p ImportList will be populated with a map that can be passed to
1205 /// FunctionImporter::importFunctions() above (see description there).
1206 static void ComputeCrossModuleImportForModuleForTest(
1207     StringRef ModulePath,
1208     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1209         isPrevailing,
1210     const ModuleSummaryIndex &Index,
1211     FunctionImporter::ImportMapTy &ImportList) {
1212   // Collect the list of functions this module defines.
1213   // GUID -> Summary
1214   GVSummaryMapTy FunctionSummaryMap;
1215   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1216 
1217   // Compute the import list for this module.
1218   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1219   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1220   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1221 
1222 #ifndef NDEBUG
1223   dumpImportListForModule(Index, ModulePath, ImportList);
1224 #endif
1225 }
1226 
1227 /// Mark all external summaries in \p Index for import into the given module.
1228 /// Used for testing the case of distributed builds using a distributed index.
1229 ///
1230 /// \p ImportList will be populated with a map that can be passed to
1231 /// FunctionImporter::importFunctions() above (see description there).
1232 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1233     StringRef ModulePath, const ModuleSummaryIndex &Index,
1234     FunctionImporter::ImportMapTy &ImportList) {
1235   for (const auto &GlobalList : Index) {
1236     // Ignore entries for undefined references.
1237     if (GlobalList.second.SummaryList.empty())
1238       continue;
1239 
1240     auto GUID = GlobalList.first;
1241     assert(GlobalList.second.SummaryList.size() == 1 &&
1242            "Expected individual combined index to have one summary per GUID");
1243     auto &Summary = GlobalList.second.SummaryList[0];
1244     // Skip the summaries for the importing module. These are included to
1245     // e.g. record required linkage changes.
1246     if (Summary->modulePath() == ModulePath)
1247       continue;
1248     // Add an entry to provoke importing by thinBackend.
1249     auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
1250         GUID, Summary->importType());
1251     if (!Inserted) {
1252       // Use 'std::min' to make sure definition (with enum value 0) takes
1253       // precedence over declaration (with enum value 1).
1254       Iter->second = std::min(Iter->second, Summary->importType());
1255     }
1256   }
1257 #ifndef NDEBUG
1258   dumpImportListForModule(Index, ModulePath, ImportList);
1259 #endif
1260 }
1261 
1262 // For SamplePGO, the indirect call targets for local functions will
1263 // have its original name annotated in profile. We try to find the
1264 // corresponding PGOFuncName as the GUID, and fix up the edges
1265 // accordingly.
1266 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1267                                      FunctionSummary *FS) {
1268   for (auto &EI : FS->mutableCalls()) {
1269     if (!EI.first.getSummaryList().empty())
1270       continue;
1271     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1272     if (GUID == 0)
1273       continue;
1274     // Update the edge to point directly to the correct GUID.
1275     auto VI = Index.getValueInfo(GUID);
1276     if (llvm::any_of(
1277             VI.getSummaryList(),
1278             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1279               // The mapping from OriginalId to GUID may return a GUID
1280               // that corresponds to a static variable. Filter it out here.
1281               // This can happen when
1282               // 1) There is a call to a library function which is not defined
1283               // in the index.
1284               // 2) There is a static variable with the  OriginalGUID identical
1285               // to the GUID of the library function in 1);
1286               // When this happens the static variable in 2) will be found,
1287               // which needs to be filtered out.
1288               return SummaryPtr->getSummaryKind() ==
1289                      GlobalValueSummary::GlobalVarKind;
1290             }))
1291       continue;
1292     EI.first = VI;
1293   }
1294 }
1295 
1296 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1297   for (const auto &Entry : Index) {
1298     for (const auto &S : Entry.second.SummaryList) {
1299       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1300         updateValueInfoForIndirectCalls(Index, FS);
1301     }
1302   }
1303 }
1304 
1305 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1306     ModuleSummaryIndex &Index,
1307     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1308     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1309   assert(!Index.withGlobalValueDeadStripping());
1310   if (!ComputeDead ||
1311       // Don't do anything when nothing is live, this is friendly with tests.
1312       GUIDPreservedSymbols.empty()) {
1313     // Still need to update indirect calls.
1314     updateIndirectCalls(Index);
1315     return;
1316   }
1317   unsigned LiveSymbols = 0;
1318   SmallVector<ValueInfo, 128> Worklist;
1319   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1320   for (auto GUID : GUIDPreservedSymbols) {
1321     ValueInfo VI = Index.getValueInfo(GUID);
1322     if (!VI)
1323       continue;
1324     for (const auto &S : VI.getSummaryList())
1325       S->setLive(true);
1326   }
1327 
1328   // Add values flagged in the index as live roots to the worklist.
1329   for (const auto &Entry : Index) {
1330     auto VI = Index.getValueInfo(Entry);
1331     for (const auto &S : Entry.second.SummaryList) {
1332       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1333         updateValueInfoForIndirectCalls(Index, FS);
1334       if (S->isLive()) {
1335         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1336         Worklist.push_back(VI);
1337         ++LiveSymbols;
1338         break;
1339       }
1340     }
1341   }
1342 
1343   // Make value live and add it to the worklist if it was not live before.
1344   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1345     // FIXME: If we knew which edges were created for indirect call profiles,
1346     // we could skip them here. Any that are live should be reached via
1347     // other edges, e.g. reference edges. Otherwise, using a profile collected
1348     // on a slightly different binary might provoke preserving, importing
1349     // and ultimately promoting calls to functions not linked into this
1350     // binary, which increases the binary size unnecessarily. Note that
1351     // if this code changes, the importer needs to change so that edges
1352     // to functions marked dead are skipped.
1353 
1354     if (llvm::any_of(VI.getSummaryList(),
1355                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1356                        return S->isLive();
1357                      }))
1358       return;
1359 
1360     // We only keep live symbols that are known to be non-prevailing if any are
1361     // available_externally, linkonceodr, weakodr. Those symbols are discarded
1362     // later in the EliminateAvailableExternally pass and setting them to
1363     // not-live could break downstreams users of liveness information (PR36483)
1364     // or limit optimization opportunities.
1365     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1366       bool KeepAliveLinkage = false;
1367       bool Interposable = false;
1368       for (const auto &S : VI.getSummaryList()) {
1369         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1370             S->linkage() == GlobalValue::WeakODRLinkage ||
1371             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1372           KeepAliveLinkage = true;
1373         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1374           Interposable = true;
1375       }
1376 
1377       if (!IsAliasee) {
1378         if (!KeepAliveLinkage)
1379           return;
1380 
1381         if (Interposable)
1382           report_fatal_error(
1383               "Interposable and available_externally/linkonce_odr/weak_odr "
1384               "symbol");
1385       }
1386     }
1387 
1388     for (const auto &S : VI.getSummaryList())
1389       S->setLive(true);
1390     ++LiveSymbols;
1391     Worklist.push_back(VI);
1392   };
1393 
1394   while (!Worklist.empty()) {
1395     auto VI = Worklist.pop_back_val();
1396     for (const auto &Summary : VI.getSummaryList()) {
1397       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1398         // If this is an alias, visit the aliasee VI to ensure that all copies
1399         // are marked live and it is added to the worklist for further
1400         // processing of its references.
1401         visit(AS->getAliaseeVI(), true);
1402         continue;
1403       }
1404       for (auto Ref : Summary->refs())
1405         visit(Ref, false);
1406       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1407         for (auto Call : FS->calls())
1408           visit(Call.first, false);
1409     }
1410   }
1411   Index.setWithGlobalValueDeadStripping();
1412 
1413   unsigned DeadSymbols = Index.size() - LiveSymbols;
1414   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1415                     << " symbols Dead \n");
1416   NumDeadSymbols += DeadSymbols;
1417   NumLiveSymbols += LiveSymbols;
1418 }
1419 
1420 // Compute dead symbols and propagate constants in combined index.
1421 void llvm::computeDeadSymbolsWithConstProp(
1422     ModuleSummaryIndex &Index,
1423     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1424     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1425     bool ImportEnabled) {
1426   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1427                                            isPrevailing);
1428   if (ImportEnabled)
1429     Index.propagateAttributes(GUIDPreservedSymbols);
1430 }
1431 
1432 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1433 /// \p ModulePath.
1434 void llvm::gatherImportedSummariesForModule(
1435     StringRef ModulePath,
1436     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1437     const FunctionImporter::ImportMapTy &ImportList,
1438     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1439   // Include all summaries from the importing module.
1440   ModuleToSummariesForIndex[std::string(ModulePath)] =
1441       ModuleToDefinedGVSummaries.lookup(ModulePath);
1442   // Include summaries for imports.
1443   for (const auto &ILI : ImportList) {
1444     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1445 
1446     const auto &DefinedGVSummaries =
1447         ModuleToDefinedGVSummaries.lookup(ILI.first);
1448     for (const auto &[GUID, Type] : ILI.second) {
1449       const auto &DS = DefinedGVSummaries.find(GUID);
1450       assert(DS != DefinedGVSummaries.end() &&
1451              "Expected a defined summary for imported global value");
1452       if (Type == GlobalValueSummary::Declaration)
1453         continue;
1454 
1455       SummariesForIndex[GUID] = DS->second;
1456     }
1457   }
1458 }
1459 
1460 /// Emit the files \p ModulePath will import from into \p OutputFilename.
1461 std::error_code llvm::EmitImportsFiles(
1462     StringRef ModulePath, StringRef OutputFilename,
1463     const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1464   std::error_code EC;
1465   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1466   if (EC)
1467     return EC;
1468   for (const auto &ILI : ModuleToSummariesForIndex)
1469     // The ModuleToSummariesForIndex map includes an entry for the current
1470     // Module (needed for writing out the index files). We don't want to
1471     // include it in the imports file, however, so filter it out.
1472     if (ILI.first != ModulePath)
1473       ImportsOS << ILI.first << "\n";
1474   return std::error_code();
1475 }
1476 
1477 bool llvm::convertToDeclaration(GlobalValue &GV) {
1478   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1479                     << "\n");
1480   if (Function *F = dyn_cast<Function>(&GV)) {
1481     F->deleteBody();
1482     F->clearMetadata();
1483     F->setComdat(nullptr);
1484   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1485     V->setInitializer(nullptr);
1486     V->setLinkage(GlobalValue::ExternalLinkage);
1487     V->clearMetadata();
1488     V->setComdat(nullptr);
1489   } else {
1490     GlobalValue *NewGV;
1491     if (GV.getValueType()->isFunctionTy())
1492       NewGV =
1493           Function::Create(cast<FunctionType>(GV.getValueType()),
1494                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1495                            "", GV.getParent());
1496     else
1497       NewGV =
1498           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1499                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1500                              /*init*/ nullptr, "",
1501                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1502                              GV.getType()->getAddressSpace());
1503     NewGV->takeName(&GV);
1504     GV.replaceAllUsesWith(NewGV);
1505     return false;
1506   }
1507   if (!GV.isImplicitDSOLocal())
1508     GV.setDSOLocal(false);
1509   return true;
1510 }
1511 
1512 void llvm::thinLTOFinalizeInModule(Module &TheModule,
1513                                    const GVSummaryMapTy &DefinedGlobals,
1514                                    bool PropagateAttrs) {
1515   DenseSet<Comdat *> NonPrevailingComdats;
1516   auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
1517     // See if the global summary analysis computed a new resolved linkage.
1518     const auto &GS = DefinedGlobals.find(GV.getGUID());
1519     if (GS == DefinedGlobals.end())
1520       return;
1521 
1522     if (Propagate)
1523       if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
1524         if (Function *F = dyn_cast<Function>(&GV)) {
1525           // TODO: propagate ReadNone and ReadOnly.
1526           if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
1527             F->setDoesNotAccessMemory();
1528 
1529           if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
1530             F->setOnlyReadsMemory();
1531 
1532           if (FS->fflags().NoRecurse && !F->doesNotRecurse())
1533             F->setDoesNotRecurse();
1534 
1535           if (FS->fflags().NoUnwind && !F->doesNotThrow())
1536             F->setDoesNotThrow();
1537         }
1538       }
1539 
1540     auto NewLinkage = GS->second->linkage();
1541     if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
1542         // Don't internalize anything here, because the code below
1543         // lacks necessary correctness checks. Leave this job to
1544         // LLVM 'internalize' pass.
1545         GlobalValue::isLocalLinkage(NewLinkage) ||
1546         // In case it was dead and already converted to declaration.
1547         GV.isDeclaration())
1548       return;
1549 
1550     // Set the potentially more constraining visibility computed from summaries.
1551     // The DefaultVisibility condition is because older GlobalValueSummary does
1552     // not record DefaultVisibility and we don't want to change protected/hidden
1553     // to default.
1554     if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
1555       GV.setVisibility(GS->second->getVisibility());
1556 
1557     if (NewLinkage == GV.getLinkage())
1558       return;
1559 
1560     // Check for a non-prevailing def that has interposable linkage
1561     // (e.g. non-odr weak or linkonce). In that case we can't simply
1562     // convert to available_externally, since it would lose the
1563     // interposable property and possibly get inlined. Simply drop
1564     // the definition in that case.
1565     if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
1566         GlobalValue::isInterposableLinkage(GV.getLinkage())) {
1567       if (!convertToDeclaration(GV))
1568         // FIXME: Change this to collect replaced GVs and later erase
1569         // them from the parent module once thinLTOResolvePrevailingGUID is
1570         // changed to enable this for aliases.
1571         llvm_unreachable("Expected GV to be converted");
1572     } else {
1573       // If all copies of the original symbol had global unnamed addr and
1574       // linkonce_odr linkage, or if all of them had local unnamed addr linkage
1575       // and are constants, then it should be an auto hide symbol. In that case
1576       // the thin link would have marked it as CanAutoHide. Add hidden
1577       // visibility to the symbol to preserve the property.
1578       if (NewLinkage == GlobalValue::WeakODRLinkage &&
1579           GS->second->canAutoHide()) {
1580         assert(GV.canBeOmittedFromSymbolTable());
1581         GV.setVisibility(GlobalValue::HiddenVisibility);
1582       }
1583 
1584       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
1585                         << "` from " << GV.getLinkage() << " to " << NewLinkage
1586                         << "\n");
1587       GV.setLinkage(NewLinkage);
1588     }
1589     // Remove declarations from comdats, including available_externally
1590     // as this is a declaration for the linker, and will be dropped eventually.
1591     // It is illegal for comdats to contain declarations.
1592     auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
1593     if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
1594       if (GO->getComdat()->getName() == GO->getName())
1595         NonPrevailingComdats.insert(GO->getComdat());
1596       GO->setComdat(nullptr);
1597     }
1598   };
1599 
1600   // Process functions and global now
1601   for (auto &GV : TheModule)
1602     FinalizeInModule(GV, PropagateAttrs);
1603   for (auto &GV : TheModule.globals())
1604     FinalizeInModule(GV);
1605   for (auto &GV : TheModule.aliases())
1606     FinalizeInModule(GV);
1607 
1608   // For a non-prevailing comdat, all its members must be available_externally.
1609   // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
1610   // local linkage GlobalValues.
1611   if (NonPrevailingComdats.empty())
1612     return;
1613   for (auto &GO : TheModule.global_objects()) {
1614     if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
1615       GO.setComdat(nullptr);
1616       GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
1617     }
1618   }
1619   bool Changed;
1620   do {
1621     Changed = false;
1622     // If an alias references a GlobalValue in a non-prevailing comdat, change
1623     // it to available_externally. For simplicity we only handle GlobalValue and
1624     // ConstantExpr with a base object. ConstantExpr without a base object is
1625     // unlikely used in a COMDAT.
1626     for (auto &GA : TheModule.aliases()) {
1627       if (GA.hasAvailableExternallyLinkage())
1628         continue;
1629       GlobalObject *Obj = GA.getAliaseeObject();
1630       assert(Obj && "aliasee without an base object is unimplemented");
1631       if (Obj->hasAvailableExternallyLinkage()) {
1632         GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
1633         Changed = true;
1634       }
1635     }
1636   } while (Changed);
1637 }
1638 
1639 /// Run internalization on \p TheModule based on symmary analysis.
1640 void llvm::thinLTOInternalizeModule(Module &TheModule,
1641                                     const GVSummaryMapTy &DefinedGlobals) {
1642   // Declare a callback for the internalize pass that will ask for every
1643   // candidate GlobalValue if it can be internalized or not.
1644   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1645     // It may be the case that GV is on a chain of an ifunc, its alias and
1646     // subsequent aliases. In this case, the summary for the value is not
1647     // available.
1648     if (isa<GlobalIFunc>(&GV) ||
1649         (isa<GlobalAlias>(&GV) &&
1650          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1651       return true;
1652 
1653     // Lookup the linkage recorded in the summaries during global analysis.
1654     auto GS = DefinedGlobals.find(GV.getGUID());
1655     if (GS == DefinedGlobals.end()) {
1656       // Must have been promoted (possibly conservatively). Find original
1657       // name so that we can access the correct summary and see if it can
1658       // be internalized again.
1659       // FIXME: Eventually we should control promotion instead of promoting
1660       // and internalizing again.
1661       StringRef OrigName =
1662           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1663       std::string OrigId = GlobalValue::getGlobalIdentifier(
1664           OrigName, GlobalValue::InternalLinkage,
1665           TheModule.getSourceFileName());
1666       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1667       if (GS == DefinedGlobals.end()) {
1668         // Also check the original non-promoted non-globalized name. In some
1669         // cases a preempted weak value is linked in as a local copy because
1670         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1671         // In that case, since it was originally not a local value, it was
1672         // recorded in the index using the original name.
1673         // FIXME: This may not be needed once PR27866 is fixed.
1674         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1675         assert(GS != DefinedGlobals.end());
1676       }
1677     }
1678     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1679   };
1680 
1681   // FIXME: See if we can just internalize directly here via linkage changes
1682   // based on the index, rather than invoking internalizeModule.
1683   internalizeModule(TheModule, MustPreserveGV);
1684 }
1685 
1686 /// Make alias a clone of its aliasee.
1687 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1688   Function *Fn = cast<Function>(GA->getAliaseeObject());
1689 
1690   ValueToValueMapTy VMap;
1691   Function *NewFn = CloneFunction(Fn, VMap);
1692   // Clone should use the original alias's linkage, visibility and name, and we
1693   // ensure all uses of alias instead use the new clone (casted if necessary).
1694   NewFn->setLinkage(GA->getLinkage());
1695   NewFn->setVisibility(GA->getVisibility());
1696   GA->replaceAllUsesWith(NewFn);
1697   NewFn->takeName(GA);
1698   return NewFn;
1699 }
1700 
1701 // Internalize values that we marked with specific attribute
1702 // in processGlobalForThinLTO.
1703 static void internalizeGVsAfterImport(Module &M) {
1704   for (auto &GV : M.globals())
1705     // Skip GVs which have been converted to declarations
1706     // by dropDeadSymbols.
1707     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1708       GV.setLinkage(GlobalValue::InternalLinkage);
1709       GV.setVisibility(GlobalValue::DefaultVisibility);
1710     }
1711 }
1712 
1713 // Automatically import functions in Module \p DestModule based on the summaries
1714 // index.
1715 Expected<bool> FunctionImporter::importFunctions(
1716     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
1717   LLVM_DEBUG(dbgs() << "Starting import for Module "
1718                     << DestModule.getModuleIdentifier() << "\n");
1719   unsigned ImportedCount = 0, ImportedGVCount = 0;
1720 
1721   IRMover Mover(DestModule);
1722   // Do the actual import of functions now, one Module at a time
1723   std::set<StringRef> ModuleNameOrderedList;
1724   for (const auto &FunctionsToImportPerModule : ImportList) {
1725     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
1726   }
1727 
1728   auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
1729                            GlobalValue::GUID GUID)
1730       -> std::optional<GlobalValueSummary::ImportKind> {
1731     auto Iter = GUIDToImportType.find(GUID);
1732     if (Iter == GUIDToImportType.end())
1733       return std::nullopt;
1734     return Iter->second;
1735   };
1736 
1737   for (const auto &Name : ModuleNameOrderedList) {
1738     // Get the module for the import
1739     const auto &FunctionsToImportPerModule = ImportList.find(Name);
1740     assert(FunctionsToImportPerModule != ImportList.end());
1741     Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
1742     if (!SrcModuleOrErr)
1743       return SrcModuleOrErr.takeError();
1744     std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
1745     assert(&DestModule.getContext() == &SrcModule->getContext() &&
1746            "Context mismatch");
1747 
1748     // If modules were created with lazy metadata loading, materialize it
1749     // now, before linking it (otherwise this will be a noop).
1750     if (Error Err = SrcModule->materializeMetadata())
1751       return std::move(Err);
1752 
1753     auto &ImportGUIDs = FunctionsToImportPerModule->second;
1754 
1755     // Find the globals to import
1756     SetVector<GlobalValue *> GlobalsToImport;
1757     for (Function &F : *SrcModule) {
1758       if (!F.hasName())
1759         continue;
1760       auto GUID = F.getGUID();
1761       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1762 
1763       bool ImportDefinition =
1764           (MaybeImportType &&
1765            (*MaybeImportType == GlobalValueSummary::Definition));
1766 
1767       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1768                         << " importing function"
1769                         << (ImportDefinition
1770                                 ? " definition "
1771                                 : (MaybeImportType ? " declaration " : " "))
1772                         << GUID << " " << F.getName() << " from "
1773                         << SrcModule->getSourceFileName() << "\n");
1774       if (ImportDefinition) {
1775         if (Error Err = F.materialize())
1776           return std::move(Err);
1777         // MemProf should match function's definition and summary,
1778         // 'thinlto_src_module' is needed.
1779         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1780           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1781           // statistics and debugging.
1782           F.setMetadata(
1783               "thinlto_src_module",
1784               MDNode::get(DestModule.getContext(),
1785                           {MDString::get(DestModule.getContext(),
1786                                          SrcModule->getModuleIdentifier())}));
1787           F.setMetadata(
1788               "thinlto_src_file",
1789               MDNode::get(DestModule.getContext(),
1790                           {MDString::get(DestModule.getContext(),
1791                                          SrcModule->getSourceFileName())}));
1792         }
1793         GlobalsToImport.insert(&F);
1794       }
1795     }
1796     for (GlobalVariable &GV : SrcModule->globals()) {
1797       if (!GV.hasName())
1798         continue;
1799       auto GUID = GV.getGUID();
1800       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1801 
1802       bool ImportDefinition =
1803           (MaybeImportType &&
1804            (*MaybeImportType == GlobalValueSummary::Definition));
1805 
1806       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1807                         << " importing global"
1808                         << (ImportDefinition
1809                                 ? " definition "
1810                                 : (MaybeImportType ? " declaration " : " "))
1811                         << GUID << " " << GV.getName() << " from "
1812                         << SrcModule->getSourceFileName() << "\n");
1813       if (ImportDefinition) {
1814         if (Error Err = GV.materialize())
1815           return std::move(Err);
1816         ImportedGVCount += GlobalsToImport.insert(&GV);
1817       }
1818     }
1819     for (GlobalAlias &GA : SrcModule->aliases()) {
1820       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
1821         continue;
1822       auto GUID = GA.getGUID();
1823       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1824 
1825       bool ImportDefinition =
1826           (MaybeImportType &&
1827            (*MaybeImportType == GlobalValueSummary::Definition));
1828 
1829       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1830                         << " importing alias"
1831                         << (ImportDefinition
1832                                 ? " definition "
1833                                 : (MaybeImportType ? " declaration " : " "))
1834                         << GUID << " " << GA.getName() << " from "
1835                         << SrcModule->getSourceFileName() << "\n");
1836       if (ImportDefinition) {
1837         if (Error Err = GA.materialize())
1838           return std::move(Err);
1839         // Import alias as a copy of its aliasee.
1840         GlobalObject *GO = GA.getAliaseeObject();
1841         if (Error Err = GO->materialize())
1842           return std::move(Err);
1843         auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
1844         LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
1845                           << GO->getName() << " from "
1846                           << SrcModule->getSourceFileName() << "\n");
1847         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1848           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1849           // statistics and debugging.
1850           Fn->setMetadata(
1851               "thinlto_src_module",
1852               MDNode::get(DestModule.getContext(),
1853                           {MDString::get(DestModule.getContext(),
1854                                          SrcModule->getModuleIdentifier())}));
1855           Fn->setMetadata(
1856               "thinlto_src_file",
1857               MDNode::get(DestModule.getContext(),
1858                           {MDString::get(DestModule.getContext(),
1859                                          SrcModule->getSourceFileName())}));
1860         }
1861         GlobalsToImport.insert(Fn);
1862       }
1863     }
1864 
1865     // Upgrade debug info after we're done materializing all the globals and we
1866     // have loaded all the required metadata!
1867     UpgradeDebugInfo(*SrcModule);
1868 
1869     // Set the partial sample profile ratio in the profile summary module flag
1870     // of the imported source module, if applicable, so that the profile summary
1871     // module flag will match with that of the destination module when it's
1872     // imported.
1873     SrcModule->setPartialSampleProfileRatio(Index);
1874 
1875     // Link in the specified functions.
1876     if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
1877                                &GlobalsToImport))
1878       return true;
1879 
1880     if (PrintImports) {
1881       for (const auto *GV : GlobalsToImport)
1882         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
1883                << " from " << SrcModule->getSourceFileName() << "\n";
1884     }
1885 
1886     if (Error Err = Mover.move(std::move(SrcModule),
1887                                GlobalsToImport.getArrayRef(), nullptr,
1888                                /*IsPerformingImport=*/true))
1889       return createStringError(errc::invalid_argument,
1890                                Twine("Function Import: link error: ") +
1891                                    toString(std::move(Err)));
1892 
1893     ImportedCount += GlobalsToImport.size();
1894     NumImportedModules++;
1895   }
1896 
1897   internalizeGVsAfterImport(DestModule);
1898 
1899   NumImportedFunctions += (ImportedCount - ImportedGVCount);
1900   NumImportedGlobalVars += ImportedGVCount;
1901 
1902   // TODO: Print counters for definitions and declarations in the debugging log.
1903   LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
1904                     << " functions for Module "
1905                     << DestModule.getModuleIdentifier() << "\n");
1906   LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
1907                     << " global variables for Module "
1908                     << DestModule.getModuleIdentifier() << "\n");
1909   return ImportedCount;
1910 }
1911 
1912 static bool doImportingForModuleForTest(
1913     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1914                    isPrevailing) {
1915   if (SummaryFile.empty())
1916     report_fatal_error("error: -function-import requires -summary-file\n");
1917   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1918       getModuleSummaryIndexForFile(SummaryFile);
1919   if (!IndexPtrOrErr) {
1920     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1921                           "Error loading file '" + SummaryFile + "': ");
1922     return false;
1923   }
1924   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1925 
1926   // First step is collecting the import list.
1927   FunctionImporter::ImportMapTy ImportList;
1928   // If requested, simply import all functions in the index. This is used
1929   // when testing distributed backend handling via the opt tool, when
1930   // we have distributed indexes containing exactly the summaries to import.
1931   if (ImportAllIndex)
1932     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1933                                                       *Index, ImportList);
1934   else
1935     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1936                                              isPrevailing, *Index, ImportList);
1937 
1938   // Conservatively mark all internal values as promoted. This interface is
1939   // only used when doing importing via the function importing pass. The pass
1940   // is only enabled when testing importing via the 'opt' tool, which does
1941   // not do the ThinLink that would normally determine what values to promote.
1942   for (auto &I : *Index) {
1943     for (auto &S : I.second.SummaryList) {
1944       if (GlobalValue::isLocalLinkage(S->linkage()))
1945         S->setLinkage(GlobalValue::ExternalLinkage);
1946     }
1947   }
1948 
1949   // Next we need to promote to global scope and rename any local values that
1950   // are potentially exported to other modules.
1951   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
1952                              /*GlobalsToImport=*/nullptr)) {
1953     errs() << "Error renaming module\n";
1954     return true;
1955   }
1956 
1957   // Perform the import now.
1958   auto ModuleLoader = [&M](StringRef Identifier) {
1959     return loadFile(std::string(Identifier), M.getContext());
1960   };
1961   FunctionImporter Importer(*Index, ModuleLoader,
1962                             /*ClearDSOLocalOnDeclarations=*/false);
1963   Expected<bool> Result = Importer.importFunctions(M, ImportList);
1964 
1965   // FIXME: Probably need to propagate Errors through the pass manager.
1966   if (!Result) {
1967     logAllUnhandledErrors(Result.takeError(), errs(),
1968                           "Error importing module: ");
1969     return true;
1970   }
1971 
1972   return true;
1973 }
1974 
1975 PreservedAnalyses FunctionImportPass::run(Module &M,
1976                                           ModuleAnalysisManager &AM) {
1977   // This is only used for testing the function import pass via opt, where we
1978   // don't have prevailing information from the LTO context available, so just
1979   // conservatively assume everything is prevailing (which is fine for the very
1980   // limited use of prevailing checking in this pass).
1981   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
1982     return true;
1983   };
1984   if (!doImportingForModuleForTest(M, isPrevailing))
1985     return PreservedAnalyses::all();
1986 
1987   return PreservedAnalyses::none();
1988 }
1989