xref: /llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision ba8883c46e3cb1782f66774a116bc0364ea68ce0)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalAlias.h"
25 #include "llvm/IR/GlobalObject.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/Metadata.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/ModuleSummaryIndex.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/Linker/IRMover.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
59 STATISTIC(NumImportedFunctionsThinLink,
60           "Number of functions thin link decided to import");
61 STATISTIC(NumImportedHotFunctionsThinLink,
62           "Number of hot functions thin link decided to import");
63 STATISTIC(NumImportedCriticalFunctionsThinLink,
64           "Number of critical functions thin link decided to import");
65 STATISTIC(NumImportedGlobalVarsThinLink,
66           "Number of global variables thin link decided to import");
67 STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
68 STATISTIC(NumImportedGlobalVars,
69           "Number of global variables imported in backend");
70 STATISTIC(NumImportedModules, "Number of modules imported from");
71 STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
72 STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
74 /// Limit on instruction count of imported functions.
75 static cl::opt<unsigned> ImportInstrLimit(
76     "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
77     cl::desc("Only import functions with less than N instructions"));
78 
79 static cl::opt<int> ImportCutoff(
80     "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
81     cl::desc("Only import first N functions if N>=0 (default -1)"));
82 
83 static cl::opt<bool>
84     ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
85                    cl::desc("Import functions with noinline attribute"));
86 
87 static cl::opt<float>
88     ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
89                       cl::Hidden, cl::value_desc("x"),
90                       cl::desc("As we import functions, multiply the "
91                                "`import-instr-limit` threshold by this factor "
92                                "before processing newly imported functions"));
93 
94 static cl::opt<float> ImportHotInstrFactor(
95     "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
96     cl::value_desc("x"),
97     cl::desc("As we import functions called from hot callsite, multiply the "
98              "`import-instr-limit` threshold by this factor "
99              "before processing newly imported functions"));
100 
101 static cl::opt<float> ImportHotMultiplier(
102     "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
103     cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
104 
105 static cl::opt<float> ImportCriticalMultiplier(
106     "import-critical-multiplier", cl::init(100.0), cl::Hidden,
107     cl::value_desc("x"),
108     cl::desc(
109         "Multiply the `import-instr-limit` threshold for critical callsites"));
110 
111 // FIXME: This multiplier was not really tuned up.
112 static cl::opt<float> ImportColdMultiplier(
113     "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
114     cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));
115 
116 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
117                                   cl::desc("Print imported functions"));
118 
119 static cl::opt<bool> PrintImportFailures(
120     "print-import-failures", cl::init(false), cl::Hidden,
121     cl::desc("Print information for functions rejected for importing"));
122 
123 static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
124                                  cl::desc("Compute dead symbols"));
125 
126 static cl::opt<bool> EnableImportMetadata(
127     "enable-import-metadata", cl::init(false), cl::Hidden,
128     cl::desc("Enable import metadata like 'thinlto_src_module' and "
129              "'thinlto_src_file'"));
130 
131 /// Summary file to use for function importing when using -function-import from
132 /// the command line.
133 static cl::opt<std::string>
134     SummaryFile("summary-file",
135                 cl::desc("The summary file to use for function importing."));
136 
137 /// Used when testing importing from distributed indexes via opt
138 // -function-import.
139 static cl::opt<bool>
140     ImportAllIndex("import-all-index",
141                    cl::desc("Import all external functions in index."));
142 
143 /// This is a test-only option.
144 /// If this option is enabled, the ThinLTO indexing step will import each
145 /// function declaration as a fallback. In a real build this may increase ram
146 /// usage of the indexing step unnecessarily.
147 /// TODO: Implement selective import (based on combined summary analysis) to
148 /// ensure the imported function has a use case in the postlink pipeline.
149 static cl::opt<bool> ImportDeclaration(
150     "import-declaration", cl::init(false), cl::Hidden,
151     cl::desc("If true, import function declaration as fallback if the function "
152              "definition is not imported."));
153 
154 /// Pass a workload description file - an example of workload would be the
155 /// functions executed to satisfy a RPC request. A workload is defined by a root
156 /// function and the list of functions that are (frequently) needed to satisfy
157 /// it. The module that defines the root will have all those functions imported.
158 /// The file contains a JSON dictionary. The keys are root functions, the values
159 /// are lists of functions to import in the module defining the root. It is
160 /// assumed -funique-internal-linkage-names was used, thus ensuring function
161 /// names are unique even for local linkage ones.
162 static cl::opt<std::string> WorkloadDefinitions(
163     "thinlto-workload-def",
164     cl::desc("Pass a workload definition. This is a file containing a JSON "
165              "dictionary. The keys are root functions, the values are lists of "
166              "functions to import in the module defining the root. It is "
167              "assumed -funique-internal-linkage-names was used, to ensure "
168              "local linkage functions have unique names. For example: \n"
169              "{\n"
170              "  \"rootFunction_1\": [\"function_to_import_1\", "
171              "\"function_to_import_2\"], \n"
172              "  \"rootFunction_2\": [\"function_to_import_3\", "
173              "\"function_to_import_4\"] \n"
174              "}"),
175     cl::Hidden);
176 
177 static cl::opt<bool> ImportAssumeUniqueLocal(
178     "import-assume-unique-local", cl::init(false),
179     cl::desc(
180         "By default, a local-linkage global variable won't be imported in the "
181         "edge mod1:func -> mod2:local-var (from value profiles) since compiler "
182         "cannot assume mod2 is compiled with full path which gives local-var a "
183         "program-wide unique GUID. Set this option to true will help cross "
184         "module import of such variables. This is only safe if the compiler "
185         "user specify the full module path."),
186     cl::Hidden);
187 
188 namespace llvm {
189 extern cl::opt<bool> EnableMemProfContextDisambiguation;
190 }
191 
192 // Load lazily a module from \p FileName in \p Context.
193 static std::unique_ptr<Module> loadFile(const std::string &FileName,
194                                         LLVMContext &Context) {
195   SMDiagnostic Err;
196   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
197   // Metadata isn't loaded until functions are imported, to minimize
198   // the memory overhead.
199   std::unique_ptr<Module> Result =
200       getLazyIRFileModule(FileName, Err, Context,
201                           /* ShouldLazyLoadMetadata = */ true);
202   if (!Result) {
203     Err.print("function-import", errs());
204     report_fatal_error("Abort");
205   }
206 
207   return Result;
208 }
209 
210 static bool shouldSkipLocalInAnotherModule(const GlobalValueSummary *RefSummary,
211                                            size_t NumDefs,
212                                            StringRef ImporterModule) {
213   // We can import a local from another module if all inputs are compiled
214   // with full paths or when there is one definition.
215   if (ImportAssumeUniqueLocal || NumDefs == 1)
216     return false;
217   // In other cases, make sure we import the copy in the caller's module if the
218   // referenced value has local linkage. The only time a local variable can
219   // share an entry in the index is if there is a local with the same name in
220   // another module that had the same source file name (in a different
221   // directory), where each was compiled in their own directory so there was not
222   // distinguishing path.
223   return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
224          RefSummary->modulePath() != ImporterModule;
225 }
226 
227 /// Given a list of possible callee implementation for a call site, qualify the
228 /// legality of importing each. The return is a range of pairs. Each pair
229 /// corresponds to a candidate. The first value is the ImportFailureReason for
230 /// that candidate, the second is the candidate.
231 static auto qualifyCalleeCandidates(
232     const ModuleSummaryIndex &Index,
233     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
234     StringRef CallerModulePath) {
235   return llvm::map_range(
236       CalleeSummaryList,
237       [&Index, CalleeSummaryList,
238        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
239           -> std::pair<FunctionImporter::ImportFailureReason,
240                        const GlobalValueSummary *> {
241         auto *GVSummary = SummaryPtr.get();
242         if (!Index.isGlobalValueLive(GVSummary))
243           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
244 
245         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
246           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
247                   GVSummary};
248 
249         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
250 
251         // Ignore any callees that aren't actually functions. This could happen
252         // in the case of GUID hash collisions. It could also happen in theory
253         // for SamplePGO profiles collected on old versions of the code after
254         // renaming, since we synthesize edges to any inlined callees appearing
255         // in the profile.
256         if (!Summary)
257           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
258 
259         // If this is a local function, make sure we import the copy in the
260         // caller's module. The only time a local function can share an entry in
261         // the index is if there is a local with the same name in another module
262         // that had the same source file name (in a different directory), where
263         // each was compiled in their own directory so there was not
264         // distinguishing path.
265         // If the local function is from another module, it must be a reference
266         // due to indirect call profile data since a function pointer can point
267         // to a local in another module. Do the import from another module if
268         // there is only one entry in the list or when all files in the program
269         // are compiled with full path - in both cases the local function has
270         // unique PGO name and GUID.
271         if (shouldSkipLocalInAnotherModule(Summary, CalleeSummaryList.size(),
272                                            CallerModulePath))
273           return {
274               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
275               GVSummary};
276 
277         // Skip if it isn't legal to import (e.g. may reference unpromotable
278         // locals).
279         if (Summary->notEligibleToImport())
280           return {FunctionImporter::ImportFailureReason::NotEligible,
281                   GVSummary};
282 
283         return {FunctionImporter::ImportFailureReason::None, GVSummary};
284       });
285 }
286 
287 /// Given a list of possible callee implementation for a call site, select one
288 /// that fits the \p Threshold for function definition import. If none are
289 /// found, the Reason will give the last reason for the failure (last, in the
290 /// order of CalleeSummaryList entries). While looking for a callee definition,
291 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
292 /// candidate; other modules may want to know the function summary or
293 /// declaration even if a definition is not needed.
294 ///
295 /// FIXME: select "best" instead of first that fits. But what is "best"?
296 /// - The smallest: more likely to be inlined.
297 /// - The one with the least outgoing edges (already well optimized).
298 /// - One from a module already being imported from in order to reduce the
299 ///   number of source modules parsed/linked.
300 /// - One that has PGO data attached.
301 /// - [insert you fancy metric here]
302 static const GlobalValueSummary *
303 selectCallee(const ModuleSummaryIndex &Index,
304              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
305              unsigned Threshold, StringRef CallerModulePath,
306              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
307              FunctionImporter::ImportFailureReason &Reason) {
308   // Records the last summary with reason noinline or too-large.
309   TooLargeOrNoInlineSummary = nullptr;
310   auto QualifiedCandidates =
311       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
312   for (auto QualifiedValue : QualifiedCandidates) {
313     Reason = QualifiedValue.first;
314     // Skip a summary if its import is not (proved to be) legal.
315     if (Reason != FunctionImporter::ImportFailureReason::None)
316       continue;
317     auto *Summary =
318         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
319 
320     // Don't bother importing the definition if the chance of inlining it is
321     // not high enough (except under `--force-import-all`).
322     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
323         !ForceImportAll) {
324       TooLargeOrNoInlineSummary = Summary;
325       Reason = FunctionImporter::ImportFailureReason::TooLarge;
326       continue;
327     }
328 
329     // Don't bother importing the definition if we can't inline it anyway.
330     if (Summary->fflags().NoInline && !ForceImportAll) {
331       TooLargeOrNoInlineSummary = Summary;
332       Reason = FunctionImporter::ImportFailureReason::NoInline;
333       continue;
334     }
335 
336     return Summary;
337   }
338   return nullptr;
339 }
340 
341 namespace {
342 
343 using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;
344 
345 } // anonymous namespace
346 
347 /// Import globals referenced by a function or other globals that are being
348 /// imported, if importing such global is possible.
349 class GlobalsImporter final {
350   const ModuleSummaryIndex &Index;
351   const GVSummaryMapTy &DefinedGVSummaries;
352   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
353       IsPrevailing;
354   FunctionImporter::ImportMapTy &ImportList;
355   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
356 
357   bool shouldImportGlobal(const ValueInfo &VI) {
358     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
359     if (GVS == DefinedGVSummaries.end())
360       return true;
361     // We should not skip import if the module contains a non-prevailing
362     // definition with interposable linkage type. This is required for
363     // correctness in the situation where there is a prevailing def available
364     // for import and marked read-only. In this case, the non-prevailing def
365     // will be converted to a declaration, while the prevailing one becomes
366     // internal, thus no definitions will be available for linking. In order to
367     // prevent undefined symbol link error, the prevailing definition must be
368     // imported.
369     // FIXME: Consider adding a check that the suitable prevailing definition
370     // exists and marked read-only.
371     if (VI.getSummaryList().size() > 1 &&
372         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
373         !IsPrevailing(VI.getGUID(), GVS->second))
374       return true;
375 
376     return false;
377   }
378 
379   void
380   onImportingSummaryImpl(const GlobalValueSummary &Summary,
381                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
382     for (const auto &VI : Summary.refs()) {
383       if (!shouldImportGlobal(VI)) {
384         LLVM_DEBUG(
385             dbgs() << "Ref ignored! Target already in destination module.\n");
386         continue;
387       }
388 
389       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
390 
391       for (const auto &RefSummary : VI.getSummaryList()) {
392         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
393         // Functions could be referenced by global vars - e.g. a vtable; but we
394         // don't currently imagine a reason those would be imported here, rather
395         // than as part of the logic deciding which functions to import (i.e.
396         // based on profile information). Should we decide to handle them here,
397         // we can refactor accordingly at that time.
398         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
399             shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
400                                            Summary.modulePath()))
401           continue;
402 
403         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
404         // Otherwise, definition should take precedence over declaration.
405         auto [Iter, Inserted] =
406             ImportList[RefSummary->modulePath()].try_emplace(
407                 VI.getGUID(), GlobalValueSummary::Definition);
408         // Only update stat and exports if we haven't already imported this
409         // variable.
410         if (!Inserted) {
411           // Set the value to 'std::min(existing-value, new-value)' to make
412           // sure a definition takes precedence over a declaration.
413           Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
414           break;
415         }
416         NumImportedGlobalVarsThinLink++;
417         // Any references made by this variable will be marked exported
418         // later, in ComputeCrossModuleImport, after import decisions are
419         // complete, which is more efficient than adding them here.
420         if (ExportLists)
421           (*ExportLists)[RefSummary->modulePath()].insert(VI);
422 
423         // If variable is not writeonly we attempt to recursively analyze
424         // its references in order to import referenced constants.
425         if (!Index.isWriteOnly(GVS))
426           Worklist.emplace_back(GVS);
427         break;
428       }
429     }
430   }
431 
432 public:
433   GlobalsImporter(
434       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
435       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
436           IsPrevailing,
437       FunctionImporter::ImportMapTy &ImportList,
438       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
439       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
440         IsPrevailing(IsPrevailing), ImportList(ImportList),
441         ExportLists(ExportLists) {}
442 
443   void onImportingSummary(const GlobalValueSummary &Summary) {
444     SmallVector<const GlobalVarSummary *, 128> Worklist;
445     onImportingSummaryImpl(Summary, Worklist);
446     while (!Worklist.empty())
447       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
448   }
449 };
450 
451 static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
452 
453 /// Determine the list of imports and exports for each module.
454 class ModuleImportsManager {
455 protected:
456   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
457       IsPrevailing;
458   const ModuleSummaryIndex &Index;
459   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
460 
461   ModuleImportsManager(
462       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
463           IsPrevailing,
464       const ModuleSummaryIndex &Index,
465       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
466       : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}
467 
468 public:
469   virtual ~ModuleImportsManager() = default;
470 
471   /// Given the list of globals defined in a module, compute the list of imports
472   /// as well as the list of "exports", i.e. the list of symbols referenced from
473   /// another module (that may require promotion).
474   virtual void
475   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
476                          StringRef ModName,
477                          FunctionImporter::ImportMapTy &ImportList);
478 
479   static std::unique_ptr<ModuleImportsManager>
480   create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
481              IsPrevailing,
482          const ModuleSummaryIndex &Index,
483          DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
484              nullptr);
485 };
486 
487 /// A ModuleImportsManager that operates based on a workload definition (see
488 /// -thinlto-workload-def). For modules that do not define workload roots, it
489 /// applies the base ModuleImportsManager import policy.
490 class WorkloadImportsManager : public ModuleImportsManager {
491   // Keep a module name -> value infos to import association. We use it to
492   // determine if a module's import list should be done by the base
493   // ModuleImportsManager or by us.
494   StringMap<DenseSet<ValueInfo>> Workloads;
495 
496   void
497   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
498                          StringRef ModName,
499                          FunctionImporter::ImportMapTy &ImportList) override {
500     auto SetIter = Workloads.find(ModName);
501     if (SetIter == Workloads.end()) {
502       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
503                         << " does not contain the root of any context.\n");
504       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
505                                                           ModName, ImportList);
506     }
507     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
508                       << " contains the root(s) of context(s).\n");
509 
510     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
511                         ExportLists);
512     auto &ValueInfos = SetIter->second;
513     SmallVector<EdgeInfo, 128> GlobWorklist;
514     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
515       auto It = DefinedGVSummaries.find(VI.getGUID());
516       if (It != DefinedGVSummaries.end() &&
517           IsPrevailing(VI.getGUID(), It->second)) {
518         LLVM_DEBUG(
519             dbgs() << "[Workload] " << VI.name()
520                    << " has the prevailing variant already in the module "
521                    << ModName << ". No need to import\n");
522         continue;
523       }
524       auto Candidates =
525           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
526 
527       const GlobalValueSummary *GVS = nullptr;
528       auto PotentialCandidates = llvm::map_range(
529           llvm::make_filter_range(
530               Candidates,
531               [&](const auto &Candidate) {
532                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
533                                   << " from " << Candidate.second->modulePath()
534                                   << " ImportFailureReason: "
535                                   << getFailureName(Candidate.first) << "\n");
536                 return Candidate.first ==
537                         FunctionImporter::ImportFailureReason::None;
538               }),
539           [](const auto &Candidate) { return Candidate.second; });
540       if (PotentialCandidates.empty()) {
541         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
542                           << " because can't find eligible Callee. Guid is: "
543                           << Function::getGUID(VI.name()) << "\n");
544         continue;
545       }
546       /// We will prefer importing the prevailing candidate, if not, we'll
547       /// still pick the first available candidate. The reason we want to make
548       /// sure we do import the prevailing candidate is because the goal of
549       /// workload-awareness is to enable optimizations specializing the call
550       /// graph of that workload. Suppose a function is already defined in the
551       /// module, but it's not the prevailing variant. Suppose also we do not
552       /// inline it (in fact, if it were interposable, we can't inline it),
553       /// but we could specialize it to the workload in other ways. However,
554       /// the linker would drop it in the favor of the prevailing copy.
555       /// Instead, by importing the prevailing variant (assuming also the use
556       /// of `-avail-extern-to-local`), we keep the specialization. We could
557       /// alteranatively make the non-prevailing variant local, but the
558       /// prevailing one is also the one for which we would have previously
559       /// collected profiles, making it preferrable.
560       auto PrevailingCandidates = llvm::make_filter_range(
561           PotentialCandidates, [&](const auto *Candidate) {
562             return IsPrevailing(VI.getGUID(), Candidate);
563           });
564       if (PrevailingCandidates.empty()) {
565         GVS = *PotentialCandidates.begin();
566         if (!llvm::hasSingleElement(PotentialCandidates) &&
567             GlobalValue::isLocalLinkage(GVS->linkage()))
568           LLVM_DEBUG(
569               dbgs()
570               << "[Workload] Found multiple non-prevailing candidates for "
571               << VI.name()
572               << ". This is unexpected. Are module paths passed to the "
573                  "compiler unique for the modules passed to the linker?");
574         // We could in theory have multiple (interposable) copies of a symbol
575         // when there is no prevailing candidate, if say the prevailing copy was
576         // in a native object being linked in. However, we should in theory be
577         // marking all of these non-prevailing IR copies dead in that case, in
578         // which case they won't be candidates.
579         assert(GVS->isLive());
580       } else {
581         assert(llvm::hasSingleElement(PrevailingCandidates));
582         GVS = *PrevailingCandidates.begin();
583       }
584 
585       auto ExportingModule = GVS->modulePath();
586       // We checked that for the prevailing case, but if we happen to have for
587       // example an internal that's defined in this module, it'd have no
588       // PrevailingCandidates.
589       if (ExportingModule == ModName) {
590         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
591                           << " because its defining module is the same as the "
592                              "current module\n");
593         continue;
594       }
595       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
596                         << ExportingModule << " : "
597                         << Function::getGUID(VI.name()) << "\n");
598       ImportList[ExportingModule][VI.getGUID()] =
599           GlobalValueSummary::Definition;
600       GVI.onImportingSummary(*GVS);
601       if (ExportLists)
602         (*ExportLists)[ExportingModule].insert(VI);
603     }
604     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
605   }
606 
607 public:
608   WorkloadImportsManager(
609       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
610           IsPrevailing,
611       const ModuleSummaryIndex &Index,
612       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
613       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
614     // Since the workload def uses names, we need a quick lookup
615     // name->ValueInfo.
616     StringMap<ValueInfo> NameToValueInfo;
617     StringSet<> AmbiguousNames;
618     for (auto &I : Index) {
619       ValueInfo VI = Index.getValueInfo(I);
620       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
621         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
622     }
623     auto DbgReportIfAmbiguous = [&](StringRef Name) {
624       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
625         dbgs() << "[Workload] Function name " << Name
626                << " present in the workload definition is ambiguous. Consider "
627                   "compiling with -funique-internal-linkage-names.";
628       });
629     };
630     std::error_code EC;
631     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
632     if (std::error_code EC = BufferOrErr.getError()) {
633       report_fatal_error("Failed to open context file");
634       return;
635     }
636     auto Buffer = std::move(BufferOrErr.get());
637     std::map<std::string, std::vector<std::string>> WorkloadDefs;
638     json::Path::Root NullRoot;
639     // The JSON is supposed to contain a dictionary matching the type of
640     // WorkloadDefs. For example:
641     // {
642     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
643     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
644     // }
645     auto Parsed = json::parse(Buffer->getBuffer());
646     if (!Parsed)
647       report_fatal_error(Parsed.takeError());
648     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
649       report_fatal_error("Invalid thinlto contextual profile format.");
650     for (const auto &Workload : WorkloadDefs) {
651       const auto &Root = Workload.first;
652       DbgReportIfAmbiguous(Root);
653       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
654       const auto &AllCallees = Workload.second;
655       auto RootIt = NameToValueInfo.find(Root);
656       if (RootIt == NameToValueInfo.end()) {
657         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
658                           << " not found in this linkage unit.\n");
659         continue;
660       }
661       auto RootVI = RootIt->second;
662       if (RootVI.getSummaryList().size() != 1) {
663         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
664                           << " should have exactly one summary, but has "
665                           << RootVI.getSummaryList().size() << ". Skipping.\n");
666         continue;
667       }
668       StringRef RootDefiningModule =
669           RootVI.getSummaryList().front()->modulePath();
670       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
671                         << " is : " << RootDefiningModule << "\n");
672       auto &Set = Workloads[RootDefiningModule];
673       for (const auto &Callee : AllCallees) {
674         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
675         DbgReportIfAmbiguous(Callee);
676         auto ElemIt = NameToValueInfo.find(Callee);
677         if (ElemIt == NameToValueInfo.end()) {
678           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
679           continue;
680         }
681         Set.insert(ElemIt->second);
682       }
683       LLVM_DEBUG({
684         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
685                << " distinct callees.\n";
686         for (const auto &VI : Set) {
687           dbgs() << "[Workload] Root: " << Root
688                  << " Would include: " << VI.getGUID() << "\n";
689         }
690       });
691     }
692   }
693 };
694 
695 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
696     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
697         IsPrevailing,
698     const ModuleSummaryIndex &Index,
699     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
700   if (WorkloadDefinitions.empty()) {
701     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
702     return std::unique_ptr<ModuleImportsManager>(
703         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
704   }
705   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
706   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
707                                                   ExportLists);
708 }
709 
710 static const char *
711 getFailureName(FunctionImporter::ImportFailureReason Reason) {
712   switch (Reason) {
713   case FunctionImporter::ImportFailureReason::None:
714     return "None";
715   case FunctionImporter::ImportFailureReason::GlobalVar:
716     return "GlobalVar";
717   case FunctionImporter::ImportFailureReason::NotLive:
718     return "NotLive";
719   case FunctionImporter::ImportFailureReason::TooLarge:
720     return "TooLarge";
721   case FunctionImporter::ImportFailureReason::InterposableLinkage:
722     return "InterposableLinkage";
723   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
724     return "LocalLinkageNotInModule";
725   case FunctionImporter::ImportFailureReason::NotEligible:
726     return "NotEligible";
727   case FunctionImporter::ImportFailureReason::NoInline:
728     return "NoInline";
729   }
730   llvm_unreachable("invalid reason");
731 }
732 
/// Compute the list of functions to import for a given caller.
///
/// Walks every call edge of \p Summary. For each callee that is not already
/// defined in the destination module, selectCallee() is asked for a summary
/// that qualifies under the edge's threshold. A qualifying callee is recorded
/// in \p ImportList as a definition, added to the export set of its source
/// module (when \p ExportLists is non-null) and pushed onto \p Worklist so
/// that its own callees are considered later. When only a declaration
/// candidate exists and ImportDeclaration is set, a declaration entry is
/// recorded instead.
///
/// \param Summary the caller whose call edges are examined.
/// \param Index the index handed to selectCallee().
/// \param Threshold base threshold for this caller's callees; it is scaled
///        per edge by a hotness-dependent multiplier.
/// \param DefinedGVSummaries values defined in the destination module; call
///        targets found here are skipped.
/// \param isPrevailing not referenced in this function's body.
/// \param Worklist receives (callee summary, adjusted threshold) pairs.
/// \param GVImporter notified of \p Summary before the edges are examined.
/// \param ImportList per source module import decisions, updated in place.
/// \param ExportLists optional per source module export sets.
/// \param ImportThresholds per callee GUID record of the highest threshold
///        processed so far, the selected summary (null if rejected) and the
///        failure info (only populated when PrintImportFailures is set).
static void computeImportForFunction(
    const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
    const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
    FunctionImporter::ImportMapTy &ImportList,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
    FunctionImporter::ImportThresholdsTy &ImportThresholds) {
  GVImporter.onImportingSummary(Summary);
  // Function-local static: the count is shared by every call of this function
  // and is what ImportCutoff is compared against.
  static int ImportCount = 0;
  for (const auto &Edge : Summary.calls()) {
    ValueInfo VI = Edge.first;
    LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
                      << "\n");

    // A negative ImportCutoff disables the cutoff.
    if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
      LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
                        << " reached.\n");
      continue;
    }

    if (DefinedGVSummaries.count(VI.getGUID())) {
      // FIXME: Consider not skipping import if the module contains
      // a non-prevailing def with interposable linkage. The prevailing copy
      // can safely be imported (see shouldImportGlobal()).
      LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
      continue;
    }

    // Scale factor applied to the threshold depending on the hotness of the
    // call edge; edges with any other hotness keep the threshold unchanged.
    auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
      if (Hotness == CalleeInfo::HotnessType::Hot)
        return ImportHotMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Cold)
        return ImportColdMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Critical)
        return ImportCriticalMultiplier;
      return 1.0;
    };

    const auto NewThreshold =
        Threshold * GetBonusMultiplier(Edge.second.getHotness());

    // Look up (or create) the bookkeeping entry for this callee. On first
    // visit the entry starts with NewThreshold and no summary/failure info.
    auto IT = ImportThresholds.insert(std::make_pair(
        VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
    bool PreviouslyVisited = !IT.second;
    // These are references into the map entry, so assignments below update
    // the stored record.
    auto &ProcessedThreshold = std::get<0>(IT.first->second);
    auto &CalleeSummary = std::get<1>(IT.first->second);
    auto &FailureInfo = std::get<2>(IT.first->second);

    bool IsHotCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
    bool IsCriticalCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;

    const FunctionSummary *ResolvedCalleeSummary = nullptr;
    if (CalleeSummary) {
      // A summary was already selected for this callee on an earlier visit.
      assert(PreviouslyVisited);
      // Since the traversal of the call graph is DFS, we can revisit a function
      // a second time with a higher threshold. In this case, it is added back
      // to the worklist with the new threshold (so that its own callee chains
      // can be considered with the higher threshold).
      if (NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already imported with Threshold "
                   << ProcessedThreshold << "\n");
        continue;
      }
      // Update with new larger threshold.
      ProcessedThreshold = NewThreshold;
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
    } else {
      // If we already rejected importing a callee at the same or higher
      // threshold, don't waste time calling selectCallee.
      if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already rejected with Threshold "
            << ProcessedThreshold << "\n");
        if (PrintImportFailures) {
          assert(FailureInfo &&
                 "Expected FailureInfo for previously rejected candidate");
          FailureInfo->Attempts++;
        }
        continue;
      }

      FunctionImporter::ImportFailureReason Reason{};

      // `SummaryForDeclImport` is a summary eligible for declaration import.
      const GlobalValueSummary *SummaryForDeclImport = nullptr;
      CalleeSummary =
          selectCallee(Index, VI.getSummaryList(), NewThreshold,
                       Summary.modulePath(), SummaryForDeclImport, Reason);
      if (!CalleeSummary) {
        // There isn't a callee for definition import but one for declaration
        // import.
        if (ImportDeclaration && SummaryForDeclImport) {
          StringRef DeclSourceModule = SummaryForDeclImport->modulePath();

          // Since definition takes precedence over declaration for the same VI,
          // try emplace <VI, declaration> pair without checking insert result.
          // If insert doesn't happen, there must be an existing entry keyed by
          // VI. Note `ExportLists` only keeps track of exports due to imported
          // definitions.
          ImportList[DeclSourceModule].try_emplace(
              VI.getGUID(), GlobalValueSummary::Declaration);
        }
        // Update with new larger threshold if this was a retry (otherwise
        // we would have already inserted with NewThreshold above). Also
        // update failure info if requested.
        if (PreviouslyVisited) {
          ProcessedThreshold = NewThreshold;
          if (PrintImportFailures) {
            assert(FailureInfo &&
                   "Expected FailureInfo for previously rejected candidate");
            FailureInfo->Reason = Reason;
            FailureInfo->Attempts++;
            FailureInfo->MaxHotness =
                std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
          }
        } else if (PrintImportFailures) {
          assert(!FailureInfo &&
                 "Expected no FailureInfo for newly rejected candidate");
          FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
              VI, Edge.second.getHotness(), Reason, 1);
        }
        if (ForceImportAll) {
          // With ForceImportAll a failed import is reported on errs() and the
          // remaining edges of this caller are not examined.
          std::string Msg = std::string("Failed to import function ") +
                            VI.name().str() + " due to " +
                            getFailureName(Reason);
          auto Error = make_error<StringError>(
              Msg, make_error_code(errc::not_supported));
          logAllUnhandledErrors(std::move(Error), errs(),
                                "Error importing module: ");
          break;
        } else {
          LLVM_DEBUG(dbgs()
                     << "ignored! No qualifying callee with summary found.\n");
          continue;
        }
      }

      // "Resolve" the summary
      CalleeSummary = CalleeSummary->getBaseObject();
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);

      assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
              (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
             "selectCallee() didn't honor the threshold");

      auto ExportModulePath = ResolvedCalleeSummary->modulePath();

      // Try emplace the definition entry, and update stats based on insertion
      // status.
      auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
          VI.getGUID(), GlobalValueSummary::Definition);

      // We previously decided to import this GUID definition if it was already
      // inserted in the set of imports from the exporting module.
      if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
        NumImportedFunctionsThinLink++;
        if (IsHotCallsite)
          NumImportedHotFunctionsThinLink++;
        if (IsCriticalCallsite)
          NumImportedCriticalFunctionsThinLink++;
      }

      // An existing declaration entry is upgraded to a definition.
      if (Iter->second == GlobalValueSummary::Declaration)
        Iter->second = GlobalValueSummary::Definition;

      // Any calls/references made by this function will be marked exported
      // later, in ComputeCrossModuleImport, after import decisions are
      // complete, which is more efficient than adding them here.
      if (ExportLists)
        (*ExportLists)[ExportModulePath].insert(VI);
    }

    auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
      // Adjust the threshold for next level of imported functions.
      // The threshold is different for hot callsites because we can then
      // inline chains of hot calls.
      if (IsHotCallsite)
        return Threshold * ImportHotInstrFactor;
      return Threshold * ImportInstrFactor;
    };

    // Note that the adjustment is applied to the caller's base Threshold, not
    // to the per-edge NewThreshold.
    const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);

    ImportCount++;

    // Insert the newly imported function to the worklist.
    Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
  }
}
930 
931 void ModuleImportsManager::computeImportForModule(
932     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
933     FunctionImporter::ImportMapTy &ImportList) {
934   // Worklist contains the list of function imported in this module, for which
935   // we will analyse the callees and may import further down the callgraph.
936   SmallVector<EdgeInfo, 128> Worklist;
937   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
938                       ExportLists);
939   FunctionImporter::ImportThresholdsTy ImportThresholds;
940 
941   // Populate the worklist with the import for the functions in the current
942   // module
943   for (const auto &GVSummary : DefinedGVSummaries) {
944 #ifndef NDEBUG
945     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
946     // so this map look up (and possibly others) can be avoided.
947     auto VI = Index.getValueInfo(GVSummary.first);
948 #endif
949     if (!Index.isGlobalValueLive(GVSummary.second)) {
950       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
951       continue;
952     }
953     auto *FuncSummary =
954         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
955     if (!FuncSummary)
956       // Skip import for global variables
957       continue;
958     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
959     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
960                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
961                              ImportList, ExportLists, ImportThresholds);
962   }
963 
964   // Process the newly imported functions and add callees to the worklist.
965   while (!Worklist.empty()) {
966     auto GVInfo = Worklist.pop_back_val();
967     auto *Summary = std::get<0>(GVInfo);
968     auto Threshold = std::get<1>(GVInfo);
969 
970     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
971       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
972                                IsPrevailing, Worklist, GVI, ImportList,
973                                ExportLists, ImportThresholds);
974   }
975 
976   // Print stats about functions considered but rejected for importing
977   // when requested.
978   if (PrintImportFailures) {
979     dbgs() << "Missed imports into module " << ModName << "\n";
980     for (auto &I : ImportThresholds) {
981       auto &ProcessedThreshold = std::get<0>(I.second);
982       auto &CalleeSummary = std::get<1>(I.second);
983       auto &FailureInfo = std::get<2>(I.second);
984       if (CalleeSummary)
985         continue; // We are going to import.
986       assert(FailureInfo);
987       FunctionSummary *FS = nullptr;
988       if (!FailureInfo->VI.getSummaryList().empty())
989         FS = dyn_cast<FunctionSummary>(
990             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
991       dbgs() << FailureInfo->VI
992              << ": Reason = " << getFailureName(FailureInfo->Reason)
993              << ", Threshold = " << ProcessedThreshold
994              << ", Size = " << (FS ? (int)FS->instCount() : -1)
995              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
996              << ", Attempts = " << FailureInfo->Attempts << "\n";
997     }
998   }
999 }
1000 
1001 #ifndef NDEBUG
1002 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
1003   auto SL = VI.getSummaryList();
1004   return SL.empty()
1005              ? false
1006              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
1007 }
1008 
1009 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
1010                                GlobalValue::GUID G) {
1011   if (const auto &VI = Index.getValueInfo(G))
1012     return isGlobalVarSummary(Index, VI);
1013   return false;
1014 }
1015 
1016 // Return the number of global variable summaries in ExportSet.
1017 static unsigned
1018 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1019                       FunctionImporter::ExportSetTy &ExportSet) {
1020   unsigned NumGVS = 0;
1021   for (auto &VI : ExportSet)
1022     if (isGlobalVarSummary(Index, VI.getGUID()))
1023       ++NumGVS;
1024   return NumGVS;
1025 }
1026 
1027 // Given ImportMap, return the number of global variable summaries and record
1028 // the number of defined function summaries as output parameter.
1029 static unsigned
1030 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1031                       FunctionImporter::FunctionsToImportTy &ImportMap,
1032                       unsigned &DefinedFS) {
1033   unsigned NumGVS = 0;
1034   DefinedFS = 0;
1035   for (auto &[GUID, Type] : ImportMap) {
1036     if (isGlobalVarSummary(Index, GUID))
1037       ++NumGVS;
1038     else if (Type == GlobalValueSummary::Definition)
1039       ++DefinedFS;
1040   }
1041   return NumGVS;
1042 }
1043 #endif
1044 
1045 #ifndef NDEBUG
1046 static bool checkVariableImport(
1047     const ModuleSummaryIndex &Index,
1048     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1049     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1050   DenseSet<GlobalValue::GUID> FlattenedImports;
1051 
1052   for (auto &ImportPerModule : ImportLists)
1053     for (auto &ExportPerModule : ImportPerModule.second)
1054       for (auto &[GUID, Type] : ExportPerModule.second)
1055         FlattenedImports.insert(GUID);
1056 
1057   // Checks that all GUIDs of read/writeonly vars we see in export lists
1058   // are also in the import lists. Otherwise we my face linker undefs,
1059   // because readonly and writeonly vars are internalized in their
1060   // source modules. The exception would be if it has a linkage type indicating
1061   // that there may have been a copy existing in the importing module (e.g.
1062   // linkonce_odr). In that case we cannot accurately do this checking.
1063   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1064                                                   const ValueInfo &VI) {
1065     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1066         Index.findSummaryInModule(VI, ModulePath));
1067     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1068            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1069              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1070              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1071   };
1072 
1073   for (auto &ExportPerModule : ExportLists)
1074     for (auto &VI : ExportPerModule.second)
1075       if (!FlattenedImports.count(VI.getGUID()) &&
1076           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1077         return false;
1078 
1079   return true;
1080 }
1081 #endif
1082 
1083 /// Compute all the import and export for every module using the Index.
1084 void llvm::ComputeCrossModuleImport(
1085     const ModuleSummaryIndex &Index,
1086     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1087     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1088         isPrevailing,
1089     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1090     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1091   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1092   // For each module that has function defined, compute the import/export lists.
1093   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1094     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1095     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1096                       << DefinedGVSummaries.first << "'\n");
1097     MIS->computeImportForModule(DefinedGVSummaries.second,
1098                                 DefinedGVSummaries.first, ImportList);
1099   }
1100 
1101   // When computing imports we only added the variables and functions being
1102   // imported to the export list. We also need to mark any references and calls
1103   // they make as exported as well. We do this here, as it is more efficient
1104   // since we may import the same values multiple times into different modules
1105   // during the import computation.
1106   for (auto &ELI : ExportLists) {
1107     // `NewExports` tracks the VI that gets exported because the full definition
1108     // of its user/referencer gets exported.
1109     FunctionImporter::ExportSetTy NewExports;
1110     const auto &DefinedGVSummaries =
1111         ModuleToDefinedGVSummaries.lookup(ELI.first);
1112     for (auto &EI : ELI.second) {
1113       // Find the copy defined in the exporting module so that we can mark the
1114       // values it references in that specific definition as exported.
1115       // Below we will add all references and called values, without regard to
1116       // whether they are also defined in this module. We subsequently prune the
1117       // list to only include those defined in the exporting module, see comment
1118       // there as to why.
1119       auto DS = DefinedGVSummaries.find(EI.getGUID());
1120       // Anything marked exported during the import computation must have been
1121       // defined in the exporting module.
1122       assert(DS != DefinedGVSummaries.end());
1123       auto *S = DS->getSecond();
1124       S = S->getBaseObject();
1125       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1126         // Export referenced functions and variables. We don't export/promote
1127         // objects referenced by writeonly variable initializer, because
1128         // we convert such variables initializers to "zeroinitializer".
1129         // See processGlobalForThinLTO.
1130         if (!Index.isWriteOnly(GVS))
1131           for (const auto &VI : GVS->refs())
1132             NewExports.insert(VI);
1133       } else {
1134         auto *FS = cast<FunctionSummary>(S);
1135         for (const auto &Edge : FS->calls())
1136           NewExports.insert(Edge.first);
1137         for (const auto &Ref : FS->refs())
1138           NewExports.insert(Ref);
1139       }
1140     }
1141     // Prune list computed above to only include values defined in the
1142     // exporting module. We do this after the above insertion since we may hit
1143     // the same ref/call target multiple times in above loop, and it is more
1144     // efficient to avoid a set lookup each time.
1145     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1146       if (!DefinedGVSummaries.count(EI->getGUID()))
1147         NewExports.erase(EI++);
1148       else
1149         ++EI;
1150     }
1151     ELI.second.insert(NewExports.begin(), NewExports.end());
1152   }
1153 
1154   assert(checkVariableImport(Index, ImportLists, ExportLists));
1155 #ifndef NDEBUG
1156   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1157                     << " modules:\n");
1158   for (auto &ModuleImports : ImportLists) {
1159     auto ModName = ModuleImports.first;
1160     auto &Exports = ExportLists[ModName];
1161     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1162     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1163                       << Exports.size() - NumGVS << " functions and " << NumGVS
1164                       << " vars. Imports from " << ModuleImports.second.size()
1165                       << " modules.\n");
1166     for (auto &Src : ModuleImports.second) {
1167       auto SrcModName = Src.first;
1168       unsigned DefinedFS = 0;
1169       unsigned NumGVSPerMod =
1170           numGlobalVarSummaries(Index, Src.second, DefinedFS);
1171       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1172                         << Src.second.size() - NumGVSPerMod - DefinedFS
1173                         << " function declarations imported from " << SrcModName
1174                         << "\n");
1175       LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1176                         << " global vars imported from " << SrcModName << "\n");
1177     }
1178   }
1179 #endif
1180 }
1181 
1182 #ifndef NDEBUG
1183 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1184                                     StringRef ModulePath,
1185                                     FunctionImporter::ImportMapTy &ImportList) {
1186   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1187                     << ImportList.size() << " modules.\n");
1188   for (auto &Src : ImportList) {
1189     auto SrcModName = Src.first;
1190     unsigned DefinedFS = 0;
1191     unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
1192     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1193                       << Src.second.size() - DefinedFS - NumGVSPerMod
1194                       << " function declarations imported from " << SrcModName
1195                       << "\n");
1196     LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1197                       << SrcModName << "\n");
1198   }
1199 }
1200 #endif
1201 
1202 /// Compute all the imports for the given module using the Index.
1203 ///
1204 /// \p isPrevailing is a callback that will be called with a global value's GUID
1205 /// and summary and should return whether the module corresponding to the
1206 /// summary contains the linker-prevailing copy of that value.
1207 ///
1208 /// \p ImportList will be populated with a map that can be passed to
1209 /// FunctionImporter::importFunctions() above (see description there).
1210 static void ComputeCrossModuleImportForModuleForTest(
1211     StringRef ModulePath,
1212     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1213         isPrevailing,
1214     const ModuleSummaryIndex &Index,
1215     FunctionImporter::ImportMapTy &ImportList) {
1216   // Collect the list of functions this module defines.
1217   // GUID -> Summary
1218   GVSummaryMapTy FunctionSummaryMap;
1219   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1220 
1221   // Compute the import list for this module.
1222   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1223   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1224   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1225 
1226 #ifndef NDEBUG
1227   dumpImportListForModule(Index, ModulePath, ImportList);
1228 #endif
1229 }
1230 
1231 /// Mark all external summaries in \p Index for import into the given module.
1232 /// Used for testing the case of distributed builds using a distributed index.
1233 ///
1234 /// \p ImportList will be populated with a map that can be passed to
1235 /// FunctionImporter::importFunctions() above (see description there).
1236 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1237     StringRef ModulePath, const ModuleSummaryIndex &Index,
1238     FunctionImporter::ImportMapTy &ImportList) {
1239   for (const auto &GlobalList : Index) {
1240     // Ignore entries for undefined references.
1241     if (GlobalList.second.SummaryList.empty())
1242       continue;
1243 
1244     auto GUID = GlobalList.first;
1245     assert(GlobalList.second.SummaryList.size() == 1 &&
1246            "Expected individual combined index to have one summary per GUID");
1247     auto &Summary = GlobalList.second.SummaryList[0];
1248     // Skip the summaries for the importing module. These are included to
1249     // e.g. record required linkage changes.
1250     if (Summary->modulePath() == ModulePath)
1251       continue;
1252     // Add an entry to provoke importing by thinBackend.
1253     auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
1254         GUID, Summary->importType());
1255     if (!Inserted) {
1256       // Use 'std::min' to make sure definition (with enum value 0) takes
1257       // precedence over declaration (with enum value 1).
1258       Iter->second = std::min(Iter->second, Summary->importType());
1259     }
1260   }
1261 #ifndef NDEBUG
1262   dumpImportListForModule(Index, ModulePath, ImportList);
1263 #endif
1264 }
1265 
1266 // For SamplePGO, the indirect call targets for local functions will
1267 // have its original name annotated in profile. We try to find the
1268 // corresponding PGOFuncName as the GUID, and fix up the edges
1269 // accordingly.
1270 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1271                                      FunctionSummary *FS) {
1272   for (auto &EI : FS->mutableCalls()) {
1273     if (!EI.first.getSummaryList().empty())
1274       continue;
1275     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1276     if (GUID == 0)
1277       continue;
1278     // Update the edge to point directly to the correct GUID.
1279     auto VI = Index.getValueInfo(GUID);
1280     if (llvm::any_of(
1281             VI.getSummaryList(),
1282             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1283               // The mapping from OriginalId to GUID may return a GUID
1284               // that corresponds to a static variable. Filter it out here.
1285               // This can happen when
1286               // 1) There is a call to a library function which is not defined
1287               // in the index.
1288               // 2) There is a static variable with the  OriginalGUID identical
1289               // to the GUID of the library function in 1);
1290               // When this happens the static variable in 2) will be found,
1291               // which needs to be filtered out.
1292               return SummaryPtr->getSummaryKind() ==
1293                      GlobalValueSummary::GlobalVarKind;
1294             }))
1295       continue;
1296     EI.first = VI;
1297   }
1298 }
1299 
1300 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1301   for (const auto &Entry : Index) {
1302     for (const auto &S : Entry.second.SummaryList) {
1303       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1304         updateValueInfoForIndirectCalls(Index, FS);
1305     }
1306   }
1307 }
1308 
1309 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1310     ModuleSummaryIndex &Index,
1311     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1312     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1313   assert(!Index.withGlobalValueDeadStripping());
1314   if (!ComputeDead ||
1315       // Don't do anything when nothing is live, this is friendly with tests.
1316       GUIDPreservedSymbols.empty()) {
1317     // Still need to update indirect calls.
1318     updateIndirectCalls(Index);
1319     return;
1320   }
1321   unsigned LiveSymbols = 0;
1322   SmallVector<ValueInfo, 128> Worklist;
1323   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1324   for (auto GUID : GUIDPreservedSymbols) {
1325     ValueInfo VI = Index.getValueInfo(GUID);
1326     if (!VI)
1327       continue;
1328     for (const auto &S : VI.getSummaryList())
1329       S->setLive(true);
1330   }
1331 
1332   // Add values flagged in the index as live roots to the worklist.
1333   for (const auto &Entry : Index) {
1334     auto VI = Index.getValueInfo(Entry);
1335     for (const auto &S : Entry.second.SummaryList) {
1336       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1337         updateValueInfoForIndirectCalls(Index, FS);
1338       if (S->isLive()) {
1339         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1340         Worklist.push_back(VI);
1341         ++LiveSymbols;
1342         break;
1343       }
1344     }
1345   }
1346 
1347   // Make value live and add it to the worklist if it was not live before.
1348   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1349     // FIXME: If we knew which edges were created for indirect call profiles,
1350     // we could skip them here. Any that are live should be reached via
1351     // other edges, e.g. reference edges. Otherwise, using a profile collected
1352     // on a slightly different binary might provoke preserving, importing
1353     // and ultimately promoting calls to functions not linked into this
1354     // binary, which increases the binary size unnecessarily. Note that
1355     // if this code changes, the importer needs to change so that edges
1356     // to functions marked dead are skipped.
1357 
1358     if (llvm::any_of(VI.getSummaryList(),
1359                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1360                        return S->isLive();
1361                      }))
1362       return;
1363 
1364     // We only keep live symbols that are known to be non-prevailing if any are
1365     // available_externally, linkonceodr, weakodr. Those symbols are discarded
1366     // later in the EliminateAvailableExternally pass and setting them to
1367     // not-live could break downstreams users of liveness information (PR36483)
1368     // or limit optimization opportunities.
1369     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1370       bool KeepAliveLinkage = false;
1371       bool Interposable = false;
1372       for (const auto &S : VI.getSummaryList()) {
1373         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1374             S->linkage() == GlobalValue::WeakODRLinkage ||
1375             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1376           KeepAliveLinkage = true;
1377         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1378           Interposable = true;
1379       }
1380 
1381       if (!IsAliasee) {
1382         if (!KeepAliveLinkage)
1383           return;
1384 
1385         if (Interposable)
1386           report_fatal_error(
1387               "Interposable and available_externally/linkonce_odr/weak_odr "
1388               "symbol");
1389       }
1390     }
1391 
1392     for (const auto &S : VI.getSummaryList())
1393       S->setLive(true);
1394     ++LiveSymbols;
1395     Worklist.push_back(VI);
1396   };
1397 
1398   while (!Worklist.empty()) {
1399     auto VI = Worklist.pop_back_val();
1400     for (const auto &Summary : VI.getSummaryList()) {
1401       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1402         // If this is an alias, visit the aliasee VI to ensure that all copies
1403         // are marked live and it is added to the worklist for further
1404         // processing of its references.
1405         visit(AS->getAliaseeVI(), true);
1406         continue;
1407       }
1408       for (auto Ref : Summary->refs())
1409         visit(Ref, false);
1410       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1411         for (auto Call : FS->calls())
1412           visit(Call.first, false);
1413     }
1414   }
1415   Index.setWithGlobalValueDeadStripping();
1416 
1417   unsigned DeadSymbols = Index.size() - LiveSymbols;
1418   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1419                     << " symbols Dead \n");
1420   NumDeadSymbols += DeadSymbols;
1421   NumLiveSymbols += LiveSymbols;
1422 }
1423 
1424 // Compute dead symbols and propagate constants in combined index.
1425 void llvm::computeDeadSymbolsWithConstProp(
1426     ModuleSummaryIndex &Index,
1427     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1428     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1429     bool ImportEnabled) {
1430   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1431                                            isPrevailing);
1432   if (ImportEnabled)
1433     Index.propagateAttributes(GUIDPreservedSymbols);
1434 }
1435 
1436 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1437 /// \p ModulePath.
1438 void llvm::gatherImportedSummariesForModule(
1439     StringRef ModulePath,
1440     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1441     const FunctionImporter::ImportMapTy &ImportList,
1442     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
1443     GVSummaryPtrSet &DecSummaries) {
1444   // Include all summaries from the importing module.
1445   ModuleToSummariesForIndex[std::string(ModulePath)] =
1446       ModuleToDefinedGVSummaries.lookup(ModulePath);
1447   // Include summaries for imports.
1448   for (const auto &ILI : ImportList) {
1449     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1450 
1451     const auto &DefinedGVSummaries =
1452         ModuleToDefinedGVSummaries.lookup(ILI.first);
1453     for (const auto &[GUID, Type] : ILI.second) {
1454       const auto &DS = DefinedGVSummaries.find(GUID);
1455       assert(DS != DefinedGVSummaries.end() &&
1456              "Expected a defined summary for imported global value");
1457       if (Type == GlobalValueSummary::Declaration)
1458         DecSummaries.insert(DS->second);
1459 
1460       SummariesForIndex[GUID] = DS->second;
1461     }
1462   }
1463 }
1464 
1465 /// Emit the files \p ModulePath will import from into \p OutputFilename.
1466 std::error_code llvm::EmitImportsFiles(
1467     StringRef ModulePath, StringRef OutputFilename,
1468     const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1469   std::error_code EC;
1470   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1471   if (EC)
1472     return EC;
1473   for (const auto &ILI : ModuleToSummariesForIndex)
1474     // The ModuleToSummariesForIndex map includes an entry for the current
1475     // Module (needed for writing out the index files). We don't want to
1476     // include it in the imports file, however, so filter it out.
1477     if (ILI.first != ModulePath)
1478       ImportsOS << ILI.first << "\n";
1479   return std::error_code();
1480 }
1481 
1482 bool llvm::convertToDeclaration(GlobalValue &GV) {
1483   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1484                     << "\n");
1485   if (Function *F = dyn_cast<Function>(&GV)) {
1486     F->deleteBody();
1487     F->clearMetadata();
1488     F->setComdat(nullptr);
1489   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1490     V->setInitializer(nullptr);
1491     V->setLinkage(GlobalValue::ExternalLinkage);
1492     V->clearMetadata();
1493     V->setComdat(nullptr);
1494   } else {
1495     GlobalValue *NewGV;
1496     if (GV.getValueType()->isFunctionTy())
1497       NewGV =
1498           Function::Create(cast<FunctionType>(GV.getValueType()),
1499                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1500                            "", GV.getParent());
1501     else
1502       NewGV =
1503           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1504                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1505                              /*init*/ nullptr, "",
1506                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1507                              GV.getType()->getAddressSpace());
1508     NewGV->takeName(&GV);
1509     GV.replaceAllUsesWith(NewGV);
1510     return false;
1511   }
1512   if (!GV.isImplicitDSOLocal())
1513     GV.setDSOLocal(false);
1514   return true;
1515 }
1516 
1517 void llvm::thinLTOFinalizeInModule(Module &TheModule,
1518                                    const GVSummaryMapTy &DefinedGlobals,
1519                                    bool PropagateAttrs) {
1520   DenseSet<Comdat *> NonPrevailingComdats;
1521   auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
1522     // See if the global summary analysis computed a new resolved linkage.
1523     const auto &GS = DefinedGlobals.find(GV.getGUID());
1524     if (GS == DefinedGlobals.end())
1525       return;
1526 
1527     if (Propagate)
1528       if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
1529         if (Function *F = dyn_cast<Function>(&GV)) {
1530           // TODO: propagate ReadNone and ReadOnly.
1531           if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
1532             F->setDoesNotAccessMemory();
1533 
1534           if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
1535             F->setOnlyReadsMemory();
1536 
1537           if (FS->fflags().NoRecurse && !F->doesNotRecurse())
1538             F->setDoesNotRecurse();
1539 
1540           if (FS->fflags().NoUnwind && !F->doesNotThrow())
1541             F->setDoesNotThrow();
1542         }
1543       }
1544 
1545     auto NewLinkage = GS->second->linkage();
1546     if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
1547         // Don't internalize anything here, because the code below
1548         // lacks necessary correctness checks. Leave this job to
1549         // LLVM 'internalize' pass.
1550         GlobalValue::isLocalLinkage(NewLinkage) ||
1551         // In case it was dead and already converted to declaration.
1552         GV.isDeclaration())
1553       return;
1554 
1555     // Set the potentially more constraining visibility computed from summaries.
1556     // The DefaultVisibility condition is because older GlobalValueSummary does
1557     // not record DefaultVisibility and we don't want to change protected/hidden
1558     // to default.
1559     if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
1560       GV.setVisibility(GS->second->getVisibility());
1561 
1562     if (NewLinkage == GV.getLinkage())
1563       return;
1564 
1565     // Check for a non-prevailing def that has interposable linkage
1566     // (e.g. non-odr weak or linkonce). In that case we can't simply
1567     // convert to available_externally, since it would lose the
1568     // interposable property and possibly get inlined. Simply drop
1569     // the definition in that case.
1570     if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
1571         GlobalValue::isInterposableLinkage(GV.getLinkage())) {
1572       if (!convertToDeclaration(GV))
1573         // FIXME: Change this to collect replaced GVs and later erase
1574         // them from the parent module once thinLTOResolvePrevailingGUID is
1575         // changed to enable this for aliases.
1576         llvm_unreachable("Expected GV to be converted");
1577     } else {
1578       // If all copies of the original symbol had global unnamed addr and
1579       // linkonce_odr linkage, or if all of them had local unnamed addr linkage
1580       // and are constants, then it should be an auto hide symbol. In that case
1581       // the thin link would have marked it as CanAutoHide. Add hidden
1582       // visibility to the symbol to preserve the property.
1583       if (NewLinkage == GlobalValue::WeakODRLinkage &&
1584           GS->second->canAutoHide()) {
1585         assert(GV.canBeOmittedFromSymbolTable());
1586         GV.setVisibility(GlobalValue::HiddenVisibility);
1587       }
1588 
1589       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
1590                         << "` from " << GV.getLinkage() << " to " << NewLinkage
1591                         << "\n");
1592       GV.setLinkage(NewLinkage);
1593     }
1594     // Remove declarations from comdats, including available_externally
1595     // as this is a declaration for the linker, and will be dropped eventually.
1596     // It is illegal for comdats to contain declarations.
1597     auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
1598     if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
1599       if (GO->getComdat()->getName() == GO->getName())
1600         NonPrevailingComdats.insert(GO->getComdat());
1601       GO->setComdat(nullptr);
1602     }
1603   };
1604 
1605   // Process functions and global now
1606   for (auto &GV : TheModule)
1607     FinalizeInModule(GV, PropagateAttrs);
1608   for (auto &GV : TheModule.globals())
1609     FinalizeInModule(GV);
1610   for (auto &GV : TheModule.aliases())
1611     FinalizeInModule(GV);
1612 
1613   // For a non-prevailing comdat, all its members must be available_externally.
1614   // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
1615   // local linkage GlobalValues.
1616   if (NonPrevailingComdats.empty())
1617     return;
1618   for (auto &GO : TheModule.global_objects()) {
1619     if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
1620       GO.setComdat(nullptr);
1621       GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
1622     }
1623   }
1624   bool Changed;
1625   do {
1626     Changed = false;
1627     // If an alias references a GlobalValue in a non-prevailing comdat, change
1628     // it to available_externally. For simplicity we only handle GlobalValue and
1629     // ConstantExpr with a base object. ConstantExpr without a base object is
1630     // unlikely used in a COMDAT.
1631     for (auto &GA : TheModule.aliases()) {
1632       if (GA.hasAvailableExternallyLinkage())
1633         continue;
1634       GlobalObject *Obj = GA.getAliaseeObject();
1635       assert(Obj && "aliasee without an base object is unimplemented");
1636       if (Obj->hasAvailableExternallyLinkage()) {
1637         GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
1638         Changed = true;
1639       }
1640     }
1641   } while (Changed);
1642 }
1643 
1644 /// Run internalization on \p TheModule based on symmary analysis.
1645 void llvm::thinLTOInternalizeModule(Module &TheModule,
1646                                     const GVSummaryMapTy &DefinedGlobals) {
1647   // Declare a callback for the internalize pass that will ask for every
1648   // candidate GlobalValue if it can be internalized or not.
1649   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1650     // It may be the case that GV is on a chain of an ifunc, its alias and
1651     // subsequent aliases. In this case, the summary for the value is not
1652     // available.
1653     if (isa<GlobalIFunc>(&GV) ||
1654         (isa<GlobalAlias>(&GV) &&
1655          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1656       return true;
1657 
1658     // Lookup the linkage recorded in the summaries during global analysis.
1659     auto GS = DefinedGlobals.find(GV.getGUID());
1660     if (GS == DefinedGlobals.end()) {
1661       // Must have been promoted (possibly conservatively). Find original
1662       // name so that we can access the correct summary and see if it can
1663       // be internalized again.
1664       // FIXME: Eventually we should control promotion instead of promoting
1665       // and internalizing again.
1666       StringRef OrigName =
1667           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1668       std::string OrigId = GlobalValue::getGlobalIdentifier(
1669           OrigName, GlobalValue::InternalLinkage,
1670           TheModule.getSourceFileName());
1671       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1672       if (GS == DefinedGlobals.end()) {
1673         // Also check the original non-promoted non-globalized name. In some
1674         // cases a preempted weak value is linked in as a local copy because
1675         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1676         // In that case, since it was originally not a local value, it was
1677         // recorded in the index using the original name.
1678         // FIXME: This may not be needed once PR27866 is fixed.
1679         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1680         assert(GS != DefinedGlobals.end());
1681       }
1682     }
1683     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1684   };
1685 
1686   // FIXME: See if we can just internalize directly here via linkage changes
1687   // based on the index, rather than invoking internalizeModule.
1688   internalizeModule(TheModule, MustPreserveGV);
1689 }
1690 
1691 /// Make alias a clone of its aliasee.
1692 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1693   Function *Fn = cast<Function>(GA->getAliaseeObject());
1694 
1695   ValueToValueMapTy VMap;
1696   Function *NewFn = CloneFunction(Fn, VMap);
1697   // Clone should use the original alias's linkage, visibility and name, and we
1698   // ensure all uses of alias instead use the new clone (casted if necessary).
1699   NewFn->setLinkage(GA->getLinkage());
1700   NewFn->setVisibility(GA->getVisibility());
1701   GA->replaceAllUsesWith(NewFn);
1702   NewFn->takeName(GA);
1703   return NewFn;
1704 }
1705 
1706 // Internalize values that we marked with specific attribute
1707 // in processGlobalForThinLTO.
1708 static void internalizeGVsAfterImport(Module &M) {
1709   for (auto &GV : M.globals())
1710     // Skip GVs which have been converted to declarations
1711     // by dropDeadSymbols.
1712     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1713       GV.setLinkage(GlobalValue::InternalLinkage);
1714       GV.setVisibility(GlobalValue::DefaultVisibility);
1715     }
1716 }
1717 
1718 // Automatically import functions in Module \p DestModule based on the summaries
1719 // index.
1720 Expected<bool> FunctionImporter::importFunctions(
1721     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
1722   LLVM_DEBUG(dbgs() << "Starting import for Module "
1723                     << DestModule.getModuleIdentifier() << "\n");
1724   unsigned ImportedCount = 0, ImportedGVCount = 0;
1725 
1726   IRMover Mover(DestModule);
1727   // Do the actual import of functions now, one Module at a time
1728   std::set<StringRef> ModuleNameOrderedList;
1729   for (const auto &FunctionsToImportPerModule : ImportList) {
1730     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
1731   }
1732 
1733   auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
1734                            GlobalValue::GUID GUID)
1735       -> std::optional<GlobalValueSummary::ImportKind> {
1736     auto Iter = GUIDToImportType.find(GUID);
1737     if (Iter == GUIDToImportType.end())
1738       return std::nullopt;
1739     return Iter->second;
1740   };
1741 
1742   for (const auto &Name : ModuleNameOrderedList) {
1743     // Get the module for the import
1744     const auto &FunctionsToImportPerModule = ImportList.find(Name);
1745     assert(FunctionsToImportPerModule != ImportList.end());
1746     Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
1747     if (!SrcModuleOrErr)
1748       return SrcModuleOrErr.takeError();
1749     std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
1750     assert(&DestModule.getContext() == &SrcModule->getContext() &&
1751            "Context mismatch");
1752 
1753     // If modules were created with lazy metadata loading, materialize it
1754     // now, before linking it (otherwise this will be a noop).
1755     if (Error Err = SrcModule->materializeMetadata())
1756       return std::move(Err);
1757 
1758     auto &ImportGUIDs = FunctionsToImportPerModule->second;
1759 
1760     // Find the globals to import
1761     SetVector<GlobalValue *> GlobalsToImport;
1762     for (Function &F : *SrcModule) {
1763       if (!F.hasName())
1764         continue;
1765       auto GUID = F.getGUID();
1766       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1767 
1768       bool ImportDefinition =
1769           (MaybeImportType &&
1770            (*MaybeImportType == GlobalValueSummary::Definition));
1771 
1772       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1773                         << " importing function"
1774                         << (ImportDefinition
1775                                 ? " definition "
1776                                 : (MaybeImportType ? " declaration " : " "))
1777                         << GUID << " " << F.getName() << " from "
1778                         << SrcModule->getSourceFileName() << "\n");
1779       if (ImportDefinition) {
1780         if (Error Err = F.materialize())
1781           return std::move(Err);
1782         // MemProf should match function's definition and summary,
1783         // 'thinlto_src_module' is needed.
1784         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1785           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1786           // statistics and debugging.
1787           F.setMetadata(
1788               "thinlto_src_module",
1789               MDNode::get(DestModule.getContext(),
1790                           {MDString::get(DestModule.getContext(),
1791                                          SrcModule->getModuleIdentifier())}));
1792           F.setMetadata(
1793               "thinlto_src_file",
1794               MDNode::get(DestModule.getContext(),
1795                           {MDString::get(DestModule.getContext(),
1796                                          SrcModule->getSourceFileName())}));
1797         }
1798         GlobalsToImport.insert(&F);
1799       }
1800     }
1801     for (GlobalVariable &GV : SrcModule->globals()) {
1802       if (!GV.hasName())
1803         continue;
1804       auto GUID = GV.getGUID();
1805       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1806 
1807       bool ImportDefinition =
1808           (MaybeImportType &&
1809            (*MaybeImportType == GlobalValueSummary::Definition));
1810 
1811       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1812                         << " importing global"
1813                         << (ImportDefinition
1814                                 ? " definition "
1815                                 : (MaybeImportType ? " declaration " : " "))
1816                         << GUID << " " << GV.getName() << " from "
1817                         << SrcModule->getSourceFileName() << "\n");
1818       if (ImportDefinition) {
1819         if (Error Err = GV.materialize())
1820           return std::move(Err);
1821         ImportedGVCount += GlobalsToImport.insert(&GV);
1822       }
1823     }
1824     for (GlobalAlias &GA : SrcModule->aliases()) {
1825       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
1826         continue;
1827       auto GUID = GA.getGUID();
1828       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1829 
1830       bool ImportDefinition =
1831           (MaybeImportType &&
1832            (*MaybeImportType == GlobalValueSummary::Definition));
1833 
1834       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1835                         << " importing alias"
1836                         << (ImportDefinition
1837                                 ? " definition "
1838                                 : (MaybeImportType ? " declaration " : " "))
1839                         << GUID << " " << GA.getName() << " from "
1840                         << SrcModule->getSourceFileName() << "\n");
1841       if (ImportDefinition) {
1842         if (Error Err = GA.materialize())
1843           return std::move(Err);
1844         // Import alias as a copy of its aliasee.
1845         GlobalObject *GO = GA.getAliaseeObject();
1846         if (Error Err = GO->materialize())
1847           return std::move(Err);
1848         auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
1849         LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
1850                           << GO->getName() << " from "
1851                           << SrcModule->getSourceFileName() << "\n");
1852         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1853           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1854           // statistics and debugging.
1855           Fn->setMetadata(
1856               "thinlto_src_module",
1857               MDNode::get(DestModule.getContext(),
1858                           {MDString::get(DestModule.getContext(),
1859                                          SrcModule->getModuleIdentifier())}));
1860           Fn->setMetadata(
1861               "thinlto_src_file",
1862               MDNode::get(DestModule.getContext(),
1863                           {MDString::get(DestModule.getContext(),
1864                                          SrcModule->getSourceFileName())}));
1865         }
1866         GlobalsToImport.insert(Fn);
1867       }
1868     }
1869 
1870     // Upgrade debug info after we're done materializing all the globals and we
1871     // have loaded all the required metadata!
1872     UpgradeDebugInfo(*SrcModule);
1873 
1874     // Set the partial sample profile ratio in the profile summary module flag
1875     // of the imported source module, if applicable, so that the profile summary
1876     // module flag will match with that of the destination module when it's
1877     // imported.
1878     SrcModule->setPartialSampleProfileRatio(Index);
1879 
1880     // Link in the specified functions.
1881     if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
1882                                &GlobalsToImport))
1883       return true;
1884 
1885     if (PrintImports) {
1886       for (const auto *GV : GlobalsToImport)
1887         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
1888                << " from " << SrcModule->getSourceFileName() << "\n";
1889     }
1890 
1891     if (Error Err = Mover.move(std::move(SrcModule),
1892                                GlobalsToImport.getArrayRef(), nullptr,
1893                                /*IsPerformingImport=*/true))
1894       return createStringError(errc::invalid_argument,
1895                                Twine("Function Import: link error: ") +
1896                                    toString(std::move(Err)));
1897 
1898     ImportedCount += GlobalsToImport.size();
1899     NumImportedModules++;
1900   }
1901 
1902   internalizeGVsAfterImport(DestModule);
1903 
1904   NumImportedFunctions += (ImportedCount - ImportedGVCount);
1905   NumImportedGlobalVars += ImportedGVCount;
1906 
1907   // TODO: Print counters for definitions and declarations in the debugging log.
1908   LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
1909                     << " functions for Module "
1910                     << DestModule.getModuleIdentifier() << "\n");
1911   LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
1912                     << " global variables for Module "
1913                     << DestModule.getModuleIdentifier() << "\n");
1914   return ImportedCount;
1915 }
1916 
1917 static bool doImportingForModuleForTest(
1918     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1919                    isPrevailing) {
1920   if (SummaryFile.empty())
1921     report_fatal_error("error: -function-import requires -summary-file\n");
1922   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1923       getModuleSummaryIndexForFile(SummaryFile);
1924   if (!IndexPtrOrErr) {
1925     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1926                           "Error loading file '" + SummaryFile + "': ");
1927     return false;
1928   }
1929   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1930 
1931   // First step is collecting the import list.
1932   FunctionImporter::ImportMapTy ImportList;
1933   // If requested, simply import all functions in the index. This is used
1934   // when testing distributed backend handling via the opt tool, when
1935   // we have distributed indexes containing exactly the summaries to import.
1936   if (ImportAllIndex)
1937     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1938                                                       *Index, ImportList);
1939   else
1940     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1941                                              isPrevailing, *Index, ImportList);
1942 
1943   // Conservatively mark all internal values as promoted. This interface is
1944   // only used when doing importing via the function importing pass. The pass
1945   // is only enabled when testing importing via the 'opt' tool, which does
1946   // not do the ThinLink that would normally determine what values to promote.
1947   for (auto &I : *Index) {
1948     for (auto &S : I.second.SummaryList) {
1949       if (GlobalValue::isLocalLinkage(S->linkage()))
1950         S->setLinkage(GlobalValue::ExternalLinkage);
1951     }
1952   }
1953 
1954   // Next we need to promote to global scope and rename any local values that
1955   // are potentially exported to other modules.
1956   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
1957                              /*GlobalsToImport=*/nullptr)) {
1958     errs() << "Error renaming module\n";
1959     return true;
1960   }
1961 
1962   // Perform the import now.
1963   auto ModuleLoader = [&M](StringRef Identifier) {
1964     return loadFile(std::string(Identifier), M.getContext());
1965   };
1966   FunctionImporter Importer(*Index, ModuleLoader,
1967                             /*ClearDSOLocalOnDeclarations=*/false);
1968   Expected<bool> Result = Importer.importFunctions(M, ImportList);
1969 
1970   // FIXME: Probably need to propagate Errors through the pass manager.
1971   if (!Result) {
1972     logAllUnhandledErrors(Result.takeError(), errs(),
1973                           "Error importing module: ");
1974     return true;
1975   }
1976 
1977   return true;
1978 }
1979 
1980 PreservedAnalyses FunctionImportPass::run(Module &M,
1981                                           ModuleAnalysisManager &AM) {
1982   // This is only used for testing the function import pass via opt, where we
1983   // don't have prevailing information from the LTO context available, so just
1984   // conservatively assume everything is prevailing (which is fine for the very
1985   // limited use of prevailing checking in this pass).
1986   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
1987     return true;
1988   };
1989   if (!doImportingForModuleForTest(M, isPrevailing))
1990     return PreservedAnalyses::all();
1991 
1992   return PreservedAnalyses::none();
1993 }
1994