xref: /llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision ac1a1e5797388598201511d17f05aa088ef4a2e2)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalAlias.h"
25 #include "llvm/IR/GlobalObject.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/Metadata.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/ModuleSummaryIndex.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/Linker/IRMover.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
// Pass statistics for function importing. Going by their descriptions, the
// "*ThinLink" counters record decisions taken during the thin link, while the
// remaining counters record what the backend imported and the liveness of
// symbols in the index.
STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import functions with less than N instructions"));

/// Optional cap on the number of functions imported; the default of -1 means
/// no cap is applied.
static cl::opt<int> ImportCutoff(
    "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import first N functions if N>=0 (default -1)"));

/// When set, selectCallee() skips both its size-threshold rejection and its
/// noinline rejection.
static cl::opt<bool>
    ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                   cl::desc("Import functions with noinline attribute"));

/// Decay factor applied to the `import-instr-limit` threshold as importing
/// proceeds to newly imported functions.
static cl::opt<float>
    ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
                      cl::Hidden, cl::value_desc("x"),
                      cl::desc("As we import functions, multiply the "
                               "`import-instr-limit` threshold by this factor "
                               "before processing newly imported functions"));

/// Same as `import-instr-evolution-factor`, but for functions reached through
/// a hot callsite.
static cl::opt<float> ImportHotInstrFactor(
    "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc("As we import functions called from hot callsite, multiply the "
             "`import-instr-limit` threshold by this factor "
             "before processing newly imported functions"));

/// Threshold multiplier for hot callsites.
static cl::opt<float> ImportHotMultiplier(
    "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
    cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));

/// Threshold multiplier for critical callsites.
static cl::opt<float> ImportCriticalMultiplier(
    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc(
        "Multiply the `import-instr-limit` threshold for critical callsites"));

/// Threshold multiplier for cold callsites; it defaults to 0.
// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
    "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
    cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));

/// Debugging aid: print the functions that get imported.
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                  cl::desc("Print imported functions"));

/// Debugging aid: print information about functions rejected for importing.
static cl::opt<bool> PrintImportFailures(
    "print-import-failures", cl::init(false), cl::Hidden,
    cl::desc("Print information for functions rejected for importing"));

/// Controls whether dead symbols are computed; enabled by default.
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
                                 cl::desc("Compute dead symbols"));

/// Enables import metadata such as 'thinlto_src_module' and
/// 'thinlto_src_file'; disabled by default.
static cl::opt<bool> EnableImportMetadata(
    "enable-import-metadata", cl::init(false), cl::Hidden,
    cl::desc("Enable import metadata like 'thinlto_src_module' and "
             "'thinlto_src_file'"));

/// Summary file to use for function importing when using -function-import from
/// the command line.
static cl::opt<std::string>
    SummaryFile("summary-file",
                cl::desc("The summary file to use for function importing."));

/// Used when testing importing from distributed indexes via opt
/// -function-import.
static cl::opt<bool>
    ImportAllIndex("import-all-index",
                   cl::desc("Import all external functions in index."));

/// This is a test-only option.
/// If this option is enabled, the ThinLTO indexing step will import each
/// function declaration as a fallback. In a real build this may increase ram
/// usage of the indexing step unnecessarily.
/// TODO: Implement selective import (based on combined summary analysis) to
/// ensure the imported function has a use case in the postlink pipeline.
static cl::opt<bool> ImportDeclaration(
    "import-declaration", cl::init(false), cl::Hidden,
    cl::desc("If true, import function declaration as fallback if the function "
             "definition is not imported."));

/// Pass a workload description file - an example of workload would be the
/// functions executed to satisfy a RPC request. A workload is defined by a root
/// function and the list of functions that are (frequently) needed to satisfy
/// it. The module that defines the root will have all those functions imported.
/// The file contains a JSON dictionary. The keys are root functions, the values
/// are lists of functions to import in the module defining the root. It is
/// assumed -funique-internal-linkage-names was used, thus ensuring function
/// names are unique even for local linkage ones.
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists of "
             "functions to import in the module defining the root. It is "
             "assumed -funique-internal-linkage-names was used, to ensure "
             "local linkage functions have unique names. For example: \n"
             "{\n"
             "  \"rootFunction_1\": [\"function_to_import_1\", "
             "\"function_to_import_2\"], \n"
             "  \"rootFunction_2\": [\"function_to_import_3\", "
             "\"function_to_import_4\"] \n"
             "}"),
    cl::Hidden);

/// Opt-in assertion by the user that local-linkage symbols have program-wide
/// unique GUIDs; when set, shouldSkipLocalInAnotherModule() never asks for a
/// local defined in another module to be skipped.
static cl::opt<bool> ImportAssumeUniqueLocal(
    "import-assume-unique-local", cl::init(false),
    cl::desc(
        "By default, a local-linkage global variable won't be imported in the "
        "edge mod1:func -> mod2:local-var (from value profiles) since compiler "
        "cannot assume mod2 is compiled with full path which gives local-var a "
        "program-wide unique GUID. Set this option to true will help cross "
        "module import of such variables. This is only safe if the compiler "
        "user specify the full module path."),
    cl::Hidden);

// Option defined in another translation unit; only declared here.
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
191 
192 // Load lazily a module from \p FileName in \p Context.
193 static std::unique_ptr<Module> loadFile(const std::string &FileName,
194                                         LLVMContext &Context) {
195   SMDiagnostic Err;
196   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
197   // Metadata isn't loaded until functions are imported, to minimize
198   // the memory overhead.
199   std::unique_ptr<Module> Result =
200       getLazyIRFileModule(FileName, Err, Context,
201                           /* ShouldLazyLoadMetadata = */ true);
202   if (!Result) {
203     Err.print("function-import", errs());
204     report_fatal_error("Abort");
205   }
206 
207   return Result;
208 }
209 
210 static bool shouldSkipLocalInAnotherModule(const GlobalVarSummary *RefSummary,
211                                            size_t NumDefs,
212                                            StringRef ImporterModule) {
213   // We can import a local from another module if all inputs are compiled
214   // with full paths or when there is one definition.
215   if (ImportAssumeUniqueLocal || NumDefs == 1)
216     return false;
217   // In other cases, make sure we import the copy in the caller's module if the
218   // referenced value has local linkage. The only time a local variable can
219   // share an entry in the index is if there is a local with the same name in
220   // another module that had the same source file name (in a different
221   // directory), where each was compiled in their own directory so there was not
222   // distinguishing path.
223   return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
224          RefSummary->modulePath() != ImporterModule;
225 }
226 
227 /// Given a list of possible callee implementation for a call site, qualify the
228 /// legality of importing each. The return is a range of pairs. Each pair
229 /// corresponds to a candidate. The first value is the ImportFailureReason for
230 /// that candidate, the second is the candidate.
231 static auto qualifyCalleeCandidates(
232     const ModuleSummaryIndex &Index,
233     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
234     StringRef CallerModulePath) {
235   return llvm::map_range(
236       CalleeSummaryList,
237       [&Index, CalleeSummaryList,
238        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
239           -> std::pair<FunctionImporter::ImportFailureReason,
240                        const GlobalValueSummary *> {
241         auto *GVSummary = SummaryPtr.get();
242         if (!Index.isGlobalValueLive(GVSummary))
243           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
244 
245         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
246           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
247                   GVSummary};
248 
249         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
250 
251         // Ignore any callees that aren't actually functions. This could happen
252         // in the case of GUID hash collisions. It could also happen in theory
253         // for SamplePGO profiles collected on old versions of the code after
254         // renaming, since we synthesize edges to any inlined callees appearing
255         // in the profile.
256         if (!Summary)
257           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
258 
259         // If this is a local function, make sure we import the copy in the
260         // caller's module. The only time a local function can share an entry in
261         // the index is if there is a local with the same name in another module
262         // that had the same source file name (in a different directory), where
263         // each was compiled in their own directory so there was not
264         // distinguishing path.
265         // If the local function is from another module, it must be a reference
266         // due to indirect call profile data since a function pointer can point
267         // to a local in another module. Do the import from another module if
268         // there is only one entry in the list or when all files in the program
269         // are compiled with full path - in both cases the local function has
270         // unique PGO name and GUID.
271         if (shouldSkipLocalInAnotherModule(dyn_cast<GlobalVarSummary>(Summary),
272                                            CalleeSummaryList.size(),
273                                            CallerModulePath))
274           return {
275               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
276               GVSummary};
277 
278         // Skip if it isn't legal to import (e.g. may reference unpromotable
279         // locals).
280         if (Summary->notEligibleToImport())
281           return {FunctionImporter::ImportFailureReason::NotEligible,
282                   GVSummary};
283 
284         return {FunctionImporter::ImportFailureReason::None, GVSummary};
285       });
286 }
287 
288 /// Given a list of possible callee implementation for a call site, select one
289 /// that fits the \p Threshold for function definition import. If none are
290 /// found, the Reason will give the last reason for the failure (last, in the
291 /// order of CalleeSummaryList entries). While looking for a callee definition,
292 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
293 /// candidate; other modules may want to know the function summary or
294 /// declaration even if a definition is not needed.
295 ///
296 /// FIXME: select "best" instead of first that fits. But what is "best"?
297 /// - The smallest: more likely to be inlined.
298 /// - The one with the least outgoing edges (already well optimized).
299 /// - One from a module already being imported from in order to reduce the
300 ///   number of source modules parsed/linked.
301 /// - One that has PGO data attached.
302 /// - [insert you fancy metric here]
303 static const GlobalValueSummary *
304 selectCallee(const ModuleSummaryIndex &Index,
305              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
306              unsigned Threshold, StringRef CallerModulePath,
307              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
308              FunctionImporter::ImportFailureReason &Reason) {
309   // Records the last summary with reason noinline or too-large.
310   TooLargeOrNoInlineSummary = nullptr;
311   auto QualifiedCandidates =
312       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
313   for (auto QualifiedValue : QualifiedCandidates) {
314     Reason = QualifiedValue.first;
315     // Skip a summary if its import is not (proved to be) legal.
316     if (Reason != FunctionImporter::ImportFailureReason::None)
317       continue;
318     auto *Summary =
319         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
320 
321     // Don't bother importing the definition if the chance of inlining it is
322     // not high enough (except under `--force-import-all`).
323     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
324         !ForceImportAll) {
325       TooLargeOrNoInlineSummary = Summary;
326       Reason = FunctionImporter::ImportFailureReason::TooLarge;
327       continue;
328     }
329 
330     // Don't bother importing the definition if we can't inline it anyway.
331     if (Summary->fflags().NoInline && !ForceImportAll) {
332       TooLargeOrNoInlineSummary = Summary;
333       Reason = FunctionImporter::ImportFailureReason::NoInline;
334       continue;
335     }
336 
337     return Summary;
338   }
339   return nullptr;
340 }
341 
namespace {

/// Worklist entry type: a function summary paired with an unsigned value that
/// the inline annotation identifies as the import threshold.
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;

} // anonymous namespace
347 
348 /// Import globals referenced by a function or other globals that are being
349 /// imported, if importing such global is possible.
350 class GlobalsImporter final {
351   const ModuleSummaryIndex &Index;
352   const GVSummaryMapTy &DefinedGVSummaries;
353   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
354       IsPrevailing;
355   FunctionImporter::ImportMapTy &ImportList;
356   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
357 
358   bool shouldImportGlobal(const ValueInfo &VI) {
359     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
360     if (GVS == DefinedGVSummaries.end())
361       return true;
362     // We should not skip import if the module contains a non-prevailing
363     // definition with interposable linkage type. This is required for
364     // correctness in the situation where there is a prevailing def available
365     // for import and marked read-only. In this case, the non-prevailing def
366     // will be converted to a declaration, while the prevailing one becomes
367     // internal, thus no definitions will be available for linking. In order to
368     // prevent undefined symbol link error, the prevailing definition must be
369     // imported.
370     // FIXME: Consider adding a check that the suitable prevailing definition
371     // exists and marked read-only.
372     if (VI.getSummaryList().size() > 1 &&
373         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
374         !IsPrevailing(VI.getGUID(), GVS->second))
375       return true;
376 
377     return false;
378   }
379 
380   void
381   onImportingSummaryImpl(const GlobalValueSummary &Summary,
382                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
383     for (const auto &VI : Summary.refs()) {
384       if (!shouldImportGlobal(VI)) {
385         LLVM_DEBUG(
386             dbgs() << "Ref ignored! Target already in destination module.\n");
387         continue;
388       }
389 
390       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
391 
392       for (const auto &RefSummary : VI.getSummaryList()) {
393         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
394         // Functions could be referenced by global vars - e.g. a vtable; but we
395         // don't currently imagine a reason those would be imported here, rather
396         // than as part of the logic deciding which functions to import (i.e.
397         // based on profile information). Should we decide to handle them here,
398         // we can refactor accordingly at that time.
399         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
400             shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
401                                            Summary.modulePath()))
402           continue;
403 
404         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
405         // Otherwise, definition should take precedence over declaration.
406         auto [Iter, Inserted] =
407             ImportList[RefSummary->modulePath()].try_emplace(
408                 VI.getGUID(), GlobalValueSummary::Definition);
409         // Only update stat and exports if we haven't already imported this
410         // variable.
411         if (!Inserted) {
412           // Set the value to 'std::min(existing-value, new-value)' to make
413           // sure a definition takes precedence over a declaration.
414           Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
415           break;
416         }
417         NumImportedGlobalVarsThinLink++;
418         // Any references made by this variable will be marked exported
419         // later, in ComputeCrossModuleImport, after import decisions are
420         // complete, which is more efficient than adding them here.
421         if (ExportLists)
422           (*ExportLists)[RefSummary->modulePath()].insert(VI);
423 
424         // If variable is not writeonly we attempt to recursively analyze
425         // its references in order to import referenced constants.
426         if (!Index.isWriteOnly(GVS))
427           Worklist.emplace_back(GVS);
428         break;
429       }
430     }
431   }
432 
433 public:
434   GlobalsImporter(
435       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
436       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
437           IsPrevailing,
438       FunctionImporter::ImportMapTy &ImportList,
439       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
440       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
441         IsPrevailing(IsPrevailing), ImportList(ImportList),
442         ExportLists(ExportLists) {}
443 
444   void onImportingSummary(const GlobalValueSummary &Summary) {
445     SmallVector<const GlobalVarSummary *, 128> Worklist;
446     onImportingSummaryImpl(Summary, Worklist);
447     while (!Worklist.empty())
448       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
449   }
450 };
451 
// Forward declaration (internal linkage, so it is defined in this translation
// unit); needed by the debug output emitted before the definition.
static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
453 
/// Determine the list of imports and exports for each module.
///
/// The constructor is protected; instances are obtained through create().
class ModuleImportsManager {
protected:
  /// Callback telling whether a given summary is the prevailing copy of the
  /// symbol identified by the GUID.
  function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
      IsPrevailing;
  /// Summary index the import decisions are based on.
  const ModuleSummaryIndex &Index;
  /// Optional per-module export sets; null when the constructor's default
  /// argument is used.
  DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;

  ModuleImportsManager(
      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          IsPrevailing,
      const ModuleSummaryIndex &Index,
      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
      : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}

public:
  virtual ~ModuleImportsManager() = default;

  /// Given the list of globals defined in a module, compute the list of imports
  /// as well as the list of "exports", i.e. the list of symbols referenced from
  /// another module (that may require promotion).
  virtual void
  computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
                         StringRef ModName,
                         FunctionImporter::ImportMapTy &ImportList);

  /// Factory for import managers.
  // NOTE(review): presumably selects a workload-aware manager when a workload
  // definition is supplied; the definition is not visible here - confirm.
  static std::unique_ptr<ModuleImportsManager>
  create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
             IsPrevailing,
         const ModuleSummaryIndex &Index,
         DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
             nullptr);
};
487 
488 /// A ModuleImportsManager that operates based on a workload definition (see
489 /// -thinlto-workload-def). For modules that do not define workload roots, it
490 /// applies the base ModuleImportsManager import policy.
491 class WorkloadImportsManager : public ModuleImportsManager {
492   // Keep a module name -> value infos to import association. We use it to
493   // determine if a module's import list should be done by the base
494   // ModuleImportsManager or by us.
495   StringMap<DenseSet<ValueInfo>> Workloads;
496 
497   void
498   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
499                          StringRef ModName,
500                          FunctionImporter::ImportMapTy &ImportList) override {
501     auto SetIter = Workloads.find(ModName);
502     if (SetIter == Workloads.end()) {
503       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
504                         << " does not contain the root of any context.\n");
505       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
506                                                           ModName, ImportList);
507     }
508     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
509                       << " contains the root(s) of context(s).\n");
510 
511     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
512                         ExportLists);
513     auto &ValueInfos = SetIter->second;
514     SmallVector<EdgeInfo, 128> GlobWorklist;
515     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
516       auto It = DefinedGVSummaries.find(VI.getGUID());
517       if (It != DefinedGVSummaries.end() &&
518           IsPrevailing(VI.getGUID(), It->second)) {
519         LLVM_DEBUG(
520             dbgs() << "[Workload] " << VI.name()
521                    << " has the prevailing variant already in the module "
522                    << ModName << ". No need to import\n");
523         continue;
524       }
525       auto Candidates =
526           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
527 
528       const GlobalValueSummary *GVS = nullptr;
529       auto PotentialCandidates = llvm::map_range(
530           llvm::make_filter_range(
531               Candidates,
532               [&](const auto &Candidate) {
533                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
534                                   << " from " << Candidate.second->modulePath()
535                                   << " ImportFailureReason: "
536                                   << getFailureName(Candidate.first) << "\n");
537                 return Candidate.first ==
538                         FunctionImporter::ImportFailureReason::None;
539               }),
540           [](const auto &Candidate) { return Candidate.second; });
541       if (PotentialCandidates.empty()) {
542         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
543                           << " because can't find eligible Callee. Guid is: "
544                           << Function::getGUID(VI.name()) << "\n");
545         continue;
546       }
547       /// We will prefer importing the prevailing candidate, if not, we'll
548       /// still pick the first available candidate. The reason we want to make
549       /// sure we do import the prevailing candidate is because the goal of
550       /// workload-awareness is to enable optimizations specializing the call
551       /// graph of that workload. Suppose a function is already defined in the
552       /// module, but it's not the prevailing variant. Suppose also we do not
553       /// inline it (in fact, if it were interposable, we can't inline it),
554       /// but we could specialize it to the workload in other ways. However,
555       /// the linker would drop it in the favor of the prevailing copy.
556       /// Instead, by importing the prevailing variant (assuming also the use
557       /// of `-avail-extern-to-local`), we keep the specialization. We could
558       /// alteranatively make the non-prevailing variant local, but the
559       /// prevailing one is also the one for which we would have previously
560       /// collected profiles, making it preferrable.
561       auto PrevailingCandidates = llvm::make_filter_range(
562           PotentialCandidates, [&](const auto *Candidate) {
563             return IsPrevailing(VI.getGUID(), Candidate);
564           });
565       if (PrevailingCandidates.empty()) {
566         GVS = *PotentialCandidates.begin();
567         if (!llvm::hasSingleElement(PotentialCandidates) &&
568             GlobalValue::isLocalLinkage(GVS->linkage()))
569           LLVM_DEBUG(
570               dbgs()
571               << "[Workload] Found multiple non-prevailing candidates for "
572               << VI.name()
573               << ". This is unexpected. Are module paths passed to the "
574                  "compiler unique for the modules passed to the linker?");
575         // We could in theory have multiple (interposable) copies of a symbol
576         // when there is no prevailing candidate, if say the prevailing copy was
577         // in a native object being linked in. However, we should in theory be
578         // marking all of these non-prevailing IR copies dead in that case, in
579         // which case they won't be candidates.
580         assert(GVS->isLive());
581       } else {
582         assert(llvm::hasSingleElement(PrevailingCandidates));
583         GVS = *PrevailingCandidates.begin();
584       }
585 
586       auto ExportingModule = GVS->modulePath();
587       // We checked that for the prevailing case, but if we happen to have for
588       // example an internal that's defined in this module, it'd have no
589       // PrevailingCandidates.
590       if (ExportingModule == ModName) {
591         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
592                           << " because its defining module is the same as the "
593                              "current module\n");
594         continue;
595       }
596       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
597                         << ExportingModule << " : "
598                         << Function::getGUID(VI.name()) << "\n");
599       ImportList[ExportingModule][VI.getGUID()] =
600           GlobalValueSummary::Definition;
601       GVI.onImportingSummary(*GVS);
602       if (ExportLists)
603         (*ExportLists)[ExportingModule].insert(VI);
604     }
605     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
606   }
607 
608 public:
609   WorkloadImportsManager(
610       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
611           IsPrevailing,
612       const ModuleSummaryIndex &Index,
613       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
614       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
615     // Since the workload def uses names, we need a quick lookup
616     // name->ValueInfo.
617     StringMap<ValueInfo> NameToValueInfo;
618     StringSet<> AmbiguousNames;
619     for (auto &I : Index) {
620       ValueInfo VI = Index.getValueInfo(I);
621       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
622         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
623     }
624     auto DbgReportIfAmbiguous = [&](StringRef Name) {
625       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
626         dbgs() << "[Workload] Function name " << Name
627                << " present in the workload definition is ambiguous. Consider "
628                   "compiling with -funique-internal-linkage-names.";
629       });
630     };
631     std::error_code EC;
632     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
633     if (std::error_code EC = BufferOrErr.getError()) {
634       report_fatal_error("Failed to open context file");
635       return;
636     }
637     auto Buffer = std::move(BufferOrErr.get());
638     std::map<std::string, std::vector<std::string>> WorkloadDefs;
639     json::Path::Root NullRoot;
640     // The JSON is supposed to contain a dictionary matching the type of
641     // WorkloadDefs. For example:
642     // {
643     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
644     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
645     // }
646     auto Parsed = json::parse(Buffer->getBuffer());
647     if (!Parsed)
648       report_fatal_error(Parsed.takeError());
649     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
650       report_fatal_error("Invalid thinlto contextual profile format.");
651     for (const auto &Workload : WorkloadDefs) {
652       const auto &Root = Workload.first;
653       DbgReportIfAmbiguous(Root);
654       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
655       const auto &AllCallees = Workload.second;
656       auto RootIt = NameToValueInfo.find(Root);
657       if (RootIt == NameToValueInfo.end()) {
658         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
659                           << " not found in this linkage unit.\n");
660         continue;
661       }
662       auto RootVI = RootIt->second;
663       if (RootVI.getSummaryList().size() != 1) {
664         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
665                           << " should have exactly one summary, but has "
666                           << RootVI.getSummaryList().size() << ". Skipping.\n");
667         continue;
668       }
669       StringRef RootDefiningModule =
670           RootVI.getSummaryList().front()->modulePath();
671       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
672                         << " is : " << RootDefiningModule << "\n");
673       auto &Set = Workloads[RootDefiningModule];
674       for (const auto &Callee : AllCallees) {
675         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
676         DbgReportIfAmbiguous(Callee);
677         auto ElemIt = NameToValueInfo.find(Callee);
678         if (ElemIt == NameToValueInfo.end()) {
679           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
680           continue;
681         }
682         Set.insert(ElemIt->second);
683       }
684       LLVM_DEBUG({
685         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
686                << " distinct callees.\n";
687         for (const auto &VI : Set) {
688           dbgs() << "[Workload] Root: " << Root
689                  << " Would include: " << VI.getGUID() << "\n";
690         }
691       });
692     }
693   }
694 };
695 
696 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
697     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
698         IsPrevailing,
699     const ModuleSummaryIndex &Index,
700     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
701   if (WorkloadDefinitions.empty()) {
702     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
703     return std::unique_ptr<ModuleImportsManager>(
704         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
705   }
706   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
707   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
708                                                   ExportLists);
709 }
710 
711 static const char *
712 getFailureName(FunctionImporter::ImportFailureReason Reason) {
713   switch (Reason) {
714   case FunctionImporter::ImportFailureReason::None:
715     return "None";
716   case FunctionImporter::ImportFailureReason::GlobalVar:
717     return "GlobalVar";
718   case FunctionImporter::ImportFailureReason::NotLive:
719     return "NotLive";
720   case FunctionImporter::ImportFailureReason::TooLarge:
721     return "TooLarge";
722   case FunctionImporter::ImportFailureReason::InterposableLinkage:
723     return "InterposableLinkage";
724   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
725     return "LocalLinkageNotInModule";
726   case FunctionImporter::ImportFailureReason::NotEligible:
727     return "NotEligible";
728   case FunctionImporter::ImportFailureReason::NoInline:
729     return "NoInline";
730   }
731   llvm_unreachable("invalid reason");
732 }
733 
734 /// Compute the list of functions to import for a given caller. Mark these
735 /// imported functions and the symbols they reference in their source module as
736 /// exported from their source module.
737 static void computeImportForFunction(
738     const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
739     const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
740     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
741         isPrevailing,
742     SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
743     FunctionImporter::ImportMapTy &ImportList,
744     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
745     FunctionImporter::ImportThresholdsTy &ImportThresholds) {
746   GVImporter.onImportingSummary(Summary);
747   static int ImportCount = 0;
748   for (const auto &Edge : Summary.calls()) {
749     ValueInfo VI = Edge.first;
750     LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
751                       << "\n");
752 
753     if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
754       LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
755                         << " reached.\n");
756       continue;
757     }
758 
759     if (DefinedGVSummaries.count(VI.getGUID())) {
760       // FIXME: Consider not skipping import if the module contains
761       // a non-prevailing def with interposable linkage. The prevailing copy
762       // can safely be imported (see shouldImportGlobal()).
763       LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
764       continue;
765     }
766 
767     auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
768       if (Hotness == CalleeInfo::HotnessType::Hot)
769         return ImportHotMultiplier;
770       if (Hotness == CalleeInfo::HotnessType::Cold)
771         return ImportColdMultiplier;
772       if (Hotness == CalleeInfo::HotnessType::Critical)
773         return ImportCriticalMultiplier;
774       return 1.0;
775     };
776 
777     const auto NewThreshold =
778         Threshold * GetBonusMultiplier(Edge.second.getHotness());
779 
780     auto IT = ImportThresholds.insert(std::make_pair(
781         VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
782     bool PreviouslyVisited = !IT.second;
783     auto &ProcessedThreshold = std::get<0>(IT.first->second);
784     auto &CalleeSummary = std::get<1>(IT.first->second);
785     auto &FailureInfo = std::get<2>(IT.first->second);
786 
787     bool IsHotCallsite =
788         Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
789     bool IsCriticalCallsite =
790         Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;
791 
792     const FunctionSummary *ResolvedCalleeSummary = nullptr;
793     if (CalleeSummary) {
794       assert(PreviouslyVisited);
795       // Since the traversal of the call graph is DFS, we can revisit a function
796       // a second time with a higher threshold. In this case, it is added back
797       // to the worklist with the new threshold (so that its own callee chains
798       // can be considered with the higher threshold).
799       if (NewThreshold <= ProcessedThreshold) {
800         LLVM_DEBUG(
801             dbgs() << "ignored! Target was already imported with Threshold "
802                    << ProcessedThreshold << "\n");
803         continue;
804       }
805       // Update with new larger threshold.
806       ProcessedThreshold = NewThreshold;
807       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
808     } else {
809       // If we already rejected importing a callee at the same or higher
810       // threshold, don't waste time calling selectCallee.
811       if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
812         LLVM_DEBUG(
813             dbgs() << "ignored! Target was already rejected with Threshold "
814             << ProcessedThreshold << "\n");
815         if (PrintImportFailures) {
816           assert(FailureInfo &&
817                  "Expected FailureInfo for previously rejected candidate");
818           FailureInfo->Attempts++;
819         }
820         continue;
821       }
822 
823       FunctionImporter::ImportFailureReason Reason{};
824 
825       // `SummaryForDeclImport` is an summary eligible for declaration import.
826       const GlobalValueSummary *SummaryForDeclImport = nullptr;
827       CalleeSummary =
828           selectCallee(Index, VI.getSummaryList(), NewThreshold,
829                        Summary.modulePath(), SummaryForDeclImport, Reason);
830       if (!CalleeSummary) {
831         // There isn't a callee for definition import but one for declaration
832         // import.
833         if (ImportDeclaration && SummaryForDeclImport) {
834           StringRef DeclSourceModule = SummaryForDeclImport->modulePath();
835 
836           // Since definition takes precedence over declaration for the same VI,
837           // try emplace <VI, declaration> pair without checking insert result.
838           // If insert doesn't happen, there must be an existing entry keyed by
839           // VI. Note `ExportLists` only keeps track of exports due to imported
840           // definitions.
841           ImportList[DeclSourceModule].try_emplace(
842               VI.getGUID(), GlobalValueSummary::Declaration);
843         }
844         // Update with new larger threshold if this was a retry (otherwise
845         // we would have already inserted with NewThreshold above). Also
846         // update failure info if requested.
847         if (PreviouslyVisited) {
848           ProcessedThreshold = NewThreshold;
849           if (PrintImportFailures) {
850             assert(FailureInfo &&
851                    "Expected FailureInfo for previously rejected candidate");
852             FailureInfo->Reason = Reason;
853             FailureInfo->Attempts++;
854             FailureInfo->MaxHotness =
855                 std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
856           }
857         } else if (PrintImportFailures) {
858           assert(!FailureInfo &&
859                  "Expected no FailureInfo for newly rejected candidate");
860           FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
861               VI, Edge.second.getHotness(), Reason, 1);
862         }
863         if (ForceImportAll) {
864           std::string Msg = std::string("Failed to import function ") +
865                             VI.name().str() + " due to " +
866                             getFailureName(Reason);
867           auto Error = make_error<StringError>(
868               Msg, make_error_code(errc::not_supported));
869           logAllUnhandledErrors(std::move(Error), errs(),
870                                 "Error importing module: ");
871           break;
872         } else {
873           LLVM_DEBUG(dbgs()
874                      << "ignored! No qualifying callee with summary found.\n");
875           continue;
876         }
877       }
878 
879       // "Resolve" the summary
880       CalleeSummary = CalleeSummary->getBaseObject();
881       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
882 
883       assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
884               (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
885              "selectCallee() didn't honor the threshold");
886 
887       auto ExportModulePath = ResolvedCalleeSummary->modulePath();
888 
889       // Try emplace the definition entry, and update stats based on insertion
890       // status.
891       auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
892           VI.getGUID(), GlobalValueSummary::Definition);
893 
894       // We previously decided to import this GUID definition if it was already
895       // inserted in the set of imports from the exporting module.
896       if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
897         NumImportedFunctionsThinLink++;
898         if (IsHotCallsite)
899           NumImportedHotFunctionsThinLink++;
900         if (IsCriticalCallsite)
901           NumImportedCriticalFunctionsThinLink++;
902       }
903 
904       if (Iter->second == GlobalValueSummary::Declaration)
905         Iter->second = GlobalValueSummary::Definition;
906 
907       // Any calls/references made by this function will be marked exported
908       // later, in ComputeCrossModuleImport, after import decisions are
909       // complete, which is more efficient than adding them here.
910       if (ExportLists)
911         (*ExportLists)[ExportModulePath].insert(VI);
912     }
913 
914     auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
915       // Adjust the threshold for next level of imported functions.
916       // The threshold is different for hot callsites because we can then
917       // inline chains of hot calls.
918       if (IsHotCallsite)
919         return Threshold * ImportHotInstrFactor;
920       return Threshold * ImportInstrFactor;
921     };
922 
923     const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
924 
925     ImportCount++;
926 
927     // Insert the newly imported function to the worklist.
928     Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
929   }
930 }
931 
932 void ModuleImportsManager::computeImportForModule(
933     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
934     FunctionImporter::ImportMapTy &ImportList) {
935   // Worklist contains the list of function imported in this module, for which
936   // we will analyse the callees and may import further down the callgraph.
937   SmallVector<EdgeInfo, 128> Worklist;
938   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
939                       ExportLists);
940   FunctionImporter::ImportThresholdsTy ImportThresholds;
941 
942   // Populate the worklist with the import for the functions in the current
943   // module
944   for (const auto &GVSummary : DefinedGVSummaries) {
945 #ifndef NDEBUG
946     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
947     // so this map look up (and possibly others) can be avoided.
948     auto VI = Index.getValueInfo(GVSummary.first);
949 #endif
950     if (!Index.isGlobalValueLive(GVSummary.second)) {
951       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
952       continue;
953     }
954     auto *FuncSummary =
955         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
956     if (!FuncSummary)
957       // Skip import for global variables
958       continue;
959     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
960     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
961                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
962                              ImportList, ExportLists, ImportThresholds);
963   }
964 
965   // Process the newly imported functions and add callees to the worklist.
966   while (!Worklist.empty()) {
967     auto GVInfo = Worklist.pop_back_val();
968     auto *Summary = std::get<0>(GVInfo);
969     auto Threshold = std::get<1>(GVInfo);
970 
971     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
972       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
973                                IsPrevailing, Worklist, GVI, ImportList,
974                                ExportLists, ImportThresholds);
975   }
976 
977   // Print stats about functions considered but rejected for importing
978   // when requested.
979   if (PrintImportFailures) {
980     dbgs() << "Missed imports into module " << ModName << "\n";
981     for (auto &I : ImportThresholds) {
982       auto &ProcessedThreshold = std::get<0>(I.second);
983       auto &CalleeSummary = std::get<1>(I.second);
984       auto &FailureInfo = std::get<2>(I.second);
985       if (CalleeSummary)
986         continue; // We are going to import.
987       assert(FailureInfo);
988       FunctionSummary *FS = nullptr;
989       if (!FailureInfo->VI.getSummaryList().empty())
990         FS = dyn_cast<FunctionSummary>(
991             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
992       dbgs() << FailureInfo->VI
993              << ": Reason = " << getFailureName(FailureInfo->Reason)
994              << ", Threshold = " << ProcessedThreshold
995              << ", Size = " << (FS ? (int)FS->instCount() : -1)
996              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
997              << ", Attempts = " << FailureInfo->Attempts << "\n";
998     }
999   }
1000 }
1001 
1002 #ifndef NDEBUG
1003 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
1004   auto SL = VI.getSummaryList();
1005   return SL.empty()
1006              ? false
1007              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
1008 }
1009 
1010 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
1011                                GlobalValue::GUID G) {
1012   if (const auto &VI = Index.getValueInfo(G))
1013     return isGlobalVarSummary(Index, VI);
1014   return false;
1015 }
1016 
1017 // Return the number of global variable summaries in ExportSet.
1018 static unsigned
1019 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1020                       FunctionImporter::ExportSetTy &ExportSet) {
1021   unsigned NumGVS = 0;
1022   for (auto &VI : ExportSet)
1023     if (isGlobalVarSummary(Index, VI.getGUID()))
1024       ++NumGVS;
1025   return NumGVS;
1026 }
1027 
1028 // Given ImportMap, return the number of global variable summaries and record
1029 // the number of defined function summaries as output parameter.
1030 static unsigned
1031 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1032                       FunctionImporter::FunctionsToImportTy &ImportMap,
1033                       unsigned &DefinedFS) {
1034   unsigned NumGVS = 0;
1035   DefinedFS = 0;
1036   for (auto &[GUID, Type] : ImportMap) {
1037     if (isGlobalVarSummary(Index, GUID))
1038       ++NumGVS;
1039     else if (Type == GlobalValueSummary::Definition)
1040       ++DefinedFS;
1041   }
1042   return NumGVS;
1043 }
1044 #endif
1045 
1046 #ifndef NDEBUG
1047 static bool checkVariableImport(
1048     const ModuleSummaryIndex &Index,
1049     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1050     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1051   DenseSet<GlobalValue::GUID> FlattenedImports;
1052 
1053   for (auto &ImportPerModule : ImportLists)
1054     for (auto &ExportPerModule : ImportPerModule.second)
1055       for (auto &[GUID, Type] : ExportPerModule.second)
1056         FlattenedImports.insert(GUID);
1057 
1058   // Checks that all GUIDs of read/writeonly vars we see in export lists
1059   // are also in the import lists. Otherwise we my face linker undefs,
1060   // because readonly and writeonly vars are internalized in their
1061   // source modules. The exception would be if it has a linkage type indicating
1062   // that there may have been a copy existing in the importing module (e.g.
1063   // linkonce_odr). In that case we cannot accurately do this checking.
1064   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1065                                                   const ValueInfo &VI) {
1066     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1067         Index.findSummaryInModule(VI, ModulePath));
1068     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1069            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1070              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1071              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1072   };
1073 
1074   for (auto &ExportPerModule : ExportLists)
1075     for (auto &VI : ExportPerModule.second)
1076       if (!FlattenedImports.count(VI.getGUID()) &&
1077           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1078         return false;
1079 
1080   return true;
1081 }
1082 #endif
1083 
1084 /// Compute all the import and export for every module using the Index.
1085 void llvm::ComputeCrossModuleImport(
1086     const ModuleSummaryIndex &Index,
1087     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1088     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1089         isPrevailing,
1090     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1091     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1092   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1093   // For each module that has function defined, compute the import/export lists.
1094   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1095     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1096     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1097                       << DefinedGVSummaries.first << "'\n");
1098     MIS->computeImportForModule(DefinedGVSummaries.second,
1099                                 DefinedGVSummaries.first, ImportList);
1100   }
1101 
1102   // When computing imports we only added the variables and functions being
1103   // imported to the export list. We also need to mark any references and calls
1104   // they make as exported as well. We do this here, as it is more efficient
1105   // since we may import the same values multiple times into different modules
1106   // during the import computation.
1107   for (auto &ELI : ExportLists) {
1108     // `NewExports` tracks the VI that gets exported because the full definition
1109     // of its user/referencer gets exported.
1110     FunctionImporter::ExportSetTy NewExports;
1111     const auto &DefinedGVSummaries =
1112         ModuleToDefinedGVSummaries.lookup(ELI.first);
1113     for (auto &EI : ELI.second) {
1114       // Find the copy defined in the exporting module so that we can mark the
1115       // values it references in that specific definition as exported.
1116       // Below we will add all references and called values, without regard to
1117       // whether they are also defined in this module. We subsequently prune the
1118       // list to only include those defined in the exporting module, see comment
1119       // there as to why.
1120       auto DS = DefinedGVSummaries.find(EI.getGUID());
1121       // Anything marked exported during the import computation must have been
1122       // defined in the exporting module.
1123       assert(DS != DefinedGVSummaries.end());
1124       auto *S = DS->getSecond();
1125       S = S->getBaseObject();
1126       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1127         // Export referenced functions and variables. We don't export/promote
1128         // objects referenced by writeonly variable initializer, because
1129         // we convert such variables initializers to "zeroinitializer".
1130         // See processGlobalForThinLTO.
1131         if (!Index.isWriteOnly(GVS))
1132           for (const auto &VI : GVS->refs())
1133             NewExports.insert(VI);
1134       } else {
1135         auto *FS = cast<FunctionSummary>(S);
1136         for (const auto &Edge : FS->calls())
1137           NewExports.insert(Edge.first);
1138         for (const auto &Ref : FS->refs())
1139           NewExports.insert(Ref);
1140       }
1141     }
1142     // Prune list computed above to only include values defined in the
1143     // exporting module. We do this after the above insertion since we may hit
1144     // the same ref/call target multiple times in above loop, and it is more
1145     // efficient to avoid a set lookup each time.
1146     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1147       if (!DefinedGVSummaries.count(EI->getGUID()))
1148         NewExports.erase(EI++);
1149       else
1150         ++EI;
1151     }
1152     ELI.second.insert(NewExports.begin(), NewExports.end());
1153   }
1154 
1155   assert(checkVariableImport(Index, ImportLists, ExportLists));
1156 #ifndef NDEBUG
1157   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1158                     << " modules:\n");
1159   for (auto &ModuleImports : ImportLists) {
1160     auto ModName = ModuleImports.first;
1161     auto &Exports = ExportLists[ModName];
1162     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1163     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1164                       << Exports.size() - NumGVS << " functions and " << NumGVS
1165                       << " vars. Imports from " << ModuleImports.second.size()
1166                       << " modules.\n");
1167     for (auto &Src : ModuleImports.second) {
1168       auto SrcModName = Src.first;
1169       unsigned DefinedFS = 0;
1170       unsigned NumGVSPerMod =
1171           numGlobalVarSummaries(Index, Src.second, DefinedFS);
1172       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1173                         << Src.second.size() - NumGVSPerMod - DefinedFS
1174                         << " function declarations imported from " << SrcModName
1175                         << "\n");
1176       LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1177                         << " global vars imported from " << SrcModName << "\n");
1178     }
1179   }
1180 #endif
1181 }
1182 
1183 #ifndef NDEBUG
1184 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1185                                     StringRef ModulePath,
1186                                     FunctionImporter::ImportMapTy &ImportList) {
1187   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1188                     << ImportList.size() << " modules.\n");
1189   for (auto &Src : ImportList) {
1190     auto SrcModName = Src.first;
1191     unsigned DefinedFS = 0;
1192     unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
1193     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1194                       << Src.second.size() - DefinedFS - NumGVSPerMod
1195                       << " function declarations imported from " << SrcModName
1196                       << "\n");
1197     LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1198                       << SrcModName << "\n");
1199   }
1200 }
1201 #endif
1202 
1203 /// Compute all the imports for the given module using the Index.
1204 ///
1205 /// \p isPrevailing is a callback that will be called with a global value's GUID
1206 /// and summary and should return whether the module corresponding to the
1207 /// summary contains the linker-prevailing copy of that value.
1208 ///
1209 /// \p ImportList will be populated with a map that can be passed to
1210 /// FunctionImporter::importFunctions() above (see description there).
1211 static void ComputeCrossModuleImportForModuleForTest(
1212     StringRef ModulePath,
1213     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1214         isPrevailing,
1215     const ModuleSummaryIndex &Index,
1216     FunctionImporter::ImportMapTy &ImportList) {
1217   // Collect the list of functions this module defines.
1218   // GUID -> Summary
1219   GVSummaryMapTy FunctionSummaryMap;
1220   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1221 
1222   // Compute the import list for this module.
1223   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1224   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1225   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1226 
1227 #ifndef NDEBUG
1228   dumpImportListForModule(Index, ModulePath, ImportList);
1229 #endif
1230 }
1231 
1232 /// Mark all external summaries in \p Index for import into the given module.
1233 /// Used for testing the case of distributed builds using a distributed index.
1234 ///
1235 /// \p ImportList will be populated with a map that can be passed to
1236 /// FunctionImporter::importFunctions() above (see description there).
1237 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1238     StringRef ModulePath, const ModuleSummaryIndex &Index,
1239     FunctionImporter::ImportMapTy &ImportList) {
1240   for (const auto &GlobalList : Index) {
1241     // Ignore entries for undefined references.
1242     if (GlobalList.second.SummaryList.empty())
1243       continue;
1244 
1245     auto GUID = GlobalList.first;
1246     assert(GlobalList.second.SummaryList.size() == 1 &&
1247            "Expected individual combined index to have one summary per GUID");
1248     auto &Summary = GlobalList.second.SummaryList[0];
1249     // Skip the summaries for the importing module. These are included to
1250     // e.g. record required linkage changes.
1251     if (Summary->modulePath() == ModulePath)
1252       continue;
1253     // Add an entry to provoke importing by thinBackend.
1254     auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
1255         GUID, Summary->importType());
1256     if (!Inserted) {
1257       // Use 'std::min' to make sure definition (with enum value 0) takes
1258       // precedence over declaration (with enum value 1).
1259       Iter->second = std::min(Iter->second, Summary->importType());
1260     }
1261   }
1262 #ifndef NDEBUG
1263   dumpImportListForModule(Index, ModulePath, ImportList);
1264 #endif
1265 }
1266 
1267 // For SamplePGO, the indirect call targets for local functions will
1268 // have its original name annotated in profile. We try to find the
1269 // corresponding PGOFuncName as the GUID, and fix up the edges
1270 // accordingly.
1271 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1272                                      FunctionSummary *FS) {
1273   for (auto &EI : FS->mutableCalls()) {
1274     if (!EI.first.getSummaryList().empty())
1275       continue;
1276     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1277     if (GUID == 0)
1278       continue;
1279     // Update the edge to point directly to the correct GUID.
1280     auto VI = Index.getValueInfo(GUID);
1281     if (llvm::any_of(
1282             VI.getSummaryList(),
1283             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1284               // The mapping from OriginalId to GUID may return a GUID
1285               // that corresponds to a static variable. Filter it out here.
1286               // This can happen when
1287               // 1) There is a call to a library function which is not defined
1288               // in the index.
1289               // 2) There is a static variable with the  OriginalGUID identical
1290               // to the GUID of the library function in 1);
1291               // When this happens the static variable in 2) will be found,
1292               // which needs to be filtered out.
1293               return SummaryPtr->getSummaryKind() ==
1294                      GlobalValueSummary::GlobalVarKind;
1295             }))
1296       continue;
1297     EI.first = VI;
1298   }
1299 }
1300 
1301 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1302   for (const auto &Entry : Index) {
1303     for (const auto &S : Entry.second.SummaryList) {
1304       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1305         updateValueInfoForIndirectCalls(Index, FS);
1306     }
1307   }
1308 }
1309 
1310 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1311     ModuleSummaryIndex &Index,
1312     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1313     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1314   assert(!Index.withGlobalValueDeadStripping());
1315   if (!ComputeDead ||
1316       // Don't do anything when nothing is live, this is friendly with tests.
1317       GUIDPreservedSymbols.empty()) {
1318     // Still need to update indirect calls.
1319     updateIndirectCalls(Index);
1320     return;
1321   }
1322   unsigned LiveSymbols = 0;
1323   SmallVector<ValueInfo, 128> Worklist;
1324   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1325   for (auto GUID : GUIDPreservedSymbols) {
1326     ValueInfo VI = Index.getValueInfo(GUID);
1327     if (!VI)
1328       continue;
1329     for (const auto &S : VI.getSummaryList())
1330       S->setLive(true);
1331   }
1332 
1333   // Add values flagged in the index as live roots to the worklist.
1334   for (const auto &Entry : Index) {
1335     auto VI = Index.getValueInfo(Entry);
1336     for (const auto &S : Entry.second.SummaryList) {
1337       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1338         updateValueInfoForIndirectCalls(Index, FS);
1339       if (S->isLive()) {
1340         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1341         Worklist.push_back(VI);
1342         ++LiveSymbols;
1343         break;
1344       }
1345     }
1346   }
1347 
1348   // Make value live and add it to the worklist if it was not live before.
1349   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1350     // FIXME: If we knew which edges were created for indirect call profiles,
1351     // we could skip them here. Any that are live should be reached via
1352     // other edges, e.g. reference edges. Otherwise, using a profile collected
1353     // on a slightly different binary might provoke preserving, importing
1354     // and ultimately promoting calls to functions not linked into this
1355     // binary, which increases the binary size unnecessarily. Note that
1356     // if this code changes, the importer needs to change so that edges
1357     // to functions marked dead are skipped.
1358 
1359     if (llvm::any_of(VI.getSummaryList(),
1360                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1361                        return S->isLive();
1362                      }))
1363       return;
1364 
1365     // We only keep live symbols that are known to be non-prevailing if any are
1366     // available_externally, linkonceodr, weakodr. Those symbols are discarded
1367     // later in the EliminateAvailableExternally pass and setting them to
1368     // not-live could break downstreams users of liveness information (PR36483)
1369     // or limit optimization opportunities.
1370     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1371       bool KeepAliveLinkage = false;
1372       bool Interposable = false;
1373       for (const auto &S : VI.getSummaryList()) {
1374         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1375             S->linkage() == GlobalValue::WeakODRLinkage ||
1376             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1377           KeepAliveLinkage = true;
1378         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1379           Interposable = true;
1380       }
1381 
1382       if (!IsAliasee) {
1383         if (!KeepAliveLinkage)
1384           return;
1385 
1386         if (Interposable)
1387           report_fatal_error(
1388               "Interposable and available_externally/linkonce_odr/weak_odr "
1389               "symbol");
1390       }
1391     }
1392 
1393     for (const auto &S : VI.getSummaryList())
1394       S->setLive(true);
1395     ++LiveSymbols;
1396     Worklist.push_back(VI);
1397   };
1398 
1399   while (!Worklist.empty()) {
1400     auto VI = Worklist.pop_back_val();
1401     for (const auto &Summary : VI.getSummaryList()) {
1402       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1403         // If this is an alias, visit the aliasee VI to ensure that all copies
1404         // are marked live and it is added to the worklist for further
1405         // processing of its references.
1406         visit(AS->getAliaseeVI(), true);
1407         continue;
1408       }
1409       for (auto Ref : Summary->refs())
1410         visit(Ref, false);
1411       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1412         for (auto Call : FS->calls())
1413           visit(Call.first, false);
1414     }
1415   }
1416   Index.setWithGlobalValueDeadStripping();
1417 
1418   unsigned DeadSymbols = Index.size() - LiveSymbols;
1419   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1420                     << " symbols Dead \n");
1421   NumDeadSymbols += DeadSymbols;
1422   NumLiveSymbols += LiveSymbols;
1423 }
1424 
1425 // Compute dead symbols and propagate constants in combined index.
1426 void llvm::computeDeadSymbolsWithConstProp(
1427     ModuleSummaryIndex &Index,
1428     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1429     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1430     bool ImportEnabled) {
1431   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1432                                            isPrevailing);
1433   if (ImportEnabled)
1434     Index.propagateAttributes(GUIDPreservedSymbols);
1435 }
1436 
1437 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1438 /// \p ModulePath.
1439 void llvm::gatherImportedSummariesForModule(
1440     StringRef ModulePath,
1441     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1442     const FunctionImporter::ImportMapTy &ImportList,
1443     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
1444     GVSummaryPtrSet &DecSummaries) {
1445   // Include all summaries from the importing module.
1446   ModuleToSummariesForIndex[std::string(ModulePath)] =
1447       ModuleToDefinedGVSummaries.lookup(ModulePath);
1448   // Include summaries for imports.
1449   for (const auto &ILI : ImportList) {
1450     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1451 
1452     const auto &DefinedGVSummaries =
1453         ModuleToDefinedGVSummaries.lookup(ILI.first);
1454     for (const auto &[GUID, Type] : ILI.second) {
1455       const auto &DS = DefinedGVSummaries.find(GUID);
1456       assert(DS != DefinedGVSummaries.end() &&
1457              "Expected a defined summary for imported global value");
1458       if (Type == GlobalValueSummary::Declaration)
1459         DecSummaries.insert(DS->second);
1460 
1461       SummariesForIndex[GUID] = DS->second;
1462     }
1463   }
1464 }
1465 
1466 /// Emit the files \p ModulePath will import from into \p OutputFilename.
1467 std::error_code llvm::EmitImportsFiles(
1468     StringRef ModulePath, StringRef OutputFilename,
1469     const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1470   std::error_code EC;
1471   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1472   if (EC)
1473     return EC;
1474   for (const auto &ILI : ModuleToSummariesForIndex)
1475     // The ModuleToSummariesForIndex map includes an entry for the current
1476     // Module (needed for writing out the index files). We don't want to
1477     // include it in the imports file, however, so filter it out.
1478     if (ILI.first != ModulePath)
1479       ImportsOS << ILI.first << "\n";
1480   return std::error_code();
1481 }
1482 
1483 bool llvm::convertToDeclaration(GlobalValue &GV) {
1484   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1485                     << "\n");
1486   if (Function *F = dyn_cast<Function>(&GV)) {
1487     F->deleteBody();
1488     F->clearMetadata();
1489     F->setComdat(nullptr);
1490   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1491     V->setInitializer(nullptr);
1492     V->setLinkage(GlobalValue::ExternalLinkage);
1493     V->clearMetadata();
1494     V->setComdat(nullptr);
1495   } else {
1496     GlobalValue *NewGV;
1497     if (GV.getValueType()->isFunctionTy())
1498       NewGV =
1499           Function::Create(cast<FunctionType>(GV.getValueType()),
1500                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1501                            "", GV.getParent());
1502     else
1503       NewGV =
1504           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1505                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1506                              /*init*/ nullptr, "",
1507                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1508                              GV.getType()->getAddressSpace());
1509     NewGV->takeName(&GV);
1510     GV.replaceAllUsesWith(NewGV);
1511     return false;
1512   }
1513   if (!GV.isImplicitDSOLocal())
1514     GV.setDSOLocal(false);
1515   return true;
1516 }
1517 
1518 void llvm::thinLTOFinalizeInModule(Module &TheModule,
1519                                    const GVSummaryMapTy &DefinedGlobals,
1520                                    bool PropagateAttrs) {
1521   DenseSet<Comdat *> NonPrevailingComdats;
1522   auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
1523     // See if the global summary analysis computed a new resolved linkage.
1524     const auto &GS = DefinedGlobals.find(GV.getGUID());
1525     if (GS == DefinedGlobals.end())
1526       return;
1527 
1528     if (Propagate)
1529       if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
1530         if (Function *F = dyn_cast<Function>(&GV)) {
1531           // TODO: propagate ReadNone and ReadOnly.
1532           if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
1533             F->setDoesNotAccessMemory();
1534 
1535           if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
1536             F->setOnlyReadsMemory();
1537 
1538           if (FS->fflags().NoRecurse && !F->doesNotRecurse())
1539             F->setDoesNotRecurse();
1540 
1541           if (FS->fflags().NoUnwind && !F->doesNotThrow())
1542             F->setDoesNotThrow();
1543         }
1544       }
1545 
1546     auto NewLinkage = GS->second->linkage();
1547     if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
1548         // Don't internalize anything here, because the code below
1549         // lacks necessary correctness checks. Leave this job to
1550         // LLVM 'internalize' pass.
1551         GlobalValue::isLocalLinkage(NewLinkage) ||
1552         // In case it was dead and already converted to declaration.
1553         GV.isDeclaration())
1554       return;
1555 
1556     // Set the potentially more constraining visibility computed from summaries.
1557     // The DefaultVisibility condition is because older GlobalValueSummary does
1558     // not record DefaultVisibility and we don't want to change protected/hidden
1559     // to default.
1560     if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
1561       GV.setVisibility(GS->second->getVisibility());
1562 
1563     if (NewLinkage == GV.getLinkage())
1564       return;
1565 
1566     // Check for a non-prevailing def that has interposable linkage
1567     // (e.g. non-odr weak or linkonce). In that case we can't simply
1568     // convert to available_externally, since it would lose the
1569     // interposable property and possibly get inlined. Simply drop
1570     // the definition in that case.
1571     if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
1572         GlobalValue::isInterposableLinkage(GV.getLinkage())) {
1573       if (!convertToDeclaration(GV))
1574         // FIXME: Change this to collect replaced GVs and later erase
1575         // them from the parent module once thinLTOResolvePrevailingGUID is
1576         // changed to enable this for aliases.
1577         llvm_unreachable("Expected GV to be converted");
1578     } else {
1579       // If all copies of the original symbol had global unnamed addr and
1580       // linkonce_odr linkage, or if all of them had local unnamed addr linkage
1581       // and are constants, then it should be an auto hide symbol. In that case
1582       // the thin link would have marked it as CanAutoHide. Add hidden
1583       // visibility to the symbol to preserve the property.
1584       if (NewLinkage == GlobalValue::WeakODRLinkage &&
1585           GS->second->canAutoHide()) {
1586         assert(GV.canBeOmittedFromSymbolTable());
1587         GV.setVisibility(GlobalValue::HiddenVisibility);
1588       }
1589 
1590       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
1591                         << "` from " << GV.getLinkage() << " to " << NewLinkage
1592                         << "\n");
1593       GV.setLinkage(NewLinkage);
1594     }
1595     // Remove declarations from comdats, including available_externally
1596     // as this is a declaration for the linker, and will be dropped eventually.
1597     // It is illegal for comdats to contain declarations.
1598     auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
1599     if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
1600       if (GO->getComdat()->getName() == GO->getName())
1601         NonPrevailingComdats.insert(GO->getComdat());
1602       GO->setComdat(nullptr);
1603     }
1604   };
1605 
1606   // Process functions and global now
1607   for (auto &GV : TheModule)
1608     FinalizeInModule(GV, PropagateAttrs);
1609   for (auto &GV : TheModule.globals())
1610     FinalizeInModule(GV);
1611   for (auto &GV : TheModule.aliases())
1612     FinalizeInModule(GV);
1613 
1614   // For a non-prevailing comdat, all its members must be available_externally.
1615   // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
1616   // local linkage GlobalValues.
1617   if (NonPrevailingComdats.empty())
1618     return;
1619   for (auto &GO : TheModule.global_objects()) {
1620     if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
1621       GO.setComdat(nullptr);
1622       GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
1623     }
1624   }
1625   bool Changed;
1626   do {
1627     Changed = false;
1628     // If an alias references a GlobalValue in a non-prevailing comdat, change
1629     // it to available_externally. For simplicity we only handle GlobalValue and
1630     // ConstantExpr with a base object. ConstantExpr without a base object is
1631     // unlikely used in a COMDAT.
1632     for (auto &GA : TheModule.aliases()) {
1633       if (GA.hasAvailableExternallyLinkage())
1634         continue;
1635       GlobalObject *Obj = GA.getAliaseeObject();
1636       assert(Obj && "aliasee without an base object is unimplemented");
1637       if (Obj->hasAvailableExternallyLinkage()) {
1638         GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
1639         Changed = true;
1640       }
1641     }
1642   } while (Changed);
1643 }
1644 
1645 /// Run internalization on \p TheModule based on symmary analysis.
1646 void llvm::thinLTOInternalizeModule(Module &TheModule,
1647                                     const GVSummaryMapTy &DefinedGlobals) {
1648   // Declare a callback for the internalize pass that will ask for every
1649   // candidate GlobalValue if it can be internalized or not.
1650   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1651     // It may be the case that GV is on a chain of an ifunc, its alias and
1652     // subsequent aliases. In this case, the summary for the value is not
1653     // available.
1654     if (isa<GlobalIFunc>(&GV) ||
1655         (isa<GlobalAlias>(&GV) &&
1656          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1657       return true;
1658 
1659     // Lookup the linkage recorded in the summaries during global analysis.
1660     auto GS = DefinedGlobals.find(GV.getGUID());
1661     if (GS == DefinedGlobals.end()) {
1662       // Must have been promoted (possibly conservatively). Find original
1663       // name so that we can access the correct summary and see if it can
1664       // be internalized again.
1665       // FIXME: Eventually we should control promotion instead of promoting
1666       // and internalizing again.
1667       StringRef OrigName =
1668           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1669       std::string OrigId = GlobalValue::getGlobalIdentifier(
1670           OrigName, GlobalValue::InternalLinkage,
1671           TheModule.getSourceFileName());
1672       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1673       if (GS == DefinedGlobals.end()) {
1674         // Also check the original non-promoted non-globalized name. In some
1675         // cases a preempted weak value is linked in as a local copy because
1676         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1677         // In that case, since it was originally not a local value, it was
1678         // recorded in the index using the original name.
1679         // FIXME: This may not be needed once PR27866 is fixed.
1680         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1681         assert(GS != DefinedGlobals.end());
1682       }
1683     }
1684     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1685   };
1686 
1687   // FIXME: See if we can just internalize directly here via linkage changes
1688   // based on the index, rather than invoking internalizeModule.
1689   internalizeModule(TheModule, MustPreserveGV);
1690 }
1691 
1692 /// Make alias a clone of its aliasee.
1693 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1694   Function *Fn = cast<Function>(GA->getAliaseeObject());
1695 
1696   ValueToValueMapTy VMap;
1697   Function *NewFn = CloneFunction(Fn, VMap);
1698   // Clone should use the original alias's linkage, visibility and name, and we
1699   // ensure all uses of alias instead use the new clone (casted if necessary).
1700   NewFn->setLinkage(GA->getLinkage());
1701   NewFn->setVisibility(GA->getVisibility());
1702   GA->replaceAllUsesWith(NewFn);
1703   NewFn->takeName(GA);
1704   return NewFn;
1705 }
1706 
1707 // Internalize values that we marked with specific attribute
1708 // in processGlobalForThinLTO.
1709 static void internalizeGVsAfterImport(Module &M) {
1710   for (auto &GV : M.globals())
1711     // Skip GVs which have been converted to declarations
1712     // by dropDeadSymbols.
1713     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1714       GV.setLinkage(GlobalValue::InternalLinkage);
1715       GV.setVisibility(GlobalValue::DefaultVisibility);
1716     }
1717 }
1718 
1719 // Automatically import functions in Module \p DestModule based on the summaries
1720 // index.
1721 Expected<bool> FunctionImporter::importFunctions(
1722     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
1723   LLVM_DEBUG(dbgs() << "Starting import for Module "
1724                     << DestModule.getModuleIdentifier() << "\n");
1725   unsigned ImportedCount = 0, ImportedGVCount = 0;
1726 
1727   IRMover Mover(DestModule);
1728   // Do the actual import of functions now, one Module at a time
1729   std::set<StringRef> ModuleNameOrderedList;
1730   for (const auto &FunctionsToImportPerModule : ImportList) {
1731     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
1732   }
1733 
1734   auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
1735                            GlobalValue::GUID GUID)
1736       -> std::optional<GlobalValueSummary::ImportKind> {
1737     auto Iter = GUIDToImportType.find(GUID);
1738     if (Iter == GUIDToImportType.end())
1739       return std::nullopt;
1740     return Iter->second;
1741   };
1742 
1743   for (const auto &Name : ModuleNameOrderedList) {
1744     // Get the module for the import
1745     const auto &FunctionsToImportPerModule = ImportList.find(Name);
1746     assert(FunctionsToImportPerModule != ImportList.end());
1747     Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
1748     if (!SrcModuleOrErr)
1749       return SrcModuleOrErr.takeError();
1750     std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
1751     assert(&DestModule.getContext() == &SrcModule->getContext() &&
1752            "Context mismatch");
1753 
1754     // If modules were created with lazy metadata loading, materialize it
1755     // now, before linking it (otherwise this will be a noop).
1756     if (Error Err = SrcModule->materializeMetadata())
1757       return std::move(Err);
1758 
1759     auto &ImportGUIDs = FunctionsToImportPerModule->second;
1760 
1761     // Find the globals to import
1762     SetVector<GlobalValue *> GlobalsToImport;
1763     for (Function &F : *SrcModule) {
1764       if (!F.hasName())
1765         continue;
1766       auto GUID = F.getGUID();
1767       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1768 
1769       bool ImportDefinition =
1770           (MaybeImportType &&
1771            (*MaybeImportType == GlobalValueSummary::Definition));
1772 
1773       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1774                         << " importing function"
1775                         << (ImportDefinition
1776                                 ? " definition "
1777                                 : (MaybeImportType ? " declaration " : " "))
1778                         << GUID << " " << F.getName() << " from "
1779                         << SrcModule->getSourceFileName() << "\n");
1780       if (ImportDefinition) {
1781         if (Error Err = F.materialize())
1782           return std::move(Err);
1783         // MemProf should match function's definition and summary,
1784         // 'thinlto_src_module' is needed.
1785         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1786           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1787           // statistics and debugging.
1788           F.setMetadata(
1789               "thinlto_src_module",
1790               MDNode::get(DestModule.getContext(),
1791                           {MDString::get(DestModule.getContext(),
1792                                          SrcModule->getModuleIdentifier())}));
1793           F.setMetadata(
1794               "thinlto_src_file",
1795               MDNode::get(DestModule.getContext(),
1796                           {MDString::get(DestModule.getContext(),
1797                                          SrcModule->getSourceFileName())}));
1798         }
1799         GlobalsToImport.insert(&F);
1800       }
1801     }
1802     for (GlobalVariable &GV : SrcModule->globals()) {
1803       if (!GV.hasName())
1804         continue;
1805       auto GUID = GV.getGUID();
1806       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1807 
1808       bool ImportDefinition =
1809           (MaybeImportType &&
1810            (*MaybeImportType == GlobalValueSummary::Definition));
1811 
1812       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1813                         << " importing global"
1814                         << (ImportDefinition
1815                                 ? " definition "
1816                                 : (MaybeImportType ? " declaration " : " "))
1817                         << GUID << " " << GV.getName() << " from "
1818                         << SrcModule->getSourceFileName() << "\n");
1819       if (ImportDefinition) {
1820         if (Error Err = GV.materialize())
1821           return std::move(Err);
1822         ImportedGVCount += GlobalsToImport.insert(&GV);
1823       }
1824     }
1825     for (GlobalAlias &GA : SrcModule->aliases()) {
1826       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
1827         continue;
1828       auto GUID = GA.getGUID();
1829       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1830 
1831       bool ImportDefinition =
1832           (MaybeImportType &&
1833            (*MaybeImportType == GlobalValueSummary::Definition));
1834 
1835       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1836                         << " importing alias"
1837                         << (ImportDefinition
1838                                 ? " definition "
1839                                 : (MaybeImportType ? " declaration " : " "))
1840                         << GUID << " " << GA.getName() << " from "
1841                         << SrcModule->getSourceFileName() << "\n");
1842       if (ImportDefinition) {
1843         if (Error Err = GA.materialize())
1844           return std::move(Err);
1845         // Import alias as a copy of its aliasee.
1846         GlobalObject *GO = GA.getAliaseeObject();
1847         if (Error Err = GO->materialize())
1848           return std::move(Err);
1849         auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
1850         LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
1851                           << GO->getName() << " from "
1852                           << SrcModule->getSourceFileName() << "\n");
1853         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1854           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1855           // statistics and debugging.
1856           Fn->setMetadata(
1857               "thinlto_src_module",
1858               MDNode::get(DestModule.getContext(),
1859                           {MDString::get(DestModule.getContext(),
1860                                          SrcModule->getModuleIdentifier())}));
1861           Fn->setMetadata(
1862               "thinlto_src_file",
1863               MDNode::get(DestModule.getContext(),
1864                           {MDString::get(DestModule.getContext(),
1865                                          SrcModule->getSourceFileName())}));
1866         }
1867         GlobalsToImport.insert(Fn);
1868       }
1869     }
1870 
1871     // Upgrade debug info after we're done materializing all the globals and we
1872     // have loaded all the required metadata!
1873     UpgradeDebugInfo(*SrcModule);
1874 
1875     // Set the partial sample profile ratio in the profile summary module flag
1876     // of the imported source module, if applicable, so that the profile summary
1877     // module flag will match with that of the destination module when it's
1878     // imported.
1879     SrcModule->setPartialSampleProfileRatio(Index);
1880 
1881     // Link in the specified functions.
1882     if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
1883                                &GlobalsToImport))
1884       return true;
1885 
1886     if (PrintImports) {
1887       for (const auto *GV : GlobalsToImport)
1888         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
1889                << " from " << SrcModule->getSourceFileName() << "\n";
1890     }
1891 
1892     if (Error Err = Mover.move(std::move(SrcModule),
1893                                GlobalsToImport.getArrayRef(), nullptr,
1894                                /*IsPerformingImport=*/true))
1895       return createStringError(errc::invalid_argument,
1896                                Twine("Function Import: link error: ") +
1897                                    toString(std::move(Err)));
1898 
1899     ImportedCount += GlobalsToImport.size();
1900     NumImportedModules++;
1901   }
1902 
1903   internalizeGVsAfterImport(DestModule);
1904 
1905   NumImportedFunctions += (ImportedCount - ImportedGVCount);
1906   NumImportedGlobalVars += ImportedGVCount;
1907 
1908   // TODO: Print counters for definitions and declarations in the debugging log.
1909   LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
1910                     << " functions for Module "
1911                     << DestModule.getModuleIdentifier() << "\n");
1912   LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
1913                     << " global variables for Module "
1914                     << DestModule.getModuleIdentifier() << "\n");
1915   return ImportedCount;
1916 }
1917 
1918 static bool doImportingForModuleForTest(
1919     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1920                    isPrevailing) {
1921   if (SummaryFile.empty())
1922     report_fatal_error("error: -function-import requires -summary-file\n");
1923   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1924       getModuleSummaryIndexForFile(SummaryFile);
1925   if (!IndexPtrOrErr) {
1926     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1927                           "Error loading file '" + SummaryFile + "': ");
1928     return false;
1929   }
1930   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1931 
1932   // First step is collecting the import list.
1933   FunctionImporter::ImportMapTy ImportList;
1934   // If requested, simply import all functions in the index. This is used
1935   // when testing distributed backend handling via the opt tool, when
1936   // we have distributed indexes containing exactly the summaries to import.
1937   if (ImportAllIndex)
1938     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1939                                                       *Index, ImportList);
1940   else
1941     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1942                                              isPrevailing, *Index, ImportList);
1943 
1944   // Conservatively mark all internal values as promoted. This interface is
1945   // only used when doing importing via the function importing pass. The pass
1946   // is only enabled when testing importing via the 'opt' tool, which does
1947   // not do the ThinLink that would normally determine what values to promote.
1948   for (auto &I : *Index) {
1949     for (auto &S : I.second.SummaryList) {
1950       if (GlobalValue::isLocalLinkage(S->linkage()))
1951         S->setLinkage(GlobalValue::ExternalLinkage);
1952     }
1953   }
1954 
1955   // Next we need to promote to global scope and rename any local values that
1956   // are potentially exported to other modules.
1957   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
1958                              /*GlobalsToImport=*/nullptr)) {
1959     errs() << "Error renaming module\n";
1960     return true;
1961   }
1962 
1963   // Perform the import now.
1964   auto ModuleLoader = [&M](StringRef Identifier) {
1965     return loadFile(std::string(Identifier), M.getContext());
1966   };
1967   FunctionImporter Importer(*Index, ModuleLoader,
1968                             /*ClearDSOLocalOnDeclarations=*/false);
1969   Expected<bool> Result = Importer.importFunctions(M, ImportList);
1970 
1971   // FIXME: Probably need to propagate Errors through the pass manager.
1972   if (!Result) {
1973     logAllUnhandledErrors(Result.takeError(), errs(),
1974                           "Error importing module: ");
1975     return true;
1976   }
1977 
1978   return true;
1979 }
1980 
1981 PreservedAnalyses FunctionImportPass::run(Module &M,
1982                                           ModuleAnalysisManager &AM) {
1983   // This is only used for testing the function import pass via opt, where we
1984   // don't have prevailing information from the LTO context available, so just
1985   // conservatively assume everything is prevailing (which is fine for the very
1986   // limited use of prevailing checking in this pass).
1987   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
1988     return true;
1989   };
1990   if (!doImportingForModuleForTest(M, isPrevailing))
1991     return PreservedAnalyses::all();
1992 
1993   return PreservedAnalyses::none();
1994 }
1995