xref: /llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision 6807ca8e937e6949af838995964613fab2fbe0dc)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalAlias.h"
24 #include "llvm/IR/GlobalObject.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Metadata.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/IR/ModuleSummaryIndex.h"
30 #include "llvm/IRReader/IRReader.h"
31 #include "llvm/Linker/IRMover.h"
32 #include "llvm/ProfileData/PGOCtxProfReader.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
// Counters declared with the STATISTIC macro (llvm/ADT/Statistic.h). Going by
// their descriptions, the "ThinLink" counters record import decisions taken
// during the thin link, the "in backend" counters record what was imported in
// the backend, and the last two record liveness results for the index.
STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import functions with less than N instructions"));

/// Cap on the number of imported functions. Per the option description the
/// cap only applies when the value is non-negative; the default is -1.
static cl::opt<int> ImportCutoff(
    "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import first N functions if N>=0 (default -1)"));

/// When set, selectCallee() no longer rejects a candidate for exceeding the
/// instruction threshold or for being marked noinline.
static cl::opt<bool>
    ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                   cl::desc("Import functions with noinline attribute"));

/// Factor applied to the `import-instr-limit` threshold before processing
/// newly imported functions (see the option description).
static cl::opt<float>
    ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
                      cl::Hidden, cl::value_desc("x"),
                      cl::desc("As we import functions, multiply the "
                               "`import-instr-limit` threshold by this factor "
                               "before processing newly imported functions"));

/// Same as `import-instr-evolution-factor`, but for functions called from a
/// hot callsite (see the option description).
static cl::opt<float> ImportHotInstrFactor(
    "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc("As we import functions called from hot callsite, multiply the "
             "`import-instr-limit` threshold by this factor "
             "before processing newly imported functions"));

/// Threshold multiplier for hot callsites.
static cl::opt<float> ImportHotMultiplier(
    "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
    cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));

/// Threshold multiplier for critical callsites.
static cl::opt<float> ImportCriticalMultiplier(
    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc(
        "Multiply the `import-instr-limit` threshold for critical callsites"));

/// Threshold multiplier for cold callsites; the default is 0.
// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
    "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
    cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));

/// Debugging aid: print the imported functions.
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                  cl::desc("Print imported functions"));

/// Debugging aid: print information about functions rejected for importing.
static cl::opt<bool> PrintImportFailures(
    "print-import-failures", cl::init(false), cl::Hidden,
    cl::desc("Print information for functions rejected for importing"));

/// Whether dead symbols are computed; enabled by default.
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
                                 cl::desc("Compute dead symbols"));

/// Whether import metadata ('thinlto_src_module', 'thinlto_src_file') is
/// enabled; disabled by default.
static cl::opt<bool> EnableImportMetadata(
    "enable-import-metadata", cl::init(false), cl::Hidden,
    cl::desc("Enable import metadata like 'thinlto_src_module' and "
             "'thinlto_src_file'"));

/// Summary file to use for function importing when using -function-import from
/// the command line.
static cl::opt<std::string>
    SummaryFile("summary-file",
                cl::desc("The summary file to use for function importing."));

/// Used when testing importing from distributed indexes via opt
/// -function-import.
static cl::opt<bool>
    ImportAllIndex("import-all-index",
                   cl::desc("Import all external functions in index."));

/// This is a test-only option.
/// If this option is enabled, the ThinLTO indexing step will import each
/// function declaration as a fallback. In a real build this may increase ram
/// usage of the indexing step unnecessarily.
/// TODO: Implement selective import (based on combined summary analysis) to
/// ensure the imported function has a use case in the postlink pipeline.
static cl::opt<bool> ImportDeclaration(
    "import-declaration", cl::init(false), cl::Hidden,
    cl::desc("If true, import function declaration as fallback if the function "
             "definition is not imported."));

/// Pass a workload description file - an example of workload would be the
/// functions executed to satisfy a RPC request. A workload is defined by a root
/// function and the list of functions that are (frequently) needed to satisfy
/// it. The module that defines the root will have all those functions imported.
/// The file contains a JSON dictionary. The keys are root functions, the values
/// are lists of functions to import in the module defining the root. It is
/// assumed -funique-internal-linkage-names was used, thus ensuring function
/// names are unique even for local linkage ones.
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists of "
             "functions to import in the module defining the root. It is "
             "assumed -funique-internal-linkage-names was used, to ensure "
             "local linkage functions have unique names. For example: \n"
             "{\n"
             "  \"rootFunction_1\": [\"function_to_import_1\", "
             "\"function_to_import_2\"], \n"
             "  \"rootFunction_2\": [\"function_to_import_3\", "
             "\"function_to_import_4\"] \n"
             "}"),
    cl::Hidden);
176 
// Declaration only; the definition is not in the part of the file reviewed
// here. WorkloadImportsManager::loadFromCtxProf passes its value to
// MemoryBuffer::getFileOrSTDIN as the contextual profile to read.
extern cl::opt<std::string> UseCtxProfile;

namespace llvm {
// Declaration only.
// NOTE(review): not referenced in the part of the file reviewed here;
// presumably consulted further down - confirm.
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
182 
183 // Load lazily a module from \p FileName in \p Context.
184 static std::unique_ptr<Module> loadFile(const std::string &FileName,
185                                         LLVMContext &Context) {
186   SMDiagnostic Err;
187   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
188   // Metadata isn't loaded until functions are imported, to minimize
189   // the memory overhead.
190   std::unique_ptr<Module> Result =
191       getLazyIRFileModule(FileName, Err, Context,
192                           /* ShouldLazyLoadMetadata = */ true);
193   if (!Result) {
194     Err.print("function-import", errs());
195     report_fatal_error("Abort");
196   }
197 
198   return Result;
199 }
200 
201 static bool shouldSkipLocalInAnotherModule(const GlobalValueSummary *RefSummary,
202                                            size_t NumDefs,
203                                            StringRef ImporterModule) {
204   // We can import a local when there is one definition.
205   if (NumDefs == 1)
206     return false;
207   // In other cases, make sure we import the copy in the caller's module if the
208   // referenced value has local linkage. The only time a local variable can
209   // share an entry in the index is if there is a local with the same name in
210   // another module that had the same source file name (in a different
211   // directory), where each was compiled in their own directory so there was not
212   // distinguishing path.
213   return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
214          RefSummary->modulePath() != ImporterModule;
215 }
216 
217 /// Given a list of possible callee implementation for a call site, qualify the
218 /// legality of importing each. The return is a range of pairs. Each pair
219 /// corresponds to a candidate. The first value is the ImportFailureReason for
220 /// that candidate, the second is the candidate.
221 static auto qualifyCalleeCandidates(
222     const ModuleSummaryIndex &Index,
223     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
224     StringRef CallerModulePath) {
225   return llvm::map_range(
226       CalleeSummaryList,
227       [&Index, CalleeSummaryList,
228        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
229           -> std::pair<FunctionImporter::ImportFailureReason,
230                        const GlobalValueSummary *> {
231         auto *GVSummary = SummaryPtr.get();
232         if (!Index.isGlobalValueLive(GVSummary))
233           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
234 
235         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
236           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
237                   GVSummary};
238 
239         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
240 
241         // Ignore any callees that aren't actually functions. This could happen
242         // in the case of GUID hash collisions. It could also happen in theory
243         // for SamplePGO profiles collected on old versions of the code after
244         // renaming, since we synthesize edges to any inlined callees appearing
245         // in the profile.
246         if (!Summary)
247           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
248 
249         // If this is a local function, make sure we import the copy in the
250         // caller's module. The only time a local function can share an entry in
251         // the index is if there is a local with the same name in another module
252         // that had the same source file name (in a different directory), where
253         // each was compiled in their own directory so there was not
254         // distinguishing path.
255         // If the local function is from another module, it must be a reference
256         // due to indirect call profile data since a function pointer can point
257         // to a local in another module. Do the import from another module if
258         // there is only one entry in the list or when all files in the program
259         // are compiled with full path - in both cases the local function has
260         // unique PGO name and GUID.
261         if (shouldSkipLocalInAnotherModule(Summary, CalleeSummaryList.size(),
262                                            CallerModulePath))
263           return {
264               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
265               GVSummary};
266 
267         // Skip if it isn't legal to import (e.g. may reference unpromotable
268         // locals).
269         if (Summary->notEligibleToImport())
270           return {FunctionImporter::ImportFailureReason::NotEligible,
271                   GVSummary};
272 
273         return {FunctionImporter::ImportFailureReason::None, GVSummary};
274       });
275 }
276 
277 /// Given a list of possible callee implementation for a call site, select one
278 /// that fits the \p Threshold for function definition import. If none are
279 /// found, the Reason will give the last reason for the failure (last, in the
280 /// order of CalleeSummaryList entries). While looking for a callee definition,
281 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
282 /// candidate; other modules may want to know the function summary or
283 /// declaration even if a definition is not needed.
284 ///
285 /// FIXME: select "best" instead of first that fits. But what is "best"?
286 /// - The smallest: more likely to be inlined.
287 /// - The one with the least outgoing edges (already well optimized).
288 /// - One from a module already being imported from in order to reduce the
289 ///   number of source modules parsed/linked.
290 /// - One that has PGO data attached.
291 /// - [insert you fancy metric here]
292 static const GlobalValueSummary *
293 selectCallee(const ModuleSummaryIndex &Index,
294              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
295              unsigned Threshold, StringRef CallerModulePath,
296              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
297              FunctionImporter::ImportFailureReason &Reason) {
298   // Records the last summary with reason noinline or too-large.
299   TooLargeOrNoInlineSummary = nullptr;
300   auto QualifiedCandidates =
301       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
302   for (auto QualifiedValue : QualifiedCandidates) {
303     Reason = QualifiedValue.first;
304     // Skip a summary if its import is not (proved to be) legal.
305     if (Reason != FunctionImporter::ImportFailureReason::None)
306       continue;
307     auto *Summary =
308         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
309 
310     // Don't bother importing the definition if the chance of inlining it is
311     // not high enough (except under `--force-import-all`).
312     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
313         !ForceImportAll) {
314       TooLargeOrNoInlineSummary = Summary;
315       Reason = FunctionImporter::ImportFailureReason::TooLarge;
316       continue;
317     }
318 
319     // Don't bother importing the definition if we can't inline it anyway.
320     if (Summary->fflags().NoInline && !ForceImportAll) {
321       TooLargeOrNoInlineSummary = Summary;
322       Reason = FunctionImporter::ImportFailureReason::NoInline;
323       continue;
324     }
325 
326     return Summary;
327   }
328   return nullptr;
329 }
330 
namespace {

/// A function summary paired with a threshold value (the second tuple
/// element, as annotated inline).
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;

} // anonymous namespace
336 
337 /// Import globals referenced by a function or other globals that are being
338 /// imported, if importing such global is possible.
339 class GlobalsImporter final {
340   const ModuleSummaryIndex &Index;
341   const GVSummaryMapTy &DefinedGVSummaries;
342   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
343       IsPrevailing;
344   FunctionImporter::ImportMapTy &ImportList;
345   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
346 
347   bool shouldImportGlobal(const ValueInfo &VI) {
348     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
349     if (GVS == DefinedGVSummaries.end())
350       return true;
351     // We should not skip import if the module contains a non-prevailing
352     // definition with interposable linkage type. This is required for
353     // correctness in the situation where there is a prevailing def available
354     // for import and marked read-only. In this case, the non-prevailing def
355     // will be converted to a declaration, while the prevailing one becomes
356     // internal, thus no definitions will be available for linking. In order to
357     // prevent undefined symbol link error, the prevailing definition must be
358     // imported.
359     // FIXME: Consider adding a check that the suitable prevailing definition
360     // exists and marked read-only.
361     if (VI.getSummaryList().size() > 1 &&
362         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
363         !IsPrevailing(VI.getGUID(), GVS->second))
364       return true;
365 
366     return false;
367   }
368 
369   void
370   onImportingSummaryImpl(const GlobalValueSummary &Summary,
371                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
372     for (const auto &VI : Summary.refs()) {
373       if (!shouldImportGlobal(VI)) {
374         LLVM_DEBUG(
375             dbgs() << "Ref ignored! Target already in destination module.\n");
376         continue;
377       }
378 
379       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
380 
381       for (const auto &RefSummary : VI.getSummaryList()) {
382         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
383         // Functions could be referenced by global vars - e.g. a vtable; but we
384         // don't currently imagine a reason those would be imported here, rather
385         // than as part of the logic deciding which functions to import (i.e.
386         // based on profile information). Should we decide to handle them here,
387         // we can refactor accordingly at that time.
388         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
389             shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
390                                            Summary.modulePath()))
391           continue;
392 
393         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
394         // Otherwise, definition should take precedence over declaration.
395         auto [Iter, Inserted] =
396             ImportList[RefSummary->modulePath()].try_emplace(
397                 VI.getGUID(), GlobalValueSummary::Definition);
398         // Only update stat and exports if we haven't already imported this
399         // variable.
400         if (!Inserted) {
401           // Set the value to 'std::min(existing-value, new-value)' to make
402           // sure a definition takes precedence over a declaration.
403           Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
404           break;
405         }
406         NumImportedGlobalVarsThinLink++;
407         // Any references made by this variable will be marked exported
408         // later, in ComputeCrossModuleImport, after import decisions are
409         // complete, which is more efficient than adding them here.
410         if (ExportLists)
411           (*ExportLists)[RefSummary->modulePath()].insert(VI);
412 
413         // If variable is not writeonly we attempt to recursively analyze
414         // its references in order to import referenced constants.
415         if (!Index.isWriteOnly(GVS))
416           Worklist.emplace_back(GVS);
417         break;
418       }
419     }
420   }
421 
422 public:
423   GlobalsImporter(
424       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
425       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
426           IsPrevailing,
427       FunctionImporter::ImportMapTy &ImportList,
428       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
429       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
430         IsPrevailing(IsPrevailing), ImportList(ImportList),
431         ExportLists(ExportLists) {}
432 
433   void onImportingSummary(const GlobalValueSummary &Summary) {
434     SmallVector<const GlobalVarSummary *, 128> Worklist;
435     onImportingSummaryImpl(Summary, Worklist);
436     while (!Worklist.empty())
437       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
438   }
439 };
440 
// Forward declaration: returns a C string for \p Reason, which the workload
// import debug output below streams to dbgs().
// NOTE(review): the definition is not in the part of the file reviewed here;
// presumably it appears further down - confirm.
static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
442 
443 /// Determine the list of imports and exports for each module.
444 class ModuleImportsManager {
445 protected:
446   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
447       IsPrevailing;
448   const ModuleSummaryIndex &Index;
449   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
450 
451   ModuleImportsManager(
452       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
453           IsPrevailing,
454       const ModuleSummaryIndex &Index,
455       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
456       : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}
457 
458 public:
459   virtual ~ModuleImportsManager() = default;
460 
461   /// Given the list of globals defined in a module, compute the list of imports
462   /// as well as the list of "exports", i.e. the list of symbols referenced from
463   /// another module (that may require promotion).
464   virtual void
465   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
466                          StringRef ModName,
467                          FunctionImporter::ImportMapTy &ImportList);
468 
469   static std::unique_ptr<ModuleImportsManager>
470   create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
471              IsPrevailing,
472          const ModuleSummaryIndex &Index,
473          DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
474              nullptr);
475 };
476 
477 /// A ModuleImportsManager that operates based on a workload definition (see
478 /// -thinlto-workload-def). For modules that do not define workload roots, it
479 /// applies the base ModuleImportsManager import policy.
480 class WorkloadImportsManager : public ModuleImportsManager {
481   // Keep a module name -> value infos to import association. We use it to
482   // determine if a module's import list should be done by the base
483   // ModuleImportsManager or by us.
484   StringMap<DenseSet<ValueInfo>> Workloads;
485 
486   void
487   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
488                          StringRef ModName,
489                          FunctionImporter::ImportMapTy &ImportList) override {
490     auto SetIter = Workloads.find(ModName);
491     if (SetIter == Workloads.end()) {
492       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
493                         << " does not contain the root of any context.\n");
494       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
495                                                           ModName, ImportList);
496     }
497     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
498                       << " contains the root(s) of context(s).\n");
499 
500     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
501                         ExportLists);
502     auto &ValueInfos = SetIter->second;
503     SmallVector<EdgeInfo, 128> GlobWorklist;
504     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
505       auto It = DefinedGVSummaries.find(VI.getGUID());
506       if (It != DefinedGVSummaries.end() &&
507           IsPrevailing(VI.getGUID(), It->second)) {
508         LLVM_DEBUG(
509             dbgs() << "[Workload] " << VI.name()
510                    << " has the prevailing variant already in the module "
511                    << ModName << ". No need to import\n");
512         continue;
513       }
514       auto Candidates =
515           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
516 
517       const GlobalValueSummary *GVS = nullptr;
518       auto PotentialCandidates = llvm::map_range(
519           llvm::make_filter_range(
520               Candidates,
521               [&](const auto &Candidate) {
522                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
523                                   << " from " << Candidate.second->modulePath()
524                                   << " ImportFailureReason: "
525                                   << getFailureName(Candidate.first) << "\n");
526                 return Candidate.first ==
527                         FunctionImporter::ImportFailureReason::None;
528               }),
529           [](const auto &Candidate) { return Candidate.second; });
530       if (PotentialCandidates.empty()) {
531         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
532                           << " because can't find eligible Callee. Guid is: "
533                           << Function::getGUID(VI.name()) << "\n");
534         continue;
535       }
536       /// We will prefer importing the prevailing candidate, if not, we'll
537       /// still pick the first available candidate. The reason we want to make
538       /// sure we do import the prevailing candidate is because the goal of
539       /// workload-awareness is to enable optimizations specializing the call
540       /// graph of that workload. Suppose a function is already defined in the
541       /// module, but it's not the prevailing variant. Suppose also we do not
542       /// inline it (in fact, if it were interposable, we can't inline it),
543       /// but we could specialize it to the workload in other ways. However,
544       /// the linker would drop it in the favor of the prevailing copy.
545       /// Instead, by importing the prevailing variant (assuming also the use
546       /// of `-avail-extern-to-local`), we keep the specialization. We could
547       /// alteranatively make the non-prevailing variant local, but the
548       /// prevailing one is also the one for which we would have previously
549       /// collected profiles, making it preferrable.
550       auto PrevailingCandidates = llvm::make_filter_range(
551           PotentialCandidates, [&](const auto *Candidate) {
552             return IsPrevailing(VI.getGUID(), Candidate);
553           });
554       if (PrevailingCandidates.empty()) {
555         GVS = *PotentialCandidates.begin();
556         if (!llvm::hasSingleElement(PotentialCandidates) &&
557             GlobalValue::isLocalLinkage(GVS->linkage()))
558           LLVM_DEBUG(
559               dbgs()
560               << "[Workload] Found multiple non-prevailing candidates for "
561               << VI.name()
562               << ". This is unexpected. Are module paths passed to the "
563                  "compiler unique for the modules passed to the linker?");
564         // We could in theory have multiple (interposable) copies of a symbol
565         // when there is no prevailing candidate, if say the prevailing copy was
566         // in a native object being linked in. However, we should in theory be
567         // marking all of these non-prevailing IR copies dead in that case, in
568         // which case they won't be candidates.
569         assert(GVS->isLive());
570       } else {
571         assert(llvm::hasSingleElement(PrevailingCandidates));
572         GVS = *PrevailingCandidates.begin();
573       }
574 
575       auto ExportingModule = GVS->modulePath();
576       // We checked that for the prevailing case, but if we happen to have for
577       // example an internal that's defined in this module, it'd have no
578       // PrevailingCandidates.
579       if (ExportingModule == ModName) {
580         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
581                           << " because its defining module is the same as the "
582                              "current module\n");
583         continue;
584       }
585       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
586                         << ExportingModule << " : "
587                         << Function::getGUID(VI.name()) << "\n");
588       ImportList[ExportingModule][VI.getGUID()] =
589           GlobalValueSummary::Definition;
590       GVI.onImportingSummary(*GVS);
591       if (ExportLists)
592         (*ExportLists)[ExportingModule].insert(VI);
593     }
594     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
595   }
596 
597   void loadFromJson() {
598     // Since the workload def uses names, we need a quick lookup
599     // name->ValueInfo.
600     StringMap<ValueInfo> NameToValueInfo;
601     StringSet<> AmbiguousNames;
602     for (auto &I : Index) {
603       ValueInfo VI = Index.getValueInfo(I);
604       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
605         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
606     }
607     auto DbgReportIfAmbiguous = [&](StringRef Name) {
608       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
609         dbgs() << "[Workload] Function name " << Name
610                << " present in the workload definition is ambiguous. Consider "
611                   "compiling with -funique-internal-linkage-names.";
612       });
613     };
614     std::error_code EC;
615     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
616     if (std::error_code EC = BufferOrErr.getError()) {
617       report_fatal_error("Failed to open context file");
618       return;
619     }
620     auto Buffer = std::move(BufferOrErr.get());
621     std::map<std::string, std::vector<std::string>> WorkloadDefs;
622     json::Path::Root NullRoot;
623     // The JSON is supposed to contain a dictionary matching the type of
624     // WorkloadDefs. For example:
625     // {
626     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
627     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
628     // }
629     auto Parsed = json::parse(Buffer->getBuffer());
630     if (!Parsed)
631       report_fatal_error(Parsed.takeError());
632     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
633       report_fatal_error("Invalid thinlto contextual profile format.");
634     for (const auto &Workload : WorkloadDefs) {
635       const auto &Root = Workload.first;
636       DbgReportIfAmbiguous(Root);
637       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
638       const auto &AllCallees = Workload.second;
639       auto RootIt = NameToValueInfo.find(Root);
640       if (RootIt == NameToValueInfo.end()) {
641         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
642                           << " not found in this linkage unit.\n");
643         continue;
644       }
645       auto RootVI = RootIt->second;
646       if (RootVI.getSummaryList().size() != 1) {
647         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
648                           << " should have exactly one summary, but has "
649                           << RootVI.getSummaryList().size() << ". Skipping.\n");
650         continue;
651       }
652       StringRef RootDefiningModule =
653           RootVI.getSummaryList().front()->modulePath();
654       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
655                         << " is : " << RootDefiningModule << "\n");
656       auto &Set = Workloads[RootDefiningModule];
657       for (const auto &Callee : AllCallees) {
658         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
659         DbgReportIfAmbiguous(Callee);
660         auto ElemIt = NameToValueInfo.find(Callee);
661         if (ElemIt == NameToValueInfo.end()) {
662           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
663           continue;
664         }
665         Set.insert(ElemIt->second);
666       }
667     }
668   }
669 
670   void loadFromCtxProf() {
671     std::error_code EC;
672     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(UseCtxProfile);
673     if (std::error_code EC = BufferOrErr.getError()) {
674       report_fatal_error("Failed to open contextual profile file");
675       return;
676     }
677     auto Buffer = std::move(BufferOrErr.get());
678 
679     PGOCtxProfileReader Reader(Buffer->getBuffer());
680     auto Ctx = Reader.loadContexts();
681     if (!Ctx) {
682       report_fatal_error("Failed to parse contextual profiles");
683       return;
684     }
685     const auto &CtxMap = *Ctx;
686     DenseSet<GlobalValue::GUID> ContainedGUIDs;
687     for (const auto &[RootGuid, Root] : CtxMap) {
688       // Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for
689       // subsequent roots, but clear its contents.
690       ContainedGUIDs.clear();
691 
692       auto RootVI = Index.getValueInfo(RootGuid);
693       if (!RootVI) {
694         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
695                           << " not found in this linkage unit.\n");
696         continue;
697       }
698       if (RootVI.getSummaryList().size() != 1) {
699         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
700                           << " should have exactly one summary, but has "
701                           << RootVI.getSummaryList().size() << ". Skipping.\n");
702         continue;
703       }
704       StringRef RootDefiningModule =
705           RootVI.getSummaryList().front()->modulePath();
706       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
707                         << " is : " << RootDefiningModule << "\n");
708       auto &Set = Workloads[RootDefiningModule];
709       Root.getContainedGuids(ContainedGUIDs);
710       for (auto Guid : ContainedGUIDs)
711         if (auto VI = Index.getValueInfo(Guid))
712           Set.insert(VI);
713     }
714   }
715 
716 public:
717   WorkloadImportsManager(
718       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
719           IsPrevailing,
720       const ModuleSummaryIndex &Index,
721       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
722       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
723     if (UseCtxProfile.empty() == WorkloadDefinitions.empty()) {
724       report_fatal_error(
725           "Pass only one of: -thinlto-pgo-ctx-prof or -thinlto-workload-def");
726       return;
727     }
728     if (!UseCtxProfile.empty())
729       loadFromCtxProf();
730     else
731       loadFromJson();
732     LLVM_DEBUG({
733       for (const auto &[Root, Set] : Workloads) {
734         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
735                << " distinct callees.\n";
736         for (const auto &VI : Set) {
737           dbgs() << "[Workload] Root: " << Root
738                  << " Would include: " << VI.getGUID() << "\n";
739         }
740       }
741     });
742   }
743 };
744 
745 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
746     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
747         IsPrevailing,
748     const ModuleSummaryIndex &Index,
749     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
750   if (WorkloadDefinitions.empty() && UseCtxProfile.empty()) {
751     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
752     return std::unique_ptr<ModuleImportsManager>(
753         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
754   }
755   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
756   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
757                                                   ExportLists);
758 }
759 
760 static const char *
761 getFailureName(FunctionImporter::ImportFailureReason Reason) {
762   switch (Reason) {
763   case FunctionImporter::ImportFailureReason::None:
764     return "None";
765   case FunctionImporter::ImportFailureReason::GlobalVar:
766     return "GlobalVar";
767   case FunctionImporter::ImportFailureReason::NotLive:
768     return "NotLive";
769   case FunctionImporter::ImportFailureReason::TooLarge:
770     return "TooLarge";
771   case FunctionImporter::ImportFailureReason::InterposableLinkage:
772     return "InterposableLinkage";
773   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
774     return "LocalLinkageNotInModule";
775   case FunctionImporter::ImportFailureReason::NotEligible:
776     return "NotEligible";
777   case FunctionImporter::ImportFailureReason::NoInline:
778     return "NoInline";
779   }
780   llvm_unreachable("invalid reason");
781 }
782 
783 /// Compute the list of functions to import for a given caller. Mark these
784 /// imported functions and the symbols they reference in their source module as
785 /// exported from their source module.
    ///
    /// Every call edge of \p Summary is examined in turn.
    ///
    /// \param Summary            Caller whose call edges are walked.
    /// \param Index              Combined index handed to selectCallee().
    /// \param Threshold          Base limit for this caller; it is scaled per
    ///                           edge by a hotness multiplier before use.
    /// \param DefinedGVSummaries Callees whose GUID is present here are skipped.
    /// \param isPrevailing       Not referenced in this function's body.
    /// \param Worklist           Receives each selected callee together with the
    ///                           threshold to use for its own callees.
    /// \param GVImporter         Notified of \p Summary before edges are walked.
    /// \param ImportList         Receives a definition or declaration entry per
    ///                           selected callee, keyed by source module path.
    /// \param ExportLists        When non-null, newly selected callees are
    ///                           inserted into their source module's set.
    /// \param ImportThresholds   Per-GUID record of (threshold, selected summary,
    ///                           failure info) consulted on repeated visits.
786 static void computeImportForFunction(
787     const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
788     const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
789     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
790         isPrevailing,
791     SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
792     FunctionImporter::ImportMapTy &ImportList,
793     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
794     FunctionImporter::ImportThresholdsTy &ImportThresholds) {
795   GVImporter.onImportingSummary(Summary);
      // Function-local static: the counter is shared by every invocation of this
      // function and is never reset here.
796   static int ImportCount = 0;
797   for (const auto &Edge : Summary.calls()) {
798     ValueInfo VI = Edge.first;
799     LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
800                       << "\n");
801 
        // A negative ImportCutoff disables the cutoff check.
802     if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
803       LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
804                         << " reached.\n");
805       continue;
806     }
807 
808     if (DefinedGVSummaries.count(VI.getGUID())) {
809       // FIXME: Consider not skipping import if the module contains
810       // a non-prevailing def with interposable linkage. The prevailing copy
811       // can safely be imported (see shouldImportGlobal()).
812       LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
813       continue;
814     }
815 
        // Map the call-site hotness to the multiplier applied to Threshold; any
        // hotness other than Hot, Cold or Critical leaves it unchanged.
816     auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
817       if (Hotness == CalleeInfo::HotnessType::Hot)
818         return ImportHotMultiplier;
819       if (Hotness == CalleeInfo::HotnessType::Cold)
820         return ImportColdMultiplier;
821       if (Hotness == CalleeInfo::HotnessType::Critical)
822         return ImportCriticalMultiplier;
823       return 1.0;
824     };
825 
826     const auto NewThreshold =
827         Threshold * GetBonusMultiplier(Edge.second.getHotness());
828 
        // Insert a fresh record for this GUID, or find the existing one. The three
        // references below alias the fields of that record, so writes through them
        // update ImportThresholds in place.
829     auto IT = ImportThresholds.insert(std::make_pair(
830         VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
831     bool PreviouslyVisited = !IT.second;
832     auto &ProcessedThreshold = std::get<0>(IT.first->second);
833     auto &CalleeSummary = std::get<1>(IT.first->second);
834     auto &FailureInfo = std::get<2>(IT.first->second);
835 
836     bool IsHotCallsite =
837         Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
838     bool IsCriticalCallsite =
839         Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;
840 
841     const FunctionSummary *ResolvedCalleeSummary = nullptr;
842     if (CalleeSummary) {
843       assert(PreviouslyVisited);
844       // Since the traversal of the call graph is DFS, we can revisit a function
845       // a second time with a higher threshold. In this case, it is added back
846       // to the worklist with the new threshold (so that its own callee chains
847       // can be considered with the higher threshold).
848       if (NewThreshold <= ProcessedThreshold) {
849         LLVM_DEBUG(
850             dbgs() << "ignored! Target was already imported with Threshold "
851                    << ProcessedThreshold << "\n");
852         continue;
853       }
854       // Update with new larger threshold.
855       ProcessedThreshold = NewThreshold;
856       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
857     } else {
858       // If we already rejected importing a callee at the same or higher
859       // threshold, don't waste time calling selectCallee.
860       if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
861         LLVM_DEBUG(
862             dbgs() << "ignored! Target was already rejected with Threshold "
863             << ProcessedThreshold << "\n");
864         if (PrintImportFailures) {
865           assert(FailureInfo &&
866                  "Expected FailureInfo for previously rejected candidate");
867           FailureInfo->Attempts++;
868         }
869         continue;
870       }
871 
872       FunctionImporter::ImportFailureReason Reason{};
873 
874       // `SummaryForDeclImport` is a summary eligible for declaration import.
875       const GlobalValueSummary *SummaryForDeclImport = nullptr;
876       CalleeSummary =
877           selectCallee(Index, VI.getSummaryList(), NewThreshold,
878                        Summary.modulePath(), SummaryForDeclImport, Reason);
879       if (!CalleeSummary) {
880         // There isn't a callee for definition import but one for declaration
881         // import.
882         if (ImportDeclaration && SummaryForDeclImport) {
883           StringRef DeclSourceModule = SummaryForDeclImport->modulePath();
884 
885           // Since definition takes precedence over declaration for the same VI,
886           // try emplace <VI, declaration> pair without checking insert result.
887           // If insert doesn't happen, there must be an existing entry keyed by
888           // VI. Note `ExportLists` only keeps track of exports due to imported
889           // definitions.
890           ImportList[DeclSourceModule].try_emplace(
891               VI.getGUID(), GlobalValueSummary::Declaration);
892         }
893         // Update with new larger threshold if this was a retry (otherwise
894         // we would have already inserted with NewThreshold above). Also
895         // update failure info if requested.
896         if (PreviouslyVisited) {
897           ProcessedThreshold = NewThreshold;
898           if (PrintImportFailures) {
899             assert(FailureInfo &&
900                    "Expected FailureInfo for previously rejected candidate");
901             FailureInfo->Reason = Reason;
902             FailureInfo->Attempts++;
903             FailureInfo->MaxHotness =
904                 std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
905           }
906         } else if (PrintImportFailures) {
907           assert(!FailureInfo &&
908                  "Expected no FailureInfo for newly rejected candidate");
909           FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
910               VI, Edge.second.getHotness(), Reason, 1);
911         }
912         if (ForceImportAll) {
913           std::string Msg = std::string("Failed to import function ") +
914                             VI.name().str() + " due to " +
915                             getFailureName(Reason);
916           auto Error = make_error<StringError>(
917               Msg, make_error_code(errc::not_supported));
918           logAllUnhandledErrors(std::move(Error), errs(),
919                                 "Error importing module: ");
              // Stop examining the remaining call edges of this caller.
920           break;
921         } else {
922           LLVM_DEBUG(dbgs()
923                      << "ignored! No qualifying callee with summary found.\n");
924           continue;
925         }
926       }
927 
928       // "Resolve" the summary
929       CalleeSummary = CalleeSummary->getBaseObject();
930       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
931 
932       assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
933               (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
934              "selectCallee() didn't honor the threshold");
935 
936       auto ExportModulePath = ResolvedCalleeSummary->modulePath();
937 
938       // Try emplace the definition entry, and update stats based on insertion
939       // status.
940       auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
941           VI.getGUID(), GlobalValueSummary::Definition);
942 
943       // We previously decided to import this GUID definition if it was already
944       // inserted in the set of imports from the exporting module.
945       if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
946         NumImportedFunctionsThinLink++;
947         if (IsHotCallsite)
948           NumImportedHotFunctionsThinLink++;
949         if (IsCriticalCallsite)
950           NumImportedCriticalFunctionsThinLink++;
951       }
952 
          // An existing declaration entry is upgraded to a definition.
953       if (Iter->second == GlobalValueSummary::Declaration)
954         Iter->second = GlobalValueSummary::Definition;
955 
956       // Any calls/references made by this function will be marked exported
957       // later, in ComputeCrossModuleImport, after import decisions are
958       // complete, which is more efficient than adding them here.
959       if (ExportLists)
960         (*ExportLists)[ExportModulePath].insert(VI);
961     }
962 
963     auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
964       // Adjust the threshold for next level of imported functions.
965       // The threshold is different for hot callsites because we can then
966       // inline chains of hot calls.
967       if (IsHotCallsite)
968         return Threshold * ImportHotInstrFactor;
969       return Threshold * ImportInstrFactor;
970     };
971 
        // Note: this scales the incoming Threshold, not the per-edge NewThreshold.
972     const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
973 
974     ImportCount++;
975 
976     // Insert the newly imported function to the worklist.
977     Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
978   }
979 }
980 
981 void ModuleImportsManager::computeImportForModule(
982     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
983     FunctionImporter::ImportMapTy &ImportList) {
984   // Worklist contains the list of function imported in this module, for which
985   // we will analyse the callees and may import further down the callgraph.
986   SmallVector<EdgeInfo, 128> Worklist;
987   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
988                       ExportLists);
989   FunctionImporter::ImportThresholdsTy ImportThresholds;
990 
991   // Populate the worklist with the import for the functions in the current
992   // module
993   for (const auto &GVSummary : DefinedGVSummaries) {
994 #ifndef NDEBUG
995     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
996     // so this map look up (and possibly others) can be avoided.
997     auto VI = Index.getValueInfo(GVSummary.first);
998 #endif
999     if (!Index.isGlobalValueLive(GVSummary.second)) {
1000       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
1001       continue;
1002     }
1003     auto *FuncSummary =
1004         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
1005     if (!FuncSummary)
1006       // Skip import for global variables
1007       continue;
1008     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
1009     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
1010                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
1011                              ImportList, ExportLists, ImportThresholds);
1012   }
1013 
1014   // Process the newly imported functions and add callees to the worklist.
1015   while (!Worklist.empty()) {
1016     auto GVInfo = Worklist.pop_back_val();
1017     auto *Summary = std::get<0>(GVInfo);
1018     auto Threshold = std::get<1>(GVInfo);
1019 
1020     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
1021       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
1022                                IsPrevailing, Worklist, GVI, ImportList,
1023                                ExportLists, ImportThresholds);
1024   }
1025 
1026   // Print stats about functions considered but rejected for importing
1027   // when requested.
1028   if (PrintImportFailures) {
1029     dbgs() << "Missed imports into module " << ModName << "\n";
1030     for (auto &I : ImportThresholds) {
1031       auto &ProcessedThreshold = std::get<0>(I.second);
1032       auto &CalleeSummary = std::get<1>(I.second);
1033       auto &FailureInfo = std::get<2>(I.second);
1034       if (CalleeSummary)
1035         continue; // We are going to import.
1036       assert(FailureInfo);
1037       FunctionSummary *FS = nullptr;
1038       if (!FailureInfo->VI.getSummaryList().empty())
1039         FS = dyn_cast<FunctionSummary>(
1040             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
1041       dbgs() << FailureInfo->VI
1042              << ": Reason = " << getFailureName(FailureInfo->Reason)
1043              << ", Threshold = " << ProcessedThreshold
1044              << ", Size = " << (FS ? (int)FS->instCount() : -1)
1045              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
1046              << ", Attempts = " << FailureInfo->Attempts << "\n";
1047     }
1048   }
1049 }
1050 
1051 #ifndef NDEBUG
1052 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
1053   auto SL = VI.getSummaryList();
1054   return SL.empty()
1055              ? false
1056              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
1057 }
1058 
1059 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
1060                                GlobalValue::GUID G) {
1061   if (const auto &VI = Index.getValueInfo(G))
1062     return isGlobalVarSummary(Index, VI);
1063   return false;
1064 }
1065 
1066 // Return the number of global variable summaries in ExportSet.
1067 static unsigned
1068 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1069                       FunctionImporter::ExportSetTy &ExportSet) {
1070   unsigned NumGVS = 0;
1071   for (auto &VI : ExportSet)
1072     if (isGlobalVarSummary(Index, VI.getGUID()))
1073       ++NumGVS;
1074   return NumGVS;
1075 }
1076 
1077 // Given ImportMap, return the number of global variable summaries and record
1078 // the number of defined function summaries as output parameter.
1079 static unsigned
1080 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1081                       FunctionImporter::FunctionsToImportTy &ImportMap,
1082                       unsigned &DefinedFS) {
1083   unsigned NumGVS = 0;
1084   DefinedFS = 0;
1085   for (auto &[GUID, Type] : ImportMap) {
1086     if (isGlobalVarSummary(Index, GUID))
1087       ++NumGVS;
1088     else if (Type == GlobalValueSummary::Definition)
1089       ++DefinedFS;
1090   }
1091   return NumGVS;
1092 }
1093 #endif
1094 
1095 #ifndef NDEBUG
1096 static bool checkVariableImport(
1097     const ModuleSummaryIndex &Index,
1098     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1099     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1100   DenseSet<GlobalValue::GUID> FlattenedImports;
1101 
1102   for (auto &ImportPerModule : ImportLists)
1103     for (auto &ExportPerModule : ImportPerModule.second)
1104       for (auto &[GUID, Type] : ExportPerModule.second)
1105         FlattenedImports.insert(GUID);
1106 
1107   // Checks that all GUIDs of read/writeonly vars we see in export lists
1108   // are also in the import lists. Otherwise we my face linker undefs,
1109   // because readonly and writeonly vars are internalized in their
1110   // source modules. The exception would be if it has a linkage type indicating
1111   // that there may have been a copy existing in the importing module (e.g.
1112   // linkonce_odr). In that case we cannot accurately do this checking.
1113   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1114                                                   const ValueInfo &VI) {
1115     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1116         Index.findSummaryInModule(VI, ModulePath));
1117     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1118            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1119              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1120              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1121   };
1122 
1123   for (auto &ExportPerModule : ExportLists)
1124     for (auto &VI : ExportPerModule.second)
1125       if (!FlattenedImports.count(VI.getGUID()) &&
1126           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1127         return false;
1128 
1129   return true;
1130 }
1131 #endif
1132 
1133 /// Compute all the import and export for every module using the Index.
1134 void llvm::ComputeCrossModuleImport(
1135     const ModuleSummaryIndex &Index,
1136     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1137     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1138         isPrevailing,
1139     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1140     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1141   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1142   // For each module that has function defined, compute the import/export lists.
1143   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1144     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1145     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1146                       << DefinedGVSummaries.first << "'\n");
1147     MIS->computeImportForModule(DefinedGVSummaries.second,
1148                                 DefinedGVSummaries.first, ImportList);
1149   }
1150 
1151   // When computing imports we only added the variables and functions being
1152   // imported to the export list. We also need to mark any references and calls
1153   // they make as exported as well. We do this here, as it is more efficient
1154   // since we may import the same values multiple times into different modules
1155   // during the import computation.
1156   for (auto &ELI : ExportLists) {
1157     // `NewExports` tracks the VI that gets exported because the full definition
1158     // of its user/referencer gets exported.
1159     FunctionImporter::ExportSetTy NewExports;
1160     const auto &DefinedGVSummaries =
1161         ModuleToDefinedGVSummaries.lookup(ELI.first);
1162     for (auto &EI : ELI.second) {
1163       // Find the copy defined in the exporting module so that we can mark the
1164       // values it references in that specific definition as exported.
1165       // Below we will add all references and called values, without regard to
1166       // whether they are also defined in this module. We subsequently prune the
1167       // list to only include those defined in the exporting module, see comment
1168       // there as to why.
1169       auto DS = DefinedGVSummaries.find(EI.getGUID());
1170       // Anything marked exported during the import computation must have been
1171       // defined in the exporting module.
1172       assert(DS != DefinedGVSummaries.end());
1173       auto *S = DS->getSecond();
1174       S = S->getBaseObject();
1175       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1176         // Export referenced functions and variables. We don't export/promote
1177         // objects referenced by writeonly variable initializer, because
1178         // we convert such variables initializers to "zeroinitializer".
1179         // See processGlobalForThinLTO.
1180         if (!Index.isWriteOnly(GVS))
1181           for (const auto &VI : GVS->refs())
1182             NewExports.insert(VI);
1183       } else {
1184         auto *FS = cast<FunctionSummary>(S);
1185         for (const auto &Edge : FS->calls())
1186           NewExports.insert(Edge.first);
1187         for (const auto &Ref : FS->refs())
1188           NewExports.insert(Ref);
1189       }
1190     }
1191     // Prune list computed above to only include values defined in the
1192     // exporting module. We do this after the above insertion since we may hit
1193     // the same ref/call target multiple times in above loop, and it is more
1194     // efficient to avoid a set lookup each time.
1195     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1196       if (!DefinedGVSummaries.count(EI->getGUID()))
1197         NewExports.erase(EI++);
1198       else
1199         ++EI;
1200     }
1201     ELI.second.insert(NewExports.begin(), NewExports.end());
1202   }
1203 
1204   assert(checkVariableImport(Index, ImportLists, ExportLists));
1205 #ifndef NDEBUG
1206   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1207                     << " modules:\n");
1208   for (auto &ModuleImports : ImportLists) {
1209     auto ModName = ModuleImports.first;
1210     auto &Exports = ExportLists[ModName];
1211     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1212     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1213                       << Exports.size() - NumGVS << " functions and " << NumGVS
1214                       << " vars. Imports from " << ModuleImports.second.size()
1215                       << " modules.\n");
1216     for (auto &Src : ModuleImports.second) {
1217       auto SrcModName = Src.first;
1218       unsigned DefinedFS = 0;
1219       unsigned NumGVSPerMod =
1220           numGlobalVarSummaries(Index, Src.second, DefinedFS);
1221       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1222                         << Src.second.size() - NumGVSPerMod - DefinedFS
1223                         << " function declarations imported from " << SrcModName
1224                         << "\n");
1225       LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1226                         << " global vars imported from " << SrcModName << "\n");
1227     }
1228   }
1229 #endif
1230 }
1231 
1232 #ifndef NDEBUG
1233 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1234                                     StringRef ModulePath,
1235                                     FunctionImporter::ImportMapTy &ImportList) {
1236   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1237                     << ImportList.size() << " modules.\n");
1238   for (auto &Src : ImportList) {
1239     auto SrcModName = Src.first;
1240     unsigned DefinedFS = 0;
1241     unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
1242     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1243                       << Src.second.size() - DefinedFS - NumGVSPerMod
1244                       << " function declarations imported from " << SrcModName
1245                       << "\n");
1246     LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1247                       << SrcModName << "\n");
1248   }
1249 }
1250 #endif
1251 
1252 /// Compute all the imports for the given module using the Index.
1253 ///
1254 /// \p isPrevailing is a callback that will be called with a global value's GUID
1255 /// and summary and should return whether the module corresponding to the
1256 /// summary contains the linker-prevailing copy of that value.
1257 ///
1258 /// \p ImportList will be populated with a map that can be passed to
1259 /// FunctionImporter::importFunctions() above (see description there).
1260 static void ComputeCrossModuleImportForModuleForTest(
1261     StringRef ModulePath,
1262     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1263         isPrevailing,
1264     const ModuleSummaryIndex &Index,
1265     FunctionImporter::ImportMapTy &ImportList) {
1266   // Collect the list of functions this module defines.
1267   // GUID -> Summary
1268   GVSummaryMapTy FunctionSummaryMap;
1269   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1270 
1271   // Compute the import list for this module.
1272   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1273   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1274   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1275 
1276 #ifndef NDEBUG
1277   dumpImportListForModule(Index, ModulePath, ImportList);
1278 #endif
1279 }
1280 
1281 /// Mark all external summaries in \p Index for import into the given module.
1282 /// Used for testing the case of distributed builds using a distributed index.
1283 ///
1284 /// \p ImportList will be populated with a map that can be passed to
1285 /// FunctionImporter::importFunctions() above (see description there).
1286 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1287     StringRef ModulePath, const ModuleSummaryIndex &Index,
1288     FunctionImporter::ImportMapTy &ImportList) {
1289   for (const auto &GlobalList : Index) {
1290     // Ignore entries for undefined references.
1291     if (GlobalList.second.SummaryList.empty())
1292       continue;
1293 
1294     auto GUID = GlobalList.first;
1295     assert(GlobalList.second.SummaryList.size() == 1 &&
1296            "Expected individual combined index to have one summary per GUID");
1297     auto &Summary = GlobalList.second.SummaryList[0];
1298     // Skip the summaries for the importing module. These are included to
1299     // e.g. record required linkage changes.
1300     if (Summary->modulePath() == ModulePath)
1301       continue;
1302     // Add an entry to provoke importing by thinBackend.
1303     auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
1304         GUID, Summary->importType());
1305     if (!Inserted) {
1306       // Use 'std::min' to make sure definition (with enum value 0) takes
1307       // precedence over declaration (with enum value 1).
1308       Iter->second = std::min(Iter->second, Summary->importType());
1309     }
1310   }
1311 #ifndef NDEBUG
1312   dumpImportListForModule(Index, ModulePath, ImportList);
1313 #endif
1314 }
1315 
1316 // For SamplePGO, the indirect call targets for local functions will
1317 // have its original name annotated in profile. We try to find the
1318 // corresponding PGOFuncName as the GUID, and fix up the edges
1319 // accordingly.
1320 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1321                                      FunctionSummary *FS) {
1322   for (auto &EI : FS->mutableCalls()) {
1323     if (!EI.first.getSummaryList().empty())
1324       continue;
1325     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1326     if (GUID == 0)
1327       continue;
1328     // Update the edge to point directly to the correct GUID.
1329     auto VI = Index.getValueInfo(GUID);
1330     if (llvm::any_of(
1331             VI.getSummaryList(),
1332             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1333               // The mapping from OriginalId to GUID may return a GUID
1334               // that corresponds to a static variable. Filter it out here.
1335               // This can happen when
1336               // 1) There is a call to a library function which is not defined
1337               // in the index.
1338               // 2) There is a static variable with the  OriginalGUID identical
1339               // to the GUID of the library function in 1);
1340               // When this happens the static variable in 2) will be found,
1341               // which needs to be filtered out.
1342               return SummaryPtr->getSummaryKind() ==
1343                      GlobalValueSummary::GlobalVarKind;
1344             }))
1345       continue;
1346     EI.first = VI;
1347   }
1348 }
1349 
1350 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1351   for (const auto &Entry : Index) {
1352     for (const auto &S : Entry.second.SummaryList) {
1353       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1354         updateValueInfoForIndirectCalls(Index, FS);
1355     }
1356   }
1357 }
1358 
/// Compute liveness for the summaries in the combined \p Index and fix up
/// indirect call edges along the way.
///
/// Every summary of a GUID in \p GUIDPreservedSymbols is marked live, and
/// liveness is then propagated along alias, reference and call edges using a
/// worklist. \p isPrevailing is consulted for each newly reached value: a
/// value reported as PrevailingType::No is only made live when it is reached
/// as an aliasee, or when one of its copies has available_externally,
/// weak_odr or linkonce_odr linkage. On completion the index is flagged via
/// setWithGlobalValueDeadStripping() and the NumLiveSymbols / NumDeadSymbols
/// counters are updated.
///
/// When ComputeDead is off or \p GUIDPreservedSymbols is empty, only the
/// indirect call edges are updated and no liveness is computed.
void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
    ModuleSummaryIndex &Index,
    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
    function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
  // This analysis must not have been run on this index already.
  assert(!Index.withGlobalValueDeadStripping());
  if (!ComputeDead ||
      // Don't do anything when nothing is live, this is friendly with tests.
      GUIDPreservedSymbols.empty()) {
    // Still need to update indirect calls.
    updateIndirectCalls(Index);
    return;
  }
  // Number of index entries (ValueInfos, not individual summaries) that were
  // found to be live.
  unsigned LiveSymbols = 0;
  SmallVector<ValueInfo, 128> Worklist;
  Worklist.reserve(GUIDPreservedSymbols.size() * 2);
  // Seed liveness: mark all summaries of every preserved GUID that is present
  // in the index.
  for (auto GUID : GUIDPreservedSymbols) {
    ValueInfo VI = Index.getValueInfo(GUID);
    if (!VI)
      continue;
    for (const auto &S : VI.getSummaryList())
      S->setLive(true);
  }

  // Add values flagged in the index as live roots to the worklist.
  for (const auto &Entry : Index) {
    auto VI = Index.getValueInfo(Entry);
    for (const auto &S : Entry.second.SummaryList) {
      if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
        updateValueInfoForIndirectCalls(Index, FS);
      if (S->isLive()) {
        LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
        Worklist.push_back(VI);
        ++LiveSymbols;
        // One live summary is enough to make the whole entry a root.
        // NOTE(review): because of this break, summaries that follow the
        // first live one in the same entry do not get
        // updateValueInfoForIndirectCalls() applied in this loop - confirm
        // that this is intended.
        break;
      }
    }
  }

  // Make value live and add it to the worklist if it was not live before.
  // IsAliasee is true when VI is reached as the aliasee of a live alias.
  auto visit = [&](ValueInfo VI, bool IsAliasee) {
    // FIXME: If we knew which edges were created for indirect call profiles,
    // we could skip them here. Any that are live should be reached via
    // other edges, e.g. reference edges. Otherwise, using a profile collected
    // on a slightly different binary might provoke preserving, importing
    // and ultimately promoting calls to functions not linked into this
    // binary, which increases the binary size unnecessarily. Note that
    // if this code changes, the importer needs to change so that edges
    // to functions marked dead are skipped.

    // Already live (any live copy counts): nothing more to do.
    if (llvm::any_of(VI.getSummaryList(),
                     [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
                       return S->isLive();
                     }))
      return;

    // We only keep live symbols that are known to be non-prevailing if any are
    // available_externally, linkonceodr, weakodr. Those symbols are discarded
    // later in the EliminateAvailableExternally pass and setting them to
    // not-live could break downstreams users of liveness information (PR36483)
    // or limit optimization opportunities.
    if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
      bool KeepAliveLinkage = false;
      bool Interposable = false;
      for (const auto &S : VI.getSummaryList()) {
        if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
            S->linkage() == GlobalValue::WeakODRLinkage ||
            S->linkage() == GlobalValue::LinkOnceODRLinkage)
          KeepAliveLinkage = true;
        else if (GlobalValue::isInterposableLinkage(S->linkage()))
          Interposable = true;
      }

      // Aliasees bypass both checks below and are always made live.
      if (!IsAliasee) {
        if (!KeepAliveLinkage)
          return;

        // A mix of keep-alive and interposable copies is treated as a fatal
        // inconsistency.
        if (Interposable)
          report_fatal_error(
              "Interposable and available_externally/linkonce_odr/weak_odr "
              "symbol");
      }
    }

    for (const auto &S : VI.getSummaryList())
      S->setLive(true);
    ++LiveSymbols;
    Worklist.push_back(VI);
  };

  // Propagate liveness until no new value is reached.
  while (!Worklist.empty()) {
    auto VI = Worklist.pop_back_val();
    for (const auto &Summary : VI.getSummaryList()) {
      if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
        // If this is an alias, visit the aliasee VI to ensure that all copies
        // are marked live and it is added to the worklist for further
        // processing of its references.
        visit(AS->getAliaseeVI(), true);
        continue;
      }
      for (auto Ref : Summary->refs())
        visit(Ref, false);
      if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
        for (auto Call : FS->calls())
          visit(Call.first, false);
    }
  }
  Index.setWithGlobalValueDeadStripping();

  // Everything in the index that was not counted as live is reported as dead.
  unsigned DeadSymbols = Index.size() - LiveSymbols;
  LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
                    << " symbols Dead \n");
  NumDeadSymbols += DeadSymbols;
  NumLiveSymbols += LiveSymbols;
}
1473 
1474 // Compute dead symbols and propagate constants in combined index.
1475 void llvm::computeDeadSymbolsWithConstProp(
1476     ModuleSummaryIndex &Index,
1477     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1478     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1479     bool ImportEnabled) {
1480   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1481                                            isPrevailing);
1482   if (ImportEnabled)
1483     Index.propagateAttributes(GUIDPreservedSymbols);
1484 }
1485 
1486 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1487 /// \p ModulePath.
1488 void llvm::gatherImportedSummariesForModule(
1489     StringRef ModulePath,
1490     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1491     const FunctionImporter::ImportMapTy &ImportList,
1492     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
1493     GVSummaryPtrSet &DecSummaries) {
1494   // Include all summaries from the importing module.
1495   ModuleToSummariesForIndex[std::string(ModulePath)] =
1496       ModuleToDefinedGVSummaries.lookup(ModulePath);
1497   // Include summaries for imports.
1498   for (const auto &ILI : ImportList) {
1499     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1500 
1501     const auto &DefinedGVSummaries =
1502         ModuleToDefinedGVSummaries.lookup(ILI.first);
1503     for (const auto &[GUID, Type] : ILI.second) {
1504       const auto &DS = DefinedGVSummaries.find(GUID);
1505       assert(DS != DefinedGVSummaries.end() &&
1506              "Expected a defined summary for imported global value");
1507       if (Type == GlobalValueSummary::Declaration)
1508         DecSummaries.insert(DS->second);
1509 
1510       SummariesForIndex[GUID] = DS->second;
1511     }
1512   }
1513 }
1514 
1515 /// Emit the files \p ModulePath will import from into \p OutputFilename.
1516 std::error_code llvm::EmitImportsFiles(
1517     StringRef ModulePath, StringRef OutputFilename,
1518     const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1519   std::error_code EC;
1520   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1521   if (EC)
1522     return EC;
1523   for (const auto &ILI : ModuleToSummariesForIndex)
1524     // The ModuleToSummariesForIndex map includes an entry for the current
1525     // Module (needed for writing out the index files). We don't want to
1526     // include it in the imports file, however, so filter it out.
1527     if (ILI.first != ModulePath)
1528       ImportsOS << ILI.first << "\n";
1529   return std::error_code();
1530 }
1531 
1532 bool llvm::convertToDeclaration(GlobalValue &GV) {
1533   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1534                     << "\n");
1535   if (Function *F = dyn_cast<Function>(&GV)) {
1536     F->deleteBody();
1537     F->clearMetadata();
1538     F->setComdat(nullptr);
1539   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1540     V->setInitializer(nullptr);
1541     V->setLinkage(GlobalValue::ExternalLinkage);
1542     V->clearMetadata();
1543     V->setComdat(nullptr);
1544   } else {
1545     GlobalValue *NewGV;
1546     if (GV.getValueType()->isFunctionTy())
1547       NewGV =
1548           Function::Create(cast<FunctionType>(GV.getValueType()),
1549                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1550                            "", GV.getParent());
1551     else
1552       NewGV =
1553           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1554                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1555                              /*init*/ nullptr, "",
1556                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1557                              GV.getType()->getAddressSpace());
1558     NewGV->takeName(&GV);
1559     GV.replaceAllUsesWith(NewGV);
1560     return false;
1561   }
1562   if (!GV.isImplicitDSOLocal())
1563     GV.setDSOLocal(false);
1564   return true;
1565 }
1566 
/// Apply the decisions recorded in \p DefinedGlobals (a GUID -> summary map)
/// to the functions, global variables and aliases of \p TheModule.
///
/// For each global value that has an entry in \p DefinedGlobals this may
/// propagate function attributes from its function summary (functions only,
/// and only when \p PropagateAttrs is set), apply a non-default visibility
/// from the summary, and either update the linkage to the one in the summary
/// or convert the value to a declaration. Afterwards, members of comdats found
/// to be non-prevailing, and aliases that reference them, are changed to
/// available_externally.
void llvm::thinLTOFinalizeInModule(Module &TheModule,
                                   const GVSummaryMapTy &DefinedGlobals,
                                   bool PropagateAttrs) {
  // Comdats whose name-matching member became a declaration for the linker
  // while being processed below.
  DenseSet<Comdat *> NonPrevailingComdats;
  auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
    // See if the global summary analysis computed a new resolved linkage.
    const auto &GS = DefinedGlobals.find(GV.getGUID());
    if (GS == DefinedGlobals.end())
      return;

    // Copy the function flags recorded in the summary onto the IR function,
    // touching only attributes that are not already set.
    if (Propagate)
      if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
        if (Function *F = dyn_cast<Function>(&GV)) {
          // TODO: propagate ReadNone and ReadOnly.
          if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
            F->setDoesNotAccessMemory();

          if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
            F->setOnlyReadsMemory();

          if (FS->fflags().NoRecurse && !F->doesNotRecurse())
            F->setDoesNotRecurse();

          if (FS->fflags().NoUnwind && !F->doesNotThrow())
            F->setDoesNotThrow();
        }
      }

    auto NewLinkage = GS->second->linkage();
    if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
        // Don't internalize anything here, because the code below
        // lacks necessary correctness checks. Leave this job to
        // LLVM 'internalize' pass.
        GlobalValue::isLocalLinkage(NewLinkage) ||
        // In case it was dead and already converted to declaration.
        GV.isDeclaration())
      return;

    // Set the potentially more constraining visibility computed from summaries.
    // The DefaultVisibility condition is because older GlobalValueSummary does
    // not record DefaultVisibility and we don't want to change protected/hidden
    // to default.
    if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
      GV.setVisibility(GS->second->getVisibility());

    // Linkage already matches the summary: nothing further to change.
    if (NewLinkage == GV.getLinkage())
      return;

    // Check for a non-prevailing def that has interposable linkage
    // (e.g. non-odr weak or linkonce). In that case we can't simply
    // convert to available_externally, since it would lose the
    // interposable property and possibly get inlined. Simply drop
    // the definition in that case.
    if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
        GlobalValue::isInterposableLinkage(GV.getLinkage())) {
      if (!convertToDeclaration(GV))
        // FIXME: Change this to collect replaced GVs and later erase
        // them from the parent module once thinLTOResolvePrevailingGUID is
        // changed to enable this for aliases.
        llvm_unreachable("Expected GV to be converted");
    } else {
      // If all copies of the original symbol had global unnamed addr and
      // linkonce_odr linkage, or if all of them had local unnamed addr linkage
      // and are constants, then it should be an auto hide symbol. In that case
      // the thin link would have marked it as CanAutoHide. Add hidden
      // visibility to the symbol to preserve the property.
      if (NewLinkage == GlobalValue::WeakODRLinkage &&
          GS->second->canAutoHide()) {
        assert(GV.canBeOmittedFromSymbolTable());
        GV.setVisibility(GlobalValue::HiddenVisibility);
      }

      LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
                        << "` from " << GV.getLinkage() << " to " << NewLinkage
                        << "\n");
      GV.setLinkage(NewLinkage);
    }
    // Remove declarations from comdats, including available_externally
    // as this is a declaration for the linker, and will be dropped eventually.
    // It is illegal for comdats to contain declarations.
    auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
    if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
      // Only a member that shares its name with the comdat marks the whole
      // comdat as non-prevailing.
      if (GO->getComdat()->getName() == GO->getName())
        NonPrevailingComdats.insert(GO->getComdat());
      GO->setComdat(nullptr);
    }
  };

  // Process functions and global now
  // Attribute propagation is only requested for functions; globals and
  // aliases use the lambda's default of Propagate = false.
  for (auto &GV : TheModule)
    FinalizeInModule(GV, PropagateAttrs);
  for (auto &GV : TheModule.globals())
    FinalizeInModule(GV);
  for (auto &GV : TheModule.aliases())
    FinalizeInModule(GV);

  // For a non-prevailing comdat, all its members must be available_externally.
  // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
  // local linkage GlobalValues.
  if (NonPrevailingComdats.empty())
    return;
  for (auto &GO : TheModule.global_objects()) {
    if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
      GO.setComdat(nullptr);
      GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
    }
  }
  // Iterate to a fixed point: each pass switches the aliases whose aliasee
  // object is available_externally, and repeats while any alias changed.
  bool Changed;
  do {
    Changed = false;
    // If an alias references a GlobalValue in a non-prevailing comdat, change
    // it to available_externally. For simplicity we only handle GlobalValue and
    // ConstantExpr with a base object. ConstantExpr without a base object is
    // unlikely used in a COMDAT.
    for (auto &GA : TheModule.aliases()) {
      if (GA.hasAvailableExternallyLinkage())
        continue;
      GlobalObject *Obj = GA.getAliaseeObject();
      assert(Obj && "aliasee without an base object is unimplemented");
      if (Obj->hasAvailableExternallyLinkage()) {
        GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
        Changed = true;
      }
    }
  } while (Changed);
}
1693 
1694 /// Run internalization on \p TheModule based on symmary analysis.
1695 void llvm::thinLTOInternalizeModule(Module &TheModule,
1696                                     const GVSummaryMapTy &DefinedGlobals) {
1697   // Declare a callback for the internalize pass that will ask for every
1698   // candidate GlobalValue if it can be internalized or not.
1699   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1700     // It may be the case that GV is on a chain of an ifunc, its alias and
1701     // subsequent aliases. In this case, the summary for the value is not
1702     // available.
1703     if (isa<GlobalIFunc>(&GV) ||
1704         (isa<GlobalAlias>(&GV) &&
1705          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1706       return true;
1707 
1708     // Lookup the linkage recorded in the summaries during global analysis.
1709     auto GS = DefinedGlobals.find(GV.getGUID());
1710     if (GS == DefinedGlobals.end()) {
1711       // Must have been promoted (possibly conservatively). Find original
1712       // name so that we can access the correct summary and see if it can
1713       // be internalized again.
1714       // FIXME: Eventually we should control promotion instead of promoting
1715       // and internalizing again.
1716       StringRef OrigName =
1717           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1718       std::string OrigId = GlobalValue::getGlobalIdentifier(
1719           OrigName, GlobalValue::InternalLinkage,
1720           TheModule.getSourceFileName());
1721       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1722       if (GS == DefinedGlobals.end()) {
1723         // Also check the original non-promoted non-globalized name. In some
1724         // cases a preempted weak value is linked in as a local copy because
1725         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1726         // In that case, since it was originally not a local value, it was
1727         // recorded in the index using the original name.
1728         // FIXME: This may not be needed once PR27866 is fixed.
1729         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1730         assert(GS != DefinedGlobals.end());
1731       }
1732     }
1733     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1734   };
1735 
1736   // FIXME: See if we can just internalize directly here via linkage changes
1737   // based on the index, rather than invoking internalizeModule.
1738   internalizeModule(TheModule, MustPreserveGV);
1739 }
1740 
1741 /// Make alias a clone of its aliasee.
1742 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1743   Function *Fn = cast<Function>(GA->getAliaseeObject());
1744 
1745   ValueToValueMapTy VMap;
1746   Function *NewFn = CloneFunction(Fn, VMap);
1747   // Clone should use the original alias's linkage, visibility and name, and we
1748   // ensure all uses of alias instead use the new clone (casted if necessary).
1749   NewFn->setLinkage(GA->getLinkage());
1750   NewFn->setVisibility(GA->getVisibility());
1751   GA->replaceAllUsesWith(NewFn);
1752   NewFn->takeName(GA);
1753   return NewFn;
1754 }
1755 
1756 // Internalize values that we marked with specific attribute
1757 // in processGlobalForThinLTO.
1758 static void internalizeGVsAfterImport(Module &M) {
1759   for (auto &GV : M.globals())
1760     // Skip GVs which have been converted to declarations
1761     // by dropDeadSymbols.
1762     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1763       GV.setLinkage(GlobalValue::InternalLinkage);
1764       GV.setVisibility(GlobalValue::DefaultVisibility);
1765     }
1766 }
1767 
// Automatically import functions in Module \p DestModule based on the summaries
// index.
//
// \p ImportList maps each source module name to the GUIDs to import from it
// together with their import kind. Source modules are loaded through
// ModuleLoader and processed in sorted name order. Within a module, only
// functions, global variables and aliases whose import kind is Definition are
// materialized and handed to the IRMover; an imported alias is replaced by a
// clone of its aliasee function.
//
// Returns an Error if loading, materializing or moving fails. Otherwise the
// total imported count is returned through the bool result.
// NOTE(review): the unsigned count is converted to bool on return, so the
// result presumably reads as "something was imported" - confirm with callers.
Expected<bool> FunctionImporter::importFunctions(
    Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
  LLVM_DEBUG(dbgs() << "Starting import for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  // ImportedCount covers every imported global; ImportedGVCount is the subset
  // that are global variables.
  unsigned ImportedCount = 0, ImportedGVCount = 0;

  IRMover Mover(DestModule);
  // Do the actual import of functions now, one Module at a time
  // The std::set yields the module names in sorted order.
  std::set<StringRef> ModuleNameOrderedList;
  for (const auto &FunctionsToImportPerModule : ImportList) {
    ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
  }

  // Look up the import kind requested for GUID, or std::nullopt when the GUID
  // is not in the given per-module list at all.
  auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
                           GlobalValue::GUID GUID)
      -> std::optional<GlobalValueSummary::ImportKind> {
    auto Iter = GUIDToImportType.find(GUID);
    if (Iter == GUIDToImportType.end())
      return std::nullopt;
    return Iter->second;
  };

  for (const auto &Name : ModuleNameOrderedList) {
    // Get the module for the import
    const auto &FunctionsToImportPerModule = ImportList.find(Name);
    assert(FunctionsToImportPerModule != ImportList.end());
    Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
    if (!SrcModuleOrErr)
      return SrcModuleOrErr.takeError();
    std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
    assert(&DestModule.getContext() == &SrcModule->getContext() &&
           "Context mismatch");

    // If modules were created with lazy metadata loading, materialize it
    // now, before linking it (otherwise this will be a noop).
    if (Error Err = SrcModule->materializeMetadata())
      return std::move(Err);

    auto &ImportGUIDs = FunctionsToImportPerModule->second;

    // Find the globals to import
    SetVector<GlobalValue *> GlobalsToImport;
    // Functions: only those requested as definitions are materialized and
    // queued.
    for (Function &F : *SrcModule) {
      if (!F.hasName())
        continue;
      auto GUID = F.getGUID();
      auto MaybeImportType = getImportType(ImportGUIDs, GUID);

      bool ImportDefinition =
          (MaybeImportType &&
           (*MaybeImportType == GlobalValueSummary::Definition));

      LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
                        << " importing function"
                        << (ImportDefinition
                                ? " definition "
                                : (MaybeImportType ? " declaration " : " "))
                        << GUID << " " << F.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (ImportDefinition) {
        if (Error Err = F.materialize())
          return std::move(Err);
        // MemProf should match function's definition and summary,
        // 'thinlto_src_module' is needed.
        if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
          // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
          // statistics and debugging.
          F.setMetadata(
              "thinlto_src_module",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getModuleIdentifier())}));
          F.setMetadata(
              "thinlto_src_file",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getSourceFileName())}));
        }
        GlobalsToImport.insert(&F);
      }
    }
    // Global variables: same selection as for functions.
    for (GlobalVariable &GV : SrcModule->globals()) {
      if (!GV.hasName())
        continue;
      auto GUID = GV.getGUID();
      auto MaybeImportType = getImportType(ImportGUIDs, GUID);

      bool ImportDefinition =
          (MaybeImportType &&
           (*MaybeImportType == GlobalValueSummary::Definition));

      LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
                        << " importing global"
                        << (ImportDefinition
                                ? " definition "
                                : (MaybeImportType ? " declaration " : " "))
                        << GUID << " " << GV.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (ImportDefinition) {
        if (Error Err = GV.materialize())
          return std::move(Err);
        // insert() reports whether the variable was newly added, so each
        // variable is counted at most once.
        ImportedGVCount += GlobalsToImport.insert(&GV);
      }
    }
    // Aliases: unnamed aliases and aliases whose aliasee object is an ifunc
    // are skipped.
    for (GlobalAlias &GA : SrcModule->aliases()) {
      if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
        continue;
      auto GUID = GA.getGUID();
      auto MaybeImportType = getImportType(ImportGUIDs, GUID);

      bool ImportDefinition =
          (MaybeImportType &&
           (*MaybeImportType == GlobalValueSummary::Definition));

      LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
                        << " importing alias"
                        << (ImportDefinition
                                ? " definition "
                                : (MaybeImportType ? " declaration " : " "))
                        << GUID << " " << GA.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (ImportDefinition) {
        if (Error Err = GA.materialize())
          return std::move(Err);
        // Import alias as a copy of its aliasee.
        GlobalObject *GO = GA.getAliaseeObject();
        if (Error Err = GO->materialize())
          return std::move(Err);
        auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
        LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
                          << GO->getName() << " from "
                          << SrcModule->getSourceFileName() << "\n");
        if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
          // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
          // statistics and debugging.
          Fn->setMetadata(
              "thinlto_src_module",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getModuleIdentifier())}));
          Fn->setMetadata(
              "thinlto_src_file",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getSourceFileName())}));
        }
        // The clone, not the alias itself, is what gets imported.
        GlobalsToImport.insert(Fn);
      }
    }

    // Upgrade debug info after we're done materializing all the globals and we
    // have loaded all the required metadata!
    UpgradeDebugInfo(*SrcModule);

    // Set the partial sample profile ratio in the profile summary module flag
    // of the imported source module, if applicable, so that the profile summary
    // module flag will match with that of the destination module when it's
    // imported.
    SrcModule->setPartialSampleProfileRatio(Index);

    // Link in the specified functions.
    // NOTE(review): a true result from renameModuleForThinLTO ends the import
    // early with a plain `true` rather than an Error; the test driver in this
    // file reports the same result as "Error renaming module" - confirm that
    // returning success here is intended.
    if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
                               &GlobalsToImport))
      return true;

    if (PrintImports) {
      for (const auto *GV : GlobalsToImport)
        dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
               << " from " << SrcModule->getSourceFileName() << "\n";
    }

    // Ownership of the source module is handed to the mover; SrcModule must
    // not be used after this call.
    if (Error Err = Mover.move(std::move(SrcModule),
                               GlobalsToImport.getArrayRef(), nullptr,
                               /*IsPerformingImport=*/true))
      return createStringError(errc::invalid_argument,
                               Twine("Function Import: link error: ") +
                                   toString(std::move(Err)));

    ImportedCount += GlobalsToImport.size();
    NumImportedModules++;
  }

  internalizeGVsAfterImport(DestModule);

  // Everything imported that is not a global variable is accounted as a
  // function (this includes the clones created for aliases).
  NumImportedFunctions += (ImportedCount - ImportedGVCount);
  NumImportedGlobalVars += ImportedGVCount;

  // TODO: Print counters for definitions and declarations in the debugging log.
  LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
                    << " functions for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
                    << " global variables for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  return ImportedCount;
}
1966 
1967 static bool doImportingForModuleForTest(
1968     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1969                    isPrevailing) {
1970   if (SummaryFile.empty())
1971     report_fatal_error("error: -function-import requires -summary-file\n");
1972   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1973       getModuleSummaryIndexForFile(SummaryFile);
1974   if (!IndexPtrOrErr) {
1975     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1976                           "Error loading file '" + SummaryFile + "': ");
1977     return false;
1978   }
1979   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1980 
1981   // First step is collecting the import list.
1982   FunctionImporter::ImportMapTy ImportList;
1983   // If requested, simply import all functions in the index. This is used
1984   // when testing distributed backend handling via the opt tool, when
1985   // we have distributed indexes containing exactly the summaries to import.
1986   if (ImportAllIndex)
1987     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1988                                                       *Index, ImportList);
1989   else
1990     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1991                                              isPrevailing, *Index, ImportList);
1992 
1993   // Conservatively mark all internal values as promoted. This interface is
1994   // only used when doing importing via the function importing pass. The pass
1995   // is only enabled when testing importing via the 'opt' tool, which does
1996   // not do the ThinLink that would normally determine what values to promote.
1997   for (auto &I : *Index) {
1998     for (auto &S : I.second.SummaryList) {
1999       if (GlobalValue::isLocalLinkage(S->linkage()))
2000         S->setLinkage(GlobalValue::ExternalLinkage);
2001     }
2002   }
2003 
2004   // Next we need to promote to global scope and rename any local values that
2005   // are potentially exported to other modules.
2006   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
2007                              /*GlobalsToImport=*/nullptr)) {
2008     errs() << "Error renaming module\n";
2009     return true;
2010   }
2011 
2012   // Perform the import now.
2013   auto ModuleLoader = [&M](StringRef Identifier) {
2014     return loadFile(std::string(Identifier), M.getContext());
2015   };
2016   FunctionImporter Importer(*Index, ModuleLoader,
2017                             /*ClearDSOLocalOnDeclarations=*/false);
2018   Expected<bool> Result = Importer.importFunctions(M, ImportList);
2019 
2020   // FIXME: Probably need to propagate Errors through the pass manager.
2021   if (!Result) {
2022     logAllUnhandledErrors(Result.takeError(), errs(),
2023                           "Error importing module: ");
2024     return true;
2025   }
2026 
2027   return true;
2028 }
2029 
2030 PreservedAnalyses FunctionImportPass::run(Module &M,
2031                                           ModuleAnalysisManager &AM) {
2032   // This is only used for testing the function import pass via opt, where we
2033   // don't have prevailing information from the LTO context available, so just
2034   // conservatively assume everything is prevailing (which is fine for the very
2035   // limited use of prevailing checking in this pass).
2036   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
2037     return true;
2038   };
2039   if (!doImportingForModuleForTest(M, isPrevailing))
2040     return PreservedAnalyses::all();
2041 
2042   return PreservedAnalyses::none();
2043 }
2044