xref: /llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision 51a3bc12176ab46f3d2ce6ad4aa26af088d3cf14)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalAlias.h"
24 #include "llvm/IR/GlobalObject.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Metadata.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/IR/ModuleSummaryIndex.h"
30 #include "llvm/IRReader/IRReader.h"
31 #include "llvm/Linker/IRMover.h"
32 #include "llvm/ProfileData/PGOCtxProfReader.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
// Statistics counters for this file.
//
// Decisions taken while computing imports during the thin link (the
// "*ThinLink" counters):
STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
// What was actually imported in the backend:
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
// Liveness information about the symbols in the index:
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
/// Limit on instruction count of imported functions. This is the base
/// threshold (default 100) that the multiplier and factor options below are
/// documented as scaling.
static cl::opt<unsigned> ImportInstrLimit(
    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import functions with less than N instructions"));

/// Upper bound on the number of functions imported; a negative value (the
/// default, -1) means no limit, per the option description.
static cl::opt<int> ImportCutoff(
    "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import first N functions if N>=0 (default -1)"));

/// When set, selectCallee() no longer rejects candidates for being too large
/// or for being marked noinline.
static cl::opt<bool>
    ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                   cl::desc("Import functions with noinline attribute"));

/// Decay factor (default 0.7) applied to the instruction limit before
/// processing newly imported functions.
static cl::opt<float>
    ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
                      cl::Hidden, cl::value_desc("x"),
                      cl::desc("As we import functions, multiply the "
                               "`import-instr-limit` threshold by this factor "
                               "before processing newly imported functions"));

/// Same as ImportInstrFactor, but for functions reached through a hot
/// callsite (default 1.0, i.e. no decay).
static cl::opt<float> ImportHotInstrFactor(
    "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc("As we import functions called from hot callsite, multiply the "
             "`import-instr-limit` threshold by this factor "
             "before processing newly imported functions"));

/// Threshold multiplier for hot callsites (default 10).
static cl::opt<float> ImportHotMultiplier(
    "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
    cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));

/// Threshold multiplier for critical callsites (default 100).
static cl::opt<float> ImportCriticalMultiplier(
    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc(
        "Multiply the `import-instr-limit` threshold for critical callsites"));

/// Threshold multiplier for cold callsites (default 0).
// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
    "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
    cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));

/// Debugging aid: print the functions that get imported.
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                  cl::desc("Print imported functions"));

/// Debugging aid: print information about functions rejected for importing.
static cl::opt<bool> PrintImportFailures(
    "print-import-failures", cl::init(false), cl::Hidden,
    cl::desc("Print information for functions rejected for importing"));

/// Whether dead symbols are computed (enabled by default).
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
                                 cl::desc("Compute dead symbols"));

/// Whether import metadata ('thinlto_src_module', 'thinlto_src_file') is
/// enabled (disabled by default).
static cl::opt<bool> EnableImportMetadata(
    "enable-import-metadata", cl::init(false), cl::Hidden,
    cl::desc("Enable import metadata like 'thinlto_src_module' and "
             "'thinlto_src_file'"));
130 
/// Summary file to use for function importing when using -function-import from
/// the command line.
static cl::opt<std::string>
    SummaryFile("summary-file",
                cl::desc("The summary file to use for function importing."));

/// Used when testing importing from distributed indexes via opt
/// -function-import.
static cl::opt<bool>
    ImportAllIndex("import-all-index",
                   cl::desc("Import all external functions in index."));

/// This is a test-only option.
/// If this option is enabled, the ThinLTO indexing step will import each
/// function declaration as a fallback. In a real build this may increase RAM
/// usage of the indexing step unnecessarily.
/// TODO: Implement selective import (based on combined summary analysis) to
/// ensure the imported function has a use case in the postlink pipeline.
static cl::opt<bool> ImportDeclaration(
    "import-declaration", cl::init(false), cl::Hidden,
    cl::desc("If true, import function declaration as fallback if the function "
             "definition is not imported."));

/// Pass a workload description file - an example of workload would be the
/// functions executed to satisfy a RPC request. A workload is defined by a root
/// function and the list of functions that are (frequently) needed to satisfy
/// it. The module that defines the root will have all those functions imported.
/// The file contains a JSON dictionary. The keys are root functions, the values
/// are lists of functions to import in the module defining the root. It is
/// assumed -funique-internal-linkage-names was used, thus ensuring function
/// names are unique even for local linkage ones.
/// The file named by this option is read by
/// WorkloadImportsManager::loadFromJson().
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists of "
             "functions to import in the module defining the root. It is "
             "assumed -funique-internal-linkage-names was used, to ensure "
             "local linkage functions have unique names. For example: \n"
             "{\n"
             "  \"rootFunction_1\": [\"function_to_import_1\", "
             "\"function_to_import_2\"], \n"
             "  \"rootFunction_2\": [\"function_to_import_3\", "
             "\"function_to_import_4\"] \n"
             "}"),
    cl::Hidden);
176 
/// Path to a contextual profile. The file named by this option is read by
/// WorkloadImportsManager::loadFromCtxProf().
static cl::opt<std::string>
    ContextualProfile("thinlto-pgo-ctx-prof",
                      cl::desc("Path to a contextual profile."), cl::Hidden);
180 
namespace llvm {
// Declaration only: the option object itself is defined in another
// translation unit.
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
184 
185 // Load lazily a module from \p FileName in \p Context.
186 static std::unique_ptr<Module> loadFile(const std::string &FileName,
187                                         LLVMContext &Context) {
188   SMDiagnostic Err;
189   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
190   // Metadata isn't loaded until functions are imported, to minimize
191   // the memory overhead.
192   std::unique_ptr<Module> Result =
193       getLazyIRFileModule(FileName, Err, Context,
194                           /* ShouldLazyLoadMetadata = */ true);
195   if (!Result) {
196     Err.print("function-import", errs());
197     report_fatal_error("Abort");
198   }
199 
200   return Result;
201 }
202 
203 static bool shouldSkipLocalInAnotherModule(const GlobalValueSummary *RefSummary,
204                                            size_t NumDefs,
205                                            StringRef ImporterModule) {
206   // We can import a local when there is one definition.
207   if (NumDefs == 1)
208     return false;
209   // In other cases, make sure we import the copy in the caller's module if the
210   // referenced value has local linkage. The only time a local variable can
211   // share an entry in the index is if there is a local with the same name in
212   // another module that had the same source file name (in a different
213   // directory), where each was compiled in their own directory so there was not
214   // distinguishing path.
215   return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
216          RefSummary->modulePath() != ImporterModule;
217 }
218 
219 /// Given a list of possible callee implementation for a call site, qualify the
220 /// legality of importing each. The return is a range of pairs. Each pair
221 /// corresponds to a candidate. The first value is the ImportFailureReason for
222 /// that candidate, the second is the candidate.
223 static auto qualifyCalleeCandidates(
224     const ModuleSummaryIndex &Index,
225     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
226     StringRef CallerModulePath) {
227   return llvm::map_range(
228       CalleeSummaryList,
229       [&Index, CalleeSummaryList,
230        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
231           -> std::pair<FunctionImporter::ImportFailureReason,
232                        const GlobalValueSummary *> {
233         auto *GVSummary = SummaryPtr.get();
234         if (!Index.isGlobalValueLive(GVSummary))
235           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
236 
237         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
238           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
239                   GVSummary};
240 
241         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
242 
243         // Ignore any callees that aren't actually functions. This could happen
244         // in the case of GUID hash collisions. It could also happen in theory
245         // for SamplePGO profiles collected on old versions of the code after
246         // renaming, since we synthesize edges to any inlined callees appearing
247         // in the profile.
248         if (!Summary)
249           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
250 
251         // If this is a local function, make sure we import the copy in the
252         // caller's module. The only time a local function can share an entry in
253         // the index is if there is a local with the same name in another module
254         // that had the same source file name (in a different directory), where
255         // each was compiled in their own directory so there was not
256         // distinguishing path.
257         // If the local function is from another module, it must be a reference
258         // due to indirect call profile data since a function pointer can point
259         // to a local in another module. Do the import from another module if
260         // there is only one entry in the list or when all files in the program
261         // are compiled with full path - in both cases the local function has
262         // unique PGO name and GUID.
263         if (shouldSkipLocalInAnotherModule(Summary, CalleeSummaryList.size(),
264                                            CallerModulePath))
265           return {
266               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
267               GVSummary};
268 
269         // Skip if it isn't legal to import (e.g. may reference unpromotable
270         // locals).
271         if (Summary->notEligibleToImport())
272           return {FunctionImporter::ImportFailureReason::NotEligible,
273                   GVSummary};
274 
275         return {FunctionImporter::ImportFailureReason::None, GVSummary};
276       });
277 }
278 
279 /// Given a list of possible callee implementation for a call site, select one
280 /// that fits the \p Threshold for function definition import. If none are
281 /// found, the Reason will give the last reason for the failure (last, in the
282 /// order of CalleeSummaryList entries). While looking for a callee definition,
283 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
284 /// candidate; other modules may want to know the function summary or
285 /// declaration even if a definition is not needed.
286 ///
287 /// FIXME: select "best" instead of first that fits. But what is "best"?
288 /// - The smallest: more likely to be inlined.
289 /// - The one with the least outgoing edges (already well optimized).
290 /// - One from a module already being imported from in order to reduce the
291 ///   number of source modules parsed/linked.
292 /// - One that has PGO data attached.
293 /// - [insert you fancy metric here]
294 static const GlobalValueSummary *
295 selectCallee(const ModuleSummaryIndex &Index,
296              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
297              unsigned Threshold, StringRef CallerModulePath,
298              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
299              FunctionImporter::ImportFailureReason &Reason) {
300   // Records the last summary with reason noinline or too-large.
301   TooLargeOrNoInlineSummary = nullptr;
302   auto QualifiedCandidates =
303       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
304   for (auto QualifiedValue : QualifiedCandidates) {
305     Reason = QualifiedValue.first;
306     // Skip a summary if its import is not (proved to be) legal.
307     if (Reason != FunctionImporter::ImportFailureReason::None)
308       continue;
309     auto *Summary =
310         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
311 
312     // Don't bother importing the definition if the chance of inlining it is
313     // not high enough (except under `--force-import-all`).
314     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
315         !ForceImportAll) {
316       TooLargeOrNoInlineSummary = Summary;
317       Reason = FunctionImporter::ImportFailureReason::TooLarge;
318       continue;
319     }
320 
321     // Don't bother importing the definition if we can't inline it anyway.
322     if (Summary->fflags().NoInline && !ForceImportAll) {
323       TooLargeOrNoInlineSummary = Summary;
324       Reason = FunctionImporter::ImportFailureReason::NoInline;
325       continue;
326     }
327 
328     return Summary;
329   }
330   return nullptr;
331 }
332 
namespace {

/// A worklist entry: a function summary paired with an import threshold.
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;

} // anonymous namespace
338 
339 /// Import globals referenced by a function or other globals that are being
340 /// imported, if importing such global is possible.
341 class GlobalsImporter final {
342   const ModuleSummaryIndex &Index;
343   const GVSummaryMapTy &DefinedGVSummaries;
344   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
345       IsPrevailing;
346   FunctionImporter::ImportMapTy &ImportList;
347   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
348 
349   bool shouldImportGlobal(const ValueInfo &VI) {
350     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
351     if (GVS == DefinedGVSummaries.end())
352       return true;
353     // We should not skip import if the module contains a non-prevailing
354     // definition with interposable linkage type. This is required for
355     // correctness in the situation where there is a prevailing def available
356     // for import and marked read-only. In this case, the non-prevailing def
357     // will be converted to a declaration, while the prevailing one becomes
358     // internal, thus no definitions will be available for linking. In order to
359     // prevent undefined symbol link error, the prevailing definition must be
360     // imported.
361     // FIXME: Consider adding a check that the suitable prevailing definition
362     // exists and marked read-only.
363     if (VI.getSummaryList().size() > 1 &&
364         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
365         !IsPrevailing(VI.getGUID(), GVS->second))
366       return true;
367 
368     return false;
369   }
370 
371   void
372   onImportingSummaryImpl(const GlobalValueSummary &Summary,
373                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
374     for (const auto &VI : Summary.refs()) {
375       if (!shouldImportGlobal(VI)) {
376         LLVM_DEBUG(
377             dbgs() << "Ref ignored! Target already in destination module.\n");
378         continue;
379       }
380 
381       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
382 
383       for (const auto &RefSummary : VI.getSummaryList()) {
384         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
385         // Functions could be referenced by global vars - e.g. a vtable; but we
386         // don't currently imagine a reason those would be imported here, rather
387         // than as part of the logic deciding which functions to import (i.e.
388         // based on profile information). Should we decide to handle them here,
389         // we can refactor accordingly at that time.
390         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
391             shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
392                                            Summary.modulePath()))
393           continue;
394 
395         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
396         // Otherwise, definition should take precedence over declaration.
397         auto [Iter, Inserted] =
398             ImportList[RefSummary->modulePath()].try_emplace(
399                 VI.getGUID(), GlobalValueSummary::Definition);
400         // Only update stat and exports if we haven't already imported this
401         // variable.
402         if (!Inserted) {
403           // Set the value to 'std::min(existing-value, new-value)' to make
404           // sure a definition takes precedence over a declaration.
405           Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
406           break;
407         }
408         NumImportedGlobalVarsThinLink++;
409         // Any references made by this variable will be marked exported
410         // later, in ComputeCrossModuleImport, after import decisions are
411         // complete, which is more efficient than adding them here.
412         if (ExportLists)
413           (*ExportLists)[RefSummary->modulePath()].insert(VI);
414 
415         // If variable is not writeonly we attempt to recursively analyze
416         // its references in order to import referenced constants.
417         if (!Index.isWriteOnly(GVS))
418           Worklist.emplace_back(GVS);
419         break;
420       }
421     }
422   }
423 
424 public:
425   GlobalsImporter(
426       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
427       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
428           IsPrevailing,
429       FunctionImporter::ImportMapTy &ImportList,
430       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
431       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
432         IsPrevailing(IsPrevailing), ImportList(ImportList),
433         ExportLists(ExportLists) {}
434 
435   void onImportingSummary(const GlobalValueSummary &Summary) {
436     SmallVector<const GlobalVarSummary *, 128> Worklist;
437     onImportingSummaryImpl(Summary, Worklist);
438     while (!Worklist.empty())
439       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
440   }
441 };
442 
// Forward declaration; used below to print an ImportFailureReason in debug
// output.
static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
444 
/// Determine the list of imports and exports for each module.
///
/// Instances are obtained through create(); the constructor is protected so
/// that only derived classes (e.g. WorkloadImportsManager) can call it
/// directly.
class ModuleImportsManager {
protected:
  /// Callback telling whether the given summary is the prevailing definition
  /// for the given GUID.
  function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
      IsPrevailing;
  const ModuleSummaryIndex &Index;
  /// Optional per-module export sets; null when exports need not be recorded.
  DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;

  ModuleImportsManager(
      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          IsPrevailing,
      const ModuleSummaryIndex &Index,
      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
      : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}

public:
  virtual ~ModuleImportsManager() = default;

  /// Given the list of globals defined in a module, compute the list of imports
  /// as well as the list of "exports", i.e. the list of symbols referenced from
  /// another module (that may require promotion).
  virtual void
  computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
                         StringRef ModName,
                         FunctionImporter::ImportMapTy &ImportList);

  /// Factory for import managers.
  // NOTE(review): the definition is not visible in this part of the file;
  // presumably it selects WorkloadImportsManager when a workload description
  // was supplied - confirm against the definition.
  static std::unique_ptr<ModuleImportsManager>
  create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
             IsPrevailing,
         const ModuleSummaryIndex &Index,
         DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
             nullptr);
};
478 
479 /// A ModuleImportsManager that operates based on a workload definition (see
480 /// -thinlto-workload-def). For modules that do not define workload roots, it
481 /// applies the base ModuleImportsManager import policy.
482 class WorkloadImportsManager : public ModuleImportsManager {
483   // Keep a module name -> value infos to import association. We use it to
484   // determine if a module's import list should be done by the base
485   // ModuleImportsManager or by us.
486   StringMap<DenseSet<ValueInfo>> Workloads;
487 
488   void
489   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
490                          StringRef ModName,
491                          FunctionImporter::ImportMapTy &ImportList) override {
492     auto SetIter = Workloads.find(ModName);
493     if (SetIter == Workloads.end()) {
494       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
495                         << " does not contain the root of any context.\n");
496       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
497                                                           ModName, ImportList);
498     }
499     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
500                       << " contains the root(s) of context(s).\n");
501 
502     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
503                         ExportLists);
504     auto &ValueInfos = SetIter->second;
505     SmallVector<EdgeInfo, 128> GlobWorklist;
506     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
507       auto It = DefinedGVSummaries.find(VI.getGUID());
508       if (It != DefinedGVSummaries.end() &&
509           IsPrevailing(VI.getGUID(), It->second)) {
510         LLVM_DEBUG(
511             dbgs() << "[Workload] " << VI.name()
512                    << " has the prevailing variant already in the module "
513                    << ModName << ". No need to import\n");
514         continue;
515       }
516       auto Candidates =
517           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
518 
519       const GlobalValueSummary *GVS = nullptr;
520       auto PotentialCandidates = llvm::map_range(
521           llvm::make_filter_range(
522               Candidates,
523               [&](const auto &Candidate) {
524                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
525                                   << " from " << Candidate.second->modulePath()
526                                   << " ImportFailureReason: "
527                                   << getFailureName(Candidate.first) << "\n");
528                 return Candidate.first ==
529                         FunctionImporter::ImportFailureReason::None;
530               }),
531           [](const auto &Candidate) { return Candidate.second; });
532       if (PotentialCandidates.empty()) {
533         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
534                           << " because can't find eligible Callee. Guid is: "
535                           << Function::getGUID(VI.name()) << "\n");
536         continue;
537       }
538       /// We will prefer importing the prevailing candidate, if not, we'll
539       /// still pick the first available candidate. The reason we want to make
540       /// sure we do import the prevailing candidate is because the goal of
541       /// workload-awareness is to enable optimizations specializing the call
542       /// graph of that workload. Suppose a function is already defined in the
543       /// module, but it's not the prevailing variant. Suppose also we do not
544       /// inline it (in fact, if it were interposable, we can't inline it),
545       /// but we could specialize it to the workload in other ways. However,
546       /// the linker would drop it in the favor of the prevailing copy.
547       /// Instead, by importing the prevailing variant (assuming also the use
548       /// of `-avail-extern-to-local`), we keep the specialization. We could
549       /// alteranatively make the non-prevailing variant local, but the
550       /// prevailing one is also the one for which we would have previously
551       /// collected profiles, making it preferrable.
552       auto PrevailingCandidates = llvm::make_filter_range(
553           PotentialCandidates, [&](const auto *Candidate) {
554             return IsPrevailing(VI.getGUID(), Candidate);
555           });
556       if (PrevailingCandidates.empty()) {
557         GVS = *PotentialCandidates.begin();
558         if (!llvm::hasSingleElement(PotentialCandidates) &&
559             GlobalValue::isLocalLinkage(GVS->linkage()))
560           LLVM_DEBUG(
561               dbgs()
562               << "[Workload] Found multiple non-prevailing candidates for "
563               << VI.name()
564               << ". This is unexpected. Are module paths passed to the "
565                  "compiler unique for the modules passed to the linker?");
566         // We could in theory have multiple (interposable) copies of a symbol
567         // when there is no prevailing candidate, if say the prevailing copy was
568         // in a native object being linked in. However, we should in theory be
569         // marking all of these non-prevailing IR copies dead in that case, in
570         // which case they won't be candidates.
571         assert(GVS->isLive());
572       } else {
573         assert(llvm::hasSingleElement(PrevailingCandidates));
574         GVS = *PrevailingCandidates.begin();
575       }
576 
577       auto ExportingModule = GVS->modulePath();
578       // We checked that for the prevailing case, but if we happen to have for
579       // example an internal that's defined in this module, it'd have no
580       // PrevailingCandidates.
581       if (ExportingModule == ModName) {
582         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
583                           << " because its defining module is the same as the "
584                              "current module\n");
585         continue;
586       }
587       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
588                         << ExportingModule << " : "
589                         << Function::getGUID(VI.name()) << "\n");
590       ImportList[ExportingModule][VI.getGUID()] =
591           GlobalValueSummary::Definition;
592       GVI.onImportingSummary(*GVS);
593       if (ExportLists)
594         (*ExportLists)[ExportingModule].insert(VI);
595     }
596     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
597   }
598 
599   void loadFromJson() {
600     // Since the workload def uses names, we need a quick lookup
601     // name->ValueInfo.
602     StringMap<ValueInfo> NameToValueInfo;
603     StringSet<> AmbiguousNames;
604     for (auto &I : Index) {
605       ValueInfo VI = Index.getValueInfo(I);
606       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
607         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
608     }
609     auto DbgReportIfAmbiguous = [&](StringRef Name) {
610       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
611         dbgs() << "[Workload] Function name " << Name
612                << " present in the workload definition is ambiguous. Consider "
613                   "compiling with -funique-internal-linkage-names.";
614       });
615     };
616     std::error_code EC;
617     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
618     if (std::error_code EC = BufferOrErr.getError()) {
619       report_fatal_error("Failed to open context file");
620       return;
621     }
622     auto Buffer = std::move(BufferOrErr.get());
623     std::map<std::string, std::vector<std::string>> WorkloadDefs;
624     json::Path::Root NullRoot;
625     // The JSON is supposed to contain a dictionary matching the type of
626     // WorkloadDefs. For example:
627     // {
628     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
629     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
630     // }
631     auto Parsed = json::parse(Buffer->getBuffer());
632     if (!Parsed)
633       report_fatal_error(Parsed.takeError());
634     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
635       report_fatal_error("Invalid thinlto contextual profile format.");
636     for (const auto &Workload : WorkloadDefs) {
637       const auto &Root = Workload.first;
638       DbgReportIfAmbiguous(Root);
639       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
640       const auto &AllCallees = Workload.second;
641       auto RootIt = NameToValueInfo.find(Root);
642       if (RootIt == NameToValueInfo.end()) {
643         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
644                           << " not found in this linkage unit.\n");
645         continue;
646       }
647       auto RootVI = RootIt->second;
648       if (RootVI.getSummaryList().size() != 1) {
649         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
650                           << " should have exactly one summary, but has "
651                           << RootVI.getSummaryList().size() << ". Skipping.\n");
652         continue;
653       }
654       StringRef RootDefiningModule =
655           RootVI.getSummaryList().front()->modulePath();
656       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
657                         << " is : " << RootDefiningModule << "\n");
658       auto &Set = Workloads[RootDefiningModule];
659       for (const auto &Callee : AllCallees) {
660         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
661         DbgReportIfAmbiguous(Callee);
662         auto ElemIt = NameToValueInfo.find(Callee);
663         if (ElemIt == NameToValueInfo.end()) {
664           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
665           continue;
666         }
667         Set.insert(ElemIt->second);
668       }
669     }
670   }
671 
672   void loadFromCtxProf() {
673     std::error_code EC;
674     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ContextualProfile);
675     if (std::error_code EC = BufferOrErr.getError()) {
676       report_fatal_error("Failed to open contextual profile file");
677       return;
678     }
679     auto Buffer = std::move(BufferOrErr.get());
680 
681     PGOCtxProfileReader Reader(Buffer->getBuffer());
682     auto Ctx = Reader.loadContexts();
683     if (!Ctx) {
684       report_fatal_error("Failed to parse contextual profiles");
685       return;
686     }
687     const auto &CtxMap = *Ctx;
688     DenseSet<GlobalValue::GUID> ContainedGUIDs;
689     for (const auto &[RootGuid, Root] : CtxMap) {
690       // Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for
691       // subsequent roots, but clear its contents.
692       ContainedGUIDs.clear();
693 
694       auto RootVI = Index.getValueInfo(RootGuid);
695       if (!RootVI) {
696         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
697                           << " not found in this linkage unit.\n");
698         continue;
699       }
700       if (RootVI.getSummaryList().size() != 1) {
701         LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
702                           << " should have exactly one summary, but has "
703                           << RootVI.getSummaryList().size() << ". Skipping.\n");
704         continue;
705       }
706       StringRef RootDefiningModule =
707           RootVI.getSummaryList().front()->modulePath();
708       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
709                         << " is : " << RootDefiningModule << "\n");
710       auto &Set = Workloads[RootDefiningModule];
711       Root.getContainedGuids(ContainedGUIDs);
712       for (auto Guid : ContainedGUIDs)
713         if (auto VI = Index.getValueInfo(Guid))
714           Set.insert(VI);
715     }
716   }
717 
718 public:
719   WorkloadImportsManager(
720       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
721           IsPrevailing,
722       const ModuleSummaryIndex &Index,
723       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
724       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
725     if (ContextualProfile.empty() == WorkloadDefinitions.empty()) {
726       report_fatal_error(
727           "Pass only one of: -thinlto-pgo-ctx-prof or -thinlto-workload-def");
728       return;
729     }
730     if (!ContextualProfile.empty())
731       loadFromCtxProf();
732     else
733       loadFromJson();
734     LLVM_DEBUG({
735       for (const auto &[Root, Set] : Workloads) {
736         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
737                << " distinct callees.\n";
738         for (const auto &VI : Set) {
739           dbgs() << "[Workload] Root: " << Root
740                  << " Would include: " << VI.getGUID() << "\n";
741         }
742       }
743     });
744   }
745 };
746 
747 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
748     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
749         IsPrevailing,
750     const ModuleSummaryIndex &Index,
751     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
752   if (WorkloadDefinitions.empty() && ContextualProfile.empty()) {
753     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
754     return std::unique_ptr<ModuleImportsManager>(
755         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
756   }
757   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
758   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
759                                                   ExportLists);
760 }
761 
762 static const char *
763 getFailureName(FunctionImporter::ImportFailureReason Reason) {
764   switch (Reason) {
765   case FunctionImporter::ImportFailureReason::None:
766     return "None";
767   case FunctionImporter::ImportFailureReason::GlobalVar:
768     return "GlobalVar";
769   case FunctionImporter::ImportFailureReason::NotLive:
770     return "NotLive";
771   case FunctionImporter::ImportFailureReason::TooLarge:
772     return "TooLarge";
773   case FunctionImporter::ImportFailureReason::InterposableLinkage:
774     return "InterposableLinkage";
775   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
776     return "LocalLinkageNotInModule";
777   case FunctionImporter::ImportFailureReason::NotEligible:
778     return "NotEligible";
779   case FunctionImporter::ImportFailureReason::NoInline:
780     return "NoInline";
781   }
782   llvm_unreachable("invalid reason");
783 }
784 
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions as exported from their source module (when
/// \p ExportLists is provided); the symbols they reference are marked exported
/// later, in ComputeCrossModuleImport.
///
/// \param Summary            Summary of the caller whose call edges are walked.
/// \param Index              Combined index used to select callee summaries.
/// \param Threshold          Base instruction-count threshold for this caller;
///                           scaled per edge by the edge's hotness multiplier.
/// \param DefinedGVSummaries Values already defined in the importing module;
///                           callees found here are never imported.
/// \param isPrevailing       Not referenced in this function body.
/// \param Worklist           Receives each callee chosen for import, paired
///                           with the threshold to use for its own callees.
/// \param GVImporter         Notified of \p Summary via onImportingSummary().
/// \param ImportList         Per-source-module import map that is updated.
/// \param ExportLists        Optional per-module export sets; may be null.
/// \param ImportThresholds   Per-GUID record of the highest threshold tried,
///                           the selected callee summary (if any) and the
///                           failure info (if any).
static void computeImportForFunction(
    const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
    const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
    FunctionImporter::ImportMapTy &ImportList,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
    FunctionImporter::ImportThresholdsTy &ImportThresholds) {
  GVImporter.onImportingSummary(Summary);
  // Function-local static: the count (and therefore the ImportCutoff check
  // below) is shared by every call to this function, not reset per caller.
  static int ImportCount = 0;
  for (const auto &Edge : Summary.calls()) {
    ValueInfo VI = Edge.first;
    LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
                      << "\n");

    // A negative ImportCutoff disables the cutoff.
    if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
      LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
                        << " reached.\n");
      continue;
    }

    if (DefinedGVSummaries.count(VI.getGUID())) {
      // FIXME: Consider not skipping import if the module contains
      // a non-prevailing def with interposable linkage. The prevailing copy
      // can safely be imported (see shouldImportGlobal()).
      LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
      continue;
    }

    // Map the call edge's hotness to the multiplier applied to Threshold;
    // hotness values other than Hot, Cold and Critical leave it unscaled.
    auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
      if (Hotness == CalleeInfo::HotnessType::Hot)
        return ImportHotMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Cold)
        return ImportColdMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Critical)
        return ImportCriticalMultiplier;
      return 1.0;
    };

    const auto NewThreshold =
        Threshold * GetBonusMultiplier(Edge.second.getHotness());

    // Look up (or create) the bookkeeping entry for this callee. If the GUID
    // was seen before, the insertion fails and IT.first refers to the existing
    // entry, leaving its recorded threshold untouched.
    auto IT = ImportThresholds.insert(std::make_pair(
        VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
    bool PreviouslyVisited = !IT.second;
    auto &ProcessedThreshold = std::get<0>(IT.first->second);
    auto &CalleeSummary = std::get<1>(IT.first->second);
    auto &FailureInfo = std::get<2>(IT.first->second);

    bool IsHotCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
    bool IsCriticalCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;

    const FunctionSummary *ResolvedCalleeSummary = nullptr;
    if (CalleeSummary) {
      assert(PreviouslyVisited);
      // Since the traversal of the call graph is DFS, we can revisit a function
      // a second time with a higher threshold. In this case, it is added back
      // to the worklist with the new threshold (so that its own callee chains
      // can be considered with the higher threshold).
      if (NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already imported with Threshold "
                   << ProcessedThreshold << "\n");
        continue;
      }
      // Update with new larger threshold.
      ProcessedThreshold = NewThreshold;
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
    } else {
      // If we already rejected importing a callee at the same or higher
      // threshold, don't waste time calling selectCallee.
      if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already rejected with Threshold "
            << ProcessedThreshold << "\n");
        if (PrintImportFailures) {
          assert(FailureInfo &&
                 "Expected FailureInfo for previously rejected candidate");
          FailureInfo->Attempts++;
        }
        continue;
      }

      FunctionImporter::ImportFailureReason Reason{};

      // `SummaryForDeclImport` is a summary eligible for declaration import.
      const GlobalValueSummary *SummaryForDeclImport = nullptr;
      CalleeSummary =
          selectCallee(Index, VI.getSummaryList(), NewThreshold,
                       Summary.modulePath(), SummaryForDeclImport, Reason);
      if (!CalleeSummary) {
        // There isn't a callee for definition import but one for declaration
        // import.
        if (ImportDeclaration && SummaryForDeclImport) {
          StringRef DeclSourceModule = SummaryForDeclImport->modulePath();

          // Since definition takes precedence over declaration for the same VI,
          // try emplace <VI, declaration> pair without checking insert result.
          // If insert doesn't happen, there must be an existing entry keyed by
          // VI. Note `ExportLists` only keeps track of exports due to imported
          // definitions.
          ImportList[DeclSourceModule].try_emplace(
              VI.getGUID(), GlobalValueSummary::Declaration);
        }
        // Update with new larger threshold if this was a retry (otherwise
        // we would have already inserted with NewThreshold above). Also
        // update failure info if requested.
        if (PreviouslyVisited) {
          ProcessedThreshold = NewThreshold;
          if (PrintImportFailures) {
            assert(FailureInfo &&
                   "Expected FailureInfo for previously rejected candidate");
            FailureInfo->Reason = Reason;
            FailureInfo->Attempts++;
            FailureInfo->MaxHotness =
                std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
          }
        } else if (PrintImportFailures) {
          assert(!FailureInfo &&
                 "Expected no FailureInfo for newly rejected candidate");
          FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
              VI, Edge.second.getHotness(), Reason, 1);
        }
        if (ForceImportAll) {
          std::string Msg = std::string("Failed to import function ") +
                            VI.name().str() + " due to " +
                            getFailureName(Reason);
          auto Error = make_error<StringError>(
              Msg, make_error_code(errc::not_supported));
          logAllUnhandledErrors(std::move(Error), errs(),
                                "Error importing module: ");
          // The error is only logged; the remaining call edges of this caller
          // are not examined.
          break;
        } else {
          LLVM_DEBUG(dbgs()
                     << "ignored! No qualifying callee with summary found.\n");
          continue;
        }
      }

      // "Resolve" the summary
      CalleeSummary = CalleeSummary->getBaseObject();
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);

      assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
              (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
             "selectCallee() didn't honor the threshold");

      auto ExportModulePath = ResolvedCalleeSummary->modulePath();

      // Try emplace the definition entry, and update stats based on insertion
      // status.
      auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
          VI.getGUID(), GlobalValueSummary::Definition);

      // We previously decided to import this GUID definition if it was already
      // inserted in the set of imports from the exporting module.
      if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
        NumImportedFunctionsThinLink++;
        if (IsHotCallsite)
          NumImportedHotFunctionsThinLink++;
        if (IsCriticalCallsite)
          NumImportedCriticalFunctionsThinLink++;
      }

      // Upgrade an existing declaration-only entry to a definition import.
      if (Iter->second == GlobalValueSummary::Declaration)
        Iter->second = GlobalValueSummary::Definition;

      // Any calls/references made by this function will be marked exported
      // later, in ComputeCrossModuleImport, after import decisions are
      // complete, which is more efficient than adding them here.
      if (ExportLists)
        (*ExportLists)[ExportModulePath].insert(VI);
    }

    auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
      // Adjust the threshold for next level of imported functions.
      // The threshold is different for hot callsites because we can then
      // inline chains of hot calls.
      if (IsHotCallsite)
        return Threshold * ImportHotInstrFactor;
      return Threshold * ImportInstrFactor;
    };

    // Note: derived from the caller's base Threshold, not from NewThreshold.
    const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);

    ImportCount++;

    // Insert the newly imported function to the worklist.
    Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
  }
}
982 
983 void ModuleImportsManager::computeImportForModule(
984     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
985     FunctionImporter::ImportMapTy &ImportList) {
986   // Worklist contains the list of function imported in this module, for which
987   // we will analyse the callees and may import further down the callgraph.
988   SmallVector<EdgeInfo, 128> Worklist;
989   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
990                       ExportLists);
991   FunctionImporter::ImportThresholdsTy ImportThresholds;
992 
993   // Populate the worklist with the import for the functions in the current
994   // module
995   for (const auto &GVSummary : DefinedGVSummaries) {
996 #ifndef NDEBUG
997     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
998     // so this map look up (and possibly others) can be avoided.
999     auto VI = Index.getValueInfo(GVSummary.first);
1000 #endif
1001     if (!Index.isGlobalValueLive(GVSummary.second)) {
1002       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
1003       continue;
1004     }
1005     auto *FuncSummary =
1006         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
1007     if (!FuncSummary)
1008       // Skip import for global variables
1009       continue;
1010     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
1011     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
1012                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
1013                              ImportList, ExportLists, ImportThresholds);
1014   }
1015 
1016   // Process the newly imported functions and add callees to the worklist.
1017   while (!Worklist.empty()) {
1018     auto GVInfo = Worklist.pop_back_val();
1019     auto *Summary = std::get<0>(GVInfo);
1020     auto Threshold = std::get<1>(GVInfo);
1021 
1022     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
1023       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
1024                                IsPrevailing, Worklist, GVI, ImportList,
1025                                ExportLists, ImportThresholds);
1026   }
1027 
1028   // Print stats about functions considered but rejected for importing
1029   // when requested.
1030   if (PrintImportFailures) {
1031     dbgs() << "Missed imports into module " << ModName << "\n";
1032     for (auto &I : ImportThresholds) {
1033       auto &ProcessedThreshold = std::get<0>(I.second);
1034       auto &CalleeSummary = std::get<1>(I.second);
1035       auto &FailureInfo = std::get<2>(I.second);
1036       if (CalleeSummary)
1037         continue; // We are going to import.
1038       assert(FailureInfo);
1039       FunctionSummary *FS = nullptr;
1040       if (!FailureInfo->VI.getSummaryList().empty())
1041         FS = dyn_cast<FunctionSummary>(
1042             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
1043       dbgs() << FailureInfo->VI
1044              << ": Reason = " << getFailureName(FailureInfo->Reason)
1045              << ", Threshold = " << ProcessedThreshold
1046              << ", Size = " << (FS ? (int)FS->instCount() : -1)
1047              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
1048              << ", Attempts = " << FailureInfo->Attempts << "\n";
1049     }
1050   }
1051 }
1052 
1053 #ifndef NDEBUG
1054 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
1055   auto SL = VI.getSummaryList();
1056   return SL.empty()
1057              ? false
1058              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
1059 }
1060 
1061 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
1062                                GlobalValue::GUID G) {
1063   if (const auto &VI = Index.getValueInfo(G))
1064     return isGlobalVarSummary(Index, VI);
1065   return false;
1066 }
1067 
1068 // Return the number of global variable summaries in ExportSet.
1069 static unsigned
1070 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1071                       FunctionImporter::ExportSetTy &ExportSet) {
1072   unsigned NumGVS = 0;
1073   for (auto &VI : ExportSet)
1074     if (isGlobalVarSummary(Index, VI.getGUID()))
1075       ++NumGVS;
1076   return NumGVS;
1077 }
1078 
1079 // Given ImportMap, return the number of global variable summaries and record
1080 // the number of defined function summaries as output parameter.
1081 static unsigned
1082 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1083                       FunctionImporter::FunctionsToImportTy &ImportMap,
1084                       unsigned &DefinedFS) {
1085   unsigned NumGVS = 0;
1086   DefinedFS = 0;
1087   for (auto &[GUID, Type] : ImportMap) {
1088     if (isGlobalVarSummary(Index, GUID))
1089       ++NumGVS;
1090     else if (Type == GlobalValueSummary::Definition)
1091       ++DefinedFS;
1092   }
1093   return NumGVS;
1094 }
1095 #endif
1096 
1097 #ifndef NDEBUG
1098 static bool checkVariableImport(
1099     const ModuleSummaryIndex &Index,
1100     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1101     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1102   DenseSet<GlobalValue::GUID> FlattenedImports;
1103 
1104   for (auto &ImportPerModule : ImportLists)
1105     for (auto &ExportPerModule : ImportPerModule.second)
1106       for (auto &[GUID, Type] : ExportPerModule.second)
1107         FlattenedImports.insert(GUID);
1108 
1109   // Checks that all GUIDs of read/writeonly vars we see in export lists
1110   // are also in the import lists. Otherwise we my face linker undefs,
1111   // because readonly and writeonly vars are internalized in their
1112   // source modules. The exception would be if it has a linkage type indicating
1113   // that there may have been a copy existing in the importing module (e.g.
1114   // linkonce_odr). In that case we cannot accurately do this checking.
1115   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1116                                                   const ValueInfo &VI) {
1117     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1118         Index.findSummaryInModule(VI, ModulePath));
1119     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1120            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1121              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1122              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1123   };
1124 
1125   for (auto &ExportPerModule : ExportLists)
1126     for (auto &VI : ExportPerModule.second)
1127       if (!FlattenedImports.count(VI.getGUID()) &&
1128           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1129         return false;
1130 
1131   return true;
1132 }
1133 #endif
1134 
1135 /// Compute all the import and export for every module using the Index.
1136 void llvm::ComputeCrossModuleImport(
1137     const ModuleSummaryIndex &Index,
1138     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1139     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1140         isPrevailing,
1141     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1142     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1143   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1144   // For each module that has function defined, compute the import/export lists.
1145   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1146     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1147     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1148                       << DefinedGVSummaries.first << "'\n");
1149     MIS->computeImportForModule(DefinedGVSummaries.second,
1150                                 DefinedGVSummaries.first, ImportList);
1151   }
1152 
1153   // When computing imports we only added the variables and functions being
1154   // imported to the export list. We also need to mark any references and calls
1155   // they make as exported as well. We do this here, as it is more efficient
1156   // since we may import the same values multiple times into different modules
1157   // during the import computation.
1158   for (auto &ELI : ExportLists) {
1159     // `NewExports` tracks the VI that gets exported because the full definition
1160     // of its user/referencer gets exported.
1161     FunctionImporter::ExportSetTy NewExports;
1162     const auto &DefinedGVSummaries =
1163         ModuleToDefinedGVSummaries.lookup(ELI.first);
1164     for (auto &EI : ELI.second) {
1165       // Find the copy defined in the exporting module so that we can mark the
1166       // values it references in that specific definition as exported.
1167       // Below we will add all references and called values, without regard to
1168       // whether they are also defined in this module. We subsequently prune the
1169       // list to only include those defined in the exporting module, see comment
1170       // there as to why.
1171       auto DS = DefinedGVSummaries.find(EI.getGUID());
1172       // Anything marked exported during the import computation must have been
1173       // defined in the exporting module.
1174       assert(DS != DefinedGVSummaries.end());
1175       auto *S = DS->getSecond();
1176       S = S->getBaseObject();
1177       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1178         // Export referenced functions and variables. We don't export/promote
1179         // objects referenced by writeonly variable initializer, because
1180         // we convert such variables initializers to "zeroinitializer".
1181         // See processGlobalForThinLTO.
1182         if (!Index.isWriteOnly(GVS))
1183           for (const auto &VI : GVS->refs())
1184             NewExports.insert(VI);
1185       } else {
1186         auto *FS = cast<FunctionSummary>(S);
1187         for (const auto &Edge : FS->calls())
1188           NewExports.insert(Edge.first);
1189         for (const auto &Ref : FS->refs())
1190           NewExports.insert(Ref);
1191       }
1192     }
1193     // Prune list computed above to only include values defined in the
1194     // exporting module. We do this after the above insertion since we may hit
1195     // the same ref/call target multiple times in above loop, and it is more
1196     // efficient to avoid a set lookup each time.
1197     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1198       if (!DefinedGVSummaries.count(EI->getGUID()))
1199         NewExports.erase(EI++);
1200       else
1201         ++EI;
1202     }
1203     ELI.second.insert(NewExports.begin(), NewExports.end());
1204   }
1205 
1206   assert(checkVariableImport(Index, ImportLists, ExportLists));
1207 #ifndef NDEBUG
1208   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1209                     << " modules:\n");
1210   for (auto &ModuleImports : ImportLists) {
1211     auto ModName = ModuleImports.first;
1212     auto &Exports = ExportLists[ModName];
1213     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1214     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1215                       << Exports.size() - NumGVS << " functions and " << NumGVS
1216                       << " vars. Imports from " << ModuleImports.second.size()
1217                       << " modules.\n");
1218     for (auto &Src : ModuleImports.second) {
1219       auto SrcModName = Src.first;
1220       unsigned DefinedFS = 0;
1221       unsigned NumGVSPerMod =
1222           numGlobalVarSummaries(Index, Src.second, DefinedFS);
1223       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1224                         << Src.second.size() - NumGVSPerMod - DefinedFS
1225                         << " function declarations imported from " << SrcModName
1226                         << "\n");
1227       LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1228                         << " global vars imported from " << SrcModName << "\n");
1229     }
1230   }
1231 #endif
1232 }
1233 
1234 #ifndef NDEBUG
1235 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1236                                     StringRef ModulePath,
1237                                     FunctionImporter::ImportMapTy &ImportList) {
1238   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1239                     << ImportList.size() << " modules.\n");
1240   for (auto &Src : ImportList) {
1241     auto SrcModName = Src.first;
1242     unsigned DefinedFS = 0;
1243     unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
1244     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1245                       << Src.second.size() - DefinedFS - NumGVSPerMod
1246                       << " function declarations imported from " << SrcModName
1247                       << "\n");
1248     LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1249                       << SrcModName << "\n");
1250   }
1251 }
1252 #endif
1253 
1254 /// Compute all the imports for the given module using the Index.
1255 ///
1256 /// \p isPrevailing is a callback that will be called with a global value's GUID
1257 /// and summary and should return whether the module corresponding to the
1258 /// summary contains the linker-prevailing copy of that value.
1259 ///
1260 /// \p ImportList will be populated with a map that can be passed to
1261 /// FunctionImporter::importFunctions() above (see description there).
1262 static void ComputeCrossModuleImportForModuleForTest(
1263     StringRef ModulePath,
1264     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1265         isPrevailing,
1266     const ModuleSummaryIndex &Index,
1267     FunctionImporter::ImportMapTy &ImportList) {
1268   // Collect the list of functions this module defines.
1269   // GUID -> Summary
1270   GVSummaryMapTy FunctionSummaryMap;
1271   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1272 
1273   // Compute the import list for this module.
1274   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1275   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1276   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1277 
1278 #ifndef NDEBUG
1279   dumpImportListForModule(Index, ModulePath, ImportList);
1280 #endif
1281 }
1282 
1283 /// Mark all external summaries in \p Index for import into the given module.
1284 /// Used for testing the case of distributed builds using a distributed index.
1285 ///
1286 /// \p ImportList will be populated with a map that can be passed to
1287 /// FunctionImporter::importFunctions() above (see description there).
1288 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1289     StringRef ModulePath, const ModuleSummaryIndex &Index,
1290     FunctionImporter::ImportMapTy &ImportList) {
1291   for (const auto &GlobalList : Index) {
1292     // Ignore entries for undefined references.
1293     if (GlobalList.second.SummaryList.empty())
1294       continue;
1295 
1296     auto GUID = GlobalList.first;
1297     assert(GlobalList.second.SummaryList.size() == 1 &&
1298            "Expected individual combined index to have one summary per GUID");
1299     auto &Summary = GlobalList.second.SummaryList[0];
1300     // Skip the summaries for the importing module. These are included to
1301     // e.g. record required linkage changes.
1302     if (Summary->modulePath() == ModulePath)
1303       continue;
1304     // Add an entry to provoke importing by thinBackend.
1305     auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
1306         GUID, Summary->importType());
1307     if (!Inserted) {
1308       // Use 'std::min' to make sure definition (with enum value 0) takes
1309       // precedence over declaration (with enum value 1).
1310       Iter->second = std::min(Iter->second, Summary->importType());
1311     }
1312   }
1313 #ifndef NDEBUG
1314   dumpImportListForModule(Index, ModulePath, ImportList);
1315 #endif
1316 }
1317 
1318 // For SamplePGO, the indirect call targets for local functions will
1319 // have its original name annotated in profile. We try to find the
1320 // corresponding PGOFuncName as the GUID, and fix up the edges
1321 // accordingly.
1322 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1323                                      FunctionSummary *FS) {
1324   for (auto &EI : FS->mutableCalls()) {
1325     if (!EI.first.getSummaryList().empty())
1326       continue;
1327     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1328     if (GUID == 0)
1329       continue;
1330     // Update the edge to point directly to the correct GUID.
1331     auto VI = Index.getValueInfo(GUID);
1332     if (llvm::any_of(
1333             VI.getSummaryList(),
1334             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1335               // The mapping from OriginalId to GUID may return a GUID
1336               // that corresponds to a static variable. Filter it out here.
1337               // This can happen when
1338               // 1) There is a call to a library function which is not defined
1339               // in the index.
1340               // 2) There is a static variable with the  OriginalGUID identical
1341               // to the GUID of the library function in 1);
1342               // When this happens the static variable in 2) will be found,
1343               // which needs to be filtered out.
1344               return SummaryPtr->getSummaryKind() ==
1345                      GlobalValueSummary::GlobalVarKind;
1346             }))
1347       continue;
1348     EI.first = VI;
1349   }
1350 }
1351 
1352 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1353   for (const auto &Entry : Index) {
1354     for (const auto &S : Entry.second.SummaryList) {
1355       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1356         updateValueInfoForIndirectCalls(Index, FS);
1357     }
1358   }
1359 }
1360 
1361 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1362     ModuleSummaryIndex &Index,
1363     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1364     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1365   assert(!Index.withGlobalValueDeadStripping());
1366   if (!ComputeDead ||
1367       // Don't do anything when nothing is live, this is friendly with tests.
1368       GUIDPreservedSymbols.empty()) {
1369     // Still need to update indirect calls.
1370     updateIndirectCalls(Index);
1371     return;
1372   }
1373   unsigned LiveSymbols = 0;
1374   SmallVector<ValueInfo, 128> Worklist;
1375   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1376   for (auto GUID : GUIDPreservedSymbols) {
1377     ValueInfo VI = Index.getValueInfo(GUID);
1378     if (!VI)
1379       continue;
1380     for (const auto &S : VI.getSummaryList())
1381       S->setLive(true);
1382   }
1383 
1384   // Add values flagged in the index as live roots to the worklist.
1385   for (const auto &Entry : Index) {
1386     auto VI = Index.getValueInfo(Entry);
1387     for (const auto &S : Entry.second.SummaryList) {
1388       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1389         updateValueInfoForIndirectCalls(Index, FS);
1390       if (S->isLive()) {
1391         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1392         Worklist.push_back(VI);
1393         ++LiveSymbols;
1394         break;
1395       }
1396     }
1397   }
1398 
1399   // Make value live and add it to the worklist if it was not live before.
1400   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1401     // FIXME: If we knew which edges were created for indirect call profiles,
1402     // we could skip them here. Any that are live should be reached via
1403     // other edges, e.g. reference edges. Otherwise, using a profile collected
1404     // on a slightly different binary might provoke preserving, importing
1405     // and ultimately promoting calls to functions not linked into this
1406     // binary, which increases the binary size unnecessarily. Note that
1407     // if this code changes, the importer needs to change so that edges
1408     // to functions marked dead are skipped.
1409 
1410     if (llvm::any_of(VI.getSummaryList(),
1411                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1412                        return S->isLive();
1413                      }))
1414       return;
1415 
1416     // We only keep live symbols that are known to be non-prevailing if any are
1417     // available_externally, linkonceodr, weakodr. Those symbols are discarded
1418     // later in the EliminateAvailableExternally pass and setting them to
1419     // not-live could break downstreams users of liveness information (PR36483)
1420     // or limit optimization opportunities.
1421     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1422       bool KeepAliveLinkage = false;
1423       bool Interposable = false;
1424       for (const auto &S : VI.getSummaryList()) {
1425         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1426             S->linkage() == GlobalValue::WeakODRLinkage ||
1427             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1428           KeepAliveLinkage = true;
1429         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1430           Interposable = true;
1431       }
1432 
1433       if (!IsAliasee) {
1434         if (!KeepAliveLinkage)
1435           return;
1436 
1437         if (Interposable)
1438           report_fatal_error(
1439               "Interposable and available_externally/linkonce_odr/weak_odr "
1440               "symbol");
1441       }
1442     }
1443 
1444     for (const auto &S : VI.getSummaryList())
1445       S->setLive(true);
1446     ++LiveSymbols;
1447     Worklist.push_back(VI);
1448   };
1449 
1450   while (!Worklist.empty()) {
1451     auto VI = Worklist.pop_back_val();
1452     for (const auto &Summary : VI.getSummaryList()) {
1453       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1454         // If this is an alias, visit the aliasee VI to ensure that all copies
1455         // are marked live and it is added to the worklist for further
1456         // processing of its references.
1457         visit(AS->getAliaseeVI(), true);
1458         continue;
1459       }
1460       for (auto Ref : Summary->refs())
1461         visit(Ref, false);
1462       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1463         for (auto Call : FS->calls())
1464           visit(Call.first, false);
1465     }
1466   }
1467   Index.setWithGlobalValueDeadStripping();
1468 
1469   unsigned DeadSymbols = Index.size() - LiveSymbols;
1470   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1471                     << " symbols Dead \n");
1472   NumDeadSymbols += DeadSymbols;
1473   NumLiveSymbols += LiveSymbols;
1474 }
1475 
1476 // Compute dead symbols and propagate constants in combined index.
1477 void llvm::computeDeadSymbolsWithConstProp(
1478     ModuleSummaryIndex &Index,
1479     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1480     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1481     bool ImportEnabled) {
1482   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1483                                            isPrevailing);
1484   if (ImportEnabled)
1485     Index.propagateAttributes(GUIDPreservedSymbols);
1486 }
1487 
1488 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1489 /// \p ModulePath.
1490 void llvm::gatherImportedSummariesForModule(
1491     StringRef ModulePath,
1492     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1493     const FunctionImporter::ImportMapTy &ImportList,
1494     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
1495     GVSummaryPtrSet &DecSummaries) {
1496   // Include all summaries from the importing module.
1497   ModuleToSummariesForIndex[std::string(ModulePath)] =
1498       ModuleToDefinedGVSummaries.lookup(ModulePath);
1499   // Include summaries for imports.
1500   for (const auto &ILI : ImportList) {
1501     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1502 
1503     const auto &DefinedGVSummaries =
1504         ModuleToDefinedGVSummaries.lookup(ILI.first);
1505     for (const auto &[GUID, Type] : ILI.second) {
1506       const auto &DS = DefinedGVSummaries.find(GUID);
1507       assert(DS != DefinedGVSummaries.end() &&
1508              "Expected a defined summary for imported global value");
1509       if (Type == GlobalValueSummary::Declaration)
1510         DecSummaries.insert(DS->second);
1511 
1512       SummariesForIndex[GUID] = DS->second;
1513     }
1514   }
1515 }
1516 
1517 /// Emit the files \p ModulePath will import from into \p OutputFilename.
1518 std::error_code llvm::EmitImportsFiles(
1519     StringRef ModulePath, StringRef OutputFilename,
1520     const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1521   std::error_code EC;
1522   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1523   if (EC)
1524     return EC;
1525   for (const auto &ILI : ModuleToSummariesForIndex)
1526     // The ModuleToSummariesForIndex map includes an entry for the current
1527     // Module (needed for writing out the index files). We don't want to
1528     // include it in the imports file, however, so filter it out.
1529     if (ILI.first != ModulePath)
1530       ImportsOS << ILI.first << "\n";
1531   return std::error_code();
1532 }
1533 
1534 bool llvm::convertToDeclaration(GlobalValue &GV) {
1535   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1536                     << "\n");
1537   if (Function *F = dyn_cast<Function>(&GV)) {
1538     F->deleteBody();
1539     F->clearMetadata();
1540     F->setComdat(nullptr);
1541   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1542     V->setInitializer(nullptr);
1543     V->setLinkage(GlobalValue::ExternalLinkage);
1544     V->clearMetadata();
1545     V->setComdat(nullptr);
1546   } else {
1547     GlobalValue *NewGV;
1548     if (GV.getValueType()->isFunctionTy())
1549       NewGV =
1550           Function::Create(cast<FunctionType>(GV.getValueType()),
1551                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1552                            "", GV.getParent());
1553     else
1554       NewGV =
1555           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1556                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1557                              /*init*/ nullptr, "",
1558                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1559                              GV.getType()->getAddressSpace());
1560     NewGV->takeName(&GV);
1561     GV.replaceAllUsesWith(NewGV);
1562     return false;
1563   }
1564   if (!GV.isImplicitDSOLocal())
1565     GV.setDSOLocal(false);
1566   return true;
1567 }
1568 
1569 void llvm::thinLTOFinalizeInModule(Module &TheModule,
1570                                    const GVSummaryMapTy &DefinedGlobals,
1571                                    bool PropagateAttrs) {
1572   DenseSet<Comdat *> NonPrevailingComdats;
1573   auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
1574     // See if the global summary analysis computed a new resolved linkage.
1575     const auto &GS = DefinedGlobals.find(GV.getGUID());
1576     if (GS == DefinedGlobals.end())
1577       return;
1578 
1579     if (Propagate)
1580       if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
1581         if (Function *F = dyn_cast<Function>(&GV)) {
1582           // TODO: propagate ReadNone and ReadOnly.
1583           if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
1584             F->setDoesNotAccessMemory();
1585 
1586           if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
1587             F->setOnlyReadsMemory();
1588 
1589           if (FS->fflags().NoRecurse && !F->doesNotRecurse())
1590             F->setDoesNotRecurse();
1591 
1592           if (FS->fflags().NoUnwind && !F->doesNotThrow())
1593             F->setDoesNotThrow();
1594         }
1595       }
1596 
1597     auto NewLinkage = GS->second->linkage();
1598     if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
1599         // Don't internalize anything here, because the code below
1600         // lacks necessary correctness checks. Leave this job to
1601         // LLVM 'internalize' pass.
1602         GlobalValue::isLocalLinkage(NewLinkage) ||
1603         // In case it was dead and already converted to declaration.
1604         GV.isDeclaration())
1605       return;
1606 
1607     // Set the potentially more constraining visibility computed from summaries.
1608     // The DefaultVisibility condition is because older GlobalValueSummary does
1609     // not record DefaultVisibility and we don't want to change protected/hidden
1610     // to default.
1611     if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
1612       GV.setVisibility(GS->second->getVisibility());
1613 
1614     if (NewLinkage == GV.getLinkage())
1615       return;
1616 
1617     // Check for a non-prevailing def that has interposable linkage
1618     // (e.g. non-odr weak or linkonce). In that case we can't simply
1619     // convert to available_externally, since it would lose the
1620     // interposable property and possibly get inlined. Simply drop
1621     // the definition in that case.
1622     if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
1623         GlobalValue::isInterposableLinkage(GV.getLinkage())) {
1624       if (!convertToDeclaration(GV))
1625         // FIXME: Change this to collect replaced GVs and later erase
1626         // them from the parent module once thinLTOResolvePrevailingGUID is
1627         // changed to enable this for aliases.
1628         llvm_unreachable("Expected GV to be converted");
1629     } else {
1630       // If all copies of the original symbol had global unnamed addr and
1631       // linkonce_odr linkage, or if all of them had local unnamed addr linkage
1632       // and are constants, then it should be an auto hide symbol. In that case
1633       // the thin link would have marked it as CanAutoHide. Add hidden
1634       // visibility to the symbol to preserve the property.
1635       if (NewLinkage == GlobalValue::WeakODRLinkage &&
1636           GS->second->canAutoHide()) {
1637         assert(GV.canBeOmittedFromSymbolTable());
1638         GV.setVisibility(GlobalValue::HiddenVisibility);
1639       }
1640 
1641       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
1642                         << "` from " << GV.getLinkage() << " to " << NewLinkage
1643                         << "\n");
1644       GV.setLinkage(NewLinkage);
1645     }
1646     // Remove declarations from comdats, including available_externally
1647     // as this is a declaration for the linker, and will be dropped eventually.
1648     // It is illegal for comdats to contain declarations.
1649     auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
1650     if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
1651       if (GO->getComdat()->getName() == GO->getName())
1652         NonPrevailingComdats.insert(GO->getComdat());
1653       GO->setComdat(nullptr);
1654     }
1655   };
1656 
1657   // Process functions and global now
1658   for (auto &GV : TheModule)
1659     FinalizeInModule(GV, PropagateAttrs);
1660   for (auto &GV : TheModule.globals())
1661     FinalizeInModule(GV);
1662   for (auto &GV : TheModule.aliases())
1663     FinalizeInModule(GV);
1664 
1665   // For a non-prevailing comdat, all its members must be available_externally.
1666   // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
1667   // local linkage GlobalValues.
1668   if (NonPrevailingComdats.empty())
1669     return;
1670   for (auto &GO : TheModule.global_objects()) {
1671     if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
1672       GO.setComdat(nullptr);
1673       GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
1674     }
1675   }
1676   bool Changed;
1677   do {
1678     Changed = false;
1679     // If an alias references a GlobalValue in a non-prevailing comdat, change
1680     // it to available_externally. For simplicity we only handle GlobalValue and
1681     // ConstantExpr with a base object. ConstantExpr without a base object is
1682     // unlikely used in a COMDAT.
1683     for (auto &GA : TheModule.aliases()) {
1684       if (GA.hasAvailableExternallyLinkage())
1685         continue;
1686       GlobalObject *Obj = GA.getAliaseeObject();
1687       assert(Obj && "aliasee without an base object is unimplemented");
1688       if (Obj->hasAvailableExternallyLinkage()) {
1689         GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
1690         Changed = true;
1691       }
1692     }
1693   } while (Changed);
1694 }
1695 
1696 /// Run internalization on \p TheModule based on symmary analysis.
1697 void llvm::thinLTOInternalizeModule(Module &TheModule,
1698                                     const GVSummaryMapTy &DefinedGlobals) {
1699   // Declare a callback for the internalize pass that will ask for every
1700   // candidate GlobalValue if it can be internalized or not.
1701   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1702     // It may be the case that GV is on a chain of an ifunc, its alias and
1703     // subsequent aliases. In this case, the summary for the value is not
1704     // available.
1705     if (isa<GlobalIFunc>(&GV) ||
1706         (isa<GlobalAlias>(&GV) &&
1707          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1708       return true;
1709 
1710     // Lookup the linkage recorded in the summaries during global analysis.
1711     auto GS = DefinedGlobals.find(GV.getGUID());
1712     if (GS == DefinedGlobals.end()) {
1713       // Must have been promoted (possibly conservatively). Find original
1714       // name so that we can access the correct summary and see if it can
1715       // be internalized again.
1716       // FIXME: Eventually we should control promotion instead of promoting
1717       // and internalizing again.
1718       StringRef OrigName =
1719           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1720       std::string OrigId = GlobalValue::getGlobalIdentifier(
1721           OrigName, GlobalValue::InternalLinkage,
1722           TheModule.getSourceFileName());
1723       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1724       if (GS == DefinedGlobals.end()) {
1725         // Also check the original non-promoted non-globalized name. In some
1726         // cases a preempted weak value is linked in as a local copy because
1727         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1728         // In that case, since it was originally not a local value, it was
1729         // recorded in the index using the original name.
1730         // FIXME: This may not be needed once PR27866 is fixed.
1731         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1732         assert(GS != DefinedGlobals.end());
1733       }
1734     }
1735     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1736   };
1737 
1738   // FIXME: See if we can just internalize directly here via linkage changes
1739   // based on the index, rather than invoking internalizeModule.
1740   internalizeModule(TheModule, MustPreserveGV);
1741 }
1742 
1743 /// Make alias a clone of its aliasee.
1744 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1745   Function *Fn = cast<Function>(GA->getAliaseeObject());
1746 
1747   ValueToValueMapTy VMap;
1748   Function *NewFn = CloneFunction(Fn, VMap);
1749   // Clone should use the original alias's linkage, visibility and name, and we
1750   // ensure all uses of alias instead use the new clone (casted if necessary).
1751   NewFn->setLinkage(GA->getLinkage());
1752   NewFn->setVisibility(GA->getVisibility());
1753   GA->replaceAllUsesWith(NewFn);
1754   NewFn->takeName(GA);
1755   return NewFn;
1756 }
1757 
1758 // Internalize values that we marked with specific attribute
1759 // in processGlobalForThinLTO.
1760 static void internalizeGVsAfterImport(Module &M) {
1761   for (auto &GV : M.globals())
1762     // Skip GVs which have been converted to declarations
1763     // by dropDeadSymbols.
1764     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1765       GV.setLinkage(GlobalValue::InternalLinkage);
1766       GV.setVisibility(GlobalValue::DefaultVisibility);
1767     }
1768 }
1769 
1770 // Automatically import functions in Module \p DestModule based on the summaries
1771 // index.
1772 Expected<bool> FunctionImporter::importFunctions(
1773     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
1774   LLVM_DEBUG(dbgs() << "Starting import for Module "
1775                     << DestModule.getModuleIdentifier() << "\n");
1776   unsigned ImportedCount = 0, ImportedGVCount = 0;
1777 
1778   IRMover Mover(DestModule);
1779   // Do the actual import of functions now, one Module at a time
1780   std::set<StringRef> ModuleNameOrderedList;
1781   for (const auto &FunctionsToImportPerModule : ImportList) {
1782     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
1783   }
1784 
1785   auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
1786                            GlobalValue::GUID GUID)
1787       -> std::optional<GlobalValueSummary::ImportKind> {
1788     auto Iter = GUIDToImportType.find(GUID);
1789     if (Iter == GUIDToImportType.end())
1790       return std::nullopt;
1791     return Iter->second;
1792   };
1793 
1794   for (const auto &Name : ModuleNameOrderedList) {
1795     // Get the module for the import
1796     const auto &FunctionsToImportPerModule = ImportList.find(Name);
1797     assert(FunctionsToImportPerModule != ImportList.end());
1798     Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
1799     if (!SrcModuleOrErr)
1800       return SrcModuleOrErr.takeError();
1801     std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
1802     assert(&DestModule.getContext() == &SrcModule->getContext() &&
1803            "Context mismatch");
1804 
1805     // If modules were created with lazy metadata loading, materialize it
1806     // now, before linking it (otherwise this will be a noop).
1807     if (Error Err = SrcModule->materializeMetadata())
1808       return std::move(Err);
1809 
1810     auto &ImportGUIDs = FunctionsToImportPerModule->second;
1811 
1812     // Find the globals to import
1813     SetVector<GlobalValue *> GlobalsToImport;
1814     for (Function &F : *SrcModule) {
1815       if (!F.hasName())
1816         continue;
1817       auto GUID = F.getGUID();
1818       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1819 
1820       bool ImportDefinition =
1821           (MaybeImportType &&
1822            (*MaybeImportType == GlobalValueSummary::Definition));
1823 
1824       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1825                         << " importing function"
1826                         << (ImportDefinition
1827                                 ? " definition "
1828                                 : (MaybeImportType ? " declaration " : " "))
1829                         << GUID << " " << F.getName() << " from "
1830                         << SrcModule->getSourceFileName() << "\n");
1831       if (ImportDefinition) {
1832         if (Error Err = F.materialize())
1833           return std::move(Err);
1834         // MemProf should match function's definition and summary,
1835         // 'thinlto_src_module' is needed.
1836         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1837           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1838           // statistics and debugging.
1839           F.setMetadata(
1840               "thinlto_src_module",
1841               MDNode::get(DestModule.getContext(),
1842                           {MDString::get(DestModule.getContext(),
1843                                          SrcModule->getModuleIdentifier())}));
1844           F.setMetadata(
1845               "thinlto_src_file",
1846               MDNode::get(DestModule.getContext(),
1847                           {MDString::get(DestModule.getContext(),
1848                                          SrcModule->getSourceFileName())}));
1849         }
1850         GlobalsToImport.insert(&F);
1851       }
1852     }
1853     for (GlobalVariable &GV : SrcModule->globals()) {
1854       if (!GV.hasName())
1855         continue;
1856       auto GUID = GV.getGUID();
1857       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1858 
1859       bool ImportDefinition =
1860           (MaybeImportType &&
1861            (*MaybeImportType == GlobalValueSummary::Definition));
1862 
1863       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1864                         << " importing global"
1865                         << (ImportDefinition
1866                                 ? " definition "
1867                                 : (MaybeImportType ? " declaration " : " "))
1868                         << GUID << " " << GV.getName() << " from "
1869                         << SrcModule->getSourceFileName() << "\n");
1870       if (ImportDefinition) {
1871         if (Error Err = GV.materialize())
1872           return std::move(Err);
1873         ImportedGVCount += GlobalsToImport.insert(&GV);
1874       }
1875     }
1876     for (GlobalAlias &GA : SrcModule->aliases()) {
1877       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
1878         continue;
1879       auto GUID = GA.getGUID();
1880       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1881 
1882       bool ImportDefinition =
1883           (MaybeImportType &&
1884            (*MaybeImportType == GlobalValueSummary::Definition));
1885 
1886       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1887                         << " importing alias"
1888                         << (ImportDefinition
1889                                 ? " definition "
1890                                 : (MaybeImportType ? " declaration " : " "))
1891                         << GUID << " " << GA.getName() << " from "
1892                         << SrcModule->getSourceFileName() << "\n");
1893       if (ImportDefinition) {
1894         if (Error Err = GA.materialize())
1895           return std::move(Err);
1896         // Import alias as a copy of its aliasee.
1897         GlobalObject *GO = GA.getAliaseeObject();
1898         if (Error Err = GO->materialize())
1899           return std::move(Err);
1900         auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
1901         LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
1902                           << GO->getName() << " from "
1903                           << SrcModule->getSourceFileName() << "\n");
1904         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1905           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1906           // statistics and debugging.
1907           Fn->setMetadata(
1908               "thinlto_src_module",
1909               MDNode::get(DestModule.getContext(),
1910                           {MDString::get(DestModule.getContext(),
1911                                          SrcModule->getModuleIdentifier())}));
1912           Fn->setMetadata(
1913               "thinlto_src_file",
1914               MDNode::get(DestModule.getContext(),
1915                           {MDString::get(DestModule.getContext(),
1916                                          SrcModule->getSourceFileName())}));
1917         }
1918         GlobalsToImport.insert(Fn);
1919       }
1920     }
1921 
1922     // Upgrade debug info after we're done materializing all the globals and we
1923     // have loaded all the required metadata!
1924     UpgradeDebugInfo(*SrcModule);
1925 
1926     // Set the partial sample profile ratio in the profile summary module flag
1927     // of the imported source module, if applicable, so that the profile summary
1928     // module flag will match with that of the destination module when it's
1929     // imported.
1930     SrcModule->setPartialSampleProfileRatio(Index);
1931 
1932     // Link in the specified functions.
1933     if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
1934                                &GlobalsToImport))
1935       return true;
1936 
1937     if (PrintImports) {
1938       for (const auto *GV : GlobalsToImport)
1939         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
1940                << " from " << SrcModule->getSourceFileName() << "\n";
1941     }
1942 
1943     if (Error Err = Mover.move(std::move(SrcModule),
1944                                GlobalsToImport.getArrayRef(), nullptr,
1945                                /*IsPerformingImport=*/true))
1946       return createStringError(errc::invalid_argument,
1947                                Twine("Function Import: link error: ") +
1948                                    toString(std::move(Err)));
1949 
1950     ImportedCount += GlobalsToImport.size();
1951     NumImportedModules++;
1952   }
1953 
1954   internalizeGVsAfterImport(DestModule);
1955 
1956   NumImportedFunctions += (ImportedCount - ImportedGVCount);
1957   NumImportedGlobalVars += ImportedGVCount;
1958 
1959   // TODO: Print counters for definitions and declarations in the debugging log.
1960   LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
1961                     << " functions for Module "
1962                     << DestModule.getModuleIdentifier() << "\n");
1963   LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
1964                     << " global variables for Module "
1965                     << DestModule.getModuleIdentifier() << "\n");
1966   return ImportedCount;
1967 }
1968 
1969 static bool doImportingForModuleForTest(
1970     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1971                    isPrevailing) {
1972   if (SummaryFile.empty())
1973     report_fatal_error("error: -function-import requires -summary-file\n");
1974   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1975       getModuleSummaryIndexForFile(SummaryFile);
1976   if (!IndexPtrOrErr) {
1977     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1978                           "Error loading file '" + SummaryFile + "': ");
1979     return false;
1980   }
1981   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1982 
1983   // First step is collecting the import list.
1984   FunctionImporter::ImportMapTy ImportList;
1985   // If requested, simply import all functions in the index. This is used
1986   // when testing distributed backend handling via the opt tool, when
1987   // we have distributed indexes containing exactly the summaries to import.
1988   if (ImportAllIndex)
1989     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1990                                                       *Index, ImportList);
1991   else
1992     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1993                                              isPrevailing, *Index, ImportList);
1994 
1995   // Conservatively mark all internal values as promoted. This interface is
1996   // only used when doing importing via the function importing pass. The pass
1997   // is only enabled when testing importing via the 'opt' tool, which does
1998   // not do the ThinLink that would normally determine what values to promote.
1999   for (auto &I : *Index) {
2000     for (auto &S : I.second.SummaryList) {
2001       if (GlobalValue::isLocalLinkage(S->linkage()))
2002         S->setLinkage(GlobalValue::ExternalLinkage);
2003     }
2004   }
2005 
2006   // Next we need to promote to global scope and rename any local values that
2007   // are potentially exported to other modules.
2008   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
2009                              /*GlobalsToImport=*/nullptr)) {
2010     errs() << "Error renaming module\n";
2011     return true;
2012   }
2013 
2014   // Perform the import now.
2015   auto ModuleLoader = [&M](StringRef Identifier) {
2016     return loadFile(std::string(Identifier), M.getContext());
2017   };
2018   FunctionImporter Importer(*Index, ModuleLoader,
2019                             /*ClearDSOLocalOnDeclarations=*/false);
2020   Expected<bool> Result = Importer.importFunctions(M, ImportList);
2021 
2022   // FIXME: Probably need to propagate Errors through the pass manager.
2023   if (!Result) {
2024     logAllUnhandledErrors(Result.takeError(), errs(),
2025                           "Error importing module: ");
2026     return true;
2027   }
2028 
2029   return true;
2030 }
2031 
2032 PreservedAnalyses FunctionImportPass::run(Module &M,
2033                                           ModuleAnalysisManager &AM) {
2034   // This is only used for testing the function import pass via opt, where we
2035   // don't have prevailing information from the LTO context available, so just
2036   // conservatively assume everything is prevailing (which is fine for the very
2037   // limited use of prevailing checking in this pass).
2038   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
2039     return true;
2040   };
2041   if (!doImportingForModuleForTest(M, isPrevailing))
2042     return PreservedAnalyses::all();
2043 
2044   return PreservedAnalyses::none();
2045 }
2046