xref: /llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp (revision d3161defd6b717241a85b6ca528754c747060735)
1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/BasicAliasAnalysis.h"
20 #include "llvm/Analysis/CGSCCPassManager.h"
21 #include "llvm/Analysis/CtxProfAnalysis.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/InlineAdvisor.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/ScopedNoAliasAA.h"
26 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
27 #include "llvm/CodeGen/GlobalMergeFunctions.h"
28 #include "llvm/IR/PassManager.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Passes/OptimizationLevel.h"
31 #include "llvm/Passes/PassBuilder.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/PGOOptions.h"
35 #include "llvm/Support/VirtualFileSystem.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
38 #include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
39 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
40 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
41 #include "llvm/Transforms/Coroutines/CoroEarly.h"
42 #include "llvm/Transforms/Coroutines/CoroElide.h"
43 #include "llvm/Transforms/Coroutines/CoroSplit.h"
44 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
45 #include "llvm/Transforms/IPO/AlwaysInliner.h"
46 #include "llvm/Transforms/IPO/Annotation2Metadata.h"
47 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
48 #include "llvm/Transforms/IPO/Attributor.h"
49 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
50 #include "llvm/Transforms/IPO/ConstantMerge.h"
51 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
52 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
53 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
54 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
55 #include "llvm/Transforms/IPO/ExpandVariadics.h"
56 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
57 #include "llvm/Transforms/IPO/FunctionAttrs.h"
58 #include "llvm/Transforms/IPO/GlobalDCE.h"
59 #include "llvm/Transforms/IPO/GlobalOpt.h"
60 #include "llvm/Transforms/IPO/GlobalSplit.h"
61 #include "llvm/Transforms/IPO/HotColdSplitting.h"
62 #include "llvm/Transforms/IPO/IROutliner.h"
63 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
64 #include "llvm/Transforms/IPO/Inliner.h"
65 #include "llvm/Transforms/IPO/LowerTypeTests.h"
66 #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
67 #include "llvm/Transforms/IPO/MergeFunctions.h"
68 #include "llvm/Transforms/IPO/ModuleInliner.h"
69 #include "llvm/Transforms/IPO/OpenMPOpt.h"
70 #include "llvm/Transforms/IPO/PartialInlining.h"
71 #include "llvm/Transforms/IPO/SCCP.h"
72 #include "llvm/Transforms/IPO/SampleProfile.h"
73 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
74 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
75 #include "llvm/Transforms/InstCombine/InstCombine.h"
76 #include "llvm/Transforms/Instrumentation/CGProfile.h"
77 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
78 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
79 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
80 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
81 #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
82 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
83 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
84 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
85 #include "llvm/Transforms/Scalar/ADCE.h"
86 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
87 #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
88 #include "llvm/Transforms/Scalar/BDCE.h"
89 #include "llvm/Transforms/Scalar/CallSiteSplitting.h"
90 #include "llvm/Transforms/Scalar/ConstraintElimination.h"
91 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
92 #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
93 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
94 #include "llvm/Transforms/Scalar/DivRemPairs.h"
95 #include "llvm/Transforms/Scalar/EarlyCSE.h"
96 #include "llvm/Transforms/Scalar/Float2Int.h"
97 #include "llvm/Transforms/Scalar/GVN.h"
98 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
99 #include "llvm/Transforms/Scalar/InferAlignment.h"
100 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
101 #include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
102 #include "llvm/Transforms/Scalar/JumpThreading.h"
103 #include "llvm/Transforms/Scalar/LICM.h"
104 #include "llvm/Transforms/Scalar/LoopDeletion.h"
105 #include "llvm/Transforms/Scalar/LoopDistribute.h"
106 #include "llvm/Transforms/Scalar/LoopFlatten.h"
107 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
108 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
109 #include "llvm/Transforms/Scalar/LoopInterchange.h"
110 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
111 #include "llvm/Transforms/Scalar/LoopPassManager.h"
112 #include "llvm/Transforms/Scalar/LoopRotation.h"
113 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
114 #include "llvm/Transforms/Scalar/LoopSink.h"
115 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
116 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
117 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
118 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
119 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
120 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
121 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
122 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
123 #include "llvm/Transforms/Scalar/NewGVN.h"
124 #include "llvm/Transforms/Scalar/Reassociate.h"
125 #include "llvm/Transforms/Scalar/SCCP.h"
126 #include "llvm/Transforms/Scalar/SROA.h"
127 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
128 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
129 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
130 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
131 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
132 #include "llvm/Transforms/Utils/AddDiscriminators.h"
133 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
134 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
135 #include "llvm/Transforms/Utils/CountVisits.h"
136 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
137 #include "llvm/Transforms/Utils/ExtraPassManager.h"
138 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
139 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
140 #include "llvm/Transforms/Utils/Mem2Reg.h"
141 #include "llvm/Transforms/Utils/MoveAutoInit.h"
142 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
143 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
144 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
145 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
146 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
147 #include "llvm/Transforms/Vectorize/VectorCombine.h"
148 
149 using namespace llvm;
150 
151 static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
152     "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
153     cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
154     cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
155                           "Heuristics-based inliner version"),
156                clEnumValN(InliningAdvisorMode::Development, "development",
157                           "Use development mode (runtime-loadable model)"),
158                clEnumValN(InliningAdvisorMode::Release, "release",
159                           "Use release mode (AOT-compiled model)")));
160 
161 /// Flag to enable inline deferral during PGO.
162 static cl::opt<bool>
163     EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
164                             cl::Hidden,
165                             cl::desc("Enable inline deferral during PGO"));
166 
167 static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
168                                          cl::init(false), cl::Hidden,
169                                          cl::desc("Enable module inliner"));
170 
171 static cl::opt<bool> PerformMandatoryInliningsFirst(
172     "mandatory-inlining-first", cl::init(false), cl::Hidden,
173     cl::desc("Perform mandatory inlinings module-wide, before performing "
174              "inlining"));
175 
176 static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
177     "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
178     cl::desc("Eagerly invalidate more analyses in default pipelines"));
179 
180 static cl::opt<bool> EnableMergeFunctions(
181     "enable-merge-functions", cl::init(false), cl::Hidden,
182     cl::desc("Enable function merging as part of the optimization pipeline"));
183 
184 static cl::opt<bool> EnablePostPGOLoopRotation(
185     "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
186     cl::desc("Run the loop rotation transformation after PGO instrumentation"));
187 
188 static cl::opt<bool> EnableGlobalAnalyses(
189     "enable-global-analyses", cl::init(true), cl::Hidden,
190     cl::desc("Enable inter-procedural analyses"));
191 
192 static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
193                                         cl::init(false), cl::Hidden,
194                                         cl::desc("Run Partial inlining pass"));
195 
196 static cl::opt<bool> ExtraVectorizerPasses(
197     "extra-vectorizer-passes", cl::init(false), cl::Hidden,
198     cl::desc("Run cleanup optimization passes after vectorization"));
199 
200 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
201                                cl::desc("Run the NewGVN pass"));
202 
203 static cl::opt<bool> EnableLoopInterchange(
204     "enable-loopinterchange", cl::init(false), cl::Hidden,
205     cl::desc("Enable the experimental LoopInterchange Pass"));
206 
207 static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
208                                         cl::init(false), cl::Hidden,
209                                         cl::desc("Enable Unroll And Jam Pass"));
210 
211 static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
212                                        cl::Hidden,
213                                        cl::desc("Enable the LoopFlatten Pass"));
214 
215 // Experimentally allow loop header duplication. This should allow for better
216 // optimization at Oz, since loop-idiom recognition can then recognize things
217 // like memcpy. If this ends up being useful for many targets, we should drop
218 // this flag and make a code generation option that can be controlled
219 // independent of the opt level and exposed through the frontend.
220 static cl::opt<bool> EnableLoopHeaderDuplication(
221     "enable-loop-header-duplication", cl::init(false), cl::Hidden,
222     cl::desc("Enable loop header duplication at any optimization level"));
223 
224 static cl::opt<bool>
225     EnableDFAJumpThreading("enable-dfa-jump-thread",
226                            cl::desc("Enable DFA jump threading"),
227                            cl::init(false), cl::Hidden);
228 
229 static cl::opt<bool>
230     EnableHotColdSplit("hot-cold-split",
231                        cl::desc("Enable hot-cold splitting pass"));
232 
233 static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
234                                       cl::Hidden,
235                                       cl::desc("Enable ir outliner pass"));
236 
237 static cl::opt<bool>
238     DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
239                       cl::desc("Disable pre-instrumentation inliner"));
240 
241 static cl::opt<int> PreInlineThreshold(
242     "preinline-threshold", cl::Hidden, cl::init(75),
243     cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244              "(default = 75)"));
245 
246 static cl::opt<bool>
247     EnableGVNHoist("enable-gvn-hoist",
248                    cl::desc("Enable the GVN hoisting pass (default = off)"));
249 
250 static cl::opt<bool>
251     EnableGVNSink("enable-gvn-sink",
252                   cl::desc("Enable the GVN sinking pass (default = off)"));
253 
254 static cl::opt<bool> EnableJumpTableToSwitch(
255     "enable-jump-table-to-switch",
256     cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257 
258 // This option is used in simplifying testing SampleFDO optimizations for
259 // profile loading.
260 static cl::opt<bool>
261     EnableCHR("enable-chr", cl::init(true), cl::Hidden,
262               cl::desc("Enable control height reduction optimization (CHR)"));
263 
264 static cl::opt<bool> FlattenedProfileUsed(
265     "flattened-profile-used", cl::init(false), cl::Hidden,
266     cl::desc("Indicate the sample profile being used is flattened, i.e., "
267              "no inline hierarchy exists in the profile"));
268 
269 static cl::opt<bool> EnableOrderFileInstrumentation(
270     "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
271     cl::desc("Enable order file instrumentation (default = off)"));
272 
273 static cl::opt<bool>
274     EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
275                  cl::desc("Enable lowering of the matrix intrinsics"));
276 
277 static cl::opt<bool> EnableConstraintElimination(
278     "enable-constraint-elimination", cl::init(true), cl::Hidden,
279     cl::desc(
280         "Enable pass to eliminate conditions based on linear constraints"));
281 
282 static cl::opt<AttributorRunOption> AttributorRun(
283     "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284     cl::desc("Enable the attributor inter-procedural deduction pass"),
285     cl::values(clEnumValN(AttributorRunOption::ALL, "all",
286                           "enable all attributor runs"),
287                clEnumValN(AttributorRunOption::MODULE, "module",
288                           "enable module-wide attributor runs"),
289                clEnumValN(AttributorRunOption::CGSCC, "cgscc",
290                           "enable call graph SCC attributor runs"),
291                clEnumValN(AttributorRunOption::NONE, "none",
292                           "disable attributor runs")));
293 
294 static cl::opt<bool> EnableSampledInstr(
295     "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
296     cl::desc("Enable profile instrumentation sampling (default = off)"));
297 static cl::opt<bool> UseLoopVersioningLICM(
298     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
299     cl::desc("Enable the experimental Loop Versioning LICM pass"));
300 
301 static cl::opt<std::string> InstrumentColdFuncOnlyPath(
302     "instrument-cold-function-only-path", cl::init(""),
303     cl::desc("File path for cold function only instrumentation(requires use "
304              "with --pgo-instrument-cold-function-only)"),
305     cl::Hidden);
306 
307 extern cl::opt<std::string> UseCtxProfile;
308 extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
309 
310 namespace llvm {
311 extern cl::opt<bool> EnableMemProfContextDisambiguation;
312 } // namespace llvm
313 
314 PipelineTuningOptions::PipelineTuningOptions() {
315   LoopInterleaving = true;
316   LoopVectorization = true;
317   SLPVectorization = false;
318   LoopUnrolling = true;
319   ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
320   LicmMssaOptCap = SetLicmMssaOptCap;
321   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
322   CallGraphProfile = true;
323   UnifiedLTO = false;
324   MergeFunctions = EnableMergeFunctions;
325   InlinerThreshold = -1;
326   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
327 }
328 
329 namespace llvm {
330 extern cl::opt<unsigned> MaxDevirtIterations;
331 } // namespace llvm
332 
333 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
334                                             OptimizationLevel Level) {
335   for (auto &C : PeepholeEPCallbacks)
336     C(FPM, Level);
337 }
338 void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
339     LoopPassManager &LPM, OptimizationLevel Level) {
340   for (auto &C : LateLoopOptimizationsEPCallbacks)
341     C(LPM, Level);
342 }
343 void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
344                                                     OptimizationLevel Level) {
345   for (auto &C : LoopOptimizerEndEPCallbacks)
346     C(LPM, Level);
347 }
348 void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
349     FunctionPassManager &FPM, OptimizationLevel Level) {
350   for (auto &C : ScalarOptimizerLateEPCallbacks)
351     C(FPM, Level);
352 }
353 void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
354                                                       OptimizationLevel Level) {
355   for (auto &C : CGSCCOptimizerLateEPCallbacks)
356     C(CGPM, Level);
357 }
358 void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
359                                                    OptimizationLevel Level) {
360   for (auto &C : VectorizerStartEPCallbacks)
361     C(FPM, Level);
362 }
363 void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
364                                                  OptimizationLevel Level) {
365   for (auto &C : VectorizerEndEPCallbacks)
366     C(FPM, Level);
367 }
368 void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
369                                                   OptimizationLevel Level,
370                                                   ThinOrFullLTOPhase Phase) {
371   for (auto &C : OptimizerEarlyEPCallbacks)
372     C(MPM, Level, Phase);
373 }
374 void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
375                                                  OptimizationLevel Level,
376                                                  ThinOrFullLTOPhase Phase) {
377   for (auto &C : OptimizerLastEPCallbacks)
378     C(MPM, Level, Phase);
379 }
380 void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
381     ModulePassManager &MPM, OptimizationLevel Level) {
382   for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
383     C(MPM, Level);
384 }
385 void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
386     ModulePassManager &MPM, OptimizationLevel Level) {
387   for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
388     C(MPM, Level);
389 }
390 void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
391                                                  OptimizationLevel Level) {
392   for (auto &C : PipelineStartEPCallbacks)
393     C(MPM, Level);
394 }
395 void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
396     ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
397   for (auto &C : PipelineEarlySimplificationEPCallbacks)
398     C(MPM, Level, Phase);
399 }
400 
401 // Helper to add AnnotationRemarksPass.
402 static void addAnnotationRemarksPass(ModulePassManager &MPM) {
403   MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
404 }
405 
406 // Helper to check if the current compilation phase is preparing for LTO
407 static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
408   return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
409          Phase == ThinOrFullLTOPhase::FullLTOPreLink;
410 }
411 
412 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
413 FunctionPassManager
414 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
415                                                    ThinOrFullLTOPhase Phase) {
416 
417   FunctionPassManager FPM;
418 
419   if (AreStatisticsEnabled())
420     FPM.addPass(CountVisitsPass());
421 
422   // Form SSA out of local memory accesses after breaking apart aggregates into
423   // scalars.
424   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
425 
426   // Catch trivial redundancies
427   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
428 
429   // Hoisting of scalars and load expressions.
430   FPM.addPass(
431       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
432   FPM.addPass(InstCombinePass());
433 
434   FPM.addPass(LibCallsShrinkWrapPass());
435 
436   invokePeepholeEPCallbacks(FPM, Level);
437 
438   FPM.addPass(
439       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
440 
441   // Form canonically associated expression trees, and simplify the trees using
442   // basic mathematical properties. For example, this will form (nearly)
443   // minimal multiplication trees.
444   FPM.addPass(ReassociatePass());
445 
446   // Add the primary loop simplification pipeline.
447   // FIXME: Currently this is split into two loop pass pipelines because we run
448   // some function passes in between them. These can and should be removed
449   // and/or replaced by scheduling the loop pass equivalents in the correct
450   // positions. But those equivalent passes aren't powerful enough yet.
451   // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
452   // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
453   // fully replace `SimplifyCFGPass`, and the closest to the other we have is
454   // `LoopInstSimplify`.
455   LoopPassManager LPM1, LPM2;
456 
457   // Simplify the loop body. We do this initially to clean up after other loop
458   // passes run, either when iterating on a loop or on inner loops with
459   // implications on the outer loop.
460   LPM1.addPass(LoopInstSimplifyPass());
461   LPM1.addPass(LoopSimplifyCFGPass());
462 
463   // Try to remove as much code from the loop header as possible,
464   // to reduce amount of IR that will have to be duplicated. However,
465   // do not perform speculative hoisting the first time as LICM
466   // will destroy metadata that may not need to be destroyed if run
467   // after loop rotation.
468   // TODO: Investigate promotion cap for O1.
469   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
470                         /*AllowSpeculation=*/false));
471 
472   LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
473                               isLTOPreLink(Phase)));
474   // TODO: Investigate promotion cap for O1.
475   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
476                         /*AllowSpeculation=*/true));
477   LPM1.addPass(SimpleLoopUnswitchPass());
478   if (EnableLoopFlatten)
479     LPM1.addPass(LoopFlattenPass());
480 
481   LPM2.addPass(LoopIdiomRecognizePass());
482   LPM2.addPass(IndVarSimplifyPass());
483 
484   invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
485 
486   LPM2.addPass(LoopDeletionPass());
487 
488   if (EnableLoopInterchange)
489     LPM2.addPass(LoopInterchangePass());
490 
491   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
492   // because it changes IR to makes profile annotation in back compile
493   // inaccurate. The normal unroller doesn't pay attention to forced full unroll
494   // attributes so we need to make sure and allow the full unroll pass to pay
495   // attention to it.
496   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
497       PGOOpt->Action != PGOOptions::SampleUse)
498     LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
499                                     /* OnlyWhenForced= */ !PTO.LoopUnrolling,
500                                     PTO.ForgetAllSCEVInLoopUnroll));
501 
502   invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
503 
504   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
505                                               /*UseMemorySSA=*/true,
506                                               /*UseBlockFrequencyInfo=*/true));
507   FPM.addPass(
508       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
509   FPM.addPass(InstCombinePass());
510   // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
511   // *All* loop passes must preserve it, in order to be able to use it.
512   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
513                                               /*UseMemorySSA=*/false,
514                                               /*UseBlockFrequencyInfo=*/false));
515 
516   // Delete small array after loop unroll.
517   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
518 
519   // Specially optimize memory movement as it doesn't look like dataflow in SSA.
520   FPM.addPass(MemCpyOptPass());
521 
522   // Sparse conditional constant propagation.
523   // FIXME: It isn't clear why we do this *after* loop passes rather than
524   // before...
525   FPM.addPass(SCCPPass());
526 
527   // Delete dead bit computations (instcombine runs after to fold away the dead
528   // computations, and then ADCE will run later to exploit any new DCE
529   // opportunities that creates).
530   FPM.addPass(BDCEPass());
531 
532   // Run instcombine after redundancy and dead bit elimination to exploit
533   // opportunities opened up by them.
534   FPM.addPass(InstCombinePass());
535   invokePeepholeEPCallbacks(FPM, Level);
536 
537   FPM.addPass(CoroElidePass());
538 
539   invokeScalarOptimizerLateEPCallbacks(FPM, Level);
540 
541   // Finally, do an expensive DCE pass to catch all the dead code exposed by
542   // the simplifications and basic cleanup after all the simplifications.
543   // TODO: Investigate if this is too expensive.
544   FPM.addPass(ADCEPass());
545   FPM.addPass(
546       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
547   FPM.addPass(InstCombinePass());
548   invokePeepholeEPCallbacks(FPM, Level);
549 
550   return FPM;
551 }
552 
553 FunctionPassManager
554 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
555                                                  ThinOrFullLTOPhase Phase) {
556   assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
557 
558   // The O1 pipeline has a separate pipeline creation function to simplify
559   // construction readability.
560   if (Level.getSpeedupLevel() == 1)
561     return buildO1FunctionSimplificationPipeline(Level, Phase);
562 
563   FunctionPassManager FPM;
564 
565   if (AreStatisticsEnabled())
566     FPM.addPass(CountVisitsPass());
567 
568   // Form SSA out of local memory accesses after breaking apart aggregates into
569   // scalars.
570   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
571 
572   // Catch trivial redundancies
573   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
574   if (EnableKnowledgeRetention)
575     FPM.addPass(AssumeSimplifyPass());
576 
577   // Hoisting of scalars and load expressions.
578   if (EnableGVNHoist)
579     FPM.addPass(GVNHoistPass());
580 
581   // Global value numbering based sinking.
582   if (EnableGVNSink) {
583     FPM.addPass(GVNSinkPass());
584     FPM.addPass(
585         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
586   }
587 
588   // Speculative execution if the target has divergent branches; otherwise nop.
589   FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
590 
591   // Optimize based on known information about branches, and cleanup afterward.
592   FPM.addPass(JumpThreadingPass());
593   FPM.addPass(CorrelatedValuePropagationPass());
594 
595   // Jump table to switch conversion.
596   if (EnableJumpTableToSwitch)
597     FPM.addPass(JumpTableToSwitchPass());
598 
599   FPM.addPass(
600       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
601   FPM.addPass(InstCombinePass());
602   FPM.addPass(AggressiveInstCombinePass());
603 
604   if (!Level.isOptimizingForSize())
605     FPM.addPass(LibCallsShrinkWrapPass());
606 
607   invokePeepholeEPCallbacks(FPM, Level);
608 
609   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
610   // using the size value profile. Don't perform this when optimizing for size.
611   if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
612       !Level.isOptimizingForSize())
613     FPM.addPass(PGOMemOPSizeOpt());
614 
615   FPM.addPass(TailCallElimPass());
616   FPM.addPass(
617       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
618 
619   // Form canonically associated expression trees, and simplify the trees using
620   // basic mathematical properties. For example, this will form (nearly)
621   // minimal multiplication trees.
622   FPM.addPass(ReassociatePass());
623 
624   if (EnableConstraintElimination)
625     FPM.addPass(ConstraintEliminationPass());
626 
627   // Add the primary loop simplification pipeline.
628   // FIXME: Currently this is split into two loop pass pipelines because we run
629   // some function passes in between them. These can and should be removed
630   // and/or replaced by scheduling the loop pass equivalents in the correct
631   // positions. But those equivalent passes aren't powerful enough yet.
632   // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
634   // fully replace `SimplifyCFGPass`, and the closest to the other we have is
635   // `LoopInstSimplify`.
636   LoopPassManager LPM1, LPM2;
637 
638   // Simplify the loop body. We do this initially to clean up after other loop
639   // passes run, either when iterating on a loop or on inner loops with
640   // implications on the outer loop.
641   LPM1.addPass(LoopInstSimplifyPass());
642   LPM1.addPass(LoopSimplifyCFGPass());
643 
644   // Try to remove as much code from the loop header as possible,
645   // to reduce amount of IR that will have to be duplicated. However,
646   // do not perform speculative hoisting the first time as LICM
647   // will destroy metadata that may not need to be destroyed if run
648   // after loop rotation.
649   // TODO: Investigate promotion cap for O1.
650   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
651                         /*AllowSpeculation=*/false));
652 
653   // Disable header duplication in loop rotation at -Oz.
654   LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
655                                   Level != OptimizationLevel::Oz,
656                               isLTOPreLink(Phase)));
657   // TODO: Investigate promotion cap for O1.
658   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
659                         /*AllowSpeculation=*/true));
660   LPM1.addPass(
661       SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
662   if (EnableLoopFlatten)
663     LPM1.addPass(LoopFlattenPass());
664 
665   LPM2.addPass(LoopIdiomRecognizePass());
666   LPM2.addPass(IndVarSimplifyPass());
667 
668   {
669     ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses;
670     ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
671                                                OptimizationLevel::O3));
672     LPM2.addPass(std::move(ExtraPasses));
673   }
674 
675   invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
676 
677   LPM2.addPass(LoopDeletionPass());
678 
679   if (EnableLoopInterchange)
680     LPM2.addPass(LoopInterchangePass());
681 
682   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
683   // because it changes IR to makes profile annotation in back compile
684   // inaccurate. The normal unroller doesn't pay attention to forced full unroll
685   // attributes so we need to make sure and allow the full unroll pass to pay
686   // attention to it.
687   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
688       PGOOpt->Action != PGOOptions::SampleUse)
689     LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
690                                     /* OnlyWhenForced= */ !PTO.LoopUnrolling,
691                                     PTO.ForgetAllSCEVInLoopUnroll));
692 
693   invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
694 
695   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
696                                               /*UseMemorySSA=*/true,
697                                               /*UseBlockFrequencyInfo=*/true));
698   FPM.addPass(
699       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
700   FPM.addPass(InstCombinePass());
701   // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
702   // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
703   // *All* loop passes must preserve it, in order to be able to use it.
704   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
705                                               /*UseMemorySSA=*/false,
706                                               /*UseBlockFrequencyInfo=*/false));
707 
708   // Delete small array after loop unroll.
709   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
710 
711   // Try vectorization/scalarization transforms that are both improvements
712   // themselves and can allow further folds with GVN and InstCombine.
713   FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
714 
715   // Eliminate redundancies.
716   FPM.addPass(MergedLoadStoreMotionPass());
717   if (RunNewGVN)
718     FPM.addPass(NewGVNPass());
719   else
720     FPM.addPass(GVNPass());
721 
722   // Sparse conditional constant propagation.
723   // FIXME: It isn't clear why we do this *after* loop passes rather than
724   // before...
725   FPM.addPass(SCCPPass());
726 
727   // Delete dead bit computations (instcombine runs after to fold away the dead
728   // computations, and then ADCE will run later to exploit any new DCE
729   // opportunities that creates).
730   FPM.addPass(BDCEPass());
731 
732   // Run instcombine after redundancy and dead bit elimination to exploit
733   // opportunities opened up by them.
734   FPM.addPass(InstCombinePass());
735   invokePeepholeEPCallbacks(FPM, Level);
736 
737   // Re-consider control flow based optimizations after redundancy elimination,
738   // redo DCE, etc.
739   if (EnableDFAJumpThreading)
740     FPM.addPass(DFAJumpThreadingPass());
741 
742   FPM.addPass(JumpThreadingPass());
743   FPM.addPass(CorrelatedValuePropagationPass());
744 
745   // Finally, do an expensive DCE pass to catch all the dead code exposed by
746   // the simplifications and basic cleanup after all the simplifications.
747   // TODO: Investigate if this is too expensive.
748   FPM.addPass(ADCEPass());
749 
750   // Specially optimize memory movement as it doesn't look like dataflow in SSA.
751   FPM.addPass(MemCpyOptPass());
752 
753   FPM.addPass(DSEPass());
754   FPM.addPass(MoveAutoInitPass());
755 
756   FPM.addPass(createFunctionToLoopPassAdaptor(
757       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
758                /*AllowSpeculation=*/true),
759       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
760 
761   FPM.addPass(CoroElidePass());
762 
763   invokeScalarOptimizerLateEPCallbacks(FPM, Level);
764 
765   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
766                                   .convertSwitchRangeToICmp(true)
767                                   .hoistCommonInsts(true)
768                                   .sinkCommonInsts(true)));
769   FPM.addPass(InstCombinePass());
770   invokePeepholeEPCallbacks(FPM, Level);
771 
772   return FPM;
773 }
774 
/// Appends CanonicalizeAliasesPass followed by NameAnonGlobalPass to \p MPM.
///
/// NOTE(review): judging by the name, these are the passes every LTO pre-link
/// pipeline is expected to run; the callers are outside this part of the file,
/// so confirm there.
///
/// \param MPM Module pass manager the two passes are appended to, in order.
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}
779 
780 void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
781                                       OptimizationLevel Level,
782                                       ThinOrFullLTOPhase LTOPhase) {
783   assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
784   if (DisablePreInliner)
785     return;
786   InlineParams IP;
787 
788   IP.DefaultThreshold = PreInlineThreshold;
789 
790   // FIXME: The hint threshold has the same value used by the regular inliner
791   // when not optimzing for size. This should probably be lowered after
792   // performance testing.
793   // FIXME: this comment is cargo culted from the old pass manager, revisit).
794   IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
795   ModuleInlinerWrapperPass MIWP(
796       IP, /* MandatoryFirst */ true,
797       InlineContext{LTOPhase, InlinePass::EarlyInliner});
798   CGSCCPassManager &CGPipeline = MIWP.getPM();
799 
800   FunctionPassManager FPM;
801   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
802   FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
803   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
804       true)));                    // Merge & remove basic blocks.
805   FPM.addPass(InstCombinePass()); // Combine silly sequences.
806   invokePeepholeEPCallbacks(FPM, Level);
807 
808   CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
809       std::move(FPM), PTO.EagerlyInvalidateAnalyses));
810 
811   MPM.addPass(std::move(MIWP));
812 
813   // Delete anything that is now dead to make sure that we don't instrument
814   // dead code. Instrumentation can end up keeping dead code around and
815   // dramatically increase code size.
816   MPM.addPass(GlobalDCEPass());
817 }
818 
819 void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
820                                          OptimizationLevel Level) {
821   if (EnablePostPGOLoopRotation) {
822     // Disable header duplication in loop rotation at -Oz.
823     MPM.addPass(createModuleToFunctionPassAdaptor(
824         createFunctionToLoopPassAdaptor(
825             LoopRotatePass(EnableLoopHeaderDuplication ||
826                            Level != OptimizationLevel::Oz),
827             /*UseMemorySSA=*/false,
828             /*UseBlockFrequencyInfo=*/false),
829         PTO.EagerlyInvalidateAnalyses));
830   }
831 }
832 
833 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
834                                     OptimizationLevel Level, bool RunProfileGen,
835                                     bool IsCS, bool AtomicCounterUpdate,
836                                     std::string ProfileFile,
837                                     std::string ProfileRemappingFile,
838                                     IntrusiveRefCntPtr<vfs::FileSystem> FS) {
839   assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
840 
841   if (!RunProfileGen) {
842     assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
843     MPM.addPass(
844         PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
845     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
846     // RequireAnalysisPass for PSI before subsequent non-module passes.
847     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
848     return;
849   }
850 
851   // Perform PGO instrumentation.
852   MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
853                                          : PGOInstrumentationType::FDO));
854 
855   addPostPGOLoopRotation(MPM, Level);
856   // Add the profile lowering pass.
857   InstrProfOptions Options;
858   if (!ProfileFile.empty())
859     Options.InstrProfileOutput = ProfileFile;
860   // Do counter promotion at Level greater than O0.
861   Options.DoCounterPromotion = true;
862   Options.UseBFIInPromotion = IsCS;
863   if (EnableSampledInstr) {
864     Options.Sampling = true;
865     // With sampling, there is little beneifit to enable counter promotion.
866     // But note that sampling does work with counter promotion.
867     Options.DoCounterPromotion = false;
868   }
869   Options.Atomic = AtomicCounterUpdate;
870   MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
871 }
872 
873 void PassBuilder::addPGOInstrPassesForO0(
874     ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
875     bool AtomicCounterUpdate, std::string ProfileFile,
876     std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
877   if (!RunProfileGen) {
878     assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
879     MPM.addPass(
880         PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
881     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
882     // RequireAnalysisPass for PSI before subsequent non-module passes.
883     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
884     return;
885   }
886 
887   // Perform PGO instrumentation.
888   MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
889                                          : PGOInstrumentationType::FDO));
890   // Add the profile lowering pass.
891   InstrProfOptions Options;
892   if (!ProfileFile.empty())
893     Options.InstrProfileOutput = ProfileFile;
894   // Do not do counter promotion at O0.
895   Options.DoCounterPromotion = false;
896   Options.UseBFIInPromotion = IsCS;
897   Options.Atomic = AtomicCounterUpdate;
898   MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
899 }
900 
901 static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
902   return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
903 }
904 
905 ModuleInlinerWrapperPass
906 PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
907                                   ThinOrFullLTOPhase Phase) {
908   InlineParams IP;
909   if (PTO.InlinerThreshold == -1)
910     IP = getInlineParamsFromOptLevel(Level);
911   else
912     IP = getInlineParams(PTO.InlinerThreshold);
913   // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
914   // disable hot callsite inline (as much as possible [1]) because it makes
915   // profile annotation in the backend inaccurate.
916   //
917   // [1] Note the cost of a function could be below zero due to erased
918   // prologue / epilogue.
919   if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
920       PGOOpt->Action == PGOOptions::SampleUse)
921     IP.HotCallSiteThreshold = 0;
922 
923   if (PGOOpt)
924     IP.EnableDeferral = EnablePGOInlineDeferral;
925 
926   ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
927                                 InlineContext{Phase, InlinePass::CGSCCInliner},
928                                 UseInlineAdvisor, MaxDevirtIterations);
929 
930   // Require the GlobalsAA analysis for the module so we can query it within
931   // the CGSCC pipeline.
932   if (EnableGlobalAnalyses) {
933     MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
934     // Invalidate AAManager so it can be recreated and pick up the newly
935     // available GlobalsAA.
936     MIWP.addModulePass(
937         createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
938   }
939 
940   // Require the ProfileSummaryAnalysis for the module so we can query it within
941   // the inliner pass.
942   MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
943 
944   // Now begin the main postorder CGSCC pipeline.
945   // FIXME: The current CGSCC pipeline has its origins in the legacy pass
946   // manager and trying to emulate its precise behavior. Much of this doesn't
947   // make a lot of sense and we should revisit the core CGSCC structure.
948   CGSCCPassManager &MainCGPipeline = MIWP.getPM();
949 
950   // Note: historically, the PruneEH pass was run first to deduce nounwind and
951   // generally clean up exception handling overhead. It isn't clear this is
952   // valuable as the inliner doesn't currently care whether it is inlining an
953   // invoke or a call.
954 
955   if (AttributorRun & AttributorRunOption::CGSCC)
956     MainCGPipeline.addPass(AttributorCGSCCPass());
957 
958   // Deduce function attributes. We do another run of this after the function
959   // simplification pipeline, so this only needs to run when it could affect the
960   // function simplification pipeline, which is only the case with recursive
961   // functions.
962   MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
963 
964   // When at O3 add argument promotion to the pass pipeline.
965   // FIXME: It isn't at all clear why this should be limited to O3.
966   if (Level == OptimizationLevel::O3)
967     MainCGPipeline.addPass(ArgumentPromotionPass());
968 
969   // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
970   // there are no OpenMP runtime calls present in the module.
971   if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
972     MainCGPipeline.addPass(OpenMPOptCGSCCPass());
973 
974   invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
975 
976   // Add the core function simplification pipeline nested inside the
977   // CGSCC walk.
978   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
979       buildFunctionSimplificationPipeline(Level, Phase),
980       PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
981 
982   // Finally, deduce any function attributes based on the fully simplified
983   // function.
984   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
985 
986   // Mark that the function is fully simplified and that it shouldn't be
987   // simplified again if we somehow revisit it due to CGSCC mutations unless
988   // it's been modified since.
989   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
990       RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
991 
992   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
993     MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
994     MainCGPipeline.addPass(CoroAnnotationElidePass());
995   }
996 
997   // Make sure we don't affect potential future NoRerun CGSCC adaptors.
998   MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
999       InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
1000 
1001   return MIWP;
1002 }
1003 
1004 ModulePassManager
1005 PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
1006                                         ThinOrFullLTOPhase Phase) {
1007   ModulePassManager MPM;
1008 
1009   InlineParams IP = getInlineParamsFromOptLevel(Level);
1010   // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
1011   // disable hot callsite inline (as much as possible [1]) because it makes
1012   // profile annotation in the backend inaccurate.
1013   //
1014   // [1] Note the cost of a function could be below zero due to erased
1015   // prologue / epilogue.
1016   if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
1017       PGOOpt->Action == PGOOptions::SampleUse)
1018     IP.HotCallSiteThreshold = 0;
1019 
1020   if (PGOOpt)
1021     IP.EnableDeferral = EnablePGOInlineDeferral;
1022 
1023   // The inline deferral logic is used to avoid losing some
1024   // inlining chance in future. It is helpful in SCC inliner, in which
1025   // inlining is processed in bottom-up order.
1026   // While in module inliner, the inlining order is a priority-based order
1027   // by default. The inline deferral is unnecessary there. So we disable the
1028   // inline deferral logic in module inliner.
1029   IP.EnableDeferral = false;
1030 
1031   MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1032   if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1033     MPM.addPass(GlobalOptPass());
1034     MPM.addPass(GlobalDCEPass());
1035     MPM.addPass(PGOCtxProfFlatteningPass());
1036   }
1037 
1038   MPM.addPass(createModuleToFunctionPassAdaptor(
1039       buildFunctionSimplificationPipeline(Level, Phase),
1040       PTO.EagerlyInvalidateAnalyses));
1041 
1042   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1043     MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1044         CoroSplitPass(Level != OptimizationLevel::O0)));
1045     MPM.addPass(
1046         createModuleToPostOrderCGSCCPassAdaptor(CoroAnnotationElidePass()));
1047   }
1048 
1049   return MPM;
1050 }
1051 
1052 ModulePassManager
1053 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1054                                                ThinOrFullLTOPhase Phase) {
1055   assert(Level != OptimizationLevel::O0 &&
1056          "Should not be used for O0 pipeline");
1057 
1058   assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1059          "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1060 
1061   ModulePassManager MPM;
1062 
1063   // Place pseudo probe instrumentation as the first pass of the pipeline to
1064   // minimize the impact of optimization changes.
1065   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1066       Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1067     MPM.addPass(SampleProfileProbePass(TM));
1068 
1069   bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1070 
1071   // In ThinLTO mode, when flattened profile is used, all the available
1072   // profile information will be annotated in PreLink phase so there is
1073   // no need to load the profile again in PostLink.
1074   bool LoadSampleProfile =
1075       HasSampleProfile &&
1076       !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1077 
1078   // During the ThinLTO backend phase we perform early indirect call promotion
1079   // here, before globalopt. Otherwise imported available_externally functions
1080   // look unreferenced and are removed. If we are going to load the sample
1081   // profile then defer until later.
1082   // TODO: See if we can move later and consolidate with the location where
1083   // we perform ICP when we are loading a sample profile.
1084   // TODO: We pass HasSampleProfile (whether there was a sample profile file
1085   // passed to the compile) to the SamplePGO flag of ICP. This is used to
1086   // determine whether the new direct calls are annotated with prof metadata.
1087   // Ideally this should be determined from whether the IR is annotated with
1088   // sample profile, and not whether the a sample profile was provided on the
1089   // command line. E.g. for flattened profiles where we will not be reloading
1090   // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1091   // provide the sample profile file.
1092   if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1093     MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1094 
1095   // Create an early function pass manager to cleanup the output of the
1096   // frontend. Not necessary with LTO post link pipelines since the pre link
1097   // pipeline already cleaned up the frontend output.
1098   if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1099     // Do basic inference of function attributes from known properties of system
1100     // libraries and other oracles.
1101     MPM.addPass(InferFunctionAttrsPass());
1102     MPM.addPass(CoroEarlyPass());
1103 
1104     FunctionPassManager EarlyFPM;
1105     EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1106     // Lower llvm.expect to metadata before attempting transforms.
1107     // Compare/branch metadata may alter the behavior of passes like
1108     // SimplifyCFG.
1109     EarlyFPM.addPass(LowerExpectIntrinsicPass());
1110     EarlyFPM.addPass(SimplifyCFGPass());
1111     EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
1112     EarlyFPM.addPass(EarlyCSEPass());
1113     if (Level == OptimizationLevel::O3)
1114       EarlyFPM.addPass(CallSiteSplittingPass());
1115     MPM.addPass(createModuleToFunctionPassAdaptor(
1116         std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1117   }
1118 
1119   if (LoadSampleProfile) {
1120     // Annotate sample profile right after early FPM to ensure freshness of
1121     // the debug info.
1122     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1123                                         PGOOpt->ProfileRemappingFile, Phase));
1124     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1125     // RequireAnalysisPass for PSI before subsequent non-module passes.
1126     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1127     // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1128     // for the profile annotation to be accurate in the LTO backend.
1129     if (!isLTOPreLink(Phase))
1130       // We perform early indirect call promotion here, before globalopt.
1131       // This is important for the ThinLTO backend phase because otherwise
1132       // imported available_externally functions look unreferenced and are
1133       // removed.
1134       MPM.addPass(
1135           PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1136   }
1137 
1138   // Try to perform OpenMP specific optimizations on the module. This is a
1139   // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1140   MPM.addPass(OpenMPOptPass());
1141 
1142   if (AttributorRun & AttributorRunOption::MODULE)
1143     MPM.addPass(AttributorPass());
1144 
1145   // Lower type metadata and the type.test intrinsic in the ThinLTO
1146   // post link pipeline after ICP. This is to enable usage of the type
1147   // tests in ICP sequences.
1148   if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1149     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1150                                    lowertypetests::DropTestKind::Assume));
1151 
1152   invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
1153 
1154   // Interprocedural constant propagation now that basic cleanup has occurred
1155   // and prior to optimizing globals.
1156   // FIXME: This position in the pipeline hasn't been carefully considered in
1157   // years, it should be re-analyzed.
1158   MPM.addPass(IPSCCPPass(
1159               IPSCCPOptions(/*AllowFuncSpec=*/
1160                             Level != OptimizationLevel::Os &&
1161                             Level != OptimizationLevel::Oz &&
1162                             !isLTOPreLink(Phase))));
1163 
1164   // Attach metadata to indirect call sites indicating the set of functions
1165   // they may target at run-time. This should follow IPSCCP.
1166   MPM.addPass(CalledValuePropagationPass());
1167 
1168   // Optimize globals to try and fold them into constants.
1169   MPM.addPass(GlobalOptPass());
1170 
1171   // Create a small function pass pipeline to cleanup after all the global
1172   // optimizations.
1173   FunctionPassManager GlobalCleanupPM;
1174   // FIXME: Should this instead by a run of SROA?
1175   GlobalCleanupPM.addPass(PromotePass());
1176   GlobalCleanupPM.addPass(InstCombinePass());
1177   invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1178   GlobalCleanupPM.addPass(
1179       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1180   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1181                                                 PTO.EagerlyInvalidateAnalyses));
1182 
1183   // We already asserted this happens in non-FullLTOPostLink earlier.
1184   const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1185   const bool IsPGOPreLink = PGOOpt && IsPreLink;
1186   const bool IsPGOInstrGen =
1187       IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1188   const bool IsPGOInstrUse =
1189       IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1190   const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1191   // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1192   // enable ctx profiling from the frontend.
1193   assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1194          "Enabling both instrumented PGO and contextual instrumentation is not "
1195          "supported.");
1196   // Enable contextual profiling instrumentation.
1197   const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1198                             PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
1199   const bool IsCtxProfUse =
1200       !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
1201 
1202   assert(
1203       (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) &&
1204       "--instrument-cold-function-only-path is provided but "
1205       "--pgo-instrument-cold-function-only is not enabled");
1206   const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1207                                       IsPGOPreLink &&
1208                                       !InstrumentColdFuncOnlyPath.empty();
1209 
1210   if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1211       IsCtxProfUse || IsColdFuncOnlyInstrGen)
1212     addPreInlinerPasses(MPM, Level, Phase);
1213 
1214   // Add all the requested passes for instrumentation PGO, if requested.
1215   if (IsPGOInstrGen || IsPGOInstrUse) {
1216     addPGOInstrPasses(MPM, Level,
1217                       /*RunProfileGen=*/IsPGOInstrGen,
1218                       /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1219                       PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1220                       PGOOpt->FS);
1221   } else if (IsCtxProfGen || IsCtxProfUse) {
1222     MPM.addPass(PGOInstrumentationGen(PGOInstrumentationType::CTXPROF));
1223     // In pre-link, we just want the instrumented IR. We use the contextual
1224     // profile in the post-thinlink phase.
1225     // The instrumentation will be removed in post-thinlink after IPO.
1226     // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1227     // mechanism for GUIDs.
1228     MPM.addPass(AssignGUIDPass());
1229     if (IsCtxProfUse)
1230       return MPM;
1231     addPostPGOLoopRotation(MPM, Level);
1232     MPM.addPass(PGOCtxProfLoweringPass());
1233   } else if (IsColdFuncOnlyInstrGen) {
1234     addPGOInstrPasses(
1235         MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1236         /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1237         /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
1238   }
1239 
1240   if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1241     MPM.addPass(PGOIndirectCallPromotion(false, false));
1242 
1243   if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1244     MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1245                                                EnableSampledInstr));
1246 
1247   if (IsMemprofUse)
1248     MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1249 
1250   if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1251                  PGOOpt->Action == PGOOptions::SampleUse))
1252     MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1253 
1254   MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1255 
1256   if (EnableModuleInliner)
1257     MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
1258   else
1259     MPM.addPass(buildInlinerPipeline(Level, Phase));
1260 
1261   // Remove any dead arguments exposed by cleanups, constant folding globals,
1262   // and argument promotion.
1263   MPM.addPass(DeadArgumentEliminationPass());
1264 
1265   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
1266     MPM.addPass(CoroCleanupPass());
1267 
1268   // Optimize globals now that functions are fully simplified.
1269   MPM.addPass(GlobalOptPass());
1270   MPM.addPass(GlobalDCEPass());
1271 
1272   return MPM;
1273 }
1274 
1275 /// TODO: Should LTO cause any differences to this set of passes?
1276 void PassBuilder::addVectorPasses(OptimizationLevel Level,
1277                                   FunctionPassManager &FPM, bool IsFullLTO) {
1278   FPM.addPass(LoopVectorizePass(
1279       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1280 
1281   FPM.addPass(InferAlignmentPass());
1282   if (IsFullLTO) {
1283     // The vectorizer may have significantly shortened a loop body; unroll
1284     // again. Unroll small loops to hide loop backedge latency and saturate any
1285     // parallel execution resources of an out-of-order processor. We also then
1286     // need to clean up redundancies and loop invariant code.
1287     // FIXME: It would be really good to use a loop-integrated instruction
1288     // combiner for cleanup here so that the unrolling and LICM can be pipelined
1289     // across the loop nests.
1290     // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1291     if (EnableUnrollAndJam && PTO.LoopUnrolling)
1292       FPM.addPass(createFunctionToLoopPassAdaptor(
1293           LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1294     FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1295         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1296         PTO.ForgetAllSCEVInLoopUnroll)));
1297     FPM.addPass(WarnMissedTransformationsPass());
1298     // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1299     // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1300     // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1301     // NOTE: we are very late in the pipeline, and we don't have any LICM
1302     // or SimplifyCFG passes scheduled after us, that would cleanup
1303     // the CFG mess this may created if allowed to modify CFG, so forbid that.
1304     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1305   }
1306 
1307   if (!IsFullLTO) {
1308     // Eliminate loads by forwarding stores from the previous iteration to loads
1309     // of the current iteration.
1310     FPM.addPass(LoopLoadEliminationPass());
1311   }
1312   // Cleanup after the loop optimization passes.
1313   FPM.addPass(InstCombinePass());
1314 
1315   if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1316     ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1317     // At higher optimization levels, try to clean up any runtime overlap and
1318     // alignment checks inserted by the vectorizer. We want to track correlated
1319     // runtime checks for two inner loops in the same outer loop, fold any
1320     // common computations, hoist loop-invariant aspects out of any outer loop,
1321     // and unswitch the runtime checks if possible. Once hoisted, we may have
1322     // dead (or speculatable) control flows or more combining opportunities.
1323     ExtraPasses.addPass(EarlyCSEPass());
1324     ExtraPasses.addPass(CorrelatedValuePropagationPass());
1325     ExtraPasses.addPass(InstCombinePass());
1326     LoopPassManager LPM;
1327     LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1328                          /*AllowSpeculation=*/true));
1329     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1330                                        OptimizationLevel::O3));
1331     ExtraPasses.addPass(
1332         createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1333                                         /*UseBlockFrequencyInfo=*/true));
1334     ExtraPasses.addPass(
1335         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1336     ExtraPasses.addPass(InstCombinePass());
1337     FPM.addPass(std::move(ExtraPasses));
1338   }
1339 
1340   // Now that we've formed fast to execute loop structures, we do further
1341   // optimizations. These are run afterward as they might block doing complex
1342   // analyses and transforms such as what are needed for loop vectorization.
1343 
1344   // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1345   // GVN, loop transforms, and others have already run, so it's now better to
1346   // convert to more optimized IR using more aggressive simplify CFG options.
1347   // The extra sinking transform can create larger basic blocks, so do this
1348   // before SLP vectorization.
1349   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1350                                   .forwardSwitchCondToPhi(true)
1351                                   .convertSwitchRangeToICmp(true)
1352                                   .convertSwitchToLookupTable(true)
1353                                   .needCanonicalLoops(false)
1354                                   .hoistCommonInsts(true)
1355                                   .sinkCommonInsts(true)));
1356 
1357   if (IsFullLTO) {
1358     FPM.addPass(SCCPPass());
1359     FPM.addPass(InstCombinePass());
1360     FPM.addPass(BDCEPass());
1361   }
1362 
1363   // Optimize parallel scalar instruction chains into SIMD instructions.
1364   if (PTO.SLPVectorization) {
1365     FPM.addPass(SLPVectorizerPass());
1366     if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1367       FPM.addPass(EarlyCSEPass());
1368     }
1369   }
1370   // Enhance/cleanup vector code.
1371   FPM.addPass(VectorCombinePass());
1372 
1373   if (!IsFullLTO) {
1374     FPM.addPass(InstCombinePass());
1375     // Unroll small loops to hide loop backedge latency and saturate any
1376     // parallel execution resources of an out-of-order processor. We also then
1377     // need to clean up redundancies and loop invariant code.
1378     // FIXME: It would be really good to use a loop-integrated instruction
1379     // combiner for cleanup here so that the unrolling and LICM can be pipelined
1380     // across the loop nests.
1381     // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1382     if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1383       FPM.addPass(createFunctionToLoopPassAdaptor(
1384           LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1385     }
1386     FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1387         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1388         PTO.ForgetAllSCEVInLoopUnroll)));
1389     FPM.addPass(WarnMissedTransformationsPass());
1390     // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1391     // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1392     // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1393     // NOTE: we are very late in the pipeline, and we don't have any LICM
1394     // or SimplifyCFG passes scheduled after us, that would cleanup
1395     // the CFG mess this may created if allowed to modify CFG, so forbid that.
1396     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1397   }
1398 
1399   FPM.addPass(InferAlignmentPass());
1400   FPM.addPass(InstCombinePass());
1401 
1402   // This is needed for two reasons:
1403   //   1. It works around problems that instcombine introduces, such as sinking
1404   //      expensive FP divides into loops containing multiplications using the
1405   //      divide result.
1406   //   2. It helps to clean up some loop-invariant code created by the loop
1407   //      unroll pass when IsFullLTO=false.
1408   FPM.addPass(createFunctionToLoopPassAdaptor(
1409       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1410                /*AllowSpeculation=*/true),
1411       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1412 
1413   // Now that we've vectorized and unrolled loops, we may have more refined
1414   // alignment information, try to re-derive it here.
1415   FPM.addPass(AlignmentFromAssumptionsPass());
1416 }
1417 
/// Build the module optimization pipeline for \p Level.
///
/// Assembles, in order: optional partial inlining; removal of
/// available_externally definitions (skipped in LTO pre-link); optional order
/// file instrumentation; RPO function attribute inference; context-sensitive
/// PGO passes (skipped in LTO pre-link); an optional GlobalsAA recompute; the
/// OptimizerEarly extension point; a function pipeline covering late loop
/// passes, vectorization (via addVectorPasses) and final cleanups; the
/// OptimizerLast extension point; and finally the late module-level passes
/// (hot/cold splitting, IR outlining, GlobalDCE, ConstantMerge, function
/// merging, call graph profile and relative lookup table conversion).
///
/// \param Level    Optimization level forwarded to the individual passes and
///                 to the extension-point callbacks.
/// \param LTOPhase LTO phase this pipeline is built for; pre-link phases skip
///                 several passes (see the LTOPreLink checks below).
/// \returns The populated ModulePassManager.
1418 ModulePassManager
1419 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1420                                              ThinOrFullLTOPhase LTOPhase) {
1421   const bool LTOPreLink = isLTOPreLink(LTOPhase);
1422   ModulePassManager MPM;
1423 
1424   // Run partial inlining pass to partially inline functions that have
1425   // large bodies.
1426   if (RunPartialInlining)
1427     MPM.addPass(PartialInlinerPass());
1428 
1429   // Remove avail extern fns and globals definitions since we aren't compiling
1430   // an object file for later LTO. For LTO we want to preserve these so they
1431   // are eligible for inlining at link-time. Note if they are unreferenced they
1432   // will be removed by GlobalDCE later, so this only impacts referenced
1433   // available externally globals. Eventually they will be suppressed during
1434   // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1435   // may make globals referenced by available external functions dead and saves
1436   // running remaining passes on the eliminated functions. These should be
1437   // preserved during prelinking for link-time inlining decisions.
1438   if (!LTOPreLink)
1439     MPM.addPass(EliminateAvailableExternallyPass());
1440 
1441   if (EnableOrderFileInstrumentation)
1442     MPM.addPass(InstrOrderFilePass());
1443 
1444   // Do RPO function attribute inference across the module to forward-propagate
1445   // attributes where applicable.
1446   // FIXME: Is this really an optimization rather than a canonicalization?
1447   MPM.addPass(ReversePostOrderFunctionAttrsPass());
1448 
1449   // Do a post inline PGO instrumentation and use pass. This is a context
1450   // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1451   // cross-module inline has not been done yet. The context sensitive
1452   // instrumentation is after all the inlines are done.
1453   if (!LTOPreLink && PGOOpt) {
1454     if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1455       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1456                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1457                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1458                         PGOOpt->FS);
1459     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1460       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1461                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1462                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1463                         PGOOpt->FS);
1464   }
1465 
1466   // Re-compute GlobalsAA here prior to function passes. This is particularly
1467   // useful as the above will have inlined, DCE'ed, and function-attr
1468   // propagated everything. We should at this point have a reasonably minimal
1469   // and richly annotated call graph. By computing aliasing and mod/ref
1470   // information for all local globals here, the late loop passes and notably
1471   // the vectorizer will be able to use them to help recognize vectorizable
1472   // memory operations.
1473   if (EnableGlobalAnalyses)
1474     MPM.addPass(RecomputeGlobalsAAPass());
1475 
1476   invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1477 
       // Function-level passes are collected in OptimizePM and attached to MPM
       // in one step through a module-to-function adaptor further down.
1478   FunctionPassManager OptimizePM;
1479   // Scheduling LoopVersioningLICM when inlining is over, because after that
1480   // we may see more accurate aliasing. Reason to run this late is that too
1481   // early versioning may prevent further inlining due to increase of code
1482   // size. Other optimizations which run later might benefit from the no-alias
1483   // assumption in the cloned loop.
1484   if (UseLoopVersioningLICM) {
1485     OptimizePM.addPass(
1486         createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));
1487     // LoopVersioningLICM pass might increase new LICM opportunities.
1488     OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1489         LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1490                  /*AllowSpeculation=*/true),
1491         /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1492   }
1493 
1494   OptimizePM.addPass(Float2IntPass());
1495   OptimizePM.addPass(LowerConstantIntrinsicsPass());
1496 
1497   if (EnableMatrix) {
1498     OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1499     OptimizePM.addPass(EarlyCSEPass());
1500   }
1501 
1502   // CHR pass should only be applied with the profile information.
1503   // The check is to check the profile summary information in CHR.
1504   if (EnableCHR && Level == OptimizationLevel::O3)
1505     OptimizePM.addPass(ControlHeightReductionPass());
1506 
1507   // FIXME: We need to run some loop optimizations to re-rotate loops after
1508   // simplifycfg and others undo their rotation.
1509 
1510   // Optimize the loop execution. These passes operate on entire loop nests
1511   // rather than on each loop in an inside-out manner, and so they are actually
1512   // function passes.
1513 
1514   invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1515 
1516   LoopPassManager LPM;
1517   // First rotate loops that may have been un-rotated by prior passes.
1518   // Disable header duplication at -Oz unless EnableLoopHeaderDuplication
       // is set.
1519   LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
1520                                  Level != OptimizationLevel::Oz,
1521                              LTOPreLink));
1522   // Some loops may have become dead by now. Try to delete them.
1523   // FIXME: see discussion in https://reviews.llvm.org/D112851,
1524   //        this may need to be revisited once we run GVN before loop deletion
1525   //        in the simplification pipeline.
1526   LPM.addPass(LoopDeletionPass());
1527   OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1528       std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1529 
1530   // Distribute loops to allow partial vectorization.  I.e. isolate dependences
1531   // into separate loop that would otherwise inhibit vectorization.  This is
1532   // currently only performed for loops marked with the metadata
1533   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1534   OptimizePM.addPass(LoopDistributePass());
1535 
1536   // Populates the VFABI attribute with the scalar-to-vector mappings
1537   // from the TargetLibraryInfo.
1538   OptimizePM.addPass(InjectTLIMappings());
1539 
1540   addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1541 
1542   invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1543 
1544   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1545   // canonicalization pass that enables other optimizations. As a result,
1546   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1547   // result too early.
1548   OptimizePM.addPass(LoopSinkPass());
1549 
1550   // And finally clean up LCSSA form before generating code.
1551   OptimizePM.addPass(InstSimplifyPass());
1552 
1553   // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1554   // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1555   // flattening of blocks.
1556   OptimizePM.addPass(DivRemPairsPass());
1557 
1558   // Try to annotate calls that were created during optimization.
1559   OptimizePM.addPass(TailCallElimPass());
1560 
1561   // LoopSink (and other loop passes since the last simplifyCFG) might have
1562   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1563   OptimizePM.addPass(
1564       SimplifyCFGPass(SimplifyCFGOptions()
1565                           .convertSwitchRangeToICmp(true)
1566                           .speculateUnpredictables(true)
1567                           .hoistLoadsStoresWithCondFaulting(true)));
1568 
1569   // Add the core optimizing pipeline.
1570   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1571                                                 PTO.EagerlyInvalidateAnalyses));
1572 
1573   invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1574 
1575   // Split out cold code. Splitting is done late to avoid hiding context from
1576   // other optimizations and inadvertently regressing performance. The tradeoff
1577   // is that this has a higher code size cost than splitting early.
1578   if (EnableHotColdSplit && !LTOPreLink)
1579     MPM.addPass(HotColdSplittingPass());
1580 
1581   // Search the code for similar regions of code. If enough similar regions can
1582   // be found where extracting the regions into their own function will decrease
1583   // the size of the program, we extract the regions, and deduplicate the
1584   // structurally similar regions.
1585   if (EnableIROutliner)
1586     MPM.addPass(IROutlinerPass());
1587 
1588   // Now we need to do some global optimization transforms.
1589   // FIXME: It would seem like these should come first in the optimization
1590   // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1591   // ordering here.
1592   MPM.addPass(GlobalDCEPass());
1593   MPM.addPass(ConstantMergePass());
1594 
1595   // Merge functions if requested. It has a better chance to merge functions
1596   // after ConstantMerge folded jump tables.
1597   if (PTO.MergeFunctions)
1598     MPM.addPass(MergeFunctionsPass());
1599 
       // The flag handed to CGProfilePass is true only for the two post-link
       // LTO phases.
1600   if (PTO.CallGraphProfile && !LTOPreLink)
1601     MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1602                               LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
1603 
1604   // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1605   if (!LTOPreLink)
1606     MPM.addPass(RelLookupTableConverterPass());
1607 
1608   return MPM;
1609 }
1610 
1611 ModulePassManager
1612 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1613                                            ThinOrFullLTOPhase Phase) {
1614   if (Level == OptimizationLevel::O0)
1615     return buildO0DefaultPipeline(Level, Phase);
1616 
1617   ModulePassManager MPM;
1618 
1619   // Convert @llvm.global.annotations to !annotation metadata.
1620   MPM.addPass(Annotation2MetadataPass());
1621 
1622   // Force any function attributes we want the rest of the pipeline to observe.
1623   MPM.addPass(ForceFunctionAttrsPass());
1624 
1625   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1626     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1627 
1628   // Apply module pipeline start EP callback.
1629   invokePipelineStartEPCallbacks(MPM, Level);
1630 
1631   // Add the core simplification pipeline.
1632   MPM.addPass(buildModuleSimplificationPipeline(Level, Phase));
1633 
1634   // Now add the optimization pipeline.
1635   MPM.addPass(buildModuleOptimizationPipeline(Level, Phase));
1636 
1637   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1638       PGOOpt->Action == PGOOptions::SampleUse)
1639     MPM.addPass(PseudoProbeUpdatePass());
1640 
1641   // Emit annotation remarks.
1642   addAnnotationRemarksPass(MPM);
1643 
1644   if (isLTOPreLink(Phase))
1645     addRequiredLTOPreLinkPasses(MPM);
1646   return MPM;
1647 }
1648 
1649 ModulePassManager
1650 PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1651                                         bool EmitSummary) {
1652   ModulePassManager MPM;
1653   if (ThinLTO)
1654     MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
1655   else
1656     MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
1657   MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1658 
1659   // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1660   // object code, only in the bitcode section, so drop it before we run
1661   // module optimization and generate machine code. If llvm.type.test() isn't in
1662   // the IR, this won't do anything.
1663   MPM.addPass(
1664       LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All));
1665 
1666   // Use the ThinLTO post-link pipeline with sample profiling
1667   if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1668     MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1669   else {
1670     // otherwise, just use module optimization
1671     MPM.addPass(
1672         buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
1673     // Emit annotation remarks.
1674     addAnnotationRemarksPass(MPM);
1675   }
1676   return MPM;
1677 }
1678 
1679 ModulePassManager
1680 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1681   if (Level == OptimizationLevel::O0)
1682     return buildO0DefaultPipeline(Level, ThinOrFullLTOPhase::ThinLTOPreLink);
1683 
1684   ModulePassManager MPM;
1685 
1686   // Convert @llvm.global.annotations to !annotation metadata.
1687   MPM.addPass(Annotation2MetadataPass());
1688 
1689   // Force any function attributes we want the rest of the pipeline to observe.
1690   MPM.addPass(ForceFunctionAttrsPass());
1691 
1692   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1693     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1694 
1695   // Apply module pipeline start EP callback.
1696   invokePipelineStartEPCallbacks(MPM, Level);
1697 
1698   // If we are planning to perform ThinLTO later, we don't bloat the code with
1699   // unrolling/vectorization/... now. Just simplify the module as much as we
1700   // can.
1701   MPM.addPass(buildModuleSimplificationPipeline(
1702       Level, ThinOrFullLTOPhase::ThinLTOPreLink));
1703   // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1704   // thinlto use the contextual info to perform imports; then use the contextual
1705   // profile in the post-thinlink phase.
1706   if (!UseCtxProfile.empty()) {
1707     addRequiredLTOPreLinkPasses(MPM);
1708     return MPM;
1709   }
1710 
1711   // Run partial inlining pass to partially inline functions that have
1712   // large bodies.
1713   // FIXME: It isn't clear whether this is really the right place to run this
1714   // in ThinLTO. Because there is another canonicalization and simplification
1715   // phase that will run after the thin link, running this here ends up with
1716   // less information than will be available later and it may grow functions in
1717   // ways that aren't beneficial.
1718   if (RunPartialInlining)
1719     MPM.addPass(PartialInlinerPass());
1720 
1721   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1722       PGOOpt->Action == PGOOptions::SampleUse)
1723     MPM.addPass(PseudoProbeUpdatePass());
1724 
1725   // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1726   // optimization is going to be done in PostLink stage, but clang can't add
1727   // callbacks there in case of in-process ThinLTO called by linker.
1728   invokeOptimizerEarlyEPCallbacks(MPM, Level,
1729                                   /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1730   invokeOptimizerLastEPCallbacks(MPM, Level,
1731                                  /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1732 
1733   // Emit annotation remarks.
1734   addAnnotationRemarksPass(MPM);
1735 
1736   addRequiredLTOPreLinkPasses(MPM);
1737 
1738   return MPM;
1739 }
1740 
1741 ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1742     OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1743   ModulePassManager MPM;
1744 
1745   if (ImportSummary) {
1746     // For ThinLTO we must apply the context disambiguation decisions early, to
1747     // ensure we can correctly match the callsites to summary data.
1748     if (EnableMemProfContextDisambiguation)
1749       MPM.addPass(MemProfContextDisambiguation(
1750           ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1751 
1752     // These passes import type identifier resolutions for whole-program
1753     // devirtualization and CFI. They must run early because other passes may
1754     // disturb the specific instruction patterns that these passes look for,
1755     // creating dependencies on resolutions that may not appear in the summary.
1756     //
1757     // For example, GVN may transform the pattern assume(type.test) appearing in
1758     // two basic blocks into assume(phi(type.test, type.test)), which would
1759     // transform a dependency on a WPD resolution into a dependency on a type
1760     // identifier resolution for CFI.
1761     //
1762     // Also, WPD has access to more precise information than ICP and can
1763     // devirtualize more effectively, so it should operate on the IR first.
1764     //
1765     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1766     // metadata and intrinsics.
1767     MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1768     MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1769   }
1770 
1771   if (Level == OptimizationLevel::O0) {
1772     // Run a second time to clean up any type tests left behind by WPD for use
1773     // in ICP.
1774     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1775                                    lowertypetests::DropTestKind::Assume));
1776     // Drop available_externally and unreferenced globals. This is necessary
1777     // with ThinLTO in order to avoid leaving undefined references to dead
1778     // globals in the object file.
1779     MPM.addPass(EliminateAvailableExternallyPass());
1780     MPM.addPass(GlobalDCEPass());
1781     return MPM;
1782   }
1783   if (!UseCtxProfile.empty()) {
1784     MPM.addPass(
1785         buildModuleInlinerPipeline(Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1786   } else {
1787     // Add the core simplification pipeline.
1788     MPM.addPass(buildModuleSimplificationPipeline(
1789         Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1790   }
1791   // Now add the optimization pipeline.
1792   MPM.addPass(buildModuleOptimizationPipeline(
1793       Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1794 
1795   // Emit annotation remarks.
1796   addAnnotationRemarksPass(MPM);
1797 
1798   return MPM;
1799 }
1800 
1801 ModulePassManager
1802 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1803   // FIXME: We should use a customized pre-link pipeline!
1804   return buildPerModuleDefaultPipeline(Level,
1805                                        ThinOrFullLTOPhase::FullLTOPreLink);
1806 }
1807 
1808 ModulePassManager
1809 PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1810                                      ModuleSummaryIndex *ExportSummary) {
1811   ModulePassManager MPM;
1812 
1813   invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1814 
1815   // Create a function that performs CFI checks for cross-DSO calls with targets
1816   // in the current module.
1817   MPM.addPass(CrossDSOCFIPass());
1818 
1819   if (Level == OptimizationLevel::O0) {
1820     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1821     // metadata and intrinsics.
1822     MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1823     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1824     // Run a second time to clean up any type tests left behind by WPD for use
1825     // in ICP.
1826     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1827                                    lowertypetests::DropTestKind::Assume));
1828 
1829     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1830 
1831     // Emit annotation remarks.
1832     addAnnotationRemarksPass(MPM);
1833 
1834     return MPM;
1835   }
1836 
1837   if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1838     // Load sample profile before running the LTO optimization pipeline.
1839     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1840                                         PGOOpt->ProfileRemappingFile,
1841                                         ThinOrFullLTOPhase::FullLTOPostLink));
1842     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1843     // RequireAnalysisPass for PSI before subsequent non-module passes.
1844     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1845   }
1846 
1847   // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1848   MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1849 
1850   // Remove unused virtual tables to improve the quality of code generated by
1851   // whole-program devirtualization and bitset lowering.
1852   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1853 
1854   // Do basic inference of function attributes from known properties of system
1855   // libraries and other oracles.
1856   MPM.addPass(InferFunctionAttrsPass());
1857 
1858   if (Level.getSpeedupLevel() > 1) {
1859     MPM.addPass(createModuleToFunctionPassAdaptor(
1860         CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
1861 
1862     // Indirect call promotion. This should promote all the targets that are
1863     // left by the earlier promotion pass that promotes intra-module targets.
1864     // This two-step promotion is to save the compile time. For LTO, it should
1865     // produce the same result as if we only do promotion here.
1866     MPM.addPass(PGOIndirectCallPromotion(
1867         true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1868 
1869     // Promoting by-reference arguments to by-value exposes more constants to
1870     // IPSCCP.
1871     CGSCCPassManager CGPM;
1872     CGPM.addPass(PostOrderFunctionAttrsPass());
1873     CGPM.addPass(ArgumentPromotionPass());
1874     CGPM.addPass(
1875         createCGSCCToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG)));
1876     MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1877 
1878     // Propagate constants at call sites into the functions they call.  This
1879     // opens opportunities for globalopt (and inlining) by substituting function
1880     // pointers passed as arguments to direct uses of functions.
1881     MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1882                                          Level != OptimizationLevel::Os &&
1883                                          Level != OptimizationLevel::Oz)));
1884 
1885     // Attach metadata to indirect call sites indicating the set of functions
1886     // they may target at run-time. This should follow IPSCCP.
1887     MPM.addPass(CalledValuePropagationPass());
1888   }
1889 
1890   // Do RPO function attribute inference across the module to forward-propagate
1891   // attributes where applicable.
1892   // FIXME: Is this really an optimization rather than a canonicalization?
1893   MPM.addPass(ReversePostOrderFunctionAttrsPass());
1894 
1895   // Use in-range annotations on GEP indices to split globals where beneficial.
1896   MPM.addPass(GlobalSplitPass());
1897 
1898   // Run whole program optimization of virtual call when the list of callees
1899   // is fixed.
1900   MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1901 
1902   // Stop here at -O1.
1903   if (Level == OptimizationLevel::O1) {
1904     // The LowerTypeTestsPass needs to run to lower type metadata and the
1905     // type.test intrinsics. The pass does nothing if CFI is disabled.
1906     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1907     // Run a second time to clean up any type tests left behind by WPD for use
1908     // in ICP (which is performed earlier than this in the regular LTO
1909     // pipeline).
1910     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1911                                    lowertypetests::DropTestKind::Assume));
1912 
1913     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1914 
1915     // Emit annotation remarks.
1916     addAnnotationRemarksPass(MPM);
1917 
1918     return MPM;
1919   }
1920 
1921   // Optimize globals to try and fold them into constants.
1922   MPM.addPass(GlobalOptPass());
1923 
1924   // Promote any localized globals to SSA registers.
1925   MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
1926 
1927   // Linking modules together can lead to duplicate global constant, only
1928   // keep one copy of each constant.
1929   MPM.addPass(ConstantMergePass());
1930 
1931   // Remove unused arguments from functions.
1932   MPM.addPass(DeadArgumentEliminationPass());
1933 
1934   // Reduce the code after globalopt and ipsccp.  Both can open up significant
1935   // simplification opportunities, and both can propagate functions through
1936   // function pointers.  When this happens, we often have to resolve varargs
1937   // calls, etc, so let instcombine do this.
1938   FunctionPassManager PeepholeFPM;
1939   PeepholeFPM.addPass(InstCombinePass());
1940   if (Level.getSpeedupLevel() > 1)
1941     PeepholeFPM.addPass(AggressiveInstCombinePass());
1942   invokePeepholeEPCallbacks(PeepholeFPM, Level);
1943 
1944   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1945                                                 PTO.EagerlyInvalidateAnalyses));
1946 
1947   // Lower variadic functions for supported targets prior to inlining.
1948   MPM.addPass(ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
1949 
1950   // Note: historically, the PruneEH pass was run first to deduce nounwind and
1951   // generally clean up exception handling overhead. It isn't clear this is
1952   // valuable as the inliner doesn't currently care whether it is inlining an
1953   // invoke or a call.
1954   // Run the inliner now.
1955   if (EnableModuleInliner) {
1956     MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
1957                                   UseInlineAdvisor,
1958                                   ThinOrFullLTOPhase::FullLTOPostLink));
1959   } else {
1960     MPM.addPass(ModuleInlinerWrapperPass(
1961         getInlineParamsFromOptLevel(Level),
1962         /* MandatoryFirst */ true,
1963         InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
1964                       InlinePass::CGSCCInliner}));
1965   }
1966 
1967   // Perform context disambiguation after inlining, since that would reduce the
1968   // amount of additional cloning required to distinguish the allocation
1969   // contexts.
1970   if (EnableMemProfContextDisambiguation)
1971     MPM.addPass(MemProfContextDisambiguation(
1972         /*Summary=*/nullptr,
1973         PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1974 
1975   // Optimize globals again after we ran the inliner.
1976   MPM.addPass(GlobalOptPass());
1977 
1978   // Run the OpenMPOpt pass again after global optimizations.
1979   MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1980 
1981   // Garbage collect dead functions.
1982   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1983 
1984   // If we didn't decide to inline a function, check to see if we can
1985   // transform it to pass arguments by value instead of by reference.
1986   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
1987 
1988   FunctionPassManager FPM;
1989   // The IPO Passes may leave cruft around. Clean up after them.
1990   FPM.addPass(InstCombinePass());
1991   invokePeepholeEPCallbacks(FPM, Level);
1992 
1993   if (EnableConstraintElimination)
1994     FPM.addPass(ConstraintEliminationPass());
1995 
1996   FPM.addPass(JumpThreadingPass());
1997 
1998   // Do a post inline PGO instrumentation and use pass. This is a context
1999   // sensitive PGO pass.
2000   if (PGOOpt) {
2001     if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2002       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2003                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2004                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
2005                         PGOOpt->FS);
2006     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2007       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2008                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2009                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
2010                         PGOOpt->FS);
2011   }
2012 
2013   // Break up allocas
2014   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
2015 
2016   // LTO provides additional opportunities for tailcall elimination due to
2017   // link-time inlining, and visibility of nocapture attribute.
2018   FPM.addPass(TailCallElimPass());
2019 
2020   // Run a few AA driver optimizations here and now to cleanup the code.
2021   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2022                                                 PTO.EagerlyInvalidateAnalyses));
2023 
2024   MPM.addPass(
2025       createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
2026 
2027   // Require the GlobalsAA analysis for the module so we can query it within
2028   // MainFPM.
2029   if (EnableGlobalAnalyses) {
2030     MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
2031     // Invalidate AAManager so it can be recreated and pick up the newly
2032     // available GlobalsAA.
2033     MPM.addPass(
2034         createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
2035   }
2036 
2037   FunctionPassManager MainFPM;
2038   MainFPM.addPass(createFunctionToLoopPassAdaptor(
2039       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2040                /*AllowSpeculation=*/true),
2041       /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2042 
2043   if (RunNewGVN)
2044     MainFPM.addPass(NewGVNPass());
2045   else
2046     MainFPM.addPass(GVNPass());
2047 
2048   // Remove dead memcpy()'s.
2049   MainFPM.addPass(MemCpyOptPass());
2050 
2051   // Nuke dead stores.
2052   MainFPM.addPass(DSEPass());
2053   MainFPM.addPass(MoveAutoInitPass());
2054   MainFPM.addPass(MergedLoadStoreMotionPass());
2055 
2056   invokeVectorizerStartEPCallbacks(MainFPM, Level);
2057 
2058   LoopPassManager LPM;
2059   if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2060     LPM.addPass(LoopFlattenPass());
2061   LPM.addPass(IndVarSimplifyPass());
2062   LPM.addPass(LoopDeletionPass());
2063   // FIXME: Add loop interchange.
2064 
2065   // Unroll small loops and perform peeling.
2066   LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2067                                  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2068                                  PTO.ForgetAllSCEVInLoopUnroll));
2069   // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2070   // *All* loop passes must preserve it, in order to be able to use it.
2071   MainFPM.addPass(createFunctionToLoopPassAdaptor(
2072       std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2073 
2074   MainFPM.addPass(LoopDistributePass());
2075 
2076   addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2077 
2078   invokeVectorizerEndEPCallbacks(MainFPM, Level);
2079 
2080   // Run the OpenMPOpt CGSCC pass again late.
2081   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
2082       OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
2083 
2084   invokePeepholeEPCallbacks(MainFPM, Level);
2085   MainFPM.addPass(JumpThreadingPass());
2086   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2087                                                 PTO.EagerlyInvalidateAnalyses));
2088 
2089   // Lower type metadata and the type.test intrinsic. This pass supports
2090   // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2091   // to be run at link time if CFI is enabled. This pass does nothing if
2092   // CFI is disabled.
2093   MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2094   // Run a second time to clean up any type tests left behind by WPD for use
2095   // in ICP (which is performed earlier than this in the regular LTO pipeline).
2096   MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2097                                  lowertypetests::DropTestKind::Assume));
2098 
2099   // Enable splitting late in the FullLTO post-link pipeline.
2100   if (EnableHotColdSplit)
2101     MPM.addPass(HotColdSplittingPass());
2102 
2103   // Add late LTO optimization passes.
2104   FunctionPassManager LateFPM;
2105 
2106   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2107   // canonicalization pass that enables other optimizations. As a result,
2108   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2109   // result too early.
2110   LateFPM.addPass(LoopSinkPass());
2111 
2112   // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2113   // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2114   // flattening of blocks.
2115   LateFPM.addPass(DivRemPairsPass());
2116 
2117   // Delete basic blocks, which optimization passes may have killed.
2118   LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
2119                                       .convertSwitchRangeToICmp(true)
2120                                       .hoistCommonInsts(true)
2121                                       .speculateUnpredictables(true)));
2122   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2123 
2124   // Drop bodies of available externally objects to improve GlobalDCE.
2125   MPM.addPass(EliminateAvailableExternallyPass());
2126 
2127   // Now that we have optimized the program, discard unreachable functions.
2128   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2129 
2130   if (PTO.MergeFunctions)
2131     MPM.addPass(MergeFunctionsPass());
2132 
2133   MPM.addPass(RelLookupTableConverterPass());
2134 
2135   if (PTO.CallGraphProfile)
2136     MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2137 
2138   invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2139 
2140   // Emit annotation remarks.
2141   addAnnotationRemarksPass(MPM);
2142 
2143   return MPM;
2144 }
2145 
2146 ModulePassManager
2147 PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2148                                     ThinOrFullLTOPhase Phase) {
2149   assert(Level == OptimizationLevel::O0 &&
2150          "buildO0DefaultPipeline should only be used with O0");
2151 
2152   ModulePassManager MPM;
2153 
2154   // Perform pseudo probe instrumentation in O0 mode. This is for the
2155   // consistency between different build modes. For example, a LTO build can be
2156   // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2157   // the postlink will require pseudo probe instrumentation in the prelink.
2158   if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2159     MPM.addPass(SampleProfileProbePass(TM));
2160 
2161   if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2162                  PGOOpt->Action == PGOOptions::IRUse))
2163     addPGOInstrPassesForO0(
2164         MPM,
2165         /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2166         /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2167         PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2168 
2169   // Instrument function entry and exit before all inlining.
2170   MPM.addPass(createModuleToFunctionPassAdaptor(
2171       EntryExitInstrumenterPass(/*PostInlining=*/false)));
2172 
2173   invokePipelineStartEPCallbacks(MPM, Level);
2174 
2175   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2176     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
2177 
2178   if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2179     // Explicitly disable sample loader inlining and use flattened profile in O0
2180     // pipeline.
2181     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2182                                         PGOOpt->ProfileRemappingFile,
2183                                         ThinOrFullLTOPhase::None, nullptr,
2184                                         /*DisableSampleProfileInlining=*/true,
2185                                         /*UseFlattenedProfile=*/true));
2186     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2187     // RequireAnalysisPass for PSI before subsequent non-module passes.
2188     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2189   }
2190 
2191   invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
2192 
2193   // Build a minimal pipeline based on the semantics required by LLVM,
2194   // which is just that always inlining occurs. Further, disable generating
2195   // lifetime intrinsics to avoid enabling further optimizations during
2196   // code generation.
2197   MPM.addPass(AlwaysInlinerPass(
2198       /*InsertLifetimeIntrinsics=*/false));
2199 
2200   if (PTO.MergeFunctions)
2201     MPM.addPass(MergeFunctionsPass());
2202 
2203   if (EnableMatrix)
2204     MPM.addPass(
2205         createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
2206 
2207   if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2208     CGSCCPassManager CGPM;
2209     invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2210     if (!CGPM.isEmpty())
2211       MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2212   }
2213   if (!LateLoopOptimizationsEPCallbacks.empty()) {
2214     LoopPassManager LPM;
2215     invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2216     if (!LPM.isEmpty()) {
2217       MPM.addPass(createModuleToFunctionPassAdaptor(
2218           createFunctionToLoopPassAdaptor(std::move(LPM))));
2219     }
2220   }
2221   if (!LoopOptimizerEndEPCallbacks.empty()) {
2222     LoopPassManager LPM;
2223     invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2224     if (!LPM.isEmpty()) {
2225       MPM.addPass(createModuleToFunctionPassAdaptor(
2226           createFunctionToLoopPassAdaptor(std::move(LPM))));
2227     }
2228   }
2229   if (!ScalarOptimizerLateEPCallbacks.empty()) {
2230     FunctionPassManager FPM;
2231     invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2232     if (!FPM.isEmpty())
2233       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2234   }
2235 
2236   invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
2237 
2238   if (!VectorizerStartEPCallbacks.empty()) {
2239     FunctionPassManager FPM;
2240     invokeVectorizerStartEPCallbacks(FPM, Level);
2241     if (!FPM.isEmpty())
2242       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2243   }
2244 
2245   if (!VectorizerEndEPCallbacks.empty()) {
2246     FunctionPassManager FPM;
2247     invokeVectorizerEndEPCallbacks(FPM, Level);
2248     if (!FPM.isEmpty())
2249       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2250   }
2251 
2252   ModulePassManager CoroPM;
2253   CoroPM.addPass(CoroEarlyPass());
2254   CGSCCPassManager CGPM;
2255   CGPM.addPass(CoroSplitPass());
2256   CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2257   CoroPM.addPass(CoroCleanupPass());
2258   CoroPM.addPass(GlobalDCEPass());
2259   MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2260 
2261   invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
2262 
2263   if (isLTOPreLink(Phase))
2264     addRequiredLTOPreLinkPasses(MPM);
2265 
2266   MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
2267 
2268   return MPM;
2269 }
2270 
2271 AAManager PassBuilder::buildDefaultAAPipeline() {
2272   AAManager AA;
2273 
2274   // The order in which these are registered determines their priority when
2275   // being queried.
2276 
2277   // First we register the basic alias analysis that provides the majority of
2278   // per-function local AA logic. This is a stateless, on-demand local set of
2279   // AA techniques.
2280   AA.registerFunctionAnalysis<BasicAA>();
2281 
2282   // Next we query fast, specialized alias analyses that wrap IR-embedded
2283   // information about aliasing.
2284   AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2285   AA.registerFunctionAnalysis<TypeBasedAA>();
2286 
2287   // Add support for querying global aliasing information when available.
2288   // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2289   // analysis, all that the `AAManager` can do is query for any *cached*
2290   // results from `GlobalsAA` through a readonly proxy.
2291   if (EnableGlobalAnalyses)
2292     AA.registerModuleAnalysis<GlobalsAA>();
2293 
2294   // Add target-specific alias analyses.
2295   if (TM)
2296     TM->registerDefaultAliasAnalyses(AA);
2297 
2298   return AA;
2299 }
2300