1 //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the PassManagerBuilder class, which is used to set up a
10 // "standard" optimization sequence suitable for languages like C and C++.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
15 #include "llvm-c/Transforms/PassManagerBuilder.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
20 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
21 #include "llvm/Analysis/GlobalsModRef.h"
22 #include "llvm/Analysis/InlineCost.h"
23 #include "llvm/Analysis/Passes.h"
24 #include "llvm/Analysis/ScopedNoAliasAA.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/LegacyPassManager.h"
29 #include "llvm/IR/Verifier.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/ManagedStatic.h"
32 #include "llvm/Target/CGPassBuilderOption.h"
33 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
34 #include "llvm/Transforms/IPO.h"
35 #include "llvm/Transforms/IPO/Attributor.h"
36 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
37 #include "llvm/Transforms/IPO/FunctionAttrs.h"
38 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
39 #include "llvm/Transforms/InstCombine/InstCombine.h"
40 #include "llvm/Transforms/Instrumentation.h"
41 #include "llvm/Transforms/Scalar.h"
42 #include "llvm/Transforms/Scalar/GVN.h"
43 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
44 #include "llvm/Transforms/Scalar/LICM.h"
45 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
46 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
47 #include "llvm/Transforms/Utils.h"
48 #include "llvm/Transforms/Vectorize.h"
49 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
50 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
51 #include "llvm/Transforms/Vectorize/VectorCombine.h"
52
53 using namespace llvm;
54
namespace llvm {
// Command-line options consulted while building the legacy pass pipelines.
// Options defined without 'static' have external linkage inside namespace
// llvm; the 'static' ones are private to this translation unit.

// Not referenced in the pipeline code visible here.
cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
                                 cl::Hidden, cl::ZeroOrMore,
                                 cl::desc("Run Partial inlinining pass"));

static cl::opt<bool>
    UseGVNAfterVectorization("use-gvn-after-vectorization",
                             cl::init(false), cl::Hidden,
                             cl::desc("Run GVN instead of Early CSE after vectorization passes"));

// Gates the extra cleanup sequence in addVectorPasses (only for OptLevel > 1).
cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization."));

// Seeds PassManagerBuilder::RerollLoops in the constructor.
static cl::opt<bool>
    RunLoopRerolling("reroll-loops", cl::Hidden,
                     cl::desc("Run the loop rerolling pass"));

// Seeds PassManagerBuilder::NewGVN in the constructor.
cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                        cl::desc("Run the NewGVN pass"));

// Experimental option to use CFL-AA
static cl::opt<::CFLAAType>
    UseCFLAA("use-cfl-aa", cl::init(::CFLAAType::None), cl::Hidden,
             cl::desc("Enable the new, experimental CFL alias analysis"),
             cl::values(clEnumValN(::CFLAAType::None, "none", "Disable CFL-AA"),
                        clEnumValN(::CFLAAType::Steensgaard, "steens",
                                   "Enable unification-based CFL-AA"),
                        clEnumValN(::CFLAAType::Andersen, "anders",
                                   "Enable inclusion-based CFL-AA"),
                        clEnumValN(::CFLAAType::Both, "both",
                                   "Enable both variants of CFL-AA")));

cl::opt<bool> EnableLoopInterchange(
    "enable-loopinterchange", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental LoopInterchange Pass"));

cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false),
                                 cl::Hidden,
                                 cl::desc("Enable Unroll And Jam Pass"));

cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                cl::Hidden,
                                cl::desc("Enable the LoopFlatten Pass"));

// Seeds PassManagerBuilder::PrepareForThinLTO in the constructor.
static cl::opt<bool>
    EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
                            cl::desc("Enable preparation for ThinLTO."));

// Seeds PassManagerBuilder::PerformThinLTO in the constructor.
static cl::opt<bool>
    EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
                         cl::desc("Enable performing ThinLTO."));

// Note: unlike most options in this list, this one is not cl::Hidden.
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
    cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));

cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
    cl::desc("Enable ir outliner pass"));

static cl::opt<bool> UseLoopVersioningLICM(
    "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental Loop Versioning LICM pass"));

// The next two options control the pre-instrumentation inliner that
// addPGOInstrPasses schedules ahead of PGO instrumentation.
cl::opt<bool>
    DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
                      cl::desc("Disable pre-instrumentation inliner"));

cl::opt<int> PreInlineThreshold(
    "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining in pre-instrumentation inliner "
             "(default = 75)"));

cl::opt<bool>
    EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
                   cl::desc("Enable the GVN hoisting pass (default = off)"));

static cl::opt<bool>
    DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
                              cl::Hidden,
                              cl::desc("Disable shrink-wrap library calls"));

static cl::opt<bool> EnableSimpleLoopUnswitch(
    "enable-simple-loop-unswitch", cl::init(false), cl::Hidden,
    cl::desc("Enable the simple loop unswitch pass. Also enables independent "
             "cleanup passes integrated into the loop pass manager pipeline."));

cl::opt<bool>
    EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
                  cl::desc("Enable the GVN sinking pass (default = off)"));

// Control height reduction is the only boolean option here that defaults to
// on; addFunctionSimplificationPasses additionally requires OptLevel >= 3 and
// some form of profile data before scheduling it.
cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));

// NOTE(review): the remark "This option is used in simplifying testing
// SampleFDO optimizations for profile loading." previously sat above
// EnableCHR; it presumably describes this option instead -- TODO confirm.
cl::opt<bool> FlattenedProfileUsed(
    "flattened-profile-used", cl::init(false), cl::Hidden,
    cl::desc("Indicate the sample profile being used is flattened, i.e., "
             "no inline hierachy exists in the profile. "));

cl::opt<bool> EnableOrderFileInstrumentation(
    "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable order file instrumentation (default = off)"));

// Checked in populateFunctionPassManager so matrix intrinsics are lowered
// even at OptLevel 0.
cl::opt<bool> EnableMatrix(
    "enable-matrix", cl::init(false), cl::Hidden,
    cl::desc("Enable lowering of the matrix intrinsics"));

cl::opt<bool> EnableConstraintElimination(
    "enable-constraint-elimination", cl::init(false), cl::Hidden,
    cl::desc(
        "Enable pass to eliminate conditions based on linear constraints."));

// Selects which attributor runs are scheduled; the pipeline code tests this
// value against the MODULE and CGSCC enumerators with operator&.
cl::opt<AttributorRunOption> AttributorRun(
    "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
    cl::desc("Enable the attributor inter-procedural deduction pass."),
    cl::values(clEnumValN(AttributorRunOption::ALL, "all",
                          "enable all attributor runs"),
               clEnumValN(AttributorRunOption::MODULE, "module",
                          "enable module-wide attributor runs"),
               clEnumValN(AttributorRunOption::CGSCC, "cgscc",
                          "enable call graph SCC attributor runs"),
               clEnumValN(AttributorRunOption::NONE, "none",
                          "disable attributor runs")));

// Defined in another translation unit; read by
// addFunctionSimplificationPasses.
extern cl::opt<bool> EnableKnowledgeRetention;
} // namespace llvm
183
PassManagerBuilder()184 PassManagerBuilder::PassManagerBuilder() {
185 OptLevel = 2;
186 SizeLevel = 0;
187 LibraryInfo = nullptr;
188 Inliner = nullptr;
189 DisableUnrollLoops = false;
190 SLPVectorize = false;
191 LoopVectorize = true;
192 LoopsInterleaved = true;
193 RerollLoops = RunLoopRerolling;
194 NewGVN = RunNewGVN;
195 LicmMssaOptCap = SetLicmMssaOptCap;
196 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
197 DisableGVNLoadPRE = false;
198 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
199 VerifyInput = false;
200 VerifyOutput = false;
201 MergeFunctions = false;
202 PrepareForLTO = false;
203 EnablePGOInstrGen = false;
204 EnablePGOCSInstrGen = false;
205 EnablePGOCSInstrUse = false;
206 PGOInstrGen = "";
207 PGOInstrUse = "";
208 PGOSampleUse = "";
209 PrepareForThinLTO = EnablePrepareForThinLTO;
210 PerformThinLTO = EnablePerformThinLTO;
211 DivergentTarget = false;
212 CallGraphProfile = true;
213 }
214
~PassManagerBuilder()215 PassManagerBuilder::~PassManagerBuilder() {
216 delete LibraryInfo;
217 delete Inliner;
218 }
219
220 /// Set of global extensions, automatically added as part of the standard set.
221 static ManagedStatic<
222 SmallVector<std::tuple<PassManagerBuilder::ExtensionPointTy,
223 PassManagerBuilder::ExtensionFn,
224 PassManagerBuilder::GlobalExtensionID>,
225 8>>
226 GlobalExtensions;
227 static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter;
228
229 /// Check if GlobalExtensions is constructed and not empty.
230 /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
231 /// the construction of the object.
GlobalExtensionsNotEmpty()232 static bool GlobalExtensionsNotEmpty() {
233 return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
234 }
235
236 PassManagerBuilder::GlobalExtensionID
addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty,PassManagerBuilder::ExtensionFn Fn)237 PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty,
238 PassManagerBuilder::ExtensionFn Fn) {
239 auto ExtensionID = GlobalExtensionsCounter++;
240 GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID));
241 return ExtensionID;
242 }
243
removeGlobalExtension(PassManagerBuilder::GlobalExtensionID ExtensionID)244 void PassManagerBuilder::removeGlobalExtension(
245 PassManagerBuilder::GlobalExtensionID ExtensionID) {
246 // RegisterStandardPasses may try to call this function after GlobalExtensions
247 // has already been destroyed; doing so should not generate an error.
248 if (!GlobalExtensions.isConstructed())
249 return;
250
251 auto GlobalExtension =
252 llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) {
253 return std::get<2>(elem) == ExtensionID;
254 });
255 assert(GlobalExtension != GlobalExtensions->end() &&
256 "The extension ID to be removed should always be valid.");
257
258 GlobalExtensions->erase(GlobalExtension);
259 }
260
addExtension(ExtensionPointTy Ty,ExtensionFn Fn)261 void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
262 Extensions.push_back(std::make_pair(Ty, std::move(Fn)));
263 }
264
addExtensionsToPM(ExtensionPointTy ETy,legacy::PassManagerBase & PM) const265 void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
266 legacy::PassManagerBase &PM) const {
267 if (GlobalExtensionsNotEmpty()) {
268 for (auto &Ext : *GlobalExtensions) {
269 if (std::get<0>(Ext) == ETy)
270 std::get<1>(Ext)(*this, PM);
271 }
272 }
273 for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
274 if (Extensions[i].first == ETy)
275 Extensions[i].second(*this, PM);
276 }
277
addInitialAliasAnalysisPasses(legacy::PassManagerBase & PM) const278 void PassManagerBuilder::addInitialAliasAnalysisPasses(
279 legacy::PassManagerBase &PM) const {
280 switch (UseCFLAA) {
281 case ::CFLAAType::Steensgaard:
282 PM.add(createCFLSteensAAWrapperPass());
283 break;
284 case ::CFLAAType::Andersen:
285 PM.add(createCFLAndersAAWrapperPass());
286 break;
287 case ::CFLAAType::Both:
288 PM.add(createCFLSteensAAWrapperPass());
289 PM.add(createCFLAndersAAWrapperPass());
290 break;
291 default:
292 break;
293 }
294
295 // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
296 // BasicAliasAnalysis wins if they disagree. This is intended to help
297 // support "obvious" type-punning idioms.
298 PM.add(createTypeBasedAAWrapperPass());
299 PM.add(createScopedNoAliasAAWrapperPass());
300 }
301
populateFunctionPassManager(legacy::FunctionPassManager & FPM)302 void PassManagerBuilder::populateFunctionPassManager(
303 legacy::FunctionPassManager &FPM) {
304 addExtensionsToPM(EP_EarlyAsPossible, FPM);
305
306 // Add LibraryInfo if we have some.
307 if (LibraryInfo)
308 FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
309
310 // The backends do not handle matrix intrinsics currently.
311 // Make sure they are also lowered in O0.
312 // FIXME: A lightweight version of the pass should run in the backend
313 // pipeline on demand.
314 if (EnableMatrix && OptLevel == 0)
315 FPM.add(createLowerMatrixIntrinsicsMinimalPass());
316
317 if (OptLevel == 0) return;
318
319 addInitialAliasAnalysisPasses(FPM);
320
321 // Lower llvm.expect to metadata before attempting transforms.
322 // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
323 FPM.add(createLowerExpectIntrinsicPass());
324 FPM.add(createCFGSimplificationPass());
325 FPM.add(createSROAPass());
326 FPM.add(createEarlyCSEPass());
327 }
328
329 // Do PGO instrumentation generation or use pass as the option specified.
addPGOInstrPasses(legacy::PassManagerBase & MPM,bool IsCS=false)330 void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
331 bool IsCS = false) {
332 if (IsCS) {
333 if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse)
334 return;
335 } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
336 return;
337
338 // Perform the preinline and cleanup passes for O1 and above.
339 // We will not do this inline for context sensitive PGO (when IsCS is true).
340 if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) {
341 // Create preinline pass. We construct an InlineParams object and specify
342 // the threshold here to avoid the command line options of the regular
343 // inliner to influence pre-inlining. The only fields of InlineParams we
344 // care about are DefaultThreshold and HintThreshold.
345 InlineParams IP;
346 IP.DefaultThreshold = PreInlineThreshold;
347 // FIXME: The hint threshold has the same value used by the regular inliner
348 // when not optimzing for size. This should probably be lowered after
349 // performance testing.
350 // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes
351 // the instrumented binary unusably large. Even if PreInlineThreshold is not
352 // correct thresold for -Oz, it is better than not running preinliner.
353 IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325;
354
355 MPM.add(createFunctionInliningPass(IP));
356 MPM.add(createSROAPass());
357 MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
358 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
359 MPM.add(createInstructionCombiningPass()); // Combine silly seq's
360 addExtensionsToPM(EP_Peephole, MPM);
361 }
362 if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) {
363 MPM.add(createPGOInstrumentationGenLegacyPass(IsCS));
364 // Add the profile lowering pass.
365 InstrProfOptions Options;
366 if (!PGOInstrGen.empty())
367 Options.InstrProfileOutput = PGOInstrGen;
368 Options.DoCounterPromotion = true;
369 Options.UseBFIInPromotion = IsCS;
370 MPM.add(createLoopRotatePass());
371 MPM.add(createInstrProfilingLegacyPass(Options, IsCS));
372 }
373 if (!PGOInstrUse.empty())
374 MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS));
375 // Indirect call promotion that promotes intra-module targets only.
376 // For ThinLTO this is done earlier due to interactions with globalopt
377 // for imported functions. We don't run this at -O0.
378 if (OptLevel > 0 && !IsCS)
379 MPM.add(
380 createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
381 }
/// Appends the function simplification pipeline to \p MPM: early scalar
/// cleanup, two loop pass pipelines separated by a full simplify-cfg and
/// instcombine, then late redundancy elimination and cleanup.
///
/// Must only be called with OptLevel >= 1 (asserted). Most stages are further
/// gated on OptLevel, SizeLevel or the command-line options defined at the top
/// of this file. The order in which passes are added is the order in which
/// they are scheduled, so statements here must not be reordered casually.
void PassManagerBuilder::addFunctionSimplificationPasses(
    legacy::PassManagerBase &MPM) {
  // Start of function pass.
  // Break up aggregate allocas, using SSAUpdater.
  assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");
  MPM.add(createSROAPass());
  MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
  if (EnableKnowledgeRetention)
    MPM.add(createAssumeSimplifyPass());

  // GVN hoisting and sinking are opt-in and only considered above O1.
  if (OptLevel > 1) {
    if (EnableGVNHoist)
      MPM.add(createGVNHoistPass());
    if (EnableGVNSink) {
      MPM.add(createGVNSinkPass());
      MPM.add(createCFGSimplificationPass());
    }
  }

  if (EnableConstraintElimination)
    MPM.add(createConstraintEliminationPass());

  if (OptLevel > 1) {
    // Speculative execution if the target has divergent branches; otherwise nop.
    MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());

    MPM.add(createJumpThreadingPass());              // Thread jumps.
    MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
  }
  MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
  // Combine silly seq's
  if (OptLevel > 2)
    MPM.add(createAggressiveInstCombinerPass());
  MPM.add(createInstructionCombiningPass());
  // Library-call shrink wrapping is skipped whenever a size level is set.
  if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
    MPM.add(createLibCallsShrinkWrapPass());
  addExtensionsToPM(EP_Peephole, MPM);

  // Optimize memory intrinsic calls based on the profiled size information.
  if (SizeLevel == 0)
    MPM.add(createPGOMemOPSizeOptLegacyPass());

  // TODO: Investigate the cost/benefit of tail call elimination on debugging.
  if (OptLevel > 1)
    MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
  MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
  MPM.add(createReassociatePass());           // Reassociate expressions

  // Begin the loop pass pipeline.
  if (EnableSimpleLoopUnswitch) {
    // The simple loop unswitch pass relies on separate cleanup passes. Schedule
    // them first so when we re-process a loop they run before other loop
    // passes.
    MPM.add(createLoopInstSimplifyPass());
    MPM.add(createLoopSimplifyCFGPass());
  }
  // Try to remove as much code from the loop header as possible,
  // to reduce amount of IR that will have to be duplicated.
  // TODO: Investigate promotion cap for O1.
  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
  // Rotate Loop - disable header duplication at -Oz
  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
  // TODO: Investigate promotion cap for O1.
  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
  if (EnableSimpleLoopUnswitch)
    MPM.add(createSimpleLoopUnswitchLegacyPass());
  else
    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
  // FIXME: We break the loop pass pipeline here in order to do full
  // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
  // need for this.
  MPM.add(createCFGSimplificationPass());
  MPM.add(createInstructionCombiningPass());
  // We resume loop passes creating a second loop pipeline here.
  if (EnableLoopFlatten) {
    MPM.add(createLoopFlattenPass()); // Flatten loops
    MPM.add(createLoopSimplifyCFGPass());
  }
  MPM.add(createLoopIdiomPass());      // Recognize idioms like memset.
  MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
  addExtensionsToPM(EP_LateLoopOptimizations, MPM);
  MPM.add(createLoopDeletionPass()); // Delete dead loops

  if (EnableLoopInterchange)
    MPM.add(createLoopInterchangePass()); // Interchange loops

  // Unroll small loops and perform peeling.
  MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
                                     ForgetAllSCEVInLoopUnroll));
  addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
  // This ends the loop pass pipelines.

  // Break up allocas that may now be splittable after loop unrolling.
  MPM.add(createSROAPass());

  if (OptLevel > 1) {
    MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
    MPM.add(NewGVN ? createNewGVNPass()
                   : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
  }
  MPM.add(createSCCPPass()); // Constant prop with SCCP

  // Second, late run of constraint elimination (same option as the early one).
  if (EnableConstraintElimination)
    MPM.add(createConstraintEliminationPass());

  // Delete dead bit computations (instcombine runs after to fold away the dead
  // computations, and then ADCE will run later to exploit any new DCE
  // opportunities that creates).
  MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations

  // Run instcombine after redundancy elimination to exploit opportunities
  // opened up by them.
  MPM.add(createInstructionCombiningPass());
  addExtensionsToPM(EP_Peephole, MPM);
  if (OptLevel > 1) {
    MPM.add(createJumpThreadingPass()); // Thread jumps
    MPM.add(createCorrelatedValuePropagationPass());
  }
  MPM.add(createAggressiveDCEPass()); // Delete dead instructions

  MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
  // TODO: Investigate if this is too expensive at O1.
  if (OptLevel > 1) {
    MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
  }

  addExtensionsToPM(EP_ScalarOptimizerLate, MPM);

  if (RerollLoops)
    MPM.add(createLoopRerollPass());

  // Merge & remove BBs and sink & hoist common instructions.
  MPM.add(createCFGSimplificationPass(
      SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
  // Clean up after everything.
  MPM.add(createInstructionCombiningPass());
  addExtensionsToPM(EP_Peephole, MPM);

  // Control height reduction runs only at O3 and above, and only when some
  // profile is in use or context-sensitive instrumentation is being generated.
  if (EnableCHR && OptLevel >= 3 &&
      (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen))
    MPM.add(createControlHeightReductionLegacyPass());
}
525
/// Appends loop vectorization, SLP vectorization and their cleanup and
/// unrolling passes to \p PM.
///
/// \p IsLTO selects between two orderings. With IsLTO set, loop unrolling runs
/// directly after the loop vectorizer and extra SCCP / instcombine / BDCE
/// cleanup is added. Otherwise load elimination runs after the vectorizer and
/// unrolling is deferred until after SLP vectorization and vector combining.
///
/// FIXME: Should LTO cause any differences to this set of passes?
void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
                                         bool IsLTO) {
  // The builder flags are positive ("do interleave / do vectorize"); they are
  // passed negated to createLoopVectorizePass.
  PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));

  if (IsLTO) {
    // The vectorizer may have significantly shortened a loop body; unroll
    // again. Unroll small loops to hide loop backedge latency and saturate any
    // parallel execution resources of an out-of-order processor. We also then
    // need to clean up redundancies and loop invariant code.
    // FIXME: It would be really good to use a loop-integrated instruction
    // combiner for cleanup here so that the unrolling and LICM can be pipelined
    // across the loop nests.
    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
    if (EnableUnrollAndJam && !DisableUnrollLoops)
      PM.add(createLoopUnrollAndJamPass(OptLevel));
    PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
                                ForgetAllSCEVInLoopUnroll));
    PM.add(createWarnMissedTransformationsPass());
  }

  if (!IsLTO) {
    // Eliminate loads by forwarding stores from the previous iteration to loads
    // of the current iteration.
    PM.add(createLoopLoadEliminationPass());
  }
  // Cleanup after the loop optimization passes.
  PM.add(createInstructionCombiningPass());

  if (OptLevel > 1 && ExtraVectorizerPasses) {
    // At higher optimization levels, try to clean up any runtime overlap and
    // alignment checks inserted by the vectorizer. We want to track correlated
    // runtime checks for two inner loops in the same outer loop, fold any
    // common computations, hoist loop-invariant aspects out of any outer loop,
    // and unswitch the runtime checks if possible. Once hoisted, we may have
    // dead (or speculatable) control flows or more combining opportunities.
    PM.add(createEarlyCSEPass());
    PM.add(createCorrelatedValuePropagationPass());
    PM.add(createInstructionCombiningPass());
    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
    PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
    PM.add(createCFGSimplificationPass());
    PM.add(createInstructionCombiningPass());
  }

  if (IsLTO) {
    PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
                                           .hoistCommonInsts(true)));
  } else {
    // Now that we've formed fast to execute loop structures, we do further
    // optimizations. These are run afterward as they might block doing complex
    // analyses and transforms such as what are needed for loop vectorization.

    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
    // GVN, loop transforms, and others have already run, so it's now better to
    // convert to more optimized IR using more aggressive simplify CFG options.
    // The extra sinking transform can create larger basic blocks, so do this
    // before SLP vectorization.
    PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
                                           .forwardSwitchCondToPhi(true)
                                           .convertSwitchToLookupTable(true)
                                           .needCanonicalLoops(false)
                                           .hoistCommonInsts(true)
                                           .sinkCommonInsts(true)));
  }
  if (IsLTO) {
    PM.add(createSCCPPass());                 // Propagate exposed constants
    PM.add(createInstructionCombiningPass()); // Clean up again
    PM.add(createBitTrackingDCEPass());
  }

  // Optimize parallel scalar instruction chains into SIMD instructions.
  if (SLPVectorize) {
    PM.add(createSLPVectorizerPass());
    if (OptLevel > 1 && ExtraVectorizerPasses)
      PM.add(createEarlyCSEPass());
  }

  // Enhance/cleanup vector code.
  PM.add(createVectorCombinePass());

  if (!IsLTO) {
    addExtensionsToPM(EP_Peephole, PM);
    PM.add(createInstructionCombiningPass());

    if (EnableUnrollAndJam && !DisableUnrollLoops) {
      // Unroll and Jam. We do this before unroll but need to be in a separate
      // loop pass manager in order for the outer loop to be processed by
      // unroll and jam before the inner loop is unrolled.
      PM.add(createLoopUnrollAndJamPass(OptLevel));
    }

    // Unroll small loops
    PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
                                ForgetAllSCEVInLoopUnroll));

    if (!DisableUnrollLoops) {
      // LoopUnroll may generate some redundancy to clean up.
      PM.add(createInstructionCombiningPass());

      // Runtime unrolling will introduce runtime check in loop prologue. If the
      // unrolled loop is a inner loop, then the prologue will be inside the
      // outer loop. LICM pass can help to promote the runtime check out if the
      // checked value is loop invariant.
      PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
    }

    PM.add(createWarnMissedTransformationsPass());
  }

  // After vectorization and unrolling, assume intrinsics may tell us more
  // about pointer alignments.
  PM.add(createAlignmentFromAssumptionsPass());

  if (IsLTO)
    PM.add(createInstructionCombiningPass());
}
643
populateModulePassManager(legacy::PassManagerBase & MPM)644 void PassManagerBuilder::populateModulePassManager(
645 legacy::PassManagerBase &MPM) {
646 // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
647 // is handled separately, so just check this is not the ThinLTO post-link.
648 bool DefaultOrPreLinkPipeline = !PerformThinLTO;
649
650 MPM.add(createAnnotation2MetadataLegacyPass());
651
652 if (!PGOSampleUse.empty()) {
653 MPM.add(createPruneEHPass());
654 // In ThinLTO mode, when flattened profile is used, all the available
655 // profile information will be annotated in PreLink phase so there is
656 // no need to load the profile again in PostLink.
657 if (!(FlattenedProfileUsed && PerformThinLTO))
658 MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
659 }
660
661 // Allow forcing function attributes as a debugging and tuning aid.
662 MPM.add(createForceFunctionAttrsLegacyPass());
663
664 // If all optimizations are disabled, just run the always-inline pass and,
665 // if enabled, the function merging pass.
666 if (OptLevel == 0) {
667 addPGOInstrPasses(MPM);
668 if (Inliner) {
669 MPM.add(Inliner);
670 Inliner = nullptr;
671 }
672
673 // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
674 // creates a CGSCC pass manager, but we don't want to add extensions into
675 // that pass manager. To prevent this we insert a no-op module pass to reset
676 // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
677 // builds. The function merging pass is
678 if (MergeFunctions)
679 MPM.add(createMergeFunctionsPass());
680 else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
681 MPM.add(createBarrierNoopPass());
682
683 if (PerformThinLTO) {
684 MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
685 // Drop available_externally and unreferenced globals. This is necessary
686 // with ThinLTO in order to avoid leaving undefined references to dead
687 // globals in the object file.
688 MPM.add(createEliminateAvailableExternallyPass());
689 MPM.add(createGlobalDCEPass());
690 }
691
692 addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
693
694 if (PrepareForLTO || PrepareForThinLTO) {
695 MPM.add(createCanonicalizeAliasesPass());
696 // Rename anon globals to be able to export them in the summary.
697 // This has to be done after we add the extensions to the pass manager
698 // as there could be passes (e.g. Adddress sanitizer) which introduce
699 // new unnamed globals.
700 MPM.add(createNameAnonGlobalPass());
701 }
702
703 MPM.add(createAnnotationRemarksLegacyPass());
704 return;
705 }
706
707 // Add LibraryInfo if we have some.
708 if (LibraryInfo)
709 MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
710
711 addInitialAliasAnalysisPasses(MPM);
712
713 // For ThinLTO there are two passes of indirect call promotion. The
714 // first is during the compile phase when PerformThinLTO=false and
715 // intra-module indirect call targets are promoted. The second is during
716 // the ThinLTO backend when PerformThinLTO=true, when we promote imported
717 // inter-module indirect calls. For that we perform indirect call promotion
718 // earlier in the pass pipeline, here before globalopt. Otherwise imported
719 // available_externally functions look unreferenced and are removed.
720 if (PerformThinLTO) {
721 MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
722 !PGOSampleUse.empty()));
723 MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
724 }
725
726 // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops
727 // as it will change the CFG too much to make the 2nd profile annotation
728 // in backend more difficult.
729 bool PrepareForThinLTOUsingPGOSampleProfile =
730 PrepareForThinLTO && !PGOSampleUse.empty();
731 if (PrepareForThinLTOUsingPGOSampleProfile)
732 DisableUnrollLoops = true;
733
734 // Infer attributes about declarations if possible.
735 MPM.add(createInferFunctionAttrsLegacyPass());
736
737 // Infer attributes on declarations, call sites, arguments, etc.
738 if (AttributorRun & AttributorRunOption::MODULE)
739 MPM.add(createAttributorLegacyPass());
740
741 addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
742
743 if (OptLevel > 2)
744 MPM.add(createCallSiteSplittingPass());
745
746 MPM.add(createIPSCCPPass()); // IP SCCP
747 MPM.add(createCalledValuePropagationPass());
748
749 MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
750 // Promote any localized global vars.
751 MPM.add(createPromoteMemoryToRegisterPass());
752
753 MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
754
755 MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
756 addExtensionsToPM(EP_Peephole, MPM);
757 MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
758
759 // For SamplePGO in ThinLTO compile phase, we do not want to do indirect
760 // call promotion as it will change the CFG too much to make the 2nd
761 // profile annotation in backend more difficult.
762 // PGO instrumentation is added during the compile phase for ThinLTO, do
763 // not run it a second time
764 if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
765 addPGOInstrPasses(MPM);
766
767 // Create profile COMDAT variables. Lld linker wants to see all variables
768 // before the LTO/ThinLTO link since it needs to resolve symbols/comdats.
769 if (!PerformThinLTO && EnablePGOCSInstrGen)
770 MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen));
771
772 // We add a module alias analysis pass here. In part due to bugs in the
773 // analysis infrastructure this "works" in that the analysis stays alive
774 // for the entire SCC pass run below.
775 MPM.add(createGlobalsAAWrapperPass());
776
777 // Start of CallGraph SCC passes.
778 MPM.add(createPruneEHPass()); // Remove dead EH info
779 bool RunInliner = false;
780 if (Inliner) {
781 MPM.add(Inliner);
782 Inliner = nullptr;
783 RunInliner = true;
784 }
785
786 // Infer attributes on declarations, call sites, arguments, etc. for an SCC.
787 if (AttributorRun & AttributorRunOption::CGSCC)
788 MPM.add(createAttributorCGSCCLegacyPass());
789
790 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
791 // there are no OpenMP runtime calls present in the module.
792 if (OptLevel > 1)
793 MPM.add(createOpenMPOptCGSCCLegacyPass());
794
795 MPM.add(createPostOrderFunctionAttrsLegacyPass());
796 if (OptLevel > 2)
797 MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
798
799 addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
800 addFunctionSimplificationPasses(MPM);
801
802 // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
803 // pass manager that we are specifically trying to avoid. To prevent this
804 // we must insert a no-op module pass to reset the pass manager.
805 MPM.add(createBarrierNoopPass());
806
807 if (RunPartialInlining)
808 MPM.add(createPartialInliningPass());
809
810 if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO)
811 // Remove avail extern fns and globals definitions if we aren't
812 // compiling an object file for later LTO. For LTO we want to preserve
813 // these so they are eligible for inlining at link-time. Note if they
814 // are unreferenced they will be removed by GlobalDCE later, so
815 // this only impacts referenced available externally globals.
816 // Eventually they will be suppressed during codegen, but eliminating
817 // here enables more opportunity for GlobalDCE as it may make
818 // globals referenced by available external functions dead
819 // and saves running remaining passes on the eliminated functions.
820 MPM.add(createEliminateAvailableExternallyPass());
821
822 // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass
823 // for LTO and ThinLTO -- The actual pass will be called after all inlines
824 // are performed.
825 // Need to do this after COMDAT variables have been eliminated,
826 // (i.e. after EliminateAvailableExternallyPass).
827 if (!(PrepareForLTO || PrepareForThinLTO))
828 addPGOInstrPasses(MPM, /* IsCS */ true);
829
830 if (EnableOrderFileInstrumentation)
831 MPM.add(createInstrOrderFilePass());
832
833 MPM.add(createReversePostOrderFunctionAttrsPass());
834
835 // The inliner performs some kind of dead code elimination as it goes,
836 // but there are cases that are not really caught by it. We might
837 // at some point consider teaching the inliner about them, but it
838 // is OK for now to run GlobalOpt + GlobalDCE in tandem as their
839 // benefits generally outweight the cost, making the whole pipeline
840 // faster.
841 if (RunInliner) {
842 MPM.add(createGlobalOptimizerPass());
843 MPM.add(createGlobalDCEPass());
844 }
845
846 // If we are planning to perform ThinLTO later, let's not bloat the code with
847 // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
848 // during ThinLTO and perform the rest of the optimizations afterward.
849 if (PrepareForThinLTO) {
850 // Ensure we perform any last passes, but do so before renaming anonymous
851 // globals in case the passes add any.
852 addExtensionsToPM(EP_OptimizerLast, MPM);
853 MPM.add(createCanonicalizeAliasesPass());
854 // Rename anon globals to be able to export them in the summary.
855 MPM.add(createNameAnonGlobalPass());
856 return;
857 }
858
859 if (PerformThinLTO)
860 // Optimize globals now when performing ThinLTO, this enables more
861 // optimizations later.
862 MPM.add(createGlobalOptimizerPass());
863
864 // Scheduling LoopVersioningLICM when inlining is over, because after that
865 // we may see more accurate aliasing. Reason to run this late is that too
866 // early versioning may prevent further inlining due to increase of code
867 // size. By placing it just after inlining other optimizations which runs
868 // later might get benefit of no-alias assumption in clone loop.
869 if (UseLoopVersioningLICM) {
870 MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
871 MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
872 }
873
874 // We add a fresh GlobalsModRef run at this point. This is particularly
875 // useful as the above will have inlined, DCE'ed, and function-attr
876 // propagated everything. We should at this point have a reasonably minimal
877 // and richly annotated call graph. By computing aliasing and mod/ref
878 // information for all local globals here, the late loop passes and notably
879 // the vectorizer will be able to use them to help recognize vectorizable
880 // memory operations.
881 //
882 // Note that this relies on a bug in the pass manager which preserves
883 // a module analysis into a function pass pipeline (and throughout it) so
884 // long as the first function pass doesn't invalidate the module analysis.
885 // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
886 // this to work. Fortunately, it is trivial to preserve AliasAnalysis
887 // (doing nothing preserves it as it is required to be conservatively
888 // correct in the face of IR changes).
889 MPM.add(createGlobalsAAWrapperPass());
890
891 MPM.add(createFloat2IntPass());
892 MPM.add(createLowerConstantIntrinsicsPass());
893
894 if (EnableMatrix) {
895 MPM.add(createLowerMatrixIntrinsicsPass());
896 // CSE the pointer arithmetic of the column vectors. This allows alias
897 // analysis to establish no-aliasing between loads and stores of different
898 // columns of the same matrix.
899 MPM.add(createEarlyCSEPass(false));
900 }
901
902 addExtensionsToPM(EP_VectorizerStart, MPM);
903
904 // Re-rotate loops in all our loop nests. These may have fallout out of
905 // rotated form due to GVN or other transformations, and the vectorizer relies
906 // on the rotated form. Disable header duplication at -Oz.
907 MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
908
909 // Distribute loops to allow partial vectorization. I.e. isolate dependences
910 // into separate loop that would otherwise inhibit vectorization. This is
911 // currently only performed for loops marked with the metadata
912 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
913 MPM.add(createLoopDistributePass());
914
915 addVectorPasses(MPM, /* IsLTO */ false);
916
917 // FIXME: We shouldn't bother with this anymore.
918 MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
919
920 // GlobalOpt already deletes dead functions and globals, at -O2 try a
921 // late pass of GlobalDCE. It is capable of deleting dead cycles.
922 if (OptLevel > 1) {
923 MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
924 MPM.add(createConstantMergePass()); // Merge dup global constants
925 }
926
927 // See comment in the new PM for justification of scheduling splitting at
928 // this stage (\ref buildModuleSimplificationPipeline).
929 if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
930 MPM.add(createHotColdSplittingPass());
931
932 if (EnableIROutliner)
933 MPM.add(createIROutlinerPass());
934
935 if (MergeFunctions)
936 MPM.add(createMergeFunctionsPass());
937
938 // Add Module flag "CG Profile" based on Branch Frequency Information.
939 if (CallGraphProfile)
940 MPM.add(createCGProfileLegacyPass());
941
942 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
943 // canonicalization pass that enables other optimizations. As a result,
944 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
945 // result too early.
946 MPM.add(createLoopSinkPass());
947 // Get rid of LCSSA nodes.
948 MPM.add(createInstSimplifyLegacyPass());
949
950 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
951 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
952 // flattening of blocks.
953 MPM.add(createDivRemPairsPass());
954
955 // LoopSink (and other loop passes since the last simplifyCFG) might have
956 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
957 MPM.add(createCFGSimplificationPass());
958
959 addExtensionsToPM(EP_OptimizerLast, MPM);
960
961 if (PrepareForLTO) {
962 MPM.add(createCanonicalizeAliasesPass());
963 // Rename anon globals to be able to handle them in the summary
964 MPM.add(createNameAnonGlobalPass());
965 }
966
967 MPM.add(createAnnotationRemarksLegacyPass());
968 }
969
addLTOOptimizationPasses(legacy::PassManagerBase & PM)970 void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
971 // Load sample profile before running the LTO optimization pipeline.
972 if (!PGOSampleUse.empty()) {
973 PM.add(createPruneEHPass());
974 PM.add(createSampleProfileLoaderPass(PGOSampleUse));
975 }
976
977 // Remove unused virtual tables to improve the quality of code generated by
978 // whole-program devirtualization and bitset lowering.
979 PM.add(createGlobalDCEPass());
980
981 // Provide AliasAnalysis services for optimizations.
982 addInitialAliasAnalysisPasses(PM);
983
984 // Allow forcing function attributes as a debugging and tuning aid.
985 PM.add(createForceFunctionAttrsLegacyPass());
986
987 // Infer attributes about declarations if possible.
988 PM.add(createInferFunctionAttrsLegacyPass());
989
990 if (OptLevel > 1) {
991 // Split call-site with more constrained arguments.
992 PM.add(createCallSiteSplittingPass());
993
994 // Indirect call promotion. This should promote all the targets that are
995 // left by the earlier promotion pass that promotes intra-module targets.
996 // This two-step promotion is to save the compile time. For LTO, it should
997 // produce the same result as if we only do promotion here.
998 PM.add(
999 createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
1000
1001 // Propagate constants at call sites into the functions they call. This
1002 // opens opportunities for globalopt (and inlining) by substituting function
1003 // pointers passed as arguments to direct uses of functions.
1004 PM.add(createIPSCCPPass());
1005
1006 // Attach metadata to indirect call sites indicating the set of functions
1007 // they may target at run-time. This should follow IPSCCP.
1008 PM.add(createCalledValuePropagationPass());
1009
1010 // Infer attributes on declarations, call sites, arguments, etc.
1011 if (AttributorRun & AttributorRunOption::MODULE)
1012 PM.add(createAttributorLegacyPass());
1013 }
1014
1015 // Infer attributes about definitions. The readnone attribute in particular is
1016 // required for virtual constant propagation.
1017 PM.add(createPostOrderFunctionAttrsLegacyPass());
1018 PM.add(createReversePostOrderFunctionAttrsPass());
1019
1020 // Split globals using inrange annotations on GEP indices. This can help
1021 // improve the quality of generated code when virtual constant propagation or
1022 // control flow integrity are enabled.
1023 PM.add(createGlobalSplitPass());
1024
1025 // Apply whole-program devirtualization and virtual constant propagation.
1026 PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
1027
1028 // That's all we need at opt level 1.
1029 if (OptLevel == 1)
1030 return;
1031
1032 // Now that we internalized some globals, see if we can hack on them!
1033 PM.add(createGlobalOptimizerPass());
1034 // Promote any localized global vars.
1035 PM.add(createPromoteMemoryToRegisterPass());
1036
1037 // Linking modules together can lead to duplicated global constants, only
1038 // keep one copy of each constant.
1039 PM.add(createConstantMergePass());
1040
1041 // Remove unused arguments from functions.
1042 PM.add(createDeadArgEliminationPass());
1043
1044 // Reduce the code after globalopt and ipsccp. Both can open up significant
1045 // simplification opportunities, and both can propagate functions through
1046 // function pointers. When this happens, we often have to resolve varargs
1047 // calls, etc, so let instcombine do this.
1048 if (OptLevel > 2)
1049 PM.add(createAggressiveInstCombinerPass());
1050 PM.add(createInstructionCombiningPass());
1051 addExtensionsToPM(EP_Peephole, PM);
1052
1053 // Inline small functions
1054 bool RunInliner = Inliner;
1055 if (RunInliner) {
1056 PM.add(Inliner);
1057 Inliner = nullptr;
1058 }
1059
1060 PM.add(createPruneEHPass()); // Remove dead EH info.
1061
1062 // CSFDO instrumentation and use pass.
1063 addPGOInstrPasses(PM, /* IsCS */ true);
1064
1065 // Infer attributes on declarations, call sites, arguments, etc. for an SCC.
1066 if (AttributorRun & AttributorRunOption::CGSCC)
1067 PM.add(createAttributorCGSCCLegacyPass());
1068
1069 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1070 // there are no OpenMP runtime calls present in the module.
1071 if (OptLevel > 1)
1072 PM.add(createOpenMPOptCGSCCLegacyPass());
1073
1074 // Optimize globals again if we ran the inliner.
1075 if (RunInliner)
1076 PM.add(createGlobalOptimizerPass());
1077 PM.add(createGlobalDCEPass()); // Remove dead functions.
1078
1079 // If we didn't decide to inline a function, check to see if we can
1080 // transform it to pass arguments by value instead of by reference.
1081 PM.add(createArgumentPromotionPass());
1082
1083 // The IPO passes may leave cruft around. Clean up after them.
1084 PM.add(createInstructionCombiningPass());
1085 addExtensionsToPM(EP_Peephole, PM);
1086 PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
1087
1088 // Break up allocas
1089 PM.add(createSROAPass());
1090
1091 // LTO provides additional opportunities for tailcall elimination due to
1092 // link-time inlining, and visibility of nocapture attribute.
1093 if (OptLevel > 1)
1094 PM.add(createTailCallEliminationPass());
1095
1096 // Infer attributes on declarations, call sites, arguments, etc.
1097 PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
1098 // Run a few AA driven optimizations here and now, to cleanup the code.
1099 PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
1100
1101 PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
1102 PM.add(NewGVN ? createNewGVNPass()
1103 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
1104 PM.add(createMemCpyOptPass()); // Remove dead memcpys.
1105
1106 // Nuke dead stores.
1107 PM.add(createDeadStoreEliminationPass());
1108 PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
1109
1110 // More loops are countable; try to optimize them.
1111 if (EnableLoopFlatten)
1112 PM.add(createLoopFlattenPass());
1113 PM.add(createIndVarSimplifyPass());
1114 PM.add(createLoopDeletionPass());
1115 if (EnableLoopInterchange)
1116 PM.add(createLoopInterchangePass());
1117
1118 if (EnableConstraintElimination)
1119 PM.add(createConstraintEliminationPass());
1120
1121 // Unroll small loops and perform peeling.
1122 PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
1123 ForgetAllSCEVInLoopUnroll));
1124 PM.add(createLoopDistributePass());
1125
1126 addVectorPasses(PM, /* IsLTO */ true);
1127
1128 addExtensionsToPM(EP_Peephole, PM);
1129
1130 PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
1131 }
1132
addLateLTOOptimizationPasses(legacy::PassManagerBase & PM)1133 void PassManagerBuilder::addLateLTOOptimizationPasses(
1134 legacy::PassManagerBase &PM) {
1135 // See comment in the new PM for justification of scheduling splitting at
1136 // this stage (\ref buildLTODefaultPipeline).
1137 if (EnableHotColdSplit)
1138 PM.add(createHotColdSplittingPass());
1139
1140 // Delete basic blocks, which optimization passes may have killed.
1141 PM.add(
1142 createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true)));
1143
1144 // Drop bodies of available externally objects to improve GlobalDCE.
1145 PM.add(createEliminateAvailableExternallyPass());
1146
1147 // Now that we have optimized the program, discard unreachable functions.
1148 PM.add(createGlobalDCEPass());
1149
1150 // FIXME: this is profitable (for compiler time) to do at -O0 too, but
1151 // currently it damages debug info.
1152 if (MergeFunctions)
1153 PM.add(createMergeFunctionsPass());
1154 }
1155
populateThinLTOPassManager(legacy::PassManagerBase & PM)1156 void PassManagerBuilder::populateThinLTOPassManager(
1157 legacy::PassManagerBase &PM) {
1158 PerformThinLTO = true;
1159 if (LibraryInfo)
1160 PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
1161
1162 if (VerifyInput)
1163 PM.add(createVerifierPass());
1164
1165 if (ImportSummary) {
1166 // This pass imports type identifier resolutions for whole-program
1167 // devirtualization and CFI. It must run early because other passes may
1168 // disturb the specific instruction patterns that these passes look for,
1169 // creating dependencies on resolutions that may not appear in the summary.
1170 //
1171 // For example, GVN may transform the pattern assume(type.test) appearing in
1172 // two basic blocks into assume(phi(type.test, type.test)), which would
1173 // transform a dependency on a WPD resolution into a dependency on a type
1174 // identifier resolution for CFI.
1175 //
1176 // Also, WPD has access to more precise information than ICP and can
1177 // devirtualize more effectively, so it should operate on the IR first.
1178 PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary));
1179 PM.add(createLowerTypeTestsPass(nullptr, ImportSummary));
1180 }
1181
1182 populateModulePassManager(PM);
1183
1184 if (VerifyOutput)
1185 PM.add(createVerifierPass());
1186 PerformThinLTO = false;
1187 }
1188
populateLTOPassManager(legacy::PassManagerBase & PM)1189 void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
1190 if (LibraryInfo)
1191 PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
1192
1193 if (VerifyInput)
1194 PM.add(createVerifierPass());
1195
1196 addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM);
1197
1198 if (OptLevel != 0)
1199 addLTOOptimizationPasses(PM);
1200 else {
1201 // The whole-program-devirt pass needs to run at -O0 because only it knows
1202 // about the llvm.type.checked.load intrinsic: it needs to both lower the
1203 // intrinsic itself and handle it in the summary.
1204 PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
1205 }
1206
1207 // Create a function that performs CFI checks for cross-DSO calls with targets
1208 // in the current module.
1209 PM.add(createCrossDSOCFIPass());
1210
1211 // Lower type metadata and the type.test intrinsic. This pass supports Clang's
1212 // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
1213 // link time if CFI is enabled. The pass does nothing if CFI is disabled.
1214 PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
1215 // Run a second time to clean up any type tests left behind by WPD for use
1216 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1217 PM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
1218
1219 if (OptLevel != 0)
1220 addLateLTOOptimizationPasses(PM);
1221
1222 addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
1223
1224 PM.add(createAnnotationRemarksLegacyPass());
1225
1226 if (VerifyOutput)
1227 PM.add(createVerifierPass());
1228 }
1229
LLVMPassManagerBuilderCreate()1230 LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
1231 PassManagerBuilder *PMB = new PassManagerBuilder();
1232 return wrap(PMB);
1233 }
1234
LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB)1235 void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
1236 PassManagerBuilder *Builder = unwrap(PMB);
1237 delete Builder;
1238 }
1239
1240 void
LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,unsigned OptLevel)1241 LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
1242 unsigned OptLevel) {
1243 PassManagerBuilder *Builder = unwrap(PMB);
1244 Builder->OptLevel = OptLevel;
1245 }
1246
1247 void
LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,unsigned SizeLevel)1248 LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
1249 unsigned SizeLevel) {
1250 PassManagerBuilder *Builder = unwrap(PMB);
1251 Builder->SizeLevel = SizeLevel;
1252 }
1253
1254 void
LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,LLVMBool Value)1255 LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
1256 LLVMBool Value) {
1257 // NOTE: The DisableUnitAtATime switch has been removed.
1258 }
1259
1260 void
LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,LLVMBool Value)1261 LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
1262 LLVMBool Value) {
1263 PassManagerBuilder *Builder = unwrap(PMB);
1264 Builder->DisableUnrollLoops = Value;
1265 }
1266
1267 void
LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,LLVMBool Value)1268 LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
1269 LLVMBool Value) {
1270 // NOTE: The simplify-libcalls pass has been removed.
1271 }
1272
1273 void
LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,unsigned Threshold)1274 LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
1275 unsigned Threshold) {
1276 PassManagerBuilder *Builder = unwrap(PMB);
1277 Builder->Inliner = createFunctionInliningPass(Threshold);
1278 }
1279
1280 void
LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,LLVMPassManagerRef PM)1281 LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
1282 LLVMPassManagerRef PM) {
1283 PassManagerBuilder *Builder = unwrap(PMB);
1284 legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
1285 Builder->populateFunctionPassManager(*FPM);
1286 }
1287
1288 void
LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,LLVMPassManagerRef PM)1289 LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
1290 LLVMPassManagerRef PM) {
1291 PassManagerBuilder *Builder = unwrap(PMB);
1292 legacy::PassManagerBase *MPM = unwrap(PM);
1293 Builder->populateModulePassManager(*MPM);
1294 }
1295
LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,LLVMPassManagerRef PM,LLVMBool Internalize,LLVMBool RunInliner)1296 void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
1297 LLVMPassManagerRef PM,
1298 LLVMBool Internalize,
1299 LLVMBool RunInliner) {
1300 PassManagerBuilder *Builder = unwrap(PMB);
1301 legacy::PassManagerBase *LPM = unwrap(PM);
1302
1303 // A small backwards compatibility hack. populateLTOPassManager used to take
1304 // an RunInliner option.
1305 if (RunInliner && !Builder->Inliner)
1306 Builder->Inliner = createFunctionInliningPass();
1307
1308 Builder->populateLTOPassManager(*LPM);
1309 }
1310