1*349cc55cSDimitry Andric //===- Construction of pass pipelines -------------------------------------===// 2*349cc55cSDimitry Andric // 3*349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*349cc55cSDimitry Andric // 7*349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 8*349cc55cSDimitry Andric /// \file 9*349cc55cSDimitry Andric /// 10*349cc55cSDimitry Andric /// This file provides the implementation of the PassBuilder based on our 11*349cc55cSDimitry Andric /// static pass registry as well as related functionality. It also provides 12*349cc55cSDimitry Andric /// helpers to aid in analyzing, debugging, and testing passes and pass 13*349cc55cSDimitry Andric /// pipelines. 14*349cc55cSDimitry Andric /// 15*349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 16*349cc55cSDimitry Andric 17*349cc55cSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 18*349cc55cSDimitry Andric #include "llvm/Analysis/BasicAliasAnalysis.h" 19*349cc55cSDimitry Andric #include "llvm/Analysis/CGSCCPassManager.h" 20*349cc55cSDimitry Andric #include "llvm/Analysis/GlobalsModRef.h" 21*349cc55cSDimitry Andric #include "llvm/Analysis/InlineAdvisor.h" 22*349cc55cSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 23*349cc55cSDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 24*349cc55cSDimitry Andric #include "llvm/Analysis/ScopedNoAliasAA.h" 25*349cc55cSDimitry Andric #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 26*349cc55cSDimitry Andric #include "llvm/IR/PassManager.h" 27*349cc55cSDimitry Andric #include "llvm/Passes/OptimizationLevel.h" 28*349cc55cSDimitry Andric #include "llvm/Passes/PassBuilder.h" 29*349cc55cSDimitry Andric #include "llvm/Support/CommandLine.h" 30*349cc55cSDimitry Andric #include "llvm/Support/ErrorHandling.h" 31*349cc55cSDimitry Andric #include "llvm/Support/PGOOptions.h" 32*349cc55cSDimitry Andric #include "llvm/Target/TargetMachine.h" 33*349cc55cSDimitry Andric #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" 34*349cc55cSDimitry Andric #include "llvm/Transforms/Coroutines/CoroCleanup.h" 35*349cc55cSDimitry Andric #include "llvm/Transforms/Coroutines/CoroEarly.h" 36*349cc55cSDimitry Andric #include "llvm/Transforms/Coroutines/CoroElide.h" 37*349cc55cSDimitry Andric #include "llvm/Transforms/Coroutines/CoroSplit.h" 38*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/AlwaysInliner.h" 39*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/Annotation2Metadata.h" 40*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/ArgumentPromotion.h" 41*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/Attributor.h" 42*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/CalledValuePropagation.h" 43*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/ConstantMerge.h" 44*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/CrossDSOCFI.h" 45*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/DeadArgumentElimination.h" 46*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/ElimAvailExtern.h" 47*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" 48*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/FunctionAttrs.h" 49*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/GlobalDCE.h" 50*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/GlobalOpt.h" 51*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/GlobalSplit.h" 52*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/HotColdSplitting.h" 53*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/IROutliner.h" 54*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/InferFunctionAttrs.h" 55*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/Inliner.h" 56*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/LowerTypeTests.h" 57*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/MergeFunctions.h" 58*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/ModuleInliner.h" 59*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/OpenMPOpt.h" 60*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/PartialInlining.h" 61*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/SCCP.h" 62*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/SampleProfile.h" 63*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/SampleProfileProbe.h" 64*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" 65*349cc55cSDimitry Andric #include "llvm/Transforms/IPO/WholeProgramDevirt.h" 66*349cc55cSDimitry Andric #include "llvm/Transforms/InstCombine/InstCombine.h" 67*349cc55cSDimitry Andric #include "llvm/Transforms/Instrumentation/CGProfile.h" 68*349cc55cSDimitry Andric #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 69*349cc55cSDimitry Andric #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" 70*349cc55cSDimitry Andric #include "llvm/Transforms/Instrumentation/InstrProfiling.h" 71*349cc55cSDimitry Andric #include "llvm/Transforms/Instrumentation/MemProfiler.h" 72*349cc55cSDimitry Andric #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 73*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/ADCE.h" 74*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" 75*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/AnnotationRemarks.h" 76*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/BDCE.h" 77*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/CallSiteSplitting.h" 78*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/ConstraintElimination.h" 79*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" 80*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 81*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 82*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/DivRemPairs.h" 83*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/EarlyCSE.h" 84*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/Float2Int.h" 85*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/GVN.h" 86*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/IndVarSimplify.h" 87*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/InstSimplifyPass.h" 88*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/JumpThreading.h" 89*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LICM.h" 90*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopDeletion.h" 91*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopDistribute.h" 92*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopFlatten.h" 93*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" 94*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopInstSimplify.h" 95*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopInterchange.h" 96*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopLoadElimination.h" 97*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h" 98*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopRotation.h" 99*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" 100*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopSink.h" 101*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" 102*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LoopUnrollPass.h" 103*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" 104*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" 105*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" 106*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" 107*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" 108*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/NewGVN.h" 109*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/Reassociate.h" 110*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/SCCP.h" 111*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/SROA.h" 112*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 113*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/SimplifyCFG.h" 114*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/SpeculativeExecution.h" 115*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/TailRecursionElimination.h" 116*349cc55cSDimitry Andric #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" 117*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/AddDiscriminators.h" 118*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 119*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/CanonicalizeAliases.h" 120*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/InjectTLIMappings.h" 121*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" 122*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/Mem2Reg.h" 123*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/NameAnonGlobals.h" 124*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/RelLookupTableConverter.h" 125*349cc55cSDimitry Andric #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" 126*349cc55cSDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorize.h" 127*349cc55cSDimitry Andric #include "llvm/Transforms/Vectorize/SLPVectorizer.h" 128*349cc55cSDimitry Andric #include "llvm/Transforms/Vectorize/VectorCombine.h" 129*349cc55cSDimitry Andric 130*349cc55cSDimitry Andric using namespace llvm; 131*349cc55cSDimitry Andric 132*349cc55cSDimitry Andric static cl::opt<InliningAdvisorMode> UseInlineAdvisor( 133*349cc55cSDimitry Andric "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, 134*349cc55cSDimitry Andric cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), 135*349cc55cSDimitry Andric cl::values(clEnumValN(InliningAdvisorMode::Default, "default", 136*349cc55cSDimitry Andric "Heuristics-based inliner version."), 137*349cc55cSDimitry Andric clEnumValN(InliningAdvisorMode::Development, "development", 138*349cc55cSDimitry Andric "Use development mode (runtime-loadable model)."), 139*349cc55cSDimitry Andric clEnumValN(InliningAdvisorMode::Release, "release", 140*349cc55cSDimitry Andric "Use release mode (AOT-compiled model)."))); 141*349cc55cSDimitry Andric 142*349cc55cSDimitry Andric static cl::opt<bool> EnableSyntheticCounts( 143*349cc55cSDimitry Andric "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, 144*349cc55cSDimitry Andric cl::desc("Run synthetic function entry count generation " 145*349cc55cSDimitry Andric "pass")); 146*349cc55cSDimitry Andric 147*349cc55cSDimitry Andric /// Flag to enable inline deferral during PGO. 148*349cc55cSDimitry Andric static cl::opt<bool> 149*349cc55cSDimitry Andric EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), 150*349cc55cSDimitry Andric cl::Hidden, 151*349cc55cSDimitry Andric cl::desc("Enable inline deferral during PGO")); 152*349cc55cSDimitry Andric 153*349cc55cSDimitry Andric static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false), 154*349cc55cSDimitry Andric cl::Hidden, cl::ZeroOrMore, 155*349cc55cSDimitry Andric cl::desc("Enable memory profiler")); 156*349cc55cSDimitry Andric 157*349cc55cSDimitry Andric static cl::opt<bool> EnableModuleInliner("enable-module-inliner", 158*349cc55cSDimitry Andric cl::init(false), cl::Hidden, 159*349cc55cSDimitry Andric cl::desc("Enable module inliner")); 160*349cc55cSDimitry Andric 161*349cc55cSDimitry Andric static cl::opt<bool> PerformMandatoryInliningsFirst( 162*349cc55cSDimitry Andric "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore, 163*349cc55cSDimitry Andric cl::desc("Perform mandatory inlinings module-wide, before performing " 164*349cc55cSDimitry Andric "inlining.")); 165*349cc55cSDimitry Andric 166*349cc55cSDimitry Andric static cl::opt<bool> EnableO3NonTrivialUnswitching( 167*349cc55cSDimitry Andric "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, 168*349cc55cSDimitry Andric cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3")); 169*349cc55cSDimitry Andric 170*349cc55cSDimitry Andric static cl::opt<bool> EnableEagerlyInvalidateAnalyses( 171*349cc55cSDimitry Andric "eagerly-invalidate-analyses", cl::init(true), cl::Hidden, 172*349cc55cSDimitry Andric cl::desc("Eagerly invalidate more analyses in default pipelines")); 173*349cc55cSDimitry Andric 174*349cc55cSDimitry Andric static cl::opt<bool> EnableNoRerunSimplificationPipeline( 175*349cc55cSDimitry Andric "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden, 176*349cc55cSDimitry Andric cl::desc( 177*349cc55cSDimitry Andric "Prevent running the simplification pipeline on a function more " 178*349cc55cSDimitry Andric "than once in the case that SCC mutations cause a function to be " 179*349cc55cSDimitry Andric "visited multiple times as long as the function has not been changed")); 180*349cc55cSDimitry Andric 181*349cc55cSDimitry Andric PipelineTuningOptions::PipelineTuningOptions() { 182*349cc55cSDimitry Andric LoopInterleaving = true; 183*349cc55cSDimitry Andric LoopVectorization = true; 184*349cc55cSDimitry Andric SLPVectorization = false; 185*349cc55cSDimitry Andric LoopUnrolling = true; 186*349cc55cSDimitry Andric ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; 187*349cc55cSDimitry Andric LicmMssaOptCap = SetLicmMssaOptCap; 188*349cc55cSDimitry Andric LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; 189*349cc55cSDimitry Andric CallGraphProfile = true; 190*349cc55cSDimitry Andric MergeFunctions = false; 191*349cc55cSDimitry Andric EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; 192*349cc55cSDimitry Andric } 193*349cc55cSDimitry Andric 194*349cc55cSDimitry Andric namespace llvm { 195*349cc55cSDimitry Andric 196*349cc55cSDimitry Andric extern cl::opt<unsigned> MaxDevirtIterations; 197*349cc55cSDimitry Andric extern cl::opt<bool> EnableConstraintElimination; 198*349cc55cSDimitry Andric extern cl::opt<bool> EnableFunctionSpecialization; 199*349cc55cSDimitry Andric extern cl::opt<bool> EnableGVNHoist; 200*349cc55cSDimitry Andric extern cl::opt<bool> EnableGVNSink; 201*349cc55cSDimitry Andric extern cl::opt<bool> EnableHotColdSplit; 202*349cc55cSDimitry Andric extern cl::opt<bool> EnableIROutliner; 203*349cc55cSDimitry Andric extern cl::opt<bool> EnableOrderFileInstrumentation; 204*349cc55cSDimitry Andric extern cl::opt<bool> EnableCHR; 205*349cc55cSDimitry Andric extern cl::opt<bool> EnableLoopInterchange; 206*349cc55cSDimitry Andric extern cl::opt<bool> EnableUnrollAndJam; 207*349cc55cSDimitry Andric extern cl::opt<bool> EnableLoopFlatten; 208*349cc55cSDimitry Andric extern cl::opt<bool> EnableDFAJumpThreading; 209*349cc55cSDimitry Andric extern cl::opt<bool> RunNewGVN; 210*349cc55cSDimitry Andric extern cl::opt<bool> RunPartialInlining; 211*349cc55cSDimitry Andric extern cl::opt<bool> ExtraVectorizerPasses; 212*349cc55cSDimitry Andric 213*349cc55cSDimitry Andric extern cl::opt<bool> FlattenedProfileUsed; 214*349cc55cSDimitry Andric 215*349cc55cSDimitry Andric extern cl::opt<AttributorRunOption> AttributorRun; 216*349cc55cSDimitry Andric extern cl::opt<bool> EnableKnowledgeRetention; 217*349cc55cSDimitry Andric 218*349cc55cSDimitry Andric extern cl::opt<bool> EnableMatrix; 219*349cc55cSDimitry Andric 220*349cc55cSDimitry Andric extern cl::opt<bool> DisablePreInliner; 221*349cc55cSDimitry Andric extern cl::opt<int> PreInlineThreshold; 222*349cc55cSDimitry Andric } // namespace llvm 223*349cc55cSDimitry Andric 224*349cc55cSDimitry Andric void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, 225*349cc55cSDimitry Andric OptimizationLevel Level) { 226*349cc55cSDimitry Andric for (auto &C : PeepholeEPCallbacks) 227*349cc55cSDimitry Andric C(FPM, Level); 228*349cc55cSDimitry Andric } 229*349cc55cSDimitry Andric 230*349cc55cSDimitry Andric // Helper to add AnnotationRemarksPass. 231*349cc55cSDimitry Andric static void addAnnotationRemarksPass(ModulePassManager &MPM) { 232*349cc55cSDimitry Andric FunctionPassManager FPM; 233*349cc55cSDimitry Andric FPM.addPass(AnnotationRemarksPass()); 234*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); 235*349cc55cSDimitry Andric } 236*349cc55cSDimitry Andric 237*349cc55cSDimitry Andric // Helper to check if the current compilation phase is preparing for LTO 238*349cc55cSDimitry Andric static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { 239*349cc55cSDimitry Andric return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || 240*349cc55cSDimitry Andric Phase == ThinOrFullLTOPhase::FullLTOPreLink; 241*349cc55cSDimitry Andric } 242*349cc55cSDimitry Andric 243*349cc55cSDimitry Andric // TODO: Investigate the cost/benefit of tail call elimination on debugging. 244*349cc55cSDimitry Andric FunctionPassManager 245*349cc55cSDimitry Andric PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, 246*349cc55cSDimitry Andric ThinOrFullLTOPhase Phase) { 247*349cc55cSDimitry Andric 248*349cc55cSDimitry Andric FunctionPassManager FPM; 249*349cc55cSDimitry Andric 250*349cc55cSDimitry Andric // Form SSA out of local memory accesses after breaking apart aggregates into 251*349cc55cSDimitry Andric // scalars. 252*349cc55cSDimitry Andric FPM.addPass(SROAPass()); 253*349cc55cSDimitry Andric 254*349cc55cSDimitry Andric // Catch trivial redundancies 255*349cc55cSDimitry Andric FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); 256*349cc55cSDimitry Andric 257*349cc55cSDimitry Andric // Hoisting of scalars and load expressions. 258*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 259*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 260*349cc55cSDimitry Andric 261*349cc55cSDimitry Andric FPM.addPass(LibCallsShrinkWrapPass()); 262*349cc55cSDimitry Andric 263*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 264*349cc55cSDimitry Andric 265*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 266*349cc55cSDimitry Andric 267*349cc55cSDimitry Andric // Form canonically associated expression trees, and simplify the trees using 268*349cc55cSDimitry Andric // basic mathematical properties. For example, this will form (nearly) 269*349cc55cSDimitry Andric // minimal multiplication trees. 270*349cc55cSDimitry Andric FPM.addPass(ReassociatePass()); 271*349cc55cSDimitry Andric 272*349cc55cSDimitry Andric // Add the primary loop simplification pipeline. 273*349cc55cSDimitry Andric // FIXME: Currently this is split into two loop pass pipelines because we run 274*349cc55cSDimitry Andric // some function passes in between them. These can and should be removed 275*349cc55cSDimitry Andric // and/or replaced by scheduling the loop pass equivalents in the correct 276*349cc55cSDimitry Andric // positions. But those equivalent passes aren't powerful enough yet. 277*349cc55cSDimitry Andric // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still 278*349cc55cSDimitry Andric // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to 279*349cc55cSDimitry Andric // fully replace `SimplifyCFGPass`, and the closest to the other we have is 280*349cc55cSDimitry Andric // `LoopInstSimplify`. 281*349cc55cSDimitry Andric LoopPassManager LPM1, LPM2; 282*349cc55cSDimitry Andric 283*349cc55cSDimitry Andric // Simplify the loop body. We do this initially to clean up after other loop 284*349cc55cSDimitry Andric // passes run, either when iterating on a loop or on inner loops with 285*349cc55cSDimitry Andric // implications on the outer loop. 286*349cc55cSDimitry Andric LPM1.addPass(LoopInstSimplifyPass()); 287*349cc55cSDimitry Andric LPM1.addPass(LoopSimplifyCFGPass()); 288*349cc55cSDimitry Andric 289*349cc55cSDimitry Andric // Try to remove as much code from the loop header as possible, 290*349cc55cSDimitry Andric // to reduce amount of IR that will have to be duplicated. 291*349cc55cSDimitry Andric // TODO: Investigate promotion cap for O1. 292*349cc55cSDimitry Andric LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); 293*349cc55cSDimitry Andric 294*349cc55cSDimitry Andric LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, 295*349cc55cSDimitry Andric isLTOPreLink(Phase))); 296*349cc55cSDimitry Andric // TODO: Investigate promotion cap for O1. 297*349cc55cSDimitry Andric LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); 298*349cc55cSDimitry Andric LPM1.addPass(SimpleLoopUnswitchPass()); 299*349cc55cSDimitry Andric 300*349cc55cSDimitry Andric LPM2.addPass(LoopIdiomRecognizePass()); 301*349cc55cSDimitry Andric LPM2.addPass(IndVarSimplifyPass()); 302*349cc55cSDimitry Andric 303*349cc55cSDimitry Andric for (auto &C : LateLoopOptimizationsEPCallbacks) 304*349cc55cSDimitry Andric C(LPM2, Level); 305*349cc55cSDimitry Andric 306*349cc55cSDimitry Andric LPM2.addPass(LoopDeletionPass()); 307*349cc55cSDimitry Andric 308*349cc55cSDimitry Andric if (EnableLoopInterchange) 309*349cc55cSDimitry Andric LPM2.addPass(LoopInterchangePass()); 310*349cc55cSDimitry Andric 311*349cc55cSDimitry Andric // Do not enable unrolling in PreLinkThinLTO phase during sample PGO 312*349cc55cSDimitry Andric // because it changes IR to makes profile annotation in back compile 313*349cc55cSDimitry Andric // inaccurate. The normal unroller doesn't pay attention to forced full unroll 314*349cc55cSDimitry Andric // attributes so we need to make sure and allow the full unroll pass to pay 315*349cc55cSDimitry Andric // attention to it. 316*349cc55cSDimitry Andric if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || 317*349cc55cSDimitry Andric PGOOpt->Action != PGOOptions::SampleUse) 318*349cc55cSDimitry Andric LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 319*349cc55cSDimitry Andric /* OnlyWhenForced= */ !PTO.LoopUnrolling, 320*349cc55cSDimitry Andric PTO.ForgetAllSCEVInLoopUnroll)); 321*349cc55cSDimitry Andric 322*349cc55cSDimitry Andric for (auto &C : LoopOptimizerEndEPCallbacks) 323*349cc55cSDimitry Andric C(LPM2, Level); 324*349cc55cSDimitry Andric 325*349cc55cSDimitry Andric // We provide the opt remark emitter pass for LICM to use. We only need to do 326*349cc55cSDimitry Andric // this once as it is immutable. 327*349cc55cSDimitry Andric FPM.addPass( 328*349cc55cSDimitry Andric RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); 329*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), 330*349cc55cSDimitry Andric /*UseMemorySSA=*/true, 331*349cc55cSDimitry Andric /*UseBlockFrequencyInfo=*/true)); 332*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 333*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 334*349cc55cSDimitry Andric if (EnableLoopFlatten) 335*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass())); 336*349cc55cSDimitry Andric // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. 337*349cc55cSDimitry Andric // *All* loop passes must preserve it, in order to be able to use it. 338*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), 339*349cc55cSDimitry Andric /*UseMemorySSA=*/false, 340*349cc55cSDimitry Andric /*UseBlockFrequencyInfo=*/false)); 341*349cc55cSDimitry Andric 342*349cc55cSDimitry Andric // Delete small array after loop unroll. 343*349cc55cSDimitry Andric FPM.addPass(SROAPass()); 344*349cc55cSDimitry Andric 345*349cc55cSDimitry Andric // Specially optimize memory movement as it doesn't look like dataflow in SSA. 346*349cc55cSDimitry Andric FPM.addPass(MemCpyOptPass()); 347*349cc55cSDimitry Andric 348*349cc55cSDimitry Andric // Sparse conditional constant propagation. 349*349cc55cSDimitry Andric // FIXME: It isn't clear why we do this *after* loop passes rather than 350*349cc55cSDimitry Andric // before... 351*349cc55cSDimitry Andric FPM.addPass(SCCPPass()); 352*349cc55cSDimitry Andric 353*349cc55cSDimitry Andric // Delete dead bit computations (instcombine runs after to fold away the dead 354*349cc55cSDimitry Andric // computations, and then ADCE will run later to exploit any new DCE 355*349cc55cSDimitry Andric // opportunities that creates). 356*349cc55cSDimitry Andric FPM.addPass(BDCEPass()); 357*349cc55cSDimitry Andric 358*349cc55cSDimitry Andric // Run instcombine after redundancy and dead bit elimination to exploit 359*349cc55cSDimitry Andric // opportunities opened up by them. 360*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 361*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 362*349cc55cSDimitry Andric 363*349cc55cSDimitry Andric FPM.addPass(CoroElidePass()); 364*349cc55cSDimitry Andric 365*349cc55cSDimitry Andric for (auto &C : ScalarOptimizerLateEPCallbacks) 366*349cc55cSDimitry Andric C(FPM, Level); 367*349cc55cSDimitry Andric 368*349cc55cSDimitry Andric // Finally, do an expensive DCE pass to catch all the dead code exposed by 369*349cc55cSDimitry Andric // the simplifications and basic cleanup after all the simplifications. 370*349cc55cSDimitry Andric // TODO: Investigate if this is too expensive. 371*349cc55cSDimitry Andric FPM.addPass(ADCEPass()); 372*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 373*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 374*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 375*349cc55cSDimitry Andric 376*349cc55cSDimitry Andric return FPM; 377*349cc55cSDimitry Andric } 378*349cc55cSDimitry Andric 379*349cc55cSDimitry Andric FunctionPassManager 380*349cc55cSDimitry Andric PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, 381*349cc55cSDimitry Andric ThinOrFullLTOPhase Phase) { 382*349cc55cSDimitry Andric assert(Level != OptimizationLevel::O0 && "Must request optimizations!"); 383*349cc55cSDimitry Andric 384*349cc55cSDimitry Andric // The O1 pipeline has a separate pipeline creation function to simplify 385*349cc55cSDimitry Andric // construction readability. 386*349cc55cSDimitry Andric if (Level.getSpeedupLevel() == 1) 387*349cc55cSDimitry Andric return buildO1FunctionSimplificationPipeline(Level, Phase); 388*349cc55cSDimitry Andric 389*349cc55cSDimitry Andric FunctionPassManager FPM; 390*349cc55cSDimitry Andric 391*349cc55cSDimitry Andric // Form SSA out of local memory accesses after breaking apart aggregates into 392*349cc55cSDimitry Andric // scalars. 393*349cc55cSDimitry Andric FPM.addPass(SROAPass()); 394*349cc55cSDimitry Andric 395*349cc55cSDimitry Andric // Catch trivial redundancies 396*349cc55cSDimitry Andric FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); 397*349cc55cSDimitry Andric if (EnableKnowledgeRetention) 398*349cc55cSDimitry Andric FPM.addPass(AssumeSimplifyPass()); 399*349cc55cSDimitry Andric 400*349cc55cSDimitry Andric // Hoisting of scalars and load expressions. 401*349cc55cSDimitry Andric if (EnableGVNHoist) 402*349cc55cSDimitry Andric FPM.addPass(GVNHoistPass()); 403*349cc55cSDimitry Andric 404*349cc55cSDimitry Andric // Global value numbering based sinking. 405*349cc55cSDimitry Andric if (EnableGVNSink) { 406*349cc55cSDimitry Andric FPM.addPass(GVNSinkPass()); 407*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 408*349cc55cSDimitry Andric } 409*349cc55cSDimitry Andric 410*349cc55cSDimitry Andric if (EnableConstraintElimination) 411*349cc55cSDimitry Andric FPM.addPass(ConstraintEliminationPass()); 412*349cc55cSDimitry Andric 413*349cc55cSDimitry Andric // Speculative execution if the target has divergent branches; otherwise nop. 414*349cc55cSDimitry Andric FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); 415*349cc55cSDimitry Andric 416*349cc55cSDimitry Andric // Optimize based on known information about branches, and cleanup afterward. 417*349cc55cSDimitry Andric FPM.addPass(JumpThreadingPass()); 418*349cc55cSDimitry Andric FPM.addPass(CorrelatedValuePropagationPass()); 419*349cc55cSDimitry Andric 420*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 421*349cc55cSDimitry Andric if (Level == OptimizationLevel::O3) 422*349cc55cSDimitry Andric FPM.addPass(AggressiveInstCombinePass()); 423*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 424*349cc55cSDimitry Andric 425*349cc55cSDimitry Andric if (!Level.isOptimizingForSize()) 426*349cc55cSDimitry Andric FPM.addPass(LibCallsShrinkWrapPass()); 427*349cc55cSDimitry Andric 428*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 429*349cc55cSDimitry Andric 430*349cc55cSDimitry Andric // For PGO use pipeline, try to optimize memory intrinsics such as memcpy 431*349cc55cSDimitry Andric // using the size value profile. Don't perform this when optimizing for size. 432*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && 433*349cc55cSDimitry Andric !Level.isOptimizingForSize()) 434*349cc55cSDimitry Andric FPM.addPass(PGOMemOPSizeOpt()); 435*349cc55cSDimitry Andric 436*349cc55cSDimitry Andric FPM.addPass(TailCallElimPass()); 437*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 438*349cc55cSDimitry Andric 439*349cc55cSDimitry Andric // Form canonically associated expression trees, and simplify the trees using 440*349cc55cSDimitry Andric // basic mathematical properties. For example, this will form (nearly) 441*349cc55cSDimitry Andric // minimal multiplication trees. 442*349cc55cSDimitry Andric FPM.addPass(ReassociatePass()); 443*349cc55cSDimitry Andric 444*349cc55cSDimitry Andric // Add the primary loop simplification pipeline. 445*349cc55cSDimitry Andric // FIXME: Currently this is split into two loop pass pipelines because we run 446*349cc55cSDimitry Andric // some function passes in between them. These can and should be removed 447*349cc55cSDimitry Andric // and/or replaced by scheduling the loop pass equivalents in the correct 448*349cc55cSDimitry Andric // positions. But those equivalent passes aren't powerful enough yet. 449*349cc55cSDimitry Andric // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still 450*349cc55cSDimitry Andric // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to 451*349cc55cSDimitry Andric // fully replace `SimplifyCFGPass`, and the closest to the other we have is 452*349cc55cSDimitry Andric // `LoopInstSimplify`. 453*349cc55cSDimitry Andric LoopPassManager LPM1, LPM2; 454*349cc55cSDimitry Andric 455*349cc55cSDimitry Andric // Simplify the loop body. We do this initially to clean up after other loop 456*349cc55cSDimitry Andric // passes run, either when iterating on a loop or on inner loops with 457*349cc55cSDimitry Andric // implications on the outer loop. 458*349cc55cSDimitry Andric LPM1.addPass(LoopInstSimplifyPass()); 459*349cc55cSDimitry Andric LPM1.addPass(LoopSimplifyCFGPass()); 460*349cc55cSDimitry Andric 461*349cc55cSDimitry Andric // Try to remove as much code from the loop header as possible, 462*349cc55cSDimitry Andric // to reduce amount of IR that will have to be duplicated. 463*349cc55cSDimitry Andric // TODO: Investigate promotion cap for O1. 464*349cc55cSDimitry Andric LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); 465*349cc55cSDimitry Andric 466*349cc55cSDimitry Andric // Disable header duplication in loop rotation at -Oz. 467*349cc55cSDimitry Andric LPM1.addPass( 468*349cc55cSDimitry Andric LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); 469*349cc55cSDimitry Andric // TODO: Investigate promotion cap for O1. 470*349cc55cSDimitry Andric LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); 471*349cc55cSDimitry Andric LPM1.addPass( 472*349cc55cSDimitry Andric SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && 473*349cc55cSDimitry Andric EnableO3NonTrivialUnswitching)); 474*349cc55cSDimitry Andric LPM2.addPass(LoopIdiomRecognizePass()); 475*349cc55cSDimitry Andric LPM2.addPass(IndVarSimplifyPass()); 476*349cc55cSDimitry Andric 477*349cc55cSDimitry Andric for (auto &C : LateLoopOptimizationsEPCallbacks) 478*349cc55cSDimitry Andric C(LPM2, Level); 479*349cc55cSDimitry Andric 480*349cc55cSDimitry Andric LPM2.addPass(LoopDeletionPass()); 481*349cc55cSDimitry Andric 482*349cc55cSDimitry Andric if (EnableLoopInterchange) 483*349cc55cSDimitry Andric LPM2.addPass(LoopInterchangePass()); 484*349cc55cSDimitry Andric 485*349cc55cSDimitry Andric // Do not enable unrolling in PreLinkThinLTO phase during sample PGO 486*349cc55cSDimitry Andric // because it changes IR to makes profile annotation in back compile 487*349cc55cSDimitry Andric // inaccurate. The normal unroller doesn't pay attention to forced full unroll 488*349cc55cSDimitry Andric // attributes so we need to make sure and allow the full unroll pass to pay 489*349cc55cSDimitry Andric // attention to it. 490*349cc55cSDimitry Andric if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || 491*349cc55cSDimitry Andric PGOOpt->Action != PGOOptions::SampleUse) 492*349cc55cSDimitry Andric LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 493*349cc55cSDimitry Andric /* OnlyWhenForced= */ !PTO.LoopUnrolling, 494*349cc55cSDimitry Andric PTO.ForgetAllSCEVInLoopUnroll)); 495*349cc55cSDimitry Andric 496*349cc55cSDimitry Andric for (auto &C : LoopOptimizerEndEPCallbacks) 497*349cc55cSDimitry Andric C(LPM2, Level); 498*349cc55cSDimitry Andric 499*349cc55cSDimitry Andric // We provide the opt remark emitter pass for LICM to use. We only need to do 500*349cc55cSDimitry Andric // this once as it is immutable. 501*349cc55cSDimitry Andric FPM.addPass( 502*349cc55cSDimitry Andric RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); 503*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), 504*349cc55cSDimitry Andric /*UseMemorySSA=*/true, 505*349cc55cSDimitry Andric /*UseBlockFrequencyInfo=*/true)); 506*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 507*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 508*349cc55cSDimitry Andric if (EnableLoopFlatten) 509*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass())); 510*349cc55cSDimitry Andric // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, 511*349cc55cSDimitry Andric // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. 512*349cc55cSDimitry Andric // *All* loop passes must preserve it, in order to be able to use it. 513*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), 514*349cc55cSDimitry Andric /*UseMemorySSA=*/false, 515*349cc55cSDimitry Andric /*UseBlockFrequencyInfo=*/false)); 516*349cc55cSDimitry Andric 517*349cc55cSDimitry Andric // Delete small array after loop unroll. 518*349cc55cSDimitry Andric FPM.addPass(SROAPass()); 519*349cc55cSDimitry Andric 520*349cc55cSDimitry Andric // The matrix extension can introduce large vector operations early, which can 521*349cc55cSDimitry Andric // benefit from running vector-combine early on. 522*349cc55cSDimitry Andric if (EnableMatrix) 523*349cc55cSDimitry Andric FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true)); 524*349cc55cSDimitry Andric 525*349cc55cSDimitry Andric // Eliminate redundancies. 526*349cc55cSDimitry Andric FPM.addPass(MergedLoadStoreMotionPass()); 527*349cc55cSDimitry Andric if (RunNewGVN) 528*349cc55cSDimitry Andric FPM.addPass(NewGVNPass()); 529*349cc55cSDimitry Andric else 530*349cc55cSDimitry Andric FPM.addPass(GVNPass()); 531*349cc55cSDimitry Andric 532*349cc55cSDimitry Andric // Sparse conditional constant propagation. 533*349cc55cSDimitry Andric // FIXME: It isn't clear why we do this *after* loop passes rather than 534*349cc55cSDimitry Andric // before... 535*349cc55cSDimitry Andric FPM.addPass(SCCPPass()); 536*349cc55cSDimitry Andric 537*349cc55cSDimitry Andric // Delete dead bit computations (instcombine runs after to fold away the dead 538*349cc55cSDimitry Andric // computations, and then ADCE will run later to exploit any new DCE 539*349cc55cSDimitry Andric // opportunities that creates). 540*349cc55cSDimitry Andric FPM.addPass(BDCEPass()); 541*349cc55cSDimitry Andric 542*349cc55cSDimitry Andric // Run instcombine after redundancy and dead bit elimination to exploit 543*349cc55cSDimitry Andric // opportunities opened up by them. 544*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 545*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 546*349cc55cSDimitry Andric 547*349cc55cSDimitry Andric // Re-consider control flow based optimizations after redundancy elimination, 548*349cc55cSDimitry Andric // redo DCE, etc. 549*349cc55cSDimitry Andric if (EnableDFAJumpThreading && Level.getSizeLevel() == 0) 550*349cc55cSDimitry Andric FPM.addPass(DFAJumpThreadingPass()); 551*349cc55cSDimitry Andric 552*349cc55cSDimitry Andric FPM.addPass(JumpThreadingPass()); 553*349cc55cSDimitry Andric FPM.addPass(CorrelatedValuePropagationPass()); 554*349cc55cSDimitry Andric 555*349cc55cSDimitry Andric // Finally, do an expensive DCE pass to catch all the dead code exposed by 556*349cc55cSDimitry Andric // the simplifications and basic cleanup after all the simplifications. 557*349cc55cSDimitry Andric // TODO: Investigate if this is too expensive. 558*349cc55cSDimitry Andric FPM.addPass(ADCEPass()); 559*349cc55cSDimitry Andric 560*349cc55cSDimitry Andric // Specially optimize memory movement as it doesn't look like dataflow in SSA. 561*349cc55cSDimitry Andric FPM.addPass(MemCpyOptPass()); 562*349cc55cSDimitry Andric 563*349cc55cSDimitry Andric FPM.addPass(DSEPass()); 564*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor( 565*349cc55cSDimitry Andric LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), 566*349cc55cSDimitry Andric /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); 567*349cc55cSDimitry Andric 568*349cc55cSDimitry Andric FPM.addPass(CoroElidePass()); 569*349cc55cSDimitry Andric 570*349cc55cSDimitry Andric for (auto &C : ScalarOptimizerLateEPCallbacks) 571*349cc55cSDimitry Andric C(FPM, Level); 572*349cc55cSDimitry Andric 573*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass( 574*349cc55cSDimitry Andric SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); 575*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 576*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 577*349cc55cSDimitry Andric 578*349cc55cSDimitry Andric if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt && 579*349cc55cSDimitry Andric (PGOOpt->Action == PGOOptions::IRUse || 580*349cc55cSDimitry Andric PGOOpt->Action == PGOOptions::SampleUse)) 581*349cc55cSDimitry Andric FPM.addPass(ControlHeightReductionPass()); 582*349cc55cSDimitry Andric 583*349cc55cSDimitry Andric return FPM; 584*349cc55cSDimitry Andric } 585*349cc55cSDimitry Andric 586*349cc55cSDimitry Andric void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { 587*349cc55cSDimitry Andric MPM.addPass(CanonicalizeAliasesPass()); 588*349cc55cSDimitry Andric MPM.addPass(NameAnonGlobalPass()); 589*349cc55cSDimitry Andric } 590*349cc55cSDimitry Andric 591*349cc55cSDimitry Andric void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, 592*349cc55cSDimitry Andric OptimizationLevel Level, bool RunProfileGen, 593*349cc55cSDimitry Andric bool IsCS, std::string ProfileFile, 594*349cc55cSDimitry Andric std::string ProfileRemappingFile) { 595*349cc55cSDimitry Andric assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); 596*349cc55cSDimitry Andric if (!IsCS && !DisablePreInliner) { 597*349cc55cSDimitry Andric InlineParams IP; 598*349cc55cSDimitry Andric 599*349cc55cSDimitry Andric IP.DefaultThreshold = PreInlineThreshold; 600*349cc55cSDimitry Andric 601*349cc55cSDimitry Andric // FIXME: The hint threshold has the same value used by the regular inliner 602*349cc55cSDimitry Andric // when not optimzing for size. This should probably be lowered after 603*349cc55cSDimitry Andric // performance testing. 604*349cc55cSDimitry Andric // FIXME: this comment is cargo culted from the old pass manager, revisit). 605*349cc55cSDimitry Andric IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; 606*349cc55cSDimitry Andric ModuleInlinerWrapperPass MIWP(IP); 607*349cc55cSDimitry Andric CGSCCPassManager &CGPipeline = MIWP.getPM(); 608*349cc55cSDimitry Andric 609*349cc55cSDimitry Andric FunctionPassManager FPM; 610*349cc55cSDimitry Andric FPM.addPass(SROAPass()); 611*349cc55cSDimitry Andric FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. 612*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. 613*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); // Combine silly sequences. 614*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 615*349cc55cSDimitry Andric 616*349cc55cSDimitry Andric CGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 617*349cc55cSDimitry Andric std::move(FPM), PTO.EagerlyInvalidateAnalyses)); 618*349cc55cSDimitry Andric 619*349cc55cSDimitry Andric MPM.addPass(std::move(MIWP)); 620*349cc55cSDimitry Andric 621*349cc55cSDimitry Andric // Delete anything that is now dead to make sure that we don't instrument 622*349cc55cSDimitry Andric // dead code. Instrumentation can end up keeping dead code around and 623*349cc55cSDimitry Andric // dramatically increase code size. 624*349cc55cSDimitry Andric MPM.addPass(GlobalDCEPass()); 625*349cc55cSDimitry Andric } 626*349cc55cSDimitry Andric 627*349cc55cSDimitry Andric if (!RunProfileGen) { 628*349cc55cSDimitry Andric assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); 629*349cc55cSDimitry Andric MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); 630*349cc55cSDimitry Andric // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 631*349cc55cSDimitry Andric // RequireAnalysisPass for PSI before subsequent non-module passes. 632*349cc55cSDimitry Andric MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 633*349cc55cSDimitry Andric return; 634*349cc55cSDimitry Andric } 635*349cc55cSDimitry Andric 636*349cc55cSDimitry Andric // Perform PGO instrumentation. 637*349cc55cSDimitry Andric MPM.addPass(PGOInstrumentationGen(IsCS)); 638*349cc55cSDimitry Andric 639*349cc55cSDimitry Andric FunctionPassManager FPM; 640*349cc55cSDimitry Andric // Disable header duplication in loop rotation at -Oz. 641*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor( 642*349cc55cSDimitry Andric LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false, 643*349cc55cSDimitry Andric /*UseBlockFrequencyInfo=*/false)); 644*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), 645*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses)); 646*349cc55cSDimitry Andric 647*349cc55cSDimitry Andric // Add the profile lowering pass. 648*349cc55cSDimitry Andric InstrProfOptions Options; 649*349cc55cSDimitry Andric if (!ProfileFile.empty()) 650*349cc55cSDimitry Andric Options.InstrProfileOutput = ProfileFile; 651*349cc55cSDimitry Andric // Do counter promotion at Level greater than O0. 652*349cc55cSDimitry Andric Options.DoCounterPromotion = true; 653*349cc55cSDimitry Andric Options.UseBFIInPromotion = IsCS; 654*349cc55cSDimitry Andric MPM.addPass(InstrProfiling(Options, IsCS)); 655*349cc55cSDimitry Andric } 656*349cc55cSDimitry Andric 657*349cc55cSDimitry Andric void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, 658*349cc55cSDimitry Andric bool RunProfileGen, bool IsCS, 659*349cc55cSDimitry Andric std::string ProfileFile, 660*349cc55cSDimitry Andric std::string ProfileRemappingFile) { 661*349cc55cSDimitry Andric if (!RunProfileGen) { 662*349cc55cSDimitry Andric assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); 663*349cc55cSDimitry Andric MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); 664*349cc55cSDimitry Andric // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 665*349cc55cSDimitry Andric // RequireAnalysisPass for PSI before subsequent non-module passes. 666*349cc55cSDimitry Andric MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 667*349cc55cSDimitry Andric return; 668*349cc55cSDimitry Andric } 669*349cc55cSDimitry Andric 670*349cc55cSDimitry Andric // Perform PGO instrumentation. 671*349cc55cSDimitry Andric MPM.addPass(PGOInstrumentationGen(IsCS)); 672*349cc55cSDimitry Andric // Add the profile lowering pass. 673*349cc55cSDimitry Andric InstrProfOptions Options; 674*349cc55cSDimitry Andric if (!ProfileFile.empty()) 675*349cc55cSDimitry Andric Options.InstrProfileOutput = ProfileFile; 676*349cc55cSDimitry Andric // Do not do counter promotion at O0. 677*349cc55cSDimitry Andric Options.DoCounterPromotion = false; 678*349cc55cSDimitry Andric Options.UseBFIInPromotion = IsCS; 679*349cc55cSDimitry Andric MPM.addPass(InstrProfiling(Options, IsCS)); 680*349cc55cSDimitry Andric } 681*349cc55cSDimitry Andric 682*349cc55cSDimitry Andric static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { 683*349cc55cSDimitry Andric return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); 684*349cc55cSDimitry Andric } 685*349cc55cSDimitry Andric 686*349cc55cSDimitry Andric ModuleInlinerWrapperPass 687*349cc55cSDimitry Andric PassBuilder::buildInlinerPipeline(OptimizationLevel Level, 688*349cc55cSDimitry Andric ThinOrFullLTOPhase Phase) { 689*349cc55cSDimitry Andric InlineParams IP = getInlineParamsFromOptLevel(Level); 690*349cc55cSDimitry Andric if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 691*349cc55cSDimitry Andric PGOOpt->Action == PGOOptions::SampleUse) 692*349cc55cSDimitry Andric IP.HotCallSiteThreshold = 0; 693*349cc55cSDimitry Andric 694*349cc55cSDimitry Andric if (PGOOpt) 695*349cc55cSDimitry Andric IP.EnableDeferral = EnablePGOInlineDeferral; 696*349cc55cSDimitry Andric 697*349cc55cSDimitry Andric ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, 698*349cc55cSDimitry Andric UseInlineAdvisor, MaxDevirtIterations); 699*349cc55cSDimitry Andric 700*349cc55cSDimitry Andric // Require the GlobalsAA analysis for the module so we can query it within 701*349cc55cSDimitry Andric // the CGSCC pipeline. 702*349cc55cSDimitry Andric MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>()); 703*349cc55cSDimitry Andric // Invalidate AAManager so it can be recreated and pick up the newly available 704*349cc55cSDimitry Andric // GlobalsAA. 705*349cc55cSDimitry Andric MIWP.addModulePass( 706*349cc55cSDimitry Andric createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); 707*349cc55cSDimitry Andric 708*349cc55cSDimitry Andric // Require the ProfileSummaryAnalysis for the module so we can query it within 709*349cc55cSDimitry Andric // the inliner pass. 710*349cc55cSDimitry Andric MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 711*349cc55cSDimitry Andric 712*349cc55cSDimitry Andric // Now begin the main postorder CGSCC pipeline. 713*349cc55cSDimitry Andric // FIXME: The current CGSCC pipeline has its origins in the legacy pass 714*349cc55cSDimitry Andric // manager and trying to emulate its precise behavior. Much of this doesn't 715*349cc55cSDimitry Andric // make a lot of sense and we should revisit the core CGSCC structure. 716*349cc55cSDimitry Andric CGSCCPassManager &MainCGPipeline = MIWP.getPM(); 717*349cc55cSDimitry Andric 718*349cc55cSDimitry Andric // Note: historically, the PruneEH pass was run first to deduce nounwind and 719*349cc55cSDimitry Andric // generally clean up exception handling overhead. It isn't clear this is 720*349cc55cSDimitry Andric // valuable as the inliner doesn't currently care whether it is inlining an 721*349cc55cSDimitry Andric // invoke or a call. 722*349cc55cSDimitry Andric 723*349cc55cSDimitry Andric if (AttributorRun & AttributorRunOption::CGSCC) 724*349cc55cSDimitry Andric MainCGPipeline.addPass(AttributorCGSCCPass()); 725*349cc55cSDimitry Andric 726*349cc55cSDimitry Andric // Now deduce any function attributes based in the current code. 727*349cc55cSDimitry Andric MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); 728*349cc55cSDimitry Andric 729*349cc55cSDimitry Andric // When at O3 add argument promotion to the pass pipeline. 730*349cc55cSDimitry Andric // FIXME: It isn't at all clear why this should be limited to O3. 731*349cc55cSDimitry Andric if (Level == OptimizationLevel::O3) 732*349cc55cSDimitry Andric MainCGPipeline.addPass(ArgumentPromotionPass()); 733*349cc55cSDimitry Andric 734*349cc55cSDimitry Andric // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if 735*349cc55cSDimitry Andric // there are no OpenMP runtime calls present in the module. 736*349cc55cSDimitry Andric if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) 737*349cc55cSDimitry Andric MainCGPipeline.addPass(OpenMPOptCGSCCPass()); 738*349cc55cSDimitry Andric 739*349cc55cSDimitry Andric for (auto &C : CGSCCOptimizerLateEPCallbacks) 740*349cc55cSDimitry Andric C(MainCGPipeline, Level); 741*349cc55cSDimitry Andric 742*349cc55cSDimitry Andric // Lastly, add the core function simplification pipeline nested inside the 743*349cc55cSDimitry Andric // CGSCC walk. 744*349cc55cSDimitry Andric MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 745*349cc55cSDimitry Andric buildFunctionSimplificationPipeline(Level, Phase), 746*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline)); 747*349cc55cSDimitry Andric 748*349cc55cSDimitry Andric MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); 749*349cc55cSDimitry Andric 750*349cc55cSDimitry Andric if (EnableNoRerunSimplificationPipeline) 751*349cc55cSDimitry Andric MIWP.addLateModulePass(createModuleToFunctionPassAdaptor( 752*349cc55cSDimitry Andric InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); 753*349cc55cSDimitry Andric 754*349cc55cSDimitry Andric return MIWP; 755*349cc55cSDimitry Andric } 756*349cc55cSDimitry Andric 757*349cc55cSDimitry Andric ModuleInlinerPass 758*349cc55cSDimitry Andric PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, 759*349cc55cSDimitry Andric ThinOrFullLTOPhase Phase) { 760*349cc55cSDimitry Andric InlineParams IP = getInlineParamsFromOptLevel(Level); 761*349cc55cSDimitry Andric if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 762*349cc55cSDimitry Andric PGOOpt->Action == PGOOptions::SampleUse) 763*349cc55cSDimitry Andric IP.HotCallSiteThreshold = 0; 764*349cc55cSDimitry Andric 765*349cc55cSDimitry Andric if (PGOOpt) 766*349cc55cSDimitry Andric IP.EnableDeferral = EnablePGOInlineDeferral; 767*349cc55cSDimitry Andric 768*349cc55cSDimitry Andric // The inline deferral logic is used to avoid losing some 769*349cc55cSDimitry Andric // inlining chance in future. It is helpful in SCC inliner, in which 770*349cc55cSDimitry Andric // inlining is processed in bottom-up order. 771*349cc55cSDimitry Andric // While in module inliner, the inlining order is a priority-based order 772*349cc55cSDimitry Andric // by default. The inline deferral is unnecessary there. So we disable the 773*349cc55cSDimitry Andric // inline deferral logic in module inliner. 774*349cc55cSDimitry Andric IP.EnableDeferral = false; 775*349cc55cSDimitry Andric 776*349cc55cSDimitry Andric return ModuleInlinerPass(IP, UseInlineAdvisor); 777*349cc55cSDimitry Andric } 778*349cc55cSDimitry Andric 779*349cc55cSDimitry Andric ModulePassManager 780*349cc55cSDimitry Andric PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, 781*349cc55cSDimitry Andric ThinOrFullLTOPhase Phase) { 782*349cc55cSDimitry Andric ModulePassManager MPM; 783*349cc55cSDimitry Andric 784*349cc55cSDimitry Andric // Place pseudo probe instrumentation as the first pass of the pipeline to 785*349cc55cSDimitry Andric // minimize the impact of optimization changes. 786*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 787*349cc55cSDimitry Andric Phase != ThinOrFullLTOPhase::ThinLTOPostLink) 788*349cc55cSDimitry Andric MPM.addPass(SampleProfileProbePass(TM)); 789*349cc55cSDimitry Andric 790*349cc55cSDimitry Andric bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); 791*349cc55cSDimitry Andric 792*349cc55cSDimitry Andric // In ThinLTO mode, when flattened profile is used, all the available 793*349cc55cSDimitry Andric // profile information will be annotated in PreLink phase so there is 794*349cc55cSDimitry Andric // no need to load the profile again in PostLink. 795*349cc55cSDimitry Andric bool LoadSampleProfile = 796*349cc55cSDimitry Andric HasSampleProfile && 797*349cc55cSDimitry Andric !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); 798*349cc55cSDimitry Andric 799*349cc55cSDimitry Andric // During the ThinLTO backend phase we perform early indirect call promotion 800*349cc55cSDimitry Andric // here, before globalopt. Otherwise imported available_externally functions 801*349cc55cSDimitry Andric // look unreferenced and are removed. If we are going to load the sample 802*349cc55cSDimitry Andric // profile then defer until later. 803*349cc55cSDimitry Andric // TODO: See if we can move later and consolidate with the location where 804*349cc55cSDimitry Andric // we perform ICP when we are loading a sample profile. 805*349cc55cSDimitry Andric // TODO: We pass HasSampleProfile (whether there was a sample profile file 806*349cc55cSDimitry Andric // passed to the compile) to the SamplePGO flag of ICP. This is used to 807*349cc55cSDimitry Andric // determine whether the new direct calls are annotated with prof metadata. 808*349cc55cSDimitry Andric // Ideally this should be determined from whether the IR is annotated with 809*349cc55cSDimitry Andric // sample profile, and not whether the a sample profile was provided on the 810*349cc55cSDimitry Andric // command line. E.g. for flattened profiles where we will not be reloading 811*349cc55cSDimitry Andric // the sample profile in the ThinLTO backend, we ideally shouldn't have to 812*349cc55cSDimitry Andric // provide the sample profile file. 813*349cc55cSDimitry Andric if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) 814*349cc55cSDimitry Andric MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); 815*349cc55cSDimitry Andric 816*349cc55cSDimitry Andric // Do basic inference of function attributes from known properties of system 817*349cc55cSDimitry Andric // libraries and other oracles. 818*349cc55cSDimitry Andric MPM.addPass(InferFunctionAttrsPass()); 819*349cc55cSDimitry Andric 820*349cc55cSDimitry Andric // Create an early function pass manager to cleanup the output of the 821*349cc55cSDimitry Andric // frontend. 822*349cc55cSDimitry Andric FunctionPassManager EarlyFPM; 823*349cc55cSDimitry Andric // Lower llvm.expect to metadata before attempting transforms. 824*349cc55cSDimitry Andric // Compare/branch metadata may alter the behavior of passes like SimplifyCFG. 825*349cc55cSDimitry Andric EarlyFPM.addPass(LowerExpectIntrinsicPass()); 826*349cc55cSDimitry Andric EarlyFPM.addPass(SimplifyCFGPass()); 827*349cc55cSDimitry Andric EarlyFPM.addPass(SROAPass()); 828*349cc55cSDimitry Andric EarlyFPM.addPass(EarlyCSEPass()); 829*349cc55cSDimitry Andric EarlyFPM.addPass(CoroEarlyPass()); 830*349cc55cSDimitry Andric if (Level == OptimizationLevel::O3) 831*349cc55cSDimitry Andric EarlyFPM.addPass(CallSiteSplittingPass()); 832*349cc55cSDimitry Andric 833*349cc55cSDimitry Andric // In SamplePGO ThinLTO backend, we need instcombine before profile annotation 834*349cc55cSDimitry Andric // to convert bitcast to direct calls so that they can be inlined during the 835*349cc55cSDimitry Andric // profile annotation prepration step. 836*349cc55cSDimitry Andric // More details about SamplePGO design can be found in: 837*349cc55cSDimitry Andric // https://research.google.com/pubs/pub45290.html 838*349cc55cSDimitry Andric // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. 839*349cc55cSDimitry Andric if (LoadSampleProfile) 840*349cc55cSDimitry Andric EarlyFPM.addPass(InstCombinePass()); 841*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM), 842*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses)); 843*349cc55cSDimitry Andric 844*349cc55cSDimitry Andric if (LoadSampleProfile) { 845*349cc55cSDimitry Andric // Annotate sample profile right after early FPM to ensure freshness of 846*349cc55cSDimitry Andric // the debug info. 847*349cc55cSDimitry Andric MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, 848*349cc55cSDimitry Andric PGOOpt->ProfileRemappingFile, Phase)); 849*349cc55cSDimitry Andric // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 850*349cc55cSDimitry Andric // RequireAnalysisPass for PSI before subsequent non-module passes. 851*349cc55cSDimitry Andric MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 852*349cc55cSDimitry Andric // Do not invoke ICP in the LTOPrelink phase as it makes it hard 853*349cc55cSDimitry Andric // for the profile annotation to be accurate in the LTO backend. 854*349cc55cSDimitry Andric if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink && 855*349cc55cSDimitry Andric Phase != ThinOrFullLTOPhase::FullLTOPreLink) 856*349cc55cSDimitry Andric // We perform early indirect call promotion here, before globalopt. 857*349cc55cSDimitry Andric // This is important for the ThinLTO backend phase because otherwise 858*349cc55cSDimitry Andric // imported available_externally functions look unreferenced and are 859*349cc55cSDimitry Andric // removed. 860*349cc55cSDimitry Andric MPM.addPass( 861*349cc55cSDimitry Andric PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); 862*349cc55cSDimitry Andric } 863*349cc55cSDimitry Andric 864*349cc55cSDimitry Andric // Try to perform OpenMP specific optimizations on the module. This is a 865*349cc55cSDimitry Andric // (quick!) no-op if there are no OpenMP runtime calls present in the module. 866*349cc55cSDimitry Andric if (Level != OptimizationLevel::O0) 867*349cc55cSDimitry Andric MPM.addPass(OpenMPOptPass()); 868*349cc55cSDimitry Andric 869*349cc55cSDimitry Andric if (AttributorRun & AttributorRunOption::MODULE) 870*349cc55cSDimitry Andric MPM.addPass(AttributorPass()); 871*349cc55cSDimitry Andric 872*349cc55cSDimitry Andric // Lower type metadata and the type.test intrinsic in the ThinLTO 873*349cc55cSDimitry Andric // post link pipeline after ICP. This is to enable usage of the type 874*349cc55cSDimitry Andric // tests in ICP sequences. 875*349cc55cSDimitry Andric if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) 876*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 877*349cc55cSDimitry Andric 878*349cc55cSDimitry Andric for (auto &C : PipelineEarlySimplificationEPCallbacks) 879*349cc55cSDimitry Andric C(MPM, Level); 880*349cc55cSDimitry Andric 881*349cc55cSDimitry Andric // Specialize functions with IPSCCP. 882*349cc55cSDimitry Andric if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) 883*349cc55cSDimitry Andric MPM.addPass(FunctionSpecializationPass()); 884*349cc55cSDimitry Andric 885*349cc55cSDimitry Andric // Interprocedural constant propagation now that basic cleanup has occurred 886*349cc55cSDimitry Andric // and prior to optimizing globals. 887*349cc55cSDimitry Andric // FIXME: This position in the pipeline hasn't been carefully considered in 888*349cc55cSDimitry Andric // years, it should be re-analyzed. 889*349cc55cSDimitry Andric MPM.addPass(IPSCCPPass()); 890*349cc55cSDimitry Andric 891*349cc55cSDimitry Andric // Attach metadata to indirect call sites indicating the set of functions 892*349cc55cSDimitry Andric // they may target at run-time. This should follow IPSCCP. 893*349cc55cSDimitry Andric MPM.addPass(CalledValuePropagationPass()); 894*349cc55cSDimitry Andric 895*349cc55cSDimitry Andric // Optimize globals to try and fold them into constants. 896*349cc55cSDimitry Andric MPM.addPass(GlobalOptPass()); 897*349cc55cSDimitry Andric 898*349cc55cSDimitry Andric // Promote any localized globals to SSA registers. 899*349cc55cSDimitry Andric // FIXME: Should this instead by a run of SROA? 900*349cc55cSDimitry Andric // FIXME: We should probably run instcombine and simplifycfg afterward to 901*349cc55cSDimitry Andric // delete control flows that are dead once globals have been folded to 902*349cc55cSDimitry Andric // constants. 903*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); 904*349cc55cSDimitry Andric 905*349cc55cSDimitry Andric // Remove any dead arguments exposed by cleanups and constant folding 906*349cc55cSDimitry Andric // globals. 907*349cc55cSDimitry Andric MPM.addPass(DeadArgumentEliminationPass()); 908*349cc55cSDimitry Andric 909*349cc55cSDimitry Andric // Create a small function pass pipeline to cleanup after all the global 910*349cc55cSDimitry Andric // optimizations. 911*349cc55cSDimitry Andric FunctionPassManager GlobalCleanupPM; 912*349cc55cSDimitry Andric GlobalCleanupPM.addPass(InstCombinePass()); 913*349cc55cSDimitry Andric invokePeepholeEPCallbacks(GlobalCleanupPM, Level); 914*349cc55cSDimitry Andric 915*349cc55cSDimitry Andric GlobalCleanupPM.addPass(SimplifyCFGPass()); 916*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), 917*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses)); 918*349cc55cSDimitry Andric 919*349cc55cSDimitry Andric // Add all the requested passes for instrumentation PGO, if requested. 920*349cc55cSDimitry Andric if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && 921*349cc55cSDimitry Andric (PGOOpt->Action == PGOOptions::IRInstr || 922*349cc55cSDimitry Andric PGOOpt->Action == PGOOptions::IRUse)) { 923*349cc55cSDimitry Andric addPGOInstrPasses(MPM, Level, 924*349cc55cSDimitry Andric /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, 925*349cc55cSDimitry Andric /* IsCS */ false, PGOOpt->ProfileFile, 926*349cc55cSDimitry Andric PGOOpt->ProfileRemappingFile); 927*349cc55cSDimitry Andric MPM.addPass(PGOIndirectCallPromotion(false, false)); 928*349cc55cSDimitry Andric } 929*349cc55cSDimitry Andric if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && 930*349cc55cSDimitry Andric PGOOpt->CSAction == PGOOptions::CSIRInstr) 931*349cc55cSDimitry Andric MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); 932*349cc55cSDimitry Andric 933*349cc55cSDimitry Andric // Synthesize function entry counts for non-PGO compilation. 934*349cc55cSDimitry Andric if (EnableSyntheticCounts && !PGOOpt) 935*349cc55cSDimitry Andric MPM.addPass(SyntheticCountsPropagation()); 936*349cc55cSDimitry Andric 937*349cc55cSDimitry Andric if (EnableModuleInliner) 938*349cc55cSDimitry Andric MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); 939*349cc55cSDimitry Andric else 940*349cc55cSDimitry Andric MPM.addPass(buildInlinerPipeline(Level, Phase)); 941*349cc55cSDimitry Andric 942*349cc55cSDimitry Andric if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { 943*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); 944*349cc55cSDimitry Andric MPM.addPass(ModuleMemProfilerPass()); 945*349cc55cSDimitry Andric } 946*349cc55cSDimitry Andric 947*349cc55cSDimitry Andric return MPM; 948*349cc55cSDimitry Andric } 949*349cc55cSDimitry Andric 950*349cc55cSDimitry Andric /// TODO: Should LTO cause any differences to this set of passes? 951*349cc55cSDimitry Andric void PassBuilder::addVectorPasses(OptimizationLevel Level, 952*349cc55cSDimitry Andric FunctionPassManager &FPM, bool IsFullLTO) { 953*349cc55cSDimitry Andric FPM.addPass(LoopVectorizePass( 954*349cc55cSDimitry Andric LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); 955*349cc55cSDimitry Andric 956*349cc55cSDimitry Andric if (IsFullLTO) { 957*349cc55cSDimitry Andric // The vectorizer may have significantly shortened a loop body; unroll 958*349cc55cSDimitry Andric // again. Unroll small loops to hide loop backedge latency and saturate any 959*349cc55cSDimitry Andric // parallel execution resources of an out-of-order processor. We also then 960*349cc55cSDimitry Andric // need to clean up redundancies and loop invariant code. 961*349cc55cSDimitry Andric // FIXME: It would be really good to use a loop-integrated instruction 962*349cc55cSDimitry Andric // combiner for cleanup here so that the unrolling and LICM can be pipelined 963*349cc55cSDimitry Andric // across the loop nests. 964*349cc55cSDimitry Andric // We do UnrollAndJam in a separate LPM to ensure it happens before unroll 965*349cc55cSDimitry Andric if (EnableUnrollAndJam && PTO.LoopUnrolling) 966*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor( 967*349cc55cSDimitry Andric LoopUnrollAndJamPass(Level.getSpeedupLevel()))); 968*349cc55cSDimitry Andric FPM.addPass(LoopUnrollPass(LoopUnrollOptions( 969*349cc55cSDimitry Andric Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, 970*349cc55cSDimitry Andric PTO.ForgetAllSCEVInLoopUnroll))); 971*349cc55cSDimitry Andric FPM.addPass(WarnMissedTransformationsPass()); 972*349cc55cSDimitry Andric } 973*349cc55cSDimitry Andric 974*349cc55cSDimitry Andric if (!IsFullLTO) { 975*349cc55cSDimitry Andric // Eliminate loads by forwarding stores from the previous iteration to loads 976*349cc55cSDimitry Andric // of the current iteration. 977*349cc55cSDimitry Andric FPM.addPass(LoopLoadEliminationPass()); 978*349cc55cSDimitry Andric } 979*349cc55cSDimitry Andric // Cleanup after the loop optimization passes. 980*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 981*349cc55cSDimitry Andric 982*349cc55cSDimitry Andric if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { 983*349cc55cSDimitry Andric // At higher optimization levels, try to clean up any runtime overlap and 984*349cc55cSDimitry Andric // alignment checks inserted by the vectorizer. We want to track correlated 985*349cc55cSDimitry Andric // runtime checks for two inner loops in the same outer loop, fold any 986*349cc55cSDimitry Andric // common computations, hoist loop-invariant aspects out of any outer loop, 987*349cc55cSDimitry Andric // and unswitch the runtime checks if possible. Once hoisted, we may have 988*349cc55cSDimitry Andric // dead (or speculatable) control flows or more combining opportunities. 989*349cc55cSDimitry Andric FPM.addPass(EarlyCSEPass()); 990*349cc55cSDimitry Andric FPM.addPass(CorrelatedValuePropagationPass()); 991*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 992*349cc55cSDimitry Andric LoopPassManager LPM; 993*349cc55cSDimitry Andric LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); 994*349cc55cSDimitry Andric LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == 995*349cc55cSDimitry Andric OptimizationLevel::O3)); 996*349cc55cSDimitry Andric FPM.addPass( 997*349cc55cSDimitry Andric RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); 998*349cc55cSDimitry Andric FPM.addPass( 999*349cc55cSDimitry Andric createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, 1000*349cc55cSDimitry Andric /*UseBlockFrequencyInfo=*/true)); 1001*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass()); 1002*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 1003*349cc55cSDimitry Andric } 1004*349cc55cSDimitry Andric 1005*349cc55cSDimitry Andric // Now that we've formed fast to execute loop structures, we do further 1006*349cc55cSDimitry Andric // optimizations. These are run afterward as they might block doing complex 1007*349cc55cSDimitry Andric // analyses and transforms such as what are needed for loop vectorization. 1008*349cc55cSDimitry Andric 1009*349cc55cSDimitry Andric // Cleanup after loop vectorization, etc. Simplification passes like CVP and 1010*349cc55cSDimitry Andric // GVN, loop transforms, and others have already run, so it's now better to 1011*349cc55cSDimitry Andric // convert to more optimized IR using more aggressive simplify CFG options. 1012*349cc55cSDimitry Andric // The extra sinking transform can create larger basic blocks, so do this 1013*349cc55cSDimitry Andric // before SLP vectorization. 1014*349cc55cSDimitry Andric FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() 1015*349cc55cSDimitry Andric .forwardSwitchCondToPhi(true) 1016*349cc55cSDimitry Andric .convertSwitchToLookupTable(true) 1017*349cc55cSDimitry Andric .needCanonicalLoops(false) 1018*349cc55cSDimitry Andric .hoistCommonInsts(true) 1019*349cc55cSDimitry Andric .sinkCommonInsts(true))); 1020*349cc55cSDimitry Andric 1021*349cc55cSDimitry Andric if (IsFullLTO) { 1022*349cc55cSDimitry Andric FPM.addPass(SCCPPass()); 1023*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 1024*349cc55cSDimitry Andric FPM.addPass(BDCEPass()); 1025*349cc55cSDimitry Andric } 1026*349cc55cSDimitry Andric 1027*349cc55cSDimitry Andric // Optimize parallel scalar instruction chains into SIMD instructions. 1028*349cc55cSDimitry Andric if (PTO.SLPVectorization) { 1029*349cc55cSDimitry Andric FPM.addPass(SLPVectorizerPass()); 1030*349cc55cSDimitry Andric if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { 1031*349cc55cSDimitry Andric FPM.addPass(EarlyCSEPass()); 1032*349cc55cSDimitry Andric } 1033*349cc55cSDimitry Andric } 1034*349cc55cSDimitry Andric // Enhance/cleanup vector code. 1035*349cc55cSDimitry Andric FPM.addPass(VectorCombinePass()); 1036*349cc55cSDimitry Andric 1037*349cc55cSDimitry Andric if (!IsFullLTO) { 1038*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 1039*349cc55cSDimitry Andric // Unroll small loops to hide loop backedge latency and saturate any 1040*349cc55cSDimitry Andric // parallel execution resources of an out-of-order processor. We also then 1041*349cc55cSDimitry Andric // need to clean up redundancies and loop invariant code. 1042*349cc55cSDimitry Andric // FIXME: It would be really good to use a loop-integrated instruction 1043*349cc55cSDimitry Andric // combiner for cleanup here so that the unrolling and LICM can be pipelined 1044*349cc55cSDimitry Andric // across the loop nests. 1045*349cc55cSDimitry Andric // We do UnrollAndJam in a separate LPM to ensure it happens before unroll 1046*349cc55cSDimitry Andric if (EnableUnrollAndJam && PTO.LoopUnrolling) { 1047*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor( 1048*349cc55cSDimitry Andric LoopUnrollAndJamPass(Level.getSpeedupLevel()))); 1049*349cc55cSDimitry Andric } 1050*349cc55cSDimitry Andric FPM.addPass(LoopUnrollPass(LoopUnrollOptions( 1051*349cc55cSDimitry Andric Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, 1052*349cc55cSDimitry Andric PTO.ForgetAllSCEVInLoopUnroll))); 1053*349cc55cSDimitry Andric FPM.addPass(WarnMissedTransformationsPass()); 1054*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 1055*349cc55cSDimitry Andric FPM.addPass( 1056*349cc55cSDimitry Andric RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); 1057*349cc55cSDimitry Andric FPM.addPass(createFunctionToLoopPassAdaptor( 1058*349cc55cSDimitry Andric LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), 1059*349cc55cSDimitry Andric /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); 1060*349cc55cSDimitry Andric } 1061*349cc55cSDimitry Andric 1062*349cc55cSDimitry Andric // Now that we've vectorized and unrolled loops, we may have more refined 1063*349cc55cSDimitry Andric // alignment information, try to re-derive it here. 1064*349cc55cSDimitry Andric FPM.addPass(AlignmentFromAssumptionsPass()); 1065*349cc55cSDimitry Andric 1066*349cc55cSDimitry Andric if (IsFullLTO) 1067*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 1068*349cc55cSDimitry Andric } 1069*349cc55cSDimitry Andric 1070*349cc55cSDimitry Andric ModulePassManager 1071*349cc55cSDimitry Andric PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, 1072*349cc55cSDimitry Andric bool LTOPreLink) { 1073*349cc55cSDimitry Andric ModulePassManager MPM; 1074*349cc55cSDimitry Andric 1075*349cc55cSDimitry Andric // Optimize globals now that the module is fully simplified. 1076*349cc55cSDimitry Andric MPM.addPass(GlobalOptPass()); 1077*349cc55cSDimitry Andric MPM.addPass(GlobalDCEPass()); 1078*349cc55cSDimitry Andric 1079*349cc55cSDimitry Andric // Run partial inlining pass to partially inline functions that have 1080*349cc55cSDimitry Andric // large bodies. 1081*349cc55cSDimitry Andric if (RunPartialInlining) 1082*349cc55cSDimitry Andric MPM.addPass(PartialInlinerPass()); 1083*349cc55cSDimitry Andric 1084*349cc55cSDimitry Andric // Remove avail extern fns and globals definitions since we aren't compiling 1085*349cc55cSDimitry Andric // an object file for later LTO. For LTO we want to preserve these so they 1086*349cc55cSDimitry Andric // are eligible for inlining at link-time. Note if they are unreferenced they 1087*349cc55cSDimitry Andric // will be removed by GlobalDCE later, so this only impacts referenced 1088*349cc55cSDimitry Andric // available externally globals. Eventually they will be suppressed during 1089*349cc55cSDimitry Andric // codegen, but eliminating here enables more opportunity for GlobalDCE as it 1090*349cc55cSDimitry Andric // may make globals referenced by available external functions dead and saves 1091*349cc55cSDimitry Andric // running remaining passes on the eliminated functions. These should be 1092*349cc55cSDimitry Andric // preserved during prelinking for link-time inlining decisions. 1093*349cc55cSDimitry Andric if (!LTOPreLink) 1094*349cc55cSDimitry Andric MPM.addPass(EliminateAvailableExternallyPass()); 1095*349cc55cSDimitry Andric 1096*349cc55cSDimitry Andric if (EnableOrderFileInstrumentation) 1097*349cc55cSDimitry Andric MPM.addPass(InstrOrderFilePass()); 1098*349cc55cSDimitry Andric 1099*349cc55cSDimitry Andric // Do RPO function attribute inference across the module to forward-propagate 1100*349cc55cSDimitry Andric // attributes where applicable. 1101*349cc55cSDimitry Andric // FIXME: Is this really an optimization rather than a canonicalization? 1102*349cc55cSDimitry Andric MPM.addPass(ReversePostOrderFunctionAttrsPass()); 1103*349cc55cSDimitry Andric 1104*349cc55cSDimitry Andric // Do a post inline PGO instrumentation and use pass. This is a context 1105*349cc55cSDimitry Andric // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as 1106*349cc55cSDimitry Andric // cross-module inline has not been done yet. The context sensitive 1107*349cc55cSDimitry Andric // instrumentation is after all the inlines are done. 1108*349cc55cSDimitry Andric if (!LTOPreLink && PGOOpt) { 1109*349cc55cSDimitry Andric if (PGOOpt->CSAction == PGOOptions::CSIRInstr) 1110*349cc55cSDimitry Andric addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, 1111*349cc55cSDimitry Andric /* IsCS */ true, PGOOpt->CSProfileGenFile, 1112*349cc55cSDimitry Andric PGOOpt->ProfileRemappingFile); 1113*349cc55cSDimitry Andric else if (PGOOpt->CSAction == PGOOptions::CSIRUse) 1114*349cc55cSDimitry Andric addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, 1115*349cc55cSDimitry Andric /* IsCS */ true, PGOOpt->ProfileFile, 1116*349cc55cSDimitry Andric PGOOpt->ProfileRemappingFile); 1117*349cc55cSDimitry Andric } 1118*349cc55cSDimitry Andric 1119*349cc55cSDimitry Andric // Re-require GloblasAA here prior to function passes. This is particularly 1120*349cc55cSDimitry Andric // useful as the above will have inlined, DCE'ed, and function-attr 1121*349cc55cSDimitry Andric // propagated everything. We should at this point have a reasonably minimal 1122*349cc55cSDimitry Andric // and richly annotated call graph. By computing aliasing and mod/ref 1123*349cc55cSDimitry Andric // information for all local globals here, the late loop passes and notably 1124*349cc55cSDimitry Andric // the vectorizer will be able to use them to help recognize vectorizable 1125*349cc55cSDimitry Andric // memory operations. 1126*349cc55cSDimitry Andric MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); 1127*349cc55cSDimitry Andric 1128*349cc55cSDimitry Andric FunctionPassManager OptimizePM; 1129*349cc55cSDimitry Andric OptimizePM.addPass(Float2IntPass()); 1130*349cc55cSDimitry Andric OptimizePM.addPass(LowerConstantIntrinsicsPass()); 1131*349cc55cSDimitry Andric 1132*349cc55cSDimitry Andric if (EnableMatrix) { 1133*349cc55cSDimitry Andric OptimizePM.addPass(LowerMatrixIntrinsicsPass()); 1134*349cc55cSDimitry Andric OptimizePM.addPass(EarlyCSEPass()); 1135*349cc55cSDimitry Andric } 1136*349cc55cSDimitry Andric 1137*349cc55cSDimitry Andric // FIXME: We need to run some loop optimizations to re-rotate loops after 1138*349cc55cSDimitry Andric // simplifycfg and others undo their rotation. 1139*349cc55cSDimitry Andric 1140*349cc55cSDimitry Andric // Optimize the loop execution. These passes operate on entire loop nests 1141*349cc55cSDimitry Andric // rather than on each loop in an inside-out manner, and so they are actually 1142*349cc55cSDimitry Andric // function passes. 1143*349cc55cSDimitry Andric 1144*349cc55cSDimitry Andric for (auto &C : VectorizerStartEPCallbacks) 1145*349cc55cSDimitry Andric C(OptimizePM, Level); 1146*349cc55cSDimitry Andric 1147*349cc55cSDimitry Andric LoopPassManager LPM; 1148*349cc55cSDimitry Andric // First rotate loops that may have been un-rotated by prior passes. 1149*349cc55cSDimitry Andric // Disable header duplication at -Oz. 1150*349cc55cSDimitry Andric LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink)); 1151*349cc55cSDimitry Andric // Some loops may have become dead by now. Try to delete them. 1152*349cc55cSDimitry Andric // FIXME: see disscussion in https://reviews.llvm.org/D112851 1153*349cc55cSDimitry Andric // this may need to be revisited once GVN is more powerful. 1154*349cc55cSDimitry Andric LPM.addPass(LoopDeletionPass()); 1155*349cc55cSDimitry Andric OptimizePM.addPass(createFunctionToLoopPassAdaptor( 1156*349cc55cSDimitry Andric std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); 1157*349cc55cSDimitry Andric 1158*349cc55cSDimitry Andric // Distribute loops to allow partial vectorization. I.e. isolate dependences 1159*349cc55cSDimitry Andric // into separate loop that would otherwise inhibit vectorization. This is 1160*349cc55cSDimitry Andric // currently only performed for loops marked with the metadata 1161*349cc55cSDimitry Andric // llvm.loop.distribute=true or when -enable-loop-distribute is specified. 1162*349cc55cSDimitry Andric OptimizePM.addPass(LoopDistributePass()); 1163*349cc55cSDimitry Andric 1164*349cc55cSDimitry Andric // Populates the VFABI attribute with the scalar-to-vector mappings 1165*349cc55cSDimitry Andric // from the TargetLibraryInfo. 1166*349cc55cSDimitry Andric OptimizePM.addPass(InjectTLIMappings()); 1167*349cc55cSDimitry Andric 1168*349cc55cSDimitry Andric addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); 1169*349cc55cSDimitry Andric 1170*349cc55cSDimitry Andric // Split out cold code. Splitting is done late to avoid hiding context from 1171*349cc55cSDimitry Andric // other optimizations and inadvertently regressing performance. The tradeoff 1172*349cc55cSDimitry Andric // is that this has a higher code size cost than splitting early. 1173*349cc55cSDimitry Andric if (EnableHotColdSplit && !LTOPreLink) 1174*349cc55cSDimitry Andric MPM.addPass(HotColdSplittingPass()); 1175*349cc55cSDimitry Andric 1176*349cc55cSDimitry Andric // Search the code for similar regions of code. If enough similar regions can 1177*349cc55cSDimitry Andric // be found where extracting the regions into their own function will decrease 1178*349cc55cSDimitry Andric // the size of the program, we extract the regions, a deduplicate the 1179*349cc55cSDimitry Andric // structurally similar regions. 1180*349cc55cSDimitry Andric if (EnableIROutliner) 1181*349cc55cSDimitry Andric MPM.addPass(IROutlinerPass()); 1182*349cc55cSDimitry Andric 1183*349cc55cSDimitry Andric // Merge functions if requested. 1184*349cc55cSDimitry Andric if (PTO.MergeFunctions) 1185*349cc55cSDimitry Andric MPM.addPass(MergeFunctionsPass()); 1186*349cc55cSDimitry Andric 1187*349cc55cSDimitry Andric // LoopSink pass sinks instructions hoisted by LICM, which serves as a 1188*349cc55cSDimitry Andric // canonicalization pass that enables other optimizations. As a result, 1189*349cc55cSDimitry Andric // LoopSink pass needs to be a very late IR pass to avoid undoing LICM 1190*349cc55cSDimitry Andric // result too early. 1191*349cc55cSDimitry Andric OptimizePM.addPass(LoopSinkPass()); 1192*349cc55cSDimitry Andric 1193*349cc55cSDimitry Andric // And finally clean up LCSSA form before generating code. 1194*349cc55cSDimitry Andric OptimizePM.addPass(InstSimplifyPass()); 1195*349cc55cSDimitry Andric 1196*349cc55cSDimitry Andric // This hoists/decomposes div/rem ops. It should run after other sink/hoist 1197*349cc55cSDimitry Andric // passes to avoid re-sinking, but before SimplifyCFG because it can allow 1198*349cc55cSDimitry Andric // flattening of blocks. 1199*349cc55cSDimitry Andric OptimizePM.addPass(DivRemPairsPass()); 1200*349cc55cSDimitry Andric 1201*349cc55cSDimitry Andric // LoopSink (and other loop passes since the last simplifyCFG) might have 1202*349cc55cSDimitry Andric // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 1203*349cc55cSDimitry Andric OptimizePM.addPass(SimplifyCFGPass()); 1204*349cc55cSDimitry Andric 1205*349cc55cSDimitry Andric OptimizePM.addPass(CoroCleanupPass()); 1206*349cc55cSDimitry Andric 1207*349cc55cSDimitry Andric // Add the core optimizing pipeline. 1208*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), 1209*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses)); 1210*349cc55cSDimitry Andric 1211*349cc55cSDimitry Andric for (auto &C : OptimizerLastEPCallbacks) 1212*349cc55cSDimitry Andric C(MPM, Level); 1213*349cc55cSDimitry Andric 1214*349cc55cSDimitry Andric if (PTO.CallGraphProfile) 1215*349cc55cSDimitry Andric MPM.addPass(CGProfilePass()); 1216*349cc55cSDimitry Andric 1217*349cc55cSDimitry Andric // Now we need to do some global optimization transforms. 1218*349cc55cSDimitry Andric // FIXME: It would seem like these should come first in the optimization 1219*349cc55cSDimitry Andric // pipeline and maybe be the bottom of the canonicalization pipeline? Weird 1220*349cc55cSDimitry Andric // ordering here. 1221*349cc55cSDimitry Andric MPM.addPass(GlobalDCEPass()); 1222*349cc55cSDimitry Andric MPM.addPass(ConstantMergePass()); 1223*349cc55cSDimitry Andric 1224*349cc55cSDimitry Andric // TODO: Relative look table converter pass caused an issue when full lto is 1225*349cc55cSDimitry Andric // enabled. See https://reviews.llvm.org/D94355 for more details. 1226*349cc55cSDimitry Andric // Until the issue fixed, disable this pass during pre-linking phase. 1227*349cc55cSDimitry Andric if (!LTOPreLink) 1228*349cc55cSDimitry Andric MPM.addPass(RelLookupTableConverterPass()); 1229*349cc55cSDimitry Andric 1230*349cc55cSDimitry Andric return MPM; 1231*349cc55cSDimitry Andric } 1232*349cc55cSDimitry Andric 1233*349cc55cSDimitry Andric ModulePassManager 1234*349cc55cSDimitry Andric PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, 1235*349cc55cSDimitry Andric bool LTOPreLink) { 1236*349cc55cSDimitry Andric assert(Level != OptimizationLevel::O0 && 1237*349cc55cSDimitry Andric "Must request optimizations for the default pipeline!"); 1238*349cc55cSDimitry Andric 1239*349cc55cSDimitry Andric ModulePassManager MPM; 1240*349cc55cSDimitry Andric 1241*349cc55cSDimitry Andric // Convert @llvm.global.annotations to !annotation metadata. 1242*349cc55cSDimitry Andric MPM.addPass(Annotation2MetadataPass()); 1243*349cc55cSDimitry Andric 1244*349cc55cSDimitry Andric // Force any function attributes we want the rest of the pipeline to observe. 1245*349cc55cSDimitry Andric MPM.addPass(ForceFunctionAttrsPass()); 1246*349cc55cSDimitry Andric 1247*349cc55cSDimitry Andric // Apply module pipeline start EP callback. 1248*349cc55cSDimitry Andric for (auto &C : PipelineStartEPCallbacks) 1249*349cc55cSDimitry Andric C(MPM, Level); 1250*349cc55cSDimitry Andric 1251*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->DebugInfoForProfiling) 1252*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); 1253*349cc55cSDimitry Andric 1254*349cc55cSDimitry Andric // Add the core simplification pipeline. 1255*349cc55cSDimitry Andric MPM.addPass(buildModuleSimplificationPipeline( 1256*349cc55cSDimitry Andric Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink 1257*349cc55cSDimitry Andric : ThinOrFullLTOPhase::None)); 1258*349cc55cSDimitry Andric 1259*349cc55cSDimitry Andric // Now add the optimization pipeline. 1260*349cc55cSDimitry Andric MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); 1261*349cc55cSDimitry Andric 1262*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 1263*349cc55cSDimitry Andric PGOOpt->Action == PGOOptions::SampleUse) 1264*349cc55cSDimitry Andric MPM.addPass(PseudoProbeUpdatePass()); 1265*349cc55cSDimitry Andric 1266*349cc55cSDimitry Andric // Emit annotation remarks. 1267*349cc55cSDimitry Andric addAnnotationRemarksPass(MPM); 1268*349cc55cSDimitry Andric 1269*349cc55cSDimitry Andric if (LTOPreLink) 1270*349cc55cSDimitry Andric addRequiredLTOPreLinkPasses(MPM); 1271*349cc55cSDimitry Andric 1272*349cc55cSDimitry Andric return MPM; 1273*349cc55cSDimitry Andric } 1274*349cc55cSDimitry Andric 1275*349cc55cSDimitry Andric ModulePassManager 1276*349cc55cSDimitry Andric PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { 1277*349cc55cSDimitry Andric assert(Level != OptimizationLevel::O0 && 1278*349cc55cSDimitry Andric "Must request optimizations for the default pipeline!"); 1279*349cc55cSDimitry Andric 1280*349cc55cSDimitry Andric ModulePassManager MPM; 1281*349cc55cSDimitry Andric 1282*349cc55cSDimitry Andric // Convert @llvm.global.annotations to !annotation metadata. 1283*349cc55cSDimitry Andric MPM.addPass(Annotation2MetadataPass()); 1284*349cc55cSDimitry Andric 1285*349cc55cSDimitry Andric // Force any function attributes we want the rest of the pipeline to observe. 1286*349cc55cSDimitry Andric MPM.addPass(ForceFunctionAttrsPass()); 1287*349cc55cSDimitry Andric 1288*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->DebugInfoForProfiling) 1289*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); 1290*349cc55cSDimitry Andric 1291*349cc55cSDimitry Andric // Apply module pipeline start EP callback. 1292*349cc55cSDimitry Andric for (auto &C : PipelineStartEPCallbacks) 1293*349cc55cSDimitry Andric C(MPM, Level); 1294*349cc55cSDimitry Andric 1295*349cc55cSDimitry Andric // If we are planning to perform ThinLTO later, we don't bloat the code with 1296*349cc55cSDimitry Andric // unrolling/vectorization/... now. Just simplify the module as much as we 1297*349cc55cSDimitry Andric // can. 1298*349cc55cSDimitry Andric MPM.addPass(buildModuleSimplificationPipeline( 1299*349cc55cSDimitry Andric Level, ThinOrFullLTOPhase::ThinLTOPreLink)); 1300*349cc55cSDimitry Andric 1301*349cc55cSDimitry Andric // Run partial inlining pass to partially inline functions that have 1302*349cc55cSDimitry Andric // large bodies. 1303*349cc55cSDimitry Andric // FIXME: It isn't clear whether this is really the right place to run this 1304*349cc55cSDimitry Andric // in ThinLTO. Because there is another canonicalization and simplification 1305*349cc55cSDimitry Andric // phase that will run after the thin link, running this here ends up with 1306*349cc55cSDimitry Andric // less information than will be available later and it may grow functions in 1307*349cc55cSDimitry Andric // ways that aren't beneficial. 1308*349cc55cSDimitry Andric if (RunPartialInlining) 1309*349cc55cSDimitry Andric MPM.addPass(PartialInlinerPass()); 1310*349cc55cSDimitry Andric 1311*349cc55cSDimitry Andric // Reduce the size of the IR as much as possible. 1312*349cc55cSDimitry Andric MPM.addPass(GlobalOptPass()); 1313*349cc55cSDimitry Andric 1314*349cc55cSDimitry Andric // Module simplification splits coroutines, but does not fully clean up 1315*349cc55cSDimitry Andric // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up 1316*349cc55cSDimitry Andric // on these, we schedule the cleanup here. 1317*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); 1318*349cc55cSDimitry Andric 1319*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 1320*349cc55cSDimitry Andric PGOOpt->Action == PGOOptions::SampleUse) 1321*349cc55cSDimitry Andric MPM.addPass(PseudoProbeUpdatePass()); 1322*349cc55cSDimitry Andric 1323*349cc55cSDimitry Andric // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual 1324*349cc55cSDimitry Andric // optimization is going to be done in PostLink stage, but clang can't 1325*349cc55cSDimitry Andric // add callbacks there in case of in-process ThinLTO called by linker. 1326*349cc55cSDimitry Andric for (auto &C : OptimizerLastEPCallbacks) 1327*349cc55cSDimitry Andric C(MPM, Level); 1328*349cc55cSDimitry Andric 1329*349cc55cSDimitry Andric // Emit annotation remarks. 1330*349cc55cSDimitry Andric addAnnotationRemarksPass(MPM); 1331*349cc55cSDimitry Andric 1332*349cc55cSDimitry Andric addRequiredLTOPreLinkPasses(MPM); 1333*349cc55cSDimitry Andric 1334*349cc55cSDimitry Andric return MPM; 1335*349cc55cSDimitry Andric } 1336*349cc55cSDimitry Andric 1337*349cc55cSDimitry Andric ModulePassManager PassBuilder::buildThinLTODefaultPipeline( 1338*349cc55cSDimitry Andric OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { 1339*349cc55cSDimitry Andric ModulePassManager MPM; 1340*349cc55cSDimitry Andric 1341*349cc55cSDimitry Andric // Convert @llvm.global.annotations to !annotation metadata. 1342*349cc55cSDimitry Andric MPM.addPass(Annotation2MetadataPass()); 1343*349cc55cSDimitry Andric 1344*349cc55cSDimitry Andric if (ImportSummary) { 1345*349cc55cSDimitry Andric // These passes import type identifier resolutions for whole-program 1346*349cc55cSDimitry Andric // devirtualization and CFI. They must run early because other passes may 1347*349cc55cSDimitry Andric // disturb the specific instruction patterns that these passes look for, 1348*349cc55cSDimitry Andric // creating dependencies on resolutions that may not appear in the summary. 1349*349cc55cSDimitry Andric // 1350*349cc55cSDimitry Andric // For example, GVN may transform the pattern assume(type.test) appearing in 1351*349cc55cSDimitry Andric // two basic blocks into assume(phi(type.test, type.test)), which would 1352*349cc55cSDimitry Andric // transform a dependency on a WPD resolution into a dependency on a type 1353*349cc55cSDimitry Andric // identifier resolution for CFI. 1354*349cc55cSDimitry Andric // 1355*349cc55cSDimitry Andric // Also, WPD has access to more precise information than ICP and can 1356*349cc55cSDimitry Andric // devirtualize more effectively, so it should operate on the IR first. 1357*349cc55cSDimitry Andric // 1358*349cc55cSDimitry Andric // The WPD and LowerTypeTest passes need to run at -O0 to lower type 1359*349cc55cSDimitry Andric // metadata and intrinsics. 1360*349cc55cSDimitry Andric MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary)); 1361*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); 1362*349cc55cSDimitry Andric } 1363*349cc55cSDimitry Andric 1364*349cc55cSDimitry Andric if (Level == OptimizationLevel::O0) { 1365*349cc55cSDimitry Andric // Run a second time to clean up any type tests left behind by WPD for use 1366*349cc55cSDimitry Andric // in ICP. 1367*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1368*349cc55cSDimitry Andric // Drop available_externally and unreferenced globals. This is necessary 1369*349cc55cSDimitry Andric // with ThinLTO in order to avoid leaving undefined references to dead 1370*349cc55cSDimitry Andric // globals in the object file. 1371*349cc55cSDimitry Andric MPM.addPass(EliminateAvailableExternallyPass()); 1372*349cc55cSDimitry Andric MPM.addPass(GlobalDCEPass()); 1373*349cc55cSDimitry Andric return MPM; 1374*349cc55cSDimitry Andric } 1375*349cc55cSDimitry Andric 1376*349cc55cSDimitry Andric // Force any function attributes we want the rest of the pipeline to observe. 1377*349cc55cSDimitry Andric MPM.addPass(ForceFunctionAttrsPass()); 1378*349cc55cSDimitry Andric 1379*349cc55cSDimitry Andric // Add the core simplification pipeline. 1380*349cc55cSDimitry Andric MPM.addPass(buildModuleSimplificationPipeline( 1381*349cc55cSDimitry Andric Level, ThinOrFullLTOPhase::ThinLTOPostLink)); 1382*349cc55cSDimitry Andric 1383*349cc55cSDimitry Andric // Now add the optimization pipeline. 1384*349cc55cSDimitry Andric MPM.addPass(buildModuleOptimizationPipeline(Level)); 1385*349cc55cSDimitry Andric 1386*349cc55cSDimitry Andric // Emit annotation remarks. 1387*349cc55cSDimitry Andric addAnnotationRemarksPass(MPM); 1388*349cc55cSDimitry Andric 1389*349cc55cSDimitry Andric return MPM; 1390*349cc55cSDimitry Andric } 1391*349cc55cSDimitry Andric 1392*349cc55cSDimitry Andric ModulePassManager 1393*349cc55cSDimitry Andric PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { 1394*349cc55cSDimitry Andric assert(Level != OptimizationLevel::O0 && 1395*349cc55cSDimitry Andric "Must request optimizations for the default pipeline!"); 1396*349cc55cSDimitry Andric // FIXME: We should use a customized pre-link pipeline! 1397*349cc55cSDimitry Andric return buildPerModuleDefaultPipeline(Level, 1398*349cc55cSDimitry Andric /* LTOPreLink */ true); 1399*349cc55cSDimitry Andric } 1400*349cc55cSDimitry Andric 1401*349cc55cSDimitry Andric ModulePassManager 1402*349cc55cSDimitry Andric PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, 1403*349cc55cSDimitry Andric ModuleSummaryIndex *ExportSummary) { 1404*349cc55cSDimitry Andric ModulePassManager MPM; 1405*349cc55cSDimitry Andric 1406*349cc55cSDimitry Andric // Convert @llvm.global.annotations to !annotation metadata. 1407*349cc55cSDimitry Andric MPM.addPass(Annotation2MetadataPass()); 1408*349cc55cSDimitry Andric 1409*349cc55cSDimitry Andric // Create a function that performs CFI checks for cross-DSO calls with targets 1410*349cc55cSDimitry Andric // in the current module. 1411*349cc55cSDimitry Andric MPM.addPass(CrossDSOCFIPass()); 1412*349cc55cSDimitry Andric 1413*349cc55cSDimitry Andric if (Level == OptimizationLevel::O0) { 1414*349cc55cSDimitry Andric // The WPD and LowerTypeTest passes need to run at -O0 to lower type 1415*349cc55cSDimitry Andric // metadata and intrinsics. 1416*349cc55cSDimitry Andric MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); 1417*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1418*349cc55cSDimitry Andric // Run a second time to clean up any type tests left behind by WPD for use 1419*349cc55cSDimitry Andric // in ICP. 1420*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1421*349cc55cSDimitry Andric 1422*349cc55cSDimitry Andric // Emit annotation remarks. 1423*349cc55cSDimitry Andric addAnnotationRemarksPass(MPM); 1424*349cc55cSDimitry Andric 1425*349cc55cSDimitry Andric return MPM; 1426*349cc55cSDimitry Andric } 1427*349cc55cSDimitry Andric 1428*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { 1429*349cc55cSDimitry Andric // Load sample profile before running the LTO optimization pipeline. 1430*349cc55cSDimitry Andric MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, 1431*349cc55cSDimitry Andric PGOOpt->ProfileRemappingFile, 1432*349cc55cSDimitry Andric ThinOrFullLTOPhase::FullLTOPostLink)); 1433*349cc55cSDimitry Andric // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 1434*349cc55cSDimitry Andric // RequireAnalysisPass for PSI before subsequent non-module passes. 1435*349cc55cSDimitry Andric MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 1436*349cc55cSDimitry Andric } 1437*349cc55cSDimitry Andric 1438*349cc55cSDimitry Andric // Remove unused virtual tables to improve the quality of code generated by 1439*349cc55cSDimitry Andric // whole-program devirtualization and bitset lowering. 1440*349cc55cSDimitry Andric MPM.addPass(GlobalDCEPass()); 1441*349cc55cSDimitry Andric 1442*349cc55cSDimitry Andric // Force any function attributes we want the rest of the pipeline to observe. 1443*349cc55cSDimitry Andric MPM.addPass(ForceFunctionAttrsPass()); 1444*349cc55cSDimitry Andric 1445*349cc55cSDimitry Andric // Do basic inference of function attributes from known properties of system 1446*349cc55cSDimitry Andric // libraries and other oracles. 1447*349cc55cSDimitry Andric MPM.addPass(InferFunctionAttrsPass()); 1448*349cc55cSDimitry Andric 1449*349cc55cSDimitry Andric if (Level.getSpeedupLevel() > 1) { 1450*349cc55cSDimitry Andric FunctionPassManager EarlyFPM; 1451*349cc55cSDimitry Andric EarlyFPM.addPass(CallSiteSplittingPass()); 1452*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor( 1453*349cc55cSDimitry Andric std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses)); 1454*349cc55cSDimitry Andric 1455*349cc55cSDimitry Andric // Indirect call promotion. This should promote all the targets that are 1456*349cc55cSDimitry Andric // left by the earlier promotion pass that promotes intra-module targets. 1457*349cc55cSDimitry Andric // This two-step promotion is to save the compile time. For LTO, it should 1458*349cc55cSDimitry Andric // produce the same result as if we only do promotion here. 1459*349cc55cSDimitry Andric MPM.addPass(PGOIndirectCallPromotion( 1460*349cc55cSDimitry Andric true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); 1461*349cc55cSDimitry Andric 1462*349cc55cSDimitry Andric if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) 1463*349cc55cSDimitry Andric MPM.addPass(FunctionSpecializationPass()); 1464*349cc55cSDimitry Andric // Propagate constants at call sites into the functions they call. This 1465*349cc55cSDimitry Andric // opens opportunities for globalopt (and inlining) by substituting function 1466*349cc55cSDimitry Andric // pointers passed as arguments to direct uses of functions. 1467*349cc55cSDimitry Andric MPM.addPass(IPSCCPPass()); 1468*349cc55cSDimitry Andric 1469*349cc55cSDimitry Andric // Attach metadata to indirect call sites indicating the set of functions 1470*349cc55cSDimitry Andric // they may target at run-time. This should follow IPSCCP. 1471*349cc55cSDimitry Andric MPM.addPass(CalledValuePropagationPass()); 1472*349cc55cSDimitry Andric } 1473*349cc55cSDimitry Andric 1474*349cc55cSDimitry Andric // Now deduce any function attributes based in the current code. 1475*349cc55cSDimitry Andric MPM.addPass( 1476*349cc55cSDimitry Andric createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); 1477*349cc55cSDimitry Andric 1478*349cc55cSDimitry Andric // Do RPO function attribute inference across the module to forward-propagate 1479*349cc55cSDimitry Andric // attributes where applicable. 1480*349cc55cSDimitry Andric // FIXME: Is this really an optimization rather than a canonicalization? 1481*349cc55cSDimitry Andric MPM.addPass(ReversePostOrderFunctionAttrsPass()); 1482*349cc55cSDimitry Andric 1483*349cc55cSDimitry Andric // Use in-range annotations on GEP indices to split globals where beneficial. 1484*349cc55cSDimitry Andric MPM.addPass(GlobalSplitPass()); 1485*349cc55cSDimitry Andric 1486*349cc55cSDimitry Andric // Run whole program optimization of virtual call when the list of callees 1487*349cc55cSDimitry Andric // is fixed. 1488*349cc55cSDimitry Andric MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); 1489*349cc55cSDimitry Andric 1490*349cc55cSDimitry Andric // Stop here at -O1. 1491*349cc55cSDimitry Andric if (Level == OptimizationLevel::O1) { 1492*349cc55cSDimitry Andric // The LowerTypeTestsPass needs to run to lower type metadata and the 1493*349cc55cSDimitry Andric // type.test intrinsics. The pass does nothing if CFI is disabled. 1494*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1495*349cc55cSDimitry Andric // Run a second time to clean up any type tests left behind by WPD for use 1496*349cc55cSDimitry Andric // in ICP (which is performed earlier than this in the regular LTO 1497*349cc55cSDimitry Andric // pipeline). 1498*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1499*349cc55cSDimitry Andric 1500*349cc55cSDimitry Andric // Emit annotation remarks. 1501*349cc55cSDimitry Andric addAnnotationRemarksPass(MPM); 1502*349cc55cSDimitry Andric 1503*349cc55cSDimitry Andric return MPM; 1504*349cc55cSDimitry Andric } 1505*349cc55cSDimitry Andric 1506*349cc55cSDimitry Andric // Optimize globals to try and fold them into constants. 1507*349cc55cSDimitry Andric MPM.addPass(GlobalOptPass()); 1508*349cc55cSDimitry Andric 1509*349cc55cSDimitry Andric // Promote any localized globals to SSA registers. 1510*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); 1511*349cc55cSDimitry Andric 1512*349cc55cSDimitry Andric // Linking modules together can lead to duplicate global constant, only 1513*349cc55cSDimitry Andric // keep one copy of each constant. 1514*349cc55cSDimitry Andric MPM.addPass(ConstantMergePass()); 1515*349cc55cSDimitry Andric 1516*349cc55cSDimitry Andric // Remove unused arguments from functions. 1517*349cc55cSDimitry Andric MPM.addPass(DeadArgumentEliminationPass()); 1518*349cc55cSDimitry Andric 1519*349cc55cSDimitry Andric // Reduce the code after globalopt and ipsccp. Both can open up significant 1520*349cc55cSDimitry Andric // simplification opportunities, and both can propagate functions through 1521*349cc55cSDimitry Andric // function pointers. When this happens, we often have to resolve varargs 1522*349cc55cSDimitry Andric // calls, etc, so let instcombine do this. 1523*349cc55cSDimitry Andric FunctionPassManager PeepholeFPM; 1524*349cc55cSDimitry Andric if (Level == OptimizationLevel::O3) 1525*349cc55cSDimitry Andric PeepholeFPM.addPass(AggressiveInstCombinePass()); 1526*349cc55cSDimitry Andric PeepholeFPM.addPass(InstCombinePass()); 1527*349cc55cSDimitry Andric invokePeepholeEPCallbacks(PeepholeFPM, Level); 1528*349cc55cSDimitry Andric 1529*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM), 1530*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses)); 1531*349cc55cSDimitry Andric 1532*349cc55cSDimitry Andric // Note: historically, the PruneEH pass was run first to deduce nounwind and 1533*349cc55cSDimitry Andric // generally clean up exception handling overhead. It isn't clear this is 1534*349cc55cSDimitry Andric // valuable as the inliner doesn't currently care whether it is inlining an 1535*349cc55cSDimitry Andric // invoke or a call. 1536*349cc55cSDimitry Andric // Run the inliner now. 1537*349cc55cSDimitry Andric MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level))); 1538*349cc55cSDimitry Andric 1539*349cc55cSDimitry Andric // Optimize globals again after we ran the inliner. 1540*349cc55cSDimitry Andric MPM.addPass(GlobalOptPass()); 1541*349cc55cSDimitry Andric 1542*349cc55cSDimitry Andric // Garbage collect dead functions. 1543*349cc55cSDimitry Andric MPM.addPass(GlobalDCEPass()); 1544*349cc55cSDimitry Andric 1545*349cc55cSDimitry Andric // If we didn't decide to inline a function, check to see if we can 1546*349cc55cSDimitry Andric // transform it to pass arguments by value instead of by reference. 1547*349cc55cSDimitry Andric MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); 1548*349cc55cSDimitry Andric 1549*349cc55cSDimitry Andric FunctionPassManager FPM; 1550*349cc55cSDimitry Andric // The IPO Passes may leave cruft around. Clean up after them. 1551*349cc55cSDimitry Andric FPM.addPass(InstCombinePass()); 1552*349cc55cSDimitry Andric invokePeepholeEPCallbacks(FPM, Level); 1553*349cc55cSDimitry Andric 1554*349cc55cSDimitry Andric FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); 1555*349cc55cSDimitry Andric 1556*349cc55cSDimitry Andric // Do a post inline PGO instrumentation and use pass. This is a context 1557*349cc55cSDimitry Andric // sensitive PGO pass. 1558*349cc55cSDimitry Andric if (PGOOpt) { 1559*349cc55cSDimitry Andric if (PGOOpt->CSAction == PGOOptions::CSIRInstr) 1560*349cc55cSDimitry Andric addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, 1561*349cc55cSDimitry Andric /* IsCS */ true, PGOOpt->CSProfileGenFile, 1562*349cc55cSDimitry Andric PGOOpt->ProfileRemappingFile); 1563*349cc55cSDimitry Andric else if (PGOOpt->CSAction == PGOOptions::CSIRUse) 1564*349cc55cSDimitry Andric addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, 1565*349cc55cSDimitry Andric /* IsCS */ true, PGOOpt->ProfileFile, 1566*349cc55cSDimitry Andric PGOOpt->ProfileRemappingFile); 1567*349cc55cSDimitry Andric } 1568*349cc55cSDimitry Andric 1569*349cc55cSDimitry Andric // Break up allocas 1570*349cc55cSDimitry Andric FPM.addPass(SROAPass()); 1571*349cc55cSDimitry Andric 1572*349cc55cSDimitry Andric // LTO provides additional opportunities for tailcall elimination due to 1573*349cc55cSDimitry Andric // link-time inlining, and visibility of nocapture attribute. 1574*349cc55cSDimitry Andric FPM.addPass(TailCallElimPass()); 1575*349cc55cSDimitry Andric 1576*349cc55cSDimitry Andric // Run a few AA driver optimizations here and now to cleanup the code. 1577*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), 1578*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses)); 1579*349cc55cSDimitry Andric 1580*349cc55cSDimitry Andric MPM.addPass( 1581*349cc55cSDimitry Andric createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); 1582*349cc55cSDimitry Andric 1583*349cc55cSDimitry Andric // Require the GlobalsAA analysis for the module so we can query it within 1584*349cc55cSDimitry Andric // MainFPM. 1585*349cc55cSDimitry Andric MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); 1586*349cc55cSDimitry Andric // Invalidate AAManager so it can be recreated and pick up the newly available 1587*349cc55cSDimitry Andric // GlobalsAA. 1588*349cc55cSDimitry Andric MPM.addPass( 1589*349cc55cSDimitry Andric createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); 1590*349cc55cSDimitry Andric 1591*349cc55cSDimitry Andric FunctionPassManager MainFPM; 1592*349cc55cSDimitry Andric MainFPM.addPass(createFunctionToLoopPassAdaptor( 1593*349cc55cSDimitry Andric LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), 1594*349cc55cSDimitry Andric /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); 1595*349cc55cSDimitry Andric 1596*349cc55cSDimitry Andric if (RunNewGVN) 1597*349cc55cSDimitry Andric MainFPM.addPass(NewGVNPass()); 1598*349cc55cSDimitry Andric else 1599*349cc55cSDimitry Andric MainFPM.addPass(GVNPass()); 1600*349cc55cSDimitry Andric 1601*349cc55cSDimitry Andric // Remove dead memcpy()'s. 1602*349cc55cSDimitry Andric MainFPM.addPass(MemCpyOptPass()); 1603*349cc55cSDimitry Andric 1604*349cc55cSDimitry Andric // Nuke dead stores. 1605*349cc55cSDimitry Andric MainFPM.addPass(DSEPass()); 1606*349cc55cSDimitry Andric MainFPM.addPass(MergedLoadStoreMotionPass()); 1607*349cc55cSDimitry Andric 1608*349cc55cSDimitry Andric // More loops are countable; try to optimize them. 1609*349cc55cSDimitry Andric if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) 1610*349cc55cSDimitry Andric MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass())); 1611*349cc55cSDimitry Andric 1612*349cc55cSDimitry Andric if (EnableConstraintElimination) 1613*349cc55cSDimitry Andric MainFPM.addPass(ConstraintEliminationPass()); 1614*349cc55cSDimitry Andric 1615*349cc55cSDimitry Andric LoopPassManager LPM; 1616*349cc55cSDimitry Andric LPM.addPass(IndVarSimplifyPass()); 1617*349cc55cSDimitry Andric LPM.addPass(LoopDeletionPass()); 1618*349cc55cSDimitry Andric // FIXME: Add loop interchange. 1619*349cc55cSDimitry Andric 1620*349cc55cSDimitry Andric // Unroll small loops and perform peeling. 1621*349cc55cSDimitry Andric LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 1622*349cc55cSDimitry Andric /* OnlyWhenForced= */ !PTO.LoopUnrolling, 1623*349cc55cSDimitry Andric PTO.ForgetAllSCEVInLoopUnroll)); 1624*349cc55cSDimitry Andric // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. 1625*349cc55cSDimitry Andric // *All* loop passes must preserve it, in order to be able to use it. 1626*349cc55cSDimitry Andric MainFPM.addPass(createFunctionToLoopPassAdaptor( 1627*349cc55cSDimitry Andric std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); 1628*349cc55cSDimitry Andric 1629*349cc55cSDimitry Andric MainFPM.addPass(LoopDistributePass()); 1630*349cc55cSDimitry Andric 1631*349cc55cSDimitry Andric addVectorPasses(Level, MainFPM, /* IsFullLTO */ true); 1632*349cc55cSDimitry Andric 1633*349cc55cSDimitry Andric invokePeepholeEPCallbacks(MainFPM, Level); 1634*349cc55cSDimitry Andric MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); 1635*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), 1636*349cc55cSDimitry Andric PTO.EagerlyInvalidateAnalyses)); 1637*349cc55cSDimitry Andric 1638*349cc55cSDimitry Andric // Lower type metadata and the type.test intrinsic. This pass supports 1639*349cc55cSDimitry Andric // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs 1640*349cc55cSDimitry Andric // to be run at link time if CFI is enabled. This pass does nothing if 1641*349cc55cSDimitry Andric // CFI is disabled. 1642*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1643*349cc55cSDimitry Andric // Run a second time to clean up any type tests left behind by WPD for use 1644*349cc55cSDimitry Andric // in ICP (which is performed earlier than this in the regular LTO pipeline). 1645*349cc55cSDimitry Andric MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1646*349cc55cSDimitry Andric 1647*349cc55cSDimitry Andric // Enable splitting late in the FullLTO post-link pipeline. This is done in 1648*349cc55cSDimitry Andric // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). 1649*349cc55cSDimitry Andric if (EnableHotColdSplit) 1650*349cc55cSDimitry Andric MPM.addPass(HotColdSplittingPass()); 1651*349cc55cSDimitry Andric 1652*349cc55cSDimitry Andric // Add late LTO optimization passes. 1653*349cc55cSDimitry Andric // Delete basic blocks, which optimization passes may have killed. 1654*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor( 1655*349cc55cSDimitry Andric SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)))); 1656*349cc55cSDimitry Andric 1657*349cc55cSDimitry Andric // Drop bodies of available eternally objects to improve GlobalDCE. 1658*349cc55cSDimitry Andric MPM.addPass(EliminateAvailableExternallyPass()); 1659*349cc55cSDimitry Andric 1660*349cc55cSDimitry Andric // Now that we have optimized the program, discard unreachable functions. 1661*349cc55cSDimitry Andric MPM.addPass(GlobalDCEPass()); 1662*349cc55cSDimitry Andric 1663*349cc55cSDimitry Andric if (PTO.MergeFunctions) 1664*349cc55cSDimitry Andric MPM.addPass(MergeFunctionsPass()); 1665*349cc55cSDimitry Andric 1666*349cc55cSDimitry Andric // Emit annotation remarks. 1667*349cc55cSDimitry Andric addAnnotationRemarksPass(MPM); 1668*349cc55cSDimitry Andric 1669*349cc55cSDimitry Andric return MPM; 1670*349cc55cSDimitry Andric } 1671*349cc55cSDimitry Andric 1672*349cc55cSDimitry Andric ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, 1673*349cc55cSDimitry Andric bool LTOPreLink) { 1674*349cc55cSDimitry Andric assert(Level == OptimizationLevel::O0 && 1675*349cc55cSDimitry Andric "buildO0DefaultPipeline should only be used with O0"); 1676*349cc55cSDimitry Andric 1677*349cc55cSDimitry Andric ModulePassManager MPM; 1678*349cc55cSDimitry Andric 1679*349cc55cSDimitry Andric // Perform pseudo probe instrumentation in O0 mode. This is for the 1680*349cc55cSDimitry Andric // consistency between different build modes. For example, a LTO build can be 1681*349cc55cSDimitry Andric // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in 1682*349cc55cSDimitry Andric // the postlink will require pseudo probe instrumentation in the prelink. 1683*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->PseudoProbeForProfiling) 1684*349cc55cSDimitry Andric MPM.addPass(SampleProfileProbePass(TM)); 1685*349cc55cSDimitry Andric 1686*349cc55cSDimitry Andric if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || 1687*349cc55cSDimitry Andric PGOOpt->Action == PGOOptions::IRUse)) 1688*349cc55cSDimitry Andric addPGOInstrPassesForO0( 1689*349cc55cSDimitry Andric MPM, 1690*349cc55cSDimitry Andric /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), 1691*349cc55cSDimitry Andric /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); 1692*349cc55cSDimitry Andric 1693*349cc55cSDimitry Andric for (auto &C : PipelineStartEPCallbacks) 1694*349cc55cSDimitry Andric C(MPM, Level); 1695*349cc55cSDimitry Andric 1696*349cc55cSDimitry Andric if (PGOOpt && PGOOpt->DebugInfoForProfiling) 1697*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); 1698*349cc55cSDimitry Andric 1699*349cc55cSDimitry Andric for (auto &C : PipelineEarlySimplificationEPCallbacks) 1700*349cc55cSDimitry Andric C(MPM, Level); 1701*349cc55cSDimitry Andric 1702*349cc55cSDimitry Andric // Build a minimal pipeline based on the semantics required by LLVM, 1703*349cc55cSDimitry Andric // which is just that always inlining occurs. Further, disable generating 1704*349cc55cSDimitry Andric // lifetime intrinsics to avoid enabling further optimizations during 1705*349cc55cSDimitry Andric // code generation. 1706*349cc55cSDimitry Andric MPM.addPass(AlwaysInlinerPass( 1707*349cc55cSDimitry Andric /*InsertLifetimeIntrinsics=*/false)); 1708*349cc55cSDimitry Andric 1709*349cc55cSDimitry Andric if (PTO.MergeFunctions) 1710*349cc55cSDimitry Andric MPM.addPass(MergeFunctionsPass()); 1711*349cc55cSDimitry Andric 1712*349cc55cSDimitry Andric if (EnableMatrix) 1713*349cc55cSDimitry Andric MPM.addPass( 1714*349cc55cSDimitry Andric createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true))); 1715*349cc55cSDimitry Andric 1716*349cc55cSDimitry Andric if (!CGSCCOptimizerLateEPCallbacks.empty()) { 1717*349cc55cSDimitry Andric CGSCCPassManager CGPM; 1718*349cc55cSDimitry Andric for (auto &C : CGSCCOptimizerLateEPCallbacks) 1719*349cc55cSDimitry Andric C(CGPM, Level); 1720*349cc55cSDimitry Andric if (!CGPM.isEmpty()) 1721*349cc55cSDimitry Andric MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); 1722*349cc55cSDimitry Andric } 1723*349cc55cSDimitry Andric if (!LateLoopOptimizationsEPCallbacks.empty()) { 1724*349cc55cSDimitry Andric LoopPassManager LPM; 1725*349cc55cSDimitry Andric for (auto &C : LateLoopOptimizationsEPCallbacks) 1726*349cc55cSDimitry Andric C(LPM, Level); 1727*349cc55cSDimitry Andric if (!LPM.isEmpty()) { 1728*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor( 1729*349cc55cSDimitry Andric createFunctionToLoopPassAdaptor(std::move(LPM)))); 1730*349cc55cSDimitry Andric } 1731*349cc55cSDimitry Andric } 1732*349cc55cSDimitry Andric if (!LoopOptimizerEndEPCallbacks.empty()) { 1733*349cc55cSDimitry Andric LoopPassManager LPM; 1734*349cc55cSDimitry Andric for (auto &C : LoopOptimizerEndEPCallbacks) 1735*349cc55cSDimitry Andric C(LPM, Level); 1736*349cc55cSDimitry Andric if (!LPM.isEmpty()) { 1737*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor( 1738*349cc55cSDimitry Andric createFunctionToLoopPassAdaptor(std::move(LPM)))); 1739*349cc55cSDimitry Andric } 1740*349cc55cSDimitry Andric } 1741*349cc55cSDimitry Andric if (!ScalarOptimizerLateEPCallbacks.empty()) { 1742*349cc55cSDimitry Andric FunctionPassManager FPM; 1743*349cc55cSDimitry Andric for (auto &C : ScalarOptimizerLateEPCallbacks) 1744*349cc55cSDimitry Andric C(FPM, Level); 1745*349cc55cSDimitry Andric if (!FPM.isEmpty()) 1746*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); 1747*349cc55cSDimitry Andric } 1748*349cc55cSDimitry Andric if (!VectorizerStartEPCallbacks.empty()) { 1749*349cc55cSDimitry Andric FunctionPassManager FPM; 1750*349cc55cSDimitry Andric for (auto &C : VectorizerStartEPCallbacks) 1751*349cc55cSDimitry Andric C(FPM, Level); 1752*349cc55cSDimitry Andric if (!FPM.isEmpty()) 1753*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); 1754*349cc55cSDimitry Andric } 1755*349cc55cSDimitry Andric 1756*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass())); 1757*349cc55cSDimitry Andric CGSCCPassManager CGPM; 1758*349cc55cSDimitry Andric CGPM.addPass(CoroSplitPass()); 1759*349cc55cSDimitry Andric MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); 1760*349cc55cSDimitry Andric MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); 1761*349cc55cSDimitry Andric 1762*349cc55cSDimitry Andric for (auto &C : OptimizerLastEPCallbacks) 1763*349cc55cSDimitry Andric C(MPM, Level); 1764*349cc55cSDimitry Andric 1765*349cc55cSDimitry Andric if (LTOPreLink) 1766*349cc55cSDimitry Andric addRequiredLTOPreLinkPasses(MPM); 1767*349cc55cSDimitry Andric 1768*349cc55cSDimitry Andric return MPM; 1769*349cc55cSDimitry Andric } 1770*349cc55cSDimitry Andric 1771*349cc55cSDimitry Andric AAManager PassBuilder::buildDefaultAAPipeline() { 1772*349cc55cSDimitry Andric AAManager AA; 1773*349cc55cSDimitry Andric 1774*349cc55cSDimitry Andric // The order in which these are registered determines their priority when 1775*349cc55cSDimitry Andric // being queried. 1776*349cc55cSDimitry Andric 1777*349cc55cSDimitry Andric // First we register the basic alias analysis that provides the majority of 1778*349cc55cSDimitry Andric // per-function local AA logic. This is a stateless, on-demand local set of 1779*349cc55cSDimitry Andric // AA techniques. 1780*349cc55cSDimitry Andric AA.registerFunctionAnalysis<BasicAA>(); 1781*349cc55cSDimitry Andric 1782*349cc55cSDimitry Andric // Next we query fast, specialized alias analyses that wrap IR-embedded 1783*349cc55cSDimitry Andric // information about aliasing. 1784*349cc55cSDimitry Andric AA.registerFunctionAnalysis<ScopedNoAliasAA>(); 1785*349cc55cSDimitry Andric AA.registerFunctionAnalysis<TypeBasedAA>(); 1786*349cc55cSDimitry Andric 1787*349cc55cSDimitry Andric // Add support for querying global aliasing information when available. 1788*349cc55cSDimitry Andric // Because the `AAManager` is a function analysis and `GlobalsAA` is a module 1789*349cc55cSDimitry Andric // analysis, all that the `AAManager` can do is query for any *cached* 1790*349cc55cSDimitry Andric // results from `GlobalsAA` through a readonly proxy. 1791*349cc55cSDimitry Andric AA.registerModuleAnalysis<GlobalsAA>(); 1792*349cc55cSDimitry Andric 1793*349cc55cSDimitry Andric // Add target-specific alias analyses. 1794*349cc55cSDimitry Andric if (TM) 1795*349cc55cSDimitry Andric TM->registerDefaultAliasAnalyses(AA); 1796*349cc55cSDimitry Andric 1797*349cc55cSDimitry Andric return AA; 1798*349cc55cSDimitry Andric } 1799