1c3ddc13dSArthur Eubanks //===- Construction of pass pipelines -------------------------------------===// 2c3ddc13dSArthur Eubanks // 3c3ddc13dSArthur Eubanks // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4c3ddc13dSArthur Eubanks // See https://llvm.org/LICENSE.txt for license information. 5c3ddc13dSArthur Eubanks // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6c3ddc13dSArthur Eubanks // 7c3ddc13dSArthur Eubanks //===----------------------------------------------------------------------===// 8c3ddc13dSArthur Eubanks /// \file 9c3ddc13dSArthur Eubanks /// 10c3ddc13dSArthur Eubanks /// This file provides the implementation of the PassBuilder based on our 11c3ddc13dSArthur Eubanks /// static pass registry as well as related functionality. It also provides 12c3ddc13dSArthur Eubanks /// helpers to aid in analyzing, debugging, and testing passes and pass 13c3ddc13dSArthur Eubanks /// pipelines. 14c3ddc13dSArthur Eubanks /// 15c3ddc13dSArthur Eubanks //===----------------------------------------------------------------------===// 16c3ddc13dSArthur Eubanks 1725af6507SArthur Eubanks #include "llvm/ADT/Statistic.h" 18c3ddc13dSArthur Eubanks #include "llvm/Analysis/AliasAnalysis.h" 19c3ddc13dSArthur Eubanks #include "llvm/Analysis/BasicAliasAnalysis.h" 20c3ddc13dSArthur Eubanks #include "llvm/Analysis/CGSCCPassManager.h" 21aca01bffSMircea Trofin #include "llvm/Analysis/CtxProfAnalysis.h" 22c3ddc13dSArthur Eubanks #include "llvm/Analysis/GlobalsModRef.h" 23c3ddc13dSArthur Eubanks #include "llvm/Analysis/InlineAdvisor.h" 24c3ddc13dSArthur Eubanks #include "llvm/Analysis/ProfileSummaryInfo.h" 25c3ddc13dSArthur Eubanks #include "llvm/Analysis/ScopedNoAliasAA.h" 26c3ddc13dSArthur Eubanks #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 27d23c5c2dSKyungwoo Lee #include "llvm/CodeGen/GlobalMergeFunctions.h" 28c3ddc13dSArthur Eubanks #include "llvm/IR/PassManager.h" 293b226180SMircea Trofin #include "llvm/Pass.h" 30c3ddc13dSArthur 
Eubanks #include "llvm/Passes/OptimizationLevel.h" 31c3ddc13dSArthur Eubanks #include "llvm/Passes/PassBuilder.h" 32c3ddc13dSArthur Eubanks #include "llvm/Support/CommandLine.h" 33c3ddc13dSArthur Eubanks #include "llvm/Support/ErrorHandling.h" 34c3ddc13dSArthur Eubanks #include "llvm/Support/PGOOptions.h" 35516e3017SSteven Wu #include "llvm/Support/VirtualFileSystem.h" 36c3ddc13dSArthur Eubanks #include "llvm/Target/TargetMachine.h" 37c3ddc13dSArthur Eubanks #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" 38a416267aSYuxuan Chen #include "llvm/Transforms/Coroutines/CoroAnnotationElide.h" 39c3ddc13dSArthur Eubanks #include "llvm/Transforms/Coroutines/CoroCleanup.h" 409bd66b31SArthur Eubanks #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" 41c3ddc13dSArthur Eubanks #include "llvm/Transforms/Coroutines/CoroEarly.h" 42c3ddc13dSArthur Eubanks #include "llvm/Transforms/Coroutines/CoroElide.h" 43c3ddc13dSArthur Eubanks #include "llvm/Transforms/Coroutines/CoroSplit.h" 440ce6255aSAlex Voicu #include "llvm/Transforms/HipStdPar/HipStdPar.h" 45c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/AlwaysInliner.h" 46c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/Annotation2Metadata.h" 47c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/ArgumentPromotion.h" 48c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/Attributor.h" 49c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/CalledValuePropagation.h" 50c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/ConstantMerge.h" 51c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/CrossDSOCFI.h" 52c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/DeadArgumentElimination.h" 53c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/ElimAvailExtern.h" 5475a17970SPaul Kirth #include "llvm/Transforms/IPO/EmbedBitcodePass.h" 558758091aSJoseph Huber #include "llvm/Transforms/IPO/ExpandVariadics.h" 56c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" 
57c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/FunctionAttrs.h" 58c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/GlobalDCE.h" 59c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/GlobalOpt.h" 60c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/GlobalSplit.h" 61c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/HotColdSplitting.h" 62c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/IROutliner.h" 63c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/InferFunctionAttrs.h" 64c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/Inliner.h" 65c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/LowerTypeTests.h" 66700cd990STeresa Johnson #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" 67c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/MergeFunctions.h" 686cad45d5SLiqiang Tao #include "llvm/Transforms/IPO/ModuleInliner.h" 69c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/OpenMPOpt.h" 70c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/PartialInlining.h" 71c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/SCCP.h" 72c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/SampleProfile.h" 73c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/SampleProfileProbe.h" 74c3ddc13dSArthur Eubanks #include "llvm/Transforms/IPO/WholeProgramDevirt.h" 75c3ddc13dSArthur Eubanks #include "llvm/Transforms/InstCombine/InstCombine.h" 76c3ddc13dSArthur Eubanks #include "llvm/Transforms/Instrumentation/CGProfile.h" 77c3ddc13dSArthur Eubanks #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 78c3ddc13dSArthur Eubanks #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" 79c3ddc13dSArthur Eubanks #include "llvm/Transforms/Instrumentation/InstrProfiling.h" 80c3ddc13dSArthur Eubanks #include "llvm/Transforms/Instrumentation/MemProfiler.h" 81775c5070SMircea Trofin #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" 8296568f35SMircea Trofin #include 
"llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" 8393cdd1b5SArthur Eubanks #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" 84c3ddc13dSArthur Eubanks #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 85c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/ADCE.h" 86c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" 87c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/AnnotationRemarks.h" 88c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/BDCE.h" 89c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/CallSiteSplitting.h" 90c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/ConstraintElimination.h" 91c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" 92c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 93c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 94c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/DivRemPairs.h" 95c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/EarlyCSE.h" 96c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/Float2Int.h" 97c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/GVN.h" 98c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/IndVarSimplify.h" 990f152a55SDhruv Chawla #include "llvm/Transforms/Scalar/InferAlignment.h" 100c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/InstSimplifyPass.h" 101d26b43ffSAlexander Shaposhnikov #include "llvm/Transforms/Scalar/JumpTableToSwitch.h" 102c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/JumpThreading.h" 103c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LICM.h" 104c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopDeletion.h" 105c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopDistribute.h" 106c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopFlatten.h" 107c3ddc13dSArthur Eubanks #include 
"llvm/Transforms/Scalar/LoopIdiomRecognize.h" 108c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopInstSimplify.h" 109c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopInterchange.h" 110c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopLoadElimination.h" 111c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopPassManager.h" 112c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopRotation.h" 113c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" 114c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopSink.h" 115c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" 116c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LoopUnrollPass.h" 117f3c417f3Slcvon007 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" 118c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" 119c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" 120c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" 121c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" 122c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" 123c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/NewGVN.h" 124c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/Reassociate.h" 125c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/SCCP.h" 126c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/SROA.h" 127c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 128c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/SimplifyCFG.h" 129c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/SpeculativeExecution.h" 130c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/TailRecursionElimination.h" 131c3ddc13dSArthur Eubanks #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" 132c3ddc13dSArthur Eubanks #include 
"llvm/Transforms/Utils/AddDiscriminators.h" 133c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 134c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/CanonicalizeAliases.h" 1354ce34bb2SArthur Eubanks #include "llvm/Transforms/Utils/CountVisits.h" 136cab81dd0SEgor Pasko #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" 1379e662066SFlorian Hahn #include "llvm/Transforms/Utils/ExtraPassManager.h" 138c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/InjectTLIMappings.h" 139c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" 140c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/Mem2Reg.h" 141afa13ba1Sserge-sans-paille #include "llvm/Transforms/Utils/MoveAutoInit.h" 142c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/NameAnonGlobals.h" 143c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/RelLookupTableConverter.h" 144c3ddc13dSArthur Eubanks #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" 145c3ddc13dSArthur Eubanks #include "llvm/Transforms/Vectorize/LoopVectorize.h" 146c3ddc13dSArthur Eubanks #include "llvm/Transforms/Vectorize/SLPVectorizer.h" 147c3ddc13dSArthur Eubanks #include "llvm/Transforms/Vectorize/VectorCombine.h" 148c3ddc13dSArthur Eubanks 149c3ddc13dSArthur Eubanks using namespace llvm; 150c3ddc13dSArthur Eubanks 151c3ddc13dSArthur Eubanks static cl::opt<InliningAdvisorMode> UseInlineAdvisor( 152c3ddc13dSArthur Eubanks "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, 153c3ddc13dSArthur Eubanks cl::desc("Enable ML policy for inliner. 
Currently trained for -Oz only"), 154c3ddc13dSArthur Eubanks cl::values(clEnumValN(InliningAdvisorMode::Default, "default", 155cbcf123aSArthur Eubanks "Heuristics-based inliner version"), 156c3ddc13dSArthur Eubanks clEnumValN(InliningAdvisorMode::Development, "development", 157cbcf123aSArthur Eubanks "Use development mode (runtime-loadable model)"), 158c3ddc13dSArthur Eubanks clEnumValN(InliningAdvisorMode::Release, "release", 159cbcf123aSArthur Eubanks "Use release mode (AOT-compiled model)"))); 160c3ddc13dSArthur Eubanks 161c3ddc13dSArthur Eubanks /// Flag to enable inline deferral during PGO. 162c3ddc13dSArthur Eubanks static cl::opt<bool> 163c3ddc13dSArthur Eubanks EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), 164c3ddc13dSArthur Eubanks cl::Hidden, 165c3ddc13dSArthur Eubanks cl::desc("Enable inline deferral during PGO")); 166c3ddc13dSArthur Eubanks 1676cad45d5SLiqiang Tao static cl::opt<bool> EnableModuleInliner("enable-module-inliner", 1686cad45d5SLiqiang Tao cl::init(false), cl::Hidden, 1696cad45d5SLiqiang Tao cl::desc("Enable module inliner")); 1706cad45d5SLiqiang Tao 171c3ddc13dSArthur Eubanks static cl::opt<bool> PerformMandatoryInliningsFirst( 1721a2e77cfSAmara Emerson "mandatory-inlining-first", cl::init(false), cl::Hidden, 173c3ddc13dSArthur Eubanks cl::desc("Perform mandatory inlinings module-wide, before performing " 174cbcf123aSArthur Eubanks "inlining")); 175c3ddc13dSArthur Eubanks 1767175886aSArthur Eubanks static cl::opt<bool> EnableEagerlyInvalidateAnalyses( 17719867de9SArthur Eubanks "eagerly-invalidate-analyses", cl::init(true), cl::Hidden, 1787175886aSArthur Eubanks cl::desc("Eagerly invalidate more analyses in default pipelines")); 1797175886aSArthur Eubanks 1805b94037aSNikita Popov static cl::opt<bool> EnableMergeFunctions( 1815b94037aSNikita Popov "enable-merge-functions", cl::init(false), cl::Hidden, 1825b94037aSNikita Popov cl::desc("Enable function merging as part of the optimization pipeline")); 
1835b94037aSNikita Popov 1840f946a50SEllis Hoag static cl::opt<bool> EnablePostPGOLoopRotation( 1850f946a50SEllis Hoag "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, 1860f946a50SEllis Hoag cl::desc("Run the loop rotation transformation after PGO instrumentation")); 1870f946a50SEllis Hoag 188d953d017SNuno Lopes static cl::opt<bool> EnableGlobalAnalyses( 189d953d017SNuno Lopes "enable-global-analyses", cl::init(true), cl::Hidden, 190d953d017SNuno Lopes cl::desc("Enable inter-procedural analyses")); 191d953d017SNuno Lopes 19267efbd0bSRyan Mansfield static cl::opt<bool> RunPartialInlining("enable-partial-inlining", 19367efbd0bSRyan Mansfield cl::init(false), cl::Hidden, 19467efbd0bSRyan Mansfield cl::desc("Run Partial inlining pass")); 195cbcf123aSArthur Eubanks 196cbcf123aSArthur Eubanks static cl::opt<bool> ExtraVectorizerPasses( 197cbcf123aSArthur Eubanks "extra-vectorizer-passes", cl::init(false), cl::Hidden, 198cbcf123aSArthur Eubanks cl::desc("Run cleanup optimization passes after vectorization")); 199cbcf123aSArthur Eubanks 200cbcf123aSArthur Eubanks static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, 201cbcf123aSArthur Eubanks cl::desc("Run the NewGVN pass")); 202cbcf123aSArthur Eubanks 203cbcf123aSArthur Eubanks static cl::opt<bool> EnableLoopInterchange( 204cbcf123aSArthur Eubanks "enable-loopinterchange", cl::init(false), cl::Hidden, 205cbcf123aSArthur Eubanks cl::desc("Enable the experimental LoopInterchange Pass")); 206cbcf123aSArthur Eubanks 207cbcf123aSArthur Eubanks static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", 208cbcf123aSArthur Eubanks cl::init(false), cl::Hidden, 209cbcf123aSArthur Eubanks cl::desc("Enable Unroll And Jam Pass")); 210cbcf123aSArthur Eubanks 21182501802SSjoerd Meijer static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false), 212cbcf123aSArthur Eubanks cl::Hidden, 213cbcf123aSArthur Eubanks cl::desc("Enable the LoopFlatten Pass")); 214cbcf123aSArthur Eubanks 
2152fef6853SPaul Kirth // Experimentally allow loop header duplication. This should allow for better 2162fef6853SPaul Kirth // optimization at Oz, since loop-idiom recognition can then recognize things 2172fef6853SPaul Kirth // like memcpy. If this ends up being useful for many targets, we should drop 2182fef6853SPaul Kirth // this flag and make a code generation option that can be controlled 2192fef6853SPaul Kirth // independent of the opt level and exposed through the frontend. 2202fef6853SPaul Kirth static cl::opt<bool> EnableLoopHeaderDuplication( 2212fef6853SPaul Kirth "enable-loop-header-duplication", cl::init(false), cl::Hidden, 2222fef6853SPaul Kirth cl::desc("Enable loop header duplication at any optimization level")); 2232fef6853SPaul Kirth 224cbcf123aSArthur Eubanks static cl::opt<bool> 225cbcf123aSArthur Eubanks EnableDFAJumpThreading("enable-dfa-jump-thread", 226cbcf123aSArthur Eubanks cl::desc("Enable DFA jump threading"), 227cbcf123aSArthur Eubanks cl::init(false), cl::Hidden); 228cbcf123aSArthur Eubanks 229cbcf123aSArthur Eubanks static cl::opt<bool> 230cbcf123aSArthur Eubanks EnableHotColdSplit("hot-cold-split", 231cbcf123aSArthur Eubanks cl::desc("Enable hot-cold splitting pass")); 232cbcf123aSArthur Eubanks 233cbcf123aSArthur Eubanks static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), 234cbcf123aSArthur Eubanks cl::Hidden, 235cbcf123aSArthur Eubanks cl::desc("Enable ir outliner pass")); 236cbcf123aSArthur Eubanks 237cbcf123aSArthur Eubanks static cl::opt<bool> 238cbcf123aSArthur Eubanks DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, 239cbcf123aSArthur Eubanks cl::desc("Disable pre-instrumentation inliner")); 240cbcf123aSArthur Eubanks 241cbcf123aSArthur Eubanks static cl::opt<int> PreInlineThreshold( 242cbcf123aSArthur Eubanks "preinline-threshold", cl::Hidden, cl::init(75), 243cbcf123aSArthur Eubanks cl::desc("Control the amount of inlining in pre-instrumentation inliner " 244cbcf123aSArthur Eubanks 
"(default = 75)")); 245cbcf123aSArthur Eubanks 246cbcf123aSArthur Eubanks static cl::opt<bool> 247cbcf123aSArthur Eubanks EnableGVNHoist("enable-gvn-hoist", 248cbcf123aSArthur Eubanks cl::desc("Enable the GVN hoisting pass (default = off)")); 249cbcf123aSArthur Eubanks 250cbcf123aSArthur Eubanks static cl::opt<bool> 251cbcf123aSArthur Eubanks EnableGVNSink("enable-gvn-sink", 252cbcf123aSArthur Eubanks cl::desc("Enable the GVN sinking pass (default = off)")); 253cbcf123aSArthur Eubanks 254d26b43ffSAlexander Shaposhnikov static cl::opt<bool> EnableJumpTableToSwitch( 255d26b43ffSAlexander Shaposhnikov "enable-jump-table-to-switch", 2569c5ca6b0SDavid Spickett cl::desc("Enable JumpTableToSwitch pass (default = off)")); 257d26b43ffSAlexander Shaposhnikov 258cbcf123aSArthur Eubanks // This option is used in simplifying testing SampleFDO optimizations for 259cbcf123aSArthur Eubanks // profile loading. 260cbcf123aSArthur Eubanks static cl::opt<bool> 261cbcf123aSArthur Eubanks EnableCHR("enable-chr", cl::init(true), cl::Hidden, 262cbcf123aSArthur Eubanks cl::desc("Enable control height reduction optimization (CHR)")); 263cbcf123aSArthur Eubanks 264cbcf123aSArthur Eubanks static cl::opt<bool> FlattenedProfileUsed( 265cbcf123aSArthur Eubanks "flattened-profile-used", cl::init(false), cl::Hidden, 266cbcf123aSArthur Eubanks cl::desc("Indicate the sample profile being used is flattened, i.e., " 26767efbd0bSRyan Mansfield "no inline hierarchy exists in the profile")); 268cbcf123aSArthur Eubanks 269cbcf123aSArthur Eubanks static cl::opt<bool> EnableOrderFileInstrumentation( 270cbcf123aSArthur Eubanks "enable-order-file-instrumentation", cl::init(false), cl::Hidden, 271cbcf123aSArthur Eubanks cl::desc("Enable order file instrumentation (default = off)")); 272cbcf123aSArthur Eubanks 273cbcf123aSArthur Eubanks static cl::opt<bool> 274cbcf123aSArthur Eubanks EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, 275cbcf123aSArthur Eubanks cl::desc("Enable lowering of the matrix 
intrinsics")); 276cbcf123aSArthur Eubanks 277cbcf123aSArthur Eubanks static cl::opt<bool> EnableConstraintElimination( 2788028263cSFlorian Hahn "enable-constraint-elimination", cl::init(true), cl::Hidden, 279cbcf123aSArthur Eubanks cl::desc( 280cbcf123aSArthur Eubanks "Enable pass to eliminate conditions based on linear constraints")); 281cbcf123aSArthur Eubanks 282cbcf123aSArthur Eubanks static cl::opt<AttributorRunOption> AttributorRun( 283cbcf123aSArthur Eubanks "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), 284cbcf123aSArthur Eubanks cl::desc("Enable the attributor inter-procedural deduction pass"), 285cbcf123aSArthur Eubanks cl::values(clEnumValN(AttributorRunOption::ALL, "all", 286cbcf123aSArthur Eubanks "enable all attributor runs"), 287cbcf123aSArthur Eubanks clEnumValN(AttributorRunOption::MODULE, "module", 288cbcf123aSArthur Eubanks "enable module-wide attributor runs"), 289cbcf123aSArthur Eubanks clEnumValN(AttributorRunOption::CGSCC, "cgscc", 290cbcf123aSArthur Eubanks "enable call graph SCC attributor runs"), 291cbcf123aSArthur Eubanks clEnumValN(AttributorRunOption::NONE, "none", 292cbcf123aSArthur Eubanks "disable attributor runs"))); 293cbcf123aSArthur Eubanks 294b1ca2a95Sxur-llvm static cl::opt<bool> EnableSampledInstr( 295b1ca2a95Sxur-llvm "enable-sampled-instrumentation", cl::init(false), cl::Hidden, 296b1ca2a95Sxur-llvm cl::desc("Enable profile instrumentation sampling (default = off)")); 297f3c417f3Slcvon007 static cl::opt<bool> UseLoopVersioningLICM( 298f3c417f3Slcvon007 "enable-loop-versioning-licm", cl::init(false), cl::Hidden, 299f3c417f3Slcvon007 cl::desc("Enable the experimental Loop Versioning LICM pass")); 300f3c417f3Slcvon007 301bef3b54eSLei Wang static cl::opt<std::string> InstrumentColdFuncOnlyPath( 302bef3b54eSLei Wang "instrument-cold-function-only-path", cl::init(""), 303bef3b54eSLei Wang cl::desc("File path for cold function only instrumentation(requires use " 304bef3b54eSLei Wang "with 
--pgo-instrument-cold-function-only)"), 305bef3b54eSLei Wang cl::Hidden); 306bef3b54eSLei Wang 3074a2bf059SMircea Trofin extern cl::opt<std::string> UseCtxProfile; 308bef3b54eSLei Wang extern cl::opt<bool> PGOInstrumentColdFunctionOnly; 309ba4da5a0SMircea Trofin 3102d854dd3SFangrui Song namespace llvm { 311e40cabfeSlifengxiang1025 extern cl::opt<bool> EnableMemProfContextDisambiguation; 3122d854dd3SFangrui Song } // namespace llvm 3130f152a55SDhruv Chawla 314c3ddc13dSArthur Eubanks PipelineTuningOptions::PipelineTuningOptions() { 315c3ddc13dSArthur Eubanks LoopInterleaving = true; 316c3ddc13dSArthur Eubanks LoopVectorization = true; 317c3ddc13dSArthur Eubanks SLPVectorization = false; 318c3ddc13dSArthur Eubanks LoopUnrolling = true; 319c3ddc13dSArthur Eubanks ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; 320c3ddc13dSArthur Eubanks LicmMssaOptCap = SetLicmMssaOptCap; 321c3ddc13dSArthur Eubanks LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; 322c3ddc13dSArthur Eubanks CallGraphProfile = true; 323a1ca3af3SMatthew Voss UnifiedLTO = false; 3245b94037aSNikita Popov MergeFunctions = EnableMergeFunctions; 3254fa32807SArthur Eubanks InlinerThreshold = -1; 3267175886aSArthur Eubanks EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; 327c3ddc13dSArthur Eubanks } 328c3ddc13dSArthur Eubanks 329c3ddc13dSArthur Eubanks namespace llvm { 330c3ddc13dSArthur Eubanks extern cl::opt<unsigned> MaxDevirtIterations; 331c3ddc13dSArthur Eubanks } // namespace llvm 332c3ddc13dSArthur Eubanks 333c3ddc13dSArthur Eubanks void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, 334c3ddc13dSArthur Eubanks OptimizationLevel Level) { 335c3ddc13dSArthur Eubanks for (auto &C : PeepholeEPCallbacks) 336c3ddc13dSArthur Eubanks C(FPM, Level); 337c3ddc13dSArthur Eubanks } 33833817296SPrem Chintalapudi void PassBuilder::invokeLateLoopOptimizationsEPCallbacks( 33933817296SPrem Chintalapudi LoopPassManager &LPM, OptimizationLevel Level) { 34033817296SPrem 
Chintalapudi for (auto &C : LateLoopOptimizationsEPCallbacks) 34133817296SPrem Chintalapudi C(LPM, Level); 34233817296SPrem Chintalapudi } 34333817296SPrem Chintalapudi void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, 34433817296SPrem Chintalapudi OptimizationLevel Level) { 34533817296SPrem Chintalapudi for (auto &C : LoopOptimizerEndEPCallbacks) 34633817296SPrem Chintalapudi C(LPM, Level); 34733817296SPrem Chintalapudi } 34833817296SPrem Chintalapudi void PassBuilder::invokeScalarOptimizerLateEPCallbacks( 34933817296SPrem Chintalapudi FunctionPassManager &FPM, OptimizationLevel Level) { 35033817296SPrem Chintalapudi for (auto &C : ScalarOptimizerLateEPCallbacks) 35133817296SPrem Chintalapudi C(FPM, Level); 35233817296SPrem Chintalapudi } 35333817296SPrem Chintalapudi void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, 35433817296SPrem Chintalapudi OptimizationLevel Level) { 35533817296SPrem Chintalapudi for (auto &C : CGSCCOptimizerLateEPCallbacks) 35633817296SPrem Chintalapudi C(CGPM, Level); 35733817296SPrem Chintalapudi } 35833817296SPrem Chintalapudi void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, 35933817296SPrem Chintalapudi OptimizationLevel Level) { 36033817296SPrem Chintalapudi for (auto &C : VectorizerStartEPCallbacks) 36133817296SPrem Chintalapudi C(FPM, Level); 36233817296SPrem Chintalapudi } 363*d3161defSAxel Sorenson void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, 364*d3161defSAxel Sorenson OptimizationLevel Level) { 365*d3161defSAxel Sorenson for (auto &C : VectorizerEndEPCallbacks) 366*d3161defSAxel Sorenson C(FPM, Level); 367*d3161defSAxel Sorenson } 36833817296SPrem Chintalapudi void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, 369390300d9SShilei Tian OptimizationLevel Level, 370390300d9SShilei Tian ThinOrFullLTOPhase Phase) { 37133817296SPrem Chintalapudi for (auto &C : OptimizerEarlyEPCallbacks) 372390300d9SShilei 
Tian C(MPM, Level, Phase); 37333817296SPrem Chintalapudi } 37433817296SPrem Chintalapudi void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, 375390300d9SShilei Tian OptimizationLevel Level, 376390300d9SShilei Tian ThinOrFullLTOPhase Phase) { 37733817296SPrem Chintalapudi for (auto &C : OptimizerLastEPCallbacks) 378390300d9SShilei Tian C(MPM, Level, Phase); 37933817296SPrem Chintalapudi } 38033817296SPrem Chintalapudi void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks( 38133817296SPrem Chintalapudi ModulePassManager &MPM, OptimizationLevel Level) { 38233817296SPrem Chintalapudi for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks) 38333817296SPrem Chintalapudi C(MPM, Level); 38433817296SPrem Chintalapudi } 38533817296SPrem Chintalapudi void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks( 38633817296SPrem Chintalapudi ModulePassManager &MPM, OptimizationLevel Level) { 38733817296SPrem Chintalapudi for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) 38833817296SPrem Chintalapudi C(MPM, Level); 38933817296SPrem Chintalapudi } 39033817296SPrem Chintalapudi void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM, 39133817296SPrem Chintalapudi OptimizationLevel Level) { 39233817296SPrem Chintalapudi for (auto &C : PipelineStartEPCallbacks) 39333817296SPrem Chintalapudi C(MPM, Level); 39433817296SPrem Chintalapudi } 39533817296SPrem Chintalapudi void PassBuilder::invokePipelineEarlySimplificationEPCallbacks( 396dc45ff1dSShilei Tian ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) { 39733817296SPrem Chintalapudi for (auto &C : PipelineEarlySimplificationEPCallbacks) 398dc45ff1dSShilei Tian C(MPM, Level, Phase); 39933817296SPrem Chintalapudi } 400c3ddc13dSArthur Eubanks 401c3ddc13dSArthur Eubanks // Helper to add AnnotationRemarksPass. 
402c3ddc13dSArthur Eubanks static void addAnnotationRemarksPass(ModulePassManager &MPM) { 40318da6810SArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); 404c3ddc13dSArthur Eubanks } 405c3ddc13dSArthur Eubanks 406c3ddc13dSArthur Eubanks // Helper to check if the current compilation phase is preparing for LTO 407c3ddc13dSArthur Eubanks static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { 408c3ddc13dSArthur Eubanks return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || 409c3ddc13dSArthur Eubanks Phase == ThinOrFullLTOPhase::FullLTOPreLink; 410c3ddc13dSArthur Eubanks } 411c3ddc13dSArthur Eubanks 412c3ddc13dSArthur Eubanks // TODO: Investigate the cost/benefit of tail call elimination on debugging. 413c3ddc13dSArthur Eubanks FunctionPassManager 414c3ddc13dSArthur Eubanks PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, 415c3ddc13dSArthur Eubanks ThinOrFullLTOPhase Phase) { 416c3ddc13dSArthur Eubanks 417c3ddc13dSArthur Eubanks FunctionPassManager FPM; 418c3ddc13dSArthur Eubanks 41925af6507SArthur Eubanks if (AreStatisticsEnabled()) 4204ce34bb2SArthur Eubanks FPM.addPass(CountVisitsPass()); 4214ce34bb2SArthur Eubanks 422c3ddc13dSArthur Eubanks // Form SSA out of local memory accesses after breaking apart aggregates into 423c3ddc13dSArthur Eubanks // scalars. 4244f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 425c3ddc13dSArthur Eubanks 426c3ddc13dSArthur Eubanks // Catch trivial redundancies 427c3ddc13dSArthur Eubanks FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); 428c3ddc13dSArthur Eubanks 429c3ddc13dSArthur Eubanks // Hoisting of scalars and load expressions. 
430371fcb72SRoman Lebedev FPM.addPass( 431371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 432c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 433c3ddc13dSArthur Eubanks 434c3ddc13dSArthur Eubanks FPM.addPass(LibCallsShrinkWrapPass()); 435c3ddc13dSArthur Eubanks 436c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 437c3ddc13dSArthur Eubanks 438371fcb72SRoman Lebedev FPM.addPass( 439371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 440c3ddc13dSArthur Eubanks 441c3ddc13dSArthur Eubanks // Form canonically associated expression trees, and simplify the trees using 442c3ddc13dSArthur Eubanks // basic mathematical properties. For example, this will form (nearly) 443c3ddc13dSArthur Eubanks // minimal multiplication trees. 444c3ddc13dSArthur Eubanks FPM.addPass(ReassociatePass()); 445c3ddc13dSArthur Eubanks 446c3ddc13dSArthur Eubanks // Add the primary loop simplification pipeline. 447c3ddc13dSArthur Eubanks // FIXME: Currently this is split into two loop pass pipelines because we run 448c3ddc13dSArthur Eubanks // some function passes in between them. These can and should be removed 449c3ddc13dSArthur Eubanks // and/or replaced by scheduling the loop pass equivalents in the correct 450c3ddc13dSArthur Eubanks // positions. But those equivalent passes aren't powerful enough yet. 451c3ddc13dSArthur Eubanks // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still 452c3ddc13dSArthur Eubanks // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to 453c3ddc13dSArthur Eubanks // fully replace `SimplifyCFGPass`, and the closest to the other we have is 454c3ddc13dSArthur Eubanks // `LoopInstSimplify`. 455c3ddc13dSArthur Eubanks LoopPassManager LPM1, LPM2; 456c3ddc13dSArthur Eubanks 457c3ddc13dSArthur Eubanks // Simplify the loop body. 
We do this initially to clean up after other loop 458c3ddc13dSArthur Eubanks // passes run, either when iterating on a loop or on inner loops with 459c3ddc13dSArthur Eubanks // implications on the outer loop. 460c3ddc13dSArthur Eubanks LPM1.addPass(LoopInstSimplifyPass()); 461c3ddc13dSArthur Eubanks LPM1.addPass(LoopSimplifyCFGPass()); 462c3ddc13dSArthur Eubanks 463c3ddc13dSArthur Eubanks // Try to remove as much code from the loop header as possible, 464d9da6a53SWilliam S. Moses // to reduce amount of IR that will have to be duplicated. However, 465d9da6a53SWilliam S. Moses // do not perform speculative hoisting the first time as LICM 466d9da6a53SWilliam S. Moses // will destroy metadata that may not need to be destroyed if run 467d9da6a53SWilliam S. Moses // after loop rotation. 468c3ddc13dSArthur Eubanks // TODO: Investigate promotion cap for O1. 469d9da6a53SWilliam S. Moses LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 470d9da6a53SWilliam S. Moses /*AllowSpeculation=*/false)); 471c3ddc13dSArthur Eubanks 472c3ddc13dSArthur Eubanks LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, 473c3ddc13dSArthur Eubanks isLTOPreLink(Phase))); 474c3ddc13dSArthur Eubanks // TODO: Investigate promotion cap for O1. 475d9da6a53SWilliam S. Moses LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 476d9da6a53SWilliam S. 
Moses /*AllowSpeculation=*/true)); 477c3ddc13dSArthur Eubanks LPM1.addPass(SimpleLoopUnswitchPass()); 478f269ec23SSjoerd Meijer if (EnableLoopFlatten) 479f269ec23SSjoerd Meijer LPM1.addPass(LoopFlattenPass()); 480c3ddc13dSArthur Eubanks 481c3ddc13dSArthur Eubanks LPM2.addPass(LoopIdiomRecognizePass()); 482c3ddc13dSArthur Eubanks LPM2.addPass(IndVarSimplifyPass()); 483c3ddc13dSArthur Eubanks 48433817296SPrem Chintalapudi invokeLateLoopOptimizationsEPCallbacks(LPM2, Level); 485c3ddc13dSArthur Eubanks 486c3ddc13dSArthur Eubanks LPM2.addPass(LoopDeletionPass()); 487c3ddc13dSArthur Eubanks 488c3ddc13dSArthur Eubanks if (EnableLoopInterchange) 489c3ddc13dSArthur Eubanks LPM2.addPass(LoopInterchangePass()); 490c3ddc13dSArthur Eubanks 491c3ddc13dSArthur Eubanks // Do not enable unrolling in PreLinkThinLTO phase during sample PGO 492c3ddc13dSArthur Eubanks // because it changes IR, which makes profile annotation in the backend compile 493c3ddc13dSArthur Eubanks // inaccurate. The normal unroller doesn't pay attention to forced full unroll 494c3ddc13dSArthur Eubanks // attributes so we need to make sure and allow the full unroll pass to pay 495c3ddc13dSArthur Eubanks // attention to it. 
496c3ddc13dSArthur Eubanks if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || 497c3ddc13dSArthur Eubanks PGOOpt->Action != PGOOptions::SampleUse) 498c3ddc13dSArthur Eubanks LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 499c3ddc13dSArthur Eubanks /* OnlyWhenForced= */ !PTO.LoopUnrolling, 500c3ddc13dSArthur Eubanks PTO.ForgetAllSCEVInLoopUnroll)); 501c3ddc13dSArthur Eubanks 50233817296SPrem Chintalapudi invokeLoopOptimizerEndEPCallbacks(LPM2, Level); 503c3ddc13dSArthur Eubanks 504c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), 505c3ddc13dSArthur Eubanks /*UseMemorySSA=*/true, 506c3ddc13dSArthur Eubanks /*UseBlockFrequencyInfo=*/true)); 507371fcb72SRoman Lebedev FPM.addPass( 508371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 509c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 510c3ddc13dSArthur Eubanks // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. 511c3ddc13dSArthur Eubanks // *All* loop passes must preserve it, in order to be able to use it. 512c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), 513c3ddc13dSArthur Eubanks /*UseMemorySSA=*/false, 514c3ddc13dSArthur Eubanks /*UseBlockFrequencyInfo=*/false)); 515c3ddc13dSArthur Eubanks 516c3ddc13dSArthur Eubanks // Delete small array after loop unroll. 5174f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 518c3ddc13dSArthur Eubanks 519c3ddc13dSArthur Eubanks // Specially optimize memory movement as it doesn't look like dataflow in SSA. 520c3ddc13dSArthur Eubanks FPM.addPass(MemCpyOptPass()); 521c3ddc13dSArthur Eubanks 522c3ddc13dSArthur Eubanks // Sparse conditional constant propagation. 523c3ddc13dSArthur Eubanks // FIXME: It isn't clear why we do this *after* loop passes rather than 524c3ddc13dSArthur Eubanks // before... 
525c3ddc13dSArthur Eubanks FPM.addPass(SCCPPass()); 526c3ddc13dSArthur Eubanks 527c3ddc13dSArthur Eubanks // Delete dead bit computations (instcombine runs after to fold away the dead 528c3ddc13dSArthur Eubanks // computations, and then ADCE will run later to exploit any new DCE 529c3ddc13dSArthur Eubanks // opportunities that creates). 530c3ddc13dSArthur Eubanks FPM.addPass(BDCEPass()); 531c3ddc13dSArthur Eubanks 532c3ddc13dSArthur Eubanks // Run instcombine after redundancy and dead bit elimination to exploit 533c3ddc13dSArthur Eubanks // opportunities opened up by them. 534c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 535c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 536c3ddc13dSArthur Eubanks 537c3ddc13dSArthur Eubanks FPM.addPass(CoroElidePass()); 538c3ddc13dSArthur Eubanks 53933817296SPrem Chintalapudi invokeScalarOptimizerLateEPCallbacks(FPM, Level); 540c3ddc13dSArthur Eubanks 541c3ddc13dSArthur Eubanks // Finally, do an expensive DCE pass to catch all the dead code exposed by 542c3ddc13dSArthur Eubanks // the simplifications and basic cleanup after all the simplifications. 543c3ddc13dSArthur Eubanks // TODO: Investigate if this is too expensive. 
544c3ddc13dSArthur Eubanks FPM.addPass(ADCEPass()); 545371fcb72SRoman Lebedev FPM.addPass( 546371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 547c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 548c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 549c3ddc13dSArthur Eubanks 550c3ddc13dSArthur Eubanks return FPM; 551c3ddc13dSArthur Eubanks } 552c3ddc13dSArthur Eubanks 553c3ddc13dSArthur Eubanks FunctionPassManager 554c3ddc13dSArthur Eubanks PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, 555c3ddc13dSArthur Eubanks ThinOrFullLTOPhase Phase) { 556c3ddc13dSArthur Eubanks assert(Level != OptimizationLevel::O0 && "Must request optimizations!"); 557c3ddc13dSArthur Eubanks 558c3ddc13dSArthur Eubanks // The O1 pipeline has a separate pipeline creation function to simplify 559c3ddc13dSArthur Eubanks // construction readability. 560c3ddc13dSArthur Eubanks if (Level.getSpeedupLevel() == 1) 561c3ddc13dSArthur Eubanks return buildO1FunctionSimplificationPipeline(Level, Phase); 562c3ddc13dSArthur Eubanks 563c3ddc13dSArthur Eubanks FunctionPassManager FPM; 564c3ddc13dSArthur Eubanks 56525af6507SArthur Eubanks if (AreStatisticsEnabled()) 5664ce34bb2SArthur Eubanks FPM.addPass(CountVisitsPass()); 5674ce34bb2SArthur Eubanks 568c3ddc13dSArthur Eubanks // Form SSA out of local memory accesses after breaking apart aggregates into 569c3ddc13dSArthur Eubanks // scalars. 5704f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 571c3ddc13dSArthur Eubanks 572c3ddc13dSArthur Eubanks // Catch trivial redundancies 573c3ddc13dSArthur Eubanks FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); 574c3ddc13dSArthur Eubanks if (EnableKnowledgeRetention) 575c3ddc13dSArthur Eubanks FPM.addPass(AssumeSimplifyPass()); 576c3ddc13dSArthur Eubanks 577c3ddc13dSArthur Eubanks // Hoisting of scalars and load expressions. 
578c3ddc13dSArthur Eubanks if (EnableGVNHoist) 579c3ddc13dSArthur Eubanks FPM.addPass(GVNHoistPass()); 580c3ddc13dSArthur Eubanks 581c3ddc13dSArthur Eubanks // Global value numbering based sinking. 582c3ddc13dSArthur Eubanks if (EnableGVNSink) { 583c3ddc13dSArthur Eubanks FPM.addPass(GVNSinkPass()); 584371fcb72SRoman Lebedev FPM.addPass( 585371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 586c3ddc13dSArthur Eubanks } 587c3ddc13dSArthur Eubanks 588c3ddc13dSArthur Eubanks // Speculative execution if the target has divergent branches; otherwise nop. 589c3ddc13dSArthur Eubanks FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); 590c3ddc13dSArthur Eubanks 591c3ddc13dSArthur Eubanks // Optimize based on known information about branches, and cleanup afterward. 592c3ddc13dSArthur Eubanks FPM.addPass(JumpThreadingPass()); 593c3ddc13dSArthur Eubanks FPM.addPass(CorrelatedValuePropagationPass()); 594c3ddc13dSArthur Eubanks 595d26b43ffSAlexander Shaposhnikov // Jump table to switch conversion. 596d26b43ffSAlexander Shaposhnikov if (EnableJumpTableToSwitch) 597d26b43ffSAlexander Shaposhnikov FPM.addPass(JumpTableToSwitchPass()); 598d26b43ffSAlexander Shaposhnikov 599371fcb72SRoman Lebedev FPM.addPass( 600371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 601c34d157fSAnton Afanasyev FPM.addPass(InstCombinePass()); 602c3ddc13dSArthur Eubanks FPM.addPass(AggressiveInstCombinePass()); 603c3ddc13dSArthur Eubanks 604c3ddc13dSArthur Eubanks if (!Level.isOptimizingForSize()) 605c3ddc13dSArthur Eubanks FPM.addPass(LibCallsShrinkWrapPass()); 606c3ddc13dSArthur Eubanks 607c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 608c3ddc13dSArthur Eubanks 609c3ddc13dSArthur Eubanks // For PGO use pipeline, try to optimize memory intrinsics such as memcpy 610c3ddc13dSArthur Eubanks // using the size value profile. Don't perform this when optimizing for size. 
611c3ddc13dSArthur Eubanks if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && 612c3ddc13dSArthur Eubanks !Level.isOptimizingForSize()) 613c3ddc13dSArthur Eubanks FPM.addPass(PGOMemOPSizeOpt()); 614c3ddc13dSArthur Eubanks 615c3ddc13dSArthur Eubanks FPM.addPass(TailCallElimPass()); 616371fcb72SRoman Lebedev FPM.addPass( 617371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 618c3ddc13dSArthur Eubanks 619c3ddc13dSArthur Eubanks // Form canonically associated expression trees, and simplify the trees using 620c3ddc13dSArthur Eubanks // basic mathematical properties. For example, this will form (nearly) 621c3ddc13dSArthur Eubanks // minimal multiplication trees. 622c3ddc13dSArthur Eubanks FPM.addPass(ReassociatePass()); 623c3ddc13dSArthur Eubanks 62404f9a8a7SFlorian Hahn if (EnableConstraintElimination) 62504f9a8a7SFlorian Hahn FPM.addPass(ConstraintEliminationPass()); 62604f9a8a7SFlorian Hahn 627c3ddc13dSArthur Eubanks // Add the primary loop simplification pipeline. 628c3ddc13dSArthur Eubanks // FIXME: Currently this is split into two loop pass pipelines because we run 629c3ddc13dSArthur Eubanks // some function passes in between them. These can and should be removed 630c3ddc13dSArthur Eubanks // and/or replaced by scheduling the loop pass equivalents in the correct 631c3ddc13dSArthur Eubanks // positions. But those equivalent passes aren't powerful enough yet. 632c3ddc13dSArthur Eubanks // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still 633c3ddc13dSArthur Eubanks // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to 634c3ddc13dSArthur Eubanks // fully replace `SimplifyCFGPass`, and the closest to the other we have is 635c3ddc13dSArthur Eubanks // `LoopInstSimplify`. 636c3ddc13dSArthur Eubanks LoopPassManager LPM1, LPM2; 637c3ddc13dSArthur Eubanks 638c3ddc13dSArthur Eubanks // Simplify the loop body. 
We do this initially to clean up after other loop 639c3ddc13dSArthur Eubanks // passes run, either when iterating on a loop or on inner loops with 640c3ddc13dSArthur Eubanks // implications on the outer loop. 641c3ddc13dSArthur Eubanks LPM1.addPass(LoopInstSimplifyPass()); 642c3ddc13dSArthur Eubanks LPM1.addPass(LoopSimplifyCFGPass()); 643c3ddc13dSArthur Eubanks 644c3ddc13dSArthur Eubanks // Try to remove as much code from the loop header as possible, 645d9da6a53SWilliam S. Moses // to reduce amount of IR that will have to be duplicated. However, 646d9da6a53SWilliam S. Moses // do not perform speculative hoisting the first time as LICM 647d9da6a53SWilliam S. Moses // will destroy metadata that may not need to be destroyed if run 648d9da6a53SWilliam S. Moses // after loop rotation. 649c3ddc13dSArthur Eubanks // TODO: Investigate promotion cap for O1. 650d9da6a53SWilliam S. Moses LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 651d9da6a53SWilliam S. Moses /*AllowSpeculation=*/false)); 652c3ddc13dSArthur Eubanks 653c3ddc13dSArthur Eubanks // Disable header duplication in loop rotation at -Oz. 6542fef6853SPaul Kirth LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication || 6552fef6853SPaul Kirth Level != OptimizationLevel::Oz, 6562fef6853SPaul Kirth isLTOPreLink(Phase))); 657c3ddc13dSArthur Eubanks // TODO: Investigate promotion cap for O1. 658d9da6a53SWilliam S. Moses LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 659d9da6a53SWilliam S. 
Moses /*AllowSpeculation=*/true)); 660c3ddc13dSArthur Eubanks LPM1.addPass( 6615f5cf602SArthur Eubanks SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); 662f269ec23SSjoerd Meijer if (EnableLoopFlatten) 663f269ec23SSjoerd Meijer LPM1.addPass(LoopFlattenPass()); 664f269ec23SSjoerd Meijer 665c3ddc13dSArthur Eubanks LPM2.addPass(LoopIdiomRecognizePass()); 666c3ddc13dSArthur Eubanks LPM2.addPass(IndVarSimplifyPass()); 667c3ddc13dSArthur Eubanks 6680f824693SFlorian Hahn { 6699e662066SFlorian Hahn ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses; 6700f824693SFlorian Hahn ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == 6710f824693SFlorian Hahn OptimizationLevel::O3)); 6720f824693SFlorian Hahn LPM2.addPass(std::move(ExtraPasses)); 6730f824693SFlorian Hahn } 6740f824693SFlorian Hahn 67533817296SPrem Chintalapudi invokeLateLoopOptimizationsEPCallbacks(LPM2, Level); 676c3ddc13dSArthur Eubanks 677c3ddc13dSArthur Eubanks LPM2.addPass(LoopDeletionPass()); 678c3ddc13dSArthur Eubanks 679c3ddc13dSArthur Eubanks if (EnableLoopInterchange) 680c3ddc13dSArthur Eubanks LPM2.addPass(LoopInterchangePass()); 681c3ddc13dSArthur Eubanks 682c3ddc13dSArthur Eubanks // Do not enable unrolling in PreLinkThinLTO phase during sample PGO 683c3ddc13dSArthur Eubanks // because it changes IR, which makes profile annotation in the backend compile 684c3ddc13dSArthur Eubanks // inaccurate. The normal unroller doesn't pay attention to forced full unroll 685c3ddc13dSArthur Eubanks // attributes so we need to make sure and allow the full unroll pass to pay 686c3ddc13dSArthur Eubanks // attention to it. 
687c3ddc13dSArthur Eubanks if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || 688c3ddc13dSArthur Eubanks PGOOpt->Action != PGOOptions::SampleUse) 689c3ddc13dSArthur Eubanks LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 690c3ddc13dSArthur Eubanks /* OnlyWhenForced= */ !PTO.LoopUnrolling, 691c3ddc13dSArthur Eubanks PTO.ForgetAllSCEVInLoopUnroll)); 692c3ddc13dSArthur Eubanks 69333817296SPrem Chintalapudi invokeLoopOptimizerEndEPCallbacks(LPM2, Level); 694c3ddc13dSArthur Eubanks 695c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), 696c3ddc13dSArthur Eubanks /*UseMemorySSA=*/true, 697c3ddc13dSArthur Eubanks /*UseBlockFrequencyInfo=*/true)); 69805b4310cSDavid Green FPM.addPass( 69905b4310cSDavid Green SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 700c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 701c3ddc13dSArthur Eubanks // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, 702c3ddc13dSArthur Eubanks // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. 703c3ddc13dSArthur Eubanks // *All* loop passes must preserve it, in order to be able to use it. 704c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), 705c3ddc13dSArthur Eubanks /*UseMemorySSA=*/false, 706c3ddc13dSArthur Eubanks /*UseBlockFrequencyInfo=*/false)); 707c3ddc13dSArthur Eubanks 708c3ddc13dSArthur Eubanks // Delete small array after loop unroll. 7094f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 710c3ddc13dSArthur Eubanks 711163bb6d6SSanjay Patel // Try vectorization/scalarization transforms that are both improvements 712163bb6d6SSanjay Patel // themselves and can allow further folds with GVN and InstCombine. 7138f337f8fSSanjay Patel FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); 714a7c6471aSFlorian Hahn 715c3ddc13dSArthur Eubanks // Eliminate redundancies. 
716c3ddc13dSArthur Eubanks FPM.addPass(MergedLoadStoreMotionPass()); 717c3ddc13dSArthur Eubanks if (RunNewGVN) 718c3ddc13dSArthur Eubanks FPM.addPass(NewGVNPass()); 719c3ddc13dSArthur Eubanks else 7201d8750c3SArthur Eubanks FPM.addPass(GVNPass()); 721c3ddc13dSArthur Eubanks 722c3ddc13dSArthur Eubanks // Sparse conditional constant propagation. 723c3ddc13dSArthur Eubanks // FIXME: It isn't clear why we do this *after* loop passes rather than 724c3ddc13dSArthur Eubanks // before... 725c3ddc13dSArthur Eubanks FPM.addPass(SCCPPass()); 726c3ddc13dSArthur Eubanks 727c3ddc13dSArthur Eubanks // Delete dead bit computations (instcombine runs after to fold away the dead 728c3ddc13dSArthur Eubanks // computations, and then ADCE will run later to exploit any new DCE 729c3ddc13dSArthur Eubanks // opportunities that creates). 730c3ddc13dSArthur Eubanks FPM.addPass(BDCEPass()); 731c3ddc13dSArthur Eubanks 732c3ddc13dSArthur Eubanks // Run instcombine after redundancy and dead bit elimination to exploit 733c3ddc13dSArthur Eubanks // opportunities opened up by them. 734c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 735c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 736c3ddc13dSArthur Eubanks 737c3ddc13dSArthur Eubanks // Re-consider control flow based optimizations after redundancy elimination, 738c3ddc13dSArthur Eubanks // redo DCE, etc. 739777ac46dSPaul Kirth if (EnableDFAJumpThreading) 740c3ddc13dSArthur Eubanks FPM.addPass(DFAJumpThreadingPass()); 741c3ddc13dSArthur Eubanks 742c3ddc13dSArthur Eubanks FPM.addPass(JumpThreadingPass()); 743c3ddc13dSArthur Eubanks FPM.addPass(CorrelatedValuePropagationPass()); 744c3ddc13dSArthur Eubanks 745c3ddc13dSArthur Eubanks // Finally, do an expensive DCE pass to catch all the dead code exposed by 746c3ddc13dSArthur Eubanks // the simplifications and basic cleanup after all the simplifications. 747c3ddc13dSArthur Eubanks // TODO: Investigate if this is too expensive. 
748c3ddc13dSArthur Eubanks FPM.addPass(ADCEPass()); 749c3ddc13dSArthur Eubanks 750c3ddc13dSArthur Eubanks // Specially optimize memory movement as it doesn't look like dataflow in SSA. 751c3ddc13dSArthur Eubanks FPM.addPass(MemCpyOptPass()); 752c3ddc13dSArthur Eubanks 753c3ddc13dSArthur Eubanks FPM.addPass(DSEPass()); 754afa13ba1Sserge-sans-paille FPM.addPass(MoveAutoInitPass()); 755afa13ba1Sserge-sans-paille 756c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor( 757d9da6a53SWilliam S. Moses LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 758d9da6a53SWilliam S. Moses /*AllowSpeculation=*/true), 759384a8dd1SNikita Popov /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); 760c3ddc13dSArthur Eubanks 761c3ddc13dSArthur Eubanks FPM.addPass(CoroElidePass()); 762c3ddc13dSArthur Eubanks 76333817296SPrem Chintalapudi invokeScalarOptimizerLateEPCallbacks(FPM, Level); 764c3ddc13dSArthur Eubanks 765371fcb72SRoman Lebedev FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() 766371fcb72SRoman Lebedev .convertSwitchRangeToICmp(true) 767371fcb72SRoman Lebedev .hoistCommonInsts(true) 768371fcb72SRoman Lebedev .sinkCommonInsts(true))); 769c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 770c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 771c3ddc13dSArthur Eubanks 772c3ddc13dSArthur Eubanks return FPM; 773c3ddc13dSArthur Eubanks } 774c3ddc13dSArthur Eubanks 775c3ddc13dSArthur Eubanks void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { 776c3ddc13dSArthur Eubanks MPM.addPass(CanonicalizeAliasesPass()); 777c3ddc13dSArthur Eubanks MPM.addPass(NameAnonGlobalPass()); 778c3ddc13dSArthur Eubanks } 779c3ddc13dSArthur Eubanks 78087f5e229STeresa Johnson void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM, 78187f5e229STeresa Johnson OptimizationLevel Level, 78287f5e229STeresa Johnson ThinOrFullLTOPhase LTOPhase) { 783c3ddc13dSArthur Eubanks assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); 
78487f5e229STeresa Johnson if (DisablePreInliner) 78587f5e229STeresa Johnson return; 786c3ddc13dSArthur Eubanks InlineParams IP; 787c3ddc13dSArthur Eubanks 788c3ddc13dSArthur Eubanks IP.DefaultThreshold = PreInlineThreshold; 789c3ddc13dSArthur Eubanks 790c3ddc13dSArthur Eubanks // FIXME: The hint threshold has the same value used by the regular inliner 791c3ddc13dSArthur Eubanks // when not optimizing for size. This should probably be lowered after 792c3ddc13dSArthur Eubanks // performance testing. 793c3ddc13dSArthur Eubanks // FIXME: this comment is cargo culted from the old pass manager, revisit. 794c3ddc13dSArthur Eubanks IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; 795e0d06959SMingming Liu ModuleInlinerWrapperPass MIWP( 796e0d06959SMingming Liu IP, /* MandatoryFirst */ true, 797e0d06959SMingming Liu InlineContext{LTOPhase, InlinePass::EarlyInliner}); 798c3ddc13dSArthur Eubanks CGSCCPassManager &CGPipeline = MIWP.getPM(); 799c3ddc13dSArthur Eubanks 800c3ddc13dSArthur Eubanks FunctionPassManager FPM; 8014f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 802c3ddc13dSArthur Eubanks FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. 803371fcb72SRoman Lebedev FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( 804371fcb72SRoman Lebedev true))); // Merge & remove basic blocks. 805c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); // Combine silly sequences. 806c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 807c3ddc13dSArthur Eubanks 8087175886aSArthur Eubanks CGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 8097175886aSArthur Eubanks std::move(FPM), PTO.EagerlyInvalidateAnalyses)); 810c3ddc13dSArthur Eubanks 811c3ddc13dSArthur Eubanks MPM.addPass(std::move(MIWP)); 812c3ddc13dSArthur Eubanks 813c3ddc13dSArthur Eubanks // Delete anything that is now dead to make sure that we don't instrument 814c3ddc13dSArthur Eubanks // dead code. 
Instrumentation can end up keeping dead code around and 815c3ddc13dSArthur Eubanks // dramatically increase code size. 816c3ddc13dSArthur Eubanks MPM.addPass(GlobalDCEPass()); 817c3ddc13dSArthur Eubanks } 818c3ddc13dSArthur Eubanks 819174cdeceSMircea Trofin void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM, 820174cdeceSMircea Trofin OptimizationLevel Level) { 821174cdeceSMircea Trofin if (EnablePostPGOLoopRotation) { 822174cdeceSMircea Trofin // Disable header duplication in loop rotation at -Oz. 823174cdeceSMircea Trofin MPM.addPass(createModuleToFunctionPassAdaptor( 824174cdeceSMircea Trofin createFunctionToLoopPassAdaptor( 825174cdeceSMircea Trofin LoopRotatePass(EnableLoopHeaderDuplication || 826174cdeceSMircea Trofin Level != OptimizationLevel::Oz), 827174cdeceSMircea Trofin /*UseMemorySSA=*/false, 828174cdeceSMircea Trofin /*UseBlockFrequencyInfo=*/false), 829174cdeceSMircea Trofin PTO.EagerlyInvalidateAnalyses)); 830174cdeceSMircea Trofin } 831174cdeceSMircea Trofin } 832174cdeceSMircea Trofin 83387f5e229STeresa Johnson void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, 83487f5e229STeresa Johnson OptimizationLevel Level, bool RunProfileGen, 83587f5e229STeresa Johnson bool IsCS, bool AtomicCounterUpdate, 83687f5e229STeresa Johnson std::string ProfileFile, 83787f5e229STeresa Johnson std::string ProfileRemappingFile, 83887f5e229STeresa Johnson IntrusiveRefCntPtr<vfs::FileSystem> FS) { 83987f5e229STeresa Johnson assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); 84087f5e229STeresa Johnson 841c3ddc13dSArthur Eubanks if (!RunProfileGen) { 842c3ddc13dSArthur Eubanks assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); 843516e3017SSteven Wu MPM.addPass( 844516e3017SSteven Wu PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); 845c3ddc13dSArthur Eubanks // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 846c3ddc13dSArthur Eubanks // RequireAnalysisPass for PSI before 
subsequent non-module passes. 847c3ddc13dSArthur Eubanks MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 848c3ddc13dSArthur Eubanks return; 849c3ddc13dSArthur Eubanks } 850c3ddc13dSArthur Eubanks 851c3ddc13dSArthur Eubanks // Perform PGO instrumentation. 8523f18a0a7SMircea Trofin MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO 8533f18a0a7SMircea Trofin : PGOInstrumentationType::FDO)); 854c3ddc13dSArthur Eubanks 855174cdeceSMircea Trofin addPostPGOLoopRotation(MPM, Level); 856c3ddc13dSArthur Eubanks // Add the profile lowering pass. 857c3ddc13dSArthur Eubanks InstrProfOptions Options; 858c3ddc13dSArthur Eubanks if (!ProfileFile.empty()) 859c3ddc13dSArthur Eubanks Options.InstrProfileOutput = ProfileFile; 860c3ddc13dSArthur Eubanks // Do counter promotion at Level greater than O0. 861c3ddc13dSArthur Eubanks Options.DoCounterPromotion = true; 862c3ddc13dSArthur Eubanks Options.UseBFIInPromotion = IsCS; 863b1ca2a95Sxur-llvm if (EnableSampledInstr) { 864b1ca2a95Sxur-llvm Options.Sampling = true; 865b1ca2a95Sxur-llvm // With sampling, there is little benefit to enabling counter promotion. 866b1ca2a95Sxur-llvm // But note that sampling does work with counter promotion. 
867b1ca2a95Sxur-llvm Options.DoCounterPromotion = false; 868b1ca2a95Sxur-llvm } 869611ce241SQiongsi Wu Options.Atomic = AtomicCounterUpdate; 8701d608fc7SMircea Trofin MPM.addPass(InstrProfilingLoweringPass(Options, IsCS)); 871c3ddc13dSArthur Eubanks } 872c3ddc13dSArthur Eubanks 873516e3017SSteven Wu void PassBuilder::addPGOInstrPassesForO0( 874516e3017SSteven Wu ModulePassManager &MPM, bool RunProfileGen, bool IsCS, 875611ce241SQiongsi Wu bool AtomicCounterUpdate, std::string ProfileFile, 876611ce241SQiongsi Wu std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) { 877c3ddc13dSArthur Eubanks if (!RunProfileGen) { 878c3ddc13dSArthur Eubanks assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); 879516e3017SSteven Wu MPM.addPass( 880516e3017SSteven Wu PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); 881c3ddc13dSArthur Eubanks // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 882c3ddc13dSArthur Eubanks // RequireAnalysisPass for PSI before subsequent non-module passes. 883c3ddc13dSArthur Eubanks MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 884c3ddc13dSArthur Eubanks return; 885c3ddc13dSArthur Eubanks } 886c3ddc13dSArthur Eubanks 887c3ddc13dSArthur Eubanks // Perform PGO instrumentation. 8883f18a0a7SMircea Trofin MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO 8893f18a0a7SMircea Trofin : PGOInstrumentationType::FDO)); 890c3ddc13dSArthur Eubanks // Add the profile lowering pass. 891c3ddc13dSArthur Eubanks InstrProfOptions Options; 892c3ddc13dSArthur Eubanks if (!ProfileFile.empty()) 893c3ddc13dSArthur Eubanks Options.InstrProfileOutput = ProfileFile; 894c3ddc13dSArthur Eubanks // Do not do counter promotion at O0. 
895c3ddc13dSArthur Eubanks Options.DoCounterPromotion = false; 896c3ddc13dSArthur Eubanks Options.UseBFIInPromotion = IsCS; 897611ce241SQiongsi Wu Options.Atomic = AtomicCounterUpdate; 8981d608fc7SMircea Trofin MPM.addPass(InstrProfilingLoweringPass(Options, IsCS)); 899c3ddc13dSArthur Eubanks } 900c3ddc13dSArthur Eubanks 901c3ddc13dSArthur Eubanks static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { 902c3ddc13dSArthur Eubanks return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); 903c3ddc13dSArthur Eubanks } 904c3ddc13dSArthur Eubanks 905c3ddc13dSArthur Eubanks ModuleInlinerWrapperPass 906c3ddc13dSArthur Eubanks PassBuilder::buildInlinerPipeline(OptimizationLevel Level, 907c3ddc13dSArthur Eubanks ThinOrFullLTOPhase Phase) { 9084fa32807SArthur Eubanks InlineParams IP; 9094fa32807SArthur Eubanks if (PTO.InlinerThreshold == -1) 9104fa32807SArthur Eubanks IP = getInlineParamsFromOptLevel(Level); 9114fa32807SArthur Eubanks else 9124fa32807SArthur Eubanks IP = getInlineParams(PTO.InlinerThreshold); 913408bb9a3SMingming Liu // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to 914408bb9a3SMingming Liu // disable hot callsite inline (as much as possible [1]) because it makes 915408bb9a3SMingming Liu // profile annotation in the backend inaccurate. 916408bb9a3SMingming Liu // 917408bb9a3SMingming Liu // [1] Note the cost of a function could be below zero due to erased 918408bb9a3SMingming Liu // prologue / epilogue. 
919c3ddc13dSArthur Eubanks if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 920c3ddc13dSArthur Eubanks PGOOpt->Action == PGOOptions::SampleUse) 921c3ddc13dSArthur Eubanks IP.HotCallSiteThreshold = 0; 922c3ddc13dSArthur Eubanks 923c3ddc13dSArthur Eubanks if (PGOOpt) 924c3ddc13dSArthur Eubanks IP.EnableDeferral = EnablePGOInlineDeferral; 925c3ddc13dSArthur Eubanks 9261c530500SPavel Samolysov ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, 927e0d06959SMingming Liu InlineContext{Phase, InlinePass::CGSCCInliner}, 928c3ddc13dSArthur Eubanks UseInlineAdvisor, MaxDevirtIterations); 929c3ddc13dSArthur Eubanks 930c3ddc13dSArthur Eubanks // Require the GlobalsAA analysis for the module so we can query it within 931c3ddc13dSArthur Eubanks // the CGSCC pipeline. 932281ae490SNuno Lopes if (EnableGlobalAnalyses) { 933c3ddc13dSArthur Eubanks MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>()); 934281ae490SNuno Lopes // Invalidate AAManager so it can be recreated and pick up the newly 935281ae490SNuno Lopes // available GlobalsAA. 936c3ddc13dSArthur Eubanks MIWP.addModulePass( 937c3ddc13dSArthur Eubanks createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); 938281ae490SNuno Lopes } 939c3ddc13dSArthur Eubanks 940c3ddc13dSArthur Eubanks // Require the ProfileSummaryAnalysis for the module so we can query it within 941c3ddc13dSArthur Eubanks // the inliner pass. 942c3ddc13dSArthur Eubanks MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 943c3ddc13dSArthur Eubanks 944c3ddc13dSArthur Eubanks // Now begin the main postorder CGSCC pipeline. 945c3ddc13dSArthur Eubanks // FIXME: The current CGSCC pipeline has its origins in the legacy pass 946c3ddc13dSArthur Eubanks // manager and trying to emulate its precise behavior. Much of this doesn't 947c3ddc13dSArthur Eubanks // make a lot of sense and we should revisit the core CGSCC structure. 
948c3ddc13dSArthur Eubanks CGSCCPassManager &MainCGPipeline = MIWP.getPM(); 949c3ddc13dSArthur Eubanks 950c3ddc13dSArthur Eubanks // Note: historically, the PruneEH pass was run first to deduce nounwind and 951c3ddc13dSArthur Eubanks // generally clean up exception handling overhead. It isn't clear this is 952c3ddc13dSArthur Eubanks // valuable as the inliner doesn't currently care whether it is inlining an 953c3ddc13dSArthur Eubanks // invoke or a call. 954c3ddc13dSArthur Eubanks 955c3ddc13dSArthur Eubanks if (AttributorRun & AttributorRunOption::CGSCC) 956c3ddc13dSArthur Eubanks MainCGPipeline.addPass(AttributorCGSCCPass()); 957c3ddc13dSArthur Eubanks 9580d4a709bSArthur Eubanks // Deduce function attributes. We do another run of this after the function 9590d4a709bSArthur Eubanks // simplification pipeline, so this only needs to run when it could affect the 9600d4a709bSArthur Eubanks // function simplification pipeline, which is only the case with recursive 9610d4a709bSArthur Eubanks // functions. 9620d4a709bSArthur Eubanks MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)); 963c3ddc13dSArthur Eubanks 964141be5c0SShoaib Meenai // When at O3 add argument promotion to the pass pipeline. 965141be5c0SShoaib Meenai // FIXME: It isn't at all clear why this should be limited to O3. 966141be5c0SShoaib Meenai if (Level == OptimizationLevel::O3) 967c3ddc13dSArthur Eubanks MainCGPipeline.addPass(ArgumentPromotionPass()); 968c3ddc13dSArthur Eubanks 969c3ddc13dSArthur Eubanks // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if 970c3ddc13dSArthur Eubanks // there are no OpenMP runtime calls present in the module. 
971c3ddc13dSArthur Eubanks if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) 972c3ddc13dSArthur Eubanks MainCGPipeline.addPass(OpenMPOptCGSCCPass()); 973c3ddc13dSArthur Eubanks 97433817296SPrem Chintalapudi invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level); 975c3ddc13dSArthur Eubanks 9760d4a709bSArthur Eubanks // Add the core function simplification pipeline nested inside the 977c3ddc13dSArthur Eubanks // CGSCC walk. 978c3ddc13dSArthur Eubanks MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 9797175886aSArthur Eubanks buildFunctionSimplificationPipeline(Level, Phase), 9804d16ebd6SArthur Eubanks PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true)); 981c3ddc13dSArthur Eubanks 9820d4a709bSArthur Eubanks // Finally, deduce any function attributes based on the fully simplified 9830d4a709bSArthur Eubanks // function. 9840d4a709bSArthur Eubanks MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); 9850d4a709bSArthur Eubanks 9860d4a709bSArthur Eubanks // Mark that the function is fully simplified and that it shouldn't be 9870d4a709bSArthur Eubanks // simplified again if we somehow revisit it due to CGSCC mutations unless 9880d4a709bSArthur Eubanks // it's been modified since. 9890d4a709bSArthur Eubanks MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 9900d4a709bSArthur Eubanks RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>())); 9910d4a709bSArthur Eubanks 992a416267aSYuxuan Chen if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { 993c3ddc13dSArthur Eubanks MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); 994c6414970SYuxuan Chen MainCGPipeline.addPass(CoroAnnotationElidePass()); 995a416267aSYuxuan Chen } 996c3ddc13dSArthur Eubanks 9970d4a709bSArthur Eubanks // Make sure we don't affect potential future NoRerun CGSCC adaptors. 
998e3e25b51SArthur Eubanks MIWP.addLateModulePass(createModuleToFunctionPassAdaptor( 999e3e25b51SArthur Eubanks InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); 1000e3e25b51SArthur Eubanks 1001c3ddc13dSArthur Eubanks return MIWP; 1002c3ddc13dSArthur Eubanks } 1003c3ddc13dSArthur Eubanks 10047e8f9d6bSLiqiang Tao ModulePassManager 10056cad45d5SLiqiang Tao PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, 10066cad45d5SLiqiang Tao ThinOrFullLTOPhase Phase) { 10077e8f9d6bSLiqiang Tao ModulePassManager MPM; 10087e8f9d6bSLiqiang Tao 10096cad45d5SLiqiang Tao InlineParams IP = getInlineParamsFromOptLevel(Level); 1010408bb9a3SMingming Liu // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to 1011408bb9a3SMingming Liu // disable hot callsite inline (as much as possible [1]) because it makes 1012408bb9a3SMingming Liu // profile annotation in the backend inaccurate. 1013408bb9a3SMingming Liu // 1014408bb9a3SMingming Liu // [1] Note the cost of a function could be below zero due to erased 1015408bb9a3SMingming Liu // prologue / epilogue. 10166cad45d5SLiqiang Tao if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 10176cad45d5SLiqiang Tao PGOOpt->Action == PGOOptions::SampleUse) 10186cad45d5SLiqiang Tao IP.HotCallSiteThreshold = 0; 10196cad45d5SLiqiang Tao 10206cad45d5SLiqiang Tao if (PGOOpt) 10216cad45d5SLiqiang Tao IP.EnableDeferral = EnablePGOInlineDeferral; 10226cad45d5SLiqiang Tao 10236cad45d5SLiqiang Tao // The inline deferral logic is used to avoid losing some 10246cad45d5SLiqiang Tao // inlining chance in future. It is helpful in SCC inliner, in which 10256cad45d5SLiqiang Tao // inlining is processed in bottom-up order. 10266cad45d5SLiqiang Tao // While in module inliner, the inlining order is a priority-based order 10276cad45d5SLiqiang Tao // by default. The inline deferral is unnecessary there. So we disable the 10286cad45d5SLiqiang Tao // inline deferral logic in module inliner. 
10296cad45d5SLiqiang Tao IP.EnableDeferral = false; 10306cad45d5SLiqiang Tao 1031e0d06959SMingming Liu MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); 10323b226180SMircea Trofin if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) { 10333b226180SMircea Trofin MPM.addPass(GlobalOptPass()); 10343b226180SMircea Trofin MPM.addPass(GlobalDCEPass()); 10353b226180SMircea Trofin MPM.addPass(PGOCtxProfFlatteningPass()); 10363b226180SMircea Trofin } 10377e8f9d6bSLiqiang Tao 10387e8f9d6bSLiqiang Tao MPM.addPass(createModuleToFunctionPassAdaptor( 10397e8f9d6bSLiqiang Tao buildFunctionSimplificationPipeline(Level, Phase), 10407e8f9d6bSLiqiang Tao PTO.EagerlyInvalidateAnalyses)); 10417e8f9d6bSLiqiang Tao 1042a416267aSYuxuan Chen if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { 10437e8f9d6bSLiqiang Tao MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( 10447e8f9d6bSLiqiang Tao CoroSplitPass(Level != OptimizationLevel::O0))); 1045c6414970SYuxuan Chen MPM.addPass( 1046c6414970SYuxuan Chen createModuleToPostOrderCGSCCPassAdaptor(CoroAnnotationElidePass())); 1047a416267aSYuxuan Chen } 10487e8f9d6bSLiqiang Tao 10497e8f9d6bSLiqiang Tao return MPM; 10506cad45d5SLiqiang Tao } 10516cad45d5SLiqiang Tao 1052c3ddc13dSArthur Eubanks ModulePassManager 1053c3ddc13dSArthur Eubanks PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, 1054c3ddc13dSArthur Eubanks ThinOrFullLTOPhase Phase) { 1055721a914fSNikita Popov assert(Level != OptimizationLevel::O0 && 1056721a914fSNikita Popov "Should not be used for O0 pipeline"); 105713e3d4aaSArthur Eubanks 105813e3d4aaSArthur Eubanks assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink && 105913e3d4aaSArthur Eubanks "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!"); 106013e3d4aaSArthur Eubanks 1061c3ddc13dSArthur Eubanks ModulePassManager MPM; 1062c3ddc13dSArthur Eubanks 1063c3ddc13dSArthur Eubanks // Place pseudo probe instrumentation as the first pass of the pipeline to 
1064c3ddc13dSArthur Eubanks // minimize the impact of optimization changes. 1065c3ddc13dSArthur Eubanks if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 1066c3ddc13dSArthur Eubanks Phase != ThinOrFullLTOPhase::ThinLTOPostLink) 1067c3ddc13dSArthur Eubanks MPM.addPass(SampleProfileProbePass(TM)); 1068c3ddc13dSArthur Eubanks 1069c3ddc13dSArthur Eubanks bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); 1070c3ddc13dSArthur Eubanks 1071c3ddc13dSArthur Eubanks // In ThinLTO mode, when flattened profile is used, all the available 1072c3ddc13dSArthur Eubanks // profile information will be annotated in PreLink phase so there is 1073c3ddc13dSArthur Eubanks // no need to load the profile again in PostLink. 1074c3ddc13dSArthur Eubanks bool LoadSampleProfile = 1075c3ddc13dSArthur Eubanks HasSampleProfile && 1076c3ddc13dSArthur Eubanks !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); 1077c3ddc13dSArthur Eubanks 1078c3ddc13dSArthur Eubanks // During the ThinLTO backend phase we perform early indirect call promotion 1079c3ddc13dSArthur Eubanks // here, before globalopt. Otherwise imported available_externally functions 1080c3ddc13dSArthur Eubanks // look unreferenced and are removed. If we are going to load the sample 1081c3ddc13dSArthur Eubanks // profile then defer until later. 1082c3ddc13dSArthur Eubanks // TODO: See if we can move later and consolidate with the location where 1083c3ddc13dSArthur Eubanks // we perform ICP when we are loading a sample profile. 1084c3ddc13dSArthur Eubanks // TODO: We pass HasSampleProfile (whether there was a sample profile file 1085c3ddc13dSArthur Eubanks // passed to the compile) to the SamplePGO flag of ICP. This is used to 1086c3ddc13dSArthur Eubanks // determine whether the new direct calls are annotated with prof metadata. 
1087c3ddc13dSArthur Eubanks // Ideally this should be determined from whether the IR is annotated with 1088c3ddc13dSArthur Eubanks // sample profile, and not whether the a sample profile was provided on the 1089c3ddc13dSArthur Eubanks // command line. E.g. for flattened profiles where we will not be reloading 1090c3ddc13dSArthur Eubanks // the sample profile in the ThinLTO backend, we ideally shouldn't have to 1091c3ddc13dSArthur Eubanks // provide the sample profile file. 1092c3ddc13dSArthur Eubanks if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) 1093c3ddc13dSArthur Eubanks MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); 1094c3ddc13dSArthur Eubanks 109513e3d4aaSArthur Eubanks // Create an early function pass manager to cleanup the output of the 109613e3d4aaSArthur Eubanks // frontend. Not necessary with LTO post link pipelines since the pre link 109713e3d4aaSArthur Eubanks // pipeline already cleaned up the frontend output. 109813e3d4aaSArthur Eubanks if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) { 1099c3ddc13dSArthur Eubanks // Do basic inference of function attributes from known properties of system 1100c3ddc13dSArthur Eubanks // libraries and other oracles. 1101c3ddc13dSArthur Eubanks MPM.addPass(InferFunctionAttrsPass()); 1102f9bee356SChuanqi Xu MPM.addPass(CoroEarlyPass()); 1103c3ddc13dSArthur Eubanks 1104c3ddc13dSArthur Eubanks FunctionPassManager EarlyFPM; 1105cab81dd0SEgor Pasko EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false)); 1106c3ddc13dSArthur Eubanks // Lower llvm.expect to metadata before attempting transforms. 110713e3d4aaSArthur Eubanks // Compare/branch metadata may alter the behavior of passes like 110813e3d4aaSArthur Eubanks // SimplifyCFG. 
1109c3ddc13dSArthur Eubanks EarlyFPM.addPass(LowerExpectIntrinsicPass()); 1110c3ddc13dSArthur Eubanks EarlyFPM.addPass(SimplifyCFGPass()); 11114f7e5d22SRoman Lebedev EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 1112c3ddc13dSArthur Eubanks EarlyFPM.addPass(EarlyCSEPass()); 1113c3ddc13dSArthur Eubanks if (Level == OptimizationLevel::O3) 1114c3ddc13dSArthur Eubanks EarlyFPM.addPass(CallSiteSplittingPass()); 111513e3d4aaSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor( 111613e3d4aaSArthur Eubanks std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses)); 111713e3d4aaSArthur Eubanks } 1118c3ddc13dSArthur Eubanks 1119c3ddc13dSArthur Eubanks if (LoadSampleProfile) { 1120c3ddc13dSArthur Eubanks // Annotate sample profile right after early FPM to ensure freshness of 1121c3ddc13dSArthur Eubanks // the debug info. 1122c3ddc13dSArthur Eubanks MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, 1123c3ddc13dSArthur Eubanks PGOOpt->ProfileRemappingFile, Phase)); 1124c3ddc13dSArthur Eubanks // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 1125c3ddc13dSArthur Eubanks // RequireAnalysisPass for PSI before subsequent non-module passes. 1126c3ddc13dSArthur Eubanks MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 1127c3ddc13dSArthur Eubanks // Do not invoke ICP in the LTOPrelink phase as it makes it hard 1128c3ddc13dSArthur Eubanks // for the profile annotation to be accurate in the LTO backend. 1129f5f04a52SNikita Popov if (!isLTOPreLink(Phase)) 1130c3ddc13dSArthur Eubanks // We perform early indirect call promotion here, before globalopt. 1131c3ddc13dSArthur Eubanks // This is important for the ThinLTO backend phase because otherwise 1132c3ddc13dSArthur Eubanks // imported available_externally functions look unreferenced and are 1133c3ddc13dSArthur Eubanks // removed. 
1134c3ddc13dSArthur Eubanks MPM.addPass( 1135c3ddc13dSArthur Eubanks PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); 1136c3ddc13dSArthur Eubanks } 1137c3ddc13dSArthur Eubanks 1138c3ddc13dSArthur Eubanks // Try to perform OpenMP specific optimizations on the module. This is a 1139c3ddc13dSArthur Eubanks // (quick!) no-op if there are no OpenMP runtime calls present in the module. 1140c3ddc13dSArthur Eubanks MPM.addPass(OpenMPOptPass()); 1141c3ddc13dSArthur Eubanks 1142c3ddc13dSArthur Eubanks if (AttributorRun & AttributorRunOption::MODULE) 1143c3ddc13dSArthur Eubanks MPM.addPass(AttributorPass()); 1144c3ddc13dSArthur Eubanks 1145c3ddc13dSArthur Eubanks // Lower type metadata and the type.test intrinsic in the ThinLTO 1146c3ddc13dSArthur Eubanks // post link pipeline after ICP. This is to enable usage of the type 1147c3ddc13dSArthur Eubanks // tests in ICP sequences. 1148c3ddc13dSArthur Eubanks if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) 1149b01e2a8bSPaul Kirth MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, 1150b01e2a8bSPaul Kirth lowertypetests::DropTestKind::Assume)); 1151c3ddc13dSArthur Eubanks 1152dc45ff1dSShilei Tian invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase); 1153c3ddc13dSArthur Eubanks 1154c3ddc13dSArthur Eubanks // Interprocedural constant propagation now that basic cleanup has occurred 1155c3ddc13dSArthur Eubanks // and prior to optimizing globals. 1156c3ddc13dSArthur Eubanks // FIXME: This position in the pipeline hasn't been carefully considered in 1157c3ddc13dSArthur Eubanks // years, it should be re-analyzed. 
1158f242291fSAlexandros Lamprineas MPM.addPass(IPSCCPPass( 1159f242291fSAlexandros Lamprineas IPSCCPOptions(/*AllowFuncSpec=*/ 1160572a757fSAlexandros Lamprineas Level != OptimizationLevel::Os && 1161f242291fSAlexandros Lamprineas Level != OptimizationLevel::Oz && 1162f5f04a52SNikita Popov !isLTOPreLink(Phase)))); 1163c3ddc13dSArthur Eubanks 1164c3ddc13dSArthur Eubanks // Attach metadata to indirect call sites indicating the set of functions 1165c3ddc13dSArthur Eubanks // they may target at run-time. This should follow IPSCCP. 1166c3ddc13dSArthur Eubanks MPM.addPass(CalledValuePropagationPass()); 1167c3ddc13dSArthur Eubanks 1168c3ddc13dSArthur Eubanks // Optimize globals to try and fold them into constants. 1169c3ddc13dSArthur Eubanks MPM.addPass(GlobalOptPass()); 1170c3ddc13dSArthur Eubanks 1171c3ddc13dSArthur Eubanks // Create a small function pass pipeline to cleanup after all the global 1172c3ddc13dSArthur Eubanks // optimizations. 1173c3ddc13dSArthur Eubanks FunctionPassManager GlobalCleanupPM; 1174bd6eb142SArthur Eubanks // FIXME: Should this instead by a run of SROA? 1175bd6eb142SArthur Eubanks GlobalCleanupPM.addPass(PromotePass()); 1176c3ddc13dSArthur Eubanks GlobalCleanupPM.addPass(InstCombinePass()); 1177c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(GlobalCleanupPM, Level); 1178371fcb72SRoman Lebedev GlobalCleanupPM.addPass( 1179371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 11807175886aSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), 11817175886aSArthur Eubanks PTO.EagerlyInvalidateAnalyses)); 1182c3ddc13dSArthur Eubanks 1183174cdeceSMircea Trofin // We already asserted this happens in non-FullLTOPostLink earlier. 
1184174cdeceSMircea Trofin const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink; 1185174cdeceSMircea Trofin const bool IsPGOPreLink = PGOOpt && IsPreLink; 1186174cdeceSMircea Trofin const bool IsPGOInstrGen = 1187174cdeceSMircea Trofin IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr; 1188174cdeceSMircea Trofin const bool IsPGOInstrUse = 1189174cdeceSMircea Trofin IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse; 1190174cdeceSMircea Trofin const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty(); 1191d311a62eSMircea Trofin // We don't want to mix pgo ctx gen and pgo gen; we also don't currently 1192d311a62eSMircea Trofin // enable ctx profiling from the frontend. 11934a2bf059SMircea Trofin assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) && 11944a2bf059SMircea Trofin "Enabling both instrumented PGO and contextual instrumentation is not " 1195d311a62eSMircea Trofin "supported."); 1196d311a62eSMircea Trofin // Enable contextual profiling instrumentation. 
1197d311a62eSMircea Trofin const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink && 11984a2bf059SMircea Trofin PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled(); 11999c0ba620SMircea Trofin const bool IsCtxProfUse = 12009c0ba620SMircea Trofin !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink; 1201174cdeceSMircea Trofin 1202bef3b54eSLei Wang assert( 1203bef3b54eSLei Wang (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) && 1204bef3b54eSLei Wang "--instrument-cold-function-only-path is provided but " 1205bef3b54eSLei Wang "--pgo-instrument-cold-function-only is not enabled"); 1206bef3b54eSLei Wang const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly && 1207bef3b54eSLei Wang IsPGOPreLink && 1208bef3b54eSLei Wang !InstrumentColdFuncOnlyPath.empty(); 1209bef3b54eSLei Wang 1210ba4da5a0SMircea Trofin if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen || 1211bef3b54eSLei Wang IsCtxProfUse || IsColdFuncOnlyInstrGen) 121287f5e229STeresa Johnson addPreInlinerPasses(MPM, Level, Phase); 121387f5e229STeresa Johnson 1214c3ddc13dSArthur Eubanks // Add all the requested passes for instrumentation PGO, if requested. 1215174cdeceSMircea Trofin if (IsPGOInstrGen || IsPGOInstrUse) { 1216c3ddc13dSArthur Eubanks addPGOInstrPasses(MPM, Level, 1217174cdeceSMircea Trofin /*RunProfileGen=*/IsPGOInstrGen, 1218611ce241SQiongsi Wu /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, 121987f5e229STeresa Johnson PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, 1220611ce241SQiongsi Wu PGOOpt->FS); 1221ba4da5a0SMircea Trofin } else if (IsCtxProfGen || IsCtxProfUse) { 12223f18a0a7SMircea Trofin MPM.addPass(PGOInstrumentationGen(PGOInstrumentationType::CTXPROF)); 1223ba4da5a0SMircea Trofin // In pre-link, we just want the instrumented IR. We use the contextual 1224ba4da5a0SMircea Trofin // profile in the post-thinlink phase. 1225ba4da5a0SMircea Trofin // The instrumentation will be removed in post-thinlink after IPO. 
1226aca01bffSMircea Trofin // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this 1227aca01bffSMircea Trofin // mechanism for GUIDs. 1228aca01bffSMircea Trofin MPM.addPass(AssignGUIDPass()); 1229ba4da5a0SMircea Trofin if (IsCtxProfUse) 1230ba4da5a0SMircea Trofin return MPM; 1231d311a62eSMircea Trofin addPostPGOLoopRotation(MPM, Level); 1232d311a62eSMircea Trofin MPM.addPass(PGOCtxProfLoweringPass()); 1233bef3b54eSLei Wang } else if (IsColdFuncOnlyInstrGen) { 1234bef3b54eSLei Wang addPGOInstrPasses( 1235bef3b54eSLei Wang MPM, Level, /* RunProfileGen */ true, /* IsCS */ false, 1236bef3b54eSLei Wang /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath, 1237bef3b54eSLei Wang /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>()); 1238c3ddc13dSArthur Eubanks } 1239174cdeceSMircea Trofin 1240d311a62eSMircea Trofin if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen) 1241174cdeceSMircea Trofin MPM.addPass(PGOIndirectCallPromotion(false, false)); 1242174cdeceSMircea Trofin 1243174cdeceSMircea Trofin if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) 1244b1ca2a95Sxur-llvm MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile, 1245b1ca2a95Sxur-llvm EnableSampledInstr)); 1246c3ddc13dSArthur Eubanks 1247174cdeceSMircea Trofin if (IsMemprofUse) 1248546ec641STeresa Johnson MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); 1249546ec641STeresa Johnson 1250fb14f1dfSArthur Eubanks if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse || 1251fb14f1dfSArthur Eubanks PGOOpt->Action == PGOOptions::SampleUse)) 125293cdd1b5SArthur Eubanks MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); 125393cdd1b5SArthur Eubanks 12541a2e77cfSAmara Emerson MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); 12551a2e77cfSAmara Emerson 12566cad45d5SLiqiang Tao if (EnableModuleInliner) 12576cad45d5SLiqiang Tao MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); 12586cad45d5SLiqiang Tao else 
1259c3ddc13dSArthur Eubanks MPM.addPass(buildInlinerPipeline(Level, Phase)); 1260c3ddc13dSArthur Eubanks 12611c530500SPavel Samolysov // Remove any dead arguments exposed by cleanups, constant folding globals, 12621c530500SPavel Samolysov // and argument promotion. 12631c530500SPavel Samolysov MPM.addPass(DeadArgumentEliminationPass()); 12641c530500SPavel Samolysov 12653a9ef4e6SWei Wang if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) 1266405bf902SChuanqi Xu MPM.addPass(CoroCleanupPass()); 12677d40f562SChuanqi Xu 126887dadf0fSArthur Eubanks // Optimize globals now that functions are fully simplified. 126987dadf0fSArthur Eubanks MPM.addPass(GlobalOptPass()); 127087dadf0fSArthur Eubanks MPM.addPass(GlobalDCEPass()); 127187dadf0fSArthur Eubanks 1272c3ddc13dSArthur Eubanks return MPM; 1273c3ddc13dSArthur Eubanks } 1274c3ddc13dSArthur Eubanks 1275c3ddc13dSArthur Eubanks /// TODO: Should LTO cause any differences to this set of passes? 1276c3ddc13dSArthur Eubanks void PassBuilder::addVectorPasses(OptimizationLevel Level, 1277c3ddc13dSArthur Eubanks FunctionPassManager &FPM, bool IsFullLTO) { 1278c3ddc13dSArthur Eubanks FPM.addPass(LoopVectorizePass( 1279c3ddc13dSArthur Eubanks LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); 1280c3ddc13dSArthur Eubanks 12810f152a55SDhruv Chawla FPM.addPass(InferAlignmentPass()); 1282c3ddc13dSArthur Eubanks if (IsFullLTO) { 1283c3ddc13dSArthur Eubanks // The vectorizer may have significantly shortened a loop body; unroll 1284c3ddc13dSArthur Eubanks // again. Unroll small loops to hide loop backedge latency and saturate any 1285c3ddc13dSArthur Eubanks // parallel execution resources of an out-of-order processor. We also then 1286c3ddc13dSArthur Eubanks // need to clean up redundancies and loop invariant code. 
1287c3ddc13dSArthur Eubanks // FIXME: It would be really good to use a loop-integrated instruction 1288c3ddc13dSArthur Eubanks // combiner for cleanup here so that the unrolling and LICM can be pipelined 1289c3ddc13dSArthur Eubanks // across the loop nests. 1290c3ddc13dSArthur Eubanks // We do UnrollAndJam in a separate LPM to ensure it happens before unroll 1291c3ddc13dSArthur Eubanks if (EnableUnrollAndJam && PTO.LoopUnrolling) 1292c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor( 1293c3ddc13dSArthur Eubanks LoopUnrollAndJamPass(Level.getSpeedupLevel()))); 1294c3ddc13dSArthur Eubanks FPM.addPass(LoopUnrollPass(LoopUnrollOptions( 1295c3ddc13dSArthur Eubanks Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, 1296c3ddc13dSArthur Eubanks PTO.ForgetAllSCEVInLoopUnroll))); 1297c3ddc13dSArthur Eubanks FPM.addPass(WarnMissedTransformationsPass()); 12988adfa297SRoman Lebedev // Now that we are done with loop unrolling, be it either by LoopVectorizer, 12998adfa297SRoman Lebedev // or LoopUnroll passes, some variable-offset GEP's into alloca's could have 13008adfa297SRoman Lebedev // become constant-offset, thus enabling SROA and alloca promotion. Do so. 13014f7e5d22SRoman Lebedev // NOTE: we are very late in the pipeline, and we don't have any LICM 13024f7e5d22SRoman Lebedev // or SimplifyCFG passes scheduled after us, that would cleanup 13034f7e5d22SRoman Lebedev // the CFG mess this may created if allowed to modify CFG, so forbid that. 13044f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); 1305c3ddc13dSArthur Eubanks } 1306c3ddc13dSArthur Eubanks 1307c3ddc13dSArthur Eubanks if (!IsFullLTO) { 1308c3ddc13dSArthur Eubanks // Eliminate loads by forwarding stores from the previous iteration to loads 1309c3ddc13dSArthur Eubanks // of the current iteration. 
1310c3ddc13dSArthur Eubanks FPM.addPass(LoopLoadEliminationPass()); 1311c3ddc13dSArthur Eubanks } 1312c3ddc13dSArthur Eubanks // Cleanup after the loop optimization passes. 1313ef6f2353SSanjay Patel FPM.addPass(InstCombinePass()); 1314c3ddc13dSArthur Eubanks 1315c3ddc13dSArthur Eubanks if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { 13169e662066SFlorian Hahn ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses; 1317c3ddc13dSArthur Eubanks // At higher optimization levels, try to clean up any runtime overlap and 1318c3ddc13dSArthur Eubanks // alignment checks inserted by the vectorizer. We want to track correlated 1319c3ddc13dSArthur Eubanks // runtime checks for two inner loops in the same outer loop, fold any 1320c3ddc13dSArthur Eubanks // common computations, hoist loop-invariant aspects out of any outer loop, 1321c3ddc13dSArthur Eubanks // and unswitch the runtime checks if possible. Once hoisted, we may have 1322c3ddc13dSArthur Eubanks // dead (or speculatable) control flows or more combining opportunities. 1323acea6e9cSFlorian Hahn ExtraPasses.addPass(EarlyCSEPass()); 1324acea6e9cSFlorian Hahn ExtraPasses.addPass(CorrelatedValuePropagationPass()); 1325ef6f2353SSanjay Patel ExtraPasses.addPass(InstCombinePass()); 1326c3ddc13dSArthur Eubanks LoopPassManager LPM; 1327d9da6a53SWilliam S. Moses LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 1328d9da6a53SWilliam S. 
Moses /*AllowSpeculation=*/true)); 1329c3ddc13dSArthur Eubanks LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == 1330c3ddc13dSArthur Eubanks OptimizationLevel::O3)); 1331acea6e9cSFlorian Hahn ExtraPasses.addPass( 1332c3ddc13dSArthur Eubanks createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, 1333c3ddc13dSArthur Eubanks /*UseBlockFrequencyInfo=*/true)); 1334371fcb72SRoman Lebedev ExtraPasses.addPass( 1335371fcb72SRoman Lebedev SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 1336acea6e9cSFlorian Hahn ExtraPasses.addPass(InstCombinePass()); 1337acea6e9cSFlorian Hahn FPM.addPass(std::move(ExtraPasses)); 1338c3ddc13dSArthur Eubanks } 1339c3ddc13dSArthur Eubanks 1340c3ddc13dSArthur Eubanks // Now that we've formed fast to execute loop structures, we do further 1341c3ddc13dSArthur Eubanks // optimizations. These are run afterward as they might block doing complex 1342c3ddc13dSArthur Eubanks // analyses and transforms such as what are needed for loop vectorization. 1343c3ddc13dSArthur Eubanks 1344c3ddc13dSArthur Eubanks // Cleanup after loop vectorization, etc. Simplification passes like CVP and 1345c3ddc13dSArthur Eubanks // GVN, loop transforms, and others have already run, so it's now better to 1346c3ddc13dSArthur Eubanks // convert to more optimized IR using more aggressive simplify CFG options. 1347c3ddc13dSArthur Eubanks // The extra sinking transform can create larger basic blocks, so do this 1348c3ddc13dSArthur Eubanks // before SLP vectorization. 
1349c3ddc13dSArthur Eubanks FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() 1350c3ddc13dSArthur Eubanks .forwardSwitchCondToPhi(true) 1351371fcb72SRoman Lebedev .convertSwitchRangeToICmp(true) 1352c3ddc13dSArthur Eubanks .convertSwitchToLookupTable(true) 1353c3ddc13dSArthur Eubanks .needCanonicalLoops(false) 1354c3ddc13dSArthur Eubanks .hoistCommonInsts(true) 1355c3ddc13dSArthur Eubanks .sinkCommonInsts(true))); 1356c3ddc13dSArthur Eubanks 1357c3ddc13dSArthur Eubanks if (IsFullLTO) { 1358c3ddc13dSArthur Eubanks FPM.addPass(SCCPPass()); 1359c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 1360c3ddc13dSArthur Eubanks FPM.addPass(BDCEPass()); 1361c3ddc13dSArthur Eubanks } 1362c3ddc13dSArthur Eubanks 1363c3ddc13dSArthur Eubanks // Optimize parallel scalar instruction chains into SIMD instructions. 1364c3ddc13dSArthur Eubanks if (PTO.SLPVectorization) { 1365c3ddc13dSArthur Eubanks FPM.addPass(SLPVectorizerPass()); 1366c3ddc13dSArthur Eubanks if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { 1367c3ddc13dSArthur Eubanks FPM.addPass(EarlyCSEPass()); 1368c3ddc13dSArthur Eubanks } 1369c3ddc13dSArthur Eubanks } 1370c3ddc13dSArthur Eubanks // Enhance/cleanup vector code. 1371c3ddc13dSArthur Eubanks FPM.addPass(VectorCombinePass()); 1372c3ddc13dSArthur Eubanks 1373c3ddc13dSArthur Eubanks if (!IsFullLTO) { 1374c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 1375c3ddc13dSArthur Eubanks // Unroll small loops to hide loop backedge latency and saturate any 1376c3ddc13dSArthur Eubanks // parallel execution resources of an out-of-order processor. We also then 1377c3ddc13dSArthur Eubanks // need to clean up redundancies and loop invariant code. 1378c3ddc13dSArthur Eubanks // FIXME: It would be really good to use a loop-integrated instruction 1379c3ddc13dSArthur Eubanks // combiner for cleanup here so that the unrolling and LICM can be pipelined 1380c3ddc13dSArthur Eubanks // across the loop nests. 
1381c3ddc13dSArthur Eubanks // We do UnrollAndJam in a separate LPM to ensure it happens before unroll 1382c3ddc13dSArthur Eubanks if (EnableUnrollAndJam && PTO.LoopUnrolling) { 1383c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor( 1384c3ddc13dSArthur Eubanks LoopUnrollAndJamPass(Level.getSpeedupLevel()))); 1385c3ddc13dSArthur Eubanks } 1386c3ddc13dSArthur Eubanks FPM.addPass(LoopUnrollPass(LoopUnrollOptions( 1387c3ddc13dSArthur Eubanks Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, 1388c3ddc13dSArthur Eubanks PTO.ForgetAllSCEVInLoopUnroll))); 1389c3ddc13dSArthur Eubanks FPM.addPass(WarnMissedTransformationsPass()); 13908adfa297SRoman Lebedev // Now that we are done with loop unrolling, be it either by LoopVectorizer, 13918adfa297SRoman Lebedev // or LoopUnroll passes, some variable-offset GEP's into alloca's could have 13928adfa297SRoman Lebedev // become constant-offset, thus enabling SROA and alloca promotion. Do so. 13934f7e5d22SRoman Lebedev // NOTE: we are very late in the pipeline, and we don't have any LICM 13944f7e5d22SRoman Lebedev // or SimplifyCFG passes scheduled after us, that would cleanup 13954f7e5d22SRoman Lebedev // the CFG mess this may created if allowed to modify CFG, so forbid that. 13964f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); 1397905083f3SDavid Sherwood } 1398905083f3SDavid Sherwood 13990f152a55SDhruv Chawla FPM.addPass(InferAlignmentPass()); 1400ef6f2353SSanjay Patel FPM.addPass(InstCombinePass()); 1401905083f3SDavid Sherwood 1402905083f3SDavid Sherwood // This is needed for two reasons: 1403905083f3SDavid Sherwood // 1. It works around problems that instcombine introduces, such as sinking 1404905083f3SDavid Sherwood // expensive FP divides into loops containing multiplications using the 1405905083f3SDavid Sherwood // divide result. 1406905083f3SDavid Sherwood // 2. 
It helps to clean up some loop-invariant code created by the loop 1407905083f3SDavid Sherwood // unroll pass when IsFullLTO=false. 1408c3ddc13dSArthur Eubanks FPM.addPass(createFunctionToLoopPassAdaptor( 1409d9da6a53SWilliam S. Moses LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 1410d9da6a53SWilliam S. Moses /*AllowSpeculation=*/true), 1411384a8dd1SNikita Popov /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); 1412c3ddc13dSArthur Eubanks 1413c3ddc13dSArthur Eubanks // Now that we've vectorized and unrolled loops, we may have more refined 1414c3ddc13dSArthur Eubanks // alignment information, try to re-derive it here. 1415c3ddc13dSArthur Eubanks FPM.addPass(AlignmentFromAssumptionsPass()); 1416c3ddc13dSArthur Eubanks } 1417c3ddc13dSArthur Eubanks 1418c3ddc13dSArthur Eubanks ModulePassManager 1419c3ddc13dSArthur Eubanks PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, 1420e0d06959SMingming Liu ThinOrFullLTOPhase LTOPhase) { 1421f5f04a52SNikita Popov const bool LTOPreLink = isLTOPreLink(LTOPhase); 1422c3ddc13dSArthur Eubanks ModulePassManager MPM; 1423c3ddc13dSArthur Eubanks 1424c3ddc13dSArthur Eubanks // Run partial inlining pass to partially inline functions that have 1425c3ddc13dSArthur Eubanks // large bodies. 1426c3ddc13dSArthur Eubanks if (RunPartialInlining) 1427c3ddc13dSArthur Eubanks MPM.addPass(PartialInlinerPass()); 1428c3ddc13dSArthur Eubanks 1429c3ddc13dSArthur Eubanks // Remove avail extern fns and globals definitions since we aren't compiling 1430c3ddc13dSArthur Eubanks // an object file for later LTO. For LTO we want to preserve these so they 1431c3ddc13dSArthur Eubanks // are eligible for inlining at link-time. Note if they are unreferenced they 1432c3ddc13dSArthur Eubanks // will be removed by GlobalDCE later, so this only impacts referenced 1433c3ddc13dSArthur Eubanks // available externally globals. 
Eventually they will be suppressed during 1434c3ddc13dSArthur Eubanks // codegen, but eliminating here enables more opportunity for GlobalDCE as it 1435c3ddc13dSArthur Eubanks // may make globals referenced by available external functions dead and saves 1436c3ddc13dSArthur Eubanks // running remaining passes on the eliminated functions. These should be 1437c3ddc13dSArthur Eubanks // preserved during prelinking for link-time inlining decisions. 1438c3ddc13dSArthur Eubanks if (!LTOPreLink) 1439c3ddc13dSArthur Eubanks MPM.addPass(EliminateAvailableExternallyPass()); 1440c3ddc13dSArthur Eubanks 1441c3ddc13dSArthur Eubanks if (EnableOrderFileInstrumentation) 1442c3ddc13dSArthur Eubanks MPM.addPass(InstrOrderFilePass()); 1443c3ddc13dSArthur Eubanks 1444c3ddc13dSArthur Eubanks // Do RPO function attribute inference across the module to forward-propagate 1445c3ddc13dSArthur Eubanks // attributes where applicable. 1446c3ddc13dSArthur Eubanks // FIXME: Is this really an optimization rather than a canonicalization? 1447c3ddc13dSArthur Eubanks MPM.addPass(ReversePostOrderFunctionAttrsPass()); 1448c3ddc13dSArthur Eubanks 1449c3ddc13dSArthur Eubanks // Do a post inline PGO instrumentation and use pass. This is a context 1450c3ddc13dSArthur Eubanks // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as 1451c3ddc13dSArthur Eubanks // cross-module inline has not been done yet. The context sensitive 1452c3ddc13dSArthur Eubanks // instrumentation is after all the inlines are done. 
1453c3ddc13dSArthur Eubanks if (!LTOPreLink && PGOOpt) { 1454c3ddc13dSArthur Eubanks if (PGOOpt->CSAction == PGOOptions::CSIRInstr) 1455611ce241SQiongsi Wu addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, 1456611ce241SQiongsi Wu /*IsCS=*/true, PGOOpt->AtomicCounterUpdate, 1457611ce241SQiongsi Wu PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile, 145887f5e229STeresa Johnson PGOOpt->FS); 1459c3ddc13dSArthur Eubanks else if (PGOOpt->CSAction == PGOOptions::CSIRUse) 1460611ce241SQiongsi Wu addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, 1461611ce241SQiongsi Wu /*IsCS=*/true, PGOOpt->AtomicCounterUpdate, 1462611ce241SQiongsi Wu PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, 146387f5e229STeresa Johnson PGOOpt->FS); 1464c3ddc13dSArthur Eubanks } 1465c3ddc13dSArthur Eubanks 14664fc7c55fSArthur Eubanks // Re-compute GlobalsAA here prior to function passes. This is particularly 1467c3ddc13dSArthur Eubanks // useful as the above will have inlined, DCE'ed, and function-attr 1468c3ddc13dSArthur Eubanks // propagated everything. We should at this point have a reasonably minimal 1469c3ddc13dSArthur Eubanks // and richly annotated call graph. By computing aliasing and mod/ref 1470c3ddc13dSArthur Eubanks // information for all local globals here, the late loop passes and notably 1471c3ddc13dSArthur Eubanks // the vectorizer will be able to use them to help recognize vectorizable 1472c3ddc13dSArthur Eubanks // memory operations. 1473281ae490SNuno Lopes if (EnableGlobalAnalyses) 14744fc7c55fSArthur Eubanks MPM.addPass(RecomputeGlobalsAAPass()); 1475c3ddc13dSArthur Eubanks 1476390300d9SShilei Tian invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase); 14770bda12b5SWenju He 1478c3ddc13dSArthur Eubanks FunctionPassManager OptimizePM; 1479f3c417f3Slcvon007 // Scheduling LoopVersioningLICM when inlining is over, because after that 1480f3c417f3Slcvon007 // we may see more accurate aliasing. 
Reason to run this late is that too 1481f3c417f3Slcvon007 // early versioning may prevent further inlining due to increase of code 1482f3c417f3Slcvon007 // size. Other optimizations which runs later might get benefit of no-alias 1483f3c417f3Slcvon007 // assumption in clone loop. 1484f3c417f3Slcvon007 if (UseLoopVersioningLICM) { 1485f3c417f3Slcvon007 OptimizePM.addPass( 1486f3c417f3Slcvon007 createFunctionToLoopPassAdaptor(LoopVersioningLICMPass())); 1487f3c417f3Slcvon007 // LoopVersioningLICM pass might increase new LICM opportunities. 1488f3c417f3Slcvon007 OptimizePM.addPass(createFunctionToLoopPassAdaptor( 1489f3c417f3Slcvon007 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 1490f3c417f3Slcvon007 /*AllowSpeculation=*/true), 1491f3c417f3Slcvon007 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); 1492f3c417f3Slcvon007 } 1493f3c417f3Slcvon007 1494c3ddc13dSArthur Eubanks OptimizePM.addPass(Float2IntPass()); 1495c3ddc13dSArthur Eubanks OptimizePM.addPass(LowerConstantIntrinsicsPass()); 1496c3ddc13dSArthur Eubanks 1497c3ddc13dSArthur Eubanks if (EnableMatrix) { 1498c3ddc13dSArthur Eubanks OptimizePM.addPass(LowerMatrixIntrinsicsPass()); 1499c3ddc13dSArthur Eubanks OptimizePM.addPass(EarlyCSEPass()); 1500c3ddc13dSArthur Eubanks } 1501c3ddc13dSArthur Eubanks 150266673166SRong Xu // CHR pass should only be applied with the profile information. 150366673166SRong Xu // The check is to check the profile summary information in CHR. 150466673166SRong Xu if (EnableCHR && Level == OptimizationLevel::O3) 150566673166SRong Xu OptimizePM.addPass(ControlHeightReductionPass()); 150666673166SRong Xu 1507c3ddc13dSArthur Eubanks // FIXME: We need to run some loop optimizations to re-rotate loops after 1508c3ddc13dSArthur Eubanks // simplifycfg and others undo their rotation. 1509c3ddc13dSArthur Eubanks 1510c3ddc13dSArthur Eubanks // Optimize the loop execution. 
These passes operate on entire loop nests 1511c3ddc13dSArthur Eubanks // rather than on each loop in an inside-out manner, and so they are actually 1512c3ddc13dSArthur Eubanks // function passes. 1513c3ddc13dSArthur Eubanks 151433817296SPrem Chintalapudi invokeVectorizerStartEPCallbacks(OptimizePM, Level); 1515c3ddc13dSArthur Eubanks 15169c2469c1SRoman Lebedev LoopPassManager LPM; 1517c3ddc13dSArthur Eubanks // First rotate loops that may have been un-rotated by prior passes. 1518c3ddc13dSArthur Eubanks // Disable header duplication at -Oz. 15192fef6853SPaul Kirth LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication || 15202fef6853SPaul Kirth Level != OptimizationLevel::Oz, 15212fef6853SPaul Kirth LTOPreLink)); 15229c2469c1SRoman Lebedev // Some loops may have become dead by now. Try to delete them. 15235c7e783eSArthur Eubanks // FIXME: see discussion in https://reviews.llvm.org/D112851, 15245c7e783eSArthur Eubanks // this may need to be revisited once we run GVN before loop deletion 15255c7e783eSArthur Eubanks // in the simplification pipeline. 15269c2469c1SRoman Lebedev LPM.addPass(LoopDeletionPass()); 1527c3ddc13dSArthur Eubanks OptimizePM.addPass(createFunctionToLoopPassAdaptor( 15289c2469c1SRoman Lebedev std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); 1529c3ddc13dSArthur Eubanks 1530c3ddc13dSArthur Eubanks // Distribute loops to allow partial vectorization. I.e. isolate dependences 1531c3ddc13dSArthur Eubanks // into separate loop that would otherwise inhibit vectorization. This is 1532c3ddc13dSArthur Eubanks // currently only performed for loops marked with the metadata 1533c3ddc13dSArthur Eubanks // llvm.loop.distribute=true or when -enable-loop-distribute is specified. 1534c3ddc13dSArthur Eubanks OptimizePM.addPass(LoopDistributePass()); 1535c3ddc13dSArthur Eubanks 1536c3ddc13dSArthur Eubanks // Populates the VFABI attribute with the scalar-to-vector mappings 1537c3ddc13dSArthur Eubanks // from the TargetLibraryInfo. 
1538c3ddc13dSArthur Eubanks OptimizePM.addPass(InjectTLIMappings()); 1539c3ddc13dSArthur Eubanks 1540c3ddc13dSArthur Eubanks addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); 1541c3ddc13dSArthur Eubanks 1542*d3161defSAxel Sorenson invokeVectorizerEndEPCallbacks(OptimizePM, Level); 1543*d3161defSAxel Sorenson 1544c3ddc13dSArthur Eubanks // LoopSink pass sinks instructions hoisted by LICM, which serves as a 1545c3ddc13dSArthur Eubanks // canonicalization pass that enables other optimizations. As a result, 1546c3ddc13dSArthur Eubanks // LoopSink pass needs to be a very late IR pass to avoid undoing LICM 1547c3ddc13dSArthur Eubanks // result too early. 1548c3ddc13dSArthur Eubanks OptimizePM.addPass(LoopSinkPass()); 1549c3ddc13dSArthur Eubanks 1550c3ddc13dSArthur Eubanks // And finally clean up LCSSA form before generating code. 1551c3ddc13dSArthur Eubanks OptimizePM.addPass(InstSimplifyPass()); 1552c3ddc13dSArthur Eubanks 1553c3ddc13dSArthur Eubanks // This hoists/decomposes div/rem ops. It should run after other sink/hoist 1554c3ddc13dSArthur Eubanks // passes to avoid re-sinking, but before SimplifyCFG because it can allow 1555c3ddc13dSArthur Eubanks // flattening of blocks. 1556c3ddc13dSArthur Eubanks OptimizePM.addPass(DivRemPairsPass()); 1557c3ddc13dSArthur Eubanks 1558bfb9b8e0SSanjay Patel // Try to annotate calls that were created during optimization. 1559bfb9b8e0SSanjay Patel OptimizePM.addPass(TailCallElimPass()); 1560bfb9b8e0SSanjay Patel 1561c3ddc13dSArthur Eubanks // LoopSink (and other loop passes since the last simplifyCFG) might have 1562c3ddc13dSArthur Eubanks // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 
156387c86aa6SShengchen Kan OptimizePM.addPass( 156487c86aa6SShengchen Kan SimplifyCFGPass(SimplifyCFGOptions() 15653d494bfcSTianqing Wang .convertSwitchRangeToICmp(true) 156687c86aa6SShengchen Kan .speculateUnpredictables(true) 156787c86aa6SShengchen Kan .hoistLoadsStoresWithCondFaulting(true))); 1568c3ddc13dSArthur Eubanks 1569c3ddc13dSArthur Eubanks // Add the core optimizing pipeline. 15707175886aSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), 15717175886aSArthur Eubanks PTO.EagerlyInvalidateAnalyses)); 1572c3ddc13dSArthur Eubanks 1573390300d9SShilei Tian invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase); 1574c3ddc13dSArthur Eubanks 1575ae7f4680SNikita Popov // Split out cold code. Splitting is done late to avoid hiding context from 1576ae7f4680SNikita Popov // other optimizations and inadvertently regressing performance. The tradeoff 1577ae7f4680SNikita Popov // is that this has a higher code size cost than splitting early. 1578ae7f4680SNikita Popov if (EnableHotColdSplit && !LTOPreLink) 1579ae7f4680SNikita Popov MPM.addPass(HotColdSplittingPass()); 1580ae7f4680SNikita Popov 1581ae7f4680SNikita Popov // Search the code for similar regions of code. If enough similar regions can 1582ae7f4680SNikita Popov // be found where extracting the regions into their own function will decrease 1583ae7f4680SNikita Popov // the size of the program, we extract the regions, a deduplicate the 1584ae7f4680SNikita Popov // structurally similar regions. 1585ae7f4680SNikita Popov if (EnableIROutliner) 1586ae7f4680SNikita Popov MPM.addPass(IROutlinerPass()); 1587ae7f4680SNikita Popov 1588c3ddc13dSArthur Eubanks // Now we need to do some global optimization transforms. 1589c3ddc13dSArthur Eubanks // FIXME: It would seem like these should come first in the optimization 1590c3ddc13dSArthur Eubanks // pipeline and maybe be the bottom of the canonicalization pipeline? Weird 1591c3ddc13dSArthur Eubanks // ordering here. 
1592c3ddc13dSArthur Eubanks MPM.addPass(GlobalDCEPass()); 1593c3ddc13dSArthur Eubanks MPM.addPass(ConstantMergePass()); 1594c3ddc13dSArthur Eubanks 15955d791109SYAMAMOTO Takashi // Merge functions if requested. It has a better chance to merge functions 15965d791109SYAMAMOTO Takashi // after ConstantMerge folded jump tables. 15975d791109SYAMAMOTO Takashi if (PTO.MergeFunctions) 15985d791109SYAMAMOTO Takashi MPM.addPass(MergeFunctionsPass()); 15995d791109SYAMAMOTO Takashi 1600325e7e8bSBen Dunbobbin if (PTO.CallGraphProfile && !LTOPreLink) 16015ce28684SMingming Liu MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || 16025ce28684SMingming Liu LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink)); 1603325e7e8bSBen Dunbobbin 160438902153Sgulfemsavrun // RelLookupTableConverterPass runs later in LTO post-link pipeline. 1605c3ddc13dSArthur Eubanks if (!LTOPreLink) 1606c3ddc13dSArthur Eubanks MPM.addPass(RelLookupTableConverterPass()); 1607c3ddc13dSArthur Eubanks 1608c3ddc13dSArthur Eubanks return MPM; 1609c3ddc13dSArthur Eubanks } 1610c3ddc13dSArthur Eubanks 1611c3ddc13dSArthur Eubanks ModulePassManager 1612c3ddc13dSArthur Eubanks PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, 16135445edb5SShilei Tian ThinOrFullLTOPhase Phase) { 1614a8f6b576SNikita Popov if (Level == OptimizationLevel::O0) 16155445edb5SShilei Tian return buildO0DefaultPipeline(Level, Phase); 1616c3ddc13dSArthur Eubanks 1617c3ddc13dSArthur Eubanks ModulePassManager MPM; 1618c3ddc13dSArthur Eubanks 1619c3ddc13dSArthur Eubanks // Convert @llvm.global.annotations to !annotation metadata. 1620c3ddc13dSArthur Eubanks MPM.addPass(Annotation2MetadataPass()); 1621c3ddc13dSArthur Eubanks 1622c3ddc13dSArthur Eubanks // Force any function attributes we want the rest of the pipeline to observe. 
1623c3ddc13dSArthur Eubanks MPM.addPass(ForceFunctionAttrsPass()); 1624c3ddc13dSArthur Eubanks 1625271853c6SNikita Popov if (PGOOpt && PGOOpt->DebugInfoForProfiling) 1626271853c6SNikita Popov MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); 1627271853c6SNikita Popov 1628c3ddc13dSArthur Eubanks // Apply module pipeline start EP callback. 162933817296SPrem Chintalapudi invokePipelineStartEPCallbacks(MPM, Level); 1630c3ddc13dSArthur Eubanks 1631c3ddc13dSArthur Eubanks // Add the core simplification pipeline. 16325445edb5SShilei Tian MPM.addPass(buildModuleSimplificationPipeline(Level, Phase)); 1633c3ddc13dSArthur Eubanks 1634c3ddc13dSArthur Eubanks // Now add the optimization pipeline. 16355445edb5SShilei Tian MPM.addPass(buildModuleOptimizationPipeline(Level, Phase)); 1636c3ddc13dSArthur Eubanks 163742ad7e1bSHongtao Yu if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 163842ad7e1bSHongtao Yu PGOOpt->Action == PGOOptions::SampleUse) 1639c3ddc13dSArthur Eubanks MPM.addPass(PseudoProbeUpdatePass()); 1640c3ddc13dSArthur Eubanks 1641c3ddc13dSArthur Eubanks // Emit annotation remarks. 
1642c3ddc13dSArthur Eubanks addAnnotationRemarksPass(MPM); 1643c3ddc13dSArthur Eubanks 16445445edb5SShilei Tian if (isLTOPreLink(Phase)) 1645c3ddc13dSArthur Eubanks addRequiredLTOPreLinkPasses(MPM); 164675a17970SPaul Kirth return MPM; 164775a17970SPaul Kirth } 1648c3ddc13dSArthur Eubanks 164975a17970SPaul Kirth ModulePassManager 16509d476e1eSPaul Kirth PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, 16519d476e1eSPaul Kirth bool EmitSummary) { 165275a17970SPaul Kirth ModulePassManager MPM; 16539d476e1eSPaul Kirth if (ThinLTO) 1654cfe1ece8SPaul Kirth MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level)); 16559d476e1eSPaul Kirth else 16569d476e1eSPaul Kirth MPM.addPass(buildLTOPreLinkDefaultPipeline(Level)); 16579d476e1eSPaul Kirth MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary)); 1658cfe1ece8SPaul Kirth 1659913cd11fSPaul Kirth // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the 1660913cd11fSPaul Kirth // object code, only in the bitcode section, so drop it before we run 1661913cd11fSPaul Kirth // module optimization and generate machine code. If llvm.type.test() isn't in 1662913cd11fSPaul Kirth // the IR, this won't do anything. 1663913cd11fSPaul Kirth MPM.addPass( 1664913cd11fSPaul Kirth LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All)); 1665913cd11fSPaul Kirth 16669d476e1eSPaul Kirth // Use the ThinLTO post-link pipeline with sample profiling 16679d476e1eSPaul Kirth if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) 1668cfe1ece8SPaul Kirth MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr)); 1669cfe1ece8SPaul Kirth else { 1670cfe1ece8SPaul Kirth // otherwise, just use module optimization 1671cfe1ece8SPaul Kirth MPM.addPass( 1672cfe1ece8SPaul Kirth buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None)); 1673cfe1ece8SPaul Kirth // Emit annotation remarks. 
1674cfe1ece8SPaul Kirth addAnnotationRemarksPass(MPM); 1675cfe1ece8SPaul Kirth } 1676c3ddc13dSArthur Eubanks return MPM; 1677c3ddc13dSArthur Eubanks } 1678c3ddc13dSArthur Eubanks 1679c3ddc13dSArthur Eubanks ModulePassManager 1680c3ddc13dSArthur Eubanks PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { 1681a8f6b576SNikita Popov if (Level == OptimizationLevel::O0) 16825445edb5SShilei Tian return buildO0DefaultPipeline(Level, ThinOrFullLTOPhase::ThinLTOPreLink); 1683c3ddc13dSArthur Eubanks 1684c3ddc13dSArthur Eubanks ModulePassManager MPM; 1685c3ddc13dSArthur Eubanks 1686c3ddc13dSArthur Eubanks // Convert @llvm.global.annotations to !annotation metadata. 1687c3ddc13dSArthur Eubanks MPM.addPass(Annotation2MetadataPass()); 1688c3ddc13dSArthur Eubanks 1689c3ddc13dSArthur Eubanks // Force any function attributes we want the rest of the pipeline to observe. 1690c3ddc13dSArthur Eubanks MPM.addPass(ForceFunctionAttrsPass()); 1691c3ddc13dSArthur Eubanks 1692c3ddc13dSArthur Eubanks if (PGOOpt && PGOOpt->DebugInfoForProfiling) 1693c3ddc13dSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); 1694c3ddc13dSArthur Eubanks 1695c3ddc13dSArthur Eubanks // Apply module pipeline start EP callback. 169633817296SPrem Chintalapudi invokePipelineStartEPCallbacks(MPM, Level); 1697c3ddc13dSArthur Eubanks 1698c3ddc13dSArthur Eubanks // If we are planning to perform ThinLTO later, we don't bloat the code with 1699c3ddc13dSArthur Eubanks // unrolling/vectorization/... now. Just simplify the module as much as we 1700c3ddc13dSArthur Eubanks // can. 1701c3ddc13dSArthur Eubanks MPM.addPass(buildModuleSimplificationPipeline( 1702c3ddc13dSArthur Eubanks Level, ThinOrFullLTOPhase::ThinLTOPreLink)); 1703ba4da5a0SMircea Trofin // In pre-link, for ctx prof use, we stop here with an instrumented IR. 
We let 1704ba4da5a0SMircea Trofin // thinlto use the contextual info to perform imports; then use the contextual 1705ba4da5a0SMircea Trofin // profile in the post-thinlink phase. 17069c0ba620SMircea Trofin if (!UseCtxProfile.empty()) { 17074a2bf059SMircea Trofin addRequiredLTOPreLinkPasses(MPM); 1708ba4da5a0SMircea Trofin return MPM; 17094a2bf059SMircea Trofin } 1710c3ddc13dSArthur Eubanks 1711c3ddc13dSArthur Eubanks // Run partial inlining pass to partially inline functions that have 1712c3ddc13dSArthur Eubanks // large bodies. 1713c3ddc13dSArthur Eubanks // FIXME: It isn't clear whether this is really the right place to run this 1714c3ddc13dSArthur Eubanks // in ThinLTO. Because there is another canonicalization and simplification 1715c3ddc13dSArthur Eubanks // phase that will run after the thin link, running this here ends up with 1716c3ddc13dSArthur Eubanks // less information than will be available later and it may grow functions in 1717c3ddc13dSArthur Eubanks // ways that aren't beneficial. 1718c3ddc13dSArthur Eubanks if (RunPartialInlining) 1719c3ddc13dSArthur Eubanks MPM.addPass(PartialInlinerPass()); 1720c3ddc13dSArthur Eubanks 172142ad7e1bSHongtao Yu if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 172242ad7e1bSHongtao Yu PGOOpt->Action == PGOOptions::SampleUse) 1723c3ddc13dSArthur Eubanks MPM.addPass(PseudoProbeUpdatePass()); 1724c3ddc13dSArthur Eubanks 1725181d4081SVitaly Buka // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual 1726181d4081SVitaly Buka // optimization is going to be done in PostLink stage, but clang can't add 1727181d4081SVitaly Buka // callbacks there in case of in-process ThinLTO called by linker. 
1728390300d9SShilei Tian invokeOptimizerEarlyEPCallbacks(MPM, Level, 1729390300d9SShilei Tian /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); 1730390300d9SShilei Tian invokeOptimizerLastEPCallbacks(MPM, Level, 1731390300d9SShilei Tian /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); 1732c3ddc13dSArthur Eubanks 1733c3ddc13dSArthur Eubanks // Emit annotation remarks. 1734c3ddc13dSArthur Eubanks addAnnotationRemarksPass(MPM); 1735c3ddc13dSArthur Eubanks 1736c3ddc13dSArthur Eubanks addRequiredLTOPreLinkPasses(MPM); 1737c3ddc13dSArthur Eubanks 1738c3ddc13dSArthur Eubanks return MPM; 1739c3ddc13dSArthur Eubanks } 1740c3ddc13dSArthur Eubanks 1741c3ddc13dSArthur Eubanks ModulePassManager PassBuilder::buildThinLTODefaultPipeline( 1742c3ddc13dSArthur Eubanks OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { 1743c3ddc13dSArthur Eubanks ModulePassManager MPM; 1744c3ddc13dSArthur Eubanks 1745c3ddc13dSArthur Eubanks if (ImportSummary) { 1746cfad2d3aSTeresa Johnson // For ThinLTO we must apply the context disambiguation decisions early, to 1747cfad2d3aSTeresa Johnson // ensure we can correctly match the callsites to summary data. 1748cfad2d3aSTeresa Johnson if (EnableMemProfContextDisambiguation) 17491de71652STeresa Johnson MPM.addPass(MemProfContextDisambiguation( 17501de71652STeresa Johnson ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); 1751cfad2d3aSTeresa Johnson 1752c3ddc13dSArthur Eubanks // These passes import type identifier resolutions for whole-program 1753c3ddc13dSArthur Eubanks // devirtualization and CFI. They must run early because other passes may 1754c3ddc13dSArthur Eubanks // disturb the specific instruction patterns that these passes look for, 1755c3ddc13dSArthur Eubanks // creating dependencies on resolutions that may not appear in the summary. 
1756c3ddc13dSArthur Eubanks // 1757c3ddc13dSArthur Eubanks // For example, GVN may transform the pattern assume(type.test) appearing in 1758c3ddc13dSArthur Eubanks // two basic blocks into assume(phi(type.test, type.test)), which would 1759c3ddc13dSArthur Eubanks // transform a dependency on a WPD resolution into a dependency on a type 1760c3ddc13dSArthur Eubanks // identifier resolution for CFI. 1761c3ddc13dSArthur Eubanks // 1762c3ddc13dSArthur Eubanks // Also, WPD has access to more precise information than ICP and can 1763c3ddc13dSArthur Eubanks // devirtualize more effectively, so it should operate on the IR first. 1764c3ddc13dSArthur Eubanks // 1765c3ddc13dSArthur Eubanks // The WPD and LowerTypeTest passes need to run at -O0 to lower type 1766c3ddc13dSArthur Eubanks // metadata and intrinsics. 1767c3ddc13dSArthur Eubanks MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary)); 1768c3ddc13dSArthur Eubanks MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); 1769c3ddc13dSArthur Eubanks } 1770c3ddc13dSArthur Eubanks 1771c3ddc13dSArthur Eubanks if (Level == OptimizationLevel::O0) { 1772c3ddc13dSArthur Eubanks // Run a second time to clean up any type tests left behind by WPD for use 1773c3ddc13dSArthur Eubanks // in ICP. 1774b01e2a8bSPaul Kirth MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, 1775b01e2a8bSPaul Kirth lowertypetests::DropTestKind::Assume)); 1776c3ddc13dSArthur Eubanks // Drop available_externally and unreferenced globals. This is necessary 1777c3ddc13dSArthur Eubanks // with ThinLTO in order to avoid leaving undefined references to dead 1778c3ddc13dSArthur Eubanks // globals in the object file. 
1779c3ddc13dSArthur Eubanks MPM.addPass(EliminateAvailableExternallyPass()); 1780c3ddc13dSArthur Eubanks MPM.addPass(GlobalDCEPass()); 1781c3ddc13dSArthur Eubanks return MPM; 1782c3ddc13dSArthur Eubanks } 17833b226180SMircea Trofin if (!UseCtxProfile.empty()) { 17843b226180SMircea Trofin MPM.addPass( 17853b226180SMircea Trofin buildModuleInlinerPipeline(Level, ThinOrFullLTOPhase::ThinLTOPostLink)); 17863b226180SMircea Trofin } else { 1787c3ddc13dSArthur Eubanks // Add the core simplification pipeline. 1788c3ddc13dSArthur Eubanks MPM.addPass(buildModuleSimplificationPipeline( 1789c3ddc13dSArthur Eubanks Level, ThinOrFullLTOPhase::ThinLTOPostLink)); 17903b226180SMircea Trofin } 1791c3ddc13dSArthur Eubanks // Now add the optimization pipeline. 1792e0d06959SMingming Liu MPM.addPass(buildModuleOptimizationPipeline( 1793e0d06959SMingming Liu Level, ThinOrFullLTOPhase::ThinLTOPostLink)); 1794c3ddc13dSArthur Eubanks 1795c3ddc13dSArthur Eubanks // Emit annotation remarks. 1796c3ddc13dSArthur Eubanks addAnnotationRemarksPass(MPM); 1797c3ddc13dSArthur Eubanks 1798c3ddc13dSArthur Eubanks return MPM; 1799c3ddc13dSArthur Eubanks } 1800c3ddc13dSArthur Eubanks 1801c3ddc13dSArthur Eubanks ModulePassManager 1802c3ddc13dSArthur Eubanks PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { 1803c3ddc13dSArthur Eubanks // FIXME: We should use a customized pre-link pipeline! 
1804c3ddc13dSArthur Eubanks return buildPerModuleDefaultPipeline(Level, 18055445edb5SShilei Tian ThinOrFullLTOPhase::FullLTOPreLink); 1806c3ddc13dSArthur Eubanks } 1807c3ddc13dSArthur Eubanks 1808c3ddc13dSArthur Eubanks ModulePassManager 1809c3ddc13dSArthur Eubanks PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, 1810c3ddc13dSArthur Eubanks ModuleSummaryIndex *ExportSummary) { 1811c3ddc13dSArthur Eubanks ModulePassManager MPM; 1812c3ddc13dSArthur Eubanks 181333817296SPrem Chintalapudi invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level); 1814942efa59SElia Geretto 1815c3ddc13dSArthur Eubanks // Create a function that performs CFI checks for cross-DSO calls with targets 1816c3ddc13dSArthur Eubanks // in the current module. 1817c3ddc13dSArthur Eubanks MPM.addPass(CrossDSOCFIPass()); 1818c3ddc13dSArthur Eubanks 1819c3ddc13dSArthur Eubanks if (Level == OptimizationLevel::O0) { 1820c3ddc13dSArthur Eubanks // The WPD and LowerTypeTest passes need to run at -O0 to lower type 1821c3ddc13dSArthur Eubanks // metadata and intrinsics. 1822c3ddc13dSArthur Eubanks MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); 1823c3ddc13dSArthur Eubanks MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1824c3ddc13dSArthur Eubanks // Run a second time to clean up any type tests left behind by WPD for use 1825c3ddc13dSArthur Eubanks // in ICP. 1826b01e2a8bSPaul Kirth MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, 1827b01e2a8bSPaul Kirth lowertypetests::DropTestKind::Assume)); 1828c3ddc13dSArthur Eubanks 182933817296SPrem Chintalapudi invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); 1830942efa59SElia Geretto 1831c3ddc13dSArthur Eubanks // Emit annotation remarks. 
1832c3ddc13dSArthur Eubanks addAnnotationRemarksPass(MPM); 1833c3ddc13dSArthur Eubanks 1834c3ddc13dSArthur Eubanks return MPM; 1835c3ddc13dSArthur Eubanks } 1836c3ddc13dSArthur Eubanks 1837c3ddc13dSArthur Eubanks if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { 1838c3ddc13dSArthur Eubanks // Load sample profile before running the LTO optimization pipeline. 1839c3ddc13dSArthur Eubanks MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, 1840c3ddc13dSArthur Eubanks PGOOpt->ProfileRemappingFile, 1841c3ddc13dSArthur Eubanks ThinOrFullLTOPhase::FullLTOPostLink)); 1842c3ddc13dSArthur Eubanks // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 1843c3ddc13dSArthur Eubanks // RequireAnalysisPass for PSI before subsequent non-module passes. 1844c3ddc13dSArthur Eubanks MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 1845c3ddc13dSArthur Eubanks } 1846c3ddc13dSArthur Eubanks 18479d3a4757SJoseph Huber // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. 18480bdde9dfSJoseph Huber MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); 18499d3a4757SJoseph Huber 1850c3ddc13dSArthur Eubanks // Remove unused virtual tables to improve the quality of code generated by 1851c3ddc13dSArthur Eubanks // whole-program devirtualization and bitset lowering. 1852200cc952STeresa Johnson MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true)); 1853c3ddc13dSArthur Eubanks 1854c3ddc13dSArthur Eubanks // Do basic inference of function attributes from known properties of system 1855c3ddc13dSArthur Eubanks // libraries and other oracles. 
1856c3ddc13dSArthur Eubanks MPM.addPass(InferFunctionAttrsPass()); 1857c3ddc13dSArthur Eubanks 1858c3ddc13dSArthur Eubanks if (Level.getSpeedupLevel() > 1) { 18597175886aSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor( 186018da6810SArthur Eubanks CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses)); 1861c3ddc13dSArthur Eubanks 1862c3ddc13dSArthur Eubanks // Indirect call promotion. This should promote all the targets that are 1863c3ddc13dSArthur Eubanks // left by the earlier promotion pass that promotes intra-module targets. 1864c3ddc13dSArthur Eubanks // This two-step promotion is to save the compile time. For LTO, it should 1865c3ddc13dSArthur Eubanks // produce the same result as if we only do promotion here. 1866c3ddc13dSArthur Eubanks MPM.addPass(PGOIndirectCallPromotion( 1867c3ddc13dSArthur Eubanks true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); 1868c3ddc13dSArthur Eubanks 1869fbd89bccSHari Limaye // Promoting by-reference arguments to by-value exposes more constants to 1870fbd89bccSHari Limaye // IPSCCP. 1871fbd89bccSHari Limaye CGSCCPassManager CGPM; 1872fbd89bccSHari Limaye CGPM.addPass(PostOrderFunctionAttrsPass()); 1873fbd89bccSHari Limaye CGPM.addPass(ArgumentPromotionPass()); 1874fbd89bccSHari Limaye CGPM.addPass( 1875fbd89bccSHari Limaye createCGSCCToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG))); 1876fbd89bccSHari Limaye MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); 1877fbd89bccSHari Limaye 1878c3ddc13dSArthur Eubanks // Propagate constants at call sites into the functions they call. This 1879c3ddc13dSArthur Eubanks // opens opportunities for globalopt (and inlining) by substituting function 1880c3ddc13dSArthur Eubanks // pointers passed as arguments to direct uses of functions. 
1881572a757fSAlexandros Lamprineas MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/ 1882572a757fSAlexandros Lamprineas Level != OptimizationLevel::Os && 1883572a757fSAlexandros Lamprineas Level != OptimizationLevel::Oz))); 1884c3ddc13dSArthur Eubanks 1885c3ddc13dSArthur Eubanks // Attach metadata to indirect call sites indicating the set of functions 1886c3ddc13dSArthur Eubanks // they may target at run-time. This should follow IPSCCP. 1887c3ddc13dSArthur Eubanks MPM.addPass(CalledValuePropagationPass()); 1888c3ddc13dSArthur Eubanks } 1889c3ddc13dSArthur Eubanks 1890c3ddc13dSArthur Eubanks // Do RPO function attribute inference across the module to forward-propagate 1891c3ddc13dSArthur Eubanks // attributes where applicable. 1892c3ddc13dSArthur Eubanks // FIXME: Is this really an optimization rather than a canonicalization? 1893c3ddc13dSArthur Eubanks MPM.addPass(ReversePostOrderFunctionAttrsPass()); 1894c3ddc13dSArthur Eubanks 1895c3ddc13dSArthur Eubanks // Use in-range annotations on GEP indices to split globals where beneficial. 1896c3ddc13dSArthur Eubanks MPM.addPass(GlobalSplitPass()); 1897c3ddc13dSArthur Eubanks 1898c3ddc13dSArthur Eubanks // Run whole program optimization of virtual call when the list of callees 1899c3ddc13dSArthur Eubanks // is fixed. 1900c3ddc13dSArthur Eubanks MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); 1901c3ddc13dSArthur Eubanks 1902c3ddc13dSArthur Eubanks // Stop here at -O1. 1903c3ddc13dSArthur Eubanks if (Level == OptimizationLevel::O1) { 1904c3ddc13dSArthur Eubanks // The LowerTypeTestsPass needs to run to lower type metadata and the 1905c3ddc13dSArthur Eubanks // type.test intrinsics. The pass does nothing if CFI is disabled. 
1906c3ddc13dSArthur Eubanks MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1907c3ddc13dSArthur Eubanks // Run a second time to clean up any type tests left behind by WPD for use 1908c3ddc13dSArthur Eubanks // in ICP (which is performed earlier than this in the regular LTO 1909c3ddc13dSArthur Eubanks // pipeline). 1910b01e2a8bSPaul Kirth MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, 1911b01e2a8bSPaul Kirth lowertypetests::DropTestKind::Assume)); 1912c3ddc13dSArthur Eubanks 191333817296SPrem Chintalapudi invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); 1914942efa59SElia Geretto 1915c3ddc13dSArthur Eubanks // Emit annotation remarks. 1916c3ddc13dSArthur Eubanks addAnnotationRemarksPass(MPM); 1917c3ddc13dSArthur Eubanks 1918c3ddc13dSArthur Eubanks return MPM; 1919c3ddc13dSArthur Eubanks } 1920c3ddc13dSArthur Eubanks 1921c3ddc13dSArthur Eubanks // Optimize globals to try and fold them into constants. 1922c3ddc13dSArthur Eubanks MPM.addPass(GlobalOptPass()); 1923c3ddc13dSArthur Eubanks 1924c3ddc13dSArthur Eubanks // Promote any localized globals to SSA registers. 1925c3ddc13dSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); 1926c3ddc13dSArthur Eubanks 1927c3ddc13dSArthur Eubanks // Linking modules together can lead to duplicate global constant, only 1928c3ddc13dSArthur Eubanks // keep one copy of each constant. 1929c3ddc13dSArthur Eubanks MPM.addPass(ConstantMergePass()); 1930c3ddc13dSArthur Eubanks 1931fb568344SNikita Popov // Remove unused arguments from functions. 1932fb568344SNikita Popov MPM.addPass(DeadArgumentEliminationPass()); 1933fb568344SNikita Popov 1934c3ddc13dSArthur Eubanks // Reduce the code after globalopt and ipsccp. Both can open up significant 1935c3ddc13dSArthur Eubanks // simplification opportunities, and both can propagate functions through 1936c3ddc13dSArthur Eubanks // function pointers. 
When this happens, we often have to resolve varargs 1937c3ddc13dSArthur Eubanks // calls, etc, so let instcombine do this. 1938c3ddc13dSArthur Eubanks FunctionPassManager PeepholeFPM; 1939c34d157fSAnton Afanasyev PeepholeFPM.addPass(InstCombinePass()); 194005a2f429SDávid Bolvanský if (Level.getSpeedupLevel() > 1) 1941c3ddc13dSArthur Eubanks PeepholeFPM.addPass(AggressiveInstCombinePass()); 1942c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(PeepholeFPM, Level); 1943c3ddc13dSArthur Eubanks 19447175886aSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM), 19457175886aSArthur Eubanks PTO.EagerlyInvalidateAnalyses)); 1946c3ddc13dSArthur Eubanks 19478758091aSJoseph Huber // Lower variadic functions for supported targets prior to inlining. 19488758091aSJoseph Huber MPM.addPass(ExpandVariadicsPass(ExpandVariadicsMode::Optimize)); 19498758091aSJoseph Huber 1950c3ddc13dSArthur Eubanks // Note: historically, the PruneEH pass was run first to deduce nounwind and 1951c3ddc13dSArthur Eubanks // generally clean up exception handling overhead. It isn't clear this is 1952c3ddc13dSArthur Eubanks // valuable as the inliner doesn't currently care whether it is inlining an 1953c3ddc13dSArthur Eubanks // invoke or a call. 1954c3ddc13dSArthur Eubanks // Run the inliner now. 
19551a36eaa5Sibricchi if (EnableModuleInliner) { 19561a36eaa5Sibricchi MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), 19571a36eaa5Sibricchi UseInlineAdvisor, 19581a36eaa5Sibricchi ThinOrFullLTOPhase::FullLTOPostLink)); 19591a36eaa5Sibricchi } else { 1960e0d06959SMingming Liu MPM.addPass(ModuleInlinerWrapperPass( 1961e0d06959SMingming Liu getInlineParamsFromOptLevel(Level), 1962e0d06959SMingming Liu /* MandatoryFirst */ true, 1963e0d06959SMingming Liu InlineContext{ThinOrFullLTOPhase::FullLTOPostLink, 1964e0d06959SMingming Liu InlinePass::CGSCCInliner})); 19651a36eaa5Sibricchi } 1966c3ddc13dSArthur Eubanks 1967700cd990STeresa Johnson // Perform context disambiguation after inlining, since that would reduce the 1968700cd990STeresa Johnson // amount of additional cloning required to distinguish the allocation 1969700cd990STeresa Johnson // contexts. 1970700cd990STeresa Johnson if (EnableMemProfContextDisambiguation) 19711de71652STeresa Johnson MPM.addPass(MemProfContextDisambiguation( 19721de71652STeresa Johnson /*Summary=*/nullptr, 19731de71652STeresa Johnson PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); 1974700cd990STeresa Johnson 1975c3ddc13dSArthur Eubanks // Optimize globals again after we ran the inliner. 1976c3ddc13dSArthur Eubanks MPM.addPass(GlobalOptPass()); 1977c3ddc13dSArthur Eubanks 19786185246fSJoseph Huber // Run the OpenMPOpt pass again after global optimizations. 19796185246fSJoseph Huber MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); 19806185246fSJoseph Huber 1981c3ddc13dSArthur Eubanks // Garbage collect dead functions. 1982200cc952STeresa Johnson MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true)); 1983c3ddc13dSArthur Eubanks 1984c3ddc13dSArthur Eubanks // If we didn't decide to inline a function, check to see if we can 1985c3ddc13dSArthur Eubanks // transform it to pass arguments by value instead of by reference. 
1986c3ddc13dSArthur Eubanks MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); 1987c3ddc13dSArthur Eubanks 1988c3ddc13dSArthur Eubanks FunctionPassManager FPM; 1989c3ddc13dSArthur Eubanks // The IPO Passes may leave cruft around. Clean up after them. 1990c3ddc13dSArthur Eubanks FPM.addPass(InstCombinePass()); 1991c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(FPM, Level); 1992c3ddc13dSArthur Eubanks 1993f3c1d926SFlorian Hahn if (EnableConstraintElimination) 1994f3c1d926SFlorian Hahn FPM.addPass(ConstraintEliminationPass()); 1995f3c1d926SFlorian Hahn 199636096c2bSArthur Eubanks FPM.addPass(JumpThreadingPass()); 1997c3ddc13dSArthur Eubanks 1998c3ddc13dSArthur Eubanks // Do a post inline PGO instrumentation and use pass. This is a context 1999c3ddc13dSArthur Eubanks // sensitive PGO pass. 2000c3ddc13dSArthur Eubanks if (PGOOpt) { 2001c3ddc13dSArthur Eubanks if (PGOOpt->CSAction == PGOOptions::CSIRInstr) 2002611ce241SQiongsi Wu addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, 2003611ce241SQiongsi Wu /*IsCS=*/true, PGOOpt->AtomicCounterUpdate, 2004611ce241SQiongsi Wu PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile, 200587f5e229STeresa Johnson PGOOpt->FS); 2006c3ddc13dSArthur Eubanks else if (PGOOpt->CSAction == PGOOptions::CSIRUse) 2007611ce241SQiongsi Wu addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, 2008611ce241SQiongsi Wu /*IsCS=*/true, PGOOpt->AtomicCounterUpdate, 2009611ce241SQiongsi Wu PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, 201087f5e229STeresa Johnson PGOOpt->FS); 2011c3ddc13dSArthur Eubanks } 2012c3ddc13dSArthur Eubanks 2013c3ddc13dSArthur Eubanks // Break up allocas 20144f7e5d22SRoman Lebedev FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 2015c3ddc13dSArthur Eubanks 2016c3ddc13dSArthur Eubanks // LTO provides additional opportunities for tailcall elimination due to 2017c3ddc13dSArthur Eubanks // link-time inlining, and visibility of nocapture attribute. 
2018c3ddc13dSArthur Eubanks FPM.addPass(TailCallElimPass()); 2019c3ddc13dSArthur Eubanks 2020c3ddc13dSArthur Eubanks // Run a few AA driver optimizations here and now to cleanup the code. 20217175886aSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), 20227175886aSArthur Eubanks PTO.EagerlyInvalidateAnalyses)); 2023c3ddc13dSArthur Eubanks 2024c3ddc13dSArthur Eubanks MPM.addPass( 2025c3ddc13dSArthur Eubanks createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); 2026c3ddc13dSArthur Eubanks 2027c3ddc13dSArthur Eubanks // Require the GlobalsAA analysis for the module so we can query it within 2028c3ddc13dSArthur Eubanks // MainFPM. 2029281ae490SNuno Lopes if (EnableGlobalAnalyses) { 2030c3ddc13dSArthur Eubanks MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); 2031281ae490SNuno Lopes // Invalidate AAManager so it can be recreated and pick up the newly 2032281ae490SNuno Lopes // available GlobalsAA. 2033c3ddc13dSArthur Eubanks MPM.addPass( 2034c3ddc13dSArthur Eubanks createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); 2035281ae490SNuno Lopes } 2036c3ddc13dSArthur Eubanks 2037c3ddc13dSArthur Eubanks FunctionPassManager MainFPM; 2038c3ddc13dSArthur Eubanks MainFPM.addPass(createFunctionToLoopPassAdaptor( 2039d9da6a53SWilliam S. Moses LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 2040d9da6a53SWilliam S. Moses /*AllowSpeculation=*/true), 2041384a8dd1SNikita Popov /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); 2042c3ddc13dSArthur Eubanks 2043c3ddc13dSArthur Eubanks if (RunNewGVN) 2044c3ddc13dSArthur Eubanks MainFPM.addPass(NewGVNPass()); 2045c3ddc13dSArthur Eubanks else 20461d8750c3SArthur Eubanks MainFPM.addPass(GVNPass()); 2047c3ddc13dSArthur Eubanks 2048c3ddc13dSArthur Eubanks // Remove dead memcpy()'s. 2049c3ddc13dSArthur Eubanks MainFPM.addPass(MemCpyOptPass()); 2050c3ddc13dSArthur Eubanks 2051c3ddc13dSArthur Eubanks // Nuke dead stores. 
2052c3ddc13dSArthur Eubanks MainFPM.addPass(DSEPass()); 2053afa13ba1Sserge-sans-paille MainFPM.addPass(MoveAutoInitPass()); 2054c3ddc13dSArthur Eubanks MainFPM.addPass(MergedLoadStoreMotionPass()); 2055c3ddc13dSArthur Eubanks 2056*d3161defSAxel Sorenson invokeVectorizerStartEPCallbacks(MainFPM, Level); 2057*d3161defSAxel Sorenson 2058c3ddc13dSArthur Eubanks LoopPassManager LPM; 2059016022e5SSjoerd Meijer if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) 2060016022e5SSjoerd Meijer LPM.addPass(LoopFlattenPass()); 2061f269ec23SSjoerd Meijer LPM.addPass(IndVarSimplifyPass()); 2062f269ec23SSjoerd Meijer LPM.addPass(LoopDeletionPass()); 2063c3ddc13dSArthur Eubanks // FIXME: Add loop interchange. 2064c3ddc13dSArthur Eubanks 2065c3ddc13dSArthur Eubanks // Unroll small loops and perform peeling. 2066c3ddc13dSArthur Eubanks LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 2067c3ddc13dSArthur Eubanks /* OnlyWhenForced= */ !PTO.LoopUnrolling, 2068c3ddc13dSArthur Eubanks PTO.ForgetAllSCEVInLoopUnroll)); 2069c3ddc13dSArthur Eubanks // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. 2070c3ddc13dSArthur Eubanks // *All* loop passes must preserve it, in order to be able to use it. 2071c3ddc13dSArthur Eubanks MainFPM.addPass(createFunctionToLoopPassAdaptor( 2072c3ddc13dSArthur Eubanks std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); 2073c3ddc13dSArthur Eubanks 2074c3ddc13dSArthur Eubanks MainFPM.addPass(LoopDistributePass()); 2075c3ddc13dSArthur Eubanks 2076c3ddc13dSArthur Eubanks addVectorPasses(Level, MainFPM, /* IsFullLTO */ true); 2077c3ddc13dSArthur Eubanks 2078*d3161defSAxel Sorenson invokeVectorizerEndEPCallbacks(MainFPM, Level); 2079*d3161defSAxel Sorenson 20809d3a4757SJoseph Huber // Run the OpenMPOpt CGSCC pass again late. 
20810bdde9dfSJoseph Huber MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( 20820bdde9dfSJoseph Huber OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink))); 20839d3a4757SJoseph Huber 2084c3ddc13dSArthur Eubanks invokePeepholeEPCallbacks(MainFPM, Level); 208536096c2bSArthur Eubanks MainFPM.addPass(JumpThreadingPass()); 20867175886aSArthur Eubanks MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), 20877175886aSArthur Eubanks PTO.EagerlyInvalidateAnalyses)); 2088c3ddc13dSArthur Eubanks 2089c3ddc13dSArthur Eubanks // Lower type metadata and the type.test intrinsic. This pass supports 2090c3ddc13dSArthur Eubanks // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs 2091c3ddc13dSArthur Eubanks // to be run at link time if CFI is enabled. This pass does nothing if 2092c3ddc13dSArthur Eubanks // CFI is disabled. 2093c3ddc13dSArthur Eubanks MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 2094c3ddc13dSArthur Eubanks // Run a second time to clean up any type tests left behind by WPD for use 2095c3ddc13dSArthur Eubanks // in ICP (which is performed earlier than this in the regular LTO pipeline). 2096b01e2a8bSPaul Kirth MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, 2097b01e2a8bSPaul Kirth lowertypetests::DropTestKind::Assume)); 2098c3ddc13dSArthur Eubanks 2099ec9a0e36SKazu Hirata // Enable splitting late in the FullLTO post-link pipeline. 2100c3ddc13dSArthur Eubanks if (EnableHotColdSplit) 2101c3ddc13dSArthur Eubanks MPM.addPass(HotColdSplittingPass()); 2102c3ddc13dSArthur Eubanks 2103c3ddc13dSArthur Eubanks // Add late LTO optimization passes. 210473b6b323SNikita Popov FunctionPassManager LateFPM; 210573b6b323SNikita Popov 210673b6b323SNikita Popov // LoopSink pass sinks instructions hoisted by LICM, which serves as a 210773b6b323SNikita Popov // canonicalization pass that enables other optimizations. 
As a result, 210873b6b323SNikita Popov // LoopSink pass needs to be a very late IR pass to avoid undoing LICM 210973b6b323SNikita Popov // result too early. 211073b6b323SNikita Popov LateFPM.addPass(LoopSinkPass()); 211173b6b323SNikita Popov 211273b6b323SNikita Popov // This hoists/decomposes div/rem ops. It should run after other sink/hoist 211373b6b323SNikita Popov // passes to avoid re-sinking, but before SimplifyCFG because it can allow 211473b6b323SNikita Popov // flattening of blocks. 211573b6b323SNikita Popov LateFPM.addPass(DivRemPairsPass()); 211673b6b323SNikita Popov 2117c3ddc13dSArthur Eubanks // Delete basic blocks, which optimization passes may have killed. 21183d494bfcSTianqing Wang LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() 21193d494bfcSTianqing Wang .convertSwitchRangeToICmp(true) 21203d494bfcSTianqing Wang .hoistCommonInsts(true) 21213d494bfcSTianqing Wang .speculateUnpredictables(true))); 212273b6b323SNikita Popov MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM))); 2123c3ddc13dSArthur Eubanks 2124c3ddc13dSArthur Eubanks // Drop bodies of available eternally objects to improve GlobalDCE. 2125c3ddc13dSArthur Eubanks MPM.addPass(EliminateAvailableExternallyPass()); 2126c3ddc13dSArthur Eubanks 2127c3ddc13dSArthur Eubanks // Now that we have optimized the program, discard unreachable functions. 
2128200cc952STeresa Johnson MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true)); 2129c3ddc13dSArthur Eubanks 2130c3ddc13dSArthur Eubanks if (PTO.MergeFunctions) 2131c3ddc13dSArthur Eubanks MPM.addPass(MergeFunctionsPass()); 2132c3ddc13dSArthur Eubanks 213338902153Sgulfemsavrun MPM.addPass(RelLookupTableConverterPass()); 213438902153Sgulfemsavrun 2135325e7e8bSBen Dunbobbin if (PTO.CallGraphProfile) 21365ce28684SMingming Liu MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true)); 2137325e7e8bSBen Dunbobbin 213833817296SPrem Chintalapudi invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); 2139942efa59SElia Geretto 2140c3ddc13dSArthur Eubanks // Emit annotation remarks. 2141c3ddc13dSArthur Eubanks addAnnotationRemarksPass(MPM); 2142c3ddc13dSArthur Eubanks 2143c3ddc13dSArthur Eubanks return MPM; 2144c3ddc13dSArthur Eubanks } 2145c3ddc13dSArthur Eubanks

// Build the module pass pipeline used when optimizations are disabled.
//
// \p Level must be OptimizationLevel::O0 (asserted). \p Phase is forwarded to
// the extension-point callbacks that take an LTO phase and decides whether the
// required LTO pre-link passes are appended at the end.
//
// Passes are scheduled in the order they are added below; registered
// extension-point callbacks are still honoured so that plugins and frontends
// can inject passes even at O0.
ModulePassManager
PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
                                    ThinOrFullLTOPhase Phase) {
  assert(Level == OptimizationLevel::O0 &&
         "buildO0DefaultPipeline should only be used with O0");

  ModulePassManager MPM;

  // Perform pseudo probe instrumentation in O0 mode. This is for the
  // consistency between different build modes. For example, a LTO build can be
  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
  // the postlink will require pseudo probe instrumentation in the prelink.
  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
    MPM.addPass(SampleProfileProbePass(TM));

  // IR-level PGO: instrument (IRInstr) or consume a profile (IRUse). This is
  // never the context-sensitive variant at O0.
  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
                 PGOOpt->Action == PGOOptions::IRUse))
    addPGOInstrPassesForO0(
        MPM,
        /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
        /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
        PGOOpt->ProfileRemappingFile, PGOOpt->FS);

  // Instrument function entry and exit before all inlining.
  MPM.addPass(createModuleToFunctionPassAdaptor(
      EntryExitInstrumenterPass(/*PostInlining=*/false)));

  invokePipelineStartEPCallbacks(MPM, Level);

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
    // Explicitly disable sample loader inlining and use flattened profile in O0
    // pipeline.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile,
                                        ThinOrFullLTOPhase::None, nullptr,
                                        /*DisableSampleProfileInlining=*/true,
                                        /*UseFlattenedProfile=*/true));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  }

  invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);

  // Build a minimal pipeline based on the semantics required by LLVM,
  // which is just that always inlining occurs. Further, disable generating
  // lifetime intrinsics to avoid enabling further optimizations during
  // code generation.
  MPM.addPass(AlwaysInlinerPass(
      /*InsertLifetimeIntrinsics=*/false));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (EnableMatrix)
    MPM.addPass(
        createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));

  // The extension points below operate on CGSCC, loop, or function pass
  // managers. For each one a temporary pass manager is created only if at
  // least one callback is registered, and it is scheduled only if the
  // callbacks actually added a pass, so an unused extension point adds
  // nothing to the O0 pipeline.
  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
    CGSCCPassManager CGPM;
    invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
    if (!CGPM.isEmpty())
      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  }
  if (!LateLoopOptimizationsEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!LoopOptimizerEndEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLoopOptimizerEndEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!ScalarOptimizerLateEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeScalarOptimizerLateEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  // This extension point takes the module pass manager directly.
  invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);

  if (!VectorizerStartEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeVectorizerStartEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  if (!VectorizerEndEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeVectorizerEndEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  // Coroutine passes are collected in their own module pass manager
  // (CoroEarly, CoroSplit over the call graph, CoroCleanup, then GlobalDCE)
  // and handed to CoroConditionalWrapper as a single unit.
  ModulePassManager CoroPM;
  CoroPM.addPass(CoroEarlyPass());
  CGSCCPassManager CGPM;
  CGPM.addPass(CoroSplitPass());
  CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  CoroPM.addPass(CoroCleanupPass());
  CoroPM.addPass(GlobalDCEPass());
  MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));

  invokeOptimizerLastEPCallbacks(MPM, Level, Phase);

  if (isLTOPreLink(Phase))
    addRequiredLTOPreLinkPasses(MPM);

  // Emit annotation remarks.
  MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));

  return MPM;
}

// Build the default alias-analysis manager.
//
// Registers BasicAA, ScopedNoAliasAA and TypeBasedAA (in that order), then
// GlobalsAA when EnableGlobalAnalyses is set, and finally lets the target
// machine, if one is available, register its own alias analyses.
AAManager PassBuilder::buildDefaultAAPipeline() {
  AAManager AA;

  // The order in which these are registered determines their priority when
  // being queried.

  // First we register the basic alias analysis that provides the majority of
  // per-function local AA logic. This is a stateless, on-demand local set of
  // AA techniques.
  AA.registerFunctionAnalysis<BasicAA>();

  // Next we query fast, specialized alias analyses that wrap IR-embedded
  // information about aliasing.
  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
  AA.registerFunctionAnalysis<TypeBasedAA>();

  // Add support for querying global aliasing information when available.
  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
  // analysis, all that the `AAManager` can do is query for any *cached*
  // results from `GlobalsAA` through a readonly proxy.
  if (EnableGlobalAnalyses)
    AA.registerModuleAnalysis<GlobalsAA>();

  // Add target-specific alias analyses.
  if (TM)
    TM->registerDefaultAliasAnalyses(AA);

  return AA;
}