//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

32fe6060f1SDimitry Andric using namespace llvm; 33fe6060f1SDimitry Andric 34fe6060f1SDimitry Andric using VPLegalization = TargetTransformInfo::VPLegalization; 35fe6060f1SDimitry Andric using VPTransform = TargetTransformInfo::VPLegalization::VPTransform; 36fe6060f1SDimitry Andric 37fe6060f1SDimitry Andric // Keep this in sync with TargetTransformInfo::VPLegalization. 38fe6060f1SDimitry Andric #define VPINTERNAL_VPLEGAL_CASES \ 39fe6060f1SDimitry Andric VPINTERNAL_CASE(Legal) \ 40fe6060f1SDimitry Andric VPINTERNAL_CASE(Discard) \ 41fe6060f1SDimitry Andric VPINTERNAL_CASE(Convert) 42fe6060f1SDimitry Andric 43fe6060f1SDimitry Andric #define VPINTERNAL_CASE(X) "|" #X 44fe6060f1SDimitry Andric 45fe6060f1SDimitry Andric // Override options. 46fe6060f1SDimitry Andric static cl::opt<std::string> EVLTransformOverride( 47fe6060f1SDimitry Andric "expandvp-override-evl-transform", cl::init(""), cl::Hidden, 48fe6060f1SDimitry Andric cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 49fe6060f1SDimitry Andric ". If non-empty, ignore " 50fe6060f1SDimitry Andric "TargetTransformInfo and " 51fe6060f1SDimitry Andric "always use this transformation for the %evl parameter (Used in " 52fe6060f1SDimitry Andric "testing).")); 53fe6060f1SDimitry Andric 54fe6060f1SDimitry Andric static cl::opt<std::string> MaskTransformOverride( 55fe6060f1SDimitry Andric "expandvp-override-mask-transform", cl::init(""), cl::Hidden, 56fe6060f1SDimitry Andric cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 57fe6060f1SDimitry Andric ". 
If non-empty, Ignore " 58fe6060f1SDimitry Andric "TargetTransformInfo and " 59fe6060f1SDimitry Andric "always use this transformation for the %mask parameter (Used in " 60fe6060f1SDimitry Andric "testing).")); 61fe6060f1SDimitry Andric 62fe6060f1SDimitry Andric #undef VPINTERNAL_CASE 63fe6060f1SDimitry Andric #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X) 64fe6060f1SDimitry Andric 65fe6060f1SDimitry Andric static VPTransform parseOverrideOption(const std::string &TextOpt) { 66fe6060f1SDimitry Andric return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES; 67fe6060f1SDimitry Andric } 68fe6060f1SDimitry Andric 69fe6060f1SDimitry Andric #undef VPINTERNAL_VPLEGAL_CASES 70fe6060f1SDimitry Andric 71fe6060f1SDimitry Andric // Whether any override options are set. 72fe6060f1SDimitry Andric static bool anyExpandVPOverridesSet() { 73fe6060f1SDimitry Andric return !EVLTransformOverride.empty() || !MaskTransformOverride.empty(); 74fe6060f1SDimitry Andric } 75fe6060f1SDimitry Andric 76fe6060f1SDimitry Andric #define DEBUG_TYPE "expandvp" 77fe6060f1SDimitry Andric 78fe6060f1SDimitry Andric STATISTIC(NumFoldedVL, "Number of folded vector length params"); 79fe6060f1SDimitry Andric STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); 80fe6060f1SDimitry Andric 81fe6060f1SDimitry Andric ///// Helpers { 82fe6060f1SDimitry Andric 83fe6060f1SDimitry Andric /// \returns Whether the vector mask \p MaskVal has all lane bits set. 84fe6060f1SDimitry Andric static bool isAllTrueMask(Value *MaskVal) { 85fe6060f1SDimitry Andric auto *ConstVec = dyn_cast<ConstantVector>(MaskVal); 86fe6060f1SDimitry Andric return ConstVec && ConstVec->isAllOnesValue(); 87fe6060f1SDimitry Andric } 88fe6060f1SDimitry Andric 89fe6060f1SDimitry Andric /// \returns A non-excepting divisor constant for this type. 
90fe6060f1SDimitry Andric static Constant *getSafeDivisor(Type *DivTy) { 91fe6060f1SDimitry Andric assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type"); 92fe6060f1SDimitry Andric return ConstantInt::get(DivTy, 1u, false); 93fe6060f1SDimitry Andric } 94fe6060f1SDimitry Andric 95fe6060f1SDimitry Andric /// Transfer operation properties from \p OldVPI to \p NewVal. 96fe6060f1SDimitry Andric static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) { 97fe6060f1SDimitry Andric auto *NewInst = dyn_cast<Instruction>(&NewVal); 98fe6060f1SDimitry Andric if (!NewInst || !isa<FPMathOperator>(NewVal)) 99fe6060f1SDimitry Andric return; 100fe6060f1SDimitry Andric 101fe6060f1SDimitry Andric auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI); 102fe6060f1SDimitry Andric if (!OldFMOp) 103fe6060f1SDimitry Andric return; 104fe6060f1SDimitry Andric 105fe6060f1SDimitry Andric NewInst->setFastMathFlags(OldFMOp->getFastMathFlags()); 106fe6060f1SDimitry Andric } 107fe6060f1SDimitry Andric 108fe6060f1SDimitry Andric /// Transfer all properties from \p OldOp to \p NewOp and replace all uses. 109fe6060f1SDimitry Andric /// OldVP gets erased. 110fe6060f1SDimitry Andric static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) { 111fe6060f1SDimitry Andric transferDecorations(NewOp, OldOp); 112fe6060f1SDimitry Andric OldOp.replaceAllUsesWith(&NewOp); 113fe6060f1SDimitry Andric OldOp.eraseFromParent(); 114fe6060f1SDimitry Andric } 115fe6060f1SDimitry Andric 116*81ad6265SDimitry Andric static bool maySpeculateLanes(VPIntrinsic &VPI) { 117*81ad6265SDimitry Andric // The result of VP reductions depends on the mask and evl. 118*81ad6265SDimitry Andric if (isa<VPReductionIntrinsic>(VPI)) 119*81ad6265SDimitry Andric return false; 120*81ad6265SDimitry Andric // Fallback to whether the intrinsic is speculatable. 
121*81ad6265SDimitry Andric Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode(); 122*81ad6265SDimitry Andric unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call); 123*81ad6265SDimitry Andric return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, 124*81ad6265SDimitry Andric cast<Operator>(&VPI)); 125*81ad6265SDimitry Andric } 126*81ad6265SDimitry Andric 127fe6060f1SDimitry Andric //// } Helpers 128fe6060f1SDimitry Andric 129fe6060f1SDimitry Andric namespace { 130fe6060f1SDimitry Andric 131fe6060f1SDimitry Andric // Expansion pass state at function scope. 132fe6060f1SDimitry Andric struct CachingVPExpander { 133fe6060f1SDimitry Andric Function &F; 134fe6060f1SDimitry Andric const TargetTransformInfo &TTI; 135fe6060f1SDimitry Andric 136fe6060f1SDimitry Andric /// \returns A (fixed length) vector with ascending integer indices 137fe6060f1SDimitry Andric /// (<0, 1, ..., NumElems-1>). 138fe6060f1SDimitry Andric /// \p Builder 139fe6060f1SDimitry Andric /// Used for instruction creation. 140fe6060f1SDimitry Andric /// \p LaneTy 141fe6060f1SDimitry Andric /// Integer element type of the result vector. 142fe6060f1SDimitry Andric /// \p NumElems 143fe6060f1SDimitry Andric /// Number of vector elements. 144fe6060f1SDimitry Andric Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy, 145fe6060f1SDimitry Andric unsigned NumElems); 146fe6060f1SDimitry Andric 147fe6060f1SDimitry Andric /// \returns A bitmask that is true where the lane position is less-than \p 148fe6060f1SDimitry Andric /// EVLParam 149fe6060f1SDimitry Andric /// 150fe6060f1SDimitry Andric /// \p Builder 151fe6060f1SDimitry Andric /// Used for instruction creation. 152fe6060f1SDimitry Andric /// \p VLParam 153fe6060f1SDimitry Andric /// The explicit vector length parameter to test against the lane 154fe6060f1SDimitry Andric /// positions. 155fe6060f1SDimitry Andric /// \p ElemCount 156fe6060f1SDimitry Andric /// Static (potentially scalable) number of vector elements. 
157fe6060f1SDimitry Andric Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam, 158fe6060f1SDimitry Andric ElementCount ElemCount); 159fe6060f1SDimitry Andric 160fe6060f1SDimitry Andric Value *foldEVLIntoMask(VPIntrinsic &VPI); 161fe6060f1SDimitry Andric 162fe6060f1SDimitry Andric /// "Remove" the %evl parameter of \p PI by setting it to the static vector 163fe6060f1SDimitry Andric /// length of the operation. 164fe6060f1SDimitry Andric void discardEVLParameter(VPIntrinsic &PI); 165fe6060f1SDimitry Andric 166fe6060f1SDimitry Andric /// \brief Lower this VP binary operator to a unpredicated binary operator. 167fe6060f1SDimitry Andric Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, 168fe6060f1SDimitry Andric VPIntrinsic &PI); 169fe6060f1SDimitry Andric 170349cc55cSDimitry Andric /// \brief Lower this VP reduction to a call to an unpredicated reduction 171349cc55cSDimitry Andric /// intrinsic. 172349cc55cSDimitry Andric Value *expandPredicationInReduction(IRBuilder<> &Builder, 173349cc55cSDimitry Andric VPReductionIntrinsic &PI); 174349cc55cSDimitry Andric 175fe6060f1SDimitry Andric /// \brief Query TTI and expand the vector predication in \p P accordingly. 176fe6060f1SDimitry Andric Value *expandPredication(VPIntrinsic &PI); 177fe6060f1SDimitry Andric 178fe6060f1SDimitry Andric /// \brief Determine how and whether the VPIntrinsic \p VPI shall be 179fe6060f1SDimitry Andric /// expanded. This overrides TTI with the cl::opts listed at the top of this 180fe6060f1SDimitry Andric /// file. 
181fe6060f1SDimitry Andric VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const; 182fe6060f1SDimitry Andric bool UsingTTIOverrides; 183fe6060f1SDimitry Andric 184fe6060f1SDimitry Andric public: 185fe6060f1SDimitry Andric CachingVPExpander(Function &F, const TargetTransformInfo &TTI) 186fe6060f1SDimitry Andric : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {} 187fe6060f1SDimitry Andric 188fe6060f1SDimitry Andric bool expandVectorPredication(); 189fe6060f1SDimitry Andric }; 190fe6060f1SDimitry Andric 191fe6060f1SDimitry Andric //// CachingVPExpander { 192fe6060f1SDimitry Andric 193fe6060f1SDimitry Andric Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy, 194fe6060f1SDimitry Andric unsigned NumElems) { 195fe6060f1SDimitry Andric // TODO add caching 196fe6060f1SDimitry Andric SmallVector<Constant *, 16> ConstElems; 197fe6060f1SDimitry Andric 198fe6060f1SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx) 199fe6060f1SDimitry Andric ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false)); 200fe6060f1SDimitry Andric 201fe6060f1SDimitry Andric return ConstantVector::get(ConstElems); 202fe6060f1SDimitry Andric } 203fe6060f1SDimitry Andric 204fe6060f1SDimitry Andric Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder, 205fe6060f1SDimitry Andric Value *EVLParam, 206fe6060f1SDimitry Andric ElementCount ElemCount) { 207fe6060f1SDimitry Andric // TODO add caching 208fe6060f1SDimitry Andric // Scalable vector %evl conversion. 
209fe6060f1SDimitry Andric if (ElemCount.isScalable()) { 210fe6060f1SDimitry Andric auto *M = Builder.GetInsertBlock()->getModule(); 211fe6060f1SDimitry Andric Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount); 212fe6060f1SDimitry Andric Function *ActiveMaskFunc = Intrinsic::getDeclaration( 213fe6060f1SDimitry Andric M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()}); 214fe6060f1SDimitry Andric // `get_active_lane_mask` performs an implicit less-than comparison. 215fe6060f1SDimitry Andric Value *ConstZero = Builder.getInt32(0); 216fe6060f1SDimitry Andric return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam}); 217fe6060f1SDimitry Andric } 218fe6060f1SDimitry Andric 219fe6060f1SDimitry Andric // Fixed vector %evl conversion. 220fe6060f1SDimitry Andric Type *LaneTy = EVLParam->getType(); 221fe6060f1SDimitry Andric unsigned NumElems = ElemCount.getFixedValue(); 222fe6060f1SDimitry Andric Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam); 223fe6060f1SDimitry Andric Value *IdxVec = createStepVector(Builder, LaneTy, NumElems); 224fe6060f1SDimitry Andric return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat); 225fe6060f1SDimitry Andric } 226fe6060f1SDimitry Andric 227fe6060f1SDimitry Andric Value * 228fe6060f1SDimitry Andric CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, 229fe6060f1SDimitry Andric VPIntrinsic &VPI) { 230*81ad6265SDimitry Andric assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 231fe6060f1SDimitry Andric "Implicitly dropping %evl in non-speculatable operator!"); 232fe6060f1SDimitry Andric 233fe6060f1SDimitry Andric auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode()); 234fe6060f1SDimitry Andric assert(Instruction::isBinaryOp(OC)); 235fe6060f1SDimitry Andric 236fe6060f1SDimitry Andric Value *Op0 = VPI.getOperand(0); 237fe6060f1SDimitry Andric Value *Op1 = VPI.getOperand(1); 238fe6060f1SDimitry Andric Value *Mask = 
VPI.getMaskParam(); 239fe6060f1SDimitry Andric 240fe6060f1SDimitry Andric // Blend in safe operands. 241fe6060f1SDimitry Andric if (Mask && !isAllTrueMask(Mask)) { 242fe6060f1SDimitry Andric switch (OC) { 243fe6060f1SDimitry Andric default: 244fe6060f1SDimitry Andric // Can safely ignore the predicate. 245fe6060f1SDimitry Andric break; 246fe6060f1SDimitry Andric 247fe6060f1SDimitry Andric // Division operators need a safe divisor on masked-off lanes (1). 248fe6060f1SDimitry Andric case Instruction::UDiv: 249fe6060f1SDimitry Andric case Instruction::SDiv: 250fe6060f1SDimitry Andric case Instruction::URem: 251fe6060f1SDimitry Andric case Instruction::SRem: 252fe6060f1SDimitry Andric // 2nd operand must not be zero. 253fe6060f1SDimitry Andric Value *SafeDivisor = getSafeDivisor(VPI.getType()); 254fe6060f1SDimitry Andric Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor); 255fe6060f1SDimitry Andric } 256fe6060f1SDimitry Andric } 257fe6060f1SDimitry Andric 258fe6060f1SDimitry Andric Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName()); 259fe6060f1SDimitry Andric 260fe6060f1SDimitry Andric replaceOperation(*NewBinOp, VPI); 261fe6060f1SDimitry Andric return NewBinOp; 262fe6060f1SDimitry Andric } 263fe6060f1SDimitry Andric 264349cc55cSDimitry Andric static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, 265349cc55cSDimitry Andric Type *EltTy) { 266349cc55cSDimitry Andric bool Negative = false; 267349cc55cSDimitry Andric unsigned EltBits = EltTy->getScalarSizeInBits(); 268349cc55cSDimitry Andric switch (VPI.getIntrinsicID()) { 269349cc55cSDimitry Andric default: 270349cc55cSDimitry Andric llvm_unreachable("Expecting a VP reduction intrinsic"); 271349cc55cSDimitry Andric case Intrinsic::vp_reduce_add: 272349cc55cSDimitry Andric case Intrinsic::vp_reduce_or: 273349cc55cSDimitry Andric case Intrinsic::vp_reduce_xor: 274349cc55cSDimitry Andric case Intrinsic::vp_reduce_umax: 275349cc55cSDimitry Andric return Constant::getNullValue(EltTy); 
276349cc55cSDimitry Andric case Intrinsic::vp_reduce_mul: 277349cc55cSDimitry Andric return ConstantInt::get(EltTy, 1, /*IsSigned*/ false); 278349cc55cSDimitry Andric case Intrinsic::vp_reduce_and: 279349cc55cSDimitry Andric case Intrinsic::vp_reduce_umin: 280349cc55cSDimitry Andric return ConstantInt::getAllOnesValue(EltTy); 281349cc55cSDimitry Andric case Intrinsic::vp_reduce_smin: 282349cc55cSDimitry Andric return ConstantInt::get(EltTy->getContext(), 283349cc55cSDimitry Andric APInt::getSignedMaxValue(EltBits)); 284349cc55cSDimitry Andric case Intrinsic::vp_reduce_smax: 285349cc55cSDimitry Andric return ConstantInt::get(EltTy->getContext(), 286349cc55cSDimitry Andric APInt::getSignedMinValue(EltBits)); 287349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmax: 288349cc55cSDimitry Andric Negative = true; 289349cc55cSDimitry Andric LLVM_FALLTHROUGH; 290349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmin: { 291349cc55cSDimitry Andric FastMathFlags Flags = VPI.getFastMathFlags(); 292349cc55cSDimitry Andric const fltSemantics &Semantics = EltTy->getFltSemantics(); 293349cc55cSDimitry Andric return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative) 294349cc55cSDimitry Andric : !Flags.noInfs() 295349cc55cSDimitry Andric ? 
ConstantFP::getInfinity(EltTy, Negative) 296349cc55cSDimitry Andric : ConstantFP::get(EltTy, 297349cc55cSDimitry Andric APFloat::getLargest(Semantics, Negative)); 298349cc55cSDimitry Andric } 299349cc55cSDimitry Andric case Intrinsic::vp_reduce_fadd: 300349cc55cSDimitry Andric return ConstantFP::getNegativeZero(EltTy); 301349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmul: 302349cc55cSDimitry Andric return ConstantFP::get(EltTy, 1.0); 303349cc55cSDimitry Andric } 304349cc55cSDimitry Andric } 305349cc55cSDimitry Andric 306349cc55cSDimitry Andric Value * 307349cc55cSDimitry Andric CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, 308349cc55cSDimitry Andric VPReductionIntrinsic &VPI) { 309*81ad6265SDimitry Andric assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 310349cc55cSDimitry Andric "Implicitly dropping %evl in non-speculatable operator!"); 311349cc55cSDimitry Andric 312349cc55cSDimitry Andric Value *Mask = VPI.getMaskParam(); 313349cc55cSDimitry Andric Value *RedOp = VPI.getOperand(VPI.getVectorParamPos()); 314349cc55cSDimitry Andric 315349cc55cSDimitry Andric // Insert neutral element in masked-out positions 316349cc55cSDimitry Andric if (Mask && !isAllTrueMask(Mask)) { 317349cc55cSDimitry Andric auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType()); 318349cc55cSDimitry Andric auto *NeutralVector = Builder.CreateVectorSplat( 319349cc55cSDimitry Andric cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt); 320349cc55cSDimitry Andric RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector); 321349cc55cSDimitry Andric } 322349cc55cSDimitry Andric 323349cc55cSDimitry Andric Value *Reduction; 324349cc55cSDimitry Andric Value *Start = VPI.getOperand(VPI.getStartParamPos()); 325349cc55cSDimitry Andric 326349cc55cSDimitry Andric switch (VPI.getIntrinsicID()) { 327349cc55cSDimitry Andric default: 328349cc55cSDimitry Andric llvm_unreachable("Impossible reduction kind"); 329349cc55cSDimitry Andric case 
Intrinsic::vp_reduce_add: 330349cc55cSDimitry Andric Reduction = Builder.CreateAddReduce(RedOp); 331349cc55cSDimitry Andric Reduction = Builder.CreateAdd(Reduction, Start); 332349cc55cSDimitry Andric break; 333349cc55cSDimitry Andric case Intrinsic::vp_reduce_mul: 334349cc55cSDimitry Andric Reduction = Builder.CreateMulReduce(RedOp); 335349cc55cSDimitry Andric Reduction = Builder.CreateMul(Reduction, Start); 336349cc55cSDimitry Andric break; 337349cc55cSDimitry Andric case Intrinsic::vp_reduce_and: 338349cc55cSDimitry Andric Reduction = Builder.CreateAndReduce(RedOp); 339349cc55cSDimitry Andric Reduction = Builder.CreateAnd(Reduction, Start); 340349cc55cSDimitry Andric break; 341349cc55cSDimitry Andric case Intrinsic::vp_reduce_or: 342349cc55cSDimitry Andric Reduction = Builder.CreateOrReduce(RedOp); 343349cc55cSDimitry Andric Reduction = Builder.CreateOr(Reduction, Start); 344349cc55cSDimitry Andric break; 345349cc55cSDimitry Andric case Intrinsic::vp_reduce_xor: 346349cc55cSDimitry Andric Reduction = Builder.CreateXorReduce(RedOp); 347349cc55cSDimitry Andric Reduction = Builder.CreateXor(Reduction, Start); 348349cc55cSDimitry Andric break; 349349cc55cSDimitry Andric case Intrinsic::vp_reduce_smax: 350349cc55cSDimitry Andric Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true); 351349cc55cSDimitry Andric Reduction = 352349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start); 353349cc55cSDimitry Andric break; 354349cc55cSDimitry Andric case Intrinsic::vp_reduce_smin: 355349cc55cSDimitry Andric Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true); 356349cc55cSDimitry Andric Reduction = 357349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start); 358349cc55cSDimitry Andric break; 359349cc55cSDimitry Andric case Intrinsic::vp_reduce_umax: 360349cc55cSDimitry Andric Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false); 361349cc55cSDimitry Andric Reduction = 
362349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start); 363349cc55cSDimitry Andric break; 364349cc55cSDimitry Andric case Intrinsic::vp_reduce_umin: 365349cc55cSDimitry Andric Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false); 366349cc55cSDimitry Andric Reduction = 367349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start); 368349cc55cSDimitry Andric break; 369349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmax: 370349cc55cSDimitry Andric Reduction = Builder.CreateFPMaxReduce(RedOp); 371349cc55cSDimitry Andric transferDecorations(*Reduction, VPI); 372349cc55cSDimitry Andric Reduction = 373349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start); 374349cc55cSDimitry Andric break; 375349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmin: 376349cc55cSDimitry Andric Reduction = Builder.CreateFPMinReduce(RedOp); 377349cc55cSDimitry Andric transferDecorations(*Reduction, VPI); 378349cc55cSDimitry Andric Reduction = 379349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start); 380349cc55cSDimitry Andric break; 381349cc55cSDimitry Andric case Intrinsic::vp_reduce_fadd: 382349cc55cSDimitry Andric Reduction = Builder.CreateFAddReduce(Start, RedOp); 383349cc55cSDimitry Andric break; 384349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmul: 385349cc55cSDimitry Andric Reduction = Builder.CreateFMulReduce(Start, RedOp); 386349cc55cSDimitry Andric break; 387349cc55cSDimitry Andric } 388349cc55cSDimitry Andric 389349cc55cSDimitry Andric replaceOperation(*Reduction, VPI); 390349cc55cSDimitry Andric return Reduction; 391349cc55cSDimitry Andric } 392349cc55cSDimitry Andric 393fe6060f1SDimitry Andric void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { 394fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); 395fe6060f1SDimitry Andric 396fe6060f1SDimitry Andric if 
(VPI.canIgnoreVectorLengthParam()) 397fe6060f1SDimitry Andric return; 398fe6060f1SDimitry Andric 399fe6060f1SDimitry Andric Value *EVLParam = VPI.getVectorLengthParam(); 400fe6060f1SDimitry Andric if (!EVLParam) 401fe6060f1SDimitry Andric return; 402fe6060f1SDimitry Andric 403fe6060f1SDimitry Andric ElementCount StaticElemCount = VPI.getStaticVectorLength(); 404fe6060f1SDimitry Andric Value *MaxEVL = nullptr; 405fe6060f1SDimitry Andric Type *Int32Ty = Type::getInt32Ty(VPI.getContext()); 406fe6060f1SDimitry Andric if (StaticElemCount.isScalable()) { 407fe6060f1SDimitry Andric // TODO add caching 408fe6060f1SDimitry Andric auto *M = VPI.getModule(); 409fe6060f1SDimitry Andric Function *VScaleFunc = 410fe6060f1SDimitry Andric Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty); 411fe6060f1SDimitry Andric IRBuilder<> Builder(VPI.getParent(), VPI.getIterator()); 412fe6060f1SDimitry Andric Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue()); 413fe6060f1SDimitry Andric Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale"); 414fe6060f1SDimitry Andric MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size", 415fe6060f1SDimitry Andric /*NUW*/ true, /*NSW*/ false); 416fe6060f1SDimitry Andric } else { 417fe6060f1SDimitry Andric MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false); 418fe6060f1SDimitry Andric } 419fe6060f1SDimitry Andric VPI.setVectorLengthParam(MaxEVL); 420fe6060f1SDimitry Andric } 421fe6060f1SDimitry Andric 422fe6060f1SDimitry Andric Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { 423fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n'); 424fe6060f1SDimitry Andric 425fe6060f1SDimitry Andric IRBuilder<> Builder(&VPI); 426fe6060f1SDimitry Andric 427fe6060f1SDimitry Andric // Ineffective %evl parameter and so nothing to do here. 
428fe6060f1SDimitry Andric if (VPI.canIgnoreVectorLengthParam()) 429fe6060f1SDimitry Andric return &VPI; 430fe6060f1SDimitry Andric 431fe6060f1SDimitry Andric // Only VP intrinsics can have an %evl parameter. 432fe6060f1SDimitry Andric Value *OldMaskParam = VPI.getMaskParam(); 433fe6060f1SDimitry Andric Value *OldEVLParam = VPI.getVectorLengthParam(); 434fe6060f1SDimitry Andric assert(OldMaskParam && "no mask param to fold the vl param into"); 435fe6060f1SDimitry Andric assert(OldEVLParam && "no EVL param to fold away"); 436fe6060f1SDimitry Andric 437fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n'); 438fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n'); 439fe6060f1SDimitry Andric 440fe6060f1SDimitry Andric // Convert the %evl predication into vector mask predication. 441fe6060f1SDimitry Andric ElementCount ElemCount = VPI.getStaticVectorLength(); 442fe6060f1SDimitry Andric Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount); 443fe6060f1SDimitry Andric Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam); 444fe6060f1SDimitry Andric VPI.setMaskParam(NewMaskParam); 445fe6060f1SDimitry Andric 446fe6060f1SDimitry Andric // Drop the %evl parameter. 447fe6060f1SDimitry Andric discardEVLParameter(VPI); 448fe6060f1SDimitry Andric assert(VPI.canIgnoreVectorLengthParam() && 449fe6060f1SDimitry Andric "transformation did not render the evl param ineffective!"); 450fe6060f1SDimitry Andric 451fe6060f1SDimitry Andric // Reassess the modified instruction. 
452fe6060f1SDimitry Andric return &VPI; 453fe6060f1SDimitry Andric } 454fe6060f1SDimitry Andric 455fe6060f1SDimitry Andric Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { 456fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n'); 457fe6060f1SDimitry Andric 458fe6060f1SDimitry Andric IRBuilder<> Builder(&VPI); 459fe6060f1SDimitry Andric 460fe6060f1SDimitry Andric // Try lowering to a LLVM instruction first. 461fe6060f1SDimitry Andric auto OC = VPI.getFunctionalOpcode(); 462fe6060f1SDimitry Andric 463fe6060f1SDimitry Andric if (OC && Instruction::isBinaryOp(*OC)) 464fe6060f1SDimitry Andric return expandPredicationInBinaryOperator(Builder, VPI); 465fe6060f1SDimitry Andric 466349cc55cSDimitry Andric if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) 467349cc55cSDimitry Andric return expandPredicationInReduction(Builder, *VPRI); 468349cc55cSDimitry Andric 469fe6060f1SDimitry Andric return &VPI; 470fe6060f1SDimitry Andric } 471fe6060f1SDimitry Andric 472fe6060f1SDimitry Andric //// } CachingVPExpander 473fe6060f1SDimitry Andric 474fe6060f1SDimitry Andric struct TransformJob { 475fe6060f1SDimitry Andric VPIntrinsic *PI; 476fe6060f1SDimitry Andric TargetTransformInfo::VPLegalization Strategy; 477fe6060f1SDimitry Andric TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat) 478fe6060f1SDimitry Andric : PI(PI), Strategy(InitStrat) {} 479fe6060f1SDimitry Andric 480fe6060f1SDimitry Andric bool isDone() const { return Strategy.shouldDoNothing(); } 481fe6060f1SDimitry Andric }; 482fe6060f1SDimitry Andric 483*81ad6265SDimitry Andric void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { 484*81ad6265SDimitry Andric // Operations with speculatable lanes do not strictly need predication. 485*81ad6265SDimitry Andric if (maySpeculateLanes(VPI)) { 486fe6060f1SDimitry Andric // Converting a speculatable VP intrinsic means dropping %mask and %evl. 
487fe6060f1SDimitry Andric // No need to expand %evl into the %mask only to ignore that code. 488fe6060f1SDimitry Andric if (LegalizeStrat.OpStrategy == VPLegalization::Convert) 489fe6060f1SDimitry Andric LegalizeStrat.EVLParamStrategy = VPLegalization::Discard; 490fe6060f1SDimitry Andric return; 491fe6060f1SDimitry Andric } 492fe6060f1SDimitry Andric 493fe6060f1SDimitry Andric // We have to preserve the predicating effect of %evl for this 494fe6060f1SDimitry Andric // non-speculatable VP intrinsic. 495fe6060f1SDimitry Andric // 1) Never discard %evl. 496fe6060f1SDimitry Andric // 2) If this VP intrinsic will be expanded to non-VP code, make sure that 497fe6060f1SDimitry Andric // %evl gets folded into %mask. 498fe6060f1SDimitry Andric if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) || 499fe6060f1SDimitry Andric (LegalizeStrat.OpStrategy == VPLegalization::Convert)) { 500fe6060f1SDimitry Andric LegalizeStrat.EVLParamStrategy = VPLegalization::Convert; 501fe6060f1SDimitry Andric } 502fe6060f1SDimitry Andric } 503fe6060f1SDimitry Andric 504fe6060f1SDimitry Andric VPLegalization 505fe6060f1SDimitry Andric CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { 506fe6060f1SDimitry Andric auto VPStrat = TTI.getVPLegalizationStrategy(VPI); 507fe6060f1SDimitry Andric if (LLVM_LIKELY(!UsingTTIOverrides)) { 508fe6060f1SDimitry Andric // No overrides - we are in production. 509fe6060f1SDimitry Andric return VPStrat; 510fe6060f1SDimitry Andric } 511fe6060f1SDimitry Andric 512fe6060f1SDimitry Andric // Overrides set - we are in testing, the following does not need to be 513fe6060f1SDimitry Andric // efficient. 
514fe6060f1SDimitry Andric VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride); 515fe6060f1SDimitry Andric VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride); 516fe6060f1SDimitry Andric return VPStrat; 517fe6060f1SDimitry Andric } 518fe6060f1SDimitry Andric 519fe6060f1SDimitry Andric /// \brief Expand llvm.vp.* intrinsics as requested by \p TTI. 520fe6060f1SDimitry Andric bool CachingVPExpander::expandVectorPredication() { 521fe6060f1SDimitry Andric SmallVector<TransformJob, 16> Worklist; 522fe6060f1SDimitry Andric 523fe6060f1SDimitry Andric // Collect all VPIntrinsics that need expansion and determine their expansion 524fe6060f1SDimitry Andric // strategy. 525fe6060f1SDimitry Andric for (auto &I : instructions(F)) { 526fe6060f1SDimitry Andric auto *VPI = dyn_cast<VPIntrinsic>(&I); 527fe6060f1SDimitry Andric if (!VPI) 528fe6060f1SDimitry Andric continue; 529fe6060f1SDimitry Andric auto VPStrat = getVPLegalizationStrategy(*VPI); 530*81ad6265SDimitry Andric sanitizeStrategy(*VPI, VPStrat); 531fe6060f1SDimitry Andric if (!VPStrat.shouldDoNothing()) 532fe6060f1SDimitry Andric Worklist.emplace_back(VPI, VPStrat); 533fe6060f1SDimitry Andric } 534fe6060f1SDimitry Andric if (Worklist.empty()) 535fe6060f1SDimitry Andric return false; 536fe6060f1SDimitry Andric 537fe6060f1SDimitry Andric // Transform all VPIntrinsics on the worklist. 538fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size() 539fe6060f1SDimitry Andric << " instructions ::::\n"); 540fe6060f1SDimitry Andric for (TransformJob Job : Worklist) { 541fe6060f1SDimitry Andric // Transform the EVL parameter. 
542fe6060f1SDimitry Andric switch (Job.Strategy.EVLParamStrategy) { 543fe6060f1SDimitry Andric case VPLegalization::Legal: 544fe6060f1SDimitry Andric break; 545fe6060f1SDimitry Andric case VPLegalization::Discard: 546fe6060f1SDimitry Andric discardEVLParameter(*Job.PI); 547fe6060f1SDimitry Andric break; 548fe6060f1SDimitry Andric case VPLegalization::Convert: 549fe6060f1SDimitry Andric if (foldEVLIntoMask(*Job.PI)) 550fe6060f1SDimitry Andric ++NumFoldedVL; 551fe6060f1SDimitry Andric break; 552fe6060f1SDimitry Andric } 553fe6060f1SDimitry Andric Job.Strategy.EVLParamStrategy = VPLegalization::Legal; 554fe6060f1SDimitry Andric 555fe6060f1SDimitry Andric // Replace with a non-predicated operation. 556fe6060f1SDimitry Andric switch (Job.Strategy.OpStrategy) { 557fe6060f1SDimitry Andric case VPLegalization::Legal: 558fe6060f1SDimitry Andric break; 559fe6060f1SDimitry Andric case VPLegalization::Discard: 560fe6060f1SDimitry Andric llvm_unreachable("Invalid strategy for operators."); 561fe6060f1SDimitry Andric case VPLegalization::Convert: 562fe6060f1SDimitry Andric expandPredication(*Job.PI); 563fe6060f1SDimitry Andric ++NumLoweredVPOps; 564fe6060f1SDimitry Andric break; 565fe6060f1SDimitry Andric } 566fe6060f1SDimitry Andric Job.Strategy.OpStrategy = VPLegalization::Legal; 567fe6060f1SDimitry Andric 568fe6060f1SDimitry Andric assert(Job.isDone() && "incomplete transformation"); 569fe6060f1SDimitry Andric } 570fe6060f1SDimitry Andric 571fe6060f1SDimitry Andric return true; 572fe6060f1SDimitry Andric } 573fe6060f1SDimitry Andric class ExpandVectorPredication : public FunctionPass { 574fe6060f1SDimitry Andric public: 575fe6060f1SDimitry Andric static char ID; 576fe6060f1SDimitry Andric ExpandVectorPredication() : FunctionPass(ID) { 577fe6060f1SDimitry Andric initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry()); 578fe6060f1SDimitry Andric } 579fe6060f1SDimitry Andric 580fe6060f1SDimitry Andric bool runOnFunction(Function &F) override { 581fe6060f1SDimitry 
Andric const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 582fe6060f1SDimitry Andric CachingVPExpander VPExpander(F, *TTI); 583fe6060f1SDimitry Andric return VPExpander.expandVectorPredication(); 584fe6060f1SDimitry Andric } 585fe6060f1SDimitry Andric 586fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 587fe6060f1SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 588fe6060f1SDimitry Andric AU.setPreservesCFG(); 589fe6060f1SDimitry Andric } 590fe6060f1SDimitry Andric }; 591fe6060f1SDimitry Andric } // namespace 592fe6060f1SDimitry Andric 593fe6060f1SDimitry Andric char ExpandVectorPredication::ID; 594fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp", 595fe6060f1SDimitry Andric "Expand vector predication intrinsics", false, false) 596fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 597fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 598fe6060f1SDimitry Andric INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp", 599fe6060f1SDimitry Andric "Expand vector predication intrinsics", false, false) 600fe6060f1SDimitry Andric 601fe6060f1SDimitry Andric FunctionPass *llvm::createExpandVectorPredicationPass() { 602fe6060f1SDimitry Andric return new ExpandVectorPredication(); 603fe6060f1SDimitry Andric } 604fe6060f1SDimitry Andric 605fe6060f1SDimitry Andric PreservedAnalyses 606fe6060f1SDimitry Andric ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) { 607fe6060f1SDimitry Andric const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 608fe6060f1SDimitry Andric CachingVPExpander VPExpander(F, TTI); 609fe6060f1SDimitry Andric if (!VPExpander.expandVectorPredication()) 610fe6060f1SDimitry Andric return PreservedAnalyses::all(); 611fe6060f1SDimitry Andric PreservedAnalyses PA; 612fe6060f1SDimitry Andric PA.preserveSet<CFGAnalyses>(); 613fe6060f1SDimitry Andric return PA; 614fe6060f1SDimitry 
Andric } 615