1fe6060f1SDimitry Andric //===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric // This pass implements IR expansion for vector predication intrinsics, allowing 10fe6060f1SDimitry Andric // targets to enable vector predication until just before codegen. 11fe6060f1SDimitry Andric // 12fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 13fe6060f1SDimitry Andric 14fe6060f1SDimitry Andric #include "llvm/CodeGen/ExpandVectorPredication.h" 15fe6060f1SDimitry Andric #include "llvm/ADT/Statistic.h" 16fe6060f1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 17fe6060f1SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 18fcaf7f86SDimitry Andric #include "llvm/Analysis/VectorUtils.h" 19fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h" 20fe6060f1SDimitry Andric #include "llvm/IR/Constants.h" 21fe6060f1SDimitry Andric #include "llvm/IR/Function.h" 22fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h" 23fe6060f1SDimitry Andric #include "llvm/IR/InstIterator.h" 24fe6060f1SDimitry Andric #include "llvm/IR/Instructions.h" 25fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 26fe6060f1SDimitry Andric #include "llvm/IR/Intrinsics.h" 27fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 28fe6060f1SDimitry Andric #include "llvm/Pass.h" 29fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h" 30fe6060f1SDimitry Andric #include "llvm/Support/Compiler.h" 31fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 32bdd1243dSDimitry Andric #include <optional> 33fe6060f1SDimitry Andric 34fe6060f1SDimitry Andric using namespace llvm; 35fe6060f1SDimitry Andric 36fe6060f1SDimitry Andric using VPLegalization = TargetTransformInfo::VPLegalization; 37fe6060f1SDimitry Andric using VPTransform = TargetTransformInfo::VPLegalization::VPTransform; 38fe6060f1SDimitry Andric 39fe6060f1SDimitry Andric // Keep this in sync with TargetTransformInfo::VPLegalization. 40fe6060f1SDimitry Andric #define VPINTERNAL_VPLEGAL_CASES \ 41fe6060f1SDimitry Andric VPINTERNAL_CASE(Legal) \ 42fe6060f1SDimitry Andric VPINTERNAL_CASE(Discard) \ 43fe6060f1SDimitry Andric VPINTERNAL_CASE(Convert) 44fe6060f1SDimitry Andric 45fe6060f1SDimitry Andric #define VPINTERNAL_CASE(X) "|" #X 46fe6060f1SDimitry Andric 47fe6060f1SDimitry Andric // Override options. 48fe6060f1SDimitry Andric static cl::opt<std::string> EVLTransformOverride( 49fe6060f1SDimitry Andric "expandvp-override-evl-transform", cl::init(""), cl::Hidden, 50fe6060f1SDimitry Andric cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 51fe6060f1SDimitry Andric ". If non-empty, ignore " 52fe6060f1SDimitry Andric "TargetTransformInfo and " 53fe6060f1SDimitry Andric "always use this transformation for the %evl parameter (Used in " 54fe6060f1SDimitry Andric "testing).")); 55fe6060f1SDimitry Andric 56fe6060f1SDimitry Andric static cl::opt<std::string> MaskTransformOverride( 57fe6060f1SDimitry Andric "expandvp-override-mask-transform", cl::init(""), cl::Hidden, 58fe6060f1SDimitry Andric cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 59fe6060f1SDimitry Andric ". If non-empty, Ignore " 60fe6060f1SDimitry Andric "TargetTransformInfo and " 61fe6060f1SDimitry Andric "always use this transformation for the %mask parameter (Used in " 62fe6060f1SDimitry Andric "testing).")); 63fe6060f1SDimitry Andric 64fe6060f1SDimitry Andric #undef VPINTERNAL_CASE 65fe6060f1SDimitry Andric #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X) 66fe6060f1SDimitry Andric 67fe6060f1SDimitry Andric static VPTransform parseOverrideOption(const std::string &TextOpt) { 68fe6060f1SDimitry Andric return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES; 69fe6060f1SDimitry Andric } 70fe6060f1SDimitry Andric 71fe6060f1SDimitry Andric #undef VPINTERNAL_VPLEGAL_CASES 72fe6060f1SDimitry Andric 73fe6060f1SDimitry Andric // Whether any override options are set. 74fe6060f1SDimitry Andric static bool anyExpandVPOverridesSet() { 75fe6060f1SDimitry Andric return !EVLTransformOverride.empty() || !MaskTransformOverride.empty(); 76fe6060f1SDimitry Andric } 77fe6060f1SDimitry Andric 78fe6060f1SDimitry Andric #define DEBUG_TYPE "expandvp" 79fe6060f1SDimitry Andric 80fe6060f1SDimitry Andric STATISTIC(NumFoldedVL, "Number of folded vector length params"); 81fe6060f1SDimitry Andric STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); 82fe6060f1SDimitry Andric 83fe6060f1SDimitry Andric ///// Helpers { 84fe6060f1SDimitry Andric 85fe6060f1SDimitry Andric /// \returns Whether the vector mask \p MaskVal has all lane bits set. 86fe6060f1SDimitry Andric static bool isAllTrueMask(Value *MaskVal) { 87fcaf7f86SDimitry Andric if (Value *SplattedVal = getSplatValue(MaskVal)) 88fcaf7f86SDimitry Andric if (auto *ConstValue = dyn_cast<Constant>(SplattedVal)) 89fcaf7f86SDimitry Andric return ConstValue->isAllOnesValue(); 90fcaf7f86SDimitry Andric 91fcaf7f86SDimitry Andric return false; 92fe6060f1SDimitry Andric } 93fe6060f1SDimitry Andric 94fe6060f1SDimitry Andric /// \returns A non-excepting divisor constant for this type. 95fe6060f1SDimitry Andric static Constant *getSafeDivisor(Type *DivTy) { 96fe6060f1SDimitry Andric assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type"); 97fe6060f1SDimitry Andric return ConstantInt::get(DivTy, 1u, false); 98fe6060f1SDimitry Andric } 99fe6060f1SDimitry Andric 100fe6060f1SDimitry Andric /// Transfer operation properties from \p OldVPI to \p NewVal. 101fe6060f1SDimitry Andric static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) { 102fe6060f1SDimitry Andric auto *NewInst = dyn_cast<Instruction>(&NewVal); 103fe6060f1SDimitry Andric if (!NewInst || !isa<FPMathOperator>(NewVal)) 104fe6060f1SDimitry Andric return; 105fe6060f1SDimitry Andric 106fe6060f1SDimitry Andric auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI); 107fe6060f1SDimitry Andric if (!OldFMOp) 108fe6060f1SDimitry Andric return; 109fe6060f1SDimitry Andric 110fe6060f1SDimitry Andric NewInst->setFastMathFlags(OldFMOp->getFastMathFlags()); 111fe6060f1SDimitry Andric } 112fe6060f1SDimitry Andric 113fe6060f1SDimitry Andric /// Transfer all properties from \p OldOp to \p NewOp and replace all uses. 114fe6060f1SDimitry Andric /// OldVP gets erased. 115fe6060f1SDimitry Andric static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) { 116fe6060f1SDimitry Andric transferDecorations(NewOp, OldOp); 117fe6060f1SDimitry Andric OldOp.replaceAllUsesWith(&NewOp); 118fe6060f1SDimitry Andric OldOp.eraseFromParent(); 119fe6060f1SDimitry Andric } 120fe6060f1SDimitry Andric 12181ad6265SDimitry Andric static bool maySpeculateLanes(VPIntrinsic &VPI) { 12281ad6265SDimitry Andric // The result of VP reductions depends on the mask and evl. 12381ad6265SDimitry Andric if (isa<VPReductionIntrinsic>(VPI)) 12481ad6265SDimitry Andric return false; 12581ad6265SDimitry Andric // Fallback to whether the intrinsic is speculatable. 1265f757f3fSDimitry Andric if (auto IntrID = VPI.getFunctionalIntrinsicID()) 1275f757f3fSDimitry Andric return Intrinsic::getAttributes(VPI.getContext(), *IntrID) 1285f757f3fSDimitry Andric .hasFnAttr(Attribute::AttrKind::Speculatable); 1295f757f3fSDimitry Andric if (auto Opc = VPI.getFunctionalOpcode()) 1305f757f3fSDimitry Andric return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI); 1315f757f3fSDimitry Andric return false; 13281ad6265SDimitry Andric } 13381ad6265SDimitry Andric 134fe6060f1SDimitry Andric //// } Helpers 135fe6060f1SDimitry Andric 136fe6060f1SDimitry Andric namespace { 137fe6060f1SDimitry Andric 138fe6060f1SDimitry Andric // Expansion pass state at function scope. 139fe6060f1SDimitry Andric struct CachingVPExpander { 140fe6060f1SDimitry Andric Function &F; 141fe6060f1SDimitry Andric const TargetTransformInfo &TTI; 142fe6060f1SDimitry Andric 143fe6060f1SDimitry Andric /// \returns A (fixed length) vector with ascending integer indices 144fe6060f1SDimitry Andric /// (<0, 1, ..., NumElems-1>). 145fe6060f1SDimitry Andric /// \p Builder 146fe6060f1SDimitry Andric /// Used for instruction creation. 147fe6060f1SDimitry Andric /// \p LaneTy 148fe6060f1SDimitry Andric /// Integer element type of the result vector. 149fe6060f1SDimitry Andric /// \p NumElems 150fe6060f1SDimitry Andric /// Number of vector elements. 151fe6060f1SDimitry Andric Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy, 152fe6060f1SDimitry Andric unsigned NumElems); 153fe6060f1SDimitry Andric 154fe6060f1SDimitry Andric /// \returns A bitmask that is true where the lane position is less-than \p 155fe6060f1SDimitry Andric /// EVLParam 156fe6060f1SDimitry Andric /// 157fe6060f1SDimitry Andric /// \p Builder 158fe6060f1SDimitry Andric /// Used for instruction creation. 159fe6060f1SDimitry Andric /// \p VLParam 160fe6060f1SDimitry Andric /// The explicit vector length parameter to test against the lane 161fe6060f1SDimitry Andric /// positions. 162fe6060f1SDimitry Andric /// \p ElemCount 163fe6060f1SDimitry Andric /// Static (potentially scalable) number of vector elements. 164fe6060f1SDimitry Andric Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam, 165fe6060f1SDimitry Andric ElementCount ElemCount); 166fe6060f1SDimitry Andric 167fe6060f1SDimitry Andric Value *foldEVLIntoMask(VPIntrinsic &VPI); 168fe6060f1SDimitry Andric 169fe6060f1SDimitry Andric /// "Remove" the %evl parameter of \p PI by setting it to the static vector 170fe6060f1SDimitry Andric /// length of the operation. 171fe6060f1SDimitry Andric void discardEVLParameter(VPIntrinsic &PI); 172fe6060f1SDimitry Andric 173bdd1243dSDimitry Andric /// Lower this VP binary operator to a unpredicated binary operator. 174fe6060f1SDimitry Andric Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, 175fe6060f1SDimitry Andric VPIntrinsic &PI); 176fe6060f1SDimitry Andric 1775f757f3fSDimitry Andric /// Lower this VP int call to a unpredicated int call. 1785f757f3fSDimitry Andric Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI, 1795f757f3fSDimitry Andric unsigned UnpredicatedIntrinsicID); 1805f757f3fSDimitry Andric 18106c3fb27SDimitry Andric /// Lower this VP fp call to a unpredicated fp call. 18206c3fb27SDimitry Andric Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI, 18306c3fb27SDimitry Andric unsigned UnpredicatedIntrinsicID); 18406c3fb27SDimitry Andric 185bdd1243dSDimitry Andric /// Lower this VP reduction to a call to an unpredicated reduction intrinsic. 186349cc55cSDimitry Andric Value *expandPredicationInReduction(IRBuilder<> &Builder, 187349cc55cSDimitry Andric VPReductionIntrinsic &PI); 188349cc55cSDimitry Andric 1895f757f3fSDimitry Andric /// Lower this VP cast operation to a non-VP intrinsic. 1905f757f3fSDimitry Andric Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder, 1915f757f3fSDimitry Andric VPIntrinsic &VPI); 1925f757f3fSDimitry Andric 193bdd1243dSDimitry Andric /// Lower this VP memory operation to a non-VP intrinsic. 194fcaf7f86SDimitry Andric Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, 195fcaf7f86SDimitry Andric VPIntrinsic &VPI); 196fcaf7f86SDimitry Andric 197bdd1243dSDimitry Andric /// Lower this VP comparison to a call to an unpredicated comparison. 198bdd1243dSDimitry Andric Value *expandPredicationInComparison(IRBuilder<> &Builder, 199bdd1243dSDimitry Andric VPCmpIntrinsic &PI); 200bdd1243dSDimitry Andric 201bdd1243dSDimitry Andric /// Query TTI and expand the vector predication in \p P accordingly. 202fe6060f1SDimitry Andric Value *expandPredication(VPIntrinsic &PI); 203fe6060f1SDimitry Andric 204bdd1243dSDimitry Andric /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This 205bdd1243dSDimitry Andric /// overrides TTI with the cl::opts listed at the top of this file. 206fe6060f1SDimitry Andric VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const; 207fe6060f1SDimitry Andric bool UsingTTIOverrides; 208fe6060f1SDimitry Andric 209fe6060f1SDimitry Andric public: 210fe6060f1SDimitry Andric CachingVPExpander(Function &F, const TargetTransformInfo &TTI) 211fe6060f1SDimitry Andric : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {} 212fe6060f1SDimitry Andric 213fe6060f1SDimitry Andric bool expandVectorPredication(); 214fe6060f1SDimitry Andric }; 215fe6060f1SDimitry Andric 216fe6060f1SDimitry Andric //// CachingVPExpander { 217fe6060f1SDimitry Andric 218fe6060f1SDimitry Andric Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy, 219fe6060f1SDimitry Andric unsigned NumElems) { 220fe6060f1SDimitry Andric // TODO add caching 221fe6060f1SDimitry Andric SmallVector<Constant *, 16> ConstElems; 222fe6060f1SDimitry Andric 223fe6060f1SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx) 224fe6060f1SDimitry Andric ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false)); 225fe6060f1SDimitry Andric 226fe6060f1SDimitry Andric return ConstantVector::get(ConstElems); 227fe6060f1SDimitry Andric } 228fe6060f1SDimitry Andric 229fe6060f1SDimitry Andric Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder, 230fe6060f1SDimitry Andric Value *EVLParam, 231fe6060f1SDimitry Andric ElementCount ElemCount) { 232fe6060f1SDimitry Andric // TODO add caching 233fe6060f1SDimitry Andric // Scalable vector %evl conversion. 234fe6060f1SDimitry Andric if (ElemCount.isScalable()) { 235fe6060f1SDimitry Andric auto *M = Builder.GetInsertBlock()->getModule(); 236fe6060f1SDimitry Andric Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount); 237fe6060f1SDimitry Andric Function *ActiveMaskFunc = Intrinsic::getDeclaration( 238fe6060f1SDimitry Andric M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()}); 239fe6060f1SDimitry Andric // `get_active_lane_mask` performs an implicit less-than comparison. 240fe6060f1SDimitry Andric Value *ConstZero = Builder.getInt32(0); 241fe6060f1SDimitry Andric return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam}); 242fe6060f1SDimitry Andric } 243fe6060f1SDimitry Andric 244fe6060f1SDimitry Andric // Fixed vector %evl conversion. 245fe6060f1SDimitry Andric Type *LaneTy = EVLParam->getType(); 246fe6060f1SDimitry Andric unsigned NumElems = ElemCount.getFixedValue(); 247fe6060f1SDimitry Andric Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam); 248fe6060f1SDimitry Andric Value *IdxVec = createStepVector(Builder, LaneTy, NumElems); 249fe6060f1SDimitry Andric return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat); 250fe6060f1SDimitry Andric } 251fe6060f1SDimitry Andric 252fe6060f1SDimitry Andric Value * 253fe6060f1SDimitry Andric CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, 254fe6060f1SDimitry Andric VPIntrinsic &VPI) { 25581ad6265SDimitry Andric assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 256fe6060f1SDimitry Andric "Implicitly dropping %evl in non-speculatable operator!"); 257fe6060f1SDimitry Andric 258fe6060f1SDimitry Andric auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode()); 259fe6060f1SDimitry Andric assert(Instruction::isBinaryOp(OC)); 260fe6060f1SDimitry Andric 261fe6060f1SDimitry Andric Value *Op0 = VPI.getOperand(0); 262fe6060f1SDimitry Andric Value *Op1 = VPI.getOperand(1); 263fe6060f1SDimitry Andric Value *Mask = VPI.getMaskParam(); 264fe6060f1SDimitry Andric 265fe6060f1SDimitry Andric // Blend in safe operands. 266fe6060f1SDimitry Andric if (Mask && !isAllTrueMask(Mask)) { 267fe6060f1SDimitry Andric switch (OC) { 268fe6060f1SDimitry Andric default: 269fe6060f1SDimitry Andric // Can safely ignore the predicate. 270fe6060f1SDimitry Andric break; 271fe6060f1SDimitry Andric 272fe6060f1SDimitry Andric // Division operators need a safe divisor on masked-off lanes (1). 273fe6060f1SDimitry Andric case Instruction::UDiv: 274fe6060f1SDimitry Andric case Instruction::SDiv: 275fe6060f1SDimitry Andric case Instruction::URem: 276fe6060f1SDimitry Andric case Instruction::SRem: 277fe6060f1SDimitry Andric // 2nd operand must not be zero. 278fe6060f1SDimitry Andric Value *SafeDivisor = getSafeDivisor(VPI.getType()); 279fe6060f1SDimitry Andric Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor); 280fe6060f1SDimitry Andric } 281fe6060f1SDimitry Andric } 282fe6060f1SDimitry Andric 283fe6060f1SDimitry Andric Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName()); 284fe6060f1SDimitry Andric 285fe6060f1SDimitry Andric replaceOperation(*NewBinOp, VPI); 286fe6060f1SDimitry Andric return NewBinOp; 287fe6060f1SDimitry Andric } 288fe6060f1SDimitry Andric 2895f757f3fSDimitry Andric Value *CachingVPExpander::expandPredicationToIntCall( 2905f757f3fSDimitry Andric IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) { 2915f757f3fSDimitry Andric switch (UnpredicatedIntrinsicID) { 2925f757f3fSDimitry Andric case Intrinsic::abs: 2935f757f3fSDimitry Andric case Intrinsic::smax: 2945f757f3fSDimitry Andric case Intrinsic::smin: 2955f757f3fSDimitry Andric case Intrinsic::umax: 2965f757f3fSDimitry Andric case Intrinsic::umin: { 2975f757f3fSDimitry Andric Value *Op0 = VPI.getOperand(0); 2985f757f3fSDimitry Andric Value *Op1 = VPI.getOperand(1); 2995f757f3fSDimitry Andric Function *Fn = Intrinsic::getDeclaration( 3005f757f3fSDimitry Andric VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()}); 3015f757f3fSDimitry Andric Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName()); 3025f757f3fSDimitry Andric replaceOperation(*NewOp, VPI); 3035f757f3fSDimitry Andric return NewOp; 3045f757f3fSDimitry Andric } 3055f757f3fSDimitry Andric case Intrinsic::bswap: 3065f757f3fSDimitry Andric case Intrinsic::bitreverse: { 3075f757f3fSDimitry Andric Value *Op = VPI.getOperand(0); 3085f757f3fSDimitry Andric Function *Fn = Intrinsic::getDeclaration( 3095f757f3fSDimitry Andric VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()}); 3105f757f3fSDimitry Andric Value *NewOp = Builder.CreateCall(Fn, {Op}, VPI.getName()); 3115f757f3fSDimitry Andric replaceOperation(*NewOp, VPI); 3125f757f3fSDimitry Andric return NewOp; 3135f757f3fSDimitry Andric } 3145f757f3fSDimitry Andric } 3155f757f3fSDimitry Andric return nullptr; 3165f757f3fSDimitry Andric } 3175f757f3fSDimitry Andric 31806c3fb27SDimitry Andric Value *CachingVPExpander::expandPredicationToFPCall( 31906c3fb27SDimitry Andric IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) { 32006c3fb27SDimitry Andric assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 32106c3fb27SDimitry Andric "Implicitly dropping %evl in non-speculatable operator!"); 32206c3fb27SDimitry Andric 32306c3fb27SDimitry Andric switch (UnpredicatedIntrinsicID) { 32406c3fb27SDimitry Andric case Intrinsic::fabs: 32506c3fb27SDimitry Andric case Intrinsic::sqrt: { 32606c3fb27SDimitry Andric Value *Op0 = VPI.getOperand(0); 32706c3fb27SDimitry Andric Function *Fn = Intrinsic::getDeclaration( 32806c3fb27SDimitry Andric VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()}); 32906c3fb27SDimitry Andric Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName()); 33006c3fb27SDimitry Andric replaceOperation(*NewOp, VPI); 33106c3fb27SDimitry Andric return NewOp; 33206c3fb27SDimitry Andric } 3335f757f3fSDimitry Andric case Intrinsic::maxnum: 3345f757f3fSDimitry Andric case Intrinsic::minnum: { 3355f757f3fSDimitry Andric Value *Op0 = VPI.getOperand(0); 3365f757f3fSDimitry Andric Value *Op1 = VPI.getOperand(1); 3375f757f3fSDimitry Andric Function *Fn = Intrinsic::getDeclaration( 3385f757f3fSDimitry Andric VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()}); 3395f757f3fSDimitry Andric Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName()); 3405f757f3fSDimitry Andric replaceOperation(*NewOp, VPI); 3415f757f3fSDimitry Andric return NewOp; 3425f757f3fSDimitry Andric } 343*0fca6ea1SDimitry Andric case Intrinsic::fma: 344*0fca6ea1SDimitry Andric case Intrinsic::fmuladd: 34506c3fb27SDimitry Andric case Intrinsic::experimental_constrained_fma: 34606c3fb27SDimitry Andric case Intrinsic::experimental_constrained_fmuladd: { 34706c3fb27SDimitry Andric Value *Op0 = VPI.getOperand(0); 34806c3fb27SDimitry Andric Value *Op1 = VPI.getOperand(1); 34906c3fb27SDimitry Andric Value *Op2 = VPI.getOperand(2); 35006c3fb27SDimitry Andric Function *Fn = Intrinsic::getDeclaration( 35106c3fb27SDimitry Andric VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()}); 352*0fca6ea1SDimitry Andric Value *NewOp; 353*0fca6ea1SDimitry Andric if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID)) 354*0fca6ea1SDimitry Andric NewOp = 35506c3fb27SDimitry Andric Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName()); 356*0fca6ea1SDimitry Andric else 357*0fca6ea1SDimitry Andric NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName()); 35806c3fb27SDimitry Andric replaceOperation(*NewOp, VPI); 35906c3fb27SDimitry Andric return NewOp; 36006c3fb27SDimitry Andric } 36106c3fb27SDimitry Andric } 36206c3fb27SDimitry Andric 36306c3fb27SDimitry Andric return nullptr; 36406c3fb27SDimitry Andric } 36506c3fb27SDimitry Andric 366349cc55cSDimitry Andric static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, 367349cc55cSDimitry Andric Type *EltTy) { 368349cc55cSDimitry Andric bool Negative = false; 369349cc55cSDimitry Andric unsigned EltBits = EltTy->getScalarSizeInBits(); 370*0fca6ea1SDimitry Andric Intrinsic::ID VID = VPI.getIntrinsicID(); 371*0fca6ea1SDimitry Andric switch (VID) { 372349cc55cSDimitry Andric default: 373349cc55cSDimitry Andric llvm_unreachable("Expecting a VP reduction intrinsic"); 374349cc55cSDimitry Andric case Intrinsic::vp_reduce_add: 375349cc55cSDimitry Andric case Intrinsic::vp_reduce_or: 376349cc55cSDimitry Andric case Intrinsic::vp_reduce_xor: 377349cc55cSDimitry Andric case Intrinsic::vp_reduce_umax: 378349cc55cSDimitry Andric return Constant::getNullValue(EltTy); 379349cc55cSDimitry Andric case Intrinsic::vp_reduce_mul: 380349cc55cSDimitry Andric return ConstantInt::get(EltTy, 1, /*IsSigned*/ false); 381349cc55cSDimitry Andric case Intrinsic::vp_reduce_and: 382349cc55cSDimitry Andric case Intrinsic::vp_reduce_umin: 383349cc55cSDimitry Andric return ConstantInt::getAllOnesValue(EltTy); 384349cc55cSDimitry Andric case Intrinsic::vp_reduce_smin: 385349cc55cSDimitry Andric return ConstantInt::get(EltTy->getContext(), 386349cc55cSDimitry Andric APInt::getSignedMaxValue(EltBits)); 387349cc55cSDimitry Andric case Intrinsic::vp_reduce_smax: 388349cc55cSDimitry Andric return ConstantInt::get(EltTy->getContext(), 389349cc55cSDimitry Andric APInt::getSignedMinValue(EltBits)); 390349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmax: 391*0fca6ea1SDimitry Andric case Intrinsic::vp_reduce_fmaximum: 392349cc55cSDimitry Andric Negative = true; 393bdd1243dSDimitry Andric [[fallthrough]]; 394*0fca6ea1SDimitry Andric case Intrinsic::vp_reduce_fmin: 395*0fca6ea1SDimitry Andric case Intrinsic::vp_reduce_fminimum: { 396*0fca6ea1SDimitry Andric bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum || 397*0fca6ea1SDimitry Andric VID == Intrinsic::vp_reduce_fmaximum; 398349cc55cSDimitry Andric FastMathFlags Flags = VPI.getFastMathFlags(); 399349cc55cSDimitry Andric const fltSemantics &Semantics = EltTy->getFltSemantics(); 400*0fca6ea1SDimitry Andric return (!Flags.noNaNs() && !PropagatesNaN) 401*0fca6ea1SDimitry Andric ? ConstantFP::getQNaN(EltTy, Negative) 402349cc55cSDimitry Andric : !Flags.noInfs() 403349cc55cSDimitry Andric ? ConstantFP::getInfinity(EltTy, Negative) 404349cc55cSDimitry Andric : ConstantFP::get(EltTy, 405349cc55cSDimitry Andric APFloat::getLargest(Semantics, Negative)); 406349cc55cSDimitry Andric } 407349cc55cSDimitry Andric case Intrinsic::vp_reduce_fadd: 408349cc55cSDimitry Andric return ConstantFP::getNegativeZero(EltTy); 409349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmul: 410349cc55cSDimitry Andric return ConstantFP::get(EltTy, 1.0); 411349cc55cSDimitry Andric } 412349cc55cSDimitry Andric } 413349cc55cSDimitry Andric 414349cc55cSDimitry Andric Value * 415349cc55cSDimitry Andric CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, 416349cc55cSDimitry Andric VPReductionIntrinsic &VPI) { 41781ad6265SDimitry Andric assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 418349cc55cSDimitry Andric "Implicitly dropping %evl in non-speculatable operator!"); 419349cc55cSDimitry Andric 420349cc55cSDimitry Andric Value *Mask = VPI.getMaskParam(); 421349cc55cSDimitry Andric Value *RedOp = VPI.getOperand(VPI.getVectorParamPos()); 422349cc55cSDimitry Andric 423349cc55cSDimitry Andric // Insert neutral element in masked-out positions 424349cc55cSDimitry Andric if (Mask && !isAllTrueMask(Mask)) { 425349cc55cSDimitry Andric auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType()); 426349cc55cSDimitry Andric auto *NeutralVector = Builder.CreateVectorSplat( 427349cc55cSDimitry Andric cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt); 428349cc55cSDimitry Andric RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector); 429349cc55cSDimitry Andric } 430349cc55cSDimitry Andric 431349cc55cSDimitry Andric Value *Reduction; 432349cc55cSDimitry Andric Value *Start = VPI.getOperand(VPI.getStartParamPos()); 433349cc55cSDimitry Andric 434349cc55cSDimitry Andric switch (VPI.getIntrinsicID()) { 435349cc55cSDimitry Andric default: 436349cc55cSDimitry Andric llvm_unreachable("Impossible reduction kind"); 437349cc55cSDimitry Andric case Intrinsic::vp_reduce_add: 438349cc55cSDimitry Andric Reduction = Builder.CreateAddReduce(RedOp); 439349cc55cSDimitry Andric Reduction = Builder.CreateAdd(Reduction, Start); 440349cc55cSDimitry Andric break; 441349cc55cSDimitry Andric case Intrinsic::vp_reduce_mul: 442349cc55cSDimitry Andric Reduction = Builder.CreateMulReduce(RedOp); 443349cc55cSDimitry Andric Reduction = Builder.CreateMul(Reduction, Start); 444349cc55cSDimitry Andric break; 445349cc55cSDimitry Andric case Intrinsic::vp_reduce_and: 446349cc55cSDimitry Andric Reduction = Builder.CreateAndReduce(RedOp); 447349cc55cSDimitry Andric Reduction = Builder.CreateAnd(Reduction, Start); 448349cc55cSDimitry Andric break; 449349cc55cSDimitry Andric case Intrinsic::vp_reduce_or: 450349cc55cSDimitry Andric Reduction = Builder.CreateOrReduce(RedOp); 451349cc55cSDimitry Andric Reduction = Builder.CreateOr(Reduction, Start); 452349cc55cSDimitry Andric break; 453349cc55cSDimitry Andric case Intrinsic::vp_reduce_xor: 454349cc55cSDimitry Andric Reduction = Builder.CreateXorReduce(RedOp); 455349cc55cSDimitry Andric Reduction = Builder.CreateXor(Reduction, Start); 456349cc55cSDimitry Andric break; 457349cc55cSDimitry Andric case Intrinsic::vp_reduce_smax: 458349cc55cSDimitry Andric Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true); 459349cc55cSDimitry Andric Reduction = 460349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start); 461349cc55cSDimitry Andric break; 462349cc55cSDimitry Andric case Intrinsic::vp_reduce_smin: 463349cc55cSDimitry Andric Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true); 464349cc55cSDimitry Andric Reduction = 465349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start); 466349cc55cSDimitry Andric break; 467349cc55cSDimitry Andric case Intrinsic::vp_reduce_umax: 468349cc55cSDimitry Andric Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false); 469349cc55cSDimitry Andric Reduction = 470349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start); 471349cc55cSDimitry Andric break; 472349cc55cSDimitry Andric case Intrinsic::vp_reduce_umin: 473349cc55cSDimitry Andric Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false); 474349cc55cSDimitry Andric Reduction = 475349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start); 476349cc55cSDimitry Andric break; 477349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmax: 478349cc55cSDimitry Andric Reduction = Builder.CreateFPMaxReduce(RedOp); 479349cc55cSDimitry Andric transferDecorations(*Reduction, VPI); 480349cc55cSDimitry Andric Reduction = 481349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start); 482349cc55cSDimitry Andric break; 483349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmin: 484349cc55cSDimitry Andric Reduction = Builder.CreateFPMinReduce(RedOp); 485349cc55cSDimitry Andric transferDecorations(*Reduction, VPI); 486349cc55cSDimitry Andric Reduction = 487349cc55cSDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start); 488349cc55cSDimitry Andric break; 489*0fca6ea1SDimitry Andric case Intrinsic::vp_reduce_fmaximum: 490*0fca6ea1SDimitry Andric Reduction = Builder.CreateFPMaximumReduce(RedOp); 491*0fca6ea1SDimitry Andric transferDecorations(*Reduction, VPI); 492*0fca6ea1SDimitry Andric Reduction = 493*0fca6ea1SDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start); 494*0fca6ea1SDimitry Andric break; 495*0fca6ea1SDimitry Andric case Intrinsic::vp_reduce_fminimum: 496*0fca6ea1SDimitry Andric Reduction = Builder.CreateFPMinimumReduce(RedOp); 497*0fca6ea1SDimitry Andric transferDecorations(*Reduction, VPI); 498*0fca6ea1SDimitry Andric Reduction = 499*0fca6ea1SDimitry Andric Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start); 500*0fca6ea1SDimitry Andric break; 501349cc55cSDimitry Andric case Intrinsic::vp_reduce_fadd: 502349cc55cSDimitry Andric Reduction = Builder.CreateFAddReduce(Start, RedOp); 503349cc55cSDimitry Andric break; 504349cc55cSDimitry Andric case Intrinsic::vp_reduce_fmul: 505349cc55cSDimitry Andric Reduction = Builder.CreateFMulReduce(Start, RedOp); 506349cc55cSDimitry Andric break; 507349cc55cSDimitry Andric } 508349cc55cSDimitry Andric 509349cc55cSDimitry Andric replaceOperation(*Reduction, VPI); 510349cc55cSDimitry Andric return Reduction; 511349cc55cSDimitry Andric } 512349cc55cSDimitry Andric 5135f757f3fSDimitry Andric Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder, 5145f757f3fSDimitry Andric VPIntrinsic &VPI) { 5155f757f3fSDimitry Andric Value *CastOp = nullptr; 5165f757f3fSDimitry Andric switch (VPI.getIntrinsicID()) { 5175f757f3fSDimitry Andric default: 5185f757f3fSDimitry Andric llvm_unreachable("Not a VP cast intrinsic"); 5195f757f3fSDimitry Andric case Intrinsic::vp_sext: 5205f757f3fSDimitry Andric CastOp = 5215f757f3fSDimitry Andric Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5225f757f3fSDimitry Andric break; 5235f757f3fSDimitry Andric case Intrinsic::vp_zext: 5245f757f3fSDimitry Andric CastOp = 5255f757f3fSDimitry Andric Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5265f757f3fSDimitry Andric break; 5275f757f3fSDimitry Andric case Intrinsic::vp_trunc: 5285f757f3fSDimitry Andric CastOp = 5295f757f3fSDimitry Andric Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5305f757f3fSDimitry Andric break; 5315f757f3fSDimitry Andric case Intrinsic::vp_inttoptr: 5325f757f3fSDimitry Andric CastOp = 5335f757f3fSDimitry Andric Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5345f757f3fSDimitry Andric break; 5355f757f3fSDimitry Andric case Intrinsic::vp_ptrtoint: 5365f757f3fSDimitry Andric CastOp = 5375f757f3fSDimitry Andric Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5385f757f3fSDimitry Andric break; 5395f757f3fSDimitry Andric case Intrinsic::vp_fptosi: 5405f757f3fSDimitry Andric CastOp = 5415f757f3fSDimitry Andric Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5425f757f3fSDimitry Andric break; 5435f757f3fSDimitry Andric 5445f757f3fSDimitry Andric case Intrinsic::vp_fptoui: 5455f757f3fSDimitry Andric CastOp = 5465f757f3fSDimitry Andric Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5475f757f3fSDimitry Andric break; 5485f757f3fSDimitry Andric case Intrinsic::vp_sitofp: 5495f757f3fSDimitry Andric CastOp = 5505f757f3fSDimitry Andric Builder.CreateSIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5515f757f3fSDimitry Andric break; 5525f757f3fSDimitry Andric case Intrinsic::vp_uitofp: 5535f757f3fSDimitry Andric CastOp = 5545f757f3fSDimitry Andric Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5555f757f3fSDimitry Andric break; 5565f757f3fSDimitry Andric case Intrinsic::vp_fptrunc: 5575f757f3fSDimitry Andric CastOp = 5585f757f3fSDimitry Andric Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5595f757f3fSDimitry Andric break; 5605f757f3fSDimitry Andric case Intrinsic::vp_fpext: 5615f757f3fSDimitry Andric CastOp = 5625f757f3fSDimitry Andric Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); 5635f757f3fSDimitry Andric break; 5645f757f3fSDimitry Andric } 5655f757f3fSDimitry Andric replaceOperation(*CastOp, VPI); 5665f757f3fSDimitry Andric return CastOp; 5675f757f3fSDimitry Andric } 5685f757f3fSDimitry Andric 569fcaf7f86SDimitry Andric Value * 570fcaf7f86SDimitry Andric CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, 571fcaf7f86SDimitry Andric VPIntrinsic &VPI) { 572fcaf7f86SDimitry Andric assert(VPI.canIgnoreVectorLengthParam()); 573fcaf7f86SDimitry Andric 574*0fca6ea1SDimitry Andric const auto &DL = F.getDataLayout(); 575fcaf7f86SDimitry Andric 576fcaf7f86SDimitry Andric Value *MaskParam = VPI.getMaskParam(); 577fcaf7f86SDimitry Andric Value *PtrParam = VPI.getMemoryPointerParam(); 578fcaf7f86SDimitry Andric Value *DataParam = VPI.getMemoryDataParam(); 579fcaf7f86SDimitry Andric bool IsUnmasked = isAllTrueMask(MaskParam); 580fcaf7f86SDimitry Andric 581fcaf7f86SDimitry Andric MaybeAlign AlignOpt = VPI.getPointerAlignment(); 582fcaf7f86SDimitry Andric 583fcaf7f86SDimitry Andric Value *NewMemoryInst = nullptr; 584fcaf7f86SDimitry Andric switch (VPI.getIntrinsicID()) { 585fcaf7f86SDimitry Andric default: 586fcaf7f86SDimitry Andric llvm_unreachable("Not a VP memory intrinsic"); 587fcaf7f86SDimitry Andric case Intrinsic::vp_store: 588fcaf7f86SDimitry Andric if (IsUnmasked) { 589fcaf7f86SDimitry Andric StoreInst *NewStore = 590fcaf7f86SDimitry Andric Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false); 591fcaf7f86SDimitry Andric if (AlignOpt.has_value()) 592bdd1243dSDimitry Andric NewStore->setAlignment(*AlignOpt); 593fcaf7f86SDimitry Andric NewMemoryInst = NewStore; 594fcaf7f86SDimitry Andric } else 595fcaf7f86SDimitry Andric NewMemoryInst = Builder.CreateMaskedStore( 596fcaf7f86SDimitry Andric DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam); 597fcaf7f86SDimitry Andric 598fcaf7f86SDimitry Andric break; 599fcaf7f86SDimitry Andric case Intrinsic::vp_load: 600fcaf7f86SDimitry Andric if (IsUnmasked) { 601fcaf7f86SDimitry Andric LoadInst *NewLoad = 602fcaf7f86SDimitry Andric Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false); 603fcaf7f86SDimitry Andric if (AlignOpt.has_value()) 604bdd1243dSDimitry Andric NewLoad->setAlignment(*AlignOpt); 605fcaf7f86SDimitry Andric NewMemoryInst = NewLoad; 606fcaf7f86SDimitry Andric } else 607fcaf7f86SDimitry Andric NewMemoryInst = Builder.CreateMaskedLoad( 608fcaf7f86SDimitry Andric VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam); 609fcaf7f86SDimitry Andric 610fcaf7f86SDimitry Andric break; 611fcaf7f86SDimitry Andric case Intrinsic::vp_scatter: { 612fcaf7f86SDimitry Andric auto *ElementType = 613fcaf7f86SDimitry Andric cast<VectorType>(DataParam->getType())->getElementType(); 614fcaf7f86SDimitry Andric NewMemoryInst = Builder.CreateMaskedScatter( 615fcaf7f86SDimitry Andric DataParam, PtrParam, 616fcaf7f86SDimitry Andric AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam); 617fcaf7f86SDimitry Andric break; 618fcaf7f86SDimitry Andric } 619fcaf7f86SDimitry Andric case Intrinsic::vp_gather: { 620fcaf7f86SDimitry Andric auto *ElementType = cast<VectorType>(VPI.getType())->getElementType(); 621fcaf7f86SDimitry Andric NewMemoryInst = Builder.CreateMaskedGather( 622fcaf7f86SDimitry Andric VPI.getType(), PtrParam, 623fcaf7f86SDimitry Andric AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr, 624fcaf7f86SDimitry Andric VPI.getName()); 625fcaf7f86SDimitry Andric break; 626fcaf7f86SDimitry Andric } 627fcaf7f86SDimitry Andric } 628fcaf7f86SDimitry Andric 629fcaf7f86SDimitry Andric assert(NewMemoryInst); 630fcaf7f86SDimitry Andric replaceOperation(*NewMemoryInst, VPI); 631fcaf7f86SDimitry Andric return NewMemoryInst; 632fcaf7f86SDimitry Andric } 633fcaf7f86SDimitry Andric 634bdd1243dSDimitry Andric Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder, 635bdd1243dSDimitry Andric VPCmpIntrinsic &VPI) { 636bdd1243dSDimitry Andric assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 637bdd1243dSDimitry Andric "Implicitly dropping %evl in non-speculatable operator!"); 638bdd1243dSDimitry Andric 639bdd1243dSDimitry Andric assert(*VPI.getFunctionalOpcode() == Instruction::ICmp || 640bdd1243dSDimitry Andric *VPI.getFunctionalOpcode() == Instruction::FCmp); 641bdd1243dSDimitry Andric 642bdd1243dSDimitry Andric Value *Op0 = VPI.getOperand(0); 643bdd1243dSDimitry Andric Value *Op1 = VPI.getOperand(1); 644bdd1243dSDimitry Andric auto Pred = VPI.getPredicate(); 645bdd1243dSDimitry Andric 646bdd1243dSDimitry Andric auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1); 647bdd1243dSDimitry Andric 648bdd1243dSDimitry Andric replaceOperation(*NewCmp, VPI); 649bdd1243dSDimitry Andric return NewCmp; 650bdd1243dSDimitry Andric } 651bdd1243dSDimitry Andric 652fe6060f1SDimitry Andric void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { 653fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); 654fe6060f1SDimitry Andric 655fe6060f1SDimitry Andric if (VPI.canIgnoreVectorLengthParam()) 656fe6060f1SDimitry Andric return; 657fe6060f1SDimitry Andric 658fe6060f1SDimitry Andric Value *EVLParam = VPI.getVectorLengthParam(); 659fe6060f1SDimitry Andric if (!EVLParam) 660fe6060f1SDimitry Andric return; 661fe6060f1SDimitry Andric 662fe6060f1SDimitry Andric ElementCount StaticElemCount = VPI.getStaticVectorLength(); 663fe6060f1SDimitry Andric Value *MaxEVL = nullptr; 664fe6060f1SDimitry Andric Type *Int32Ty = Type::getInt32Ty(VPI.getContext()); 665fe6060f1SDimitry Andric if (StaticElemCount.isScalable()) { 666fe6060f1SDimitry Andric // TODO add caching 667fe6060f1SDimitry Andric auto *M = VPI.getModule(); 668fe6060f1SDimitry Andric Function *VScaleFunc = 669fe6060f1SDimitry Andric Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty); 670fe6060f1SDimitry Andric IRBuilder<> Builder(VPI.getParent(), VPI.getIterator()); 671fe6060f1SDimitry Andric Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue()); 672fe6060f1SDimitry Andric Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale"); 673fe6060f1SDimitry Andric MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size", 674fe6060f1SDimitry Andric /*NUW*/ true, /*NSW*/ false); 675fe6060f1SDimitry Andric } else { 676fe6060f1SDimitry Andric MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false); 677fe6060f1SDimitry Andric } 678fe6060f1SDimitry Andric VPI.setVectorLengthParam(MaxEVL); 679fe6060f1SDimitry Andric } 680fe6060f1SDimitry Andric 681fe6060f1SDimitry Andric Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { 682fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n'); 683fe6060f1SDimitry Andric 684fe6060f1SDimitry Andric IRBuilder<> Builder(&VPI); 685fe6060f1SDimitry Andric 686fe6060f1SDimitry Andric // Ineffective %evl parameter and so nothing to do here. 687fe6060f1SDimitry Andric if (VPI.canIgnoreVectorLengthParam()) 688fe6060f1SDimitry Andric return &VPI; 689fe6060f1SDimitry Andric 690fe6060f1SDimitry Andric // Only VP intrinsics can have an %evl parameter. 691fe6060f1SDimitry Andric Value *OldMaskParam = VPI.getMaskParam(); 692fe6060f1SDimitry Andric Value *OldEVLParam = VPI.getVectorLengthParam(); 693fe6060f1SDimitry Andric assert(OldMaskParam && "no mask param to fold the vl param into"); 694fe6060f1SDimitry Andric assert(OldEVLParam && "no EVL param to fold away"); 695fe6060f1SDimitry Andric 696fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n'); 697fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n'); 698fe6060f1SDimitry Andric 699fe6060f1SDimitry Andric // Convert the %evl predication into vector mask predication. 700fe6060f1SDimitry Andric ElementCount ElemCount = VPI.getStaticVectorLength(); 701fe6060f1SDimitry Andric Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount); 702fe6060f1SDimitry Andric Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam); 703fe6060f1SDimitry Andric VPI.setMaskParam(NewMaskParam); 704fe6060f1SDimitry Andric 705fe6060f1SDimitry Andric // Drop the %evl parameter. 706fe6060f1SDimitry Andric discardEVLParameter(VPI); 707fe6060f1SDimitry Andric assert(VPI.canIgnoreVectorLengthParam() && 708fe6060f1SDimitry Andric "transformation did not render the evl param ineffective!"); 709fe6060f1SDimitry Andric 710fe6060f1SDimitry Andric // Reassess the modified instruction. 711fe6060f1SDimitry Andric return &VPI; 712fe6060f1SDimitry Andric } 713fe6060f1SDimitry Andric 714fe6060f1SDimitry Andric Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { 715fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n'); 716fe6060f1SDimitry Andric 717fe6060f1SDimitry Andric IRBuilder<> Builder(&VPI); 718fe6060f1SDimitry Andric 719fe6060f1SDimitry Andric // Try lowering to a LLVM instruction first. 720fe6060f1SDimitry Andric auto OC = VPI.getFunctionalOpcode(); 721fe6060f1SDimitry Andric 722fe6060f1SDimitry Andric if (OC && Instruction::isBinaryOp(*OC)) 723fe6060f1SDimitry Andric return expandPredicationInBinaryOperator(Builder, VPI); 724fe6060f1SDimitry Andric 725349cc55cSDimitry Andric if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) 726349cc55cSDimitry Andric return expandPredicationInReduction(Builder, *VPRI); 727349cc55cSDimitry Andric 728bdd1243dSDimitry Andric if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI)) 729bdd1243dSDimitry Andric return expandPredicationInComparison(Builder, *VPCmp); 730bdd1243dSDimitry Andric 7315f757f3fSDimitry Andric if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) { 7325f757f3fSDimitry Andric return expandPredicationToCastIntrinsic(Builder, VPI); 7335f757f3fSDimitry Andric } 7345f757f3fSDimitry Andric 735fcaf7f86SDimitry Andric switch (VPI.getIntrinsicID()) { 736fcaf7f86SDimitry Andric default: 737fcaf7f86SDimitry Andric break; 73806c3fb27SDimitry Andric case Intrinsic::vp_fneg: { 73906c3fb27SDimitry Andric Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName()); 74006c3fb27SDimitry Andric replaceOperation(*NewNegOp, VPI); 74106c3fb27SDimitry Andric return NewNegOp; 74206c3fb27SDimitry Andric } 7435f757f3fSDimitry Andric case Intrinsic::vp_abs: 7445f757f3fSDimitry Andric case Intrinsic::vp_smax: 7455f757f3fSDimitry Andric case Intrinsic::vp_smin: 7465f757f3fSDimitry Andric case Intrinsic::vp_umax: 7475f757f3fSDimitry Andric case Intrinsic::vp_umin: 7485f757f3fSDimitry Andric case Intrinsic::vp_bswap: 7495f757f3fSDimitry Andric case Intrinsic::vp_bitreverse: 7505f757f3fSDimitry Andric return expandPredicationToIntCall(Builder, VPI, 7515f757f3fSDimitry Andric VPI.getFunctionalIntrinsicID().value()); 75206c3fb27SDimitry Andric case Intrinsic::vp_fabs: 75306c3fb27SDimitry Andric case Intrinsic::vp_sqrt: 7545f757f3fSDimitry Andric case Intrinsic::vp_maxnum: 7555f757f3fSDimitry Andric case Intrinsic::vp_minnum: 7567a6dacacSDimitry Andric case Intrinsic::vp_maximum: 7577a6dacacSDimitry Andric case Intrinsic::vp_minimum: 758*0fca6ea1SDimitry Andric case Intrinsic::vp_fma: 759*0fca6ea1SDimitry Andric case Intrinsic::vp_fmuladd: 7605f757f3fSDimitry Andric return expandPredicationToFPCall(Builder, VPI, 7615f757f3fSDimitry Andric VPI.getFunctionalIntrinsicID().value()); 762fcaf7f86SDimitry Andric case Intrinsic::vp_load: 763fcaf7f86SDimitry Andric case Intrinsic::vp_store: 764fcaf7f86SDimitry Andric case Intrinsic::vp_gather: 765fcaf7f86SDimitry Andric case Intrinsic::vp_scatter: 766fcaf7f86SDimitry Andric return expandPredicationInMemoryIntrinsic(Builder, VPI); 767fcaf7f86SDimitry Andric } 768fcaf7f86SDimitry Andric 76906c3fb27SDimitry Andric if (auto CID = VPI.getConstrainedIntrinsicID()) 77006c3fb27SDimitry Andric if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID)) 77106c3fb27SDimitry Andric return Call; 77206c3fb27SDimitry Andric 773fe6060f1SDimitry Andric return &VPI; 774fe6060f1SDimitry Andric } 775fe6060f1SDimitry Andric 776fe6060f1SDimitry Andric //// } CachingVPExpander 777fe6060f1SDimitry Andric 778fe6060f1SDimitry Andric struct TransformJob { 779fe6060f1SDimitry Andric VPIntrinsic *PI; 780fe6060f1SDimitry Andric TargetTransformInfo::VPLegalization Strategy; 781fe6060f1SDimitry Andric TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat) 782fe6060f1SDimitry Andric : PI(PI), Strategy(InitStrat) {} 783fe6060f1SDimitry Andric 784fe6060f1SDimitry Andric bool isDone() const { return Strategy.shouldDoNothing(); } 785fe6060f1SDimitry Andric }; 786fe6060f1SDimitry Andric 78781ad6265SDimitry Andric void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { 78881ad6265SDimitry Andric // Operations with speculatable lanes do not strictly need predication. 78981ad6265SDimitry Andric if (maySpeculateLanes(VPI)) { 790fe6060f1SDimitry Andric // Converting a speculatable VP intrinsic means dropping %mask and %evl. 791fe6060f1SDimitry Andric // No need to expand %evl into the %mask only to ignore that code. 792fe6060f1SDimitry Andric if (LegalizeStrat.OpStrategy == VPLegalization::Convert) 793fe6060f1SDimitry Andric LegalizeStrat.EVLParamStrategy = VPLegalization::Discard; 794fe6060f1SDimitry Andric return; 795fe6060f1SDimitry Andric } 796fe6060f1SDimitry Andric 797fe6060f1SDimitry Andric // We have to preserve the predicating effect of %evl for this 798fe6060f1SDimitry Andric // non-speculatable VP intrinsic. 799fe6060f1SDimitry Andric // 1) Never discard %evl. 800fe6060f1SDimitry Andric // 2) If this VP intrinsic will be expanded to non-VP code, make sure that 801fe6060f1SDimitry Andric // %evl gets folded into %mask. 802fe6060f1SDimitry Andric if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) || 803fe6060f1SDimitry Andric (LegalizeStrat.OpStrategy == VPLegalization::Convert)) { 804fe6060f1SDimitry Andric LegalizeStrat.EVLParamStrategy = VPLegalization::Convert; 805fe6060f1SDimitry Andric } 806fe6060f1SDimitry Andric } 807fe6060f1SDimitry Andric 808fe6060f1SDimitry Andric VPLegalization 809fe6060f1SDimitry Andric CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { 810fe6060f1SDimitry Andric auto VPStrat = TTI.getVPLegalizationStrategy(VPI); 811fe6060f1SDimitry Andric if (LLVM_LIKELY(!UsingTTIOverrides)) { 812fe6060f1SDimitry Andric // No overrides - we are in production. 813fe6060f1SDimitry Andric return VPStrat; 814fe6060f1SDimitry Andric } 815fe6060f1SDimitry Andric 816fe6060f1SDimitry Andric // Overrides set - we are in testing, the following does not need to be 817fe6060f1SDimitry Andric // efficient. 818fe6060f1SDimitry Andric VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride); 819fe6060f1SDimitry Andric VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride); 820fe6060f1SDimitry Andric return VPStrat; 821fe6060f1SDimitry Andric } 822fe6060f1SDimitry Andric 823bdd1243dSDimitry Andric /// Expand llvm.vp.* intrinsics as requested by \p TTI. 824fe6060f1SDimitry Andric bool CachingVPExpander::expandVectorPredication() { 825fe6060f1SDimitry Andric SmallVector<TransformJob, 16> Worklist; 826fe6060f1SDimitry Andric 827fe6060f1SDimitry Andric // Collect all VPIntrinsics that need expansion and determine their expansion 828fe6060f1SDimitry Andric // strategy. 829fe6060f1SDimitry Andric for (auto &I : instructions(F)) { 830fe6060f1SDimitry Andric auto *VPI = dyn_cast<VPIntrinsic>(&I); 831fe6060f1SDimitry Andric if (!VPI) 832fe6060f1SDimitry Andric continue; 833fe6060f1SDimitry Andric auto VPStrat = getVPLegalizationStrategy(*VPI); 83481ad6265SDimitry Andric sanitizeStrategy(*VPI, VPStrat); 835fe6060f1SDimitry Andric if (!VPStrat.shouldDoNothing()) 836fe6060f1SDimitry Andric Worklist.emplace_back(VPI, VPStrat); 837fe6060f1SDimitry Andric } 838fe6060f1SDimitry Andric if (Worklist.empty()) 839fe6060f1SDimitry Andric return false; 840fe6060f1SDimitry Andric 841fe6060f1SDimitry Andric // Transform all VPIntrinsics on the worklist. 842fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size() 843fe6060f1SDimitry Andric << " instructions ::::\n"); 844fe6060f1SDimitry Andric for (TransformJob Job : Worklist) { 845fe6060f1SDimitry Andric // Transform the EVL parameter. 846fe6060f1SDimitry Andric switch (Job.Strategy.EVLParamStrategy) { 847fe6060f1SDimitry Andric case VPLegalization::Legal: 848fe6060f1SDimitry Andric break; 849fe6060f1SDimitry Andric case VPLegalization::Discard: 850fe6060f1SDimitry Andric discardEVLParameter(*Job.PI); 851fe6060f1SDimitry Andric break; 852fe6060f1SDimitry Andric case VPLegalization::Convert: 853fe6060f1SDimitry Andric if (foldEVLIntoMask(*Job.PI)) 854fe6060f1SDimitry Andric ++NumFoldedVL; 855fe6060f1SDimitry Andric break; 856fe6060f1SDimitry Andric } 857fe6060f1SDimitry Andric Job.Strategy.EVLParamStrategy = VPLegalization::Legal; 858fe6060f1SDimitry Andric 859fe6060f1SDimitry Andric // Replace with a non-predicated operation. 860fe6060f1SDimitry Andric switch (Job.Strategy.OpStrategy) { 861fe6060f1SDimitry Andric case VPLegalization::Legal: 862fe6060f1SDimitry Andric break; 863fe6060f1SDimitry Andric case VPLegalization::Discard: 864fe6060f1SDimitry Andric llvm_unreachable("Invalid strategy for operators."); 865fe6060f1SDimitry Andric case VPLegalization::Convert: 866fe6060f1SDimitry Andric expandPredication(*Job.PI); 867fe6060f1SDimitry Andric ++NumLoweredVPOps; 868fe6060f1SDimitry Andric break; 869fe6060f1SDimitry Andric } 870fe6060f1SDimitry Andric Job.Strategy.OpStrategy = VPLegalization::Legal; 871fe6060f1SDimitry Andric 872fe6060f1SDimitry Andric assert(Job.isDone() && "incomplete transformation"); 873fe6060f1SDimitry Andric } 874fe6060f1SDimitry Andric 875fe6060f1SDimitry Andric return true; 876fe6060f1SDimitry Andric } 877fe6060f1SDimitry Andric class ExpandVectorPredication : public FunctionPass { 878fe6060f1SDimitry Andric public: 879fe6060f1SDimitry Andric static char ID; 880fe6060f1SDimitry Andric ExpandVectorPredication() : FunctionPass(ID) { 881fe6060f1SDimitry Andric initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry()); 882fe6060f1SDimitry Andric } 883fe6060f1SDimitry Andric 884fe6060f1SDimitry Andric bool runOnFunction(Function &F) override { 885fe6060f1SDimitry Andric const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 886fe6060f1SDimitry Andric CachingVPExpander VPExpander(F, *TTI); 887fe6060f1SDimitry Andric return VPExpander.expandVectorPredication(); 888fe6060f1SDimitry Andric } 889fe6060f1SDimitry Andric 890fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 891fe6060f1SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 892fe6060f1SDimitry Andric AU.setPreservesCFG(); 893fe6060f1SDimitry Andric } 894fe6060f1SDimitry Andric }; 895fe6060f1SDimitry Andric } // namespace 896fe6060f1SDimitry Andric 897fe6060f1SDimitry Andric char ExpandVectorPredication::ID; 898fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp", 899fe6060f1SDimitry Andric "Expand vector predication intrinsics", false, false) 900fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 901fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 902fe6060f1SDimitry Andric INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp", 903fe6060f1SDimitry Andric "Expand vector predication intrinsics", false, false) 904fe6060f1SDimitry Andric 905fe6060f1SDimitry Andric FunctionPass *llvm::createExpandVectorPredicationPass() { 906fe6060f1SDimitry Andric return new ExpandVectorPredication(); 907fe6060f1SDimitry Andric } 908fe6060f1SDimitry Andric 909fe6060f1SDimitry Andric PreservedAnalyses 910fe6060f1SDimitry Andric ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) { 911fe6060f1SDimitry Andric const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 912fe6060f1SDimitry Andric CachingVPExpander VPExpander(F, TTI); 913fe6060f1SDimitry Andric if (!VPExpander.expandVectorPredication()) 914fe6060f1SDimitry Andric return PreservedAnalyses::all(); 915fe6060f1SDimitry Andric PreservedAnalyses PA; 916fe6060f1SDimitry Andric PA.preserveSet<CFGAnalyses>(); 917fe6060f1SDimitry Andric return PA; 918fe6060f1SDimitry Andric } 919