//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <optional>

using namespace llvm;

using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;

// Keep this in sync with TargetTransformInfo::VPLegalization.
#define VPINTERNAL_VPLEGAL_CASES                                               \
  VPINTERNAL_CASE(Legal)                                                       \
  VPINTERNAL_CASE(Discard)                                                     \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X

// Override options.
static cl::opt<std::string> EVLTransformOverride(
    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %evl parameter (Used in "
             "testing)."));

static cl::opt<std::string> MaskTransformOverride(
    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %mask parameter (Used in "
             "testing)."));

#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

static VPTransform parseOverrideOption(const std::string &TextOpt) {
  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}

#undef VPINTERNAL_VPLEGAL_CASES

// Whether any override options are set.
static bool anyExpandVPOverridesSet() {
  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
}

#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of lowered vector predication operations");

///// Helpers {

/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();

  return false;
}

/// \returns A non-excepting divisor constant for this type.
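/// Masked-off lanes of a predicated division are blended with this value so
/// that the expanded, unpredicated division cannot trap (see
/// expandPredicationInBinaryOperator).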
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  return ConstantInt::get(DivTy, 1u, false);
}

/// Transfer operation properties from \p VPI to \p NewVal.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;

  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;

  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}

/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
/// \p OldOp gets erased.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
  transferDecorations(NewOp, OldOp);
  OldOp.replaceAllUsesWith(&NewOp);
  OldOp.eraseFromParent();
}

static bool maySpeculateLanes(VPIntrinsic &VPI) {
  // The result of VP reductions depends on the mask and evl.
  if (isa<VPReductionIntrinsic>(VPI))
    return false;
  // Fallback to whether the intrinsic is speculatable.
  if (auto IntrID = VPI.getFunctionalIntrinsicID())
    return Intrinsic::getAttributes(VPI.getContext(), *IntrID)
        .hasFnAttr(Attribute::AttrKind::Speculatable);
  if (auto Opc = VPI.getFunctionalOpcode())
    return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI);
  return false;
}

//// } Helpers

namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
  const TargetTransformInfo &TTI;

  /// \returns A (fixed length) vector with ascending integer indices
  /// (<0, 1, ..., NumElems-1>).
  /// \p Builder
  ///    Used for instruction creation.
  /// \p LaneTy
  ///    Integer element type of the result vector.
  /// \p NumElems
  ///    Number of vector elements.
  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                          unsigned NumElems);

  /// \returns A bitmask that is true where the lane position is less-than \p
  /// EVLParam
  ///
  /// \p Builder
  ///    Used for instruction creation.
  /// \p EVLParam
  ///    The explicit vector length parameter to test against the lane
  ///    positions.
  /// \p ElemCount
  ///    Static (potentially scalable) number of vector elements.
  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
                          ElementCount ElemCount);

  /// If needed, folds the EVL into the mask operand and discards the EVL
  /// parameter. Returns a pair of the value of the intrinsic after the change
  /// (if any) and whether the mask was actually folded.
  std::pair<Value *, bool> foldEVLIntoMask(VPIntrinsic &VPI);

  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
  /// length of the operation. Returns true if the %evl (if any) was
  /// effectively changed.
  bool discardEVLParameter(VPIntrinsic &PI);

  /// Lower this VP binary operator to an unpredicated binary operator.
  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                           VPIntrinsic &PI);

  /// Lower this VP int call to an unpredicated int call.
  Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                    unsigned UnpredicatedIntrinsicID);

  /// Lower this VP fp call to an unpredicated fp call.
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                   unsigned UnpredicatedIntrinsicID);

  /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
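  /// For example (illustrative IR only), once %evl has been folded into the
  /// mask,
  ///   %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %v,
  ///                                           <4 x i1> %m, i32 %evl)
  /// is expanded by blending the neutral element into masked-off lanes:
  ///   %safe = select <4 x i1> %m, <4 x i32> %v, <4 x i32> zeroinitializer
  ///   %red  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %safe)
  ///   %r    = add i32 %red, %start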
  Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                      VPReductionIntrinsic &PI);

  /// Lower this VP cast operation to a non-VP cast instruction.
  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                          VPIntrinsic &VPI);

  /// Lower this VP memory operation to a non-VP intrinsic.
  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                            VPIntrinsic &VPI);

  /// Lower this VP comparison to an unpredicated comparison instruction.
  Value *expandPredicationInComparison(IRBuilder<> &Builder,
                                       VPCmpIntrinsic &PI);

  /// Query TTI and expand the vector predication in \p PI accordingly.
  Value *expandPredication(VPIntrinsic &PI);

  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
  /// overrides TTI with the cl::opts listed at the top of this file.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
  bool UsingTTIOverrides;

public:
  CachingVPExpander(const TargetTransformInfo &TTI)
      : TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

  /// Expand llvm.vp.* intrinsics as requested by \p TTI.
  /// Returns the details of the expansion.
  VPExpansionDetails expandVectorPredication(VPIntrinsic &VPI);
};

//// CachingVPExpander {

Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  // TODO add caching
  SmallVector<Constant *, 16> ConstElems;

  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));

  return ConstantVector::get(ConstElems);
}

Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // TODO add caching
  // Scalable vector %evl conversion.
  if (ElemCount.isScalable()) {
    auto *M = Builder.GetInsertBlock()->getModule();
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    Function *ActiveMaskFunc = Intrinsic::getDeclaration(
        M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
    // `get_active_lane_mask` performs an implicit less-than comparison.
    Value *ConstZero = Builder.getInt32(0);
    return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
  }

  // Fixed vector %evl conversion.
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
}

Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Blend in safe operands.
  if (Mask && !isAllTrueMask(Mask)) {
    switch (OC) {
    default:
      // Can safely ignore the predicate.
      break;

    // Division operators need a safe divisor on masked-off lanes (1).
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // 2nd operand must not be zero.
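      // For example (illustrative), a <4 x i32> divisor is blended as
      //   %safe.divisor = select <4 x i1> %mask, <4 x i32> %b,
      //                          <4 x i32> <i32 1, i32 1, i32 1, i32 1>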
      Value *SafeDivisor = getSafeDivisor(VPI.getType());
      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
    }
  }

  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());

  replaceOperation(*NewBinOp, VPI);
  return NewBinOp;
}

Value *CachingVPExpander::expandPredicationToIntCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    Value *Op = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }
  return nullptr;
}

Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    Value *Op0 = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::maxnum:
  case Intrinsic::minnum: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *Op2 = VPI.getOperand(2);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp;
    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
      NewOp =
          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    else
      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }

  return nullptr;
}

static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  bool Negative = false;
  unsigned EltBits = EltTy->getScalarSizeInBits();
  Intrinsic::ID VID = VPI.getIntrinsicID();
  switch (VID) {
  default:
    llvm_unreachable("Expecting a VP reduction intrinsic");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor:
  case Intrinsic::vp_reduce_umax:
    return Constant::getNullValue(EltTy);
  case Intrinsic::vp_reduce_mul:
    return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
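  // All-ones is the identity element for both bitwise 'and' and unsigned min.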
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_umin:
    return ConstantInt::getAllOnesValue(EltTy);
  case Intrinsic::vp_reduce_smin:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMaxValue(EltBits));
  case Intrinsic::vp_reduce_smax:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMinValue(EltBits));
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmaximum:
    Negative = true;
    [[fallthrough]];
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fminimum: {
    bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
                         VID == Intrinsic::vp_reduce_fmaximum;
    FastMathFlags Flags = VPI.getFastMathFlags();
    const fltSemantics &Semantics = EltTy->getFltSemantics();
    return (!Flags.noNaNs() && !PropagatesNaN)
               ? ConstantFP::getQNaN(EltTy, Negative)
           : !Flags.noInfs()
               ? ConstantFP::getInfinity(EltTy, Negative)
               : ConstantFP::get(EltTy,
                                 APFloat::getLargest(Semantics, Negative));
  }
  case Intrinsic::vp_reduce_fadd:
    return ConstantExpr::getBinOpIdentity(
        Instruction::FAdd, EltTy, false,
        VPI.getFastMathFlags().noSignedZeros());
  case Intrinsic::vp_reduce_fmul:
    return ConstantFP::get(EltTy, 1.0);
  }
}

Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());

  // Insert neutral element in masked-out positions
  if (Mask && !isAllTrueMask(Mask)) {
    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
    auto *NeutralVector = Builder.CreateVectorSplat(
        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  }

  Value *Reduction;
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Impossible reduction kind");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_mul:
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    unsigned Opc = getArithmeticReductionInstruction(RedID);
    assert(Instruction::isBinaryOp(Opc));
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    Reduction =
        Builder.CreateBinOp((Instruction::BinaryOps)Opc, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_smax:
  case Intrinsic::vp_reduce_smin:
  case Intrinsic::vp_reduce_umax:
  case Intrinsic::vp_reduce_umin:
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fmaximum:
  case Intrinsic::vp_reduce_fminimum: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RedID);
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction = Builder.CreateBinaryIntrinsic(ScalarID, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_fadd:
    Reduction = Builder.CreateFAddReduce(Start, RedOp);
    break;
  case Intrinsic::vp_reduce_fmul:
    Reduction = Builder.CreateFMulReduce(Start, RedOp);
    break;
  }

  replaceOperation(*Reduction, VPI);
  return Reduction;
}

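// Illustrative example of the cast expansion below (names hypothetical):
//   %x = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> %a, <4 x i1> %m,
//                                                 i32 %evl)
// becomes the plain instruction
//   %x = sext <4 x i32> %a to <4 x i64>
// with %m and %evl dropped.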
Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                                           VPIntrinsic &VPI) {
  Value *CastOp = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP cast intrinsic");
  case Intrinsic::vp_sext:
    CastOp =
        Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_zext:
    CastOp =
        Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_trunc:
    CastOp =
        Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_inttoptr:
    CastOp =
        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_ptrtoint:
    CastOp =
        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptosi:
    CastOp =
        Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;

  case Intrinsic::vp_fptoui:
    CastOp =
        Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_sitofp:
    CastOp =
        Builder.CreateSIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_uitofp:
    CastOp =
        Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptrunc:
    CastOp =
        Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fpext:
    CastOp =
        Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  }
  replaceOperation(*CastOp, VPI);
  return CastOp;
}

Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = VPI.getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();

  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
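  // Gathers (and scatters above) always lower to their masked intrinsic
  // counterparts; unlike vp.load/vp.store there is no unmasked special case.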
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
        VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}

Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
         *VPI.getFunctionalOpcode() == Instruction::FCmp);

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  auto Pred = VPI.getPredicate();

  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);

  replaceOperation(*NewCmp, VPI);
  return NewCmp;
}

bool CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");

  if (VPI.canIgnoreVectorLengthParam())
    return false;

  Value *EVLParam = VPI.getVectorLengthParam();
  if (!EVLParam)
    return false;

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  if (StaticElemCount.isScalable()) {
    // TODO add caching
    auto *M = VPI.getModule();
    Function *VScaleFunc =
        Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
    Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*NUW*/ true, /*NSW*/ false);
  } else {
    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
  }
  VPI.setVectorLengthParam(MaxEVL);
  return true;
}

std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Ineffective %evl parameter and so nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return {&VPI, false};

  // Only VP intrinsics can have an %evl parameter.
  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');

  // Convert the %evl predication into vector mask predication.
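  // For example, for a <4 x i32> operation this emits (illustrative IR):
  //   %evl.mask = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %evl.splat
  //   %new.mask = and <4 x i1> %evl.mask, %old.mask
  // For scalable vectors, llvm.get.active.lane.mask is used instead.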
  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return {&VPI, true};
}

Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
  auto OC = VPI.getFunctionalOpcode();

  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
    return expandPredicationToCastIntrinsic(Builder, VPI);
  }

  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_abs:
  case Intrinsic::vp_smax:
  case Intrinsic::vp_smin:
  case Intrinsic::vp_umax:
  case Intrinsic::vp_umin:
  case Intrinsic::vp_bswap:
  case Intrinsic::vp_bitreverse:
    return expandPredicationToIntCall(Builder, VPI,
                                      VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_fabs:
  case Intrinsic::vp_sqrt:
  case Intrinsic::vp_maxnum:
  case Intrinsic::vp_minnum:
  case Intrinsic::vp_maximum:
  case Intrinsic::vp_minimum:
  case Intrinsic::vp_fma:
  case Intrinsic::vp_fmuladd:
    return expandPredicationToFPCall(Builder, VPI,
                                     VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }

  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;

  return &VPI;
}

//// } CachingVPExpander

void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
  // Operations with speculatable lanes do not strictly need predication.
  if (maySpeculateLanes(VPI)) {
    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
    // No need to expand %evl into the %mask only to ignore that code.
    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
    return;
  }

  // We have to preserve the predicating effect of %evl for this
  // non-speculatable VP intrinsic.
  // 1) Never discard %evl.
  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
  //    %evl gets folded into %mask.
  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
  }
}

VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing, the following does not need to be
  // efficient.
  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}

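// Legalization proceeds in two steps: the %evl parameter is handled first
// (kept, discarded, or folded into the mask), then the operation itself is
// either kept as a VP intrinsic or replaced with unpredicated IR.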
VPExpansionDetails
CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) {
  auto Strategy = getVPLegalizationStrategy(VPI);
  sanitizeStrategy(VPI, Strategy);

  VPExpansionDetails Changed = VPExpansionDetails::IntrinsicUnchanged;

  // Transform the EVL parameter.
  switch (Strategy.EVLParamStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    if (discardEVLParameter(VPI))
      Changed = VPExpansionDetails::IntrinsicUpdated;
    break;
  case VPLegalization::Convert:
    if (auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) {
      (void)NewVPI;
      Changed = VPExpansionDetails::IntrinsicUpdated;
      ++NumFoldedVL;
    }
    break;
  }

  // Replace with a non-predicated operation.
  switch (Strategy.OpStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    llvm_unreachable("Invalid strategy for operators.");
  case VPLegalization::Convert:
    if (Value *V = expandPredication(VPI); V != &VPI) {
      ++NumLoweredVPOps;
      Changed = VPExpansionDetails::IntrinsicReplaced;
    }
    break;
  }

  return Changed;
}
} // namespace

VPExpansionDetails
llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
                                       const TargetTransformInfo &TTI) {
  return CachingVPExpander(TTI).expandVectorPredication(VPI);
}