//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include <optional>

using namespace llvm;

using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;

// Keep this in sync with TargetTransformInfo::VPLegalization.
#define VPINTERNAL_VPLEGAL_CASES                                               \
  VPINTERNAL_CASE(Legal)                                                       \
  VPINTERNAL_CASE(Discard)                                                     \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X

// Override options.
static cl::opt<std::string> EVLTransformOverride(
    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %evl parameter (Used in "
             "testing)."));

static cl::opt<std::string> MaskTransformOverride(
    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %mask parameter (Used in "
             "testing)."));

#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

static VPTransform parseOverrideOption(const std::string &TextOpt) {
  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}

#undef VPINTERNAL_VPLEGAL_CASES

// Whether any override options are set.
static bool anyExpandVPOverridesSet() {
  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
}

#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");

///// Helpers {

/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();

  return false;
}

/// \returns A non-excepting divisor constant for this type.
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  return ConstantInt::get(DivTy, 1u, false);
}

/// Transfer operation properties from \p VPI to \p NewVal.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;

  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;

  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}

/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
/// \p OldOp gets erased.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
  transferDecorations(NewOp, OldOp);
  OldOp.replaceAllUsesWith(&NewOp);
  OldOp.eraseFromParent();
}

static bool maySpeculateLanes(VPIntrinsic &VPI) {
  // The result of VP reductions depends on the mask and evl.
  if (isa<VPReductionIntrinsic>(VPI))
    return false;
  // Fallback to whether the intrinsic is speculatable.
  if (auto IntrID = VPI.getFunctionalIntrinsicID())
    return Intrinsic::getAttributes(VPI.getContext(), *IntrID)
        .hasFnAttr(Attribute::AttrKind::Speculatable);
  if (auto Opc = VPI.getFunctionalOpcode())
    return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI);
  return false;
}

//// } Helpers

namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
  const TargetTransformInfo &TTI;

  /// \returns A (fixed length) vector with ascending integer indices
  /// (<0, 1, ..., NumElems-1>).
  /// \p Builder
  ///    Used for instruction creation.
  /// \p LaneTy
  ///    Integer element type of the result vector.
  /// \p NumElems
  ///    Number of vector elements.
  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                          unsigned NumElems);

  /// \returns A bitmask that is true where the lane position is less-than \p
  /// EVLParam.
  ///
  /// \p Builder
  ///    Used for instruction creation.
  /// \p EVLParam
  ///    The explicit vector length parameter to test against the lane
  ///    positions.
  /// \p ElemCount
  ///    Static (potentially scalable) number of vector elements.
  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
                          ElementCount ElemCount);

  /// If needed, folds the EVL into the mask operand and discards the EVL
  /// parameter. Returns a pair of the value of the intrinsic after the change
  /// (if any) and whether the mask was actually folded.
  std::pair<Value *, bool> foldEVLIntoMask(VPIntrinsic &VPI);

  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
  /// length of the operation. Returns true if the %evl (if any) was
  /// effectively changed.
  bool discardEVLParameter(VPIntrinsic &PI);

  /// Lower this VP binary operator to an unpredicated binary operator.
  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                           VPIntrinsic &PI);

  /// Lower this VP int call to an unpredicated int call.
  Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                    unsigned UnpredicatedIntrinsicID);

  /// Lower this VP fp call to an unpredicated fp call.
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                   unsigned UnpredicatedIntrinsicID);

  /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
  Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                      VPReductionIntrinsic &PI);

  /// Lower this VP cast operation to a non-VP intrinsic.
  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                          VPIntrinsic &VPI);

  /// Lower this VP memory operation to a non-VP intrinsic.
  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                            VPIntrinsic &VPI);

  /// Lower this VP comparison to a call to an unpredicated comparison.
  Value *expandPredicationInComparison(IRBuilder<> &Builder,
                                       VPCmpIntrinsic &PI);

  /// Query TTI and expand the vector predication in \p PI accordingly.
  Value *expandPredication(VPIntrinsic &PI);

  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
  /// overrides TTI with the cl::opts listed at the top of this file.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
  bool UsingTTIOverrides;

public:
  CachingVPExpander(const TargetTransformInfo &TTI)
      : TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

  /// Expand llvm.vp.* intrinsics as requested by \p TTI.
  /// Returns the details of the expansion.
  VPExpansionDetails expandVectorPredication(VPIntrinsic &VPI);
};

//// CachingVPExpander {

Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  // TODO add caching
  SmallVector<Constant *, 16> ConstElems;

  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));

  return ConstantVector::get(ConstElems);
}

Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // TODO add caching
  // Scalable vector %evl conversion.
  if (ElemCount.isScalable()) {
    auto *M = Builder.GetInsertBlock()->getModule();
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    Function *ActiveMaskFunc = Intrinsic::getDeclaration(
        M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
    // `get_active_lane_mask` performs an implicit less-than comparison.
    Value *ConstZero = Builder.getInt32(0);
    return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
  }

  // Fixed vector %evl conversion.
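  // For illustration: with 4 lanes and %evl == 2, the comparison below
  // computes <0,1,2,3> u< <2,2,2,2>, i.e. the lane mask <1,1,0,0>.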
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
}

Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Blend in safe operands.
  if (Mask && !isAllTrueMask(Mask)) {
    switch (OC) {
    default:
      // Can safely ignore the predicate.
      break;

    // Division operators need a safe divisor on masked-off lanes (1).
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // 2nd operand must not be zero.
      Value *SafeDivisor = getSafeDivisor(VPI.getType());
      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
    }
  }

  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());

  replaceOperation(*NewBinOp, VPI);
  return NewBinOp;
}

Value *CachingVPExpander::expandPredicationToIntCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    Value *Op = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }
  return nullptr;
}

Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    Value *Op0 = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::maxnum:
  case Intrinsic::minnum: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *Op2 = VPI.getOperand(2);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp;
    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
      NewOp =
          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    else
      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }

  return nullptr;
}

static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  bool Negative = false;
  unsigned EltBits = EltTy->getScalarSizeInBits();
  Intrinsic::ID VID = VPI.getIntrinsicID();
  switch (VID) {
  default:
    llvm_unreachable("Expecting a VP reduction intrinsic");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor:
  case Intrinsic::vp_reduce_umax:
    return Constant::getNullValue(EltTy);
  case Intrinsic::vp_reduce_mul:
    return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_umin:
    return ConstantInt::getAllOnesValue(EltTy);
  case Intrinsic::vp_reduce_smin:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMaxValue(EltBits));
  case Intrinsic::vp_reduce_smax:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMinValue(EltBits));
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmaximum:
    Negative = true;
    [[fallthrough]];
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fminimum: {
    bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
                         VID == Intrinsic::vp_reduce_fmaximum;
    FastMathFlags Flags = VPI.getFastMathFlags();
    const fltSemantics &Semantics = EltTy->getFltSemantics();
    return (!Flags.noNaNs() && !PropagatesNaN)
               ? ConstantFP::getQNaN(EltTy, Negative)
           : !Flags.noInfs()
               ? ConstantFP::getInfinity(EltTy, Negative)
               : ConstantFP::get(EltTy,
                                 APFloat::getLargest(Semantics, Negative));
  }
  case Intrinsic::vp_reduce_fadd:
    return ConstantFP::getNegativeZero(EltTy);
  case Intrinsic::vp_reduce_fmul:
    return ConstantFP::get(EltTy, 1.0);
  }
}

Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());

  // Insert neutral element in masked-out positions.
  if (Mask && !isAllTrueMask(Mask)) {
    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
    auto *NeutralVector = Builder.CreateVectorSplat(
        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  }

  Value *Reduction;
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Impossible reduction kind");
  case Intrinsic::vp_reduce_add:
    Reduction = Builder.CreateAddReduce(RedOp);
    Reduction = Builder.CreateAdd(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_mul:
    Reduction = Builder.CreateMulReduce(RedOp);
    Reduction = Builder.CreateMul(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_and:
    Reduction = Builder.CreateAndReduce(RedOp);
    Reduction = Builder.CreateAnd(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_or:
    Reduction = Builder.CreateOrReduce(RedOp);
    Reduction = Builder.CreateOr(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_xor:
    Reduction = Builder.CreateXorReduce(RedOp);
    Reduction = Builder.CreateXor(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_smax:
    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_smin:
    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umax:
    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umin:
    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmax:
    Reduction = Builder.CreateFPMaxReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmin:
    Reduction = Builder.CreateFPMinReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmaximum:
    Reduction = Builder.CreateFPMaximumReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fminimum:
    Reduction = Builder.CreateFPMinimumReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fadd:
    Reduction = Builder.CreateFAddReduce(Start, RedOp);
    break;
  case Intrinsic::vp_reduce_fmul:
    Reduction = Builder.CreateFMulReduce(Start, RedOp);
    break;
  }

  replaceOperation(*Reduction, VPI);
  return Reduction;
}

Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                                           VPIntrinsic &VPI) {
  Value *CastOp = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP cast intrinsic");
  case Intrinsic::vp_sext:
    CastOp =
        Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_zext:
    CastOp =
        Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_trunc:
    CastOp =
        Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_inttoptr:
    CastOp =
        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_ptrtoint:
    CastOp =
        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptosi:
    CastOp =
        Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptoui:
    CastOp =
        Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_sitofp:
    CastOp =
        Builder.CreateSIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_uitofp:
    CastOp =
        Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptrunc:
    CastOp =
        Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fpext:
    CastOp =
        Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  }
  replaceOperation(*CastOp, VPI);
  return CastOp;
}

Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = VPI.getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();
  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
        VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}

Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
         *VPI.getFunctionalOpcode() == Instruction::FCmp);

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  auto Pred = VPI.getPredicate();

  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);

  replaceOperation(*NewCmp, VPI);
  return NewCmp;
}

bool CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");

  if (VPI.canIgnoreVectorLengthParam())
    return false;

  Value *EVLParam = VPI.getVectorLengthParam();
  if (!EVLParam)
    return false;

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  if (StaticElemCount.isScalable()) {
    // TODO add caching
    auto *M = VPI.getModule();
    Function *VScaleFunc =
        Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
    Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*NUW*/ true, /*NSW*/ false);
  } else {
    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
  }
  VPI.setVectorLengthParam(MaxEVL);
  return true;
}
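// Example: for a <4 x i32> operation with %evl == 2 and %mask == <1,1,0,1>,
// foldEVLIntoMask computes the lane mask <1,1,0,0> (lane index u< %evl),
// ANDs it with the old mask to give the new mask <1,1,0,0>, and then resets
// %evl to the full vector length (4) via discardEVLParameter.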
std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Ineffective %evl parameter and so nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return {&VPI, false};

  // Only VP intrinsics can have an %evl parameter.
  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');

  // Convert the %evl predication into vector mask predication.
  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return {&VPI, true};
}

Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
  auto OC = VPI.getFunctionalOpcode();

  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID()))
    return expandPredicationToCastIntrinsic(Builder, VPI);

  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_abs:
  case Intrinsic::vp_smax:
  case Intrinsic::vp_smin:
  case Intrinsic::vp_umax:
  case Intrinsic::vp_umin:
  case Intrinsic::vp_bswap:
  case Intrinsic::vp_bitreverse:
    return expandPredicationToIntCall(Builder, VPI,
                                      VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_fabs:
  case Intrinsic::vp_sqrt:
  case Intrinsic::vp_maxnum:
  case Intrinsic::vp_minnum:
  case Intrinsic::vp_maximum:
  case Intrinsic::vp_minimum:
  case Intrinsic::vp_fma:
  case Intrinsic::vp_fmuladd:
    return expandPredicationToFPCall(Builder, VPI,
                                     VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }

  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;

  return &VPI;
}

//// } CachingVPExpander

void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
  // Operations with speculatable lanes do not strictly need predication.
  if (maySpeculateLanes(VPI)) {
    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
    // No need to expand %evl into the %mask only to ignore that code.
    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
    return;
  }

  // We have to preserve the predicating effect of %evl for this
  // non-speculatable VP intrinsic.
  // 1) Never discard %evl.
  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
  //    %evl gets folded into %mask.
  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
  }
}

VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing, the following does not need to be
  // efficient.
  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}

VPExpansionDetails
CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) {
  auto Strategy = getVPLegalizationStrategy(VPI);
  sanitizeStrategy(VPI, Strategy);

  VPExpansionDetails Changed = VPExpansionDetails::IntrinsicUnchanged;

  // Transform the EVL parameter.
  switch (Strategy.EVLParamStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    if (discardEVLParameter(VPI))
      Changed = VPExpansionDetails::IntrinsicUpdated;
    break;
  case VPLegalization::Convert:
    if (auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) {
      (void)NewVPI;
      Changed = VPExpansionDetails::IntrinsicUpdated;
      ++NumFoldedVL;
    }
    break;
  }

  // Replace with a non-predicated operation.
  switch (Strategy.OpStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    llvm_unreachable("Invalid strategy for operators.");
  case VPLegalization::Convert:
    if (Value *V = expandPredication(VPI); V != &VPI) {
      ++NumLoweredVPOps;
      Changed = VPExpansionDetails::IntrinsicReplaced;
    }
    break;
  }

  return Changed;
}
} // namespace

VPExpansionDetails
llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
                                       const TargetTransformInfo &TTI) {
  return CachingVPExpander(TTI).expandVectorPredication(VPI);
}