//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <optional>

using namespace llvm;

using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;

// Keep this in sync with TargetTransformInfo::VPLegalization.
#define VPINTERNAL_VPLEGAL_CASES                                               \
  VPINTERNAL_CASE(Legal)                                                       \
  VPINTERNAL_CASE(Discard)                                                     \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X

// Override options.
static cl::opt<std::string> EVLTransformOverride(
    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %evl parameter (Used in "
             "testing)."));

static cl::opt<std::string> MaskTransformOverride(
    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %mask parameter (Used in "
             "testing)."));

#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

static VPTransform parseOverrideOption(const std::string &TextOpt) {
  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}

#undef VPINTERNAL_VPLEGAL_CASES

// Whether any override options are set.
static bool anyExpandVPOverridesSet() {
  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
}

#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of lowered vector predication operations");

///// Helpers {

/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();

  return false;
}

/// \returns A non-excepting divisor constant for this type.
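/// Dividing or taking a remainder by 1 cannot trap, so this constant can stand
/// in for masked-off divisor lanes before the operation is unpredicated.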
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  return ConstantInt::get(DivTy, 1u, false);
}

/// Transfer operation properties from \p VPI to \p NewVal.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;

  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;

  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}

/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
/// \p OldOp gets erased.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
  transferDecorations(NewOp, OldOp);
  OldOp.replaceAllUsesWith(&NewOp);
  OldOp.eraseFromParent();
}

static bool maySpeculateLanes(VPIntrinsic &VPI) {
  // The result of VP reductions depends on the mask and evl.
  if (isa<VPReductionIntrinsic>(VPI))
    return false;
  // Fall back to whether the intrinsic is speculatable.
  if (auto IntrID = VPI.getFunctionalIntrinsicID())
    return Intrinsic::getAttributes(VPI.getContext(), *IntrID)
        .hasFnAttr(Attribute::AttrKind::Speculatable);
  if (auto Opc = VPI.getFunctionalOpcode())
    return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI);
  return false;
}

//// } Helpers

namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
  const TargetTransformInfo &TTI;

  /// \returns A (fixed length) vector with ascending integer indices
  /// (<0, 1, ..., NumElems-1>).
  /// \p Builder
  ///    Used for instruction creation.
  /// \p LaneTy
  ///    Integer element type of the result vector.
  /// \p NumElems
  ///    Number of vector elements.
  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                          unsigned NumElems);

  /// \returns A bitmask that is true where the lane position is less than
  /// \p EVLParam.
  ///
  /// \p Builder
  ///    Used for instruction creation.
  /// \p EVLParam
  ///    The explicit vector length parameter to test against the lane
  ///    positions.
  /// \p ElemCount
  ///    Static (potentially scalable) number of vector elements.
  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
                          ElementCount ElemCount);

  /// If needed, folds the EVL in the mask operand and discards the EVL
  /// parameter. Returns a pair of the value of the intrinsic after the change
  /// (if any) and whether the mask was actually folded.
  std::pair<Value *, bool> foldEVLIntoMask(VPIntrinsic &VPI);

  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
  /// length of the operation. Returns true if the %evl (if any) was effectively
  /// changed.
  bool discardEVLParameter(VPIntrinsic &PI);

  /// Lower this VP binary operator to an unpredicated binary operator.
  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                           VPIntrinsic &PI);

  /// Lower this VP int call to an unpredicated int call.
  Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                    unsigned UnpredicatedIntrinsicID);

  /// Lower this VP fp call to an unpredicated fp call.
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                   unsigned UnpredicatedIntrinsicID);

  /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
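  /// Masked-off lanes are blended with the reduction's neutral element first
  /// (see getNeutralReductionElement) so they cannot affect the result.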
  Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                      VPReductionIntrinsic &PI);

  /// Lower this VP cast operation to a non-VP intrinsic.
  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                          VPIntrinsic &VPI);

  /// Lower this VP memory operation to a non-VP intrinsic.
  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                            VPIntrinsic &VPI);

  /// Lower this VP comparison to a call to an unpredicated comparison.
  Value *expandPredicationInComparison(IRBuilder<> &Builder,
                                       VPCmpIntrinsic &PI);

  /// Query TTI and expand the vector predication in \p PI accordingly.
  Value *expandPredication(VPIntrinsic &PI);

  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
  /// overrides TTI with the cl::opts listed at the top of this file.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
  bool UsingTTIOverrides;

public:
  CachingVPExpander(const TargetTransformInfo &TTI)
      : TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

  /// Expand llvm.vp.* intrinsics as requested by \p TTI.
  /// Returns the details of the expansion.
  VPExpansionDetails expandVectorPredication(VPIntrinsic &VPI);
};

//// CachingVPExpander {

Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  // TODO add caching
  SmallVector<Constant *, 16> ConstElems;

  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));

  return ConstantVector::get(ConstElems);
}

Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // TODO add caching
  // Scalable vector %evl conversion.
  if (ElemCount.isScalable()) {
    auto *M = Builder.GetInsertBlock()->getModule();
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    Function *ActiveMaskFunc = Intrinsic::getDeclaration(
        M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
    // `get_active_lane_mask` performs an implicit less-than comparison.
    Value *ConstZero = Builder.getInt32(0);
    return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
  }

  // Fixed vector %evl conversion.
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
}

Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Blend in safe operands.
  if (Mask && !isAllTrueMask(Mask)) {
    switch (OC) {
    default:
      // Can safely ignore the predicate.
      break;

    // Division operators need a safe divisor on masked-off lanes (1).
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // 2nd operand must not be zero.
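      // Illustrative expansion (types and intrinsic mangling elided):
      //   %r = call @llvm.vp.udiv(%a, %b, %mask, %evl)
      // becomes, once the %evl has been folded into %mask:
      //   %safe = select %mask, %b, splat(1)
      //   %r    = udiv %a, %safe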
      Value *SafeDivisor = getSafeDivisor(VPI.getType());
      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
    }
  }

  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());

  replaceOperation(*NewBinOp, VPI);
  return NewBinOp;
}

Value *CachingVPExpander::expandPredicationToIntCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    Value *Op = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }
  return nullptr;
}

Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    Value *Op0 = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::maxnum:
  case Intrinsic::minnum: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *Op2 = VPI.getOperand(2);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp;
    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
      NewOp =
          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    else
      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }

  return nullptr;
}

static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  bool Negative = false;
  unsigned EltBits = EltTy->getScalarSizeInBits();
  Intrinsic::ID VID = VPI.getIntrinsicID();
  switch (VID) {
  default:
    llvm_unreachable("Expecting a VP reduction intrinsic");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor:
  case Intrinsic::vp_reduce_umax:
    return Constant::getNullValue(EltTy);
  case Intrinsic::vp_reduce_mul:
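    // 1 is the multiplicative identity, so blending it into masked-off lanes
    // leaves the product unchanged.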
    return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_umin:
    return ConstantInt::getAllOnesValue(EltTy);
  case Intrinsic::vp_reduce_smin:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMaxValue(EltBits));
  case Intrinsic::vp_reduce_smax:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMinValue(EltBits));
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmaximum:
    Negative = true;
    [[fallthrough]];
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fminimum: {
    bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
                         VID == Intrinsic::vp_reduce_fmaximum;
    FastMathFlags Flags = VPI.getFastMathFlags();
    const fltSemantics &Semantics = EltTy->getFltSemantics();
    return (!Flags.noNaNs() && !PropagatesNaN)
               ? ConstantFP::getQNaN(EltTy, Negative)
           : !Flags.noInfs()
               ? ConstantFP::getInfinity(EltTy, Negative)
               : ConstantFP::get(EltTy,
                                 APFloat::getLargest(Semantics, Negative));
  }
  case Intrinsic::vp_reduce_fadd:
    return ConstantFP::getNegativeZero(EltTy);
  case Intrinsic::vp_reduce_fmul:
    return ConstantFP::get(EltTy, 1.0);
  }
}

Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());

  // Insert neutral element in masked-out positions.
  if (Mask && !isAllTrueMask(Mask)) {
    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
    auto *NeutralVector = Builder.CreateVectorSplat(
        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  }

  Value *Reduction;
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Impossible reduction kind");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_mul:
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    unsigned Opc = getArithmeticReductionInstruction(RedID);
    assert(Instruction::isBinaryOp(Opc));
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    Reduction =
        Builder.CreateBinOp((Instruction::BinaryOps)Opc, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_smax:
  case Intrinsic::vp_reduce_smin:
  case Intrinsic::vp_reduce_umax:
  case Intrinsic::vp_reduce_umin:
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fmaximum:
  case Intrinsic::vp_reduce_fminimum: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RedID);
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction = Builder.CreateBinaryIntrinsic(ScalarID, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_fadd:
    Reduction = Builder.CreateFAddReduce(Start, RedOp);
    break;
  case Intrinsic::vp_reduce_fmul:
    Reduction = Builder.CreateFMulReduce(Start, RedOp);
    break;
  }

  replaceOperation(*Reduction, VPI);
  return Reduction;
}

Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                                           VPIntrinsic &VPI) {
  Value *CastOp = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP cast intrinsic");
  case Intrinsic::vp_sext:
    CastOp =
        Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_zext:
    CastOp =
        Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_trunc:
    CastOp =
        Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_inttoptr:
    CastOp =
        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_ptrtoint:
    CastOp =
        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptosi:
    CastOp =
        Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;

  case Intrinsic::vp_fptoui:
    CastOp =
        Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_sitofp:
    CastOp =
        Builder.CreateSIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_uitofp:
    CastOp =
        Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptrunc:
    CastOp =
        Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fpext:
    CastOp =
        Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  }
  replaceOperation(*CastOp, VPI);
  return CastOp;
}

Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = VPI.getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();

  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
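    // Without an explicit alignment on the VP intrinsic, fall back to the
    // preferred alignment of the gathered element type.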
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
        VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}

Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
         *VPI.getFunctionalOpcode() == Instruction::FCmp);

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  auto Pred = VPI.getPredicate();

  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);

  replaceOperation(*NewCmp, VPI);
  return NewCmp;
}

bool CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");

  if (VPI.canIgnoreVectorLengthParam())
    return false;

  Value *EVLParam = VPI.getVectorLengthParam();
  if (!EVLParam)
    return false;

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  if (StaticElemCount.isScalable()) {
    // TODO add caching
    auto *M = VPI.getModule();
    Function *VScaleFunc =
        Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
    Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*NUW*/ true, /*NSW*/ false);
  } else {
    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
  }
  VPI.setVectorLengthParam(MaxEVL);
  return true;
}

std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // The %evl parameter is ineffective, so there is nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return {&VPI, false};

  // Only VP intrinsics can have an %evl parameter.
  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');

  // Convert the %evl predication into vector mask predication.
  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return {&VPI, true};
}

Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
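  // For example, a VP binary operator whose lanes may be speculated (or whose
  // %evl is already ineffective) simply becomes the corresponding plain
  // instruction.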
  auto OC = VPI.getFunctionalOpcode();

  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
    return expandPredicationToCastIntrinsic(Builder, VPI);
  }

  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_abs:
  case Intrinsic::vp_smax:
  case Intrinsic::vp_smin:
  case Intrinsic::vp_umax:
  case Intrinsic::vp_umin:
  case Intrinsic::vp_bswap:
  case Intrinsic::vp_bitreverse:
    return expandPredicationToIntCall(Builder, VPI,
                                      VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_fabs:
  case Intrinsic::vp_sqrt:
  case Intrinsic::vp_maxnum:
  case Intrinsic::vp_minnum:
  case Intrinsic::vp_maximum:
  case Intrinsic::vp_minimum:
  case Intrinsic::vp_fma:
  case Intrinsic::vp_fmuladd:
    return expandPredicationToFPCall(Builder, VPI,
                                     VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }

  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;

  return &VPI;
}

//// } CachingVPExpander

void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
  // Operations with speculatable lanes do not strictly need predication.
  if (maySpeculateLanes(VPI)) {
    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
    // No need to expand %evl into the %mask only to ignore that code.
    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
    return;
  }

  // We have to preserve the predicating effect of %evl for this
  // non-speculatable VP intrinsic.
  // 1) Never discard %evl.
  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
  //    %evl gets folded into %mask.
  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
  }
}

VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing, the following does not need to be
  // efficient.
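  // Each override knob accepts Legal, Discard or Convert and replaces the
  // strategy the target reported for the corresponding parameter.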
  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}

VPExpansionDetails
CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) {
  auto Strategy = getVPLegalizationStrategy(VPI);
  sanitizeStrategy(VPI, Strategy);

  VPExpansionDetails Changed = VPExpansionDetails::IntrinsicUnchanged;

  // Transform the EVL parameter.
  switch (Strategy.EVLParamStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    if (discardEVLParameter(VPI))
      Changed = VPExpansionDetails::IntrinsicUpdated;
    break;
  case VPLegalization::Convert:
    if (auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) {
      (void)NewVPI;
      Changed = VPExpansionDetails::IntrinsicUpdated;
      ++NumFoldedVL;
    }
    break;
  }

  // Replace with a non-predicated operation.
  switch (Strategy.OpStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    llvm_unreachable("Invalid strategy for operators.");
  case VPLegalization::Convert:
    if (Value *V = expandPredication(VPI); V != &VPI) {
      ++NumLoweredVPOps;
      Changed = VPExpansionDetails::IntrinsicReplaced;
    }
    break;
  }

  return Changed;
}
} // namespace

VPExpansionDetails
llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
                                       const TargetTransformInfo &TTI) {
  return CachingVPExpander(TTI).expandVectorPredication(VPI);
}