//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <optional>

using namespace llvm;

using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;

// Keep this in sync with TargetTransformInfo::VPLegalization.
#define VPINTERNAL_VPLEGAL_CASES                                               \
  VPINTERNAL_CASE(Legal)                                                       \
  VPINTERNAL_CASE(Discard)                                                     \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X

// Override options.
static cl::opt<std::string> EVLTransformOverride(
    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %evl parameter (Used in "
             "testing)."));

static cl::opt<std::string> MaskTransformOverride(
    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %mask parameter (Used in "
             "testing)."));

#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

static VPTransform parseOverrideOption(const std::string &TextOpt) {
  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}

#undef VPINTERNAL_VPLEGAL_CASES

// Whether any override options are set.
static bool anyExpandVPOverridesSet() {
  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
}

#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of lowered vector predication operations");

///// Helpers {

/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();

  return false;
}
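// Illustrative note (not an exhaustive description): the check above
// recognizes constant all-true masks such as
//   <4 x i1> <i1 true, i1 true, i1 true, i1 true>
// or a splat of i1 true, and conservatively treats any other mask value as
// potentially partial.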
/// \returns A non-excepting divisor constant for this type.
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  return ConstantInt::get(DivTy, 1u, false);
}

/// Transfer operation properties from \p VPI to \p NewVal.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;

  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;

  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}

/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
/// \p OldOp gets erased.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
  transferDecorations(NewOp, OldOp);
  OldOp.replaceAllUsesWith(&NewOp);
  OldOp.eraseFromParent();
}

static bool maySpeculateLanes(VPIntrinsic &VPI) {
  // The result of VP reductions depends on the mask and evl.
  if (isa<VPReductionIntrinsic>(VPI))
    return false;
  // Fall back to whether the intrinsic is speculatable.
  if (auto IntrID = VPI.getFunctionalIntrinsicID())
    return Intrinsic::getAttributes(VPI.getContext(), *IntrID)
        .hasFnAttr(Attribute::AttrKind::Speculatable);
  if (auto Opc = VPI.getFunctionalOpcode())
    return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI);
  return false;
}

//// } Helpers

namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
  const TargetTransformInfo &TTI;

  /// \returns A (fixed length) vector with ascending integer indices
  /// (<0, 1, ..., NumElems-1>).
  /// \p Builder
  ///    Used for instruction creation.
  /// \p LaneTy
  ///    Integer element type of the result vector.
  /// \p NumElems
  ///    Number of vector elements.
  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                          unsigned NumElems);

  /// \returns A bitmask that is true where the lane position is less-than \p
  /// EVLParam
  ///
  /// \p Builder
  ///    Used for instruction creation.
  /// \p EVLParam
  ///    The explicit vector length parameter to test against the lane
  ///    positions.
  /// \p ElemCount
  ///    Static (potentially scalable) number of vector elements.
  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
                          ElementCount ElemCount);

  /// If needed, folds the EVL into the mask operand and discards the EVL
  /// parameter. Returns a pair of the value of the intrinsic after the change
  /// (if any) and whether the EVL was actually folded into the mask.
  std::pair<Value *, bool> foldEVLIntoMask(VPIntrinsic &VPI);

  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
  /// length of the operation. Returns true if the %evl (if any) was effectively
  /// changed.
  bool discardEVLParameter(VPIntrinsic &PI);

  /// Lower this VP binary operator to an unpredicated binary operator.
  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                           VPIntrinsic &PI);

  /// Lower this VP int call to an unpredicated int call.
  Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                    unsigned UnpredicatedIntrinsicID);

  /// Lower this VP fp call to an unpredicated fp call.
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                   unsigned UnpredicatedIntrinsicID);

  /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
  Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                      VPReductionIntrinsic &PI);

  /// Lower this VP cast operation to a non-VP intrinsic.
  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                          VPIntrinsic &VPI);

  /// Lower this VP memory operation to a non-VP intrinsic.
  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                            VPIntrinsic &VPI);

  /// Lower this VP comparison to a call to an unpredicated comparison.
  Value *expandPredicationInComparison(IRBuilder<> &Builder,
                                       VPCmpIntrinsic &PI);

  /// Query TTI and expand the vector predication in \p PI accordingly.
  Value *expandPredication(VPIntrinsic &PI);

  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
  /// overrides TTI with the cl::opts listed at the top of this file.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
  bool UsingTTIOverrides;

public:
  CachingVPExpander(const TargetTransformInfo &TTI)
      : TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

  /// Expand llvm.vp.* intrinsics as requested by \p TTI.
  /// Returns the details of the expansion.
  VPExpansionDetails expandVectorPredication(VPIntrinsic &VPI);
};

//// CachingVPExpander {

Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  // TODO add caching
  SmallVector<Constant *, 16> ConstElems;

  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));

  return ConstantVector::get(ConstElems);
}

Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // TODO add caching
  // Scalable vector %evl conversion.
  if (ElemCount.isScalable()) {
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    // `get_active_lane_mask` performs an implicit less-than comparison.
    Value *ConstZero = Builder.getInt32(0);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {BoolVecTy, EVLParam->getType()},
                                   {ConstZero, EVLParam});
  }

  // Fixed vector %evl conversion.
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
}
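// Illustrative sketch of the %evl-to-mask conversion above (simplified,
// assumed IR; not necessarily what the builder emits verbatim). For a fixed
// <4 x i32> operation with %evl = 3:
//   %step  = <i32 0, i32 1, i32 2, i32 3>       ; createStepVector
//   %vlvec = broadcast of %evl to <4 x i32>     ; insertelement + shufflevector
//   %mask  = icmp ult <4 x i32> %step, %vlvec   ; <i1 1, i1 1, i1 1, i1 0>
// For scalable types the same predicate comes from
//   call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %evl)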
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Blend in safe operands.
  if (Mask && !isAllTrueMask(Mask)) {
    switch (OC) {
    default:
      // Can safely ignore the predicate.
      break;

    // Division operators need a safe divisor on masked-off lanes (1).
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // 2nd operand must not be zero.
      Value *SafeDivisor = getSafeDivisor(VPI.getType());
      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
    }
  }

  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());

  replaceOperation(*NewBinOp, VPI);
  return NewBinOp;
}

Value *CachingVPExpander::expandPredicationToIntCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *NewOp = Builder.CreateIntrinsic(
        UnpredicatedIntrinsicID, {VPI.getType()}, {Op0, Op1},
        /*FMFSource=*/nullptr, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    Value *Op = VPI.getOperand(0);
    Value *NewOp =
        Builder.CreateIntrinsic(UnpredicatedIntrinsicID, {VPI.getType()}, {Op},
                                /*FMFSource=*/nullptr, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }
  return nullptr;
}

Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    Value *Op0 = VPI.getOperand(0);
    Value *NewOp =
        Builder.CreateIntrinsic(UnpredicatedIntrinsicID, {VPI.getType()}, {Op0},
                                /*FMFSource=*/nullptr, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::maxnum:
  case Intrinsic::minnum: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *NewOp = Builder.CreateIntrinsic(
        UnpredicatedIntrinsicID, {VPI.getType()}, {Op0, Op1},
        /*FMFSource=*/nullptr, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *Op2 = VPI.getOperand(2);
    Function *Fn = Intrinsic::getOrInsertDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp;
    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
      NewOp =
          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    else
      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }

  return nullptr;
}

static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  Intrinsic::ID RdxID = *VPI.getFunctionalIntrinsicID();
  FastMathFlags FMF;
  if (isa<FPMathOperator>(VPI))
    FMF = VPI.getFastMathFlags();
  return getReductionIdentity(RdxID, EltTy, FMF);
}
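// Illustrative sketch of the reduction expansion below (simplified, assumed
// IR): a masked vp.reduce.add such as
//   %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %v,
//                                           <4 x i1> %m, i32 %evl)
// becomes, once %evl is known to be ineffective, roughly
//   %safe = select <4 x i1> %m, <4 x i32> %v, <4 x i32> zeroinitializer
//   %rdx  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %safe)
//   %r    = add i32 %rdx, %start
// where 0 is the neutral element returned by getNeutralReductionElement.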
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());

  // Insert neutral element in masked-out positions
  if (Mask && !isAllTrueMask(Mask)) {
    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
    auto *NeutralVector = Builder.CreateVectorSplat(
        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  }

  Value *Reduction;
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Impossible reduction kind");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_mul:
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    unsigned Opc = getArithmeticReductionInstruction(RedID);
    assert(Instruction::isBinaryOp(Opc));
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    Reduction =
        Builder.CreateBinOp((Instruction::BinaryOps)Opc, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_smax:
  case Intrinsic::vp_reduce_smin:
  case Intrinsic::vp_reduce_umax:
  case Intrinsic::vp_reduce_umin:
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fmaximum:
  case Intrinsic::vp_reduce_fminimum: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RedID);
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction = Builder.CreateBinaryIntrinsic(ScalarID, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_fadd:
    Reduction = Builder.CreateFAddReduce(Start, RedOp);
    break;
  case Intrinsic::vp_reduce_fmul:
    Reduction = Builder.CreateFMulReduce(Start, RedOp);
    break;
  }

  replaceOperation(*Reduction, VPI);
  return Reduction;
}

Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                                           VPIntrinsic &VPI) {
  Value *CastOp = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP cast intrinsic");
  case Intrinsic::vp_sext:
    CastOp =
        Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_zext:
    CastOp =
        Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_trunc:
    CastOp =
        Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_inttoptr:
    CastOp =
        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_ptrtoint:
    CastOp =
        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptosi:
    CastOp =
        Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;

  case Intrinsic::vp_fptoui:
    CastOp =
        Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_sitofp:
    CastOp =
        Builder.CreateSIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_uitofp:
    CastOp =
        Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptrunc:
    CastOp =
        Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fpext:
    CastOp =
        Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  }
  replaceOperation(*CastOp, VPI);
  return CastOp;
}
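// Illustrative sketch of the memory expansion below (simplified, assumed IR):
// once the %evl parameter is known to be ineffective, a masked
//   call void @llvm.vp.store.v4i32.p0(<4 x i32> %d, ptr %p, <4 x i1> %m, i32 %evl)
// is rewritten to
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %d, ptr %p, i32 4, <4 x i1> %m)
// while an all-true mask yields a plain (aligned) store instead.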
Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = VPI.getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();

  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
        VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}

Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
         *VPI.getFunctionalOpcode() == Instruction::FCmp);

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  auto Pred = VPI.getPredicate();

  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);

  replaceOperation(*NewCmp, VPI);
  return NewCmp;
}
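// Illustrative sketch of discardEVLParameter below (simplified, assumed IR):
// for a <vscale x 2 x i64> operation the %evl operand is rewritten to the
// full static vector length,
//   %vscale        = call i32 @llvm.vscale.i32()
//   %scalable_size = mul nuw i32 %vscale, 2
// while a fixed <4 x i32> operation simply gets the constant i32 4.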
bool CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");

  if (VPI.canIgnoreVectorLengthParam())
    return false;

  Value *EVLParam = VPI.getVectorLengthParam();
  if (!EVLParam)
    return false;

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  if (StaticElemCount.isScalable()) {
    // TODO add caching
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
    Value *VScale = Builder.CreateIntrinsic(Intrinsic::vscale, Int32Ty, {},
                                            /*FMFSource=*/nullptr, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*NUW*/ true, /*NSW*/ false);
  } else {
    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
  }
  VPI.setVectorLengthParam(MaxEVL);
  return true;
}

std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // The %evl parameter is ineffective, so there is nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return {&VPI, false};

  // Only VP intrinsics can have an %evl parameter.
  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');

  // Convert the %evl predication into vector mask predication.
  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return {&VPI, true};
}
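// Illustrative end-to-end example for expandPredication below (simplified,
// assumed IR): a speculatable VP operation such as
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                          <4 x i1> %m, i32 %evl)
// can simply be rewritten to the unpredicated instruction
//   %r = add <4 x i32> %a, %b
// because an add cannot trap, so it is safe to compute all lanes; the VP
// semantics leave masked-off result lanes unspecified anyway.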
Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
  auto OC = VPI.getFunctionalOpcode();

  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
    return expandPredicationToCastIntrinsic(Builder, VPI);
  }

  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_abs:
  case Intrinsic::vp_smax:
  case Intrinsic::vp_smin:
  case Intrinsic::vp_umax:
  case Intrinsic::vp_umin:
  case Intrinsic::vp_bswap:
  case Intrinsic::vp_bitreverse:
    return expandPredicationToIntCall(Builder, VPI,
                                      VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_fabs:
  case Intrinsic::vp_sqrt:
  case Intrinsic::vp_maxnum:
  case Intrinsic::vp_minnum:
  case Intrinsic::vp_maximum:
  case Intrinsic::vp_minimum:
  case Intrinsic::vp_fma:
  case Intrinsic::vp_fmuladd:
    return expandPredicationToFPCall(Builder, VPI,
                                     VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }

  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;

  return &VPI;
}

//// } CachingVPExpander

void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
  // Operations with speculatable lanes do not strictly need predication.
  if (maySpeculateLanes(VPI)) {
    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
    // No need to expand %evl into the %mask only to ignore that code.
    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
    return;
  }

  // We have to preserve the predicating effect of %evl for this
  // non-speculatable VP intrinsic.
  // 1) Never discard %evl.
  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
  //    %evl gets folded into %mask.
  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
  }
}

VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing; the following does not need to be
  // efficient.
  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}
VPExpansionDetails
CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) {
  auto Strategy = getVPLegalizationStrategy(VPI);
  sanitizeStrategy(VPI, Strategy);

  VPExpansionDetails Changed = VPExpansionDetails::IntrinsicUnchanged;

  // Transform the EVL parameter.
  switch (Strategy.EVLParamStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    if (discardEVLParameter(VPI))
      Changed = VPExpansionDetails::IntrinsicUpdated;
    break;
  case VPLegalization::Convert:
    if (auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) {
      (void)NewVPI;
      Changed = VPExpansionDetails::IntrinsicUpdated;
      ++NumFoldedVL;
    }
    break;
  }

  // Replace with a non-predicated operation.
  switch (Strategy.OpStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    llvm_unreachable("Invalid strategy for operators.");
  case VPLegalization::Convert:
    if (Value *V = expandPredication(VPI); V != &VPI) {
      ++NumLoweredVPOps;
      Changed = VPExpansionDetails::IntrinsicReplaced;
    }
    break;
  }

  return Changed;
}
} // namespace

VPExpansionDetails
llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
                                       const TargetTransformInfo &TTI) {
  return CachingVPExpander(TTI).expandVectorPredication(VPI);
}