1e8d8bef9SDimitry Andric //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// 281ad6265SDimitry Andric // intrinsics 3e8d8bef9SDimitry Andric // 4e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 6e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7e8d8bef9SDimitry Andric // 8e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 9e8d8bef9SDimitry Andric // 10e8d8bef9SDimitry Andric // This pass replaces masked memory intrinsics - when unsupported by the target 11e8d8bef9SDimitry Andric // - with a chain of basic blocks, that deal with the elements one-by-one if the 12e8d8bef9SDimitry Andric // appropriate mask bit is set. 13e8d8bef9SDimitry Andric // 14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 15e8d8bef9SDimitry Andric 16e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" 17e8d8bef9SDimitry Andric #include "llvm/ADT/Twine.h" 18fe6060f1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 19e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/BasicBlock.h" 21e8d8bef9SDimitry Andric #include "llvm/IR/Constant.h" 22e8d8bef9SDimitry Andric #include "llvm/IR/Constants.h" 23e8d8bef9SDimitry Andric #include "llvm/IR/DerivedTypes.h" 24fe6060f1SDimitry Andric #include "llvm/IR/Dominators.h" 25e8d8bef9SDimitry Andric #include "llvm/IR/Function.h" 26e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h" 27e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h" 28e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h" 29e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 30e8d8bef9SDimitry Andric #include "llvm/IR/Type.h" 31e8d8bef9SDimitry Andric #include "llvm/IR/Value.h" 32e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 33e8d8bef9SDimitry Andric #include "llvm/Pass.h" 34e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h" 35e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h" 36fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 37e8d8bef9SDimitry Andric #include <cassert> 38bdd1243dSDimitry Andric #include <optional> 39e8d8bef9SDimitry Andric 40e8d8bef9SDimitry Andric using namespace llvm; 41e8d8bef9SDimitry Andric 42e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin" 43e8d8bef9SDimitry Andric 44e8d8bef9SDimitry Andric namespace { 45e8d8bef9SDimitry Andric 46e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass { 47e8d8bef9SDimitry Andric public: 48e8d8bef9SDimitry Andric static char ID; // Pass identification, replacement for typeid 49e8d8bef9SDimitry Andric 50e8d8bef9SDimitry Andric explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) { 51e8d8bef9SDimitry Andric initializeScalarizeMaskedMemIntrinLegacyPassPass( 52e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 53e8d8bef9SDimitry Andric } 54e8d8bef9SDimitry Andric 55e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override; 56e8d8bef9SDimitry Andric 57e8d8bef9SDimitry Andric StringRef getPassName() const override { 58e8d8bef9SDimitry Andric return "Scalarize Masked Memory Intrinsics"; 59e8d8bef9SDimitry Andric } 60e8d8bef9SDimitry Andric 61e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 62e8d8bef9SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 63fe6060f1SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 64e8d8bef9SDimitry Andric } 65e8d8bef9SDimitry Andric }; 66e8d8bef9SDimitry Andric 67e8d8bef9SDimitry Andric } // end anonymous namespace 68e8d8bef9SDimitry Andric 69e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 70fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL, 71fe6060f1SDimitry Andric DomTreeUpdater *DTU); 72e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 73e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 74fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU); 75e8d8bef9SDimitry Andric 76e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0; 77e8d8bef9SDimitry Andric 78e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 79e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 80e8d8bef9SDimitry Andric false) 81e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 82fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 83e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 84e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 85e8d8bef9SDimitry Andric false) 86e8d8bef9SDimitry Andric 87e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() { 88e8d8bef9SDimitry Andric return new ScalarizeMaskedMemIntrinLegacyPass(); 89e8d8bef9SDimitry Andric } 90e8d8bef9SDimitry Andric 91e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) { 92e8d8bef9SDimitry Andric Constant *C = dyn_cast<Constant>(Mask); 93e8d8bef9SDimitry Andric if (!C) 94e8d8bef9SDimitry Andric return false; 95e8d8bef9SDimitry Andric 96e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); 97e8d8bef9SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) { 98e8d8bef9SDimitry Andric Constant *CElt = C->getAggregateElement(i); 99e8d8bef9SDimitry Andric if (!CElt || !isa<ConstantInt>(CElt)) 100e8d8bef9SDimitry Andric return false; 101e8d8bef9SDimitry Andric } 102e8d8bef9SDimitry Andric 103e8d8bef9SDimitry Andric return true; 104e8d8bef9SDimitry Andric } 105e8d8bef9SDimitry Andric 106fe6060f1SDimitry Andric static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth, 107fe6060f1SDimitry Andric unsigned Idx) { 108fe6060f1SDimitry Andric return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx; 109fe6060f1SDimitry Andric } 110fe6060f1SDimitry Andric 111e8d8bef9SDimitry Andric // Translate a masked load intrinsic like 112e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, 113e8d8bef9SDimitry Andric // <16 x i1> %mask, <16 x i32> %passthru) 114e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 115e8d8bef9SDimitry Andric // the appropriate mask bit is set 116e8d8bef9SDimitry Andric // 117e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 118e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 119e8d8bef9SDimitry Andric // br i1 %2, label %cond.load, label %else 120e8d8bef9SDimitry Andric // 121e8d8bef9SDimitry Andric // cond.load: ; preds = %0 122e8d8bef9SDimitry Andric // %3 = getelementptr i32* %1, i32 0 123e8d8bef9SDimitry Andric // %4 = load i32* %3 124e8d8bef9SDimitry Andric // %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 125e8d8bef9SDimitry Andric // br label %else 126e8d8bef9SDimitry Andric // 127e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.load 12806c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ] 129e8d8bef9SDimitry Andric // %6 = extractelement <16 x i1> %mask, i32 1 130e8d8bef9SDimitry Andric // br i1 %6, label %cond.load1, label %else2 131e8d8bef9SDimitry Andric // 132e8d8bef9SDimitry Andric // cond.load1: ; preds = %else 133e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1 134e8d8bef9SDimitry Andric // %8 = load i32* %7 135e8d8bef9SDimitry Andric // %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 136e8d8bef9SDimitry Andric // br label %else2 137e8d8bef9SDimitry Andric // 138e8d8bef9SDimitry Andric // else2: ; preds = %else, %cond.load1 139e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] 140e8d8bef9SDimitry Andric // %10 = extractelement <16 x i1> %mask, i32 2 141e8d8bef9SDimitry Andric // br i1 %10, label %cond.load4, label %else5 142e8d8bef9SDimitry Andric // 143fe6060f1SDimitry Andric static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI, 144fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 145e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0); 146e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 147e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 148e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 149e8d8bef9SDimitry Andric 150e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 151e8d8bef9SDimitry Andric VectorType *VecType = cast<FixedVectorType>(CI->getType()); 152e8d8bef9SDimitry Andric 153e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 154e8d8bef9SDimitry Andric 155e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 156e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 157e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 158e8d8bef9SDimitry Andric 159e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 160e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 161e8d8bef9SDimitry Andric 162e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 163e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 164e8d8bef9SDimitry Andric Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); 165e8d8bef9SDimitry Andric CI->replaceAllUsesWith(NewI); 166e8d8bef9SDimitry Andric CI->eraseFromParent(); 167e8d8bef9SDimitry Andric return; 168e8d8bef9SDimitry Andric } 169e8d8bef9SDimitry Andric 170e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 171e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 172e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 173e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 174e8d8bef9SDimitry Andric 175e8d8bef9SDimitry Andric // The result vector 176e8d8bef9SDimitry Andric Value *VResult = Src0; 177e8d8bef9SDimitry Andric 178e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 179e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 180e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 181e8d8bef9SDimitry Andric continue; 18206c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 183e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 184e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, Load, Idx); 185e8d8bef9SDimitry Andric } 186e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 187e8d8bef9SDimitry Andric CI->eraseFromParent(); 188e8d8bef9SDimitry Andric return; 189e8d8bef9SDimitry Andric } 190e8d8bef9SDimitry Andric 191e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 192e8d8bef9SDimitry Andric // better results on X86 at least. 193e8d8bef9SDimitry Andric Value *SclrMask; 194e8d8bef9SDimitry Andric if (VectorWidth != 1) { 195e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 196e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 197e8d8bef9SDimitry Andric } 198e8d8bef9SDimitry Andric 199e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 200e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 201e8d8bef9SDimitry Andric // 202e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 203e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 204e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 205e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 206e8d8bef9SDimitry Andric // 207e8d8bef9SDimitry Andric Value *Predicate; 208e8d8bef9SDimitry Andric if (VectorWidth != 1) { 209fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 210fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 211e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 212e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 213e8d8bef9SDimitry Andric } else { 214e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 215e8d8bef9SDimitry Andric } 216e8d8bef9SDimitry Andric 217e8d8bef9SDimitry Andric // Create "cond" block 218e8d8bef9SDimitry Andric // 219e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 220e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 221e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 222e8d8bef9SDimitry Andric // 223fe6060f1SDimitry Andric Instruction *ThenTerm = 224fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 225fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 226e8d8bef9SDimitry Andric 227fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 228fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 229fe6060f1SDimitry Andric 230fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 23106c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 232e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 233e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 234e8d8bef9SDimitry Andric 235e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 236fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 237fe6060f1SDimitry Andric NewIfBlock->setName("else"); 238e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 239e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 240e8d8bef9SDimitry Andric 241e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 242fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 243e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 244e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 245e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 246e8d8bef9SDimitry Andric VResult = Phi; 247e8d8bef9SDimitry Andric } 248e8d8bef9SDimitry Andric 249e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 250e8d8bef9SDimitry Andric CI->eraseFromParent(); 251e8d8bef9SDimitry Andric 252e8d8bef9SDimitry Andric ModifiedDT = true; 253e8d8bef9SDimitry Andric } 254e8d8bef9SDimitry Andric 255e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like 256e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, 257e8d8bef9SDimitry Andric // <16 x i1> %mask) 258e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 259e8d8bef9SDimitry Andric // the appropriate mask bit is set 260e8d8bef9SDimitry Andric // 261e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 262e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 263e8d8bef9SDimitry Andric // br i1 %2, label %cond.store, label %else 264e8d8bef9SDimitry Andric // 265e8d8bef9SDimitry Andric // cond.store: ; preds = %0 266e8d8bef9SDimitry Andric // %3 = extractelement <16 x i32> %val, i32 0 267e8d8bef9SDimitry Andric // %4 = getelementptr i32* %1, i32 0 268e8d8bef9SDimitry Andric // store i32 %3, i32* %4 269e8d8bef9SDimitry Andric // br label %else 270e8d8bef9SDimitry Andric // 271e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.store 272e8d8bef9SDimitry Andric // %5 = extractelement <16 x i1> %mask, i32 1 273e8d8bef9SDimitry Andric // br i1 %5, label %cond.store1, label %else2 274e8d8bef9SDimitry Andric // 275e8d8bef9SDimitry Andric // cond.store1: ; preds = %else 276e8d8bef9SDimitry Andric // %6 = extractelement <16 x i32> %val, i32 1 277e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1 278e8d8bef9SDimitry Andric // store i32 %6, i32* %7 279e8d8bef9SDimitry Andric // br label %else2 280e8d8bef9SDimitry Andric // . . . 281fe6060f1SDimitry Andric static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI, 282fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 283e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 284e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 285e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 286e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 287e8d8bef9SDimitry Andric 288e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 289e8d8bef9SDimitry Andric auto *VecType = cast<VectorType>(Src->getType()); 290e8d8bef9SDimitry Andric 291e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 292e8d8bef9SDimitry Andric 293e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 294e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 295e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 296e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 297e8d8bef9SDimitry Andric 298e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 299e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 300e8d8bef9SDimitry Andric Builder.CreateAlignedStore(Src, Ptr, AlignVal); 301e8d8bef9SDimitry Andric CI->eraseFromParent(); 302e8d8bef9SDimitry Andric return; 303e8d8bef9SDimitry Andric } 304e8d8bef9SDimitry Andric 305e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 306e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 307e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 308e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 309e8d8bef9SDimitry Andric 310e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 311e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 312e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 313e8d8bef9SDimitry Andric continue; 314e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 31506c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 316e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 317e8d8bef9SDimitry Andric } 318e8d8bef9SDimitry Andric CI->eraseFromParent(); 319e8d8bef9SDimitry Andric return; 320e8d8bef9SDimitry Andric } 321e8d8bef9SDimitry Andric 322e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 323e8d8bef9SDimitry Andric // better results on X86 at least. 324e8d8bef9SDimitry Andric Value *SclrMask; 325e8d8bef9SDimitry Andric if (VectorWidth != 1) { 326e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 327e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 328e8d8bef9SDimitry Andric } 329e8d8bef9SDimitry Andric 330e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 331e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 332e8d8bef9SDimitry Andric // 333e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 334e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 335e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 336e8d8bef9SDimitry Andric // 337e8d8bef9SDimitry Andric Value *Predicate; 338e8d8bef9SDimitry Andric if (VectorWidth != 1) { 339fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 340fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 341e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 342e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 343e8d8bef9SDimitry Andric } else { 344e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 345e8d8bef9SDimitry Andric } 346e8d8bef9SDimitry Andric 347e8d8bef9SDimitry Andric // Create "cond" block 348e8d8bef9SDimitry Andric // 349e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 350e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 351e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 352e8d8bef9SDimitry Andric // 353fe6060f1SDimitry Andric Instruction *ThenTerm = 354fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 355fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 356e8d8bef9SDimitry Andric 357fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 358fe6060f1SDimitry Andric CondBlock->setName("cond.store"); 359fe6060f1SDimitry Andric 360fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 361e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 36206c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 363e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 364e8d8bef9SDimitry Andric 365e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 366fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 367fe6060f1SDimitry Andric NewIfBlock->setName("else"); 368fe6060f1SDimitry Andric 369fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 370e8d8bef9SDimitry Andric } 371e8d8bef9SDimitry Andric CI->eraseFromParent(); 372e8d8bef9SDimitry Andric 373e8d8bef9SDimitry Andric ModifiedDT = true; 374e8d8bef9SDimitry Andric } 375e8d8bef9SDimitry Andric 376e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like 377e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, 378e8d8bef9SDimitry Andric // <16 x i1> %Mask, <16 x i32> %Src) 379e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 380e8d8bef9SDimitry Andric // the appropriate mask bit is set 381e8d8bef9SDimitry Andric // 382e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind 383e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0 384e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else 385e8d8bef9SDimitry Andric // 386e8d8bef9SDimitry Andric // cond.load: 387e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 388e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4 38906c3fb27SDimitry Andric // %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0 390e8d8bef9SDimitry Andric // br label %else 391e8d8bef9SDimitry Andric // 392e8d8bef9SDimitry Andric // else: 39306c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [poison, %0] 394e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1 395e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2 396e8d8bef9SDimitry Andric // 397e8d8bef9SDimitry Andric // cond.load1: 398e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 399e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4 400e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 401e8d8bef9SDimitry Andric // br label %else2 402e8d8bef9SDimitry Andric // . . . 403e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src 404e8d8bef9SDimitry Andric // ret <16 x i32> %Result 405fe6060f1SDimitry Andric static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI, 406fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 407e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(0); 408e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 409e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 410e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 411e8d8bef9SDimitry Andric 412e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 413e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 414e8d8bef9SDimitry Andric 415e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 416e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 417e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 418e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 419e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 420e8d8bef9SDimitry Andric 421e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 422e8d8bef9SDimitry Andric 423e8d8bef9SDimitry Andric // The result vector 424e8d8bef9SDimitry Andric Value *VResult = Src0; 425e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 426e8d8bef9SDimitry Andric 427e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 428e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 429e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 430e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 431e8d8bef9SDimitry Andric continue; 432e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 433e8d8bef9SDimitry Andric LoadInst *Load = 434e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 435e8d8bef9SDimitry Andric VResult = 436e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 437e8d8bef9SDimitry Andric } 438e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 439e8d8bef9SDimitry Andric CI->eraseFromParent(); 440e8d8bef9SDimitry Andric return; 441e8d8bef9SDimitry Andric } 442e8d8bef9SDimitry Andric 443e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 444e8d8bef9SDimitry Andric // better results on X86 at least. 445e8d8bef9SDimitry Andric Value *SclrMask; 446e8d8bef9SDimitry Andric if (VectorWidth != 1) { 447e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 448e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 449e8d8bef9SDimitry Andric } 450e8d8bef9SDimitry Andric 451e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 452e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 453e8d8bef9SDimitry Andric // 454e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 455e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 456e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load, label %else 457e8d8bef9SDimitry Andric // 458e8d8bef9SDimitry Andric 459e8d8bef9SDimitry Andric Value *Predicate; 460e8d8bef9SDimitry Andric if (VectorWidth != 1) { 461fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 462fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 463e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 464e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 465e8d8bef9SDimitry Andric } else { 466e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 467e8d8bef9SDimitry Andric } 468e8d8bef9SDimitry Andric 469e8d8bef9SDimitry Andric // Create "cond" block 470e8d8bef9SDimitry Andric // 471e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 472e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 473e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 474e8d8bef9SDimitry Andric // 475fe6060f1SDimitry Andric Instruction *ThenTerm = 476fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 477fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 478e8d8bef9SDimitry Andric 479fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 480fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 481fe6060f1SDimitry Andric 482fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 483e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 484e8d8bef9SDimitry Andric LoadInst *Load = 485e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 486e8d8bef9SDimitry Andric Value *NewVResult = 487e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 488e8d8bef9SDimitry Andric 489e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 490fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 491fe6060f1SDimitry Andric NewIfBlock->setName("else"); 492e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 493e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 494e8d8bef9SDimitry Andric 495fe6060f1SDimitry Andric // Create the phi to join the new and previous value. 496fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 497e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 498e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 499e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 500e8d8bef9SDimitry Andric VResult = Phi; 501e8d8bef9SDimitry Andric } 502e8d8bef9SDimitry Andric 503e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 504e8d8bef9SDimitry Andric CI->eraseFromParent(); 505e8d8bef9SDimitry Andric 506e8d8bef9SDimitry Andric ModifiedDT = true; 507e8d8bef9SDimitry Andric } 508e8d8bef9SDimitry Andric 509e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like 510e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, 511e8d8bef9SDimitry Andric // <16 x i1> %Mask) 512e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 513e8d8bef9SDimitry Andric // the appropriate mask bit is set. 514e8d8bef9SDimitry Andric // 515e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind 516e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0 517e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else 518e8d8bef9SDimitry Andric // 519e8d8bef9SDimitry Andric // cond.store: 520e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0 521e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 522e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4 523e8d8bef9SDimitry Andric // br label %else 524e8d8bef9SDimitry Andric // 525e8d8bef9SDimitry Andric // else: 526e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1 527e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2 528e8d8bef9SDimitry Andric // 529e8d8bef9SDimitry Andric // cond.store1: 530e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1 531e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 532e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4 533e8d8bef9SDimitry Andric // br label %else2 534e8d8bef9SDimitry Andric // . . . 535fe6060f1SDimitry Andric static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI, 536fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 537e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 538e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(1); 539e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 540e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 541e8d8bef9SDimitry Andric 542e8d8bef9SDimitry Andric auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); 543e8d8bef9SDimitry Andric 544e8d8bef9SDimitry Andric assert( 545e8d8bef9SDimitry Andric isa<VectorType>(Ptrs->getType()) && 546e8d8bef9SDimitry Andric isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && 547e8d8bef9SDimitry Andric "Vector of pointers is expected in masked scatter intrinsic"); 548e8d8bef9SDimitry Andric 549e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 550e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 551e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 552e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 553e8d8bef9SDimitry Andric 554e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 555e8d8bef9SDimitry Andric unsigned VectorWidth = SrcFVTy->getNumElements(); 556e8d8bef9SDimitry Andric 557e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 558e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 559e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 560e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 561e8d8bef9SDimitry Andric continue; 562e8d8bef9SDimitry Andric Value *OneElt = 563e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 564e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 565e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 566e8d8bef9SDimitry Andric } 567e8d8bef9SDimitry Andric CI->eraseFromParent(); 568e8d8bef9SDimitry Andric return; 569e8d8bef9SDimitry Andric } 570e8d8bef9SDimitry Andric 571e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 572e8d8bef9SDimitry Andric // better results on X86 at least. 573e8d8bef9SDimitry Andric Value *SclrMask; 574e8d8bef9SDimitry Andric if (VectorWidth != 1) { 575e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 576e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 577e8d8bef9SDimitry Andric } 578e8d8bef9SDimitry Andric 579e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 580e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 581e8d8bef9SDimitry Andric // 582e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 583e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 584e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store, label %else 585e8d8bef9SDimitry Andric // 586e8d8bef9SDimitry Andric Value *Predicate; 587e8d8bef9SDimitry Andric if (VectorWidth != 1) { 588fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 589fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 590e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 591e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 592e8d8bef9SDimitry Andric } else { 593e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 594e8d8bef9SDimitry Andric } 595e8d8bef9SDimitry Andric 596e8d8bef9SDimitry Andric // Create "cond" block 597e8d8bef9SDimitry Andric // 598e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1 599e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 600e8d8bef9SDimitry Andric // %store i32 %Elt1, i32* %Ptr1 601e8d8bef9SDimitry Andric // 602fe6060f1SDimitry Andric Instruction *ThenTerm = 603fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 604fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 605e8d8bef9SDimitry Andric 606fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 607fe6060f1SDimitry Andric CondBlock->setName("cond.store"); 608fe6060f1SDimitry Andric 609fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 610e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 611e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 612e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 613e8d8bef9SDimitry Andric 614e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 615fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 616fe6060f1SDimitry Andric NewIfBlock->setName("else"); 617fe6060f1SDimitry Andric 618fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 619e8d8bef9SDimitry Andric } 620e8d8bef9SDimitry Andric CI->eraseFromParent(); 621e8d8bef9SDimitry Andric 622e8d8bef9SDimitry Andric ModifiedDT = true; 623e8d8bef9SDimitry Andric } 624e8d8bef9SDimitry Andric 625fe6060f1SDimitry Andric static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI, 626fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 627e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0); 628e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(1); 629e8d8bef9SDimitry Andric Value *PassThru = CI->getArgOperand(2); 630*0fca6ea1SDimitry Andric Align Alignment = CI->getParamAlign(0).valueOrOne(); 631e8d8bef9SDimitry Andric 632e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 633e8d8bef9SDimitry Andric 634e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 635e8d8bef9SDimitry Andric 636e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 637e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 638e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 639e8d8bef9SDimitry Andric 640e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 641e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 642e8d8bef9SDimitry Andric 643e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 644e8d8bef9SDimitry Andric 645e8d8bef9SDimitry Andric // The result vector 646e8d8bef9SDimitry Andric Value *VResult = PassThru; 647e8d8bef9SDimitry Andric 648*0fca6ea1SDimitry Andric // Adjust alignment for the scalar instruction. 649*0fca6ea1SDimitry Andric const Align AdjustedAlignment = 650*0fca6ea1SDimitry Andric commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8); 651*0fca6ea1SDimitry Andric 652e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 65306c3fb27SDimitry Andric // Create a build_vector pattern, with loads/poisons as necessary and then 654e8d8bef9SDimitry Andric // shuffle blend with the pass through value. 655e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 656e8d8bef9SDimitry Andric unsigned MemIndex = 0; 657bdd1243dSDimitry Andric VResult = PoisonValue::get(VecType); 65806c3fb27SDimitry Andric SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem); 659e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 660e8d8bef9SDimitry Andric Value *InsertElt; 661e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) { 66206c3fb27SDimitry Andric InsertElt = PoisonValue::get(EltTy); 663e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx + VectorWidth; 664e8d8bef9SDimitry Andric } else { 665e8d8bef9SDimitry Andric Value *NewPtr = 666e8d8bef9SDimitry Andric Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 667*0fca6ea1SDimitry Andric InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment, 668e8d8bef9SDimitry Andric "Load" + Twine(Idx)); 669e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx; 670e8d8bef9SDimitry Andric ++MemIndex; 671e8d8bef9SDimitry Andric } 672e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx, 673e8d8bef9SDimitry Andric "Res" + Twine(Idx)); 674e8d8bef9SDimitry Andric } 675e8d8bef9SDimitry Andric VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask); 676e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 677e8d8bef9SDimitry Andric CI->eraseFromParent(); 678e8d8bef9SDimitry Andric return; 679e8d8bef9SDimitry Andric } 680e8d8bef9SDimitry Andric 681e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 682e8d8bef9SDimitry Andric // better results on X86 at least. 683e8d8bef9SDimitry Andric Value *SclrMask; 684e8d8bef9SDimitry Andric if (VectorWidth != 1) { 685e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 686e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 687e8d8bef9SDimitry Andric } 688e8d8bef9SDimitry Andric 689e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 690e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 691e8d8bef9SDimitry Andric // 692e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 693e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 694e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 695e8d8bef9SDimitry Andric // 696e8d8bef9SDimitry Andric 697e8d8bef9SDimitry Andric Value *Predicate; 698e8d8bef9SDimitry Andric if (VectorWidth != 1) { 699fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 700fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 701e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 702e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 703e8d8bef9SDimitry Andric } else { 704e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 705e8d8bef9SDimitry Andric } 706e8d8bef9SDimitry Andric 707e8d8bef9SDimitry Andric // Create "cond" block 708e8d8bef9SDimitry Andric // 709e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 710e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 711e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 712e8d8bef9SDimitry Andric // 713fe6060f1SDimitry Andric Instruction *ThenTerm = 714fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 715fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 716e8d8bef9SDimitry Andric 717fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 718fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 719fe6060f1SDimitry Andric 720fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 721*0fca6ea1SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AdjustedAlignment); 722e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 723e8d8bef9SDimitry Andric 724e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 725e8d8bef9SDimitry Andric Value *NewPtr; 726e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 727e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 728e8d8bef9SDimitry Andric 729e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 730fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 731fe6060f1SDimitry Andric NewIfBlock->setName("else"); 732e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 733e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 734e8d8bef9SDimitry Andric 735e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 736fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 737e8d8bef9SDimitry Andric PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 738e8d8bef9SDimitry Andric ResultPhi->addIncoming(NewVResult, CondBlock); 739e8d8bef9SDimitry Andric ResultPhi->addIncoming(VResult, PrevIfBlock); 740e8d8bef9SDimitry Andric VResult = ResultPhi; 741e8d8bef9SDimitry Andric 742e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 743e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 744e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 745e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 746e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 747e8d8bef9SDimitry Andric Ptr = PtrPhi; 748e8d8bef9SDimitry Andric } 749e8d8bef9SDimitry Andric } 750e8d8bef9SDimitry Andric 751e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 752e8d8bef9SDimitry Andric CI->eraseFromParent(); 753e8d8bef9SDimitry Andric 754e8d8bef9SDimitry Andric ModifiedDT = true; 755e8d8bef9SDimitry Andric } 756e8d8bef9SDimitry Andric 757fe6060f1SDimitry Andric static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI, 758fe6060f1SDimitry Andric DomTreeUpdater *DTU, 759fe6060f1SDimitry Andric bool &ModifiedDT) { 760e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 761e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 762e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 763*0fca6ea1SDimitry Andric Align Alignment = CI->getParamAlign(1).valueOrOne(); 764e8d8bef9SDimitry Andric 765e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(Src->getType()); 766e8d8bef9SDimitry Andric 767e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 768e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 769e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 770e8d8bef9SDimitry Andric 771e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 772e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 773e8d8bef9SDimitry Andric 774e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 775e8d8bef9SDimitry Andric 776*0fca6ea1SDimitry Andric // Adjust alignment for the scalar instruction. 777*0fca6ea1SDimitry Andric const Align AdjustedAlignment = 778*0fca6ea1SDimitry Andric commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8); 779*0fca6ea1SDimitry Andric 780e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 781e8d8bef9SDimitry Andric 782e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 783e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 784e8d8bef9SDimitry Andric unsigned MemIndex = 0; 785e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 786e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 787e8d8bef9SDimitry Andric continue; 788e8d8bef9SDimitry Andric Value *OneElt = 789e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 790e8d8bef9SDimitry Andric Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 791*0fca6ea1SDimitry Andric Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment); 792e8d8bef9SDimitry Andric ++MemIndex; 793e8d8bef9SDimitry Andric } 794e8d8bef9SDimitry Andric CI->eraseFromParent(); 795e8d8bef9SDimitry Andric return; 796e8d8bef9SDimitry Andric } 797e8d8bef9SDimitry Andric 798e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 799e8d8bef9SDimitry Andric // better results on X86 at least. 800e8d8bef9SDimitry Andric Value *SclrMask; 801e8d8bef9SDimitry Andric if (VectorWidth != 1) { 802e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 803e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 804e8d8bef9SDimitry Andric } 805e8d8bef9SDimitry Andric 806e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 807e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 808e8d8bef9SDimitry Andric // 809e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 810e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 811e8d8bef9SDimitry Andric // 812e8d8bef9SDimitry Andric Value *Predicate; 813e8d8bef9SDimitry Andric if (VectorWidth != 1) { 814fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 815fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 816e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 817e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 818e8d8bef9SDimitry Andric } else { 819e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 820e8d8bef9SDimitry Andric } 821e8d8bef9SDimitry Andric 822e8d8bef9SDimitry Andric // Create "cond" block 823e8d8bef9SDimitry Andric // 824e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 825e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 826e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 827e8d8bef9SDimitry Andric // 828fe6060f1SDimitry Andric Instruction *ThenTerm = 829fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 830fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 831e8d8bef9SDimitry Andric 832fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 833fe6060f1SDimitry Andric CondBlock->setName("cond.store"); 834fe6060f1SDimitry Andric 835fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 836e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 837*0fca6ea1SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AdjustedAlignment); 838e8d8bef9SDimitry Andric 839e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 840e8d8bef9SDimitry Andric Value *NewPtr; 841e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 842e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 843e8d8bef9SDimitry Andric 844e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 845fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 846fe6060f1SDimitry Andric NewIfBlock->setName("else"); 847e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 848e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 849e8d8bef9SDimitry Andric 850fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 851fe6060f1SDimitry Andric 852e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 853e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 854e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 855e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 856e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 857e8d8bef9SDimitry Andric Ptr = PtrPhi; 858e8d8bef9SDimitry Andric } 859e8d8bef9SDimitry Andric } 860e8d8bef9SDimitry Andric CI->eraseFromParent(); 861e8d8bef9SDimitry Andric 862e8d8bef9SDimitry Andric ModifiedDT = true; 863e8d8bef9SDimitry Andric } 864e8d8bef9SDimitry Andric 865*0fca6ea1SDimitry Andric static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI, 866*0fca6ea1SDimitry Andric DomTreeUpdater *DTU, 867*0fca6ea1SDimitry Andric bool &ModifiedDT) { 868*0fca6ea1SDimitry Andric // If we extend histogram to return a result someday (like the updated vector) 869*0fca6ea1SDimitry Andric // then we'll need to support it here. 870*0fca6ea1SDimitry Andric assert(CI->getType()->isVoidTy() && "Histogram with non-void return."); 871*0fca6ea1SDimitry Andric Value *Ptrs = CI->getArgOperand(0); 872*0fca6ea1SDimitry Andric Value *Inc = CI->getArgOperand(1); 873*0fca6ea1SDimitry Andric Value *Mask = CI->getArgOperand(2); 874*0fca6ea1SDimitry Andric 875*0fca6ea1SDimitry Andric auto *AddrType = cast<FixedVectorType>(Ptrs->getType()); 876*0fca6ea1SDimitry Andric Type *EltTy = Inc->getType(); 877*0fca6ea1SDimitry Andric 878*0fca6ea1SDimitry Andric IRBuilder<> Builder(CI->getContext()); 879*0fca6ea1SDimitry Andric Instruction *InsertPt = CI; 880*0fca6ea1SDimitry Andric Builder.SetInsertPoint(InsertPt); 881*0fca6ea1SDimitry Andric 882*0fca6ea1SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 883*0fca6ea1SDimitry Andric 884*0fca6ea1SDimitry Andric // FIXME: Do we need to add an alignment parameter to the intrinsic? 885*0fca6ea1SDimitry Andric unsigned VectorWidth = AddrType->getNumElements(); 886*0fca6ea1SDimitry Andric 887*0fca6ea1SDimitry Andric // Shorten the way if the mask is a vector of constants. 888*0fca6ea1SDimitry Andric if (isConstantIntVector(Mask)) { 889*0fca6ea1SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 890*0fca6ea1SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 891*0fca6ea1SDimitry Andric continue; 892*0fca6ea1SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 893*0fca6ea1SDimitry Andric LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx)); 894*0fca6ea1SDimitry Andric Value *Add = Builder.CreateAdd(Load, Inc); 895*0fca6ea1SDimitry Andric Builder.CreateStore(Add, Ptr); 896*0fca6ea1SDimitry Andric } 897*0fca6ea1SDimitry Andric CI->eraseFromParent(); 898*0fca6ea1SDimitry Andric return; 899*0fca6ea1SDimitry Andric } 900*0fca6ea1SDimitry Andric 901*0fca6ea1SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 902*0fca6ea1SDimitry Andric Value *Predicate = 903*0fca6ea1SDimitry Andric Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 904*0fca6ea1SDimitry Andric 905*0fca6ea1SDimitry Andric Instruction *ThenTerm = 906*0fca6ea1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 907*0fca6ea1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 908*0fca6ea1SDimitry Andric 909*0fca6ea1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 910*0fca6ea1SDimitry Andric CondBlock->setName("cond.histogram.update"); 911*0fca6ea1SDimitry Andric 912*0fca6ea1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 913*0fca6ea1SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 914*0fca6ea1SDimitry Andric LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx)); 915*0fca6ea1SDimitry Andric Value *Add = Builder.CreateAdd(Load, Inc); 916*0fca6ea1SDimitry Andric Builder.CreateStore(Add, Ptr); 917*0fca6ea1SDimitry Andric 918*0fca6ea1SDimitry Andric // Create "else" block, fill it in the next iteration 919*0fca6ea1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 920*0fca6ea1SDimitry Andric NewIfBlock->setName("else"); 921*0fca6ea1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 922*0fca6ea1SDimitry Andric } 923*0fca6ea1SDimitry Andric 924*0fca6ea1SDimitry Andric CI->eraseFromParent(); 925*0fca6ea1SDimitry Andric ModifiedDT = true; 926*0fca6ea1SDimitry Andric } 927*0fca6ea1SDimitry Andric 928fe6060f1SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI, 929fe6060f1SDimitry Andric DominatorTree *DT) { 930bdd1243dSDimitry Andric std::optional<DomTreeUpdater> DTU; 931fe6060f1SDimitry Andric if (DT) 932fe6060f1SDimitry Andric DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); 933fe6060f1SDimitry Andric 934e8d8bef9SDimitry Andric bool EverMadeChange = false; 935e8d8bef9SDimitry Andric bool MadeChange = true; 936*0fca6ea1SDimitry Andric auto &DL = F.getDataLayout(); 937e8d8bef9SDimitry Andric while (MadeChange) { 938e8d8bef9SDimitry Andric MadeChange = false; 939349cc55cSDimitry Andric for (BasicBlock &BB : llvm::make_early_inc_range(F)) { 940e8d8bef9SDimitry Andric bool ModifiedDTOnIteration = false; 941349cc55cSDimitry Andric MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL, 942bdd1243dSDimitry Andric DTU ? &*DTU : nullptr); 943fe6060f1SDimitry Andric 944e8d8bef9SDimitry Andric // Restart BB iteration if the dominator tree of the Function was changed 945e8d8bef9SDimitry Andric if (ModifiedDTOnIteration) 946e8d8bef9SDimitry Andric break; 947e8d8bef9SDimitry Andric } 948e8d8bef9SDimitry Andric 949e8d8bef9SDimitry Andric EverMadeChange |= MadeChange; 950e8d8bef9SDimitry Andric } 951e8d8bef9SDimitry Andric return EverMadeChange; 952e8d8bef9SDimitry Andric } 953e8d8bef9SDimitry Andric 954e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) { 955e8d8bef9SDimitry Andric auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 956fe6060f1SDimitry Andric DominatorTree *DT = nullptr; 957fe6060f1SDimitry Andric if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 958fe6060f1SDimitry Andric DT = &DTWP->getDomTree(); 959fe6060f1SDimitry Andric return runImpl(F, TTI, DT); 960e8d8bef9SDimitry Andric } 961e8d8bef9SDimitry Andric 962e8d8bef9SDimitry Andric PreservedAnalyses 963e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) { 964e8d8bef9SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F); 965fe6060f1SDimitry Andric auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); 966fe6060f1SDimitry Andric if (!runImpl(F, TTI, DT)) 967e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 968e8d8bef9SDimitry Andric PreservedAnalyses PA; 969e8d8bef9SDimitry Andric PA.preserve<TargetIRAnalysis>(); 970fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 971e8d8bef9SDimitry Andric return PA; 972e8d8bef9SDimitry Andric } 973e8d8bef9SDimitry Andric 974e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 975fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL, 976fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 977e8d8bef9SDimitry Andric bool MadeChange = false; 978e8d8bef9SDimitry Andric 979e8d8bef9SDimitry Andric BasicBlock::iterator CurInstIterator = BB.begin(); 980e8d8bef9SDimitry Andric while (CurInstIterator != BB.end()) { 981e8d8bef9SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) 982fe6060f1SDimitry Andric MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU); 983e8d8bef9SDimitry Andric if (ModifiedDT) 984e8d8bef9SDimitry Andric return true; 985e8d8bef9SDimitry Andric } 986e8d8bef9SDimitry Andric 987e8d8bef9SDimitry Andric return MadeChange; 988e8d8bef9SDimitry Andric } 989e8d8bef9SDimitry Andric 990e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 991e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 992fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU) { 993e8d8bef9SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 994e8d8bef9SDimitry Andric if (II) { 995e8d8bef9SDimitry Andric // The scalarization code below does not work for scalable vectors. 996e8d8bef9SDimitry Andric if (isa<ScalableVectorType>(II->getType()) || 997349cc55cSDimitry Andric any_of(II->args(), 998e8d8bef9SDimitry Andric [](Value *V) { return isa<ScalableVectorType>(V->getType()); })) 999e8d8bef9SDimitry Andric return false; 1000e8d8bef9SDimitry Andric 1001e8d8bef9SDimitry Andric switch (II->getIntrinsicID()) { 1002e8d8bef9SDimitry Andric default: 1003e8d8bef9SDimitry Andric break; 1004*0fca6ea1SDimitry Andric case Intrinsic::experimental_vector_histogram_add: 1005*0fca6ea1SDimitry Andric if (TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(), 1006*0fca6ea1SDimitry Andric CI->getArgOperand(1)->getType())) 1007*0fca6ea1SDimitry Andric return false; 1008*0fca6ea1SDimitry Andric scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT); 1009*0fca6ea1SDimitry Andric return true; 1010e8d8bef9SDimitry Andric case Intrinsic::masked_load: 1011e8d8bef9SDimitry Andric // Scalarize unsupported vector masked load 1012e8d8bef9SDimitry Andric if (TTI.isLegalMaskedLoad( 1013e8d8bef9SDimitry Andric CI->getType(), 1014e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) 1015e8d8bef9SDimitry Andric return false; 1016fe6060f1SDimitry Andric scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT); 1017e8d8bef9SDimitry Andric return true; 1018e8d8bef9SDimitry Andric case Intrinsic::masked_store: 1019e8d8bef9SDimitry Andric if (TTI.isLegalMaskedStore( 1020e8d8bef9SDimitry Andric CI->getArgOperand(0)->getType(), 1021e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) 1022e8d8bef9SDimitry Andric return false; 1023fe6060f1SDimitry Andric scalarizeMaskedStore(DL, CI, DTU, ModifiedDT); 1024e8d8bef9SDimitry Andric return true; 1025e8d8bef9SDimitry Andric case Intrinsic::masked_gather: { 1026fe6060f1SDimitry Andric MaybeAlign MA = 1027fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue(); 1028e8d8bef9SDimitry Andric Type *LoadTy = CI->getType(); 1029fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA, 1030fe6060f1SDimitry Andric LoadTy->getScalarType()); 103104eeddc0SDimitry Andric if (TTI.isLegalMaskedGather(LoadTy, Alignment) && 103204eeddc0SDimitry Andric !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment)) 1033e8d8bef9SDimitry Andric return false; 1034fe6060f1SDimitry Andric scalarizeMaskedGather(DL, CI, DTU, ModifiedDT); 1035e8d8bef9SDimitry Andric return true; 1036e8d8bef9SDimitry Andric } 1037e8d8bef9SDimitry Andric case Intrinsic::masked_scatter: { 1038fe6060f1SDimitry Andric MaybeAlign MA = 1039fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue(); 1040e8d8bef9SDimitry Andric Type *StoreTy = CI->getArgOperand(0)->getType(); 1041fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA, 1042fe6060f1SDimitry Andric StoreTy->getScalarType()); 104304eeddc0SDimitry Andric if (TTI.isLegalMaskedScatter(StoreTy, Alignment) && 104404eeddc0SDimitry Andric !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy), 104504eeddc0SDimitry Andric Alignment)) 1046e8d8bef9SDimitry Andric return false; 1047fe6060f1SDimitry Andric scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT); 1048e8d8bef9SDimitry Andric return true; 1049e8d8bef9SDimitry Andric } 1050e8d8bef9SDimitry Andric case Intrinsic::masked_expandload: 1051*0fca6ea1SDimitry Andric if (TTI.isLegalMaskedExpandLoad( 1052*0fca6ea1SDimitry Andric CI->getType(), 1053*0fca6ea1SDimitry Andric CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne())) 1054e8d8bef9SDimitry Andric return false; 1055fe6060f1SDimitry Andric scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT); 1056e8d8bef9SDimitry Andric return true; 1057e8d8bef9SDimitry Andric case Intrinsic::masked_compressstore: 1058*0fca6ea1SDimitry Andric if (TTI.isLegalMaskedCompressStore( 1059*0fca6ea1SDimitry Andric CI->getArgOperand(0)->getType(), 1060*0fca6ea1SDimitry Andric CI->getAttributes().getParamAttrs(1).getAlignment().valueOrOne())) 1061e8d8bef9SDimitry Andric return false; 1062fe6060f1SDimitry Andric scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT); 1063e8d8bef9SDimitry Andric return true; 1064e8d8bef9SDimitry Andric } 1065e8d8bef9SDimitry Andric } 1066e8d8bef9SDimitry Andric 1067e8d8bef9SDimitry Andric return false; 1068e8d8bef9SDimitry Andric } 1069