//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
//                                    instrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass replaces masked memory intrinsics - when unsupported by the target
// - with a chain of basic blocks, that deal with the elements one-by-one if the
// appropriate mask bit is set.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
31*e8d8bef9SDimitry Andric #include "llvm/IR/Value.h" 32*e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 33*e8d8bef9SDimitry Andric #include "llvm/Pass.h" 34*e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h" 35*e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h" 36*e8d8bef9SDimitry Andric #include <algorithm> 37*e8d8bef9SDimitry Andric #include <cassert> 38*e8d8bef9SDimitry Andric 39*e8d8bef9SDimitry Andric using namespace llvm; 40*e8d8bef9SDimitry Andric 41*e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin" 42*e8d8bef9SDimitry Andric 43*e8d8bef9SDimitry Andric namespace { 44*e8d8bef9SDimitry Andric 45*e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass { 46*e8d8bef9SDimitry Andric public: 47*e8d8bef9SDimitry Andric static char ID; // Pass identification, replacement for typeid 48*e8d8bef9SDimitry Andric 49*e8d8bef9SDimitry Andric explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) { 50*e8d8bef9SDimitry Andric initializeScalarizeMaskedMemIntrinLegacyPassPass( 51*e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 52*e8d8bef9SDimitry Andric } 53*e8d8bef9SDimitry Andric 54*e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override; 55*e8d8bef9SDimitry Andric 56*e8d8bef9SDimitry Andric StringRef getPassName() const override { 57*e8d8bef9SDimitry Andric return "Scalarize Masked Memory Intrinsics"; 58*e8d8bef9SDimitry Andric } 59*e8d8bef9SDimitry Andric 60*e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 61*e8d8bef9SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 62*e8d8bef9SDimitry Andric } 63*e8d8bef9SDimitry Andric }; 64*e8d8bef9SDimitry Andric 65*e8d8bef9SDimitry Andric } // end anonymous namespace 66*e8d8bef9SDimitry Andric 67*e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 68*e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL); 
69*e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 70*e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 71*e8d8bef9SDimitry Andric const DataLayout &DL); 72*e8d8bef9SDimitry Andric 73*e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0; 74*e8d8bef9SDimitry Andric 75*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 76*e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 77*e8d8bef9SDimitry Andric false) 78*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 79*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 80*e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 81*e8d8bef9SDimitry Andric false) 82*e8d8bef9SDimitry Andric 83*e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() { 84*e8d8bef9SDimitry Andric return new ScalarizeMaskedMemIntrinLegacyPass(); 85*e8d8bef9SDimitry Andric } 86*e8d8bef9SDimitry Andric 87*e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) { 88*e8d8bef9SDimitry Andric Constant *C = dyn_cast<Constant>(Mask); 89*e8d8bef9SDimitry Andric if (!C) 90*e8d8bef9SDimitry Andric return false; 91*e8d8bef9SDimitry Andric 92*e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); 93*e8d8bef9SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) { 94*e8d8bef9SDimitry Andric Constant *CElt = C->getAggregateElement(i); 95*e8d8bef9SDimitry Andric if (!CElt || !isa<ConstantInt>(CElt)) 96*e8d8bef9SDimitry Andric return false; 97*e8d8bef9SDimitry Andric } 98*e8d8bef9SDimitry Andric 99*e8d8bef9SDimitry Andric return true; 100*e8d8bef9SDimitry Andric } 101*e8d8bef9SDimitry Andric 102*e8d8bef9SDimitry Andric // Translate a masked load intrinsic like 103*e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 
align, 104*e8d8bef9SDimitry Andric // <16 x i1> %mask, <16 x i32> %passthru) 105*e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 106*e8d8bef9SDimitry Andric // the appropriate mask bit is set 107*e8d8bef9SDimitry Andric // 108*e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 109*e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 110*e8d8bef9SDimitry Andric // br i1 %2, label %cond.load, label %else 111*e8d8bef9SDimitry Andric // 112*e8d8bef9SDimitry Andric // cond.load: ; preds = %0 113*e8d8bef9SDimitry Andric // %3 = getelementptr i32* %1, i32 0 114*e8d8bef9SDimitry Andric // %4 = load i32* %3 115*e8d8bef9SDimitry Andric // %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 116*e8d8bef9SDimitry Andric // br label %else 117*e8d8bef9SDimitry Andric // 118*e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.load 119*e8d8bef9SDimitry Andric // %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ] 120*e8d8bef9SDimitry Andric // %6 = extractelement <16 x i1> %mask, i32 1 121*e8d8bef9SDimitry Andric // br i1 %6, label %cond.load1, label %else2 122*e8d8bef9SDimitry Andric // 123*e8d8bef9SDimitry Andric // cond.load1: ; preds = %else 124*e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1 125*e8d8bef9SDimitry Andric // %8 = load i32* %7 126*e8d8bef9SDimitry Andric // %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 127*e8d8bef9SDimitry Andric // br label %else2 128*e8d8bef9SDimitry Andric // 129*e8d8bef9SDimitry Andric // else2: ; preds = %else, %cond.load1 130*e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] 131*e8d8bef9SDimitry Andric // %10 = extractelement <16 x i1> %mask, i32 2 132*e8d8bef9SDimitry Andric // br i1 %10, label %cond.load4, label %else5 133*e8d8bef9SDimitry Andric // 134*e8d8bef9SDimitry Andric static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { 135*e8d8bef9SDimitry Andric Value 
*Ptr = CI->getArgOperand(0); 136*e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 137*e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 138*e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 139*e8d8bef9SDimitry Andric 140*e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 141*e8d8bef9SDimitry Andric VectorType *VecType = cast<FixedVectorType>(CI->getType()); 142*e8d8bef9SDimitry Andric 143*e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 144*e8d8bef9SDimitry Andric 145*e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 146*e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 147*e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 148*e8d8bef9SDimitry Andric 149*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 150*e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 151*e8d8bef9SDimitry Andric 152*e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 153*e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 154*e8d8bef9SDimitry Andric Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); 155*e8d8bef9SDimitry Andric CI->replaceAllUsesWith(NewI); 156*e8d8bef9SDimitry Andric CI->eraseFromParent(); 157*e8d8bef9SDimitry Andric return; 158*e8d8bef9SDimitry Andric } 159*e8d8bef9SDimitry Andric 160*e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 
161*e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 162*e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 163*e8d8bef9SDimitry Andric // Bitcast %addr from i8* to EltTy* 164*e8d8bef9SDimitry Andric Type *NewPtrType = 165*e8d8bef9SDimitry Andric EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); 166*e8d8bef9SDimitry Andric Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); 167*e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 168*e8d8bef9SDimitry Andric 169*e8d8bef9SDimitry Andric // The result vector 170*e8d8bef9SDimitry Andric Value *VResult = Src0; 171*e8d8bef9SDimitry Andric 172*e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 173*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 174*e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 175*e8d8bef9SDimitry Andric continue; 176*e8d8bef9SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 177*e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 178*e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, Load, Idx); 179*e8d8bef9SDimitry Andric } 180*e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 181*e8d8bef9SDimitry Andric CI->eraseFromParent(); 182*e8d8bef9SDimitry Andric return; 183*e8d8bef9SDimitry Andric } 184*e8d8bef9SDimitry Andric 185*e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 186*e8d8bef9SDimitry Andric // better results on X86 at least. 
187*e8d8bef9SDimitry Andric Value *SclrMask; 188*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 189*e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 190*e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 191*e8d8bef9SDimitry Andric } 192*e8d8bef9SDimitry Andric 193*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 194*e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 195*e8d8bef9SDimitry Andric // 196*e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 197*e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 198*e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 199*e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 200*e8d8bef9SDimitry Andric // 201*e8d8bef9SDimitry Andric Value *Predicate; 202*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 203*e8d8bef9SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); 204*e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 205*e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 206*e8d8bef9SDimitry Andric } else { 207*e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 208*e8d8bef9SDimitry Andric } 209*e8d8bef9SDimitry Andric 210*e8d8bef9SDimitry Andric // Create "cond" block 211*e8d8bef9SDimitry Andric // 212*e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 213*e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 214*e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 215*e8d8bef9SDimitry Andric // 216*e8d8bef9SDimitry Andric BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), 217*e8d8bef9SDimitry Andric "cond.load"); 218*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 219*e8d8bef9SDimitry Andric 220*e8d8bef9SDimitry 
Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 221*e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 222*e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 223*e8d8bef9SDimitry Andric 224*e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 225*e8d8bef9SDimitry Andric BasicBlock *NewIfBlock = 226*e8d8bef9SDimitry Andric CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); 227*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 228*e8d8bef9SDimitry Andric Instruction *OldBr = IfBlock->getTerminator(); 229*e8d8bef9SDimitry Andric BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); 230*e8d8bef9SDimitry Andric OldBr->eraseFromParent(); 231*e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 232*e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 233*e8d8bef9SDimitry Andric 234*e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 
235*e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 236*e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 237*e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 238*e8d8bef9SDimitry Andric VResult = Phi; 239*e8d8bef9SDimitry Andric } 240*e8d8bef9SDimitry Andric 241*e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 242*e8d8bef9SDimitry Andric CI->eraseFromParent(); 243*e8d8bef9SDimitry Andric 244*e8d8bef9SDimitry Andric ModifiedDT = true; 245*e8d8bef9SDimitry Andric } 246*e8d8bef9SDimitry Andric 247*e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like 248*e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, 249*e8d8bef9SDimitry Andric // <16 x i1> %mask) 250*e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 251*e8d8bef9SDimitry Andric // the appropriate mask bit is set 252*e8d8bef9SDimitry Andric // 253*e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 254*e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 255*e8d8bef9SDimitry Andric // br i1 %2, label %cond.store, label %else 256*e8d8bef9SDimitry Andric // 257*e8d8bef9SDimitry Andric // cond.store: ; preds = %0 258*e8d8bef9SDimitry Andric // %3 = extractelement <16 x i32> %val, i32 0 259*e8d8bef9SDimitry Andric // %4 = getelementptr i32* %1, i32 0 260*e8d8bef9SDimitry Andric // store i32 %3, i32* %4 261*e8d8bef9SDimitry Andric // br label %else 262*e8d8bef9SDimitry Andric // 263*e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.store 264*e8d8bef9SDimitry Andric // %5 = extractelement <16 x i1> %mask, i32 1 265*e8d8bef9SDimitry Andric // br i1 %5, label %cond.store1, label %else2 266*e8d8bef9SDimitry Andric // 267*e8d8bef9SDimitry Andric // cond.store1: ; preds = %else 268*e8d8bef9SDimitry Andric // %6 = extractelement <16 x i32> %val, i32 1 269*e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1 
270*e8d8bef9SDimitry Andric // store i32 %6, i32* %7 271*e8d8bef9SDimitry Andric // br label %else2 272*e8d8bef9SDimitry Andric // . . . 273*e8d8bef9SDimitry Andric static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { 274*e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 275*e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 276*e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 277*e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 278*e8d8bef9SDimitry Andric 279*e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 280*e8d8bef9SDimitry Andric auto *VecType = cast<VectorType>(Src->getType()); 281*e8d8bef9SDimitry Andric 282*e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 283*e8d8bef9SDimitry Andric 284*e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 285*e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 286*e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 287*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 288*e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 289*e8d8bef9SDimitry Andric 290*e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 291*e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 292*e8d8bef9SDimitry Andric Builder.CreateAlignedStore(Src, Ptr, AlignVal); 293*e8d8bef9SDimitry Andric CI->eraseFromParent(); 294*e8d8bef9SDimitry Andric return; 295*e8d8bef9SDimitry Andric } 296*e8d8bef9SDimitry Andric 297*e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 
298*e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 299*e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 300*e8d8bef9SDimitry Andric // Bitcast %addr from i8* to EltTy* 301*e8d8bef9SDimitry Andric Type *NewPtrType = 302*e8d8bef9SDimitry Andric EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); 303*e8d8bef9SDimitry Andric Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); 304*e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 305*e8d8bef9SDimitry Andric 306*e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 307*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 308*e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 309*e8d8bef9SDimitry Andric continue; 310*e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 311*e8d8bef9SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 312*e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 313*e8d8bef9SDimitry Andric } 314*e8d8bef9SDimitry Andric CI->eraseFromParent(); 315*e8d8bef9SDimitry Andric return; 316*e8d8bef9SDimitry Andric } 317*e8d8bef9SDimitry Andric 318*e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 319*e8d8bef9SDimitry Andric // better results on X86 at least. 
320*e8d8bef9SDimitry Andric Value *SclrMask; 321*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 322*e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 323*e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 324*e8d8bef9SDimitry Andric } 325*e8d8bef9SDimitry Andric 326*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 327*e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 328*e8d8bef9SDimitry Andric // 329*e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 330*e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 331*e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 332*e8d8bef9SDimitry Andric // 333*e8d8bef9SDimitry Andric Value *Predicate; 334*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 335*e8d8bef9SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); 336*e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 337*e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 338*e8d8bef9SDimitry Andric } else { 339*e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 340*e8d8bef9SDimitry Andric } 341*e8d8bef9SDimitry Andric 342*e8d8bef9SDimitry Andric // Create "cond" block 343*e8d8bef9SDimitry Andric // 344*e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 345*e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 346*e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 347*e8d8bef9SDimitry Andric // 348*e8d8bef9SDimitry Andric BasicBlock *CondBlock = 349*e8d8bef9SDimitry Andric IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); 350*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 351*e8d8bef9SDimitry Andric 352*e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 353*e8d8bef9SDimitry Andric Value *Gep = 
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 354*e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 355*e8d8bef9SDimitry Andric 356*e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 357*e8d8bef9SDimitry Andric BasicBlock *NewIfBlock = 358*e8d8bef9SDimitry Andric CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); 359*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 360*e8d8bef9SDimitry Andric Instruction *OldBr = IfBlock->getTerminator(); 361*e8d8bef9SDimitry Andric BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); 362*e8d8bef9SDimitry Andric OldBr->eraseFromParent(); 363*e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 364*e8d8bef9SDimitry Andric } 365*e8d8bef9SDimitry Andric CI->eraseFromParent(); 366*e8d8bef9SDimitry Andric 367*e8d8bef9SDimitry Andric ModifiedDT = true; 368*e8d8bef9SDimitry Andric } 369*e8d8bef9SDimitry Andric 370*e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like 371*e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, 372*e8d8bef9SDimitry Andric // <16 x i1> %Mask, <16 x i32> %Src) 373*e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 374*e8d8bef9SDimitry Andric // the appropriate mask bit is set 375*e8d8bef9SDimitry Andric // 376*e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind 377*e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0 378*e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else 379*e8d8bef9SDimitry Andric // 380*e8d8bef9SDimitry Andric // cond.load: 381*e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 382*e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4 383*e8d8bef9SDimitry Andric // %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0 384*e8d8bef9SDimitry Andric // br label %else 385*e8d8bef9SDimitry Andric // 
386*e8d8bef9SDimitry Andric // else: 387*e8d8bef9SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0] 388*e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1 389*e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2 390*e8d8bef9SDimitry Andric // 391*e8d8bef9SDimitry Andric // cond.load1: 392*e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 393*e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4 394*e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 395*e8d8bef9SDimitry Andric // br label %else2 396*e8d8bef9SDimitry Andric // . . . 397*e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src 398*e8d8bef9SDimitry Andric // ret <16 x i32> %Result 399*e8d8bef9SDimitry Andric static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { 400*e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(0); 401*e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 402*e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 403*e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 404*e8d8bef9SDimitry Andric 405*e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 406*e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 407*e8d8bef9SDimitry Andric 408*e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 409*e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 410*e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 411*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 412*e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 413*e8d8bef9SDimitry Andric 414*e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 415*e8d8bef9SDimitry Andric 416*e8d8bef9SDimitry Andric // The result vector 417*e8d8bef9SDimitry Andric Value *VResult 
= Src0; 418*e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 419*e8d8bef9SDimitry Andric 420*e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 421*e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 422*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 423*e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 424*e8d8bef9SDimitry Andric continue; 425*e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 426*e8d8bef9SDimitry Andric LoadInst *Load = 427*e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 428*e8d8bef9SDimitry Andric VResult = 429*e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 430*e8d8bef9SDimitry Andric } 431*e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 432*e8d8bef9SDimitry Andric CI->eraseFromParent(); 433*e8d8bef9SDimitry Andric return; 434*e8d8bef9SDimitry Andric } 435*e8d8bef9SDimitry Andric 436*e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 437*e8d8bef9SDimitry Andric // better results on X86 at least. 
438*e8d8bef9SDimitry Andric Value *SclrMask; 439*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 440*e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 441*e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 442*e8d8bef9SDimitry Andric } 443*e8d8bef9SDimitry Andric 444*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 445*e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 446*e8d8bef9SDimitry Andric // 447*e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 448*e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 449*e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load, label %else 450*e8d8bef9SDimitry Andric // 451*e8d8bef9SDimitry Andric 452*e8d8bef9SDimitry Andric Value *Predicate; 453*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 454*e8d8bef9SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); 455*e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 456*e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 457*e8d8bef9SDimitry Andric } else { 458*e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 459*e8d8bef9SDimitry Andric } 460*e8d8bef9SDimitry Andric 461*e8d8bef9SDimitry Andric // Create "cond" block 462*e8d8bef9SDimitry Andric // 463*e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 464*e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 465*e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 466*e8d8bef9SDimitry Andric // 467*e8d8bef9SDimitry Andric BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); 468*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 469*e8d8bef9SDimitry Andric 470*e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 471*e8d8bef9SDimitry 
Andric LoadInst *Load = 472*e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 473*e8d8bef9SDimitry Andric Value *NewVResult = 474*e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 475*e8d8bef9SDimitry Andric 476*e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 477*e8d8bef9SDimitry Andric BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); 478*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 479*e8d8bef9SDimitry Andric Instruction *OldBr = IfBlock->getTerminator(); 480*e8d8bef9SDimitry Andric BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); 481*e8d8bef9SDimitry Andric OldBr->eraseFromParent(); 482*e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 483*e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 484*e8d8bef9SDimitry Andric 485*e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 486*e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 487*e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 488*e8d8bef9SDimitry Andric VResult = Phi; 489*e8d8bef9SDimitry Andric } 490*e8d8bef9SDimitry Andric 491*e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 492*e8d8bef9SDimitry Andric CI->eraseFromParent(); 493*e8d8bef9SDimitry Andric 494*e8d8bef9SDimitry Andric ModifiedDT = true; 495*e8d8bef9SDimitry Andric } 496*e8d8bef9SDimitry Andric 497*e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like 498*e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, 499*e8d8bef9SDimitry Andric // <16 x i1> %Mask) 500*e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 501*e8d8bef9SDimitry Andric // the appropriate mask bit is set. 
502*e8d8bef9SDimitry Andric // 503*e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind 504*e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0 505*e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else 506*e8d8bef9SDimitry Andric // 507*e8d8bef9SDimitry Andric // cond.store: 508*e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0 509*e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 510*e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4 511*e8d8bef9SDimitry Andric // br label %else 512*e8d8bef9SDimitry Andric // 513*e8d8bef9SDimitry Andric // else: 514*e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1 515*e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2 516*e8d8bef9SDimitry Andric // 517*e8d8bef9SDimitry Andric // cond.store1: 518*e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1 519*e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 520*e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4 521*e8d8bef9SDimitry Andric // br label %else2 522*e8d8bef9SDimitry Andric // . . . 
523*e8d8bef9SDimitry Andric static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { 524*e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 525*e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(1); 526*e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 527*e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 528*e8d8bef9SDimitry Andric 529*e8d8bef9SDimitry Andric auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); 530*e8d8bef9SDimitry Andric 531*e8d8bef9SDimitry Andric assert( 532*e8d8bef9SDimitry Andric isa<VectorType>(Ptrs->getType()) && 533*e8d8bef9SDimitry Andric isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && 534*e8d8bef9SDimitry Andric "Vector of pointers is expected in masked scatter intrinsic"); 535*e8d8bef9SDimitry Andric 536*e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 537*e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 538*e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 539*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 540*e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 541*e8d8bef9SDimitry Andric 542*e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 543*e8d8bef9SDimitry Andric unsigned VectorWidth = SrcFVTy->getNumElements(); 544*e8d8bef9SDimitry Andric 545*e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 
546*e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 547*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 548*e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 549*e8d8bef9SDimitry Andric continue; 550*e8d8bef9SDimitry Andric Value *OneElt = 551*e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 552*e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 553*e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 554*e8d8bef9SDimitry Andric } 555*e8d8bef9SDimitry Andric CI->eraseFromParent(); 556*e8d8bef9SDimitry Andric return; 557*e8d8bef9SDimitry Andric } 558*e8d8bef9SDimitry Andric 559*e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 560*e8d8bef9SDimitry Andric // better results on X86 at least. 561*e8d8bef9SDimitry Andric Value *SclrMask; 562*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 563*e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 564*e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 565*e8d8bef9SDimitry Andric } 566*e8d8bef9SDimitry Andric 567*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 568*e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 569*e8d8bef9SDimitry Andric // 570*e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 571*e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 572*e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store, label %else 573*e8d8bef9SDimitry Andric // 574*e8d8bef9SDimitry Andric Value *Predicate; 575*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 576*e8d8bef9SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); 577*e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 
578*e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 579*e8d8bef9SDimitry Andric } else { 580*e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 581*e8d8bef9SDimitry Andric } 582*e8d8bef9SDimitry Andric 583*e8d8bef9SDimitry Andric // Create "cond" block 584*e8d8bef9SDimitry Andric // 585*e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1 586*e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 587*e8d8bef9SDimitry Andric // %store i32 %Elt1, i32* %Ptr1 588*e8d8bef9SDimitry Andric // 589*e8d8bef9SDimitry Andric BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); 590*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 591*e8d8bef9SDimitry Andric 592*e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 593*e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 594*e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 595*e8d8bef9SDimitry Andric 596*e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 597*e8d8bef9SDimitry Andric BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); 598*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 599*e8d8bef9SDimitry Andric Instruction *OldBr = IfBlock->getTerminator(); 600*e8d8bef9SDimitry Andric BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); 601*e8d8bef9SDimitry Andric OldBr->eraseFromParent(); 602*e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 603*e8d8bef9SDimitry Andric } 604*e8d8bef9SDimitry Andric CI->eraseFromParent(); 605*e8d8bef9SDimitry Andric 606*e8d8bef9SDimitry Andric ModifiedDT = true; 607*e8d8bef9SDimitry Andric } 608*e8d8bef9SDimitry Andric 609*e8d8bef9SDimitry Andric static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { 610*e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0); 
611*e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(1); 612*e8d8bef9SDimitry Andric Value *PassThru = CI->getArgOperand(2); 613*e8d8bef9SDimitry Andric 614*e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 615*e8d8bef9SDimitry Andric 616*e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 617*e8d8bef9SDimitry Andric 618*e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 619*e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 620*e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 621*e8d8bef9SDimitry Andric 622*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 623*e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 624*e8d8bef9SDimitry Andric 625*e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 626*e8d8bef9SDimitry Andric 627*e8d8bef9SDimitry Andric // The result vector 628*e8d8bef9SDimitry Andric Value *VResult = PassThru; 629*e8d8bef9SDimitry Andric 630*e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 631*e8d8bef9SDimitry Andric // Create a build_vector pattern, with loads/undefs as necessary and then 632*e8d8bef9SDimitry Andric // shuffle blend with the pass through value. 
633*e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 634*e8d8bef9SDimitry Andric unsigned MemIndex = 0; 635*e8d8bef9SDimitry Andric VResult = UndefValue::get(VecType); 636*e8d8bef9SDimitry Andric SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem); 637*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 638*e8d8bef9SDimitry Andric Value *InsertElt; 639*e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) { 640*e8d8bef9SDimitry Andric InsertElt = UndefValue::get(EltTy); 641*e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx + VectorWidth; 642*e8d8bef9SDimitry Andric } else { 643*e8d8bef9SDimitry Andric Value *NewPtr = 644*e8d8bef9SDimitry Andric Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 645*e8d8bef9SDimitry Andric InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1), 646*e8d8bef9SDimitry Andric "Load" + Twine(Idx)); 647*e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx; 648*e8d8bef9SDimitry Andric ++MemIndex; 649*e8d8bef9SDimitry Andric } 650*e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx, 651*e8d8bef9SDimitry Andric "Res" + Twine(Idx)); 652*e8d8bef9SDimitry Andric } 653*e8d8bef9SDimitry Andric VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask); 654*e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 655*e8d8bef9SDimitry Andric CI->eraseFromParent(); 656*e8d8bef9SDimitry Andric return; 657*e8d8bef9SDimitry Andric } 658*e8d8bef9SDimitry Andric 659*e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 660*e8d8bef9SDimitry Andric // better results on X86 at least. 
661*e8d8bef9SDimitry Andric Value *SclrMask; 662*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 663*e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 664*e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 665*e8d8bef9SDimitry Andric } 666*e8d8bef9SDimitry Andric 667*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 668*e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 669*e8d8bef9SDimitry Andric // 670*e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 671*e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 672*e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 673*e8d8bef9SDimitry Andric // 674*e8d8bef9SDimitry Andric 675*e8d8bef9SDimitry Andric Value *Predicate; 676*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 677*e8d8bef9SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); 678*e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 679*e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 680*e8d8bef9SDimitry Andric } else { 681*e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 682*e8d8bef9SDimitry Andric } 683*e8d8bef9SDimitry Andric 684*e8d8bef9SDimitry Andric // Create "cond" block 685*e8d8bef9SDimitry Andric // 686*e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 687*e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 688*e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 689*e8d8bef9SDimitry Andric // 690*e8d8bef9SDimitry Andric BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), 691*e8d8bef9SDimitry Andric "cond.load"); 692*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 693*e8d8bef9SDimitry Andric 694*e8d8bef9SDimitry Andric 
LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1)); 695*e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 696*e8d8bef9SDimitry Andric 697*e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 698*e8d8bef9SDimitry Andric Value *NewPtr; 699*e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 700*e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 701*e8d8bef9SDimitry Andric 702*e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 703*e8d8bef9SDimitry Andric BasicBlock *NewIfBlock = 704*e8d8bef9SDimitry Andric CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); 705*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 706*e8d8bef9SDimitry Andric Instruction *OldBr = IfBlock->getTerminator(); 707*e8d8bef9SDimitry Andric BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); 708*e8d8bef9SDimitry Andric OldBr->eraseFromParent(); 709*e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 710*e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 711*e8d8bef9SDimitry Andric 712*e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 713*e8d8bef9SDimitry Andric PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 714*e8d8bef9SDimitry Andric ResultPhi->addIncoming(NewVResult, CondBlock); 715*e8d8bef9SDimitry Andric ResultPhi->addIncoming(VResult, PrevIfBlock); 716*e8d8bef9SDimitry Andric VResult = ResultPhi; 717*e8d8bef9SDimitry Andric 718*e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 
719*e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 720*e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 721*e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 722*e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 723*e8d8bef9SDimitry Andric Ptr = PtrPhi; 724*e8d8bef9SDimitry Andric } 725*e8d8bef9SDimitry Andric } 726*e8d8bef9SDimitry Andric 727*e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 728*e8d8bef9SDimitry Andric CI->eraseFromParent(); 729*e8d8bef9SDimitry Andric 730*e8d8bef9SDimitry Andric ModifiedDT = true; 731*e8d8bef9SDimitry Andric } 732*e8d8bef9SDimitry Andric 733*e8d8bef9SDimitry Andric static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { 734*e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 735*e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 736*e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 737*e8d8bef9SDimitry Andric 738*e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(Src->getType()); 739*e8d8bef9SDimitry Andric 740*e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 741*e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 742*e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 743*e8d8bef9SDimitry Andric 744*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 745*e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 746*e8d8bef9SDimitry Andric 747*e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 748*e8d8bef9SDimitry Andric 749*e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 750*e8d8bef9SDimitry Andric 751*e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 
752*e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 753*e8d8bef9SDimitry Andric unsigned MemIndex = 0; 754*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 755*e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 756*e8d8bef9SDimitry Andric continue; 757*e8d8bef9SDimitry Andric Value *OneElt = 758*e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 759*e8d8bef9SDimitry Andric Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 760*e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, NewPtr, Align(1)); 761*e8d8bef9SDimitry Andric ++MemIndex; 762*e8d8bef9SDimitry Andric } 763*e8d8bef9SDimitry Andric CI->eraseFromParent(); 764*e8d8bef9SDimitry Andric return; 765*e8d8bef9SDimitry Andric } 766*e8d8bef9SDimitry Andric 767*e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 768*e8d8bef9SDimitry Andric // better results on X86 at least. 
769*e8d8bef9SDimitry Andric Value *SclrMask; 770*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 771*e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 772*e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 773*e8d8bef9SDimitry Andric } 774*e8d8bef9SDimitry Andric 775*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 776*e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 777*e8d8bef9SDimitry Andric // 778*e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 779*e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 780*e8d8bef9SDimitry Andric // 781*e8d8bef9SDimitry Andric Value *Predicate; 782*e8d8bef9SDimitry Andric if (VectorWidth != 1) { 783*e8d8bef9SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); 784*e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 785*e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 786*e8d8bef9SDimitry Andric } else { 787*e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 788*e8d8bef9SDimitry Andric } 789*e8d8bef9SDimitry Andric 790*e8d8bef9SDimitry Andric // Create "cond" block 791*e8d8bef9SDimitry Andric // 792*e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 793*e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 794*e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 795*e8d8bef9SDimitry Andric // 796*e8d8bef9SDimitry Andric BasicBlock *CondBlock = 797*e8d8bef9SDimitry Andric IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); 798*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 799*e8d8bef9SDimitry Andric 800*e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 801*e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, Align(1)); 
802*e8d8bef9SDimitry Andric 803*e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 804*e8d8bef9SDimitry Andric Value *NewPtr; 805*e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 806*e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 807*e8d8bef9SDimitry Andric 808*e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 809*e8d8bef9SDimitry Andric BasicBlock *NewIfBlock = 810*e8d8bef9SDimitry Andric CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); 811*e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 812*e8d8bef9SDimitry Andric Instruction *OldBr = IfBlock->getTerminator(); 813*e8d8bef9SDimitry Andric BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); 814*e8d8bef9SDimitry Andric OldBr->eraseFromParent(); 815*e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 816*e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 817*e8d8bef9SDimitry Andric 818*e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 
819*e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 820*e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 821*e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 822*e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 823*e8d8bef9SDimitry Andric Ptr = PtrPhi; 824*e8d8bef9SDimitry Andric } 825*e8d8bef9SDimitry Andric } 826*e8d8bef9SDimitry Andric CI->eraseFromParent(); 827*e8d8bef9SDimitry Andric 828*e8d8bef9SDimitry Andric ModifiedDT = true; 829*e8d8bef9SDimitry Andric } 830*e8d8bef9SDimitry Andric 831*e8d8bef9SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI) { 832*e8d8bef9SDimitry Andric bool EverMadeChange = false; 833*e8d8bef9SDimitry Andric bool MadeChange = true; 834*e8d8bef9SDimitry Andric auto &DL = F.getParent()->getDataLayout(); 835*e8d8bef9SDimitry Andric while (MadeChange) { 836*e8d8bef9SDimitry Andric MadeChange = false; 837*e8d8bef9SDimitry Andric for (Function::iterator I = F.begin(); I != F.end();) { 838*e8d8bef9SDimitry Andric BasicBlock *BB = &*I++; 839*e8d8bef9SDimitry Andric bool ModifiedDTOnIteration = false; 840*e8d8bef9SDimitry Andric MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL); 841*e8d8bef9SDimitry Andric 842*e8d8bef9SDimitry Andric // Restart BB iteration if the dominator tree of the Function was changed 843*e8d8bef9SDimitry Andric if (ModifiedDTOnIteration) 844*e8d8bef9SDimitry Andric break; 845*e8d8bef9SDimitry Andric } 846*e8d8bef9SDimitry Andric 847*e8d8bef9SDimitry Andric EverMadeChange |= MadeChange; 848*e8d8bef9SDimitry Andric } 849*e8d8bef9SDimitry Andric return EverMadeChange; 850*e8d8bef9SDimitry Andric } 851*e8d8bef9SDimitry Andric 852*e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) { 853*e8d8bef9SDimitry Andric auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 854*e8d8bef9SDimitry Andric return runImpl(F, TTI); 855*e8d8bef9SDimitry Andric } 
856*e8d8bef9SDimitry Andric 857*e8d8bef9SDimitry Andric PreservedAnalyses 858*e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) { 859*e8d8bef9SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F); 860*e8d8bef9SDimitry Andric if (!runImpl(F, TTI)) 861*e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 862*e8d8bef9SDimitry Andric PreservedAnalyses PA; 863*e8d8bef9SDimitry Andric PA.preserve<TargetIRAnalysis>(); 864*e8d8bef9SDimitry Andric return PA; 865*e8d8bef9SDimitry Andric } 866*e8d8bef9SDimitry Andric 867*e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 868*e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 869*e8d8bef9SDimitry Andric const DataLayout &DL) { 870*e8d8bef9SDimitry Andric bool MadeChange = false; 871*e8d8bef9SDimitry Andric 872*e8d8bef9SDimitry Andric BasicBlock::iterator CurInstIterator = BB.begin(); 873*e8d8bef9SDimitry Andric while (CurInstIterator != BB.end()) { 874*e8d8bef9SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) 875*e8d8bef9SDimitry Andric MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL); 876*e8d8bef9SDimitry Andric if (ModifiedDT) 877*e8d8bef9SDimitry Andric return true; 878*e8d8bef9SDimitry Andric } 879*e8d8bef9SDimitry Andric 880*e8d8bef9SDimitry Andric return MadeChange; 881*e8d8bef9SDimitry Andric } 882*e8d8bef9SDimitry Andric 883*e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 884*e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 885*e8d8bef9SDimitry Andric const DataLayout &DL) { 886*e8d8bef9SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 887*e8d8bef9SDimitry Andric if (II) { 888*e8d8bef9SDimitry Andric // The scalarization code below does not work for scalable vectors. 
889*e8d8bef9SDimitry Andric if (isa<ScalableVectorType>(II->getType()) || 890*e8d8bef9SDimitry Andric any_of(II->arg_operands(), 891*e8d8bef9SDimitry Andric [](Value *V) { return isa<ScalableVectorType>(V->getType()); })) 892*e8d8bef9SDimitry Andric return false; 893*e8d8bef9SDimitry Andric 894*e8d8bef9SDimitry Andric switch (II->getIntrinsicID()) { 895*e8d8bef9SDimitry Andric default: 896*e8d8bef9SDimitry Andric break; 897*e8d8bef9SDimitry Andric case Intrinsic::masked_load: 898*e8d8bef9SDimitry Andric // Scalarize unsupported vector masked load 899*e8d8bef9SDimitry Andric if (TTI.isLegalMaskedLoad( 900*e8d8bef9SDimitry Andric CI->getType(), 901*e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) 902*e8d8bef9SDimitry Andric return false; 903*e8d8bef9SDimitry Andric scalarizeMaskedLoad(CI, ModifiedDT); 904*e8d8bef9SDimitry Andric return true; 905*e8d8bef9SDimitry Andric case Intrinsic::masked_store: 906*e8d8bef9SDimitry Andric if (TTI.isLegalMaskedStore( 907*e8d8bef9SDimitry Andric CI->getArgOperand(0)->getType(), 908*e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) 909*e8d8bef9SDimitry Andric return false; 910*e8d8bef9SDimitry Andric scalarizeMaskedStore(CI, ModifiedDT); 911*e8d8bef9SDimitry Andric return true; 912*e8d8bef9SDimitry Andric case Intrinsic::masked_gather: { 913*e8d8bef9SDimitry Andric unsigned AlignmentInt = 914*e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 915*e8d8bef9SDimitry Andric Type *LoadTy = CI->getType(); 916*e8d8bef9SDimitry Andric Align Alignment = 917*e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); 918*e8d8bef9SDimitry Andric if (TTI.isLegalMaskedGather(LoadTy, Alignment)) 919*e8d8bef9SDimitry Andric return false; 920*e8d8bef9SDimitry Andric scalarizeMaskedGather(CI, ModifiedDT); 921*e8d8bef9SDimitry Andric return true; 922*e8d8bef9SDimitry Andric } 923*e8d8bef9SDimitry Andric case 
Intrinsic::masked_scatter: { 924*e8d8bef9SDimitry Andric unsigned AlignmentInt = 925*e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 926*e8d8bef9SDimitry Andric Type *StoreTy = CI->getArgOperand(0)->getType(); 927*e8d8bef9SDimitry Andric Align Alignment = 928*e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); 929*e8d8bef9SDimitry Andric if (TTI.isLegalMaskedScatter(StoreTy, Alignment)) 930*e8d8bef9SDimitry Andric return false; 931*e8d8bef9SDimitry Andric scalarizeMaskedScatter(CI, ModifiedDT); 932*e8d8bef9SDimitry Andric return true; 933*e8d8bef9SDimitry Andric } 934*e8d8bef9SDimitry Andric case Intrinsic::masked_expandload: 935*e8d8bef9SDimitry Andric if (TTI.isLegalMaskedExpandLoad(CI->getType())) 936*e8d8bef9SDimitry Andric return false; 937*e8d8bef9SDimitry Andric scalarizeMaskedExpandLoad(CI, ModifiedDT); 938*e8d8bef9SDimitry Andric return true; 939*e8d8bef9SDimitry Andric case Intrinsic::masked_compressstore: 940*e8d8bef9SDimitry Andric if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) 941*e8d8bef9SDimitry Andric return false; 942*e8d8bef9SDimitry Andric scalarizeMaskedCompressStore(CI, ModifiedDT); 943*e8d8bef9SDimitry Andric return true; 944*e8d8bef9SDimitry Andric } 945*e8d8bef9SDimitry Andric } 946*e8d8bef9SDimitry Andric 947*e8d8bef9SDimitry Andric return false; 948*e8d8bef9SDimitry Andric } 949