1e8d8bef9SDimitry Andric //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// 2*81ad6265SDimitry Andric // intrinsics 3e8d8bef9SDimitry Andric // 4e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 6e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7e8d8bef9SDimitry Andric // 8e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 9e8d8bef9SDimitry Andric // 10e8d8bef9SDimitry Andric // This pass replaces masked memory intrinsics - when unsupported by the target 11e8d8bef9SDimitry Andric // - with a chain of basic blocks, that deal with the elements one-by-one if the 12e8d8bef9SDimitry Andric // appropriate mask bit is set. 13e8d8bef9SDimitry Andric // 14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 15e8d8bef9SDimitry Andric 16e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" 17e8d8bef9SDimitry Andric #include "llvm/ADT/Twine.h" 18fe6060f1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 19e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/BasicBlock.h" 21e8d8bef9SDimitry Andric #include "llvm/IR/Constant.h" 22e8d8bef9SDimitry Andric #include "llvm/IR/Constants.h" 23e8d8bef9SDimitry Andric #include "llvm/IR/DerivedTypes.h" 24fe6060f1SDimitry Andric #include "llvm/IR/Dominators.h" 25e8d8bef9SDimitry Andric #include "llvm/IR/Function.h" 26e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h" 27e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h" 28e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h" 29e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 30e8d8bef9SDimitry Andric #include "llvm/IR/Type.h" 31e8d8bef9SDimitry Andric #include 
"llvm/IR/Value.h" 32e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 33e8d8bef9SDimitry Andric #include "llvm/Pass.h" 34e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h" 35e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h" 36fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 37e8d8bef9SDimitry Andric #include <cassert> 38e8d8bef9SDimitry Andric 39e8d8bef9SDimitry Andric using namespace llvm; 40e8d8bef9SDimitry Andric 41e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin" 42e8d8bef9SDimitry Andric 43e8d8bef9SDimitry Andric namespace { 44e8d8bef9SDimitry Andric 45e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass { 46e8d8bef9SDimitry Andric public: 47e8d8bef9SDimitry Andric static char ID; // Pass identification, replacement for typeid 48e8d8bef9SDimitry Andric 49e8d8bef9SDimitry Andric explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) { 50e8d8bef9SDimitry Andric initializeScalarizeMaskedMemIntrinLegacyPassPass( 51e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 52e8d8bef9SDimitry Andric } 53e8d8bef9SDimitry Andric 54e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override; 55e8d8bef9SDimitry Andric 56e8d8bef9SDimitry Andric StringRef getPassName() const override { 57e8d8bef9SDimitry Andric return "Scalarize Masked Memory Intrinsics"; 58e8d8bef9SDimitry Andric } 59e8d8bef9SDimitry Andric 60e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 61e8d8bef9SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 62fe6060f1SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 63e8d8bef9SDimitry Andric } 64e8d8bef9SDimitry Andric }; 65e8d8bef9SDimitry Andric 66e8d8bef9SDimitry Andric } // end anonymous namespace 67e8d8bef9SDimitry Andric 68e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 69fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const 
DataLayout &DL, 70fe6060f1SDimitry Andric DomTreeUpdater *DTU); 71e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 72e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 73fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU); 74e8d8bef9SDimitry Andric 75e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0; 76e8d8bef9SDimitry Andric 77e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 78e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 79e8d8bef9SDimitry Andric false) 80e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 81fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 82e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 83e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 84e8d8bef9SDimitry Andric false) 85e8d8bef9SDimitry Andric 86e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() { 87e8d8bef9SDimitry Andric return new ScalarizeMaskedMemIntrinLegacyPass(); 88e8d8bef9SDimitry Andric } 89e8d8bef9SDimitry Andric 90e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) { 91e8d8bef9SDimitry Andric Constant *C = dyn_cast<Constant>(Mask); 92e8d8bef9SDimitry Andric if (!C) 93e8d8bef9SDimitry Andric return false; 94e8d8bef9SDimitry Andric 95e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); 96e8d8bef9SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) { 97e8d8bef9SDimitry Andric Constant *CElt = C->getAggregateElement(i); 98e8d8bef9SDimitry Andric if (!CElt || !isa<ConstantInt>(CElt)) 99e8d8bef9SDimitry Andric return false; 100e8d8bef9SDimitry Andric } 101e8d8bef9SDimitry Andric 102e8d8bef9SDimitry Andric return true; 103e8d8bef9SDimitry Andric } 104e8d8bef9SDimitry Andric 105fe6060f1SDimitry 
Andric static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth, 106fe6060f1SDimitry Andric unsigned Idx) { 107fe6060f1SDimitry Andric return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx; 108fe6060f1SDimitry Andric } 109fe6060f1SDimitry Andric 110e8d8bef9SDimitry Andric // Translate a masked load intrinsic like 111e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, 112e8d8bef9SDimitry Andric // <16 x i1> %mask, <16 x i32> %passthru) 113e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 114e8d8bef9SDimitry Andric // the appropriate mask bit is set 115e8d8bef9SDimitry Andric // 116e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 117e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 118e8d8bef9SDimitry Andric // br i1 %2, label %cond.load, label %else 119e8d8bef9SDimitry Andric // 120e8d8bef9SDimitry Andric // cond.load: ; preds = %0 121e8d8bef9SDimitry Andric // %3 = getelementptr i32* %1, i32 0 122e8d8bef9SDimitry Andric // %4 = load i32* %3 123e8d8bef9SDimitry Andric // %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 124e8d8bef9SDimitry Andric // br label %else 125e8d8bef9SDimitry Andric // 126e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.load 127e8d8bef9SDimitry Andric // %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ] 128e8d8bef9SDimitry Andric // %6 = extractelement <16 x i1> %mask, i32 1 129e8d8bef9SDimitry Andric // br i1 %6, label %cond.load1, label %else2 130e8d8bef9SDimitry Andric // 131e8d8bef9SDimitry Andric // cond.load1: ; preds = %else 132e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1 133e8d8bef9SDimitry Andric // %8 = load i32* %7 134e8d8bef9SDimitry Andric // %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 135e8d8bef9SDimitry Andric // br label %else2 136e8d8bef9SDimitry Andric // 137e8d8bef9SDimitry Andric // else2: ; preds = %else, %cond.load1 138e8d8bef9SDimitry Andric // 
%res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] 139e8d8bef9SDimitry Andric // %10 = extractelement <16 x i1> %mask, i32 2 140e8d8bef9SDimitry Andric // br i1 %10, label %cond.load4, label %else5 141e8d8bef9SDimitry Andric // 142fe6060f1SDimitry Andric static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI, 143fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 144e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0); 145e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 146e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 147e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 148e8d8bef9SDimitry Andric 149e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 150e8d8bef9SDimitry Andric VectorType *VecType = cast<FixedVectorType>(CI->getType()); 151e8d8bef9SDimitry Andric 152e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 153e8d8bef9SDimitry Andric 154e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 155e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 156e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 157e8d8bef9SDimitry Andric 158e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 159e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 160e8d8bef9SDimitry Andric 161e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 162e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 163e8d8bef9SDimitry Andric Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); 164e8d8bef9SDimitry Andric CI->replaceAllUsesWith(NewI); 165e8d8bef9SDimitry Andric CI->eraseFromParent(); 166e8d8bef9SDimitry Andric return; 167e8d8bef9SDimitry Andric } 168e8d8bef9SDimitry Andric 169e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 
170e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 171e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 172e8d8bef9SDimitry Andric // Bitcast %addr from i8* to EltTy* 173e8d8bef9SDimitry Andric Type *NewPtrType = 174e8d8bef9SDimitry Andric EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); 175e8d8bef9SDimitry Andric Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); 176e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 177e8d8bef9SDimitry Andric 178e8d8bef9SDimitry Andric // The result vector 179e8d8bef9SDimitry Andric Value *VResult = Src0; 180e8d8bef9SDimitry Andric 181e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 182e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 183e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 184e8d8bef9SDimitry Andric continue; 185e8d8bef9SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 186e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 187e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, Load, Idx); 188e8d8bef9SDimitry Andric } 189e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 190e8d8bef9SDimitry Andric CI->eraseFromParent(); 191e8d8bef9SDimitry Andric return; 192e8d8bef9SDimitry Andric } 193e8d8bef9SDimitry Andric 194e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 195e8d8bef9SDimitry Andric // better results on X86 at least. 
196e8d8bef9SDimitry Andric Value *SclrMask; 197e8d8bef9SDimitry Andric if (VectorWidth != 1) { 198e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 199e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 200e8d8bef9SDimitry Andric } 201e8d8bef9SDimitry Andric 202e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 203e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 204e8d8bef9SDimitry Andric // 205e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 206e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 207e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 208e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 209e8d8bef9SDimitry Andric // 210e8d8bef9SDimitry Andric Value *Predicate; 211e8d8bef9SDimitry Andric if (VectorWidth != 1) { 212fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 213fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 214e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 215e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 216e8d8bef9SDimitry Andric } else { 217e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 218e8d8bef9SDimitry Andric } 219e8d8bef9SDimitry Andric 220e8d8bef9SDimitry Andric // Create "cond" block 221e8d8bef9SDimitry Andric // 222e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 223e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 224e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 225e8d8bef9SDimitry Andric // 226fe6060f1SDimitry Andric Instruction *ThenTerm = 227fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 228fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 
229e8d8bef9SDimitry Andric 230fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 231fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 232fe6060f1SDimitry Andric 233fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 234e8d8bef9SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 235e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 236e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 237e8d8bef9SDimitry Andric 238e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 239fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 240fe6060f1SDimitry Andric NewIfBlock->setName("else"); 241e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 242e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 243e8d8bef9SDimitry Andric 244e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 
245fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 246e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 247e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 248e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 249e8d8bef9SDimitry Andric VResult = Phi; 250e8d8bef9SDimitry Andric } 251e8d8bef9SDimitry Andric 252e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 253e8d8bef9SDimitry Andric CI->eraseFromParent(); 254e8d8bef9SDimitry Andric 255e8d8bef9SDimitry Andric ModifiedDT = true; 256e8d8bef9SDimitry Andric } 257e8d8bef9SDimitry Andric 258e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like 259e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, 260e8d8bef9SDimitry Andric // <16 x i1> %mask) 261e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 262e8d8bef9SDimitry Andric // the appropriate mask bit is set 263e8d8bef9SDimitry Andric // 264e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 265e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 266e8d8bef9SDimitry Andric // br i1 %2, label %cond.store, label %else 267e8d8bef9SDimitry Andric // 268e8d8bef9SDimitry Andric // cond.store: ; preds = %0 269e8d8bef9SDimitry Andric // %3 = extractelement <16 x i32> %val, i32 0 270e8d8bef9SDimitry Andric // %4 = getelementptr i32* %1, i32 0 271e8d8bef9SDimitry Andric // store i32 %3, i32* %4 272e8d8bef9SDimitry Andric // br label %else 273e8d8bef9SDimitry Andric // 274e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.store 275e8d8bef9SDimitry Andric // %5 = extractelement <16 x i1> %mask, i32 1 276e8d8bef9SDimitry Andric // br i1 %5, label %cond.store1, label %else2 277e8d8bef9SDimitry Andric // 278e8d8bef9SDimitry Andric // cond.store1: ; preds = %else 279e8d8bef9SDimitry Andric // %6 = extractelement <16 x i32> %val, i32 1 280e8d8bef9SDimitry Andric 
// %7 = getelementptr i32* %1, i32 1 281e8d8bef9SDimitry Andric // store i32 %6, i32* %7 282e8d8bef9SDimitry Andric // br label %else2 283e8d8bef9SDimitry Andric // . . . 284fe6060f1SDimitry Andric static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI, 285fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 286e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 287e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 288e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 289e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 290e8d8bef9SDimitry Andric 291e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 292e8d8bef9SDimitry Andric auto *VecType = cast<VectorType>(Src->getType()); 293e8d8bef9SDimitry Andric 294e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 295e8d8bef9SDimitry Andric 296e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 297e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 298e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 299e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 300e8d8bef9SDimitry Andric 301e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 302e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 303e8d8bef9SDimitry Andric Builder.CreateAlignedStore(Src, Ptr, AlignVal); 304e8d8bef9SDimitry Andric CI->eraseFromParent(); 305e8d8bef9SDimitry Andric return; 306e8d8bef9SDimitry Andric } 307e8d8bef9SDimitry Andric 308e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 
309e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 310e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 311e8d8bef9SDimitry Andric // Bitcast %addr from i8* to EltTy* 312e8d8bef9SDimitry Andric Type *NewPtrType = 313e8d8bef9SDimitry Andric EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); 314e8d8bef9SDimitry Andric Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); 315e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 316e8d8bef9SDimitry Andric 317e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 318e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 319e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 320e8d8bef9SDimitry Andric continue; 321e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 322e8d8bef9SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 323e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 324e8d8bef9SDimitry Andric } 325e8d8bef9SDimitry Andric CI->eraseFromParent(); 326e8d8bef9SDimitry Andric return; 327e8d8bef9SDimitry Andric } 328e8d8bef9SDimitry Andric 329e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 330e8d8bef9SDimitry Andric // better results on X86 at least. 
331e8d8bef9SDimitry Andric Value *SclrMask; 332e8d8bef9SDimitry Andric if (VectorWidth != 1) { 333e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 334e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 335e8d8bef9SDimitry Andric } 336e8d8bef9SDimitry Andric 337e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 338e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 339e8d8bef9SDimitry Andric // 340e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 341e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 342e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 343e8d8bef9SDimitry Andric // 344e8d8bef9SDimitry Andric Value *Predicate; 345e8d8bef9SDimitry Andric if (VectorWidth != 1) { 346fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 347fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 348e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 349e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 350e8d8bef9SDimitry Andric } else { 351e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 352e8d8bef9SDimitry Andric } 353e8d8bef9SDimitry Andric 354e8d8bef9SDimitry Andric // Create "cond" block 355e8d8bef9SDimitry Andric // 356e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 357e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 358e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 359e8d8bef9SDimitry Andric // 360fe6060f1SDimitry Andric Instruction *ThenTerm = 361fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 362fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 363e8d8bef9SDimitry Andric 364fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 365fe6060f1SDimitry 
Andric CondBlock->setName("cond.store"); 366fe6060f1SDimitry Andric 367fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 368e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 369e8d8bef9SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); 370e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 371e8d8bef9SDimitry Andric 372e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 373fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 374fe6060f1SDimitry Andric NewIfBlock->setName("else"); 375fe6060f1SDimitry Andric 376fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 377e8d8bef9SDimitry Andric } 378e8d8bef9SDimitry Andric CI->eraseFromParent(); 379e8d8bef9SDimitry Andric 380e8d8bef9SDimitry Andric ModifiedDT = true; 381e8d8bef9SDimitry Andric } 382e8d8bef9SDimitry Andric 383e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like 384e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, 385e8d8bef9SDimitry Andric // <16 x i1> %Mask, <16 x i32> %Src) 386e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 387e8d8bef9SDimitry Andric // the appropriate mask bit is set 388e8d8bef9SDimitry Andric // 389e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind 390e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0 391e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else 392e8d8bef9SDimitry Andric // 393e8d8bef9SDimitry Andric // cond.load: 394e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 395e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4 396e8d8bef9SDimitry Andric // %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0 397e8d8bef9SDimitry Andric // br label %else 
398e8d8bef9SDimitry Andric // 399e8d8bef9SDimitry Andric // else: 400e8d8bef9SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0] 401e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1 402e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2 403e8d8bef9SDimitry Andric // 404e8d8bef9SDimitry Andric // cond.load1: 405e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 406e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4 407e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 408e8d8bef9SDimitry Andric // br label %else2 409e8d8bef9SDimitry Andric // . . . 410e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src 411e8d8bef9SDimitry Andric // ret <16 x i32> %Result 412fe6060f1SDimitry Andric static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI, 413fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 414e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(0); 415e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 416e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 417e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 418e8d8bef9SDimitry Andric 419e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 420e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 421e8d8bef9SDimitry Andric 422e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 423e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 424e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 425e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 426e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 427e8d8bef9SDimitry Andric 428e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 429e8d8bef9SDimitry Andric 430e8d8bef9SDimitry 
Andric // The result vector 431e8d8bef9SDimitry Andric Value *VResult = Src0; 432e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 433e8d8bef9SDimitry Andric 434e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 435e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 436e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 437e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 438e8d8bef9SDimitry Andric continue; 439e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 440e8d8bef9SDimitry Andric LoadInst *Load = 441e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 442e8d8bef9SDimitry Andric VResult = 443e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 444e8d8bef9SDimitry Andric } 445e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 446e8d8bef9SDimitry Andric CI->eraseFromParent(); 447e8d8bef9SDimitry Andric return; 448e8d8bef9SDimitry Andric } 449e8d8bef9SDimitry Andric 450e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 451e8d8bef9SDimitry Andric // better results on X86 at least. 
452e8d8bef9SDimitry Andric Value *SclrMask; 453e8d8bef9SDimitry Andric if (VectorWidth != 1) { 454e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 455e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 456e8d8bef9SDimitry Andric } 457e8d8bef9SDimitry Andric 458e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 459e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 460e8d8bef9SDimitry Andric // 461e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 462e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 463e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load, label %else 464e8d8bef9SDimitry Andric // 465e8d8bef9SDimitry Andric 466e8d8bef9SDimitry Andric Value *Predicate; 467e8d8bef9SDimitry Andric if (VectorWidth != 1) { 468fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 469fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 470e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 471e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 472e8d8bef9SDimitry Andric } else { 473e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 474e8d8bef9SDimitry Andric } 475e8d8bef9SDimitry Andric 476e8d8bef9SDimitry Andric // Create "cond" block 477e8d8bef9SDimitry Andric // 478e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 479e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 480e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 481e8d8bef9SDimitry Andric // 482fe6060f1SDimitry Andric Instruction *ThenTerm = 483fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 484fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 485e8d8bef9SDimitry Andric 486fe6060f1SDimitry Andric BasicBlock *CondBlock 
= ThenTerm->getParent(); 487fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 488fe6060f1SDimitry Andric 489fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 490e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 491e8d8bef9SDimitry Andric LoadInst *Load = 492e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 493e8d8bef9SDimitry Andric Value *NewVResult = 494e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 495e8d8bef9SDimitry Andric 496e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 497fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 498fe6060f1SDimitry Andric NewIfBlock->setName("else"); 499e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 500e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 501e8d8bef9SDimitry Andric 502fe6060f1SDimitry Andric // Create the phi to join the new and previous value. 
503fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 504e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 505e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 506e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 507e8d8bef9SDimitry Andric VResult = Phi; 508e8d8bef9SDimitry Andric } 509e8d8bef9SDimitry Andric 510e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 511e8d8bef9SDimitry Andric CI->eraseFromParent(); 512e8d8bef9SDimitry Andric 513e8d8bef9SDimitry Andric ModifiedDT = true; 514e8d8bef9SDimitry Andric } 515e8d8bef9SDimitry Andric 516e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like 517e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, 518e8d8bef9SDimitry Andric // <16 x i1> %Mask) 519e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 520e8d8bef9SDimitry Andric // the appropriate mask bit is set. 
//
// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
// %Mask0 = extractelement <16 x i1> %Mask, i32 0
// br i1 %Mask0, label %cond.store, label %else
//
// cond.store:
// %Elt0 = extractelement <16 x i32> %Src, i32 0
// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// store i32 %Elt0, i32* %Ptr0, align 4
// br label %else
//
// else:
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.store1, label %else2
//
// cond.store1:
// %Elt1 = extractelement <16 x i32> %Src, i32 1
// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
//   . . .
542fe6060f1SDimitry Andric static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI, 543fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 544e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 545e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(1); 546e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 547e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 548e8d8bef9SDimitry Andric 549e8d8bef9SDimitry Andric auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); 550e8d8bef9SDimitry Andric 551e8d8bef9SDimitry Andric assert( 552e8d8bef9SDimitry Andric isa<VectorType>(Ptrs->getType()) && 553e8d8bef9SDimitry Andric isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && 554e8d8bef9SDimitry Andric "Vector of pointers is expected in masked scatter intrinsic"); 555e8d8bef9SDimitry Andric 556e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 557e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 558e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 559e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 560e8d8bef9SDimitry Andric 561e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 562e8d8bef9SDimitry Andric unsigned VectorWidth = SrcFVTy->getNumElements(); 563e8d8bef9SDimitry Andric 564e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 
565e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 566e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 567e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 568e8d8bef9SDimitry Andric continue; 569e8d8bef9SDimitry Andric Value *OneElt = 570e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 571e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 572e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 573e8d8bef9SDimitry Andric } 574e8d8bef9SDimitry Andric CI->eraseFromParent(); 575e8d8bef9SDimitry Andric return; 576e8d8bef9SDimitry Andric } 577e8d8bef9SDimitry Andric 578e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 579e8d8bef9SDimitry Andric // better results on X86 at least. 580e8d8bef9SDimitry Andric Value *SclrMask; 581e8d8bef9SDimitry Andric if (VectorWidth != 1) { 582e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 583e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 584e8d8bef9SDimitry Andric } 585e8d8bef9SDimitry Andric 586e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 587e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 588e8d8bef9SDimitry Andric // 589e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 590e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 591e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store, label %else 592e8d8bef9SDimitry Andric // 593e8d8bef9SDimitry Andric Value *Predicate; 594e8d8bef9SDimitry Andric if (VectorWidth != 1) { 595fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 596fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 597e8d8bef9SDimitry Andric Predicate = 
Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 598e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 599e8d8bef9SDimitry Andric } else { 600e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 601e8d8bef9SDimitry Andric } 602e8d8bef9SDimitry Andric 603e8d8bef9SDimitry Andric // Create "cond" block 604e8d8bef9SDimitry Andric // 605e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1 606e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 607e8d8bef9SDimitry Andric // %store i32 %Elt1, i32* %Ptr1 608e8d8bef9SDimitry Andric // 609fe6060f1SDimitry Andric Instruction *ThenTerm = 610fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 611fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 612e8d8bef9SDimitry Andric 613fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 614fe6060f1SDimitry Andric CondBlock->setName("cond.store"); 615fe6060f1SDimitry Andric 616fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 617e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 618e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 619e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 620e8d8bef9SDimitry Andric 621e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 622fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 623fe6060f1SDimitry Andric NewIfBlock->setName("else"); 624fe6060f1SDimitry Andric 625fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 626e8d8bef9SDimitry Andric } 627e8d8bef9SDimitry Andric CI->eraseFromParent(); 628e8d8bef9SDimitry Andric 629e8d8bef9SDimitry Andric ModifiedDT = true; 630e8d8bef9SDimitry Andric } 631e8d8bef9SDimitry Andric 632fe6060f1SDimitry Andric static void 
scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI, 633fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 634e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0); 635e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(1); 636e8d8bef9SDimitry Andric Value *PassThru = CI->getArgOperand(2); 637e8d8bef9SDimitry Andric 638e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 639e8d8bef9SDimitry Andric 640e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 641e8d8bef9SDimitry Andric 642e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 643e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 644e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 645e8d8bef9SDimitry Andric 646e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 647e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 648e8d8bef9SDimitry Andric 649e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 650e8d8bef9SDimitry Andric 651e8d8bef9SDimitry Andric // The result vector 652e8d8bef9SDimitry Andric Value *VResult = PassThru; 653e8d8bef9SDimitry Andric 654e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 655e8d8bef9SDimitry Andric // Create a build_vector pattern, with loads/undefs as necessary and then 656e8d8bef9SDimitry Andric // shuffle blend with the pass through value. 
657e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 658e8d8bef9SDimitry Andric unsigned MemIndex = 0; 659e8d8bef9SDimitry Andric VResult = UndefValue::get(VecType); 660e8d8bef9SDimitry Andric SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem); 661e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 662e8d8bef9SDimitry Andric Value *InsertElt; 663e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) { 664e8d8bef9SDimitry Andric InsertElt = UndefValue::get(EltTy); 665e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx + VectorWidth; 666e8d8bef9SDimitry Andric } else { 667e8d8bef9SDimitry Andric Value *NewPtr = 668e8d8bef9SDimitry Andric Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 669e8d8bef9SDimitry Andric InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1), 670e8d8bef9SDimitry Andric "Load" + Twine(Idx)); 671e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx; 672e8d8bef9SDimitry Andric ++MemIndex; 673e8d8bef9SDimitry Andric } 674e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx, 675e8d8bef9SDimitry Andric "Res" + Twine(Idx)); 676e8d8bef9SDimitry Andric } 677e8d8bef9SDimitry Andric VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask); 678e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 679e8d8bef9SDimitry Andric CI->eraseFromParent(); 680e8d8bef9SDimitry Andric return; 681e8d8bef9SDimitry Andric } 682e8d8bef9SDimitry Andric 683e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 684e8d8bef9SDimitry Andric // better results on X86 at least. 
685e8d8bef9SDimitry Andric Value *SclrMask; 686e8d8bef9SDimitry Andric if (VectorWidth != 1) { 687e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 688e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 689e8d8bef9SDimitry Andric } 690e8d8bef9SDimitry Andric 691e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 692e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 693e8d8bef9SDimitry Andric // 694e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 695e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 696e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 697e8d8bef9SDimitry Andric // 698e8d8bef9SDimitry Andric 699e8d8bef9SDimitry Andric Value *Predicate; 700e8d8bef9SDimitry Andric if (VectorWidth != 1) { 701fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 702fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 703e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 704e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 705e8d8bef9SDimitry Andric } else { 706e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 707e8d8bef9SDimitry Andric } 708e8d8bef9SDimitry Andric 709e8d8bef9SDimitry Andric // Create "cond" block 710e8d8bef9SDimitry Andric // 711e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 712e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 713e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 714e8d8bef9SDimitry Andric // 715fe6060f1SDimitry Andric Instruction *ThenTerm = 716fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 717fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 718e8d8bef9SDimitry 
Andric 719fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 720fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 721fe6060f1SDimitry Andric 722fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 723e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1)); 724e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 725e8d8bef9SDimitry Andric 726e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 727e8d8bef9SDimitry Andric Value *NewPtr; 728e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 729e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 730e8d8bef9SDimitry Andric 731e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 732fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 733fe6060f1SDimitry Andric NewIfBlock->setName("else"); 734e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 735e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 736e8d8bef9SDimitry Andric 737e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 738fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 739e8d8bef9SDimitry Andric PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 740e8d8bef9SDimitry Andric ResultPhi->addIncoming(NewVResult, CondBlock); 741e8d8bef9SDimitry Andric ResultPhi->addIncoming(VResult, PrevIfBlock); 742e8d8bef9SDimitry Andric VResult = ResultPhi; 743e8d8bef9SDimitry Andric 744e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 
745e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 746e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 747e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 748e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 749e8d8bef9SDimitry Andric Ptr = PtrPhi; 750e8d8bef9SDimitry Andric } 751e8d8bef9SDimitry Andric } 752e8d8bef9SDimitry Andric 753e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 754e8d8bef9SDimitry Andric CI->eraseFromParent(); 755e8d8bef9SDimitry Andric 756e8d8bef9SDimitry Andric ModifiedDT = true; 757e8d8bef9SDimitry Andric } 758e8d8bef9SDimitry Andric 759fe6060f1SDimitry Andric static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI, 760fe6060f1SDimitry Andric DomTreeUpdater *DTU, 761fe6060f1SDimitry Andric bool &ModifiedDT) { 762e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 763e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 764e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 765e8d8bef9SDimitry Andric 766e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(Src->getType()); 767e8d8bef9SDimitry Andric 768e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 769e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 770e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 771e8d8bef9SDimitry Andric 772e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 773e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 774e8d8bef9SDimitry Andric 775e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 776e8d8bef9SDimitry Andric 777e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 778e8d8bef9SDimitry Andric 779e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 
780e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 781e8d8bef9SDimitry Andric unsigned MemIndex = 0; 782e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 783e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 784e8d8bef9SDimitry Andric continue; 785e8d8bef9SDimitry Andric Value *OneElt = 786e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 787e8d8bef9SDimitry Andric Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 788e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, NewPtr, Align(1)); 789e8d8bef9SDimitry Andric ++MemIndex; 790e8d8bef9SDimitry Andric } 791e8d8bef9SDimitry Andric CI->eraseFromParent(); 792e8d8bef9SDimitry Andric return; 793e8d8bef9SDimitry Andric } 794e8d8bef9SDimitry Andric 795e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 796e8d8bef9SDimitry Andric // better results on X86 at least. 
797e8d8bef9SDimitry Andric Value *SclrMask; 798e8d8bef9SDimitry Andric if (VectorWidth != 1) { 799e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 800e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 801e8d8bef9SDimitry Andric } 802e8d8bef9SDimitry Andric 803e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 804e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 805e8d8bef9SDimitry Andric // 806e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 807e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 808e8d8bef9SDimitry Andric // 809e8d8bef9SDimitry Andric Value *Predicate; 810e8d8bef9SDimitry Andric if (VectorWidth != 1) { 811fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 812fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 813e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 814e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 815e8d8bef9SDimitry Andric } else { 816e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 817e8d8bef9SDimitry Andric } 818e8d8bef9SDimitry Andric 819e8d8bef9SDimitry Andric // Create "cond" block 820e8d8bef9SDimitry Andric // 821e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 822e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 823e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 824e8d8bef9SDimitry Andric // 825fe6060f1SDimitry Andric Instruction *ThenTerm = 826fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 827fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 828e8d8bef9SDimitry Andric 829fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 830fe6060f1SDimitry Andric 
CondBlock->setName("cond.store"); 831fe6060f1SDimitry Andric 832fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 833e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 834e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, Align(1)); 835e8d8bef9SDimitry Andric 836e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 837e8d8bef9SDimitry Andric Value *NewPtr; 838e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 839e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 840e8d8bef9SDimitry Andric 841e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 842fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 843fe6060f1SDimitry Andric NewIfBlock->setName("else"); 844e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 845e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 846e8d8bef9SDimitry Andric 847fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 848fe6060f1SDimitry Andric 849e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 
850e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 851e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 852e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 853e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 854e8d8bef9SDimitry Andric Ptr = PtrPhi; 855e8d8bef9SDimitry Andric } 856e8d8bef9SDimitry Andric } 857e8d8bef9SDimitry Andric CI->eraseFromParent(); 858e8d8bef9SDimitry Andric 859e8d8bef9SDimitry Andric ModifiedDT = true; 860e8d8bef9SDimitry Andric } 861e8d8bef9SDimitry Andric 862fe6060f1SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI, 863fe6060f1SDimitry Andric DominatorTree *DT) { 864fe6060f1SDimitry Andric Optional<DomTreeUpdater> DTU; 865fe6060f1SDimitry Andric if (DT) 866fe6060f1SDimitry Andric DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); 867fe6060f1SDimitry Andric 868e8d8bef9SDimitry Andric bool EverMadeChange = false; 869e8d8bef9SDimitry Andric bool MadeChange = true; 870e8d8bef9SDimitry Andric auto &DL = F.getParent()->getDataLayout(); 871e8d8bef9SDimitry Andric while (MadeChange) { 872e8d8bef9SDimitry Andric MadeChange = false; 873349cc55cSDimitry Andric for (BasicBlock &BB : llvm::make_early_inc_range(F)) { 874e8d8bef9SDimitry Andric bool ModifiedDTOnIteration = false; 875349cc55cSDimitry Andric MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL, 876*81ad6265SDimitry Andric DTU ? 
DTU.getPointer() : nullptr); 877fe6060f1SDimitry Andric 878e8d8bef9SDimitry Andric // Restart BB iteration if the dominator tree of the Function was changed 879e8d8bef9SDimitry Andric if (ModifiedDTOnIteration) 880e8d8bef9SDimitry Andric break; 881e8d8bef9SDimitry Andric } 882e8d8bef9SDimitry Andric 883e8d8bef9SDimitry Andric EverMadeChange |= MadeChange; 884e8d8bef9SDimitry Andric } 885e8d8bef9SDimitry Andric return EverMadeChange; 886e8d8bef9SDimitry Andric } 887e8d8bef9SDimitry Andric 888e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) { 889e8d8bef9SDimitry Andric auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 890fe6060f1SDimitry Andric DominatorTree *DT = nullptr; 891fe6060f1SDimitry Andric if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 892fe6060f1SDimitry Andric DT = &DTWP->getDomTree(); 893fe6060f1SDimitry Andric return runImpl(F, TTI, DT); 894e8d8bef9SDimitry Andric } 895e8d8bef9SDimitry Andric 896e8d8bef9SDimitry Andric PreservedAnalyses 897e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) { 898e8d8bef9SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F); 899fe6060f1SDimitry Andric auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); 900fe6060f1SDimitry Andric if (!runImpl(F, TTI, DT)) 901e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 902e8d8bef9SDimitry Andric PreservedAnalyses PA; 903e8d8bef9SDimitry Andric PA.preserve<TargetIRAnalysis>(); 904fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 905e8d8bef9SDimitry Andric return PA; 906e8d8bef9SDimitry Andric } 907e8d8bef9SDimitry Andric 908e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 909fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL, 910fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 911e8d8bef9SDimitry Andric bool MadeChange = false; 912e8d8bef9SDimitry Andric 
913e8d8bef9SDimitry Andric BasicBlock::iterator CurInstIterator = BB.begin(); 914e8d8bef9SDimitry Andric while (CurInstIterator != BB.end()) { 915e8d8bef9SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) 916fe6060f1SDimitry Andric MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU); 917e8d8bef9SDimitry Andric if (ModifiedDT) 918e8d8bef9SDimitry Andric return true; 919e8d8bef9SDimitry Andric } 920e8d8bef9SDimitry Andric 921e8d8bef9SDimitry Andric return MadeChange; 922e8d8bef9SDimitry Andric } 923e8d8bef9SDimitry Andric 924e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 925e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 926fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU) { 927e8d8bef9SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 928e8d8bef9SDimitry Andric if (II) { 929e8d8bef9SDimitry Andric // The scalarization code below does not work for scalable vectors. 930e8d8bef9SDimitry Andric if (isa<ScalableVectorType>(II->getType()) || 931349cc55cSDimitry Andric any_of(II->args(), 932e8d8bef9SDimitry Andric [](Value *V) { return isa<ScalableVectorType>(V->getType()); })) 933e8d8bef9SDimitry Andric return false; 934e8d8bef9SDimitry Andric 935e8d8bef9SDimitry Andric switch (II->getIntrinsicID()) { 936e8d8bef9SDimitry Andric default: 937e8d8bef9SDimitry Andric break; 938e8d8bef9SDimitry Andric case Intrinsic::masked_load: 939e8d8bef9SDimitry Andric // Scalarize unsupported vector masked load 940e8d8bef9SDimitry Andric if (TTI.isLegalMaskedLoad( 941e8d8bef9SDimitry Andric CI->getType(), 942e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) 943e8d8bef9SDimitry Andric return false; 944fe6060f1SDimitry Andric scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT); 945e8d8bef9SDimitry Andric return true; 946e8d8bef9SDimitry Andric case Intrinsic::masked_store: 947e8d8bef9SDimitry Andric if (TTI.isLegalMaskedStore( 948e8d8bef9SDimitry Andric 
CI->getArgOperand(0)->getType(), 949e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) 950e8d8bef9SDimitry Andric return false; 951fe6060f1SDimitry Andric scalarizeMaskedStore(DL, CI, DTU, ModifiedDT); 952e8d8bef9SDimitry Andric return true; 953e8d8bef9SDimitry Andric case Intrinsic::masked_gather: { 954fe6060f1SDimitry Andric MaybeAlign MA = 955fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue(); 956e8d8bef9SDimitry Andric Type *LoadTy = CI->getType(); 957fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA, 958fe6060f1SDimitry Andric LoadTy->getScalarType()); 95904eeddc0SDimitry Andric if (TTI.isLegalMaskedGather(LoadTy, Alignment) && 96004eeddc0SDimitry Andric !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment)) 961e8d8bef9SDimitry Andric return false; 962fe6060f1SDimitry Andric scalarizeMaskedGather(DL, CI, DTU, ModifiedDT); 963e8d8bef9SDimitry Andric return true; 964e8d8bef9SDimitry Andric } 965e8d8bef9SDimitry Andric case Intrinsic::masked_scatter: { 966fe6060f1SDimitry Andric MaybeAlign MA = 967fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue(); 968e8d8bef9SDimitry Andric Type *StoreTy = CI->getArgOperand(0)->getType(); 969fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA, 970fe6060f1SDimitry Andric StoreTy->getScalarType()); 97104eeddc0SDimitry Andric if (TTI.isLegalMaskedScatter(StoreTy, Alignment) && 97204eeddc0SDimitry Andric !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy), 97304eeddc0SDimitry Andric Alignment)) 974e8d8bef9SDimitry Andric return false; 975fe6060f1SDimitry Andric scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT); 976e8d8bef9SDimitry Andric return true; 977e8d8bef9SDimitry Andric } 978e8d8bef9SDimitry Andric case Intrinsic::masked_expandload: 979e8d8bef9SDimitry Andric if (TTI.isLegalMaskedExpandLoad(CI->getType())) 980e8d8bef9SDimitry Andric return false; 
981fe6060f1SDimitry Andric scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT); 982e8d8bef9SDimitry Andric return true; 983e8d8bef9SDimitry Andric case Intrinsic::masked_compressstore: 984e8d8bef9SDimitry Andric if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) 985e8d8bef9SDimitry Andric return false; 986fe6060f1SDimitry Andric scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT); 987e8d8bef9SDimitry Andric return true; 988e8d8bef9SDimitry Andric } 989e8d8bef9SDimitry Andric } 990e8d8bef9SDimitry Andric 991e8d8bef9SDimitry Andric return false; 992e8d8bef9SDimitry Andric } 993