1e8d8bef9SDimitry Andric //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// 281ad6265SDimitry Andric // intrinsics 3e8d8bef9SDimitry Andric // 4e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 6e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7e8d8bef9SDimitry Andric // 8e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 9e8d8bef9SDimitry Andric // 10e8d8bef9SDimitry Andric // This pass replaces masked memory intrinsics - when unsupported by the target 11e8d8bef9SDimitry Andric // - with a chain of basic blocks, that deal with the elements one-by-one if the 12e8d8bef9SDimitry Andric // appropriate mask bit is set. 13e8d8bef9SDimitry Andric // 14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 15e8d8bef9SDimitry Andric 16e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" 17e8d8bef9SDimitry Andric #include "llvm/ADT/Twine.h" 18fe6060f1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 19e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/BasicBlock.h" 21e8d8bef9SDimitry Andric #include "llvm/IR/Constant.h" 22e8d8bef9SDimitry Andric #include "llvm/IR/Constants.h" 23e8d8bef9SDimitry Andric #include "llvm/IR/DerivedTypes.h" 24fe6060f1SDimitry Andric #include "llvm/IR/Dominators.h" 25e8d8bef9SDimitry Andric #include "llvm/IR/Function.h" 26e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h" 27e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h" 28e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h" 29e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 30e8d8bef9SDimitry Andric #include "llvm/IR/Type.h" 31e8d8bef9SDimitry Andric #include "llvm/IR/Value.h" 32e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 33e8d8bef9SDimitry Andric #include "llvm/Pass.h" 34e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h" 35e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h" 36fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 37e8d8bef9SDimitry Andric #include <cassert> 38bdd1243dSDimitry Andric #include <optional> 39e8d8bef9SDimitry Andric 40e8d8bef9SDimitry Andric using namespace llvm; 41e8d8bef9SDimitry Andric 42e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin" 43e8d8bef9SDimitry Andric 44e8d8bef9SDimitry Andric namespace { 45e8d8bef9SDimitry Andric 46e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass { 47e8d8bef9SDimitry Andric public: 48e8d8bef9SDimitry Andric static char ID; // Pass identification, replacement for typeid 49e8d8bef9SDimitry Andric 50e8d8bef9SDimitry Andric explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) { 51e8d8bef9SDimitry Andric initializeScalarizeMaskedMemIntrinLegacyPassPass( 52e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 53e8d8bef9SDimitry Andric } 54e8d8bef9SDimitry Andric 55e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override; 56e8d8bef9SDimitry Andric 57e8d8bef9SDimitry Andric StringRef getPassName() const override { 58e8d8bef9SDimitry Andric return "Scalarize Masked Memory Intrinsics"; 59e8d8bef9SDimitry Andric } 60e8d8bef9SDimitry Andric 61e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 62e8d8bef9SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 63fe6060f1SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 64e8d8bef9SDimitry Andric } 65e8d8bef9SDimitry Andric }; 66e8d8bef9SDimitry Andric 67e8d8bef9SDimitry Andric } // end anonymous namespace 68e8d8bef9SDimitry Andric 69e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 70fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL, 71fe6060f1SDimitry Andric DomTreeUpdater *DTU); 72e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 73e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 74fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU); 75e8d8bef9SDimitry Andric 76e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0; 77e8d8bef9SDimitry Andric 78e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 79e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 80e8d8bef9SDimitry Andric false) 81e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 82fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 83e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, 84e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false, 85e8d8bef9SDimitry Andric false) 86e8d8bef9SDimitry Andric 87e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() { 88e8d8bef9SDimitry Andric return new ScalarizeMaskedMemIntrinLegacyPass(); 89e8d8bef9SDimitry Andric } 90e8d8bef9SDimitry Andric 91e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) { 92e8d8bef9SDimitry Andric Constant *C = dyn_cast<Constant>(Mask); 93e8d8bef9SDimitry Andric if (!C) 94e8d8bef9SDimitry Andric return false; 95e8d8bef9SDimitry Andric 96e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); 97e8d8bef9SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) { 98e8d8bef9SDimitry Andric Constant *CElt = C->getAggregateElement(i); 99e8d8bef9SDimitry Andric if (!CElt || !isa<ConstantInt>(CElt)) 100e8d8bef9SDimitry Andric return false; 101e8d8bef9SDimitry Andric } 102e8d8bef9SDimitry Andric 103e8d8bef9SDimitry Andric return true; 104e8d8bef9SDimitry Andric } 105e8d8bef9SDimitry Andric 106fe6060f1SDimitry Andric static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth, 107fe6060f1SDimitry Andric unsigned Idx) { 108fe6060f1SDimitry Andric return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx; 109fe6060f1SDimitry Andric } 110fe6060f1SDimitry Andric 111e8d8bef9SDimitry Andric // Translate a masked load intrinsic like 112e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, 113e8d8bef9SDimitry Andric // <16 x i1> %mask, <16 x i32> %passthru) 114e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 115e8d8bef9SDimitry Andric // the appropriate mask bit is set 116e8d8bef9SDimitry Andric // 117e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 118e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 119e8d8bef9SDimitry Andric // br i1 %2, label %cond.load, label %else 120e8d8bef9SDimitry Andric // 121e8d8bef9SDimitry Andric // cond.load: ; preds = %0 122e8d8bef9SDimitry Andric // %3 = getelementptr i32* %1, i32 0 123e8d8bef9SDimitry Andric // %4 = load i32* %3 124e8d8bef9SDimitry Andric // %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 125e8d8bef9SDimitry Andric // br label %else 126e8d8bef9SDimitry Andric // 127e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.load 128*06c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ] 129e8d8bef9SDimitry Andric // %6 = extractelement <16 x i1> %mask, i32 1 130e8d8bef9SDimitry Andric // br i1 %6, label %cond.load1, label %else2 131e8d8bef9SDimitry Andric // 132e8d8bef9SDimitry Andric // cond.load1: ; preds = %else 133e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1 134e8d8bef9SDimitry Andric // %8 = load i32* %7 135e8d8bef9SDimitry Andric // %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 136e8d8bef9SDimitry Andric // br label %else2 137e8d8bef9SDimitry Andric // 138e8d8bef9SDimitry Andric // else2: ; preds = %else, %cond.load1 139e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] 140e8d8bef9SDimitry Andric // %10 = extractelement <16 x i1> %mask, i32 2 141e8d8bef9SDimitry Andric // br i1 %10, label %cond.load4, label %else5 142e8d8bef9SDimitry Andric // 143fe6060f1SDimitry Andric static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI, 144fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 145e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0); 146e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 147e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 148e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 149e8d8bef9SDimitry Andric 150e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 151e8d8bef9SDimitry Andric VectorType *VecType = cast<FixedVectorType>(CI->getType()); 152e8d8bef9SDimitry Andric 153e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 154e8d8bef9SDimitry Andric 155e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 156e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 157e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 158e8d8bef9SDimitry Andric 159e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 160e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 161e8d8bef9SDimitry Andric 162e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 163e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 164e8d8bef9SDimitry Andric Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); 165e8d8bef9SDimitry Andric CI->replaceAllUsesWith(NewI); 166e8d8bef9SDimitry Andric CI->eraseFromParent(); 167e8d8bef9SDimitry Andric return; 168e8d8bef9SDimitry Andric } 169e8d8bef9SDimitry Andric 170e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 171e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 172e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 173e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 174e8d8bef9SDimitry Andric 175e8d8bef9SDimitry Andric // The result vector 176e8d8bef9SDimitry Andric Value *VResult = Src0; 177e8d8bef9SDimitry Andric 178e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 179e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 180e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 181e8d8bef9SDimitry Andric continue; 182*06c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 183e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 184e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, Load, Idx); 185e8d8bef9SDimitry Andric } 186e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 187e8d8bef9SDimitry Andric CI->eraseFromParent(); 188e8d8bef9SDimitry Andric return; 189e8d8bef9SDimitry Andric } 190e8d8bef9SDimitry Andric 191e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 192e8d8bef9SDimitry Andric // better results on X86 at least. 193e8d8bef9SDimitry Andric Value *SclrMask; 194e8d8bef9SDimitry Andric if (VectorWidth != 1) { 195e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 196e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 197e8d8bef9SDimitry Andric } 198e8d8bef9SDimitry Andric 199e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 200e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 201e8d8bef9SDimitry Andric // 202e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 203e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 204e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 205e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 206e8d8bef9SDimitry Andric // 207e8d8bef9SDimitry Andric Value *Predicate; 208e8d8bef9SDimitry Andric if (VectorWidth != 1) { 209fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 210fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 211e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 212e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 213e8d8bef9SDimitry Andric } else { 214e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 215e8d8bef9SDimitry Andric } 216e8d8bef9SDimitry Andric 217e8d8bef9SDimitry Andric // Create "cond" block 218e8d8bef9SDimitry Andric // 219e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 220e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 221e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 222e8d8bef9SDimitry Andric // 223fe6060f1SDimitry Andric Instruction *ThenTerm = 224fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 225fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 226e8d8bef9SDimitry Andric 227fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 228fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 229fe6060f1SDimitry Andric 230fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 231*06c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 232e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); 233e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 234e8d8bef9SDimitry Andric 235e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 236fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 237fe6060f1SDimitry Andric NewIfBlock->setName("else"); 238e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 239e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 240e8d8bef9SDimitry Andric 241e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 242fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 243e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 244e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 245e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 246e8d8bef9SDimitry Andric VResult = Phi; 247e8d8bef9SDimitry Andric } 248e8d8bef9SDimitry Andric 249e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 250e8d8bef9SDimitry Andric CI->eraseFromParent(); 251e8d8bef9SDimitry Andric 252e8d8bef9SDimitry Andric ModifiedDT = true; 253e8d8bef9SDimitry Andric } 254e8d8bef9SDimitry Andric 255e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like 256e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, 257e8d8bef9SDimitry Andric // <16 x i1> %mask) 258e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 259e8d8bef9SDimitry Andric // the appropriate mask bit is set 260e8d8bef9SDimitry Andric // 261e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32* 262e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0 263e8d8bef9SDimitry Andric // br i1 %2, label %cond.store, label %else 264e8d8bef9SDimitry Andric // 265e8d8bef9SDimitry Andric // cond.store: ; preds = %0 266e8d8bef9SDimitry Andric // %3 = extractelement <16 x i32> %val, i32 0 267e8d8bef9SDimitry Andric // %4 = getelementptr i32* %1, i32 0 268e8d8bef9SDimitry Andric // store i32 %3, i32* %4 269e8d8bef9SDimitry Andric // br label %else 270e8d8bef9SDimitry Andric // 271e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.store 272e8d8bef9SDimitry Andric // %5 = extractelement <16 x i1> %mask, i32 1 273e8d8bef9SDimitry Andric // br i1 %5, label %cond.store1, label %else2 274e8d8bef9SDimitry Andric // 275e8d8bef9SDimitry Andric // cond.store1: ; preds = %else 276e8d8bef9SDimitry Andric // %6 = extractelement <16 x i32> %val, i32 1 277e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1 278e8d8bef9SDimitry Andric // store i32 %6, i32* %7 279e8d8bef9SDimitry Andric // br label %else2 280e8d8bef9SDimitry Andric // . . . 281fe6060f1SDimitry Andric static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI, 282fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 283e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 284e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 285e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 286e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 287e8d8bef9SDimitry Andric 288e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); 289e8d8bef9SDimitry Andric auto *VecType = cast<VectorType>(Src->getType()); 290e8d8bef9SDimitry Andric 291e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 292e8d8bef9SDimitry Andric 293e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 294e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 295e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 296e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 297e8d8bef9SDimitry Andric 298e8d8bef9SDimitry Andric // Short-cut if the mask is all-true. 299e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { 300e8d8bef9SDimitry Andric Builder.CreateAlignedStore(Src, Ptr, AlignVal); 301e8d8bef9SDimitry Andric CI->eraseFromParent(); 302e8d8bef9SDimitry Andric return; 303e8d8bef9SDimitry Andric } 304e8d8bef9SDimitry Andric 305e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction. 306e8d8bef9SDimitry Andric const Align AdjustedAlignVal = 307e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); 308e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); 309e8d8bef9SDimitry Andric 310e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 311e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 312e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 313e8d8bef9SDimitry Andric continue; 314e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 315*06c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 316e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 317e8d8bef9SDimitry Andric } 318e8d8bef9SDimitry Andric CI->eraseFromParent(); 319e8d8bef9SDimitry Andric return; 320e8d8bef9SDimitry Andric } 321e8d8bef9SDimitry Andric 322e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 323e8d8bef9SDimitry Andric // better results on X86 at least. 324e8d8bef9SDimitry Andric Value *SclrMask; 325e8d8bef9SDimitry Andric if (VectorWidth != 1) { 326e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 327e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 328e8d8bef9SDimitry Andric } 329e8d8bef9SDimitry Andric 330e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 331e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 332e8d8bef9SDimitry Andric // 333e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx 334e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 335e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 336e8d8bef9SDimitry Andric // 337e8d8bef9SDimitry Andric Value *Predicate; 338e8d8bef9SDimitry Andric if (VectorWidth != 1) { 339fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 340fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 341e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 342e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 343e8d8bef9SDimitry Andric } else { 344e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx); 345e8d8bef9SDimitry Andric } 346e8d8bef9SDimitry Andric 347e8d8bef9SDimitry Andric // Create "cond" block 348e8d8bef9SDimitry Andric // 349e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 350e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 351e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 352e8d8bef9SDimitry Andric // 353fe6060f1SDimitry Andric Instruction *ThenTerm = 354fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 355fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 356e8d8bef9SDimitry Andric 357fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 358fe6060f1SDimitry Andric CondBlock->setName("cond.store"); 359fe6060f1SDimitry Andric 360fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 361e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 362*06c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx); 363e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); 364e8d8bef9SDimitry Andric 365e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 366fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 367fe6060f1SDimitry Andric NewIfBlock->setName("else"); 368fe6060f1SDimitry Andric 369fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 370e8d8bef9SDimitry Andric } 371e8d8bef9SDimitry Andric CI->eraseFromParent(); 372e8d8bef9SDimitry Andric 373e8d8bef9SDimitry Andric ModifiedDT = true; 374e8d8bef9SDimitry Andric } 375e8d8bef9SDimitry Andric 376e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like 377e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, 378e8d8bef9SDimitry Andric // <16 x i1> %Mask, <16 x i32> %Src) 379e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if 380e8d8bef9SDimitry Andric // the appropriate mask bit is set 381e8d8bef9SDimitry Andric // 382e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind 383e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0 384e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else 385e8d8bef9SDimitry Andric // 386e8d8bef9SDimitry Andric // cond.load: 387e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 388e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4 389*06c3fb27SDimitry Andric // %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0 390e8d8bef9SDimitry Andric // br label %else 391e8d8bef9SDimitry Andric // 392e8d8bef9SDimitry Andric // else: 393*06c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [poison, %0] 394e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1 395e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2 396e8d8bef9SDimitry Andric // 397e8d8bef9SDimitry Andric // cond.load1: 398e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 399e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4 400e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 401e8d8bef9SDimitry Andric // br label %else2 402e8d8bef9SDimitry Andric // . . . 403e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src 404e8d8bef9SDimitry Andric // ret <16 x i32> %Result 405fe6060f1SDimitry Andric static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI, 406fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 407e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(0); 408e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1); 409e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 410e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3); 411e8d8bef9SDimitry Andric 412e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 413e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 414e8d8bef9SDimitry Andric 415e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 416e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 417e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 418e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 419e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 420e8d8bef9SDimitry Andric 421e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 422e8d8bef9SDimitry Andric 423e8d8bef9SDimitry Andric // The result vector 424e8d8bef9SDimitry Andric Value *VResult = Src0; 425e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 426e8d8bef9SDimitry Andric 427e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 428e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 429e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 430e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 431e8d8bef9SDimitry Andric continue; 432e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 433e8d8bef9SDimitry Andric LoadInst *Load = 434e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 435e8d8bef9SDimitry Andric VResult = 436e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 437e8d8bef9SDimitry Andric } 438e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 439e8d8bef9SDimitry Andric CI->eraseFromParent(); 440e8d8bef9SDimitry Andric return; 441e8d8bef9SDimitry Andric } 442e8d8bef9SDimitry Andric 443e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 444e8d8bef9SDimitry Andric // better results on X86 at least. 445e8d8bef9SDimitry Andric Value *SclrMask; 446e8d8bef9SDimitry Andric if (VectorWidth != 1) { 447e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 448e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 449e8d8bef9SDimitry Andric } 450e8d8bef9SDimitry Andric 451e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 452e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 453e8d8bef9SDimitry Andric // 454e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 455e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 456e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load, label %else 457e8d8bef9SDimitry Andric // 458e8d8bef9SDimitry Andric 459e8d8bef9SDimitry Andric Value *Predicate; 460e8d8bef9SDimitry Andric if (VectorWidth != 1) { 461fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 462fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 463e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 464e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 465e8d8bef9SDimitry Andric } else { 466e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 467e8d8bef9SDimitry Andric } 468e8d8bef9SDimitry Andric 469e8d8bef9SDimitry Andric // Create "cond" block 470e8d8bef9SDimitry Andric // 471e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 472e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 473e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 474e8d8bef9SDimitry Andric // 475fe6060f1SDimitry Andric Instruction *ThenTerm = 476fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 477fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 478e8d8bef9SDimitry Andric 479fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 480fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 481fe6060f1SDimitry Andric 482fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 483e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 484e8d8bef9SDimitry Andric LoadInst *Load = 485e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); 486e8d8bef9SDimitry Andric Value *NewVResult = 487e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); 488e8d8bef9SDimitry Andric 489e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 490fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 491fe6060f1SDimitry Andric NewIfBlock->setName("else"); 492e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 493e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 494e8d8bef9SDimitry Andric 495fe6060f1SDimitry Andric // Create the phi to join the new and previous value. 496fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 497e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 498e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock); 499e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock); 500e8d8bef9SDimitry Andric VResult = Phi; 501e8d8bef9SDimitry Andric } 502e8d8bef9SDimitry Andric 503e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 504e8d8bef9SDimitry Andric CI->eraseFromParent(); 505e8d8bef9SDimitry Andric 506e8d8bef9SDimitry Andric ModifiedDT = true; 507e8d8bef9SDimitry Andric } 508e8d8bef9SDimitry Andric 509e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like 510e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, 511e8d8bef9SDimitry Andric // <16 x i1> %Mask) 512e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if 513e8d8bef9SDimitry Andric // the appropriate mask bit is set. 514e8d8bef9SDimitry Andric // 515e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind 516e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0 517e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else 518e8d8bef9SDimitry Andric // 519e8d8bef9SDimitry Andric // cond.store: 520e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0 521e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 522e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4 523e8d8bef9SDimitry Andric // br label %else 524e8d8bef9SDimitry Andric // 525e8d8bef9SDimitry Andric // else: 526e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1 527e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2 528e8d8bef9SDimitry Andric // 529e8d8bef9SDimitry Andric // cond.store1: 530e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1 531e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 532e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4 533e8d8bef9SDimitry Andric // br label %else2 534e8d8bef9SDimitry Andric // . . . 535fe6060f1SDimitry Andric static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI, 536fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 537e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 538e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(1); 539e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2); 540e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3); 541e8d8bef9SDimitry Andric 542e8d8bef9SDimitry Andric auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); 543e8d8bef9SDimitry Andric 544e8d8bef9SDimitry Andric assert( 545e8d8bef9SDimitry Andric isa<VectorType>(Ptrs->getType()) && 546e8d8bef9SDimitry Andric isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && 547e8d8bef9SDimitry Andric "Vector of pointers is expected in masked scatter intrinsic"); 548e8d8bef9SDimitry Andric 549e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 550e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 551e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 552e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 553e8d8bef9SDimitry Andric 554e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); 555e8d8bef9SDimitry Andric unsigned VectorWidth = SrcFVTy->getNumElements(); 556e8d8bef9SDimitry Andric 557e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 558e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 559e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 560e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 561e8d8bef9SDimitry Andric continue; 562e8d8bef9SDimitry Andric Value *OneElt = 563e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 564e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 565e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 566e8d8bef9SDimitry Andric } 567e8d8bef9SDimitry Andric CI->eraseFromParent(); 568e8d8bef9SDimitry Andric return; 569e8d8bef9SDimitry Andric } 570e8d8bef9SDimitry Andric 571e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 572e8d8bef9SDimitry Andric // better results on X86 at least. 573e8d8bef9SDimitry Andric Value *SclrMask; 574e8d8bef9SDimitry Andric if (VectorWidth != 1) { 575e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 576e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 577e8d8bef9SDimitry Andric } 578e8d8bef9SDimitry Andric 579e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 580e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 581e8d8bef9SDimitry Andric // 582e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx 583e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0 584e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store, label %else 585e8d8bef9SDimitry Andric // 586e8d8bef9SDimitry Andric Value *Predicate; 587e8d8bef9SDimitry Andric if (VectorWidth != 1) { 588fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 589fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 590e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 591e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 592e8d8bef9SDimitry Andric } else { 593e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 594e8d8bef9SDimitry Andric } 595e8d8bef9SDimitry Andric 596e8d8bef9SDimitry Andric // Create "cond" block 597e8d8bef9SDimitry Andric // 598e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1 599e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 600e8d8bef9SDimitry Andric // %store i32 %Elt1, i32* %Ptr1 601e8d8bef9SDimitry Andric // 602fe6060f1SDimitry Andric Instruction *ThenTerm = 603fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 604fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 605e8d8bef9SDimitry Andric 606fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 607fe6060f1SDimitry Andric CondBlock->setName("cond.store"); 608fe6060f1SDimitry Andric 609fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 610e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 611e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); 612e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 613e8d8bef9SDimitry Andric 614e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 615fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 616fe6060f1SDimitry Andric NewIfBlock->setName("else"); 617fe6060f1SDimitry Andric 618fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 619e8d8bef9SDimitry Andric } 620e8d8bef9SDimitry Andric CI->eraseFromParent(); 621e8d8bef9SDimitry Andric 622e8d8bef9SDimitry Andric ModifiedDT = true; 623e8d8bef9SDimitry Andric } 624e8d8bef9SDimitry Andric 625fe6060f1SDimitry Andric static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI, 626fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) { 627e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0); 628e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(1); 629e8d8bef9SDimitry Andric Value *PassThru = CI->getArgOperand(2); 630e8d8bef9SDimitry Andric 631e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType()); 632e8d8bef9SDimitry Andric 633e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 634e8d8bef9SDimitry Andric 635e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 636e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 637e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 638e8d8bef9SDimitry Andric 639e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 640e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 641e8d8bef9SDimitry Andric 642e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 643e8d8bef9SDimitry Andric 644e8d8bef9SDimitry Andric // The result vector 645e8d8bef9SDimitry Andric Value *VResult = PassThru; 646e8d8bef9SDimitry Andric 647e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 648*06c3fb27SDimitry Andric // Create a build_vector pattern, with loads/poisons as necessary and then 649e8d8bef9SDimitry Andric // shuffle blend with the pass through value. 650e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 651e8d8bef9SDimitry Andric unsigned MemIndex = 0; 652bdd1243dSDimitry Andric VResult = PoisonValue::get(VecType); 653*06c3fb27SDimitry Andric SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem); 654e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 655e8d8bef9SDimitry Andric Value *InsertElt; 656e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) { 657*06c3fb27SDimitry Andric InsertElt = PoisonValue::get(EltTy); 658e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx + VectorWidth; 659e8d8bef9SDimitry Andric } else { 660e8d8bef9SDimitry Andric Value *NewPtr = 661e8d8bef9SDimitry Andric Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 662e8d8bef9SDimitry Andric InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1), 663e8d8bef9SDimitry Andric "Load" + Twine(Idx)); 664e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx; 665e8d8bef9SDimitry Andric ++MemIndex; 666e8d8bef9SDimitry Andric } 667e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx, 668e8d8bef9SDimitry Andric "Res" + Twine(Idx)); 669e8d8bef9SDimitry Andric } 670e8d8bef9SDimitry Andric VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask); 671e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 672e8d8bef9SDimitry Andric CI->eraseFromParent(); 673e8d8bef9SDimitry Andric return; 674e8d8bef9SDimitry Andric } 675e8d8bef9SDimitry Andric 676e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 677e8d8bef9SDimitry Andric // better results on X86 at least. 678e8d8bef9SDimitry Andric Value *SclrMask; 679e8d8bef9SDimitry Andric if (VectorWidth != 1) { 680e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 681e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 682e8d8bef9SDimitry Andric } 683e8d8bef9SDimitry Andric 684e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 685e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 686e8d8bef9SDimitry Andric // 687e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 688e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 689e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else 690e8d8bef9SDimitry Andric // 691e8d8bef9SDimitry Andric 692e8d8bef9SDimitry Andric Value *Predicate; 693e8d8bef9SDimitry Andric if (VectorWidth != 1) { 694fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 695fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 696e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 697e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 698e8d8bef9SDimitry Andric } else { 699e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 700e8d8bef9SDimitry Andric } 701e8d8bef9SDimitry Andric 702e8d8bef9SDimitry Andric // Create "cond" block 703e8d8bef9SDimitry Andric // 704e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 705e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr 706e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 707e8d8bef9SDimitry Andric // 708fe6060f1SDimitry Andric Instruction *ThenTerm = 709fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 710fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 711e8d8bef9SDimitry Andric 712fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 713fe6060f1SDimitry Andric CondBlock->setName("cond.load"); 714fe6060f1SDimitry Andric 715fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 716e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1)); 717e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); 718e8d8bef9SDimitry Andric 719e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 720e8d8bef9SDimitry Andric Value *NewPtr; 721e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 722e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 723e8d8bef9SDimitry Andric 724e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 725fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 726fe6060f1SDimitry Andric NewIfBlock->setName("else"); 727e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 728e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 729e8d8bef9SDimitry Andric 730e8d8bef9SDimitry Andric // Create the phi to join the new and previous value. 731fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 732e8d8bef9SDimitry Andric PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 733e8d8bef9SDimitry Andric ResultPhi->addIncoming(NewVResult, CondBlock); 734e8d8bef9SDimitry Andric ResultPhi->addIncoming(VResult, PrevIfBlock); 735e8d8bef9SDimitry Andric VResult = ResultPhi; 736e8d8bef9SDimitry Andric 737e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 738e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 739e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 740e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 741e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 742e8d8bef9SDimitry Andric Ptr = PtrPhi; 743e8d8bef9SDimitry Andric } 744e8d8bef9SDimitry Andric } 745e8d8bef9SDimitry Andric 746e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult); 747e8d8bef9SDimitry Andric CI->eraseFromParent(); 748e8d8bef9SDimitry Andric 749e8d8bef9SDimitry Andric ModifiedDT = true; 750e8d8bef9SDimitry Andric } 751e8d8bef9SDimitry Andric 752fe6060f1SDimitry Andric static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI, 753fe6060f1SDimitry Andric DomTreeUpdater *DTU, 754fe6060f1SDimitry Andric bool &ModifiedDT) { 755e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0); 756e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1); 757e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2); 758e8d8bef9SDimitry Andric 759e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(Src->getType()); 760e8d8bef9SDimitry Andric 761e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext()); 762e8d8bef9SDimitry Andric Instruction *InsertPt = CI; 763e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent(); 764e8d8bef9SDimitry Andric 765e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt); 766e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 767e8d8bef9SDimitry Andric 768e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType(); 769e8d8bef9SDimitry Andric 770e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements(); 771e8d8bef9SDimitry Andric 772e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants. 773e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) { 774e8d8bef9SDimitry Andric unsigned MemIndex = 0; 775e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 776e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) 777e8d8bef9SDimitry Andric continue; 778e8d8bef9SDimitry Andric Value *OneElt = 779e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); 780e8d8bef9SDimitry Andric Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); 781e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, NewPtr, Align(1)); 782e8d8bef9SDimitry Andric ++MemIndex; 783e8d8bef9SDimitry Andric } 784e8d8bef9SDimitry Andric CI->eraseFromParent(); 785e8d8bef9SDimitry Andric return; 786e8d8bef9SDimitry Andric } 787e8d8bef9SDimitry Andric 788e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates 789e8d8bef9SDimitry Andric // better results on X86 at least. 790e8d8bef9SDimitry Andric Value *SclrMask; 791e8d8bef9SDimitry Andric if (VectorWidth != 1) { 792e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); 793e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); 794e8d8bef9SDimitry Andric } 795e8d8bef9SDimitry Andric 796e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 797e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration 798e8d8bef9SDimitry Andric // 799e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 800e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else 801e8d8bef9SDimitry Andric // 802e8d8bef9SDimitry Andric Value *Predicate; 803e8d8bef9SDimitry Andric if (VectorWidth != 1) { 804fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet( 805fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); 806e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), 807e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0)); 808e8d8bef9SDimitry Andric } else { 809e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); 810e8d8bef9SDimitry Andric } 811e8d8bef9SDimitry Andric 812e8d8bef9SDimitry Andric // Create "cond" block 813e8d8bef9SDimitry Andric // 814e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx 815e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0 816e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr 817e8d8bef9SDimitry Andric // 818fe6060f1SDimitry Andric Instruction *ThenTerm = 819fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false, 820fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU); 821e8d8bef9SDimitry Andric 822fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent(); 823fe6060f1SDimitry Andric CondBlock->setName("cond.store"); 824fe6060f1SDimitry Andric 825fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator()); 826e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx); 827e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, Align(1)); 828e8d8bef9SDimitry Andric 829e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come. 830e8d8bef9SDimitry Andric Value *NewPtr; 831e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) 832e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); 833e8d8bef9SDimitry Andric 834e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration 835fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0); 836fe6060f1SDimitry Andric NewIfBlock->setName("else"); 837e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock; 838e8d8bef9SDimitry Andric IfBlock = NewIfBlock; 839e8d8bef9SDimitry Andric 840fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin()); 841fe6060f1SDimitry Andric 842e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration. 843e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) { 844e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); 845e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock); 846e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock); 847e8d8bef9SDimitry Andric Ptr = PtrPhi; 848e8d8bef9SDimitry Andric } 849e8d8bef9SDimitry Andric } 850e8d8bef9SDimitry Andric CI->eraseFromParent(); 851e8d8bef9SDimitry Andric 852e8d8bef9SDimitry Andric ModifiedDT = true; 853e8d8bef9SDimitry Andric } 854e8d8bef9SDimitry Andric 855fe6060f1SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI, 856fe6060f1SDimitry Andric DominatorTree *DT) { 857bdd1243dSDimitry Andric std::optional<DomTreeUpdater> DTU; 858fe6060f1SDimitry Andric if (DT) 859fe6060f1SDimitry Andric DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); 860fe6060f1SDimitry Andric 861e8d8bef9SDimitry Andric bool EverMadeChange = false; 862e8d8bef9SDimitry Andric bool MadeChange = true; 863e8d8bef9SDimitry Andric auto &DL = F.getParent()->getDataLayout(); 864e8d8bef9SDimitry Andric while (MadeChange) { 865e8d8bef9SDimitry Andric MadeChange = false; 866349cc55cSDimitry Andric for (BasicBlock &BB : llvm::make_early_inc_range(F)) { 867e8d8bef9SDimitry Andric bool ModifiedDTOnIteration = false; 868349cc55cSDimitry Andric MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL, 869bdd1243dSDimitry Andric DTU ? &*DTU : nullptr); 870fe6060f1SDimitry Andric 871e8d8bef9SDimitry Andric // Restart BB iteration if the dominator tree of the Function was changed 872e8d8bef9SDimitry Andric if (ModifiedDTOnIteration) 873e8d8bef9SDimitry Andric break; 874e8d8bef9SDimitry Andric } 875e8d8bef9SDimitry Andric 876e8d8bef9SDimitry Andric EverMadeChange |= MadeChange; 877e8d8bef9SDimitry Andric } 878e8d8bef9SDimitry Andric return EverMadeChange; 879e8d8bef9SDimitry Andric } 880e8d8bef9SDimitry Andric 881e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) { 882e8d8bef9SDimitry Andric auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 883fe6060f1SDimitry Andric DominatorTree *DT = nullptr; 884fe6060f1SDimitry Andric if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 885fe6060f1SDimitry Andric DT = &DTWP->getDomTree(); 886fe6060f1SDimitry Andric return runImpl(F, TTI, DT); 887e8d8bef9SDimitry Andric } 888e8d8bef9SDimitry Andric 889e8d8bef9SDimitry Andric PreservedAnalyses 890e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) { 891e8d8bef9SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F); 892fe6060f1SDimitry Andric auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); 893fe6060f1SDimitry Andric if (!runImpl(F, TTI, DT)) 894e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 895e8d8bef9SDimitry Andric PreservedAnalyses PA; 896e8d8bef9SDimitry Andric PA.preserve<TargetIRAnalysis>(); 897fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 898e8d8bef9SDimitry Andric return PA; 899e8d8bef9SDimitry Andric } 900e8d8bef9SDimitry Andric 901e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, 902fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL, 903fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 904e8d8bef9SDimitry Andric bool MadeChange = false; 905e8d8bef9SDimitry Andric 906e8d8bef9SDimitry Andric BasicBlock::iterator CurInstIterator = BB.begin(); 907e8d8bef9SDimitry Andric while (CurInstIterator != BB.end()) { 908e8d8bef9SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) 909fe6060f1SDimitry Andric MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU); 910e8d8bef9SDimitry Andric if (ModifiedDT) 911e8d8bef9SDimitry Andric return true; 912e8d8bef9SDimitry Andric } 913e8d8bef9SDimitry Andric 914e8d8bef9SDimitry Andric return MadeChange; 915e8d8bef9SDimitry Andric } 916e8d8bef9SDimitry Andric 917e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, 918e8d8bef9SDimitry Andric const TargetTransformInfo &TTI, 919fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU) { 920e8d8bef9SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 921e8d8bef9SDimitry Andric if (II) { 922e8d8bef9SDimitry Andric // The scalarization code below does not work for scalable vectors. 923e8d8bef9SDimitry Andric if (isa<ScalableVectorType>(II->getType()) || 924349cc55cSDimitry Andric any_of(II->args(), 925e8d8bef9SDimitry Andric [](Value *V) { return isa<ScalableVectorType>(V->getType()); })) 926e8d8bef9SDimitry Andric return false; 927e8d8bef9SDimitry Andric 928e8d8bef9SDimitry Andric switch (II->getIntrinsicID()) { 929e8d8bef9SDimitry Andric default: 930e8d8bef9SDimitry Andric break; 931e8d8bef9SDimitry Andric case Intrinsic::masked_load: 932e8d8bef9SDimitry Andric // Scalarize unsupported vector masked load 933e8d8bef9SDimitry Andric if (TTI.isLegalMaskedLoad( 934e8d8bef9SDimitry Andric CI->getType(), 935e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) 936e8d8bef9SDimitry Andric return false; 937fe6060f1SDimitry Andric scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT); 938e8d8bef9SDimitry Andric return true; 939e8d8bef9SDimitry Andric case Intrinsic::masked_store: 940e8d8bef9SDimitry Andric if (TTI.isLegalMaskedStore( 941e8d8bef9SDimitry Andric CI->getArgOperand(0)->getType(), 942e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) 943e8d8bef9SDimitry Andric return false; 944fe6060f1SDimitry Andric scalarizeMaskedStore(DL, CI, DTU, ModifiedDT); 945e8d8bef9SDimitry Andric return true; 946e8d8bef9SDimitry Andric case Intrinsic::masked_gather: { 947fe6060f1SDimitry Andric MaybeAlign MA = 948fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue(); 949e8d8bef9SDimitry Andric Type *LoadTy = CI->getType(); 950fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA, 951fe6060f1SDimitry Andric LoadTy->getScalarType()); 95204eeddc0SDimitry Andric if (TTI.isLegalMaskedGather(LoadTy, Alignment) && 95304eeddc0SDimitry Andric !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment)) 954e8d8bef9SDimitry Andric return false; 955fe6060f1SDimitry Andric scalarizeMaskedGather(DL, CI, DTU, ModifiedDT); 956e8d8bef9SDimitry Andric return true; 957e8d8bef9SDimitry Andric } 958e8d8bef9SDimitry Andric case Intrinsic::masked_scatter: { 959fe6060f1SDimitry Andric MaybeAlign MA = 960fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue(); 961e8d8bef9SDimitry Andric Type *StoreTy = CI->getArgOperand(0)->getType(); 962fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA, 963fe6060f1SDimitry Andric StoreTy->getScalarType()); 96404eeddc0SDimitry Andric if (TTI.isLegalMaskedScatter(StoreTy, Alignment) && 96504eeddc0SDimitry Andric !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy), 96604eeddc0SDimitry Andric Alignment)) 967e8d8bef9SDimitry Andric return false; 968fe6060f1SDimitry Andric scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT); 969e8d8bef9SDimitry Andric return true; 970e8d8bef9SDimitry Andric } 971e8d8bef9SDimitry Andric case Intrinsic::masked_expandload: 972e8d8bef9SDimitry Andric if (TTI.isLegalMaskedExpandLoad(CI->getType())) 973e8d8bef9SDimitry Andric return false; 974fe6060f1SDimitry Andric scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT); 975e8d8bef9SDimitry Andric return true; 976e8d8bef9SDimitry Andric case Intrinsic::masked_compressstore: 977e8d8bef9SDimitry Andric if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) 978e8d8bef9SDimitry Andric return false; 979fe6060f1SDimitry Andric scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT); 980e8d8bef9SDimitry Andric return true; 981e8d8bef9SDimitry Andric } 982e8d8bef9SDimitry Andric } 983e8d8bef9SDimitry Andric 984e8d8bef9SDimitry Andric return false; 985e8d8bef9SDimitry Andric } 986