xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1*e8d8bef9SDimitry Andric //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
//                                    intrinsics
3*e8d8bef9SDimitry Andric //
4*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
6*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7*e8d8bef9SDimitry Andric //
8*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
9*e8d8bef9SDimitry Andric //
10*e8d8bef9SDimitry Andric // This pass replaces masked memory intrinsics - when unsupported by the target
11*e8d8bef9SDimitry Andric // - with a chain of basic blocks, that deal with the elements one-by-one if the
12*e8d8bef9SDimitry Andric // appropriate mask bit is set.
13*e8d8bef9SDimitry Andric //
14*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15*e8d8bef9SDimitry Andric 
#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include <algorithm>
#include <cassert>
38*e8d8bef9SDimitry Andric 
39*e8d8bef9SDimitry Andric using namespace llvm;
40*e8d8bef9SDimitry Andric 
41*e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin"
42*e8d8bef9SDimitry Andric 
43*e8d8bef9SDimitry Andric namespace {
44*e8d8bef9SDimitry Andric 
45*e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
46*e8d8bef9SDimitry Andric public:
47*e8d8bef9SDimitry Andric   static char ID; // Pass identification, replacement for typeid
48*e8d8bef9SDimitry Andric 
49*e8d8bef9SDimitry Andric   explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
50*e8d8bef9SDimitry Andric     initializeScalarizeMaskedMemIntrinLegacyPassPass(
51*e8d8bef9SDimitry Andric         *PassRegistry::getPassRegistry());
52*e8d8bef9SDimitry Andric   }
53*e8d8bef9SDimitry Andric 
54*e8d8bef9SDimitry Andric   bool runOnFunction(Function &F) override;
55*e8d8bef9SDimitry Andric 
56*e8d8bef9SDimitry Andric   StringRef getPassName() const override {
57*e8d8bef9SDimitry Andric     return "Scalarize Masked Memory Intrinsics";
58*e8d8bef9SDimitry Andric   }
59*e8d8bef9SDimitry Andric 
60*e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
61*e8d8bef9SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
62*e8d8bef9SDimitry Andric   }
63*e8d8bef9SDimitry Andric };
64*e8d8bef9SDimitry Andric 
65*e8d8bef9SDimitry Andric } // end anonymous namespace
66*e8d8bef9SDimitry Andric 
67*e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
68*e8d8bef9SDimitry Andric                           const TargetTransformInfo &TTI, const DataLayout &DL);
69*e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
70*e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
71*e8d8bef9SDimitry Andric                              const DataLayout &DL);
72*e8d8bef9SDimitry Andric 
73*e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
74*e8d8bef9SDimitry Andric 
75*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
76*e8d8bef9SDimitry Andric                       "Scalarize unsupported masked memory intrinsics", false,
77*e8d8bef9SDimitry Andric                       false)
78*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
79*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
80*e8d8bef9SDimitry Andric                     "Scalarize unsupported masked memory intrinsics", false,
81*e8d8bef9SDimitry Andric                     false)
82*e8d8bef9SDimitry Andric 
83*e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
84*e8d8bef9SDimitry Andric   return new ScalarizeMaskedMemIntrinLegacyPass();
85*e8d8bef9SDimitry Andric }
86*e8d8bef9SDimitry Andric 
87*e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) {
88*e8d8bef9SDimitry Andric   Constant *C = dyn_cast<Constant>(Mask);
89*e8d8bef9SDimitry Andric   if (!C)
90*e8d8bef9SDimitry Andric     return false;
91*e8d8bef9SDimitry Andric 
92*e8d8bef9SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
93*e8d8bef9SDimitry Andric   for (unsigned i = 0; i != NumElts; ++i) {
94*e8d8bef9SDimitry Andric     Constant *CElt = C->getAggregateElement(i);
95*e8d8bef9SDimitry Andric     if (!CElt || !isa<ConstantInt>(CElt))
96*e8d8bef9SDimitry Andric       return false;
97*e8d8bef9SDimitry Andric   }
98*e8d8bef9SDimitry Andric 
99*e8d8bef9SDimitry Andric   return true;
100*e8d8bef9SDimitry Andric }
101*e8d8bef9SDimitry Andric 
102*e8d8bef9SDimitry Andric // Translate a masked load intrinsic like
103*e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
104*e8d8bef9SDimitry Andric //                               <16 x i1> %mask, <16 x i32> %passthru)
105*e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
106*e8d8bef9SDimitry Andric // the appropriate mask bit is set
107*e8d8bef9SDimitry Andric //
108*e8d8bef9SDimitry Andric //  %1 = bitcast i8* %addr to i32*
109*e8d8bef9SDimitry Andric //  %2 = extractelement <16 x i1> %mask, i32 0
110*e8d8bef9SDimitry Andric //  br i1 %2, label %cond.load, label %else
111*e8d8bef9SDimitry Andric //
112*e8d8bef9SDimitry Andric // cond.load:                                        ; preds = %0
113*e8d8bef9SDimitry Andric //  %3 = getelementptr i32* %1, i32 0
114*e8d8bef9SDimitry Andric //  %4 = load i32* %3
115*e8d8bef9SDimitry Andric //  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
116*e8d8bef9SDimitry Andric //  br label %else
117*e8d8bef9SDimitry Andric //
118*e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.load
119*e8d8bef9SDimitry Andric //  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
120*e8d8bef9SDimitry Andric //  %6 = extractelement <16 x i1> %mask, i32 1
121*e8d8bef9SDimitry Andric //  br i1 %6, label %cond.load1, label %else2
122*e8d8bef9SDimitry Andric //
123*e8d8bef9SDimitry Andric // cond.load1:                                       ; preds = %else
124*e8d8bef9SDimitry Andric //  %7 = getelementptr i32* %1, i32 1
125*e8d8bef9SDimitry Andric //  %8 = load i32* %7
126*e8d8bef9SDimitry Andric //  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
127*e8d8bef9SDimitry Andric //  br label %else2
128*e8d8bef9SDimitry Andric //
129*e8d8bef9SDimitry Andric // else2:                                          ; preds = %else, %cond.load1
130*e8d8bef9SDimitry Andric //  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
131*e8d8bef9SDimitry Andric //  %10 = extractelement <16 x i1> %mask, i32 2
132*e8d8bef9SDimitry Andric //  br i1 %10, label %cond.load4, label %else5
133*e8d8bef9SDimitry Andric //
134*e8d8bef9SDimitry Andric static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
135*e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
136*e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
137*e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
138*e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
139*e8d8bef9SDimitry Andric 
140*e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
141*e8d8bef9SDimitry Andric   VectorType *VecType = cast<FixedVectorType>(CI->getType());
142*e8d8bef9SDimitry Andric 
143*e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
144*e8d8bef9SDimitry Andric 
145*e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
146*e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
147*e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
148*e8d8bef9SDimitry Andric 
149*e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
150*e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
151*e8d8bef9SDimitry Andric 
152*e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
153*e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
154*e8d8bef9SDimitry Andric     Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
155*e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(NewI);
156*e8d8bef9SDimitry Andric     CI->eraseFromParent();
157*e8d8bef9SDimitry Andric     return;
158*e8d8bef9SDimitry Andric   }
159*e8d8bef9SDimitry Andric 
160*e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
161*e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
162*e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
163*e8d8bef9SDimitry Andric   // Bitcast %addr from i8* to EltTy*
164*e8d8bef9SDimitry Andric   Type *NewPtrType =
165*e8d8bef9SDimitry Andric       EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
166*e8d8bef9SDimitry Andric   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
167*e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
168*e8d8bef9SDimitry Andric 
169*e8d8bef9SDimitry Andric   // The result vector
170*e8d8bef9SDimitry Andric   Value *VResult = Src0;
171*e8d8bef9SDimitry Andric 
172*e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
173*e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
174*e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
175*e8d8bef9SDimitry Andric         continue;
176*e8d8bef9SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
177*e8d8bef9SDimitry Andric       LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
178*e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, Load, Idx);
179*e8d8bef9SDimitry Andric     }
180*e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
181*e8d8bef9SDimitry Andric     CI->eraseFromParent();
182*e8d8bef9SDimitry Andric     return;
183*e8d8bef9SDimitry Andric   }
184*e8d8bef9SDimitry Andric 
185*e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
186*e8d8bef9SDimitry Andric   // better results on X86 at least.
187*e8d8bef9SDimitry Andric   Value *SclrMask;
188*e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
189*e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
190*e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
191*e8d8bef9SDimitry Andric   }
192*e8d8bef9SDimitry Andric 
193*e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
194*e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
195*e8d8bef9SDimitry Andric     //
196*e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
197*e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
198*e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
199*e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
200*e8d8bef9SDimitry Andric     //
201*e8d8bef9SDimitry Andric     Value *Predicate;
202*e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
203*e8d8bef9SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
204*e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
205*e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
206*e8d8bef9SDimitry Andric     } else {
207*e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
208*e8d8bef9SDimitry Andric     }
209*e8d8bef9SDimitry Andric 
210*e8d8bef9SDimitry Andric     // Create "cond" block
211*e8d8bef9SDimitry Andric     //
212*e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
213*e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
214*e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
215*e8d8bef9SDimitry Andric     //
216*e8d8bef9SDimitry Andric     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
217*e8d8bef9SDimitry Andric                                                      "cond.load");
218*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
219*e8d8bef9SDimitry Andric 
220*e8d8bef9SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
221*e8d8bef9SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
222*e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
223*e8d8bef9SDimitry Andric 
224*e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
225*e8d8bef9SDimitry Andric     BasicBlock *NewIfBlock =
226*e8d8bef9SDimitry Andric         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
227*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
228*e8d8bef9SDimitry Andric     Instruction *OldBr = IfBlock->getTerminator();
229*e8d8bef9SDimitry Andric     BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
230*e8d8bef9SDimitry Andric     OldBr->eraseFromParent();
231*e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
232*e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
233*e8d8bef9SDimitry Andric 
234*e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
235*e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
236*e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
237*e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
238*e8d8bef9SDimitry Andric     VResult = Phi;
239*e8d8bef9SDimitry Andric   }
240*e8d8bef9SDimitry Andric 
241*e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
242*e8d8bef9SDimitry Andric   CI->eraseFromParent();
243*e8d8bef9SDimitry Andric 
244*e8d8bef9SDimitry Andric   ModifiedDT = true;
245*e8d8bef9SDimitry Andric }
246*e8d8bef9SDimitry Andric 
247*e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like
248*e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
249*e8d8bef9SDimitry Andric //                               <16 x i1> %mask)
250*e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
251*e8d8bef9SDimitry Andric // the appropriate mask bit is set
252*e8d8bef9SDimitry Andric //
253*e8d8bef9SDimitry Andric //   %1 = bitcast i8* %addr to i32*
254*e8d8bef9SDimitry Andric //   %2 = extractelement <16 x i1> %mask, i32 0
255*e8d8bef9SDimitry Andric //   br i1 %2, label %cond.store, label %else
256*e8d8bef9SDimitry Andric //
257*e8d8bef9SDimitry Andric // cond.store:                                       ; preds = %0
258*e8d8bef9SDimitry Andric //   %3 = extractelement <16 x i32> %val, i32 0
259*e8d8bef9SDimitry Andric //   %4 = getelementptr i32* %1, i32 0
260*e8d8bef9SDimitry Andric //   store i32 %3, i32* %4
261*e8d8bef9SDimitry Andric //   br label %else
262*e8d8bef9SDimitry Andric //
263*e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.store
264*e8d8bef9SDimitry Andric //   %5 = extractelement <16 x i1> %mask, i32 1
265*e8d8bef9SDimitry Andric //   br i1 %5, label %cond.store1, label %else2
266*e8d8bef9SDimitry Andric //
267*e8d8bef9SDimitry Andric // cond.store1:                                      ; preds = %else
268*e8d8bef9SDimitry Andric //   %6 = extractelement <16 x i32> %val, i32 1
269*e8d8bef9SDimitry Andric //   %7 = getelementptr i32* %1, i32 1
270*e8d8bef9SDimitry Andric //   store i32 %6, i32* %7
271*e8d8bef9SDimitry Andric //   br label %else2
272*e8d8bef9SDimitry Andric //   . . .
273*e8d8bef9SDimitry Andric static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
274*e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
275*e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
276*e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
277*e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
278*e8d8bef9SDimitry Andric 
279*e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
280*e8d8bef9SDimitry Andric   auto *VecType = cast<VectorType>(Src->getType());
281*e8d8bef9SDimitry Andric 
282*e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
283*e8d8bef9SDimitry Andric 
284*e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
285*e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
286*e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
287*e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
288*e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
289*e8d8bef9SDimitry Andric 
290*e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
291*e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
292*e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(Src, Ptr, AlignVal);
293*e8d8bef9SDimitry Andric     CI->eraseFromParent();
294*e8d8bef9SDimitry Andric     return;
295*e8d8bef9SDimitry Andric   }
296*e8d8bef9SDimitry Andric 
297*e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
298*e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
299*e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
300*e8d8bef9SDimitry Andric   // Bitcast %addr from i8* to EltTy*
301*e8d8bef9SDimitry Andric   Type *NewPtrType =
302*e8d8bef9SDimitry Andric       EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
303*e8d8bef9SDimitry Andric   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
304*e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
305*e8d8bef9SDimitry Andric 
306*e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
307*e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
308*e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
309*e8d8bef9SDimitry Andric         continue;
310*e8d8bef9SDimitry Andric       Value *OneElt = Builder.CreateExtractElement(Src, Idx);
311*e8d8bef9SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
312*e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
313*e8d8bef9SDimitry Andric     }
314*e8d8bef9SDimitry Andric     CI->eraseFromParent();
315*e8d8bef9SDimitry Andric     return;
316*e8d8bef9SDimitry Andric   }
317*e8d8bef9SDimitry Andric 
318*e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
319*e8d8bef9SDimitry Andric   // better results on X86 at least.
320*e8d8bef9SDimitry Andric   Value *SclrMask;
321*e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
322*e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
323*e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
324*e8d8bef9SDimitry Andric   }
325*e8d8bef9SDimitry Andric 
326*e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
327*e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
328*e8d8bef9SDimitry Andric     //
329*e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
330*e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
331*e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
332*e8d8bef9SDimitry Andric     //
333*e8d8bef9SDimitry Andric     Value *Predicate;
334*e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
335*e8d8bef9SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
336*e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
337*e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
338*e8d8bef9SDimitry Andric     } else {
339*e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
340*e8d8bef9SDimitry Andric     }
341*e8d8bef9SDimitry Andric 
342*e8d8bef9SDimitry Andric     // Create "cond" block
343*e8d8bef9SDimitry Andric     //
344*e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
345*e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
346*e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
347*e8d8bef9SDimitry Andric     //
348*e8d8bef9SDimitry Andric     BasicBlock *CondBlock =
349*e8d8bef9SDimitry Andric         IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
350*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
351*e8d8bef9SDimitry Andric 
352*e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
353*e8d8bef9SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
354*e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
355*e8d8bef9SDimitry Andric 
356*e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
357*e8d8bef9SDimitry Andric     BasicBlock *NewIfBlock =
358*e8d8bef9SDimitry Andric         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
359*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
360*e8d8bef9SDimitry Andric     Instruction *OldBr = IfBlock->getTerminator();
361*e8d8bef9SDimitry Andric     BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
362*e8d8bef9SDimitry Andric     OldBr->eraseFromParent();
363*e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
364*e8d8bef9SDimitry Andric   }
365*e8d8bef9SDimitry Andric   CI->eraseFromParent();
366*e8d8bef9SDimitry Andric 
367*e8d8bef9SDimitry Andric   ModifiedDT = true;
368*e8d8bef9SDimitry Andric }
369*e8d8bef9SDimitry Andric 
370*e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like
371*e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
372*e8d8bef9SDimitry Andric //                               <16 x i1> %Mask, <16 x i32> %Src)
373*e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
374*e8d8bef9SDimitry Andric // the appropriate mask bit is set
375*e8d8bef9SDimitry Andric //
376*e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
377*e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
378*e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else
379*e8d8bef9SDimitry Andric //
380*e8d8bef9SDimitry Andric // cond.load:
381*e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
382*e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4
383*e8d8bef9SDimitry Andric // %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
384*e8d8bef9SDimitry Andric // br label %else
385*e8d8bef9SDimitry Andric //
386*e8d8bef9SDimitry Andric // else:
387*e8d8bef9SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
388*e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
389*e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2
390*e8d8bef9SDimitry Andric //
391*e8d8bef9SDimitry Andric // cond.load1:
392*e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
393*e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4
394*e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
395*e8d8bef9SDimitry Andric // br label %else2
396*e8d8bef9SDimitry Andric // . . .
397*e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
398*e8d8bef9SDimitry Andric // ret <16 x i32> %Result
399*e8d8bef9SDimitry Andric static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
400*e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(0);
401*e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
402*e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
403*e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
404*e8d8bef9SDimitry Andric 
405*e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
406*e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
407*e8d8bef9SDimitry Andric 
408*e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
409*e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
410*e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
411*e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
412*e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
413*e8d8bef9SDimitry Andric 
414*e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
415*e8d8bef9SDimitry Andric 
416*e8d8bef9SDimitry Andric   // The result vector
417*e8d8bef9SDimitry Andric   Value *VResult = Src0;
418*e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
419*e8d8bef9SDimitry Andric 
420*e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
421*e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
422*e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
423*e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
424*e8d8bef9SDimitry Andric         continue;
425*e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
426*e8d8bef9SDimitry Andric       LoadInst *Load =
427*e8d8bef9SDimitry Andric           Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
428*e8d8bef9SDimitry Andric       VResult =
429*e8d8bef9SDimitry Andric           Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
430*e8d8bef9SDimitry Andric     }
431*e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
432*e8d8bef9SDimitry Andric     CI->eraseFromParent();
433*e8d8bef9SDimitry Andric     return;
434*e8d8bef9SDimitry Andric   }
435*e8d8bef9SDimitry Andric 
436*e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
437*e8d8bef9SDimitry Andric   // better results on X86 at least.
438*e8d8bef9SDimitry Andric   Value *SclrMask;
439*e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
440*e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
441*e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
442*e8d8bef9SDimitry Andric   }
443*e8d8bef9SDimitry Andric 
444*e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
445*e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
446*e8d8bef9SDimitry Andric     //
447*e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
448*e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
449*e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.load, label %else
450*e8d8bef9SDimitry Andric     //
451*e8d8bef9SDimitry Andric 
452*e8d8bef9SDimitry Andric     Value *Predicate;
453*e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
454*e8d8bef9SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
455*e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
456*e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
457*e8d8bef9SDimitry Andric     } else {
458*e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
459*e8d8bef9SDimitry Andric     }
460*e8d8bef9SDimitry Andric 
461*e8d8bef9SDimitry Andric     // Create "cond" block
462*e8d8bef9SDimitry Andric     //
463*e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
464*e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
465*e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
466*e8d8bef9SDimitry Andric     //
467*e8d8bef9SDimitry Andric     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
468*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
469*e8d8bef9SDimitry Andric 
470*e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
471*e8d8bef9SDimitry Andric     LoadInst *Load =
472*e8d8bef9SDimitry Andric         Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
473*e8d8bef9SDimitry Andric     Value *NewVResult =
474*e8d8bef9SDimitry Andric         Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
475*e8d8bef9SDimitry Andric 
476*e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
477*e8d8bef9SDimitry Andric     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
478*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
479*e8d8bef9SDimitry Andric     Instruction *OldBr = IfBlock->getTerminator();
480*e8d8bef9SDimitry Andric     BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
481*e8d8bef9SDimitry Andric     OldBr->eraseFromParent();
482*e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
483*e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
484*e8d8bef9SDimitry Andric 
485*e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
486*e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
487*e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
488*e8d8bef9SDimitry Andric     VResult = Phi;
489*e8d8bef9SDimitry Andric   }
490*e8d8bef9SDimitry Andric 
491*e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
492*e8d8bef9SDimitry Andric   CI->eraseFromParent();
493*e8d8bef9SDimitry Andric 
494*e8d8bef9SDimitry Andric   ModifiedDT = true;
495*e8d8bef9SDimitry Andric }
496*e8d8bef9SDimitry Andric 
497*e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like
498*e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
499*e8d8bef9SDimitry Andric //                                  <16 x i1> %Mask)
500*e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
501*e8d8bef9SDimitry Andric // the appropriate mask bit is set.
502*e8d8bef9SDimitry Andric //
503*e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
504*e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
505*e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else
506*e8d8bef9SDimitry Andric //
507*e8d8bef9SDimitry Andric // cond.store:
508*e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0
509*e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
510*e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4
511*e8d8bef9SDimitry Andric // br label %else
512*e8d8bef9SDimitry Andric //
513*e8d8bef9SDimitry Andric // else:
514*e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
515*e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2
516*e8d8bef9SDimitry Andric //
517*e8d8bef9SDimitry Andric // cond.store1:
518*e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1
519*e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
520*e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4
521*e8d8bef9SDimitry Andric // br label %else2
522*e8d8bef9SDimitry Andric //   . . .
523*e8d8bef9SDimitry Andric static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
524*e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
525*e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(1);
526*e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
527*e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
528*e8d8bef9SDimitry Andric 
529*e8d8bef9SDimitry Andric   auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
530*e8d8bef9SDimitry Andric 
531*e8d8bef9SDimitry Andric   assert(
532*e8d8bef9SDimitry Andric       isa<VectorType>(Ptrs->getType()) &&
533*e8d8bef9SDimitry Andric       isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
534*e8d8bef9SDimitry Andric       "Vector of pointers is expected in masked scatter intrinsic");
535*e8d8bef9SDimitry Andric 
536*e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
537*e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
538*e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
539*e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
540*e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
541*e8d8bef9SDimitry Andric 
542*e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
543*e8d8bef9SDimitry Andric   unsigned VectorWidth = SrcFVTy->getNumElements();
544*e8d8bef9SDimitry Andric 
545*e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
546*e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
547*e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
548*e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
549*e8d8bef9SDimitry Andric         continue;
550*e8d8bef9SDimitry Andric       Value *OneElt =
551*e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
552*e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
553*e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
554*e8d8bef9SDimitry Andric     }
555*e8d8bef9SDimitry Andric     CI->eraseFromParent();
556*e8d8bef9SDimitry Andric     return;
557*e8d8bef9SDimitry Andric   }
558*e8d8bef9SDimitry Andric 
559*e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
560*e8d8bef9SDimitry Andric   // better results on X86 at least.
561*e8d8bef9SDimitry Andric   Value *SclrMask;
562*e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
563*e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
564*e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
565*e8d8bef9SDimitry Andric   }
566*e8d8bef9SDimitry Andric 
567*e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
568*e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
569*e8d8bef9SDimitry Andric     //
570*e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
571*e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
572*e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.store, label %else
573*e8d8bef9SDimitry Andric     //
574*e8d8bef9SDimitry Andric     Value *Predicate;
575*e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
576*e8d8bef9SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
577*e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
578*e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
579*e8d8bef9SDimitry Andric     } else {
580*e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
581*e8d8bef9SDimitry Andric     }
582*e8d8bef9SDimitry Andric 
583*e8d8bef9SDimitry Andric     // Create "cond" block
584*e8d8bef9SDimitry Andric     //
585*e8d8bef9SDimitry Andric     //  %Elt1 = extractelement <16 x i32> %Src, i32 1
586*e8d8bef9SDimitry Andric     //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
587*e8d8bef9SDimitry Andric     //  %store i32 %Elt1, i32* %Ptr1
588*e8d8bef9SDimitry Andric     //
589*e8d8bef9SDimitry Andric     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
590*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
591*e8d8bef9SDimitry Andric 
592*e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
593*e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
594*e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
595*e8d8bef9SDimitry Andric 
596*e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
597*e8d8bef9SDimitry Andric     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
598*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
599*e8d8bef9SDimitry Andric     Instruction *OldBr = IfBlock->getTerminator();
600*e8d8bef9SDimitry Andric     BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
601*e8d8bef9SDimitry Andric     OldBr->eraseFromParent();
602*e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
603*e8d8bef9SDimitry Andric   }
604*e8d8bef9SDimitry Andric   CI->eraseFromParent();
605*e8d8bef9SDimitry Andric 
606*e8d8bef9SDimitry Andric   ModifiedDT = true;
607*e8d8bef9SDimitry Andric }
608*e8d8bef9SDimitry Andric 
609*e8d8bef9SDimitry Andric static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
610*e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
611*e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(1);
612*e8d8bef9SDimitry Andric   Value *PassThru = CI->getArgOperand(2);
613*e8d8bef9SDimitry Andric 
614*e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
615*e8d8bef9SDimitry Andric 
616*e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
617*e8d8bef9SDimitry Andric 
618*e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
619*e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
620*e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
621*e8d8bef9SDimitry Andric 
622*e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
623*e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
624*e8d8bef9SDimitry Andric 
625*e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
626*e8d8bef9SDimitry Andric 
627*e8d8bef9SDimitry Andric   // The result vector
628*e8d8bef9SDimitry Andric   Value *VResult = PassThru;
629*e8d8bef9SDimitry Andric 
630*e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
631*e8d8bef9SDimitry Andric   // Create a build_vector pattern, with loads/undefs as necessary and then
632*e8d8bef9SDimitry Andric   // shuffle blend with the pass through value.
633*e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
634*e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
635*e8d8bef9SDimitry Andric     VResult = UndefValue::get(VecType);
636*e8d8bef9SDimitry Andric     SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
637*e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
638*e8d8bef9SDimitry Andric       Value *InsertElt;
639*e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
640*e8d8bef9SDimitry Andric         InsertElt = UndefValue::get(EltTy);
641*e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx + VectorWidth;
642*e8d8bef9SDimitry Andric       } else {
643*e8d8bef9SDimitry Andric         Value *NewPtr =
644*e8d8bef9SDimitry Andric             Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
645*e8d8bef9SDimitry Andric         InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
646*e8d8bef9SDimitry Andric                                               "Load" + Twine(Idx));
647*e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx;
648*e8d8bef9SDimitry Andric         ++MemIndex;
649*e8d8bef9SDimitry Andric       }
650*e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
651*e8d8bef9SDimitry Andric                                             "Res" + Twine(Idx));
652*e8d8bef9SDimitry Andric     }
653*e8d8bef9SDimitry Andric     VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
654*e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
655*e8d8bef9SDimitry Andric     CI->eraseFromParent();
656*e8d8bef9SDimitry Andric     return;
657*e8d8bef9SDimitry Andric   }
658*e8d8bef9SDimitry Andric 
659*e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
660*e8d8bef9SDimitry Andric   // better results on X86 at least.
661*e8d8bef9SDimitry Andric   Value *SclrMask;
662*e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
663*e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
664*e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
665*e8d8bef9SDimitry Andric   }
666*e8d8bef9SDimitry Andric 
667*e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
668*e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
669*e8d8bef9SDimitry Andric     //
670*e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
671*e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
672*e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
673*e8d8bef9SDimitry Andric     //
674*e8d8bef9SDimitry Andric 
675*e8d8bef9SDimitry Andric     Value *Predicate;
676*e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
677*e8d8bef9SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
678*e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
679*e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
680*e8d8bef9SDimitry Andric     } else {
681*e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
682*e8d8bef9SDimitry Andric     }
683*e8d8bef9SDimitry Andric 
684*e8d8bef9SDimitry Andric     // Create "cond" block
685*e8d8bef9SDimitry Andric     //
686*e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
687*e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
688*e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
689*e8d8bef9SDimitry Andric     //
690*e8d8bef9SDimitry Andric     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
691*e8d8bef9SDimitry Andric                                                      "cond.load");
692*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
693*e8d8bef9SDimitry Andric 
694*e8d8bef9SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
695*e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
696*e8d8bef9SDimitry Andric 
697*e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
698*e8d8bef9SDimitry Andric     Value *NewPtr;
699*e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
700*e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
701*e8d8bef9SDimitry Andric 
702*e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
703*e8d8bef9SDimitry Andric     BasicBlock *NewIfBlock =
704*e8d8bef9SDimitry Andric         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
705*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
706*e8d8bef9SDimitry Andric     Instruction *OldBr = IfBlock->getTerminator();
707*e8d8bef9SDimitry Andric     BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
708*e8d8bef9SDimitry Andric     OldBr->eraseFromParent();
709*e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
710*e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
711*e8d8bef9SDimitry Andric 
712*e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
713*e8d8bef9SDimitry Andric     PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
714*e8d8bef9SDimitry Andric     ResultPhi->addIncoming(NewVResult, CondBlock);
715*e8d8bef9SDimitry Andric     ResultPhi->addIncoming(VResult, PrevIfBlock);
716*e8d8bef9SDimitry Andric     VResult = ResultPhi;
717*e8d8bef9SDimitry Andric 
718*e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
719*e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
720*e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
721*e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
722*e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
723*e8d8bef9SDimitry Andric       Ptr = PtrPhi;
724*e8d8bef9SDimitry Andric     }
725*e8d8bef9SDimitry Andric   }
726*e8d8bef9SDimitry Andric 
727*e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
728*e8d8bef9SDimitry Andric   CI->eraseFromParent();
729*e8d8bef9SDimitry Andric 
730*e8d8bef9SDimitry Andric   ModifiedDT = true;
731*e8d8bef9SDimitry Andric }
732*e8d8bef9SDimitry Andric 
733*e8d8bef9SDimitry Andric static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
734*e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
735*e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
736*e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
737*e8d8bef9SDimitry Andric 
738*e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(Src->getType());
739*e8d8bef9SDimitry Andric 
740*e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
741*e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
742*e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
743*e8d8bef9SDimitry Andric 
744*e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
745*e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
746*e8d8bef9SDimitry Andric 
747*e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
748*e8d8bef9SDimitry Andric 
749*e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
750*e8d8bef9SDimitry Andric 
751*e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
752*e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
753*e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
754*e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
755*e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
756*e8d8bef9SDimitry Andric         continue;
757*e8d8bef9SDimitry Andric       Value *OneElt =
758*e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
759*e8d8bef9SDimitry Andric       Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
760*e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
761*e8d8bef9SDimitry Andric       ++MemIndex;
762*e8d8bef9SDimitry Andric     }
763*e8d8bef9SDimitry Andric     CI->eraseFromParent();
764*e8d8bef9SDimitry Andric     return;
765*e8d8bef9SDimitry Andric   }
766*e8d8bef9SDimitry Andric 
767*e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
768*e8d8bef9SDimitry Andric   // better results on X86 at least.
769*e8d8bef9SDimitry Andric   Value *SclrMask;
770*e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
771*e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
772*e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
773*e8d8bef9SDimitry Andric   }
774*e8d8bef9SDimitry Andric 
775*e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
776*e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
777*e8d8bef9SDimitry Andric     //
778*e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
779*e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
780*e8d8bef9SDimitry Andric     //
781*e8d8bef9SDimitry Andric     Value *Predicate;
782*e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
783*e8d8bef9SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
784*e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
785*e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
786*e8d8bef9SDimitry Andric     } else {
787*e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
788*e8d8bef9SDimitry Andric     }
789*e8d8bef9SDimitry Andric 
790*e8d8bef9SDimitry Andric     // Create "cond" block
791*e8d8bef9SDimitry Andric     //
792*e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
793*e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
794*e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
795*e8d8bef9SDimitry Andric     //
796*e8d8bef9SDimitry Andric     BasicBlock *CondBlock =
797*e8d8bef9SDimitry Andric         IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
798*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
799*e8d8bef9SDimitry Andric 
800*e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
801*e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
802*e8d8bef9SDimitry Andric 
803*e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
804*e8d8bef9SDimitry Andric     Value *NewPtr;
805*e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
806*e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
807*e8d8bef9SDimitry Andric 
808*e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
809*e8d8bef9SDimitry Andric     BasicBlock *NewIfBlock =
810*e8d8bef9SDimitry Andric         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
811*e8d8bef9SDimitry Andric     Builder.SetInsertPoint(InsertPt);
812*e8d8bef9SDimitry Andric     Instruction *OldBr = IfBlock->getTerminator();
813*e8d8bef9SDimitry Andric     BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
814*e8d8bef9SDimitry Andric     OldBr->eraseFromParent();
815*e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
816*e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
817*e8d8bef9SDimitry Andric 
818*e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
819*e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
820*e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
821*e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
822*e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
823*e8d8bef9SDimitry Andric       Ptr = PtrPhi;
824*e8d8bef9SDimitry Andric     }
825*e8d8bef9SDimitry Andric   }
826*e8d8bef9SDimitry Andric   CI->eraseFromParent();
827*e8d8bef9SDimitry Andric 
828*e8d8bef9SDimitry Andric   ModifiedDT = true;
829*e8d8bef9SDimitry Andric }
830*e8d8bef9SDimitry Andric 
831*e8d8bef9SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
832*e8d8bef9SDimitry Andric   bool EverMadeChange = false;
833*e8d8bef9SDimitry Andric   bool MadeChange = true;
834*e8d8bef9SDimitry Andric   auto &DL = F.getParent()->getDataLayout();
835*e8d8bef9SDimitry Andric   while (MadeChange) {
836*e8d8bef9SDimitry Andric     MadeChange = false;
837*e8d8bef9SDimitry Andric     for (Function::iterator I = F.begin(); I != F.end();) {
838*e8d8bef9SDimitry Andric       BasicBlock *BB = &*I++;
839*e8d8bef9SDimitry Andric       bool ModifiedDTOnIteration = false;
840*e8d8bef9SDimitry Andric       MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL);
841*e8d8bef9SDimitry Andric 
842*e8d8bef9SDimitry Andric       // Restart BB iteration if the dominator tree of the Function was changed
843*e8d8bef9SDimitry Andric       if (ModifiedDTOnIteration)
844*e8d8bef9SDimitry Andric         break;
845*e8d8bef9SDimitry Andric     }
846*e8d8bef9SDimitry Andric 
847*e8d8bef9SDimitry Andric     EverMadeChange |= MadeChange;
848*e8d8bef9SDimitry Andric   }
849*e8d8bef9SDimitry Andric   return EverMadeChange;
850*e8d8bef9SDimitry Andric }
851*e8d8bef9SDimitry Andric 
852*e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
853*e8d8bef9SDimitry Andric   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
854*e8d8bef9SDimitry Andric   return runImpl(F, TTI);
855*e8d8bef9SDimitry Andric }
856*e8d8bef9SDimitry Andric 
857*e8d8bef9SDimitry Andric PreservedAnalyses
858*e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
859*e8d8bef9SDimitry Andric   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
860*e8d8bef9SDimitry Andric   if (!runImpl(F, TTI))
861*e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
862*e8d8bef9SDimitry Andric   PreservedAnalyses PA;
863*e8d8bef9SDimitry Andric   PA.preserve<TargetIRAnalysis>();
864*e8d8bef9SDimitry Andric   return PA;
865*e8d8bef9SDimitry Andric }
866*e8d8bef9SDimitry Andric 
867*e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
868*e8d8bef9SDimitry Andric                           const TargetTransformInfo &TTI,
869*e8d8bef9SDimitry Andric                           const DataLayout &DL) {
870*e8d8bef9SDimitry Andric   bool MadeChange = false;
871*e8d8bef9SDimitry Andric 
872*e8d8bef9SDimitry Andric   BasicBlock::iterator CurInstIterator = BB.begin();
873*e8d8bef9SDimitry Andric   while (CurInstIterator != BB.end()) {
874*e8d8bef9SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
875*e8d8bef9SDimitry Andric       MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL);
876*e8d8bef9SDimitry Andric     if (ModifiedDT)
877*e8d8bef9SDimitry Andric       return true;
878*e8d8bef9SDimitry Andric   }
879*e8d8bef9SDimitry Andric 
880*e8d8bef9SDimitry Andric   return MadeChange;
881*e8d8bef9SDimitry Andric }
882*e8d8bef9SDimitry Andric 
883*e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
884*e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
885*e8d8bef9SDimitry Andric                              const DataLayout &DL) {
886*e8d8bef9SDimitry Andric   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
887*e8d8bef9SDimitry Andric   if (II) {
888*e8d8bef9SDimitry Andric     // The scalarization code below does not work for scalable vectors.
889*e8d8bef9SDimitry Andric     if (isa<ScalableVectorType>(II->getType()) ||
890*e8d8bef9SDimitry Andric         any_of(II->arg_operands(),
891*e8d8bef9SDimitry Andric                [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
892*e8d8bef9SDimitry Andric       return false;
893*e8d8bef9SDimitry Andric 
894*e8d8bef9SDimitry Andric     switch (II->getIntrinsicID()) {
895*e8d8bef9SDimitry Andric     default:
896*e8d8bef9SDimitry Andric       break;
897*e8d8bef9SDimitry Andric     case Intrinsic::masked_load:
898*e8d8bef9SDimitry Andric       // Scalarize unsupported vector masked load
899*e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedLoad(
900*e8d8bef9SDimitry Andric               CI->getType(),
901*e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
902*e8d8bef9SDimitry Andric         return false;
903*e8d8bef9SDimitry Andric       scalarizeMaskedLoad(CI, ModifiedDT);
904*e8d8bef9SDimitry Andric       return true;
905*e8d8bef9SDimitry Andric     case Intrinsic::masked_store:
906*e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedStore(
907*e8d8bef9SDimitry Andric               CI->getArgOperand(0)->getType(),
908*e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
909*e8d8bef9SDimitry Andric         return false;
910*e8d8bef9SDimitry Andric       scalarizeMaskedStore(CI, ModifiedDT);
911*e8d8bef9SDimitry Andric       return true;
912*e8d8bef9SDimitry Andric     case Intrinsic::masked_gather: {
913*e8d8bef9SDimitry Andric       unsigned AlignmentInt =
914*e8d8bef9SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
915*e8d8bef9SDimitry Andric       Type *LoadTy = CI->getType();
916*e8d8bef9SDimitry Andric       Align Alignment =
917*e8d8bef9SDimitry Andric           DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
918*e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedGather(LoadTy, Alignment))
919*e8d8bef9SDimitry Andric         return false;
920*e8d8bef9SDimitry Andric       scalarizeMaskedGather(CI, ModifiedDT);
921*e8d8bef9SDimitry Andric       return true;
922*e8d8bef9SDimitry Andric     }
923*e8d8bef9SDimitry Andric     case Intrinsic::masked_scatter: {
924*e8d8bef9SDimitry Andric       unsigned AlignmentInt =
925*e8d8bef9SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
926*e8d8bef9SDimitry Andric       Type *StoreTy = CI->getArgOperand(0)->getType();
927*e8d8bef9SDimitry Andric       Align Alignment =
928*e8d8bef9SDimitry Andric           DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
929*e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedScatter(StoreTy, Alignment))
930*e8d8bef9SDimitry Andric         return false;
931*e8d8bef9SDimitry Andric       scalarizeMaskedScatter(CI, ModifiedDT);
932*e8d8bef9SDimitry Andric       return true;
933*e8d8bef9SDimitry Andric     }
934*e8d8bef9SDimitry Andric     case Intrinsic::masked_expandload:
935*e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedExpandLoad(CI->getType()))
936*e8d8bef9SDimitry Andric         return false;
937*e8d8bef9SDimitry Andric       scalarizeMaskedExpandLoad(CI, ModifiedDT);
938*e8d8bef9SDimitry Andric       return true;
939*e8d8bef9SDimitry Andric     case Intrinsic::masked_compressstore:
940*e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
941*e8d8bef9SDimitry Andric         return false;
942*e8d8bef9SDimitry Andric       scalarizeMaskedCompressStore(CI, ModifiedDT);
943*e8d8bef9SDimitry Andric       return true;
944*e8d8bef9SDimitry Andric     }
945*e8d8bef9SDimitry Andric   }
946*e8d8bef9SDimitry Andric 
947*e8d8bef9SDimitry Andric   return false;
948*e8d8bef9SDimitry Andric }
949