xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1e8d8bef9SDimitry Andric //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
2*81ad6265SDimitry Andric //                                    intrinsics
3e8d8bef9SDimitry Andric //
4e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
6e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7e8d8bef9SDimitry Andric //
8e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
9e8d8bef9SDimitry Andric //
10e8d8bef9SDimitry Andric // This pass replaces masked memory intrinsics - when unsupported by the target
11e8d8bef9SDimitry Andric // - with a chain of basic blocks, that deal with the elements one-by-one if the
12e8d8bef9SDimitry Andric // appropriate mask bit is set.
13e8d8bef9SDimitry Andric //
14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric 
16e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
17e8d8bef9SDimitry Andric #include "llvm/ADT/Twine.h"
18fe6060f1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
19e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
20e8d8bef9SDimitry Andric #include "llvm/IR/BasicBlock.h"
21e8d8bef9SDimitry Andric #include "llvm/IR/Constant.h"
22e8d8bef9SDimitry Andric #include "llvm/IR/Constants.h"
23e8d8bef9SDimitry Andric #include "llvm/IR/DerivedTypes.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Dominators.h"
25e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
26e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h"
27e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h"
28e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h"
29e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/Type.h"
31e8d8bef9SDimitry Andric #include "llvm/IR/Value.h"
32e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
33e8d8bef9SDimitry Andric #include "llvm/Pass.h"
34e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h"
35e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h"
36fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37e8d8bef9SDimitry Andric #include <cassert>
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric using namespace llvm;
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin"
42e8d8bef9SDimitry Andric 
43e8d8bef9SDimitry Andric namespace {
44e8d8bef9SDimitry Andric 
45e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
46e8d8bef9SDimitry Andric public:
47e8d8bef9SDimitry Andric   static char ID; // Pass identification, replacement for typeid
48e8d8bef9SDimitry Andric 
49e8d8bef9SDimitry Andric   explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
50e8d8bef9SDimitry Andric     initializeScalarizeMaskedMemIntrinLegacyPassPass(
51e8d8bef9SDimitry Andric         *PassRegistry::getPassRegistry());
52e8d8bef9SDimitry Andric   }
53e8d8bef9SDimitry Andric 
54e8d8bef9SDimitry Andric   bool runOnFunction(Function &F) override;
55e8d8bef9SDimitry Andric 
56e8d8bef9SDimitry Andric   StringRef getPassName() const override {
57e8d8bef9SDimitry Andric     return "Scalarize Masked Memory Intrinsics";
58e8d8bef9SDimitry Andric   }
59e8d8bef9SDimitry Andric 
60e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
61e8d8bef9SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
62fe6060f1SDimitry Andric     AU.addPreserved<DominatorTreeWrapperPass>();
63e8d8bef9SDimitry Andric   }
64e8d8bef9SDimitry Andric };
65e8d8bef9SDimitry Andric 
66e8d8bef9SDimitry Andric } // end anonymous namespace
67e8d8bef9SDimitry Andric 
68e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
69fe6060f1SDimitry Andric                           const TargetTransformInfo &TTI, const DataLayout &DL,
70fe6060f1SDimitry Andric                           DomTreeUpdater *DTU);
71e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
72e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
73fe6060f1SDimitry Andric                              const DataLayout &DL, DomTreeUpdater *DTU);
74e8d8bef9SDimitry Andric 
75e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
76e8d8bef9SDimitry Andric 
77e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
78e8d8bef9SDimitry Andric                       "Scalarize unsupported masked memory intrinsics", false,
79e8d8bef9SDimitry Andric                       false)
80e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
81fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
82e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
83e8d8bef9SDimitry Andric                     "Scalarize unsupported masked memory intrinsics", false,
84e8d8bef9SDimitry Andric                     false)
85e8d8bef9SDimitry Andric 
86e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
87e8d8bef9SDimitry Andric   return new ScalarizeMaskedMemIntrinLegacyPass();
88e8d8bef9SDimitry Andric }
89e8d8bef9SDimitry Andric 
90e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) {
91e8d8bef9SDimitry Andric   Constant *C = dyn_cast<Constant>(Mask);
92e8d8bef9SDimitry Andric   if (!C)
93e8d8bef9SDimitry Andric     return false;
94e8d8bef9SDimitry Andric 
95e8d8bef9SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
96e8d8bef9SDimitry Andric   for (unsigned i = 0; i != NumElts; ++i) {
97e8d8bef9SDimitry Andric     Constant *CElt = C->getAggregateElement(i);
98e8d8bef9SDimitry Andric     if (!CElt || !isa<ConstantInt>(CElt))
99e8d8bef9SDimitry Andric       return false;
100e8d8bef9SDimitry Andric   }
101e8d8bef9SDimitry Andric 
102e8d8bef9SDimitry Andric   return true;
103e8d8bef9SDimitry Andric }
104e8d8bef9SDimitry Andric 
105fe6060f1SDimitry Andric static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
106fe6060f1SDimitry Andric                                 unsigned Idx) {
107fe6060f1SDimitry Andric   return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
108fe6060f1SDimitry Andric }
109fe6060f1SDimitry Andric 
110e8d8bef9SDimitry Andric // Translate a masked load intrinsic like
111e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
112e8d8bef9SDimitry Andric //                               <16 x i1> %mask, <16 x i32> %passthru)
113e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
114e8d8bef9SDimitry Andric // the appropriate mask bit is set
115e8d8bef9SDimitry Andric //
116e8d8bef9SDimitry Andric //  %1 = bitcast i8* %addr to i32*
117e8d8bef9SDimitry Andric //  %2 = extractelement <16 x i1> %mask, i32 0
118e8d8bef9SDimitry Andric //  br i1 %2, label %cond.load, label %else
119e8d8bef9SDimitry Andric //
120e8d8bef9SDimitry Andric // cond.load:                                        ; preds = %0
121e8d8bef9SDimitry Andric //  %3 = getelementptr i32* %1, i32 0
122e8d8bef9SDimitry Andric //  %4 = load i32* %3
123e8d8bef9SDimitry Andric //  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
124e8d8bef9SDimitry Andric //  br label %else
125e8d8bef9SDimitry Andric //
126e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.load
127e8d8bef9SDimitry Andric //  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
128e8d8bef9SDimitry Andric //  %6 = extractelement <16 x i1> %mask, i32 1
129e8d8bef9SDimitry Andric //  br i1 %6, label %cond.load1, label %else2
130e8d8bef9SDimitry Andric //
131e8d8bef9SDimitry Andric // cond.load1:                                       ; preds = %else
132e8d8bef9SDimitry Andric //  %7 = getelementptr i32* %1, i32 1
133e8d8bef9SDimitry Andric //  %8 = load i32* %7
134e8d8bef9SDimitry Andric //  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
135e8d8bef9SDimitry Andric //  br label %else2
136e8d8bef9SDimitry Andric //
137e8d8bef9SDimitry Andric // else2:                                          ; preds = %else, %cond.load1
138e8d8bef9SDimitry Andric //  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
139e8d8bef9SDimitry Andric //  %10 = extractelement <16 x i1> %mask, i32 2
140e8d8bef9SDimitry Andric //  br i1 %10, label %cond.load4, label %else5
141e8d8bef9SDimitry Andric //
142fe6060f1SDimitry Andric static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
143fe6060f1SDimitry Andric                                 DomTreeUpdater *DTU, bool &ModifiedDT) {
144e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
145e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
146e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
147e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
148e8d8bef9SDimitry Andric 
149e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
150e8d8bef9SDimitry Andric   VectorType *VecType = cast<FixedVectorType>(CI->getType());
151e8d8bef9SDimitry Andric 
152e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
153e8d8bef9SDimitry Andric 
154e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
155e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
156e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
157e8d8bef9SDimitry Andric 
158e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
159e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
160e8d8bef9SDimitry Andric 
161e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
162e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
163e8d8bef9SDimitry Andric     Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
164e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(NewI);
165e8d8bef9SDimitry Andric     CI->eraseFromParent();
166e8d8bef9SDimitry Andric     return;
167e8d8bef9SDimitry Andric   }
168e8d8bef9SDimitry Andric 
169e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
170e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
171e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
172e8d8bef9SDimitry Andric   // Bitcast %addr from i8* to EltTy*
173e8d8bef9SDimitry Andric   Type *NewPtrType =
174e8d8bef9SDimitry Andric       EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
175e8d8bef9SDimitry Andric   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
176e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
177e8d8bef9SDimitry Andric 
178e8d8bef9SDimitry Andric   // The result vector
179e8d8bef9SDimitry Andric   Value *VResult = Src0;
180e8d8bef9SDimitry Andric 
181e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
182e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
183e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
184e8d8bef9SDimitry Andric         continue;
185e8d8bef9SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
186e8d8bef9SDimitry Andric       LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
187e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, Load, Idx);
188e8d8bef9SDimitry Andric     }
189e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
190e8d8bef9SDimitry Andric     CI->eraseFromParent();
191e8d8bef9SDimitry Andric     return;
192e8d8bef9SDimitry Andric   }
193e8d8bef9SDimitry Andric 
194e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
195e8d8bef9SDimitry Andric   // better results on X86 at least.
196e8d8bef9SDimitry Andric   Value *SclrMask;
197e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
198e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
199e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
200e8d8bef9SDimitry Andric   }
201e8d8bef9SDimitry Andric 
202e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
203e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
204e8d8bef9SDimitry Andric     //
205e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
206e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
207e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
208e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
209e8d8bef9SDimitry Andric     //
210e8d8bef9SDimitry Andric     Value *Predicate;
211e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
212fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
213fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
214e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
215e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
216e8d8bef9SDimitry Andric     } else {
217e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
218e8d8bef9SDimitry Andric     }
219e8d8bef9SDimitry Andric 
220e8d8bef9SDimitry Andric     // Create "cond" block
221e8d8bef9SDimitry Andric     //
222e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
223e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
224e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
225e8d8bef9SDimitry Andric     //
226fe6060f1SDimitry Andric     Instruction *ThenTerm =
227fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
228fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
229e8d8bef9SDimitry Andric 
230fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
231fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
232fe6060f1SDimitry Andric 
233fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
234e8d8bef9SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
235e8d8bef9SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
236e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
237e8d8bef9SDimitry Andric 
238e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
239fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
240fe6060f1SDimitry Andric     NewIfBlock->setName("else");
241e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
242e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
243e8d8bef9SDimitry Andric 
244e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
245fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
246e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
247e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
248e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
249e8d8bef9SDimitry Andric     VResult = Phi;
250e8d8bef9SDimitry Andric   }
251e8d8bef9SDimitry Andric 
252e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
253e8d8bef9SDimitry Andric   CI->eraseFromParent();
254e8d8bef9SDimitry Andric 
255e8d8bef9SDimitry Andric   ModifiedDT = true;
256e8d8bef9SDimitry Andric }
257e8d8bef9SDimitry Andric 
258e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like
259e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
260e8d8bef9SDimitry Andric //                               <16 x i1> %mask)
261e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
262e8d8bef9SDimitry Andric // the appropriate mask bit is set
263e8d8bef9SDimitry Andric //
264e8d8bef9SDimitry Andric //   %1 = bitcast i8* %addr to i32*
265e8d8bef9SDimitry Andric //   %2 = extractelement <16 x i1> %mask, i32 0
266e8d8bef9SDimitry Andric //   br i1 %2, label %cond.store, label %else
267e8d8bef9SDimitry Andric //
268e8d8bef9SDimitry Andric // cond.store:                                       ; preds = %0
269e8d8bef9SDimitry Andric //   %3 = extractelement <16 x i32> %val, i32 0
270e8d8bef9SDimitry Andric //   %4 = getelementptr i32* %1, i32 0
271e8d8bef9SDimitry Andric //   store i32 %3, i32* %4
272e8d8bef9SDimitry Andric //   br label %else
273e8d8bef9SDimitry Andric //
274e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.store
275e8d8bef9SDimitry Andric //   %5 = extractelement <16 x i1> %mask, i32 1
276e8d8bef9SDimitry Andric //   br i1 %5, label %cond.store1, label %else2
277e8d8bef9SDimitry Andric //
278e8d8bef9SDimitry Andric // cond.store1:                                      ; preds = %else
279e8d8bef9SDimitry Andric //   %6 = extractelement <16 x i32> %val, i32 1
280e8d8bef9SDimitry Andric //   %7 = getelementptr i32* %1, i32 1
281e8d8bef9SDimitry Andric //   store i32 %6, i32* %7
282e8d8bef9SDimitry Andric //   br label %else2
283e8d8bef9SDimitry Andric //   . . .
284fe6060f1SDimitry Andric static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
285fe6060f1SDimitry Andric                                  DomTreeUpdater *DTU, bool &ModifiedDT) {
286e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
287e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
288e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
289e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
290e8d8bef9SDimitry Andric 
291e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
292e8d8bef9SDimitry Andric   auto *VecType = cast<VectorType>(Src->getType());
293e8d8bef9SDimitry Andric 
294e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
295e8d8bef9SDimitry Andric 
296e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
297e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
298e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
299e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
300e8d8bef9SDimitry Andric 
301e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
302e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
303e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(Src, Ptr, AlignVal);
304e8d8bef9SDimitry Andric     CI->eraseFromParent();
305e8d8bef9SDimitry Andric     return;
306e8d8bef9SDimitry Andric   }
307e8d8bef9SDimitry Andric 
308e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
309e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
310e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
311e8d8bef9SDimitry Andric   // Bitcast %addr from i8* to EltTy*
312e8d8bef9SDimitry Andric   Type *NewPtrType =
313e8d8bef9SDimitry Andric       EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
314e8d8bef9SDimitry Andric   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
315e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
316e8d8bef9SDimitry Andric 
317e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
318e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
319e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
320e8d8bef9SDimitry Andric         continue;
321e8d8bef9SDimitry Andric       Value *OneElt = Builder.CreateExtractElement(Src, Idx);
322e8d8bef9SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
323e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
324e8d8bef9SDimitry Andric     }
325e8d8bef9SDimitry Andric     CI->eraseFromParent();
326e8d8bef9SDimitry Andric     return;
327e8d8bef9SDimitry Andric   }
328e8d8bef9SDimitry Andric 
329e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
330e8d8bef9SDimitry Andric   // better results on X86 at least.
331e8d8bef9SDimitry Andric   Value *SclrMask;
332e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
333e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
334e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
335e8d8bef9SDimitry Andric   }
336e8d8bef9SDimitry Andric 
337e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
338e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
339e8d8bef9SDimitry Andric     //
340e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
341e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
342e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
343e8d8bef9SDimitry Andric     //
344e8d8bef9SDimitry Andric     Value *Predicate;
345e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
346fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
347fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
348e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
349e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
350e8d8bef9SDimitry Andric     } else {
351e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
352e8d8bef9SDimitry Andric     }
353e8d8bef9SDimitry Andric 
354e8d8bef9SDimitry Andric     // Create "cond" block
355e8d8bef9SDimitry Andric     //
356e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
357e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
358e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
359e8d8bef9SDimitry Andric     //
360fe6060f1SDimitry Andric     Instruction *ThenTerm =
361fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
362fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
363e8d8bef9SDimitry Andric 
364fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
365fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
366fe6060f1SDimitry Andric 
367fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
368e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
369e8d8bef9SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
370e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
371e8d8bef9SDimitry Andric 
372e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
373fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
374fe6060f1SDimitry Andric     NewIfBlock->setName("else");
375fe6060f1SDimitry Andric 
376fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
377e8d8bef9SDimitry Andric   }
378e8d8bef9SDimitry Andric   CI->eraseFromParent();
379e8d8bef9SDimitry Andric 
380e8d8bef9SDimitry Andric   ModifiedDT = true;
381e8d8bef9SDimitry Andric }
382e8d8bef9SDimitry Andric 
383e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like
384e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
385e8d8bef9SDimitry Andric //                               <16 x i1> %Mask, <16 x i32> %Src)
386e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
387e8d8bef9SDimitry Andric // the appropriate mask bit is set
388e8d8bef9SDimitry Andric //
389e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
390e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
391e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else
392e8d8bef9SDimitry Andric //
393e8d8bef9SDimitry Andric // cond.load:
394e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
395e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4
396e8d8bef9SDimitry Andric // %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
397e8d8bef9SDimitry Andric // br label %else
398e8d8bef9SDimitry Andric //
399e8d8bef9SDimitry Andric // else:
400e8d8bef9SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
401e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
402e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2
403e8d8bef9SDimitry Andric //
404e8d8bef9SDimitry Andric // cond.load1:
405e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
406e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4
407e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
408e8d8bef9SDimitry Andric // br label %else2
409e8d8bef9SDimitry Andric // . . .
410e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
411e8d8bef9SDimitry Andric // ret <16 x i32> %Result
412fe6060f1SDimitry Andric static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
413fe6060f1SDimitry Andric                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
414e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(0);
415e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
416e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
417e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
418e8d8bef9SDimitry Andric 
419e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
420e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
421e8d8bef9SDimitry Andric 
422e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
423e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
424e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
425e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
426e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
427e8d8bef9SDimitry Andric 
428e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
429e8d8bef9SDimitry Andric 
430e8d8bef9SDimitry Andric   // The result vector
431e8d8bef9SDimitry Andric   Value *VResult = Src0;
432e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
433e8d8bef9SDimitry Andric 
434e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
435e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
436e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
437e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
438e8d8bef9SDimitry Andric         continue;
439e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
440e8d8bef9SDimitry Andric       LoadInst *Load =
441e8d8bef9SDimitry Andric           Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
442e8d8bef9SDimitry Andric       VResult =
443e8d8bef9SDimitry Andric           Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
444e8d8bef9SDimitry Andric     }
445e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
446e8d8bef9SDimitry Andric     CI->eraseFromParent();
447e8d8bef9SDimitry Andric     return;
448e8d8bef9SDimitry Andric   }
449e8d8bef9SDimitry Andric 
450e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
451e8d8bef9SDimitry Andric   // better results on X86 at least.
452e8d8bef9SDimitry Andric   Value *SclrMask;
453e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
454e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
455e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
456e8d8bef9SDimitry Andric   }
457e8d8bef9SDimitry Andric 
458e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
459e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
460e8d8bef9SDimitry Andric     //
461e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
462e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
463e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.load, label %else
464e8d8bef9SDimitry Andric     //
465e8d8bef9SDimitry Andric 
466e8d8bef9SDimitry Andric     Value *Predicate;
467e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
468fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
469fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
470e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
471e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
472e8d8bef9SDimitry Andric     } else {
473e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
474e8d8bef9SDimitry Andric     }
475e8d8bef9SDimitry Andric 
476e8d8bef9SDimitry Andric     // Create "cond" block
477e8d8bef9SDimitry Andric     //
478e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
479e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
480e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
481e8d8bef9SDimitry Andric     //
482fe6060f1SDimitry Andric     Instruction *ThenTerm =
483fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
484fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
485e8d8bef9SDimitry Andric 
486fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
487fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
488fe6060f1SDimitry Andric 
489fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
490e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
491e8d8bef9SDimitry Andric     LoadInst *Load =
492e8d8bef9SDimitry Andric         Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
493e8d8bef9SDimitry Andric     Value *NewVResult =
494e8d8bef9SDimitry Andric         Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
495e8d8bef9SDimitry Andric 
496e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
497fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
498fe6060f1SDimitry Andric     NewIfBlock->setName("else");
499e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
500e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
501e8d8bef9SDimitry Andric 
502fe6060f1SDimitry Andric     // Create the phi to join the new and previous value.
503fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
504e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
505e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
506e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
507e8d8bef9SDimitry Andric     VResult = Phi;
508e8d8bef9SDimitry Andric   }
509e8d8bef9SDimitry Andric 
510e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
511e8d8bef9SDimitry Andric   CI->eraseFromParent();
512e8d8bef9SDimitry Andric 
513e8d8bef9SDimitry Andric   ModifiedDT = true;
514e8d8bef9SDimitry Andric }
515e8d8bef9SDimitry Andric 
516e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like
517e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
518e8d8bef9SDimitry Andric //                                  <16 x i1> %Mask)
519e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
520e8d8bef9SDimitry Andric // the appropriate mask bit is set.
521e8d8bef9SDimitry Andric //
522e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
523e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
524e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else
525e8d8bef9SDimitry Andric //
526e8d8bef9SDimitry Andric // cond.store:
527e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0
528e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
529e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4
530e8d8bef9SDimitry Andric // br label %else
531e8d8bef9SDimitry Andric //
532e8d8bef9SDimitry Andric // else:
533e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
534e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2
535e8d8bef9SDimitry Andric //
536e8d8bef9SDimitry Andric // cond.store1:
537e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1
538e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
539e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4
540e8d8bef9SDimitry Andric // br label %else2
541e8d8bef9SDimitry Andric //   . . .
542fe6060f1SDimitry Andric static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
543fe6060f1SDimitry Andric                                    DomTreeUpdater *DTU, bool &ModifiedDT) {
544e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
545e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(1);
546e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
547e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
548e8d8bef9SDimitry Andric 
549e8d8bef9SDimitry Andric   auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
550e8d8bef9SDimitry Andric 
551e8d8bef9SDimitry Andric   assert(
552e8d8bef9SDimitry Andric       isa<VectorType>(Ptrs->getType()) &&
553e8d8bef9SDimitry Andric       isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
554e8d8bef9SDimitry Andric       "Vector of pointers is expected in masked scatter intrinsic");
555e8d8bef9SDimitry Andric 
556e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
557e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
558e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
559e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
560e8d8bef9SDimitry Andric 
561e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
562e8d8bef9SDimitry Andric   unsigned VectorWidth = SrcFVTy->getNumElements();
563e8d8bef9SDimitry Andric 
564e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
565e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
566e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
567e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
568e8d8bef9SDimitry Andric         continue;
569e8d8bef9SDimitry Andric       Value *OneElt =
570e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
571e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
572e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
573e8d8bef9SDimitry Andric     }
574e8d8bef9SDimitry Andric     CI->eraseFromParent();
575e8d8bef9SDimitry Andric     return;
576e8d8bef9SDimitry Andric   }
577e8d8bef9SDimitry Andric 
578e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
579e8d8bef9SDimitry Andric   // better results on X86 at least.
580e8d8bef9SDimitry Andric   Value *SclrMask;
581e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
582e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
583e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
584e8d8bef9SDimitry Andric   }
585e8d8bef9SDimitry Andric 
586e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
587e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
588e8d8bef9SDimitry Andric     //
589e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
590e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
591e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.store, label %else
592e8d8bef9SDimitry Andric     //
593e8d8bef9SDimitry Andric     Value *Predicate;
594e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
595fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
596fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
597e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
598e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
599e8d8bef9SDimitry Andric     } else {
600e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
601e8d8bef9SDimitry Andric     }
602e8d8bef9SDimitry Andric 
603e8d8bef9SDimitry Andric     // Create "cond" block
604e8d8bef9SDimitry Andric     //
605e8d8bef9SDimitry Andric     //  %Elt1 = extractelement <16 x i32> %Src, i32 1
606e8d8bef9SDimitry Andric     //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
607e8d8bef9SDimitry Andric     //  %store i32 %Elt1, i32* %Ptr1
608e8d8bef9SDimitry Andric     //
609fe6060f1SDimitry Andric     Instruction *ThenTerm =
610fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
611fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
612e8d8bef9SDimitry Andric 
613fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
614fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
615fe6060f1SDimitry Andric 
616fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
617e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
618e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
619e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
620e8d8bef9SDimitry Andric 
621e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
622fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
623fe6060f1SDimitry Andric     NewIfBlock->setName("else");
624fe6060f1SDimitry Andric 
625fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
626e8d8bef9SDimitry Andric   }
627e8d8bef9SDimitry Andric   CI->eraseFromParent();
628e8d8bef9SDimitry Andric 
629e8d8bef9SDimitry Andric   ModifiedDT = true;
630e8d8bef9SDimitry Andric }
631e8d8bef9SDimitry Andric 
632fe6060f1SDimitry Andric static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
633fe6060f1SDimitry Andric                                       DomTreeUpdater *DTU, bool &ModifiedDT) {
634e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
635e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(1);
636e8d8bef9SDimitry Andric   Value *PassThru = CI->getArgOperand(2);
637e8d8bef9SDimitry Andric 
638e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
639e8d8bef9SDimitry Andric 
640e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
641e8d8bef9SDimitry Andric 
642e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
643e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
644e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
645e8d8bef9SDimitry Andric 
646e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
647e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
648e8d8bef9SDimitry Andric 
649e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
650e8d8bef9SDimitry Andric 
651e8d8bef9SDimitry Andric   // The result vector
652e8d8bef9SDimitry Andric   Value *VResult = PassThru;
653e8d8bef9SDimitry Andric 
654e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
655e8d8bef9SDimitry Andric   // Create a build_vector pattern, with loads/undefs as necessary and then
656e8d8bef9SDimitry Andric   // shuffle blend with the pass through value.
657e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
658e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
659e8d8bef9SDimitry Andric     VResult = UndefValue::get(VecType);
660e8d8bef9SDimitry Andric     SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
661e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
662e8d8bef9SDimitry Andric       Value *InsertElt;
663e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
664e8d8bef9SDimitry Andric         InsertElt = UndefValue::get(EltTy);
665e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx + VectorWidth;
666e8d8bef9SDimitry Andric       } else {
667e8d8bef9SDimitry Andric         Value *NewPtr =
668e8d8bef9SDimitry Andric             Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
669e8d8bef9SDimitry Andric         InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
670e8d8bef9SDimitry Andric                                               "Load" + Twine(Idx));
671e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx;
672e8d8bef9SDimitry Andric         ++MemIndex;
673e8d8bef9SDimitry Andric       }
674e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
675e8d8bef9SDimitry Andric                                             "Res" + Twine(Idx));
676e8d8bef9SDimitry Andric     }
677e8d8bef9SDimitry Andric     VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
678e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
679e8d8bef9SDimitry Andric     CI->eraseFromParent();
680e8d8bef9SDimitry Andric     return;
681e8d8bef9SDimitry Andric   }
682e8d8bef9SDimitry Andric 
683e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
684e8d8bef9SDimitry Andric   // better results on X86 at least.
685e8d8bef9SDimitry Andric   Value *SclrMask;
686e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
687e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
688e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
689e8d8bef9SDimitry Andric   }
690e8d8bef9SDimitry Andric 
691e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
692e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
693e8d8bef9SDimitry Andric     //
694e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
695e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
696e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
697e8d8bef9SDimitry Andric     //
698e8d8bef9SDimitry Andric 
699e8d8bef9SDimitry Andric     Value *Predicate;
700e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
701fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
702fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
703e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
704e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
705e8d8bef9SDimitry Andric     } else {
706e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
707e8d8bef9SDimitry Andric     }
708e8d8bef9SDimitry Andric 
709e8d8bef9SDimitry Andric     // Create "cond" block
710e8d8bef9SDimitry Andric     //
711e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
712e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
713e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
714e8d8bef9SDimitry Andric     //
715fe6060f1SDimitry Andric     Instruction *ThenTerm =
716fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
717fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
718e8d8bef9SDimitry Andric 
719fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
720fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
721fe6060f1SDimitry Andric 
722fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
723e8d8bef9SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
724e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
725e8d8bef9SDimitry Andric 
726e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
727e8d8bef9SDimitry Andric     Value *NewPtr;
728e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
729e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
730e8d8bef9SDimitry Andric 
731e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
732fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
733fe6060f1SDimitry Andric     NewIfBlock->setName("else");
734e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
735e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
736e8d8bef9SDimitry Andric 
737e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
738fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
739e8d8bef9SDimitry Andric     PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
740e8d8bef9SDimitry Andric     ResultPhi->addIncoming(NewVResult, CondBlock);
741e8d8bef9SDimitry Andric     ResultPhi->addIncoming(VResult, PrevIfBlock);
742e8d8bef9SDimitry Andric     VResult = ResultPhi;
743e8d8bef9SDimitry Andric 
744e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
745e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
746e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
747e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
748e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
749e8d8bef9SDimitry Andric       Ptr = PtrPhi;
750e8d8bef9SDimitry Andric     }
751e8d8bef9SDimitry Andric   }
752e8d8bef9SDimitry Andric 
753e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
754e8d8bef9SDimitry Andric   CI->eraseFromParent();
755e8d8bef9SDimitry Andric 
756e8d8bef9SDimitry Andric   ModifiedDT = true;
757e8d8bef9SDimitry Andric }
758e8d8bef9SDimitry Andric 
759fe6060f1SDimitry Andric static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
760fe6060f1SDimitry Andric                                          DomTreeUpdater *DTU,
761fe6060f1SDimitry Andric                                          bool &ModifiedDT) {
762e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
763e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
764e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
765e8d8bef9SDimitry Andric 
766e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(Src->getType());
767e8d8bef9SDimitry Andric 
768e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
769e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
770e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
771e8d8bef9SDimitry Andric 
772e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
773e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
774e8d8bef9SDimitry Andric 
775e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
776e8d8bef9SDimitry Andric 
777e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
778e8d8bef9SDimitry Andric 
779e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
780e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
781e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
782e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
783e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
784e8d8bef9SDimitry Andric         continue;
785e8d8bef9SDimitry Andric       Value *OneElt =
786e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
787e8d8bef9SDimitry Andric       Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
788e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
789e8d8bef9SDimitry Andric       ++MemIndex;
790e8d8bef9SDimitry Andric     }
791e8d8bef9SDimitry Andric     CI->eraseFromParent();
792e8d8bef9SDimitry Andric     return;
793e8d8bef9SDimitry Andric   }
794e8d8bef9SDimitry Andric 
795e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
796e8d8bef9SDimitry Andric   // better results on X86 at least.
797e8d8bef9SDimitry Andric   Value *SclrMask;
798e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
799e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
800e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
801e8d8bef9SDimitry Andric   }
802e8d8bef9SDimitry Andric 
803e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
804e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
805e8d8bef9SDimitry Andric     //
806e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
807e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
808e8d8bef9SDimitry Andric     //
809e8d8bef9SDimitry Andric     Value *Predicate;
810e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
811fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
812fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
813e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
814e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
815e8d8bef9SDimitry Andric     } else {
816e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
817e8d8bef9SDimitry Andric     }
818e8d8bef9SDimitry Andric 
819e8d8bef9SDimitry Andric     // Create "cond" block
820e8d8bef9SDimitry Andric     //
821e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
822e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
823e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
824e8d8bef9SDimitry Andric     //
825fe6060f1SDimitry Andric     Instruction *ThenTerm =
826fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
827fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
828e8d8bef9SDimitry Andric 
829fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
830fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
831fe6060f1SDimitry Andric 
832fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
833e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
834e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
835e8d8bef9SDimitry Andric 
836e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
837e8d8bef9SDimitry Andric     Value *NewPtr;
838e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
839e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
840e8d8bef9SDimitry Andric 
841e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
842fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
843fe6060f1SDimitry Andric     NewIfBlock->setName("else");
844e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
845e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
846e8d8bef9SDimitry Andric 
847fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
848fe6060f1SDimitry Andric 
849e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
850e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
851e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
852e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
853e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
854e8d8bef9SDimitry Andric       Ptr = PtrPhi;
855e8d8bef9SDimitry Andric     }
856e8d8bef9SDimitry Andric   }
857e8d8bef9SDimitry Andric   CI->eraseFromParent();
858e8d8bef9SDimitry Andric 
859e8d8bef9SDimitry Andric   ModifiedDT = true;
860e8d8bef9SDimitry Andric }
861e8d8bef9SDimitry Andric 
862fe6060f1SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI,
863fe6060f1SDimitry Andric                     DominatorTree *DT) {
864fe6060f1SDimitry Andric   Optional<DomTreeUpdater> DTU;
865fe6060f1SDimitry Andric   if (DT)
866fe6060f1SDimitry Andric     DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
867fe6060f1SDimitry Andric 
868e8d8bef9SDimitry Andric   bool EverMadeChange = false;
869e8d8bef9SDimitry Andric   bool MadeChange = true;
870e8d8bef9SDimitry Andric   auto &DL = F.getParent()->getDataLayout();
871e8d8bef9SDimitry Andric   while (MadeChange) {
872e8d8bef9SDimitry Andric     MadeChange = false;
873349cc55cSDimitry Andric     for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
874e8d8bef9SDimitry Andric       bool ModifiedDTOnIteration = false;
875349cc55cSDimitry Andric       MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
876*81ad6265SDimitry Andric                                   DTU ? DTU.getPointer() : nullptr);
877fe6060f1SDimitry Andric 
878e8d8bef9SDimitry Andric       // Restart BB iteration if the dominator tree of the Function was changed
879e8d8bef9SDimitry Andric       if (ModifiedDTOnIteration)
880e8d8bef9SDimitry Andric         break;
881e8d8bef9SDimitry Andric     }
882e8d8bef9SDimitry Andric 
883e8d8bef9SDimitry Andric     EverMadeChange |= MadeChange;
884e8d8bef9SDimitry Andric   }
885e8d8bef9SDimitry Andric   return EverMadeChange;
886e8d8bef9SDimitry Andric }
887e8d8bef9SDimitry Andric 
888e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
889e8d8bef9SDimitry Andric   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
890fe6060f1SDimitry Andric   DominatorTree *DT = nullptr;
891fe6060f1SDimitry Andric   if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
892fe6060f1SDimitry Andric     DT = &DTWP->getDomTree();
893fe6060f1SDimitry Andric   return runImpl(F, TTI, DT);
894e8d8bef9SDimitry Andric }
895e8d8bef9SDimitry Andric 
896e8d8bef9SDimitry Andric PreservedAnalyses
897e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
898e8d8bef9SDimitry Andric   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
899fe6060f1SDimitry Andric   auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
900fe6060f1SDimitry Andric   if (!runImpl(F, TTI, DT))
901e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
902e8d8bef9SDimitry Andric   PreservedAnalyses PA;
903e8d8bef9SDimitry Andric   PA.preserve<TargetIRAnalysis>();
904fe6060f1SDimitry Andric   PA.preserve<DominatorTreeAnalysis>();
905e8d8bef9SDimitry Andric   return PA;
906e8d8bef9SDimitry Andric }
907e8d8bef9SDimitry Andric 
908e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
909fe6060f1SDimitry Andric                           const TargetTransformInfo &TTI, const DataLayout &DL,
910fe6060f1SDimitry Andric                           DomTreeUpdater *DTU) {
911e8d8bef9SDimitry Andric   bool MadeChange = false;
912e8d8bef9SDimitry Andric 
913e8d8bef9SDimitry Andric   BasicBlock::iterator CurInstIterator = BB.begin();
914e8d8bef9SDimitry Andric   while (CurInstIterator != BB.end()) {
915e8d8bef9SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
916fe6060f1SDimitry Andric       MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
917e8d8bef9SDimitry Andric     if (ModifiedDT)
918e8d8bef9SDimitry Andric       return true;
919e8d8bef9SDimitry Andric   }
920e8d8bef9SDimitry Andric 
921e8d8bef9SDimitry Andric   return MadeChange;
922e8d8bef9SDimitry Andric }
923e8d8bef9SDimitry Andric 
924e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
925e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
926fe6060f1SDimitry Andric                              const DataLayout &DL, DomTreeUpdater *DTU) {
927e8d8bef9SDimitry Andric   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
928e8d8bef9SDimitry Andric   if (II) {
929e8d8bef9SDimitry Andric     // The scalarization code below does not work for scalable vectors.
930e8d8bef9SDimitry Andric     if (isa<ScalableVectorType>(II->getType()) ||
931349cc55cSDimitry Andric         any_of(II->args(),
932e8d8bef9SDimitry Andric                [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
933e8d8bef9SDimitry Andric       return false;
934e8d8bef9SDimitry Andric 
935e8d8bef9SDimitry Andric     switch (II->getIntrinsicID()) {
936e8d8bef9SDimitry Andric     default:
937e8d8bef9SDimitry Andric       break;
938e8d8bef9SDimitry Andric     case Intrinsic::masked_load:
939e8d8bef9SDimitry Andric       // Scalarize unsupported vector masked load
940e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedLoad(
941e8d8bef9SDimitry Andric               CI->getType(),
942e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
943e8d8bef9SDimitry Andric         return false;
944fe6060f1SDimitry Andric       scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
945e8d8bef9SDimitry Andric       return true;
946e8d8bef9SDimitry Andric     case Intrinsic::masked_store:
947e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedStore(
948e8d8bef9SDimitry Andric               CI->getArgOperand(0)->getType(),
949e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
950e8d8bef9SDimitry Andric         return false;
951fe6060f1SDimitry Andric       scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
952e8d8bef9SDimitry Andric       return true;
953e8d8bef9SDimitry Andric     case Intrinsic::masked_gather: {
954fe6060f1SDimitry Andric       MaybeAlign MA =
955fe6060f1SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
956e8d8bef9SDimitry Andric       Type *LoadTy = CI->getType();
957fe6060f1SDimitry Andric       Align Alignment = DL.getValueOrABITypeAlignment(MA,
958fe6060f1SDimitry Andric                                                       LoadTy->getScalarType());
95904eeddc0SDimitry Andric       if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
96004eeddc0SDimitry Andric           !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
961e8d8bef9SDimitry Andric         return false;
962fe6060f1SDimitry Andric       scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
963e8d8bef9SDimitry Andric       return true;
964e8d8bef9SDimitry Andric     }
965e8d8bef9SDimitry Andric     case Intrinsic::masked_scatter: {
966fe6060f1SDimitry Andric       MaybeAlign MA =
967fe6060f1SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
968e8d8bef9SDimitry Andric       Type *StoreTy = CI->getArgOperand(0)->getType();
969fe6060f1SDimitry Andric       Align Alignment = DL.getValueOrABITypeAlignment(MA,
970fe6060f1SDimitry Andric                                                       StoreTy->getScalarType());
97104eeddc0SDimitry Andric       if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
97204eeddc0SDimitry Andric           !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
97304eeddc0SDimitry Andric                                            Alignment))
974e8d8bef9SDimitry Andric         return false;
975fe6060f1SDimitry Andric       scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
976e8d8bef9SDimitry Andric       return true;
977e8d8bef9SDimitry Andric     }
978e8d8bef9SDimitry Andric     case Intrinsic::masked_expandload:
979e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedExpandLoad(CI->getType()))
980e8d8bef9SDimitry Andric         return false;
981fe6060f1SDimitry Andric       scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
982e8d8bef9SDimitry Andric       return true;
983e8d8bef9SDimitry Andric     case Intrinsic::masked_compressstore:
984e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
985e8d8bef9SDimitry Andric         return false;
986fe6060f1SDimitry Andric       scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
987e8d8bef9SDimitry Andric       return true;
988e8d8bef9SDimitry Andric     }
989e8d8bef9SDimitry Andric   }
990e8d8bef9SDimitry Andric 
991e8d8bef9SDimitry Andric   return false;
992e8d8bef9SDimitry Andric }
993