xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1e8d8bef9SDimitry Andric //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
281ad6265SDimitry Andric //                                    intrinsics
3e8d8bef9SDimitry Andric //
4e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
6e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7e8d8bef9SDimitry Andric //
8e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
9e8d8bef9SDimitry Andric //
10e8d8bef9SDimitry Andric // This pass replaces masked memory intrinsics - when unsupported by the target
11e8d8bef9SDimitry Andric // - with a chain of basic blocks, that deal with the elements one-by-one if the
12e8d8bef9SDimitry Andric // appropriate mask bit is set.
13e8d8bef9SDimitry Andric //
14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric 
16e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
17e8d8bef9SDimitry Andric #include "llvm/ADT/Twine.h"
18fe6060f1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
19e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
20e8d8bef9SDimitry Andric #include "llvm/IR/BasicBlock.h"
21e8d8bef9SDimitry Andric #include "llvm/IR/Constant.h"
22e8d8bef9SDimitry Andric #include "llvm/IR/Constants.h"
23e8d8bef9SDimitry Andric #include "llvm/IR/DerivedTypes.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Dominators.h"
25e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
26e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h"
27e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h"
28e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h"
29e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/Type.h"
31e8d8bef9SDimitry Andric #include "llvm/IR/Value.h"
32e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
33e8d8bef9SDimitry Andric #include "llvm/Pass.h"
34e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h"
35e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h"
36fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37e8d8bef9SDimitry Andric #include <cassert>
38bdd1243dSDimitry Andric #include <optional>
39e8d8bef9SDimitry Andric 
40e8d8bef9SDimitry Andric using namespace llvm;
41e8d8bef9SDimitry Andric 
42e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin"
43e8d8bef9SDimitry Andric 
44e8d8bef9SDimitry Andric namespace {
45e8d8bef9SDimitry Andric 
46e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
47e8d8bef9SDimitry Andric public:
48e8d8bef9SDimitry Andric   static char ID; // Pass identification, replacement for typeid
49e8d8bef9SDimitry Andric 
50e8d8bef9SDimitry Andric   explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
51e8d8bef9SDimitry Andric     initializeScalarizeMaskedMemIntrinLegacyPassPass(
52e8d8bef9SDimitry Andric         *PassRegistry::getPassRegistry());
53e8d8bef9SDimitry Andric   }
54e8d8bef9SDimitry Andric 
55e8d8bef9SDimitry Andric   bool runOnFunction(Function &F) override;
56e8d8bef9SDimitry Andric 
57e8d8bef9SDimitry Andric   StringRef getPassName() const override {
58e8d8bef9SDimitry Andric     return "Scalarize Masked Memory Intrinsics";
59e8d8bef9SDimitry Andric   }
60e8d8bef9SDimitry Andric 
61e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
62e8d8bef9SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
63fe6060f1SDimitry Andric     AU.addPreserved<DominatorTreeWrapperPass>();
64e8d8bef9SDimitry Andric   }
65e8d8bef9SDimitry Andric };
66e8d8bef9SDimitry Andric 
67e8d8bef9SDimitry Andric } // end anonymous namespace
68e8d8bef9SDimitry Andric 
69e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
70fe6060f1SDimitry Andric                           const TargetTransformInfo &TTI, const DataLayout &DL,
71fe6060f1SDimitry Andric                           DomTreeUpdater *DTU);
72e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
73e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
74fe6060f1SDimitry Andric                              const DataLayout &DL, DomTreeUpdater *DTU);
75e8d8bef9SDimitry Andric 
76e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
77e8d8bef9SDimitry Andric 
78e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
79e8d8bef9SDimitry Andric                       "Scalarize unsupported masked memory intrinsics", false,
80e8d8bef9SDimitry Andric                       false)
81e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
82fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
83e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
84e8d8bef9SDimitry Andric                     "Scalarize unsupported masked memory intrinsics", false,
85e8d8bef9SDimitry Andric                     false)
86e8d8bef9SDimitry Andric 
87e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
88e8d8bef9SDimitry Andric   return new ScalarizeMaskedMemIntrinLegacyPass();
89e8d8bef9SDimitry Andric }
90e8d8bef9SDimitry Andric 
91e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) {
92e8d8bef9SDimitry Andric   Constant *C = dyn_cast<Constant>(Mask);
93e8d8bef9SDimitry Andric   if (!C)
94e8d8bef9SDimitry Andric     return false;
95e8d8bef9SDimitry Andric 
96e8d8bef9SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
97e8d8bef9SDimitry Andric   for (unsigned i = 0; i != NumElts; ++i) {
98e8d8bef9SDimitry Andric     Constant *CElt = C->getAggregateElement(i);
99e8d8bef9SDimitry Andric     if (!CElt || !isa<ConstantInt>(CElt))
100e8d8bef9SDimitry Andric       return false;
101e8d8bef9SDimitry Andric   }
102e8d8bef9SDimitry Andric 
103e8d8bef9SDimitry Andric   return true;
104e8d8bef9SDimitry Andric }
105e8d8bef9SDimitry Andric 
106fe6060f1SDimitry Andric static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
107fe6060f1SDimitry Andric                                 unsigned Idx) {
108fe6060f1SDimitry Andric   return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
109fe6060f1SDimitry Andric }
110fe6060f1SDimitry Andric 
111e8d8bef9SDimitry Andric // Translate a masked load intrinsic like
112e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
113e8d8bef9SDimitry Andric //                               <16 x i1> %mask, <16 x i32> %passthru)
114e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
115e8d8bef9SDimitry Andric // the appropriate mask bit is set
116e8d8bef9SDimitry Andric //
117e8d8bef9SDimitry Andric //  %1 = bitcast i8* %addr to i32*
118e8d8bef9SDimitry Andric //  %2 = extractelement <16 x i1> %mask, i32 0
119e8d8bef9SDimitry Andric //  br i1 %2, label %cond.load, label %else
120e8d8bef9SDimitry Andric //
121e8d8bef9SDimitry Andric // cond.load:                                        ; preds = %0
122e8d8bef9SDimitry Andric //  %3 = getelementptr i32* %1, i32 0
123e8d8bef9SDimitry Andric //  %4 = load i32* %3
124e8d8bef9SDimitry Andric //  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
125e8d8bef9SDimitry Andric //  br label %else
126e8d8bef9SDimitry Andric //
127e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.load
128*06c3fb27SDimitry Andric //  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ]
129e8d8bef9SDimitry Andric //  %6 = extractelement <16 x i1> %mask, i32 1
130e8d8bef9SDimitry Andric //  br i1 %6, label %cond.load1, label %else2
131e8d8bef9SDimitry Andric //
132e8d8bef9SDimitry Andric // cond.load1:                                       ; preds = %else
133e8d8bef9SDimitry Andric //  %7 = getelementptr i32* %1, i32 1
134e8d8bef9SDimitry Andric //  %8 = load i32* %7
135e8d8bef9SDimitry Andric //  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
136e8d8bef9SDimitry Andric //  br label %else2
137e8d8bef9SDimitry Andric //
138e8d8bef9SDimitry Andric // else2:                                          ; preds = %else, %cond.load1
139e8d8bef9SDimitry Andric //  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
140e8d8bef9SDimitry Andric //  %10 = extractelement <16 x i1> %mask, i32 2
141e8d8bef9SDimitry Andric //  br i1 %10, label %cond.load4, label %else5
142e8d8bef9SDimitry Andric //
143fe6060f1SDimitry Andric static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
144fe6060f1SDimitry Andric                                 DomTreeUpdater *DTU, bool &ModifiedDT) {
145e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
146e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
147e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
148e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
149e8d8bef9SDimitry Andric 
150e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
151e8d8bef9SDimitry Andric   VectorType *VecType = cast<FixedVectorType>(CI->getType());
152e8d8bef9SDimitry Andric 
153e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
154e8d8bef9SDimitry Andric 
155e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
156e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
157e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
158e8d8bef9SDimitry Andric 
159e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
160e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
161e8d8bef9SDimitry Andric 
162e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
163e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
164e8d8bef9SDimitry Andric     Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
165e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(NewI);
166e8d8bef9SDimitry Andric     CI->eraseFromParent();
167e8d8bef9SDimitry Andric     return;
168e8d8bef9SDimitry Andric   }
169e8d8bef9SDimitry Andric 
170e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
171e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
172e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
173e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
174e8d8bef9SDimitry Andric 
175e8d8bef9SDimitry Andric   // The result vector
176e8d8bef9SDimitry Andric   Value *VResult = Src0;
177e8d8bef9SDimitry Andric 
178e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
179e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
180e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
181e8d8bef9SDimitry Andric         continue;
182*06c3fb27SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
183e8d8bef9SDimitry Andric       LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
184e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, Load, Idx);
185e8d8bef9SDimitry Andric     }
186e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
187e8d8bef9SDimitry Andric     CI->eraseFromParent();
188e8d8bef9SDimitry Andric     return;
189e8d8bef9SDimitry Andric   }
190e8d8bef9SDimitry Andric 
191e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
192e8d8bef9SDimitry Andric   // better results on X86 at least.
193e8d8bef9SDimitry Andric   Value *SclrMask;
194e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
195e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
196e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
197e8d8bef9SDimitry Andric   }
198e8d8bef9SDimitry Andric 
199e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
200e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
201e8d8bef9SDimitry Andric     //
202e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
203e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
204e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
205e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
206e8d8bef9SDimitry Andric     //
207e8d8bef9SDimitry Andric     Value *Predicate;
208e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
209fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
210fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
211e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
212e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
213e8d8bef9SDimitry Andric     } else {
214e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
215e8d8bef9SDimitry Andric     }
216e8d8bef9SDimitry Andric 
217e8d8bef9SDimitry Andric     // Create "cond" block
218e8d8bef9SDimitry Andric     //
219e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
220e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
221e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
222e8d8bef9SDimitry Andric     //
223fe6060f1SDimitry Andric     Instruction *ThenTerm =
224fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
225fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
226e8d8bef9SDimitry Andric 
227fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
228fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
229fe6060f1SDimitry Andric 
230fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
231*06c3fb27SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
232e8d8bef9SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
233e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
234e8d8bef9SDimitry Andric 
235e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
236fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
237fe6060f1SDimitry Andric     NewIfBlock->setName("else");
238e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
239e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
240e8d8bef9SDimitry Andric 
241e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
242fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
243e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
244e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
245e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
246e8d8bef9SDimitry Andric     VResult = Phi;
247e8d8bef9SDimitry Andric   }
248e8d8bef9SDimitry Andric 
249e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
250e8d8bef9SDimitry Andric   CI->eraseFromParent();
251e8d8bef9SDimitry Andric 
252e8d8bef9SDimitry Andric   ModifiedDT = true;
253e8d8bef9SDimitry Andric }
254e8d8bef9SDimitry Andric 
255e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like
256e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
257e8d8bef9SDimitry Andric //                               <16 x i1> %mask)
258e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
259e8d8bef9SDimitry Andric // the appropriate mask bit is set
260e8d8bef9SDimitry Andric //
261e8d8bef9SDimitry Andric //   %1 = bitcast i8* %addr to i32*
262e8d8bef9SDimitry Andric //   %2 = extractelement <16 x i1> %mask, i32 0
263e8d8bef9SDimitry Andric //   br i1 %2, label %cond.store, label %else
264e8d8bef9SDimitry Andric //
265e8d8bef9SDimitry Andric // cond.store:                                       ; preds = %0
266e8d8bef9SDimitry Andric //   %3 = extractelement <16 x i32> %val, i32 0
267e8d8bef9SDimitry Andric //   %4 = getelementptr i32* %1, i32 0
268e8d8bef9SDimitry Andric //   store i32 %3, i32* %4
269e8d8bef9SDimitry Andric //   br label %else
270e8d8bef9SDimitry Andric //
271e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.store
272e8d8bef9SDimitry Andric //   %5 = extractelement <16 x i1> %mask, i32 1
273e8d8bef9SDimitry Andric //   br i1 %5, label %cond.store1, label %else2
274e8d8bef9SDimitry Andric //
275e8d8bef9SDimitry Andric // cond.store1:                                      ; preds = %else
276e8d8bef9SDimitry Andric //   %6 = extractelement <16 x i32> %val, i32 1
277e8d8bef9SDimitry Andric //   %7 = getelementptr i32* %1, i32 1
278e8d8bef9SDimitry Andric //   store i32 %6, i32* %7
279e8d8bef9SDimitry Andric //   br label %else2
280e8d8bef9SDimitry Andric //   . . .
281fe6060f1SDimitry Andric static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
282fe6060f1SDimitry Andric                                  DomTreeUpdater *DTU, bool &ModifiedDT) {
283e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
284e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
285e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
286e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
287e8d8bef9SDimitry Andric 
288e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
289e8d8bef9SDimitry Andric   auto *VecType = cast<VectorType>(Src->getType());
290e8d8bef9SDimitry Andric 
291e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
292e8d8bef9SDimitry Andric 
293e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
294e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
295e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
296e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
297e8d8bef9SDimitry Andric 
298e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
299e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
300e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(Src, Ptr, AlignVal);
301e8d8bef9SDimitry Andric     CI->eraseFromParent();
302e8d8bef9SDimitry Andric     return;
303e8d8bef9SDimitry Andric   }
304e8d8bef9SDimitry Andric 
305e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
306e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
307e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
308e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
309e8d8bef9SDimitry Andric 
310e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
311e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
312e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
313e8d8bef9SDimitry Andric         continue;
314e8d8bef9SDimitry Andric       Value *OneElt = Builder.CreateExtractElement(Src, Idx);
315*06c3fb27SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
316e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
317e8d8bef9SDimitry Andric     }
318e8d8bef9SDimitry Andric     CI->eraseFromParent();
319e8d8bef9SDimitry Andric     return;
320e8d8bef9SDimitry Andric   }
321e8d8bef9SDimitry Andric 
322e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
323e8d8bef9SDimitry Andric   // better results on X86 at least.
324e8d8bef9SDimitry Andric   Value *SclrMask;
325e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
326e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
327e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
328e8d8bef9SDimitry Andric   }
329e8d8bef9SDimitry Andric 
330e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
331e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
332e8d8bef9SDimitry Andric     //
333e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
334e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
335e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
336e8d8bef9SDimitry Andric     //
337e8d8bef9SDimitry Andric     Value *Predicate;
338e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
339fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
340fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
341e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
342e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
343e8d8bef9SDimitry Andric     } else {
344e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
345e8d8bef9SDimitry Andric     }
346e8d8bef9SDimitry Andric 
347e8d8bef9SDimitry Andric     // Create "cond" block
348e8d8bef9SDimitry Andric     //
349e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
350e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
351e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
352e8d8bef9SDimitry Andric     //
353fe6060f1SDimitry Andric     Instruction *ThenTerm =
354fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
355fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
356e8d8bef9SDimitry Andric 
357fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
358fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
359fe6060f1SDimitry Andric 
360fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
361e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
362*06c3fb27SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
363e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
364e8d8bef9SDimitry Andric 
365e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
366fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
367fe6060f1SDimitry Andric     NewIfBlock->setName("else");
368fe6060f1SDimitry Andric 
369fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
370e8d8bef9SDimitry Andric   }
371e8d8bef9SDimitry Andric   CI->eraseFromParent();
372e8d8bef9SDimitry Andric 
373e8d8bef9SDimitry Andric   ModifiedDT = true;
374e8d8bef9SDimitry Andric }
375e8d8bef9SDimitry Andric 
376e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like
377e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
378e8d8bef9SDimitry Andric //                               <16 x i1> %Mask, <16 x i32> %Src)
379e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
380e8d8bef9SDimitry Andric // the appropriate mask bit is set
381e8d8bef9SDimitry Andric //
382e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
383e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
384e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else
385e8d8bef9SDimitry Andric //
386e8d8bef9SDimitry Andric // cond.load:
387e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
388e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4
389*06c3fb27SDimitry Andric // %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0
390e8d8bef9SDimitry Andric // br label %else
391e8d8bef9SDimitry Andric //
392e8d8bef9SDimitry Andric // else:
393*06c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [poison, %0]
394e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
395e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2
396e8d8bef9SDimitry Andric //
397e8d8bef9SDimitry Andric // cond.load1:
398e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
399e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4
400e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
401e8d8bef9SDimitry Andric // br label %else2
402e8d8bef9SDimitry Andric // . . .
403e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
404e8d8bef9SDimitry Andric // ret <16 x i32> %Result
405fe6060f1SDimitry Andric static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
406fe6060f1SDimitry Andric                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
407e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(0);
408e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
409e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
410e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
411e8d8bef9SDimitry Andric 
412e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
413e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
414e8d8bef9SDimitry Andric 
415e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
416e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
417e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
418e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
419e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
420e8d8bef9SDimitry Andric 
421e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
422e8d8bef9SDimitry Andric 
423e8d8bef9SDimitry Andric   // The result vector
424e8d8bef9SDimitry Andric   Value *VResult = Src0;
425e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
426e8d8bef9SDimitry Andric 
427e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
428e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
429e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
430e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
431e8d8bef9SDimitry Andric         continue;
432e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
433e8d8bef9SDimitry Andric       LoadInst *Load =
434e8d8bef9SDimitry Andric           Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
435e8d8bef9SDimitry Andric       VResult =
436e8d8bef9SDimitry Andric           Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
437e8d8bef9SDimitry Andric     }
438e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
439e8d8bef9SDimitry Andric     CI->eraseFromParent();
440e8d8bef9SDimitry Andric     return;
441e8d8bef9SDimitry Andric   }
442e8d8bef9SDimitry Andric 
443e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
444e8d8bef9SDimitry Andric   // better results on X86 at least.
445e8d8bef9SDimitry Andric   Value *SclrMask;
446e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
447e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
448e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
449e8d8bef9SDimitry Andric   }
450e8d8bef9SDimitry Andric 
451e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
452e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
453e8d8bef9SDimitry Andric     //
454e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
455e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
456e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.load, label %else
457e8d8bef9SDimitry Andric     //
458e8d8bef9SDimitry Andric 
459e8d8bef9SDimitry Andric     Value *Predicate;
460e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
461fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
462fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
463e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
464e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
465e8d8bef9SDimitry Andric     } else {
466e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
467e8d8bef9SDimitry Andric     }
468e8d8bef9SDimitry Andric 
469e8d8bef9SDimitry Andric     // Create "cond" block
470e8d8bef9SDimitry Andric     //
471e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
472e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
473e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
474e8d8bef9SDimitry Andric     //
475fe6060f1SDimitry Andric     Instruction *ThenTerm =
476fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
477fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
478e8d8bef9SDimitry Andric 
479fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
480fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
481fe6060f1SDimitry Andric 
482fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
483e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
484e8d8bef9SDimitry Andric     LoadInst *Load =
485e8d8bef9SDimitry Andric         Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
486e8d8bef9SDimitry Andric     Value *NewVResult =
487e8d8bef9SDimitry Andric         Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
488e8d8bef9SDimitry Andric 
489e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
490fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
491fe6060f1SDimitry Andric     NewIfBlock->setName("else");
492e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
493e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
494e8d8bef9SDimitry Andric 
495fe6060f1SDimitry Andric     // Create the phi to join the new and previous value.
496fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
497e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
498e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
499e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
500e8d8bef9SDimitry Andric     VResult = Phi;
501e8d8bef9SDimitry Andric   }
502e8d8bef9SDimitry Andric 
503e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
504e8d8bef9SDimitry Andric   CI->eraseFromParent();
505e8d8bef9SDimitry Andric 
506e8d8bef9SDimitry Andric   ModifiedDT = true;
507e8d8bef9SDimitry Andric }
508e8d8bef9SDimitry Andric 
509e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like
510e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
511e8d8bef9SDimitry Andric //                                  <16 x i1> %Mask)
512e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
513e8d8bef9SDimitry Andric // the appropriate mask bit is set.
514e8d8bef9SDimitry Andric //
515e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
516e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
517e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else
518e8d8bef9SDimitry Andric //
519e8d8bef9SDimitry Andric // cond.store:
520e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0
521e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
522e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4
523e8d8bef9SDimitry Andric // br label %else
524e8d8bef9SDimitry Andric //
525e8d8bef9SDimitry Andric // else:
526e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
527e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2
528e8d8bef9SDimitry Andric //
529e8d8bef9SDimitry Andric // cond.store1:
530e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1
531e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
532e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4
533e8d8bef9SDimitry Andric // br label %else2
534e8d8bef9SDimitry Andric //   . . .
535fe6060f1SDimitry Andric static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
536fe6060f1SDimitry Andric                                    DomTreeUpdater *DTU, bool &ModifiedDT) {
537e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
538e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(1);
539e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
540e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
541e8d8bef9SDimitry Andric 
542e8d8bef9SDimitry Andric   auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
543e8d8bef9SDimitry Andric 
544e8d8bef9SDimitry Andric   assert(
545e8d8bef9SDimitry Andric       isa<VectorType>(Ptrs->getType()) &&
546e8d8bef9SDimitry Andric       isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
547e8d8bef9SDimitry Andric       "Vector of pointers is expected in masked scatter intrinsic");
548e8d8bef9SDimitry Andric 
549e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
550e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
551e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
552e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
553e8d8bef9SDimitry Andric 
554e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
555e8d8bef9SDimitry Andric   unsigned VectorWidth = SrcFVTy->getNumElements();
556e8d8bef9SDimitry Andric 
557e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
558e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
559e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
560e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
561e8d8bef9SDimitry Andric         continue;
562e8d8bef9SDimitry Andric       Value *OneElt =
563e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
564e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
565e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
566e8d8bef9SDimitry Andric     }
567e8d8bef9SDimitry Andric     CI->eraseFromParent();
568e8d8bef9SDimitry Andric     return;
569e8d8bef9SDimitry Andric   }
570e8d8bef9SDimitry Andric 
571e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
572e8d8bef9SDimitry Andric   // better results on X86 at least.
573e8d8bef9SDimitry Andric   Value *SclrMask;
574e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
575e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
576e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
577e8d8bef9SDimitry Andric   }
578e8d8bef9SDimitry Andric 
579e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
580e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
581e8d8bef9SDimitry Andric     //
582e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
583e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
584e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.store, label %else
585e8d8bef9SDimitry Andric     //
586e8d8bef9SDimitry Andric     Value *Predicate;
587e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
588fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
589fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
590e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
591e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
592e8d8bef9SDimitry Andric     } else {
593e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
594e8d8bef9SDimitry Andric     }
595e8d8bef9SDimitry Andric 
596e8d8bef9SDimitry Andric     // Create "cond" block
597e8d8bef9SDimitry Andric     //
598e8d8bef9SDimitry Andric     //  %Elt1 = extractelement <16 x i32> %Src, i32 1
599e8d8bef9SDimitry Andric     //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
600e8d8bef9SDimitry Andric     //  %store i32 %Elt1, i32* %Ptr1
601e8d8bef9SDimitry Andric     //
602fe6060f1SDimitry Andric     Instruction *ThenTerm =
603fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
604fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
605e8d8bef9SDimitry Andric 
606fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
607fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
608fe6060f1SDimitry Andric 
609fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
610e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
611e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
612e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
613e8d8bef9SDimitry Andric 
614e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
615fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
616fe6060f1SDimitry Andric     NewIfBlock->setName("else");
617fe6060f1SDimitry Andric 
618fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
619e8d8bef9SDimitry Andric   }
620e8d8bef9SDimitry Andric   CI->eraseFromParent();
621e8d8bef9SDimitry Andric 
622e8d8bef9SDimitry Andric   ModifiedDT = true;
623e8d8bef9SDimitry Andric }
624e8d8bef9SDimitry Andric 
625fe6060f1SDimitry Andric static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
626fe6060f1SDimitry Andric                                       DomTreeUpdater *DTU, bool &ModifiedDT) {
627e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
628e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(1);
629e8d8bef9SDimitry Andric   Value *PassThru = CI->getArgOperand(2);
630e8d8bef9SDimitry Andric 
631e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
632e8d8bef9SDimitry Andric 
633e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
634e8d8bef9SDimitry Andric 
635e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
636e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
637e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
638e8d8bef9SDimitry Andric 
639e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
640e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
641e8d8bef9SDimitry Andric 
642e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
643e8d8bef9SDimitry Andric 
644e8d8bef9SDimitry Andric   // The result vector
645e8d8bef9SDimitry Andric   Value *VResult = PassThru;
646e8d8bef9SDimitry Andric 
647e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
648*06c3fb27SDimitry Andric   // Create a build_vector pattern, with loads/poisons as necessary and then
649e8d8bef9SDimitry Andric   // shuffle blend with the pass through value.
650e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
651e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
652bdd1243dSDimitry Andric     VResult = PoisonValue::get(VecType);
653*06c3fb27SDimitry Andric     SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem);
654e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
655e8d8bef9SDimitry Andric       Value *InsertElt;
656e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
657*06c3fb27SDimitry Andric         InsertElt = PoisonValue::get(EltTy);
658e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx + VectorWidth;
659e8d8bef9SDimitry Andric       } else {
660e8d8bef9SDimitry Andric         Value *NewPtr =
661e8d8bef9SDimitry Andric             Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
662e8d8bef9SDimitry Andric         InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
663e8d8bef9SDimitry Andric                                               "Load" + Twine(Idx));
664e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx;
665e8d8bef9SDimitry Andric         ++MemIndex;
666e8d8bef9SDimitry Andric       }
667e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
668e8d8bef9SDimitry Andric                                             "Res" + Twine(Idx));
669e8d8bef9SDimitry Andric     }
670e8d8bef9SDimitry Andric     VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
671e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
672e8d8bef9SDimitry Andric     CI->eraseFromParent();
673e8d8bef9SDimitry Andric     return;
674e8d8bef9SDimitry Andric   }
675e8d8bef9SDimitry Andric 
676e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
677e8d8bef9SDimitry Andric   // better results on X86 at least.
678e8d8bef9SDimitry Andric   Value *SclrMask;
679e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
680e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
681e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
682e8d8bef9SDimitry Andric   }
683e8d8bef9SDimitry Andric 
684e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
685e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
686e8d8bef9SDimitry Andric     //
687e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
688e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
689e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
690e8d8bef9SDimitry Andric     //
691e8d8bef9SDimitry Andric 
692e8d8bef9SDimitry Andric     Value *Predicate;
693e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
694fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
695fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
696e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
697e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
698e8d8bef9SDimitry Andric     } else {
699e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
700e8d8bef9SDimitry Andric     }
701e8d8bef9SDimitry Andric 
702e8d8bef9SDimitry Andric     // Create "cond" block
703e8d8bef9SDimitry Andric     //
704e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
705e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
706e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
707e8d8bef9SDimitry Andric     //
708fe6060f1SDimitry Andric     Instruction *ThenTerm =
709fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
710fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
711e8d8bef9SDimitry Andric 
712fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
713fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
714fe6060f1SDimitry Andric 
715fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
716e8d8bef9SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
717e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
718e8d8bef9SDimitry Andric 
719e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
720e8d8bef9SDimitry Andric     Value *NewPtr;
721e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
722e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
723e8d8bef9SDimitry Andric 
724e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
725fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
726fe6060f1SDimitry Andric     NewIfBlock->setName("else");
727e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
728e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
729e8d8bef9SDimitry Andric 
730e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
731fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
732e8d8bef9SDimitry Andric     PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
733e8d8bef9SDimitry Andric     ResultPhi->addIncoming(NewVResult, CondBlock);
734e8d8bef9SDimitry Andric     ResultPhi->addIncoming(VResult, PrevIfBlock);
735e8d8bef9SDimitry Andric     VResult = ResultPhi;
736e8d8bef9SDimitry Andric 
737e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
738e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
739e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
740e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
741e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
742e8d8bef9SDimitry Andric       Ptr = PtrPhi;
743e8d8bef9SDimitry Andric     }
744e8d8bef9SDimitry Andric   }
745e8d8bef9SDimitry Andric 
746e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
747e8d8bef9SDimitry Andric   CI->eraseFromParent();
748e8d8bef9SDimitry Andric 
749e8d8bef9SDimitry Andric   ModifiedDT = true;
750e8d8bef9SDimitry Andric }
751e8d8bef9SDimitry Andric 
752fe6060f1SDimitry Andric static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
753fe6060f1SDimitry Andric                                          DomTreeUpdater *DTU,
754fe6060f1SDimitry Andric                                          bool &ModifiedDT) {
755e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
756e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
757e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
758e8d8bef9SDimitry Andric 
759e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(Src->getType());
760e8d8bef9SDimitry Andric 
761e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
762e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
763e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
764e8d8bef9SDimitry Andric 
765e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
766e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
767e8d8bef9SDimitry Andric 
768e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
769e8d8bef9SDimitry Andric 
770e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
771e8d8bef9SDimitry Andric 
772e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
773e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
774e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
775e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
776e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
777e8d8bef9SDimitry Andric         continue;
778e8d8bef9SDimitry Andric       Value *OneElt =
779e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
780e8d8bef9SDimitry Andric       Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
781e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
782e8d8bef9SDimitry Andric       ++MemIndex;
783e8d8bef9SDimitry Andric     }
784e8d8bef9SDimitry Andric     CI->eraseFromParent();
785e8d8bef9SDimitry Andric     return;
786e8d8bef9SDimitry Andric   }
787e8d8bef9SDimitry Andric 
788e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
789e8d8bef9SDimitry Andric   // better results on X86 at least.
790e8d8bef9SDimitry Andric   Value *SclrMask;
791e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
792e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
793e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
794e8d8bef9SDimitry Andric   }
795e8d8bef9SDimitry Andric 
796e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
797e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
798e8d8bef9SDimitry Andric     //
799e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
800e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
801e8d8bef9SDimitry Andric     //
802e8d8bef9SDimitry Andric     Value *Predicate;
803e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
804fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
805fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
806e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
807e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
808e8d8bef9SDimitry Andric     } else {
809e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
810e8d8bef9SDimitry Andric     }
811e8d8bef9SDimitry Andric 
812e8d8bef9SDimitry Andric     // Create "cond" block
813e8d8bef9SDimitry Andric     //
814e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
815e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
816e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
817e8d8bef9SDimitry Andric     //
818fe6060f1SDimitry Andric     Instruction *ThenTerm =
819fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
820fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
821e8d8bef9SDimitry Andric 
822fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
823fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
824fe6060f1SDimitry Andric 
825fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
826e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
827e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
828e8d8bef9SDimitry Andric 
829e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
830e8d8bef9SDimitry Andric     Value *NewPtr;
831e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
832e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
833e8d8bef9SDimitry Andric 
834e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
835fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
836fe6060f1SDimitry Andric     NewIfBlock->setName("else");
837e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
838e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
839e8d8bef9SDimitry Andric 
840fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
841fe6060f1SDimitry Andric 
842e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
843e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
844e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
845e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
846e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
847e8d8bef9SDimitry Andric       Ptr = PtrPhi;
848e8d8bef9SDimitry Andric     }
849e8d8bef9SDimitry Andric   }
850e8d8bef9SDimitry Andric   CI->eraseFromParent();
851e8d8bef9SDimitry Andric 
852e8d8bef9SDimitry Andric   ModifiedDT = true;
853e8d8bef9SDimitry Andric }
854e8d8bef9SDimitry Andric 
855fe6060f1SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI,
856fe6060f1SDimitry Andric                     DominatorTree *DT) {
857bdd1243dSDimitry Andric   std::optional<DomTreeUpdater> DTU;
858fe6060f1SDimitry Andric   if (DT)
859fe6060f1SDimitry Andric     DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
860fe6060f1SDimitry Andric 
861e8d8bef9SDimitry Andric   bool EverMadeChange = false;
862e8d8bef9SDimitry Andric   bool MadeChange = true;
863e8d8bef9SDimitry Andric   auto &DL = F.getParent()->getDataLayout();
864e8d8bef9SDimitry Andric   while (MadeChange) {
865e8d8bef9SDimitry Andric     MadeChange = false;
866349cc55cSDimitry Andric     for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
867e8d8bef9SDimitry Andric       bool ModifiedDTOnIteration = false;
868349cc55cSDimitry Andric       MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
869bdd1243dSDimitry Andric                                   DTU ? &*DTU : nullptr);
870fe6060f1SDimitry Andric 
871e8d8bef9SDimitry Andric       // Restart BB iteration if the dominator tree of the Function was changed
872e8d8bef9SDimitry Andric       if (ModifiedDTOnIteration)
873e8d8bef9SDimitry Andric         break;
874e8d8bef9SDimitry Andric     }
875e8d8bef9SDimitry Andric 
876e8d8bef9SDimitry Andric     EverMadeChange |= MadeChange;
877e8d8bef9SDimitry Andric   }
878e8d8bef9SDimitry Andric   return EverMadeChange;
879e8d8bef9SDimitry Andric }
880e8d8bef9SDimitry Andric 
881e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
882e8d8bef9SDimitry Andric   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
883fe6060f1SDimitry Andric   DominatorTree *DT = nullptr;
884fe6060f1SDimitry Andric   if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
885fe6060f1SDimitry Andric     DT = &DTWP->getDomTree();
886fe6060f1SDimitry Andric   return runImpl(F, TTI, DT);
887e8d8bef9SDimitry Andric }
888e8d8bef9SDimitry Andric 
889e8d8bef9SDimitry Andric PreservedAnalyses
890e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
891e8d8bef9SDimitry Andric   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
892fe6060f1SDimitry Andric   auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
893fe6060f1SDimitry Andric   if (!runImpl(F, TTI, DT))
894e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
895e8d8bef9SDimitry Andric   PreservedAnalyses PA;
896e8d8bef9SDimitry Andric   PA.preserve<TargetIRAnalysis>();
897fe6060f1SDimitry Andric   PA.preserve<DominatorTreeAnalysis>();
898e8d8bef9SDimitry Andric   return PA;
899e8d8bef9SDimitry Andric }
900e8d8bef9SDimitry Andric 
901e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
902fe6060f1SDimitry Andric                           const TargetTransformInfo &TTI, const DataLayout &DL,
903fe6060f1SDimitry Andric                           DomTreeUpdater *DTU) {
904e8d8bef9SDimitry Andric   bool MadeChange = false;
905e8d8bef9SDimitry Andric 
906e8d8bef9SDimitry Andric   BasicBlock::iterator CurInstIterator = BB.begin();
907e8d8bef9SDimitry Andric   while (CurInstIterator != BB.end()) {
908e8d8bef9SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
909fe6060f1SDimitry Andric       MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
910e8d8bef9SDimitry Andric     if (ModifiedDT)
911e8d8bef9SDimitry Andric       return true;
912e8d8bef9SDimitry Andric   }
913e8d8bef9SDimitry Andric 
914e8d8bef9SDimitry Andric   return MadeChange;
915e8d8bef9SDimitry Andric }
916e8d8bef9SDimitry Andric 
917e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
918e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
919fe6060f1SDimitry Andric                              const DataLayout &DL, DomTreeUpdater *DTU) {
920e8d8bef9SDimitry Andric   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
921e8d8bef9SDimitry Andric   if (II) {
922e8d8bef9SDimitry Andric     // The scalarization code below does not work for scalable vectors.
923e8d8bef9SDimitry Andric     if (isa<ScalableVectorType>(II->getType()) ||
924349cc55cSDimitry Andric         any_of(II->args(),
925e8d8bef9SDimitry Andric                [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
926e8d8bef9SDimitry Andric       return false;
927e8d8bef9SDimitry Andric 
928e8d8bef9SDimitry Andric     switch (II->getIntrinsicID()) {
929e8d8bef9SDimitry Andric     default:
930e8d8bef9SDimitry Andric       break;
931e8d8bef9SDimitry Andric     case Intrinsic::masked_load:
932e8d8bef9SDimitry Andric       // Scalarize unsupported vector masked load
933e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedLoad(
934e8d8bef9SDimitry Andric               CI->getType(),
935e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
936e8d8bef9SDimitry Andric         return false;
937fe6060f1SDimitry Andric       scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
938e8d8bef9SDimitry Andric       return true;
939e8d8bef9SDimitry Andric     case Intrinsic::masked_store:
940e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedStore(
941e8d8bef9SDimitry Andric               CI->getArgOperand(0)->getType(),
942e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
943e8d8bef9SDimitry Andric         return false;
944fe6060f1SDimitry Andric       scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
945e8d8bef9SDimitry Andric       return true;
946e8d8bef9SDimitry Andric     case Intrinsic::masked_gather: {
947fe6060f1SDimitry Andric       MaybeAlign MA =
948fe6060f1SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
949e8d8bef9SDimitry Andric       Type *LoadTy = CI->getType();
950fe6060f1SDimitry Andric       Align Alignment = DL.getValueOrABITypeAlignment(MA,
951fe6060f1SDimitry Andric                                                       LoadTy->getScalarType());
95204eeddc0SDimitry Andric       if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
95304eeddc0SDimitry Andric           !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
954e8d8bef9SDimitry Andric         return false;
955fe6060f1SDimitry Andric       scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
956e8d8bef9SDimitry Andric       return true;
957e8d8bef9SDimitry Andric     }
958e8d8bef9SDimitry Andric     case Intrinsic::masked_scatter: {
959fe6060f1SDimitry Andric       MaybeAlign MA =
960fe6060f1SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
961e8d8bef9SDimitry Andric       Type *StoreTy = CI->getArgOperand(0)->getType();
962fe6060f1SDimitry Andric       Align Alignment = DL.getValueOrABITypeAlignment(MA,
963fe6060f1SDimitry Andric                                                       StoreTy->getScalarType());
96404eeddc0SDimitry Andric       if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
96504eeddc0SDimitry Andric           !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
96604eeddc0SDimitry Andric                                            Alignment))
967e8d8bef9SDimitry Andric         return false;
968fe6060f1SDimitry Andric       scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
969e8d8bef9SDimitry Andric       return true;
970e8d8bef9SDimitry Andric     }
971e8d8bef9SDimitry Andric     case Intrinsic::masked_expandload:
972e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedExpandLoad(CI->getType()))
973e8d8bef9SDimitry Andric         return false;
974fe6060f1SDimitry Andric       scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
975e8d8bef9SDimitry Andric       return true;
976e8d8bef9SDimitry Andric     case Intrinsic::masked_compressstore:
977e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
978e8d8bef9SDimitry Andric         return false;
979fe6060f1SDimitry Andric       scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
980e8d8bef9SDimitry Andric       return true;
981e8d8bef9SDimitry Andric     }
982e8d8bef9SDimitry Andric   }
983e8d8bef9SDimitry Andric 
984e8d8bef9SDimitry Andric   return false;
985e8d8bef9SDimitry Andric }
986