xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1e8d8bef9SDimitry Andric //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
281ad6265SDimitry Andric //                                    intrinsics
3e8d8bef9SDimitry Andric //
4e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
6e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7e8d8bef9SDimitry Andric //
8e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
9e8d8bef9SDimitry Andric //
10e8d8bef9SDimitry Andric // This pass replaces masked memory intrinsics - when unsupported by the target
11e8d8bef9SDimitry Andric // - with a chain of basic blocks, that deal with the elements one-by-one if the
12e8d8bef9SDimitry Andric // appropriate mask bit is set.
13e8d8bef9SDimitry Andric //
14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric 
16e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
17e8d8bef9SDimitry Andric #include "llvm/ADT/Twine.h"
18fe6060f1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
19e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
20e8d8bef9SDimitry Andric #include "llvm/IR/BasicBlock.h"
21e8d8bef9SDimitry Andric #include "llvm/IR/Constant.h"
22e8d8bef9SDimitry Andric #include "llvm/IR/Constants.h"
23e8d8bef9SDimitry Andric #include "llvm/IR/DerivedTypes.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Dominators.h"
25e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
26e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h"
27e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h"
28e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h"
29e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/Type.h"
31e8d8bef9SDimitry Andric #include "llvm/IR/Value.h"
32e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
33e8d8bef9SDimitry Andric #include "llvm/Pass.h"
34e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h"
35e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h"
36fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37e8d8bef9SDimitry Andric #include <cassert>
38bdd1243dSDimitry Andric #include <optional>
39e8d8bef9SDimitry Andric 
40e8d8bef9SDimitry Andric using namespace llvm;
41e8d8bef9SDimitry Andric 
42e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin"
43e8d8bef9SDimitry Andric 
44e8d8bef9SDimitry Andric namespace {
45e8d8bef9SDimitry Andric 
46e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
47e8d8bef9SDimitry Andric public:
48e8d8bef9SDimitry Andric   static char ID; // Pass identification, replacement for typeid
49e8d8bef9SDimitry Andric 
50e8d8bef9SDimitry Andric   explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
51e8d8bef9SDimitry Andric     initializeScalarizeMaskedMemIntrinLegacyPassPass(
52e8d8bef9SDimitry Andric         *PassRegistry::getPassRegistry());
53e8d8bef9SDimitry Andric   }
54e8d8bef9SDimitry Andric 
55e8d8bef9SDimitry Andric   bool runOnFunction(Function &F) override;
56e8d8bef9SDimitry Andric 
57e8d8bef9SDimitry Andric   StringRef getPassName() const override {
58e8d8bef9SDimitry Andric     return "Scalarize Masked Memory Intrinsics";
59e8d8bef9SDimitry Andric   }
60e8d8bef9SDimitry Andric 
61e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
62e8d8bef9SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
63fe6060f1SDimitry Andric     AU.addPreserved<DominatorTreeWrapperPass>();
64e8d8bef9SDimitry Andric   }
65e8d8bef9SDimitry Andric };
66e8d8bef9SDimitry Andric 
67e8d8bef9SDimitry Andric } // end anonymous namespace
68e8d8bef9SDimitry Andric 
69e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
70fe6060f1SDimitry Andric                           const TargetTransformInfo &TTI, const DataLayout &DL,
71fe6060f1SDimitry Andric                           DomTreeUpdater *DTU);
72e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
73e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
74fe6060f1SDimitry Andric                              const DataLayout &DL, DomTreeUpdater *DTU);
75e8d8bef9SDimitry Andric 
76e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
77e8d8bef9SDimitry Andric 
78e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
79e8d8bef9SDimitry Andric                       "Scalarize unsupported masked memory intrinsics", false,
80e8d8bef9SDimitry Andric                       false)
81e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
82fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
83e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
84e8d8bef9SDimitry Andric                     "Scalarize unsupported masked memory intrinsics", false,
85e8d8bef9SDimitry Andric                     false)
86e8d8bef9SDimitry Andric 
87e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
88e8d8bef9SDimitry Andric   return new ScalarizeMaskedMemIntrinLegacyPass();
89e8d8bef9SDimitry Andric }
90e8d8bef9SDimitry Andric 
91e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) {
92e8d8bef9SDimitry Andric   Constant *C = dyn_cast<Constant>(Mask);
93e8d8bef9SDimitry Andric   if (!C)
94e8d8bef9SDimitry Andric     return false;
95e8d8bef9SDimitry Andric 
96e8d8bef9SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
97e8d8bef9SDimitry Andric   for (unsigned i = 0; i != NumElts; ++i) {
98e8d8bef9SDimitry Andric     Constant *CElt = C->getAggregateElement(i);
99e8d8bef9SDimitry Andric     if (!CElt || !isa<ConstantInt>(CElt))
100e8d8bef9SDimitry Andric       return false;
101e8d8bef9SDimitry Andric   }
102e8d8bef9SDimitry Andric 
103e8d8bef9SDimitry Andric   return true;
104e8d8bef9SDimitry Andric }
105e8d8bef9SDimitry Andric 
106fe6060f1SDimitry Andric static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
107fe6060f1SDimitry Andric                                 unsigned Idx) {
108fe6060f1SDimitry Andric   return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
109fe6060f1SDimitry Andric }
110fe6060f1SDimitry Andric 
111e8d8bef9SDimitry Andric // Translate a masked load intrinsic like
112e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
113e8d8bef9SDimitry Andric //                               <16 x i1> %mask, <16 x i32> %passthru)
114e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
115e8d8bef9SDimitry Andric // the appropriate mask bit is set
116e8d8bef9SDimitry Andric //
117e8d8bef9SDimitry Andric //  %1 = bitcast i8* %addr to i32*
118e8d8bef9SDimitry Andric //  %2 = extractelement <16 x i1> %mask, i32 0
119e8d8bef9SDimitry Andric //  br i1 %2, label %cond.load, label %else
120e8d8bef9SDimitry Andric //
121e8d8bef9SDimitry Andric // cond.load:                                        ; preds = %0
122e8d8bef9SDimitry Andric //  %3 = getelementptr i32* %1, i32 0
123e8d8bef9SDimitry Andric //  %4 = load i32* %3
124e8d8bef9SDimitry Andric //  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
125e8d8bef9SDimitry Andric //  br label %else
126e8d8bef9SDimitry Andric //
127e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.load
12806c3fb27SDimitry Andric //  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ]
129e8d8bef9SDimitry Andric //  %6 = extractelement <16 x i1> %mask, i32 1
130e8d8bef9SDimitry Andric //  br i1 %6, label %cond.load1, label %else2
131e8d8bef9SDimitry Andric //
132e8d8bef9SDimitry Andric // cond.load1:                                       ; preds = %else
133e8d8bef9SDimitry Andric //  %7 = getelementptr i32* %1, i32 1
134e8d8bef9SDimitry Andric //  %8 = load i32* %7
135e8d8bef9SDimitry Andric //  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
136e8d8bef9SDimitry Andric //  br label %else2
137e8d8bef9SDimitry Andric //
138e8d8bef9SDimitry Andric // else2:                                          ; preds = %else, %cond.load1
139e8d8bef9SDimitry Andric //  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
140e8d8bef9SDimitry Andric //  %10 = extractelement <16 x i1> %mask, i32 2
141e8d8bef9SDimitry Andric //  br i1 %10, label %cond.load4, label %else5
142e8d8bef9SDimitry Andric //
143fe6060f1SDimitry Andric static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
144fe6060f1SDimitry Andric                                 DomTreeUpdater *DTU, bool &ModifiedDT) {
145e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
146e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
147e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
148e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
149e8d8bef9SDimitry Andric 
150e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
151e8d8bef9SDimitry Andric   VectorType *VecType = cast<FixedVectorType>(CI->getType());
152e8d8bef9SDimitry Andric 
153e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
154e8d8bef9SDimitry Andric 
155e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
156e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
157e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
158e8d8bef9SDimitry Andric 
159e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
160e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
161e8d8bef9SDimitry Andric 
162e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
163e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
164e8d8bef9SDimitry Andric     Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
165e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(NewI);
166e8d8bef9SDimitry Andric     CI->eraseFromParent();
167e8d8bef9SDimitry Andric     return;
168e8d8bef9SDimitry Andric   }
169e8d8bef9SDimitry Andric 
170e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
171e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
172e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
173e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
174e8d8bef9SDimitry Andric 
175e8d8bef9SDimitry Andric   // The result vector
176e8d8bef9SDimitry Andric   Value *VResult = Src0;
177e8d8bef9SDimitry Andric 
178e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
179e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
180e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
181e8d8bef9SDimitry Andric         continue;
18206c3fb27SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
183e8d8bef9SDimitry Andric       LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
184e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, Load, Idx);
185e8d8bef9SDimitry Andric     }
186e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
187e8d8bef9SDimitry Andric     CI->eraseFromParent();
188e8d8bef9SDimitry Andric     return;
189e8d8bef9SDimitry Andric   }
190e8d8bef9SDimitry Andric 
191e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
192e8d8bef9SDimitry Andric   // better results on X86 at least.
193e8d8bef9SDimitry Andric   Value *SclrMask;
194e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
195e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
196e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
197e8d8bef9SDimitry Andric   }
198e8d8bef9SDimitry Andric 
199e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
200e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
201e8d8bef9SDimitry Andric     //
202e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
203e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
204e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
205e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
206e8d8bef9SDimitry Andric     //
207e8d8bef9SDimitry Andric     Value *Predicate;
208e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
209fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
210fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
211e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
212e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
213e8d8bef9SDimitry Andric     } else {
214e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
215e8d8bef9SDimitry Andric     }
216e8d8bef9SDimitry Andric 
217e8d8bef9SDimitry Andric     // Create "cond" block
218e8d8bef9SDimitry Andric     //
219e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
220e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
221e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
222e8d8bef9SDimitry Andric     //
223fe6060f1SDimitry Andric     Instruction *ThenTerm =
224fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
225fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
226e8d8bef9SDimitry Andric 
227fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
228fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
229fe6060f1SDimitry Andric 
230fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
23106c3fb27SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
232e8d8bef9SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
233e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
234e8d8bef9SDimitry Andric 
235e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
236fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
237fe6060f1SDimitry Andric     NewIfBlock->setName("else");
238e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
239e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
240e8d8bef9SDimitry Andric 
241e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
242fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
243e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
244e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
245e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
246e8d8bef9SDimitry Andric     VResult = Phi;
247e8d8bef9SDimitry Andric   }
248e8d8bef9SDimitry Andric 
249e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
250e8d8bef9SDimitry Andric   CI->eraseFromParent();
251e8d8bef9SDimitry Andric 
252e8d8bef9SDimitry Andric   ModifiedDT = true;
253e8d8bef9SDimitry Andric }
254e8d8bef9SDimitry Andric 
255e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like
256e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
257e8d8bef9SDimitry Andric //                               <16 x i1> %mask)
258e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
259e8d8bef9SDimitry Andric // the appropriate mask bit is set
260e8d8bef9SDimitry Andric //
261e8d8bef9SDimitry Andric //   %1 = bitcast i8* %addr to i32*
262e8d8bef9SDimitry Andric //   %2 = extractelement <16 x i1> %mask, i32 0
263e8d8bef9SDimitry Andric //   br i1 %2, label %cond.store, label %else
264e8d8bef9SDimitry Andric //
265e8d8bef9SDimitry Andric // cond.store:                                       ; preds = %0
266e8d8bef9SDimitry Andric //   %3 = extractelement <16 x i32> %val, i32 0
267e8d8bef9SDimitry Andric //   %4 = getelementptr i32* %1, i32 0
268e8d8bef9SDimitry Andric //   store i32 %3, i32* %4
269e8d8bef9SDimitry Andric //   br label %else
270e8d8bef9SDimitry Andric //
271e8d8bef9SDimitry Andric // else:                                             ; preds = %0, %cond.store
272e8d8bef9SDimitry Andric //   %5 = extractelement <16 x i1> %mask, i32 1
273e8d8bef9SDimitry Andric //   br i1 %5, label %cond.store1, label %else2
274e8d8bef9SDimitry Andric //
275e8d8bef9SDimitry Andric // cond.store1:                                      ; preds = %else
276e8d8bef9SDimitry Andric //   %6 = extractelement <16 x i32> %val, i32 1
277e8d8bef9SDimitry Andric //   %7 = getelementptr i32* %1, i32 1
278e8d8bef9SDimitry Andric //   store i32 %6, i32* %7
279e8d8bef9SDimitry Andric //   br label %else2
280e8d8bef9SDimitry Andric //   . . .
281fe6060f1SDimitry Andric static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
282fe6060f1SDimitry Andric                                  DomTreeUpdater *DTU, bool &ModifiedDT) {
283e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
284e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
285e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
286e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
287e8d8bef9SDimitry Andric 
288e8d8bef9SDimitry Andric   const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
289e8d8bef9SDimitry Andric   auto *VecType = cast<VectorType>(Src->getType());
290e8d8bef9SDimitry Andric 
291e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
292e8d8bef9SDimitry Andric 
293e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
294e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
295e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
296e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
297e8d8bef9SDimitry Andric 
298e8d8bef9SDimitry Andric   // Short-cut if the mask is all-true.
299e8d8bef9SDimitry Andric   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
300e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(Src, Ptr, AlignVal);
301e8d8bef9SDimitry Andric     CI->eraseFromParent();
302e8d8bef9SDimitry Andric     return;
303e8d8bef9SDimitry Andric   }
304e8d8bef9SDimitry Andric 
305e8d8bef9SDimitry Andric   // Adjust alignment for the scalar instruction.
306e8d8bef9SDimitry Andric   const Align AdjustedAlignVal =
307e8d8bef9SDimitry Andric       commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
308e8d8bef9SDimitry Andric   unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
309e8d8bef9SDimitry Andric 
310e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
311e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
312e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
313e8d8bef9SDimitry Andric         continue;
314e8d8bef9SDimitry Andric       Value *OneElt = Builder.CreateExtractElement(Src, Idx);
31506c3fb27SDimitry Andric       Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
316e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
317e8d8bef9SDimitry Andric     }
318e8d8bef9SDimitry Andric     CI->eraseFromParent();
319e8d8bef9SDimitry Andric     return;
320e8d8bef9SDimitry Andric   }
321e8d8bef9SDimitry Andric 
322e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
323e8d8bef9SDimitry Andric   // better results on X86 at least.
324e8d8bef9SDimitry Andric   Value *SclrMask;
325e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
326e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
327e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
328e8d8bef9SDimitry Andric   }
329e8d8bef9SDimitry Andric 
330e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
331e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
332e8d8bef9SDimitry Andric     //
333e8d8bef9SDimitry Andric     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
334e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
335e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
336e8d8bef9SDimitry Andric     //
337e8d8bef9SDimitry Andric     Value *Predicate;
338e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
339fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
340fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
341e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
342e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
343e8d8bef9SDimitry Andric     } else {
344e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx);
345e8d8bef9SDimitry Andric     }
346e8d8bef9SDimitry Andric 
347e8d8bef9SDimitry Andric     // Create "cond" block
348e8d8bef9SDimitry Andric     //
349e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
350e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
351e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
352e8d8bef9SDimitry Andric     //
353fe6060f1SDimitry Andric     Instruction *ThenTerm =
354fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
355fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
356e8d8bef9SDimitry Andric 
357fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
358fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
359fe6060f1SDimitry Andric 
360fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
361e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
36206c3fb27SDimitry Andric     Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
363e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
364e8d8bef9SDimitry Andric 
365e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
366fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
367fe6060f1SDimitry Andric     NewIfBlock->setName("else");
368fe6060f1SDimitry Andric 
369fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
370e8d8bef9SDimitry Andric   }
371e8d8bef9SDimitry Andric   CI->eraseFromParent();
372e8d8bef9SDimitry Andric 
373e8d8bef9SDimitry Andric   ModifiedDT = true;
374e8d8bef9SDimitry Andric }
375e8d8bef9SDimitry Andric 
376e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like
377e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
378e8d8bef9SDimitry Andric //                               <16 x i1> %Mask, <16 x i32> %Src)
379e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
380e8d8bef9SDimitry Andric // the appropriate mask bit is set
381e8d8bef9SDimitry Andric //
382e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
383e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
384e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else
385e8d8bef9SDimitry Andric //
386e8d8bef9SDimitry Andric // cond.load:
387e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
388e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4
38906c3fb27SDimitry Andric // %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0
390e8d8bef9SDimitry Andric // br label %else
391e8d8bef9SDimitry Andric //
392e8d8bef9SDimitry Andric // else:
39306c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [poison, %0]
394e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
395e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2
396e8d8bef9SDimitry Andric //
397e8d8bef9SDimitry Andric // cond.load1:
398e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
399e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4
400e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
401e8d8bef9SDimitry Andric // br label %else2
402e8d8bef9SDimitry Andric // . . .
403e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
404e8d8bef9SDimitry Andric // ret <16 x i32> %Result
405fe6060f1SDimitry Andric static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
406fe6060f1SDimitry Andric                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
407e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(0);
408e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(1);
409e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
410e8d8bef9SDimitry Andric   Value *Src0 = CI->getArgOperand(3);
411e8d8bef9SDimitry Andric 
412e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
413e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
414e8d8bef9SDimitry Andric 
415e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
416e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
417e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
418e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
419e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
420e8d8bef9SDimitry Andric 
421e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
422e8d8bef9SDimitry Andric 
423e8d8bef9SDimitry Andric   // The result vector
424e8d8bef9SDimitry Andric   Value *VResult = Src0;
425e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
426e8d8bef9SDimitry Andric 
427e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
428e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
429e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
430e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
431e8d8bef9SDimitry Andric         continue;
432e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
433e8d8bef9SDimitry Andric       LoadInst *Load =
434e8d8bef9SDimitry Andric           Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
435e8d8bef9SDimitry Andric       VResult =
436e8d8bef9SDimitry Andric           Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
437e8d8bef9SDimitry Andric     }
438e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
439e8d8bef9SDimitry Andric     CI->eraseFromParent();
440e8d8bef9SDimitry Andric     return;
441e8d8bef9SDimitry Andric   }
442e8d8bef9SDimitry Andric 
443e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
444e8d8bef9SDimitry Andric   // better results on X86 at least.
445e8d8bef9SDimitry Andric   Value *SclrMask;
446e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
447e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
448e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
449e8d8bef9SDimitry Andric   }
450e8d8bef9SDimitry Andric 
451e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
452e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
453e8d8bef9SDimitry Andric     //
454e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
455e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
456e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.load, label %else
457e8d8bef9SDimitry Andric     //
458e8d8bef9SDimitry Andric 
459e8d8bef9SDimitry Andric     Value *Predicate;
460e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
461fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
462fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
463e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
464e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
465e8d8bef9SDimitry Andric     } else {
466e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
467e8d8bef9SDimitry Andric     }
468e8d8bef9SDimitry Andric 
469e8d8bef9SDimitry Andric     // Create "cond" block
470e8d8bef9SDimitry Andric     //
471e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
472e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
473e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
474e8d8bef9SDimitry Andric     //
475fe6060f1SDimitry Andric     Instruction *ThenTerm =
476fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
477fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
478e8d8bef9SDimitry Andric 
479fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
480fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
481fe6060f1SDimitry Andric 
482fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
483e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
484e8d8bef9SDimitry Andric     LoadInst *Load =
485e8d8bef9SDimitry Andric         Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
486e8d8bef9SDimitry Andric     Value *NewVResult =
487e8d8bef9SDimitry Andric         Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
488e8d8bef9SDimitry Andric 
489e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
490fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
491fe6060f1SDimitry Andric     NewIfBlock->setName("else");
492e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
493e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
494e8d8bef9SDimitry Andric 
495fe6060f1SDimitry Andric     // Create the phi to join the new and previous value.
496fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
497e8d8bef9SDimitry Andric     PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
498e8d8bef9SDimitry Andric     Phi->addIncoming(NewVResult, CondBlock);
499e8d8bef9SDimitry Andric     Phi->addIncoming(VResult, PrevIfBlock);
500e8d8bef9SDimitry Andric     VResult = Phi;
501e8d8bef9SDimitry Andric   }
502e8d8bef9SDimitry Andric 
503e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
504e8d8bef9SDimitry Andric   CI->eraseFromParent();
505e8d8bef9SDimitry Andric 
506e8d8bef9SDimitry Andric   ModifiedDT = true;
507e8d8bef9SDimitry Andric }
508e8d8bef9SDimitry Andric 
509e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like
510e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
511e8d8bef9SDimitry Andric //                                  <16 x i1> %Mask)
512e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
513e8d8bef9SDimitry Andric // the appropriate mask bit is set.
514e8d8bef9SDimitry Andric //
515e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
516e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
517e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else
518e8d8bef9SDimitry Andric //
519e8d8bef9SDimitry Andric // cond.store:
520e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0
521e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
522e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4
523e8d8bef9SDimitry Andric // br label %else
524e8d8bef9SDimitry Andric //
525e8d8bef9SDimitry Andric // else:
526e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
527e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2
528e8d8bef9SDimitry Andric //
529e8d8bef9SDimitry Andric // cond.store1:
530e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1
531e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
532e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4
533e8d8bef9SDimitry Andric // br label %else2
534e8d8bef9SDimitry Andric //   . . .
535fe6060f1SDimitry Andric static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
536fe6060f1SDimitry Andric                                    DomTreeUpdater *DTU, bool &ModifiedDT) {
537e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
538e8d8bef9SDimitry Andric   Value *Ptrs = CI->getArgOperand(1);
539e8d8bef9SDimitry Andric   Value *Alignment = CI->getArgOperand(2);
540e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(3);
541e8d8bef9SDimitry Andric 
542e8d8bef9SDimitry Andric   auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
543e8d8bef9SDimitry Andric 
544e8d8bef9SDimitry Andric   assert(
545e8d8bef9SDimitry Andric       isa<VectorType>(Ptrs->getType()) &&
546e8d8bef9SDimitry Andric       isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
547e8d8bef9SDimitry Andric       "Vector of pointers is expected in masked scatter intrinsic");
548e8d8bef9SDimitry Andric 
549e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
550e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
551e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
552e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
553e8d8bef9SDimitry Andric 
554e8d8bef9SDimitry Andric   MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
555e8d8bef9SDimitry Andric   unsigned VectorWidth = SrcFVTy->getNumElements();
556e8d8bef9SDimitry Andric 
557e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
558e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
559e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
560e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
561e8d8bef9SDimitry Andric         continue;
562e8d8bef9SDimitry Andric       Value *OneElt =
563e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
564e8d8bef9SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
565e8d8bef9SDimitry Andric       Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
566e8d8bef9SDimitry Andric     }
567e8d8bef9SDimitry Andric     CI->eraseFromParent();
568e8d8bef9SDimitry Andric     return;
569e8d8bef9SDimitry Andric   }
570e8d8bef9SDimitry Andric 
571e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
572e8d8bef9SDimitry Andric   // better results on X86 at least.
573e8d8bef9SDimitry Andric   Value *SclrMask;
574e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
575e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
576e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
577e8d8bef9SDimitry Andric   }
578e8d8bef9SDimitry Andric 
579e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
580e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
581e8d8bef9SDimitry Andric     //
582e8d8bef9SDimitry Andric     //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
583e8d8bef9SDimitry Andric     //  %cond = icmp ne i16 %mask_1, 0
584e8d8bef9SDimitry Andric     //  br i1 %Mask1, label %cond.store, label %else
585e8d8bef9SDimitry Andric     //
586e8d8bef9SDimitry Andric     Value *Predicate;
587e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
588fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
589fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
590e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
591e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
592e8d8bef9SDimitry Andric     } else {
593e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
594e8d8bef9SDimitry Andric     }
595e8d8bef9SDimitry Andric 
596e8d8bef9SDimitry Andric     // Create "cond" block
597e8d8bef9SDimitry Andric     //
598e8d8bef9SDimitry Andric     //  %Elt1 = extractelement <16 x i32> %Src, i32 1
599e8d8bef9SDimitry Andric     //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
600e8d8bef9SDimitry Andric     //  %store i32 %Elt1, i32* %Ptr1
601e8d8bef9SDimitry Andric     //
602fe6060f1SDimitry Andric     Instruction *ThenTerm =
603fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
604fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
605e8d8bef9SDimitry Andric 
606fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
607fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
608fe6060f1SDimitry Andric 
609fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
610e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
611e8d8bef9SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
612e8d8bef9SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
613e8d8bef9SDimitry Andric 
614e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
615fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
616fe6060f1SDimitry Andric     NewIfBlock->setName("else");
617fe6060f1SDimitry Andric 
618fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
619e8d8bef9SDimitry Andric   }
620e8d8bef9SDimitry Andric   CI->eraseFromParent();
621e8d8bef9SDimitry Andric 
622e8d8bef9SDimitry Andric   ModifiedDT = true;
623e8d8bef9SDimitry Andric }
624e8d8bef9SDimitry Andric 
625fe6060f1SDimitry Andric static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
626fe6060f1SDimitry Andric                                       DomTreeUpdater *DTU, bool &ModifiedDT) {
627e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(0);
628e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(1);
629e8d8bef9SDimitry Andric   Value *PassThru = CI->getArgOperand(2);
630*0fca6ea1SDimitry Andric   Align Alignment = CI->getParamAlign(0).valueOrOne();
631e8d8bef9SDimitry Andric 
632e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(CI->getType());
633e8d8bef9SDimitry Andric 
634e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
635e8d8bef9SDimitry Andric 
636e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
637e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
638e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
639e8d8bef9SDimitry Andric 
640e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
641e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
642e8d8bef9SDimitry Andric 
643e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
644e8d8bef9SDimitry Andric 
645e8d8bef9SDimitry Andric   // The result vector
646e8d8bef9SDimitry Andric   Value *VResult = PassThru;
647e8d8bef9SDimitry Andric 
648*0fca6ea1SDimitry Andric   // Adjust alignment for the scalar instruction.
649*0fca6ea1SDimitry Andric   const Align AdjustedAlignment =
650*0fca6ea1SDimitry Andric       commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
651*0fca6ea1SDimitry Andric 
652e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
65306c3fb27SDimitry Andric   // Create a build_vector pattern, with loads/poisons as necessary and then
654e8d8bef9SDimitry Andric   // shuffle blend with the pass through value.
655e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
656e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
657bdd1243dSDimitry Andric     VResult = PoisonValue::get(VecType);
65806c3fb27SDimitry Andric     SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem);
659e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
660e8d8bef9SDimitry Andric       Value *InsertElt;
661e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
66206c3fb27SDimitry Andric         InsertElt = PoisonValue::get(EltTy);
663e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx + VectorWidth;
664e8d8bef9SDimitry Andric       } else {
665e8d8bef9SDimitry Andric         Value *NewPtr =
666e8d8bef9SDimitry Andric             Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
667*0fca6ea1SDimitry Andric         InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment,
668e8d8bef9SDimitry Andric                                               "Load" + Twine(Idx));
669e8d8bef9SDimitry Andric         ShuffleMask[Idx] = Idx;
670e8d8bef9SDimitry Andric         ++MemIndex;
671e8d8bef9SDimitry Andric       }
672e8d8bef9SDimitry Andric       VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
673e8d8bef9SDimitry Andric                                             "Res" + Twine(Idx));
674e8d8bef9SDimitry Andric     }
675e8d8bef9SDimitry Andric     VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
676e8d8bef9SDimitry Andric     CI->replaceAllUsesWith(VResult);
677e8d8bef9SDimitry Andric     CI->eraseFromParent();
678e8d8bef9SDimitry Andric     return;
679e8d8bef9SDimitry Andric   }
680e8d8bef9SDimitry Andric 
681e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
682e8d8bef9SDimitry Andric   // better results on X86 at least.
683e8d8bef9SDimitry Andric   Value *SclrMask;
684e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
685e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
686e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
687e8d8bef9SDimitry Andric   }
688e8d8bef9SDimitry Andric 
689e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
690e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
691e8d8bef9SDimitry Andric     //
692e8d8bef9SDimitry Andric     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
693e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
694e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.load, label %else
695e8d8bef9SDimitry Andric     //
696e8d8bef9SDimitry Andric 
697e8d8bef9SDimitry Andric     Value *Predicate;
698e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
699fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
700fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
701e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
702e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
703e8d8bef9SDimitry Andric     } else {
704e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
705e8d8bef9SDimitry Andric     }
706e8d8bef9SDimitry Andric 
707e8d8bef9SDimitry Andric     // Create "cond" block
708e8d8bef9SDimitry Andric     //
709e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
710e8d8bef9SDimitry Andric     //  %Elt = load i32* %EltAddr
711e8d8bef9SDimitry Andric     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
712e8d8bef9SDimitry Andric     //
713fe6060f1SDimitry Andric     Instruction *ThenTerm =
714fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
715fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
716e8d8bef9SDimitry Andric 
717fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
718fe6060f1SDimitry Andric     CondBlock->setName("cond.load");
719fe6060f1SDimitry Andric 
720fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
721*0fca6ea1SDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AdjustedAlignment);
722e8d8bef9SDimitry Andric     Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
723e8d8bef9SDimitry Andric 
724e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
725e8d8bef9SDimitry Andric     Value *NewPtr;
726e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
727e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
728e8d8bef9SDimitry Andric 
729e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
730fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
731fe6060f1SDimitry Andric     NewIfBlock->setName("else");
732e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
733e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
734e8d8bef9SDimitry Andric 
735e8d8bef9SDimitry Andric     // Create the phi to join the new and previous value.
736fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
737e8d8bef9SDimitry Andric     PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
738e8d8bef9SDimitry Andric     ResultPhi->addIncoming(NewVResult, CondBlock);
739e8d8bef9SDimitry Andric     ResultPhi->addIncoming(VResult, PrevIfBlock);
740e8d8bef9SDimitry Andric     VResult = ResultPhi;
741e8d8bef9SDimitry Andric 
742e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
743e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
744e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
745e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
746e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
747e8d8bef9SDimitry Andric       Ptr = PtrPhi;
748e8d8bef9SDimitry Andric     }
749e8d8bef9SDimitry Andric   }
750e8d8bef9SDimitry Andric 
751e8d8bef9SDimitry Andric   CI->replaceAllUsesWith(VResult);
752e8d8bef9SDimitry Andric   CI->eraseFromParent();
753e8d8bef9SDimitry Andric 
754e8d8bef9SDimitry Andric   ModifiedDT = true;
755e8d8bef9SDimitry Andric }
756e8d8bef9SDimitry Andric 
757fe6060f1SDimitry Andric static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
758fe6060f1SDimitry Andric                                          DomTreeUpdater *DTU,
759fe6060f1SDimitry Andric                                          bool &ModifiedDT) {
760e8d8bef9SDimitry Andric   Value *Src = CI->getArgOperand(0);
761e8d8bef9SDimitry Andric   Value *Ptr = CI->getArgOperand(1);
762e8d8bef9SDimitry Andric   Value *Mask = CI->getArgOperand(2);
763*0fca6ea1SDimitry Andric   Align Alignment = CI->getParamAlign(1).valueOrOne();
764e8d8bef9SDimitry Andric 
765e8d8bef9SDimitry Andric   auto *VecType = cast<FixedVectorType>(Src->getType());
766e8d8bef9SDimitry Andric 
767e8d8bef9SDimitry Andric   IRBuilder<> Builder(CI->getContext());
768e8d8bef9SDimitry Andric   Instruction *InsertPt = CI;
769e8d8bef9SDimitry Andric   BasicBlock *IfBlock = CI->getParent();
770e8d8bef9SDimitry Andric 
771e8d8bef9SDimitry Andric   Builder.SetInsertPoint(InsertPt);
772e8d8bef9SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
773e8d8bef9SDimitry Andric 
774e8d8bef9SDimitry Andric   Type *EltTy = VecType->getElementType();
775e8d8bef9SDimitry Andric 
776*0fca6ea1SDimitry Andric   // Adjust alignment for the scalar instruction.
777*0fca6ea1SDimitry Andric   const Align AdjustedAlignment =
778*0fca6ea1SDimitry Andric       commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
779*0fca6ea1SDimitry Andric 
780e8d8bef9SDimitry Andric   unsigned VectorWidth = VecType->getNumElements();
781e8d8bef9SDimitry Andric 
782e8d8bef9SDimitry Andric   // Shorten the way if the mask is a vector of constants.
783e8d8bef9SDimitry Andric   if (isConstantIntVector(Mask)) {
784e8d8bef9SDimitry Andric     unsigned MemIndex = 0;
785e8d8bef9SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
786e8d8bef9SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
787e8d8bef9SDimitry Andric         continue;
788e8d8bef9SDimitry Andric       Value *OneElt =
789e8d8bef9SDimitry Andric           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
790e8d8bef9SDimitry Andric       Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
791*0fca6ea1SDimitry Andric       Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment);
792e8d8bef9SDimitry Andric       ++MemIndex;
793e8d8bef9SDimitry Andric     }
794e8d8bef9SDimitry Andric     CI->eraseFromParent();
795e8d8bef9SDimitry Andric     return;
796e8d8bef9SDimitry Andric   }
797e8d8bef9SDimitry Andric 
798e8d8bef9SDimitry Andric   // If the mask is not v1i1, use scalar bit test operations. This generates
799e8d8bef9SDimitry Andric   // better results on X86 at least.
800e8d8bef9SDimitry Andric   Value *SclrMask;
801e8d8bef9SDimitry Andric   if (VectorWidth != 1) {
802e8d8bef9SDimitry Andric     Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
803e8d8bef9SDimitry Andric     SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
804e8d8bef9SDimitry Andric   }
805e8d8bef9SDimitry Andric 
806e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
807e8d8bef9SDimitry Andric     // Fill the "else" block, created in the previous iteration
808e8d8bef9SDimitry Andric     //
809e8d8bef9SDimitry Andric     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
810e8d8bef9SDimitry Andric     //  br i1 %mask_1, label %cond.store, label %else
811e8d8bef9SDimitry Andric     //
812e8d8bef9SDimitry Andric     Value *Predicate;
813e8d8bef9SDimitry Andric     if (VectorWidth != 1) {
814fe6060f1SDimitry Andric       Value *Mask = Builder.getInt(APInt::getOneBitSet(
815fe6060f1SDimitry Andric           VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
816e8d8bef9SDimitry Andric       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
817e8d8bef9SDimitry Andric                                        Builder.getIntN(VectorWidth, 0));
818e8d8bef9SDimitry Andric     } else {
819e8d8bef9SDimitry Andric       Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
820e8d8bef9SDimitry Andric     }
821e8d8bef9SDimitry Andric 
822e8d8bef9SDimitry Andric     // Create "cond" block
823e8d8bef9SDimitry Andric     //
824e8d8bef9SDimitry Andric     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
825e8d8bef9SDimitry Andric     //  %EltAddr = getelementptr i32* %1, i32 0
826e8d8bef9SDimitry Andric     //  %store i32 %OneElt, i32* %EltAddr
827e8d8bef9SDimitry Andric     //
828fe6060f1SDimitry Andric     Instruction *ThenTerm =
829fe6060f1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
830fe6060f1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
831e8d8bef9SDimitry Andric 
832fe6060f1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
833fe6060f1SDimitry Andric     CondBlock->setName("cond.store");
834fe6060f1SDimitry Andric 
835fe6060f1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
836e8d8bef9SDimitry Andric     Value *OneElt = Builder.CreateExtractElement(Src, Idx);
837*0fca6ea1SDimitry Andric     Builder.CreateAlignedStore(OneElt, Ptr, AdjustedAlignment);
838e8d8bef9SDimitry Andric 
839e8d8bef9SDimitry Andric     // Move the pointer if there are more blocks to come.
840e8d8bef9SDimitry Andric     Value *NewPtr;
841e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth)
842e8d8bef9SDimitry Andric       NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
843e8d8bef9SDimitry Andric 
844e8d8bef9SDimitry Andric     // Create "else" block, fill it in the next iteration
845fe6060f1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
846fe6060f1SDimitry Andric     NewIfBlock->setName("else");
847e8d8bef9SDimitry Andric     BasicBlock *PrevIfBlock = IfBlock;
848e8d8bef9SDimitry Andric     IfBlock = NewIfBlock;
849e8d8bef9SDimitry Andric 
850fe6060f1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
851fe6060f1SDimitry Andric 
852e8d8bef9SDimitry Andric     // Add a PHI for the pointer if this isn't the last iteration.
853e8d8bef9SDimitry Andric     if ((Idx + 1) != VectorWidth) {
854e8d8bef9SDimitry Andric       PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
855e8d8bef9SDimitry Andric       PtrPhi->addIncoming(NewPtr, CondBlock);
856e8d8bef9SDimitry Andric       PtrPhi->addIncoming(Ptr, PrevIfBlock);
857e8d8bef9SDimitry Andric       Ptr = PtrPhi;
858e8d8bef9SDimitry Andric     }
859e8d8bef9SDimitry Andric   }
860e8d8bef9SDimitry Andric   CI->eraseFromParent();
861e8d8bef9SDimitry Andric 
862e8d8bef9SDimitry Andric   ModifiedDT = true;
863e8d8bef9SDimitry Andric }
864e8d8bef9SDimitry Andric 
865*0fca6ea1SDimitry Andric static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
866*0fca6ea1SDimitry Andric                                            DomTreeUpdater *DTU,
867*0fca6ea1SDimitry Andric                                            bool &ModifiedDT) {
868*0fca6ea1SDimitry Andric   // If we extend histogram to return a result someday (like the updated vector)
869*0fca6ea1SDimitry Andric   // then we'll need to support it here.
870*0fca6ea1SDimitry Andric   assert(CI->getType()->isVoidTy() && "Histogram with non-void return.");
871*0fca6ea1SDimitry Andric   Value *Ptrs = CI->getArgOperand(0);
872*0fca6ea1SDimitry Andric   Value *Inc = CI->getArgOperand(1);
873*0fca6ea1SDimitry Andric   Value *Mask = CI->getArgOperand(2);
874*0fca6ea1SDimitry Andric 
875*0fca6ea1SDimitry Andric   auto *AddrType = cast<FixedVectorType>(Ptrs->getType());
876*0fca6ea1SDimitry Andric   Type *EltTy = Inc->getType();
877*0fca6ea1SDimitry Andric 
878*0fca6ea1SDimitry Andric   IRBuilder<> Builder(CI->getContext());
879*0fca6ea1SDimitry Andric   Instruction *InsertPt = CI;
880*0fca6ea1SDimitry Andric   Builder.SetInsertPoint(InsertPt);
881*0fca6ea1SDimitry Andric 
882*0fca6ea1SDimitry Andric   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
883*0fca6ea1SDimitry Andric 
884*0fca6ea1SDimitry Andric   // FIXME: Do we need to add an alignment parameter to the intrinsic?
885*0fca6ea1SDimitry Andric   unsigned VectorWidth = AddrType->getNumElements();
886*0fca6ea1SDimitry Andric 
887*0fca6ea1SDimitry Andric   // Shorten the way if the mask is a vector of constants.
888*0fca6ea1SDimitry Andric   if (isConstantIntVector(Mask)) {
889*0fca6ea1SDimitry Andric     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
890*0fca6ea1SDimitry Andric       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
891*0fca6ea1SDimitry Andric         continue;
892*0fca6ea1SDimitry Andric       Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
893*0fca6ea1SDimitry Andric       LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
894*0fca6ea1SDimitry Andric       Value *Add = Builder.CreateAdd(Load, Inc);
895*0fca6ea1SDimitry Andric       Builder.CreateStore(Add, Ptr);
896*0fca6ea1SDimitry Andric     }
897*0fca6ea1SDimitry Andric     CI->eraseFromParent();
898*0fca6ea1SDimitry Andric     return;
899*0fca6ea1SDimitry Andric   }
900*0fca6ea1SDimitry Andric 
901*0fca6ea1SDimitry Andric   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
902*0fca6ea1SDimitry Andric     Value *Predicate =
903*0fca6ea1SDimitry Andric         Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
904*0fca6ea1SDimitry Andric 
905*0fca6ea1SDimitry Andric     Instruction *ThenTerm =
906*0fca6ea1SDimitry Andric         SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
907*0fca6ea1SDimitry Andric                                   /*BranchWeights=*/nullptr, DTU);
908*0fca6ea1SDimitry Andric 
909*0fca6ea1SDimitry Andric     BasicBlock *CondBlock = ThenTerm->getParent();
910*0fca6ea1SDimitry Andric     CondBlock->setName("cond.histogram.update");
911*0fca6ea1SDimitry Andric 
912*0fca6ea1SDimitry Andric     Builder.SetInsertPoint(CondBlock->getTerminator());
913*0fca6ea1SDimitry Andric     Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
914*0fca6ea1SDimitry Andric     LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
915*0fca6ea1SDimitry Andric     Value *Add = Builder.CreateAdd(Load, Inc);
916*0fca6ea1SDimitry Andric     Builder.CreateStore(Add, Ptr);
917*0fca6ea1SDimitry Andric 
918*0fca6ea1SDimitry Andric     // Create "else" block, fill it in the next iteration
919*0fca6ea1SDimitry Andric     BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
920*0fca6ea1SDimitry Andric     NewIfBlock->setName("else");
921*0fca6ea1SDimitry Andric     Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
922*0fca6ea1SDimitry Andric   }
923*0fca6ea1SDimitry Andric 
924*0fca6ea1SDimitry Andric   CI->eraseFromParent();
925*0fca6ea1SDimitry Andric   ModifiedDT = true;
926*0fca6ea1SDimitry Andric }
927*0fca6ea1SDimitry Andric 
928fe6060f1SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI,
929fe6060f1SDimitry Andric                     DominatorTree *DT) {
930bdd1243dSDimitry Andric   std::optional<DomTreeUpdater> DTU;
931fe6060f1SDimitry Andric   if (DT)
932fe6060f1SDimitry Andric     DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
933fe6060f1SDimitry Andric 
934e8d8bef9SDimitry Andric   bool EverMadeChange = false;
935e8d8bef9SDimitry Andric   bool MadeChange = true;
936*0fca6ea1SDimitry Andric   auto &DL = F.getDataLayout();
937e8d8bef9SDimitry Andric   while (MadeChange) {
938e8d8bef9SDimitry Andric     MadeChange = false;
939349cc55cSDimitry Andric     for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
940e8d8bef9SDimitry Andric       bool ModifiedDTOnIteration = false;
941349cc55cSDimitry Andric       MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
942bdd1243dSDimitry Andric                                   DTU ? &*DTU : nullptr);
943fe6060f1SDimitry Andric 
944e8d8bef9SDimitry Andric       // Restart BB iteration if the dominator tree of the Function was changed
945e8d8bef9SDimitry Andric       if (ModifiedDTOnIteration)
946e8d8bef9SDimitry Andric         break;
947e8d8bef9SDimitry Andric     }
948e8d8bef9SDimitry Andric 
949e8d8bef9SDimitry Andric     EverMadeChange |= MadeChange;
950e8d8bef9SDimitry Andric   }
951e8d8bef9SDimitry Andric   return EverMadeChange;
952e8d8bef9SDimitry Andric }
953e8d8bef9SDimitry Andric 
954e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
955e8d8bef9SDimitry Andric   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
956fe6060f1SDimitry Andric   DominatorTree *DT = nullptr;
957fe6060f1SDimitry Andric   if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
958fe6060f1SDimitry Andric     DT = &DTWP->getDomTree();
959fe6060f1SDimitry Andric   return runImpl(F, TTI, DT);
960e8d8bef9SDimitry Andric }
961e8d8bef9SDimitry Andric 
962e8d8bef9SDimitry Andric PreservedAnalyses
963e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
964e8d8bef9SDimitry Andric   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
965fe6060f1SDimitry Andric   auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
966fe6060f1SDimitry Andric   if (!runImpl(F, TTI, DT))
967e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
968e8d8bef9SDimitry Andric   PreservedAnalyses PA;
969e8d8bef9SDimitry Andric   PA.preserve<TargetIRAnalysis>();
970fe6060f1SDimitry Andric   PA.preserve<DominatorTreeAnalysis>();
971e8d8bef9SDimitry Andric   return PA;
972e8d8bef9SDimitry Andric }
973e8d8bef9SDimitry Andric 
974e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
975fe6060f1SDimitry Andric                           const TargetTransformInfo &TTI, const DataLayout &DL,
976fe6060f1SDimitry Andric                           DomTreeUpdater *DTU) {
977e8d8bef9SDimitry Andric   bool MadeChange = false;
978e8d8bef9SDimitry Andric 
979e8d8bef9SDimitry Andric   BasicBlock::iterator CurInstIterator = BB.begin();
980e8d8bef9SDimitry Andric   while (CurInstIterator != BB.end()) {
981e8d8bef9SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
982fe6060f1SDimitry Andric       MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
983e8d8bef9SDimitry Andric     if (ModifiedDT)
984e8d8bef9SDimitry Andric       return true;
985e8d8bef9SDimitry Andric   }
986e8d8bef9SDimitry Andric 
987e8d8bef9SDimitry Andric   return MadeChange;
988e8d8bef9SDimitry Andric }
989e8d8bef9SDimitry Andric 
990e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
991e8d8bef9SDimitry Andric                              const TargetTransformInfo &TTI,
992fe6060f1SDimitry Andric                              const DataLayout &DL, DomTreeUpdater *DTU) {
993e8d8bef9SDimitry Andric   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
994e8d8bef9SDimitry Andric   if (II) {
995e8d8bef9SDimitry Andric     // The scalarization code below does not work for scalable vectors.
996e8d8bef9SDimitry Andric     if (isa<ScalableVectorType>(II->getType()) ||
997349cc55cSDimitry Andric         any_of(II->args(),
998e8d8bef9SDimitry Andric                [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
999e8d8bef9SDimitry Andric       return false;
1000e8d8bef9SDimitry Andric 
1001e8d8bef9SDimitry Andric     switch (II->getIntrinsicID()) {
1002e8d8bef9SDimitry Andric     default:
1003e8d8bef9SDimitry Andric       break;
1004*0fca6ea1SDimitry Andric     case Intrinsic::experimental_vector_histogram_add:
1005*0fca6ea1SDimitry Andric       if (TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(),
1006*0fca6ea1SDimitry Andric                                            CI->getArgOperand(1)->getType()))
1007*0fca6ea1SDimitry Andric         return false;
1008*0fca6ea1SDimitry Andric       scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
1009*0fca6ea1SDimitry Andric       return true;
1010e8d8bef9SDimitry Andric     case Intrinsic::masked_load:
1011e8d8bef9SDimitry Andric       // Scalarize unsupported vector masked load
1012e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedLoad(
1013e8d8bef9SDimitry Andric               CI->getType(),
1014e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
1015e8d8bef9SDimitry Andric         return false;
1016fe6060f1SDimitry Andric       scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
1017e8d8bef9SDimitry Andric       return true;
1018e8d8bef9SDimitry Andric     case Intrinsic::masked_store:
1019e8d8bef9SDimitry Andric       if (TTI.isLegalMaskedStore(
1020e8d8bef9SDimitry Andric               CI->getArgOperand(0)->getType(),
1021e8d8bef9SDimitry Andric               cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
1022e8d8bef9SDimitry Andric         return false;
1023fe6060f1SDimitry Andric       scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
1024e8d8bef9SDimitry Andric       return true;
1025e8d8bef9SDimitry Andric     case Intrinsic::masked_gather: {
1026fe6060f1SDimitry Andric       MaybeAlign MA =
1027fe6060f1SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
1028e8d8bef9SDimitry Andric       Type *LoadTy = CI->getType();
1029fe6060f1SDimitry Andric       Align Alignment = DL.getValueOrABITypeAlignment(MA,
1030fe6060f1SDimitry Andric                                                       LoadTy->getScalarType());
103104eeddc0SDimitry Andric       if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
103204eeddc0SDimitry Andric           !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
1033e8d8bef9SDimitry Andric         return false;
1034fe6060f1SDimitry Andric       scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
1035e8d8bef9SDimitry Andric       return true;
1036e8d8bef9SDimitry Andric     }
1037e8d8bef9SDimitry Andric     case Intrinsic::masked_scatter: {
1038fe6060f1SDimitry Andric       MaybeAlign MA =
1039fe6060f1SDimitry Andric           cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
1040e8d8bef9SDimitry Andric       Type *StoreTy = CI->getArgOperand(0)->getType();
1041fe6060f1SDimitry Andric       Align Alignment = DL.getValueOrABITypeAlignment(MA,
1042fe6060f1SDimitry Andric                                                       StoreTy->getScalarType());
104304eeddc0SDimitry Andric       if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
104404eeddc0SDimitry Andric           !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
104504eeddc0SDimitry Andric                                            Alignment))
1046e8d8bef9SDimitry Andric         return false;
1047fe6060f1SDimitry Andric       scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
1048e8d8bef9SDimitry Andric       return true;
1049e8d8bef9SDimitry Andric     }
1050e8d8bef9SDimitry Andric     case Intrinsic::masked_expandload:
1051*0fca6ea1SDimitry Andric       if (TTI.isLegalMaskedExpandLoad(
1052*0fca6ea1SDimitry Andric               CI->getType(),
1053*0fca6ea1SDimitry Andric               CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne()))
1054e8d8bef9SDimitry Andric         return false;
1055fe6060f1SDimitry Andric       scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
1056e8d8bef9SDimitry Andric       return true;
1057e8d8bef9SDimitry Andric     case Intrinsic::masked_compressstore:
1058*0fca6ea1SDimitry Andric       if (TTI.isLegalMaskedCompressStore(
1059*0fca6ea1SDimitry Andric               CI->getArgOperand(0)->getType(),
1060*0fca6ea1SDimitry Andric               CI->getAttributes().getParamAttrs(1).getAlignment().valueOrOne()))
1061e8d8bef9SDimitry Andric         return false;
1062fe6060f1SDimitry Andric       scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
1063e8d8bef9SDimitry Andric       return true;
1064e8d8bef9SDimitry Andric     }
1065e8d8bef9SDimitry Andric   }
1066e8d8bef9SDimitry Andric 
1067e8d8bef9SDimitry Andric   return false;
1068e8d8bef9SDimitry Andric }
1069