xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1e8d8bef9SDimitry Andric //===-- HexagonVectorCombine.cpp ------------------------------------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric // HexagonVectorCombine is a utility class implementing a variety of functions
9e8d8bef9SDimitry Andric // that assist in vector-based optimizations.
10e8d8bef9SDimitry Andric //
11e8d8bef9SDimitry Andric // AlignVectors: replace unaligned vector loads and stores with aligned ones.
12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
13e8d8bef9SDimitry Andric 
14e8d8bef9SDimitry Andric #include "llvm/ADT/APInt.h"
15e8d8bef9SDimitry Andric #include "llvm/ADT/ArrayRef.h"
16e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h"
17e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h"
18e8d8bef9SDimitry Andric #include "llvm/ADT/SmallVector.h"
19e8d8bef9SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
20e8d8bef9SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
21*bdd1243dSDimitry Andric #include "llvm/Analysis/InstSimplifyFolder.h"
22e8d8bef9SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
23e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
24e8d8bef9SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
25fe6060f1SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
26e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
27*bdd1243dSDimitry Andric #include "llvm/CodeGen/ValueTypes.h"
28e8d8bef9SDimitry Andric #include "llvm/IR/Dominators.h"
29e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
31e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h"
32e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsHexagon.h"
33fe6060f1SDimitry Andric #include "llvm/IR/Metadata.h"
34*bdd1243dSDimitry Andric #include "llvm/IR/PatternMatch.h"
35e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
36e8d8bef9SDimitry Andric #include "llvm/Pass.h"
37e8d8bef9SDimitry Andric #include "llvm/Support/KnownBits.h"
38e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h"
39e8d8bef9SDimitry Andric #include "llvm/Support/raw_ostream.h"
40e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
41*bdd1243dSDimitry Andric #include "llvm/Transforms/Utils/Local.h"
42e8d8bef9SDimitry Andric 
43e8d8bef9SDimitry Andric #include "HexagonSubtarget.h"
44e8d8bef9SDimitry Andric #include "HexagonTargetMachine.h"
45e8d8bef9SDimitry Andric 
46e8d8bef9SDimitry Andric #include <algorithm>
47e8d8bef9SDimitry Andric #include <deque>
48e8d8bef9SDimitry Andric #include <map>
49*bdd1243dSDimitry Andric #include <optional>
50e8d8bef9SDimitry Andric #include <set>
51e8d8bef9SDimitry Andric #include <utility>
52e8d8bef9SDimitry Andric #include <vector>
53e8d8bef9SDimitry Andric 
54e8d8bef9SDimitry Andric #define DEBUG_TYPE "hexagon-vc"
55e8d8bef9SDimitry Andric 
56e8d8bef9SDimitry Andric using namespace llvm;
57e8d8bef9SDimitry Andric 
58e8d8bef9SDimitry Andric namespace {
59e8d8bef9SDimitry Andric class HexagonVectorCombine {
60e8d8bef9SDimitry Andric public:
61e8d8bef9SDimitry Andric   HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
62e8d8bef9SDimitry Andric                        DominatorTree &DT_, TargetLibraryInfo &TLI_,
63e8d8bef9SDimitry Andric                        const TargetMachine &TM_)
64e8d8bef9SDimitry Andric       : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
65e8d8bef9SDimitry Andric         TLI(TLI_),
66e8d8bef9SDimitry Andric         HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}
67e8d8bef9SDimitry Andric 
68e8d8bef9SDimitry Andric   bool run();
69e8d8bef9SDimitry Andric 
70e8d8bef9SDimitry Andric   // Common integer type.
71*bdd1243dSDimitry Andric   IntegerType *getIntTy(unsigned Width = 32) const;
72e8d8bef9SDimitry Andric   // Byte type: either scalar (when Length = 0), or vector with given
73e8d8bef9SDimitry Andric   // element count.
74e8d8bef9SDimitry Andric   Type *getByteTy(int ElemCount = 0) const;
75e8d8bef9SDimitry Andric   // Boolean type: either scalar (when Length = 0), or vector with given
76e8d8bef9SDimitry Andric   // element count.
77e8d8bef9SDimitry Andric   Type *getBoolTy(int ElemCount = 0) const;
78e8d8bef9SDimitry Andric   // Create a ConstantInt of type returned by getIntTy with the value Val.
79*bdd1243dSDimitry Andric   ConstantInt *getConstInt(int Val, unsigned Width = 32) const;
80e8d8bef9SDimitry Andric   // Get the integer value of V, if it exists.
81*bdd1243dSDimitry Andric   std::optional<APInt> getIntValue(const Value *Val) const;
82e8d8bef9SDimitry Andric   // Is V a constant 0, or a vector of 0s?
83e8d8bef9SDimitry Andric   bool isZero(const Value *Val) const;
84e8d8bef9SDimitry Andric   // Is V an undef value?
85e8d8bef9SDimitry Andric   bool isUndef(const Value *Val) const;
86e8d8bef9SDimitry Andric 
87*bdd1243dSDimitry Andric   // Get HVX vector type with the given element type.
88*bdd1243dSDimitry Andric   VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;
89*bdd1243dSDimitry Andric 
90*bdd1243dSDimitry Andric   enum SizeKind {
91*bdd1243dSDimitry Andric     Store, // Store size
92*bdd1243dSDimitry Andric     Alloc, // Alloc size
93*bdd1243dSDimitry Andric   };
94*bdd1243dSDimitry Andric   int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
95*bdd1243dSDimitry Andric   int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
96e8d8bef9SDimitry Andric   int getTypeAlignment(Type *Ty) const;
97*bdd1243dSDimitry Andric   size_t length(Value *Val) const;
98*bdd1243dSDimitry Andric   size_t length(Type *Ty) const;
99e8d8bef9SDimitry Andric 
100e8d8bef9SDimitry Andric   Constant *getNullValue(Type *Ty) const;
101e8d8bef9SDimitry Andric   Constant *getFullValue(Type *Ty) const;
102*bdd1243dSDimitry Andric   Constant *getConstSplat(Type *Ty, int Val) const;
103e8d8bef9SDimitry Andric 
104*bdd1243dSDimitry Andric   Value *simplify(Value *Val) const;
105*bdd1243dSDimitry Andric 
106*bdd1243dSDimitry Andric   Value *insertb(IRBuilderBase &Builder, Value *Dest, Value *Src, int Start,
107e8d8bef9SDimitry Andric                  int Length, int Where) const;
108*bdd1243dSDimitry Andric   Value *vlalignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
109*bdd1243dSDimitry Andric                   Value *Amt) const;
110*bdd1243dSDimitry Andric   Value *vralignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
111*bdd1243dSDimitry Andric                   Value *Amt) const;
112*bdd1243dSDimitry Andric   Value *concat(IRBuilderBase &Builder, ArrayRef<Value *> Vecs) const;
113*bdd1243dSDimitry Andric   Value *vresize(IRBuilderBase &Builder, Value *Val, int NewSize,
114e8d8bef9SDimitry Andric                  Value *Pad) const;
115*bdd1243dSDimitry Andric   Value *rescale(IRBuilderBase &Builder, Value *Mask, Type *FromTy,
116e8d8bef9SDimitry Andric                  Type *ToTy) const;
117*bdd1243dSDimitry Andric   Value *vlsb(IRBuilderBase &Builder, Value *Val) const;
118*bdd1243dSDimitry Andric   Value *vbytes(IRBuilderBase &Builder, Value *Val) const;
119*bdd1243dSDimitry Andric   Value *subvector(IRBuilderBase &Builder, Value *Val, unsigned Start,
120*bdd1243dSDimitry Andric                    unsigned Length) const;
121*bdd1243dSDimitry Andric   Value *sublo(IRBuilderBase &Builder, Value *Val) const;
122*bdd1243dSDimitry Andric   Value *subhi(IRBuilderBase &Builder, Value *Val) const;
123*bdd1243dSDimitry Andric   Value *vdeal(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;
124*bdd1243dSDimitry Andric   Value *vshuff(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;
125e8d8bef9SDimitry Andric 
126*bdd1243dSDimitry Andric   Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID,
127*bdd1243dSDimitry Andric                             Type *RetTy, ArrayRef<Value *> Args,
128*bdd1243dSDimitry Andric                             ArrayRef<Type *> ArgTys = std::nullopt) const;
129*bdd1243dSDimitry Andric   SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
130*bdd1243dSDimitry Andric                                            unsigned ToWidth) const;
131*bdd1243dSDimitry Andric   Value *joinVectorElements(IRBuilderBase &Builder, ArrayRef<Value *> Values,
132*bdd1243dSDimitry Andric                             VectorType *ToType) const;
133e8d8bef9SDimitry Andric 
134*bdd1243dSDimitry Andric   std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
135*bdd1243dSDimitry Andric 
136*bdd1243dSDimitry Andric   unsigned getNumSignificantBits(const Value *V,
137*bdd1243dSDimitry Andric                                  const Instruction *CtxI = nullptr) const;
138*bdd1243dSDimitry Andric   KnownBits getKnownBits(const Value *V,
139*bdd1243dSDimitry Andric                          const Instruction *CtxI = nullptr) const;
140e8d8bef9SDimitry Andric 
141e8d8bef9SDimitry Andric   template <typename T = std::vector<Instruction *>>
142e8d8bef9SDimitry Andric   bool isSafeToMoveBeforeInBB(const Instruction &In,
143e8d8bef9SDimitry Andric                               BasicBlock::const_iterator To,
144*bdd1243dSDimitry Andric                               const T &IgnoreInsts = {}) const;
145*bdd1243dSDimitry Andric 
146*bdd1243dSDimitry Andric   // This function is only used for assertions at the moment.
147*bdd1243dSDimitry Andric   [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
148e8d8bef9SDimitry Andric 
149e8d8bef9SDimitry Andric   Function &F;
150e8d8bef9SDimitry Andric   const DataLayout &DL;
151e8d8bef9SDimitry Andric   AliasAnalysis &AA;
152e8d8bef9SDimitry Andric   AssumptionCache &AC;
153e8d8bef9SDimitry Andric   DominatorTree &DT;
154e8d8bef9SDimitry Andric   TargetLibraryInfo &TLI;
155e8d8bef9SDimitry Andric   const HexagonSubtarget &HST;
156e8d8bef9SDimitry Andric 
157e8d8bef9SDimitry Andric private:
158*bdd1243dSDimitry Andric   Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
159*bdd1243dSDimitry Andric                          int Start, int Length) const;
160e8d8bef9SDimitry Andric };
161e8d8bef9SDimitry Andric 
162e8d8bef9SDimitry Andric class AlignVectors {
163e8d8bef9SDimitry Andric public:
164*bdd1243dSDimitry Andric   AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
165e8d8bef9SDimitry Andric 
166e8d8bef9SDimitry Andric   bool run();
167e8d8bef9SDimitry Andric 
168e8d8bef9SDimitry Andric private:
169e8d8bef9SDimitry Andric   using InstList = std::vector<Instruction *>;
170e8d8bef9SDimitry Andric 
171e8d8bef9SDimitry Andric   struct Segment {
172e8d8bef9SDimitry Andric     void *Data;
173e8d8bef9SDimitry Andric     int Start;
174e8d8bef9SDimitry Andric     int Size;
175e8d8bef9SDimitry Andric   };
176e8d8bef9SDimitry Andric 
177e8d8bef9SDimitry Andric   struct AddrInfo {
178e8d8bef9SDimitry Andric     AddrInfo(const AddrInfo &) = default;
179e8d8bef9SDimitry Andric     AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
180e8d8bef9SDimitry Andric              Align H)
181e8d8bef9SDimitry Andric         : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
182e8d8bef9SDimitry Andric           NeedAlign(HVC.getTypeAlignment(ValTy)) {}
1836246ae0bSDimitry Andric     AddrInfo &operator=(const AddrInfo &) = default;
184e8d8bef9SDimitry Andric 
185e8d8bef9SDimitry Andric     // XXX: add Size member?
186e8d8bef9SDimitry Andric     Instruction *Inst;
187e8d8bef9SDimitry Andric     Value *Addr;
188e8d8bef9SDimitry Andric     Type *ValTy;
189e8d8bef9SDimitry Andric     Align HaveAlign;
190e8d8bef9SDimitry Andric     Align NeedAlign;
191e8d8bef9SDimitry Andric     int Offset = 0; // Offset (in bytes) from the first member of the
192e8d8bef9SDimitry Andric                     // containing AddrList.
193e8d8bef9SDimitry Andric   };
194e8d8bef9SDimitry Andric   using AddrList = std::vector<AddrInfo>;
195e8d8bef9SDimitry Andric 
196e8d8bef9SDimitry Andric   struct InstrLess {
197e8d8bef9SDimitry Andric     bool operator()(const Instruction *A, const Instruction *B) const {
198e8d8bef9SDimitry Andric       return A->comesBefore(B);
199e8d8bef9SDimitry Andric     }
200e8d8bef9SDimitry Andric   };
201e8d8bef9SDimitry Andric   using DepList = std::set<Instruction *, InstrLess>;
202e8d8bef9SDimitry Andric 
203e8d8bef9SDimitry Andric   struct MoveGroup {
204e8d8bef9SDimitry Andric     MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
205e8d8bef9SDimitry Andric         : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
206e8d8bef9SDimitry Andric     Instruction *Base; // Base instruction of the parent address group.
207e8d8bef9SDimitry Andric     InstList Main;     // Main group of instructions.
208e8d8bef9SDimitry Andric     InstList Deps;     // List of dependencies.
209e8d8bef9SDimitry Andric     bool IsHvx;        // Is this group of HVX instructions?
210e8d8bef9SDimitry Andric     bool IsLoad;       // Is this a load group?
211e8d8bef9SDimitry Andric   };
212e8d8bef9SDimitry Andric   using MoveList = std::vector<MoveGroup>;
213e8d8bef9SDimitry Andric 
214e8d8bef9SDimitry Andric   struct ByteSpan {
215e8d8bef9SDimitry Andric     struct Segment {
216fe6060f1SDimitry Andric       // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
217e8d8bef9SDimitry Andric       Segment(Value *Val, int Begin, int Len)
218e8d8bef9SDimitry Andric           : Val(Val), Start(Begin), Size(Len) {}
219e8d8bef9SDimitry Andric       Segment(const Segment &Seg) = default;
2206246ae0bSDimitry Andric       Segment &operator=(const Segment &Seg) = default;
221fe6060f1SDimitry Andric       Value *Val; // Value representable as a sequence of bytes.
222fe6060f1SDimitry Andric       int Start;  // First byte of the value that belongs to the segment.
223fe6060f1SDimitry Andric       int Size;   // Number of bytes in the segment.
224e8d8bef9SDimitry Andric     };
225e8d8bef9SDimitry Andric 
226e8d8bef9SDimitry Andric     struct Block {
227e8d8bef9SDimitry Andric       Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
228e8d8bef9SDimitry Andric       Block(Value *Val, int Off, int Len, int Pos)
229e8d8bef9SDimitry Andric           : Seg(Val, Off, Len), Pos(Pos) {}
230e8d8bef9SDimitry Andric       Block(const Block &Blk) = default;
2316246ae0bSDimitry Andric       Block &operator=(const Block &Blk) = default;
232fe6060f1SDimitry Andric       Segment Seg; // Value segment.
233fe6060f1SDimitry Andric       int Pos;     // Position (offset) of the segment in the Block.
234e8d8bef9SDimitry Andric     };
235e8d8bef9SDimitry Andric 
236e8d8bef9SDimitry Andric     int extent() const;
237e8d8bef9SDimitry Andric     ByteSpan section(int Start, int Length) const;
238e8d8bef9SDimitry Andric     ByteSpan &shift(int Offset);
239fe6060f1SDimitry Andric     SmallVector<Value *, 8> values() const;
240e8d8bef9SDimitry Andric 
241e8d8bef9SDimitry Andric     int size() const { return Blocks.size(); }
242e8d8bef9SDimitry Andric     Block &operator[](int i) { return Blocks[i]; }
243e8d8bef9SDimitry Andric 
244e8d8bef9SDimitry Andric     std::vector<Block> Blocks;
245e8d8bef9SDimitry Andric 
246e8d8bef9SDimitry Andric     using iterator = decltype(Blocks)::iterator;
247e8d8bef9SDimitry Andric     iterator begin() { return Blocks.begin(); }
248e8d8bef9SDimitry Andric     iterator end() { return Blocks.end(); }
249e8d8bef9SDimitry Andric     using const_iterator = decltype(Blocks)::const_iterator;
250e8d8bef9SDimitry Andric     const_iterator begin() const { return Blocks.begin(); }
251e8d8bef9SDimitry Andric     const_iterator end() const { return Blocks.end(); }
252e8d8bef9SDimitry Andric   };
253e8d8bef9SDimitry Andric 
254e8d8bef9SDimitry Andric   Align getAlignFromValue(const Value *V) const;
255*bdd1243dSDimitry Andric   std::optional<MemoryLocation> getLocation(const Instruction &In) const;
256*bdd1243dSDimitry Andric   std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
257e8d8bef9SDimitry Andric   bool isHvx(const AddrInfo &AI) const;
258*bdd1243dSDimitry Andric   // This function is only used for assertions at the moment.
259*bdd1243dSDimitry Andric   [[maybe_unused]] bool isSectorTy(Type *Ty) const;
260e8d8bef9SDimitry Andric 
261e8d8bef9SDimitry Andric   Value *getPayload(Value *Val) const;
262e8d8bef9SDimitry Andric   Value *getMask(Value *Val) const;
263e8d8bef9SDimitry Andric   Value *getPassThrough(Value *Val) const;
264e8d8bef9SDimitry Andric 
265*bdd1243dSDimitry Andric   Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
266e8d8bef9SDimitry Andric                                int Adjust) const;
267*bdd1243dSDimitry Andric   Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
268e8d8bef9SDimitry Andric                               int Alignment) const;
269*bdd1243dSDimitry Andric   Value *createAlignedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
270e8d8bef9SDimitry Andric                            int Alignment, Value *Mask, Value *PassThru) const;
271*bdd1243dSDimitry Andric   Value *createAlignedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
272e8d8bef9SDimitry Andric                             int Alignment, Value *Mask) const;
273e8d8bef9SDimitry Andric 
274*bdd1243dSDimitry Andric   DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
275e8d8bef9SDimitry Andric   bool createAddressGroups();
276e8d8bef9SDimitry Andric   MoveList createLoadGroups(const AddrList &Group) const;
277e8d8bef9SDimitry Andric   MoveList createStoreGroups(const AddrList &Group) const;
278e8d8bef9SDimitry Andric   bool move(const MoveGroup &Move) const;
279*bdd1243dSDimitry Andric   void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
280*bdd1243dSDimitry Andric                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
281*bdd1243dSDimitry Andric   void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
282*bdd1243dSDimitry Andric                          int ScLen, Value *AlignVal, Value *AlignAddr) const;
283e8d8bef9SDimitry Andric   bool realignGroup(const MoveGroup &Move) const;
284e8d8bef9SDimitry Andric 
285e8d8bef9SDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
286e8d8bef9SDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
287*bdd1243dSDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan::Block &B);
288e8d8bef9SDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);
289e8d8bef9SDimitry Andric 
290e8d8bef9SDimitry Andric   std::map<Instruction *, AddrList> AddrGroups;
291*bdd1243dSDimitry Andric   const HexagonVectorCombine &HVC;
292e8d8bef9SDimitry Andric };
293e8d8bef9SDimitry Andric 
294e8d8bef9SDimitry Andric LLVM_ATTRIBUTE_UNUSED
295e8d8bef9SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
296e8d8bef9SDimitry Andric   OS << "Inst: " << AI.Inst << "  " << *AI.Inst << '\n';
297e8d8bef9SDimitry Andric   OS << "Addr: " << *AI.Addr << '\n';
298e8d8bef9SDimitry Andric   OS << "Type: " << *AI.ValTy << '\n';
299e8d8bef9SDimitry Andric   OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
300e8d8bef9SDimitry Andric   OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
301e8d8bef9SDimitry Andric   OS << "Offset: " << AI.Offset;
302e8d8bef9SDimitry Andric   return OS;
303e8d8bef9SDimitry Andric }
304e8d8bef9SDimitry Andric 
305e8d8bef9SDimitry Andric LLVM_ATTRIBUTE_UNUSED
306e8d8bef9SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
307e8d8bef9SDimitry Andric   OS << "Main\n";
308e8d8bef9SDimitry Andric   for (Instruction *I : MG.Main)
309e8d8bef9SDimitry Andric     OS << "  " << *I << '\n';
310e8d8bef9SDimitry Andric   OS << "Deps\n";
311e8d8bef9SDimitry Andric   for (Instruction *I : MG.Deps)
312e8d8bef9SDimitry Andric     OS << "  " << *I << '\n';
313e8d8bef9SDimitry Andric   return OS;
314e8d8bef9SDimitry Andric }
315e8d8bef9SDimitry Andric 
316e8d8bef9SDimitry Andric LLVM_ATTRIBUTE_UNUSED
317*bdd1243dSDimitry Andric raw_ostream &operator<<(raw_ostream &OS,
318*bdd1243dSDimitry Andric                         const AlignVectors::ByteSpan::Block &B) {
319*bdd1243dSDimitry Andric   OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
320*bdd1243dSDimitry Andric      << *B.Seg.Val;
321*bdd1243dSDimitry Andric   return OS;
322*bdd1243dSDimitry Andric }
323*bdd1243dSDimitry Andric 
324*bdd1243dSDimitry Andric LLVM_ATTRIBUTE_UNUSED
325e8d8bef9SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
326e8d8bef9SDimitry Andric   OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
327*bdd1243dSDimitry Andric   for (const AlignVectors::ByteSpan::Block &B : BS)
328*bdd1243dSDimitry Andric     OS << B << '\n';
329e8d8bef9SDimitry Andric   OS << ']';
330e8d8bef9SDimitry Andric   return OS;
331e8d8bef9SDimitry Andric }
332e8d8bef9SDimitry Andric 
333*bdd1243dSDimitry Andric class HvxIdioms {
334*bdd1243dSDimitry Andric public:
335*bdd1243dSDimitry Andric   HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
336*bdd1243dSDimitry Andric     auto *Int32Ty = HVC.getIntTy(32);
337*bdd1243dSDimitry Andric     HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
338*bdd1243dSDimitry Andric     HvxP32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/true);
339*bdd1243dSDimitry Andric   }
340*bdd1243dSDimitry Andric 
341*bdd1243dSDimitry Andric   bool run();
342*bdd1243dSDimitry Andric 
343*bdd1243dSDimitry Andric private:
344*bdd1243dSDimitry Andric   enum Signedness { Positive, Signed, Unsigned };
345*bdd1243dSDimitry Andric 
346*bdd1243dSDimitry Andric   // Value + sign
347*bdd1243dSDimitry Andric   // This is to keep track of whether the value should be treated as signed
348*bdd1243dSDimitry Andric   // or unsigned, or is known to be positive.
349*bdd1243dSDimitry Andric   struct SValue {
350*bdd1243dSDimitry Andric     Value *Val;
351*bdd1243dSDimitry Andric     Signedness Sgn;
352*bdd1243dSDimitry Andric   };
353*bdd1243dSDimitry Andric 
354*bdd1243dSDimitry Andric   struct FxpOp {
355*bdd1243dSDimitry Andric     unsigned Opcode;
356*bdd1243dSDimitry Andric     unsigned Frac; // Number of fraction bits
357*bdd1243dSDimitry Andric     SValue X, Y;
358*bdd1243dSDimitry Andric     // If present, add 1 << RoundAt before shift:
359*bdd1243dSDimitry Andric     std::optional<unsigned> RoundAt;
360*bdd1243dSDimitry Andric     VectorType *ResTy;
361*bdd1243dSDimitry Andric   };
362*bdd1243dSDimitry Andric 
363*bdd1243dSDimitry Andric   auto getNumSignificantBits(Value *V, Instruction *In) const
364*bdd1243dSDimitry Andric       -> std::pair<unsigned, Signedness>;
365*bdd1243dSDimitry Andric   auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
366*bdd1243dSDimitry Andric 
367*bdd1243dSDimitry Andric   auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
368*bdd1243dSDimitry Andric   auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;
369*bdd1243dSDimitry Andric 
370*bdd1243dSDimitry Andric   auto processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
371*bdd1243dSDimitry Andric                             const FxpOp &Op) const -> Value *;
372*bdd1243dSDimitry Andric   auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
373*bdd1243dSDimitry Andric                     bool Rounding) const -> Value *;
374*bdd1243dSDimitry Andric   auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
375*bdd1243dSDimitry Andric                     bool Rounding) const -> Value *;
376*bdd1243dSDimitry Andric   // Return {Result, Carry}, where Carry is a vector predicate.
377*bdd1243dSDimitry Andric   auto createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
378*bdd1243dSDimitry Andric                       Value *CarryIn = nullptr) const
379*bdd1243dSDimitry Andric       -> std::pair<Value *, Value *>;
380*bdd1243dSDimitry Andric   auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const -> Value *;
381*bdd1243dSDimitry Andric   auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
382*bdd1243dSDimitry Andric       -> Value *;
383*bdd1243dSDimitry Andric   auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
384*bdd1243dSDimitry Andric       -> std::pair<Value *, Value *>;
385*bdd1243dSDimitry Andric   auto createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
386*bdd1243dSDimitry Andric                      ArrayRef<Value *> WordY) const -> SmallVector<Value *>;
387*bdd1243dSDimitry Andric   auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
388*bdd1243dSDimitry Andric                      Signedness SgnX, ArrayRef<Value *> WordY,
389*bdd1243dSDimitry Andric                      Signedness SgnY) const -> SmallVector<Value *>;
390*bdd1243dSDimitry Andric 
391*bdd1243dSDimitry Andric   VectorType *HvxI32Ty;
392*bdd1243dSDimitry Andric   VectorType *HvxP32Ty;
393*bdd1243dSDimitry Andric   const HexagonVectorCombine &HVC;
394*bdd1243dSDimitry Andric 
395*bdd1243dSDimitry Andric   friend raw_ostream &operator<<(raw_ostream &, const FxpOp &);
396*bdd1243dSDimitry Andric };
397*bdd1243dSDimitry Andric 
398*bdd1243dSDimitry Andric [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
399*bdd1243dSDimitry Andric                                          const HvxIdioms::FxpOp &Op) {
400*bdd1243dSDimitry Andric   static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
401*bdd1243dSDimitry Andric   OS << Instruction::getOpcodeName(Op.Opcode) << '.' << Op.Frac;
402*bdd1243dSDimitry Andric   if (Op.RoundAt.has_value()) {
403*bdd1243dSDimitry Andric     if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
404*bdd1243dSDimitry Andric       OS << ":rnd";
405*bdd1243dSDimitry Andric     } else {
406*bdd1243dSDimitry Andric       OS << " + 1<<" << *Op.RoundAt;
407*bdd1243dSDimitry Andric     }
408*bdd1243dSDimitry Andric   }
409*bdd1243dSDimitry Andric   OS << "\n  X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
410*bdd1243dSDimitry Andric      << "  Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
411*bdd1243dSDimitry Andric   return OS;
412*bdd1243dSDimitry Andric }
413*bdd1243dSDimitry Andric 
414e8d8bef9SDimitry Andric } // namespace
415e8d8bef9SDimitry Andric 
416e8d8bef9SDimitry Andric namespace {
417e8d8bef9SDimitry Andric 
// Return MaybeT if it is non-null and reports isUnordered(); otherwise
// return nullptr.
template <typename T> T *getIfUnordered(T *MaybeT) {
  if (!MaybeT)
    return nullptr;
  if (!MaybeT->isUnordered())
    return nullptr;
  return MaybeT;
}
421e8d8bef9SDimitry Andric template <typename T> T *isCandidate(Instruction *In) {
422e8d8bef9SDimitry Andric   return dyn_cast<T>(In);
423e8d8bef9SDimitry Andric }
424e8d8bef9SDimitry Andric template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
425e8d8bef9SDimitry Andric   return getIfUnordered(dyn_cast<LoadInst>(In));
426e8d8bef9SDimitry Andric }
427e8d8bef9SDimitry Andric template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
428e8d8bef9SDimitry Andric   return getIfUnordered(dyn_cast<StoreInst>(In));
429e8d8bef9SDimitry Andric }
430e8d8bef9SDimitry Andric 
// Remove from a std::map every element for which the predicate returns
// true. The predicate is called with the map's value_type (key/value pair).
#if !defined(_MSC_VER) || _MSC_VER >= 1926
// VS2017 and some versions of VS2019 have trouble compiling this:
// error C2976: 'std::map': too few template arguments
// VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925)
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  auto Cur = map.begin();
  while (Cur != map.end()) {
    // map::erase returns the iterator following the removed element.
    if (p(*Cur))
      Cur = map.erase(Cur);
    else
      ++Cur;
  }
}
449e8d8bef9SDimitry Andric 
450e8d8bef9SDimitry Andric // Forward other erase_ifs to the LLVM implementations.
451e8d8bef9SDimitry Andric template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
452e8d8bef9SDimitry Andric   llvm::erase_if(std::forward<T>(container), p);
453e8d8bef9SDimitry Andric }
454e8d8bef9SDimitry Andric 
455e8d8bef9SDimitry Andric } // namespace
456e8d8bef9SDimitry Andric 
457e8d8bef9SDimitry Andric // --- Begin AlignVectors
458e8d8bef9SDimitry Andric 
459e8d8bef9SDimitry Andric auto AlignVectors::ByteSpan::extent() const -> int {
460e8d8bef9SDimitry Andric   if (size() == 0)
461e8d8bef9SDimitry Andric     return 0;
462e8d8bef9SDimitry Andric   int Min = Blocks[0].Pos;
463e8d8bef9SDimitry Andric   int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
464e8d8bef9SDimitry Andric   for (int i = 1, e = size(); i != e; ++i) {
465e8d8bef9SDimitry Andric     Min = std::min(Min, Blocks[i].Pos);
466e8d8bef9SDimitry Andric     Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
467e8d8bef9SDimitry Andric   }
468e8d8bef9SDimitry Andric   return Max - Min;
469e8d8bef9SDimitry Andric }
470e8d8bef9SDimitry Andric 
471e8d8bef9SDimitry Andric auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
472e8d8bef9SDimitry Andric   ByteSpan Section;
473e8d8bef9SDimitry Andric   for (const ByteSpan::Block &B : Blocks) {
474e8d8bef9SDimitry Andric     int L = std::max(B.Pos, Start);                       // Left end.
475e8d8bef9SDimitry Andric     int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
476e8d8bef9SDimitry Andric     if (L < R) {
477e8d8bef9SDimitry Andric       // How much to chop off the beginning of the segment:
478e8d8bef9SDimitry Andric       int Off = L > B.Pos ? L - B.Pos : 0;
479e8d8bef9SDimitry Andric       Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
480e8d8bef9SDimitry Andric     }
481e8d8bef9SDimitry Andric   }
482e8d8bef9SDimitry Andric   return Section;
483e8d8bef9SDimitry Andric }
484e8d8bef9SDimitry Andric 
485e8d8bef9SDimitry Andric auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
486e8d8bef9SDimitry Andric   for (Block &B : Blocks)
487e8d8bef9SDimitry Andric     B.Pos += Offset;
488e8d8bef9SDimitry Andric   return *this;
489e8d8bef9SDimitry Andric }
490e8d8bef9SDimitry Andric 
491fe6060f1SDimitry Andric auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
492fe6060f1SDimitry Andric   SmallVector<Value *, 8> Values(Blocks.size());
493fe6060f1SDimitry Andric   for (int i = 0, e = Blocks.size(); i != e; ++i)
494fe6060f1SDimitry Andric     Values[i] = Blocks[i].Seg.Val;
495fe6060f1SDimitry Andric   return Values;
496fe6060f1SDimitry Andric }
497fe6060f1SDimitry Andric 
498e8d8bef9SDimitry Andric auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
499e8d8bef9SDimitry Andric   const auto *C = dyn_cast<ConstantInt>(V);
500e8d8bef9SDimitry Andric   assert(C && "Alignment must be a compile-time constant integer");
501e8d8bef9SDimitry Andric   return C->getAlignValue();
502e8d8bef9SDimitry Andric }
503e8d8bef9SDimitry Andric 
504*bdd1243dSDimitry Andric auto AlignVectors::getAddrInfo(Instruction &In) const
505*bdd1243dSDimitry Andric     -> std::optional<AddrInfo> {
506e8d8bef9SDimitry Andric   if (auto *L = isCandidate<LoadInst>(&In))
507e8d8bef9SDimitry Andric     return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
508e8d8bef9SDimitry Andric                     L->getAlign());
509e8d8bef9SDimitry Andric   if (auto *S = isCandidate<StoreInst>(&In))
510e8d8bef9SDimitry Andric     return AddrInfo(HVC, S, S->getPointerOperand(),
511e8d8bef9SDimitry Andric                     S->getValueOperand()->getType(), S->getAlign());
512e8d8bef9SDimitry Andric   if (auto *II = isCandidate<IntrinsicInst>(&In)) {
513e8d8bef9SDimitry Andric     Intrinsic::ID ID = II->getIntrinsicID();
514e8d8bef9SDimitry Andric     switch (ID) {
515e8d8bef9SDimitry Andric     case Intrinsic::masked_load:
516e8d8bef9SDimitry Andric       return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
517e8d8bef9SDimitry Andric                       getAlignFromValue(II->getArgOperand(1)));
518e8d8bef9SDimitry Andric     case Intrinsic::masked_store:
519e8d8bef9SDimitry Andric       return AddrInfo(HVC, II, II->getArgOperand(1),
520e8d8bef9SDimitry Andric                       II->getArgOperand(0)->getType(),
521e8d8bef9SDimitry Andric                       getAlignFromValue(II->getArgOperand(2)));
522e8d8bef9SDimitry Andric     }
523e8d8bef9SDimitry Andric   }
524*bdd1243dSDimitry Andric   return std::nullopt;
525e8d8bef9SDimitry Andric }
526e8d8bef9SDimitry Andric 
527e8d8bef9SDimitry Andric auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
528e8d8bef9SDimitry Andric   return HVC.HST.isTypeForHVX(AI.ValTy);
529e8d8bef9SDimitry Andric }
530e8d8bef9SDimitry Andric 
531e8d8bef9SDimitry Andric auto AlignVectors::getPayload(Value *Val) const -> Value * {
532e8d8bef9SDimitry Andric   if (auto *In = dyn_cast<Instruction>(Val)) {
533e8d8bef9SDimitry Andric     Intrinsic::ID ID = 0;
534e8d8bef9SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(In))
535e8d8bef9SDimitry Andric       ID = II->getIntrinsicID();
536e8d8bef9SDimitry Andric     if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
537e8d8bef9SDimitry Andric       return In->getOperand(0);
538e8d8bef9SDimitry Andric   }
539e8d8bef9SDimitry Andric   return Val;
540e8d8bef9SDimitry Andric }
541e8d8bef9SDimitry Andric 
542e8d8bef9SDimitry Andric auto AlignVectors::getMask(Value *Val) const -> Value * {
543e8d8bef9SDimitry Andric   if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
544e8d8bef9SDimitry Andric     switch (II->getIntrinsicID()) {
545e8d8bef9SDimitry Andric     case Intrinsic::masked_load:
546e8d8bef9SDimitry Andric       return II->getArgOperand(2);
547e8d8bef9SDimitry Andric     case Intrinsic::masked_store:
548e8d8bef9SDimitry Andric       return II->getArgOperand(3);
549e8d8bef9SDimitry Andric     }
550e8d8bef9SDimitry Andric   }
551e8d8bef9SDimitry Andric 
552e8d8bef9SDimitry Andric   Type *ValTy = getPayload(Val)->getType();
553*bdd1243dSDimitry Andric   if (auto *VecTy = dyn_cast<VectorType>(ValTy))
554*bdd1243dSDimitry Andric     return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
555e8d8bef9SDimitry Andric   return HVC.getFullValue(HVC.getBoolTy());
556e8d8bef9SDimitry Andric }
557e8d8bef9SDimitry Andric 
558e8d8bef9SDimitry Andric auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
559e8d8bef9SDimitry Andric   if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
560e8d8bef9SDimitry Andric     if (II->getIntrinsicID() == Intrinsic::masked_load)
561e8d8bef9SDimitry Andric       return II->getArgOperand(3);
562e8d8bef9SDimitry Andric   }
563e8d8bef9SDimitry Andric   return UndefValue::get(getPayload(Val)->getType());
564e8d8bef9SDimitry Andric }
565e8d8bef9SDimitry Andric 
566*bdd1243dSDimitry Andric auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
567e8d8bef9SDimitry Andric                                          Type *ValTy, int Adjust) const
568e8d8bef9SDimitry Andric     -> Value * {
569e8d8bef9SDimitry Andric   // The adjustment is in bytes, but if it's a multiple of the type size,
570e8d8bef9SDimitry Andric   // we don't need to do pointer casts.
571fe6060f1SDimitry Andric   auto *PtrTy = cast<PointerType>(Ptr->getType());
572fe6060f1SDimitry Andric   if (!PtrTy->isOpaque()) {
57304eeddc0SDimitry Andric     Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
574*bdd1243dSDimitry Andric     int ElemSize = HVC.getSizeOf(ElemTy, HVC.Alloc);
575349cc55cSDimitry Andric     if (Adjust % ElemSize == 0 && Adjust != 0) {
576fe6060f1SDimitry Andric       Value *Tmp0 =
577fe6060f1SDimitry Andric           Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
578e8d8bef9SDimitry Andric       return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
579e8d8bef9SDimitry Andric     }
580fe6060f1SDimitry Andric   }
581e8d8bef9SDimitry Andric 
582e8d8bef9SDimitry Andric   PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
583e8d8bef9SDimitry Andric   Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
584fe6060f1SDimitry Andric   Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0,
585fe6060f1SDimitry Andric                                   HVC.getConstInt(Adjust));
586e8d8bef9SDimitry Andric   return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
587e8d8bef9SDimitry Andric }
588e8d8bef9SDimitry Andric 
589*bdd1243dSDimitry Andric auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
590e8d8bef9SDimitry Andric                                         Type *ValTy, int Alignment) const
591e8d8bef9SDimitry Andric     -> Value * {
592e8d8bef9SDimitry Andric   Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
593e8d8bef9SDimitry Andric   Value *Mask = HVC.getConstInt(-Alignment);
594e8d8bef9SDimitry Andric   Value *And = Builder.CreateAnd(AsInt, Mask);
595e8d8bef9SDimitry Andric   return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
596e8d8bef9SDimitry Andric }
597e8d8bef9SDimitry Andric 
598*bdd1243dSDimitry Andric auto AlignVectors::createAlignedLoad(IRBuilderBase &Builder, Type *ValTy,
599e8d8bef9SDimitry Andric                                      Value *Ptr, int Alignment, Value *Mask,
600e8d8bef9SDimitry Andric                                      Value *PassThru) const -> Value * {
601e8d8bef9SDimitry Andric   assert(!HVC.isUndef(Mask)); // Should this be allowed?
602e8d8bef9SDimitry Andric   if (HVC.isZero(Mask))
603e8d8bef9SDimitry Andric     return PassThru;
604e8d8bef9SDimitry Andric   if (Mask == ConstantInt::getTrue(Mask->getType()))
605e8d8bef9SDimitry Andric     return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
606fe6060f1SDimitry Andric   return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);
607e8d8bef9SDimitry Andric }
608e8d8bef9SDimitry Andric 
609*bdd1243dSDimitry Andric auto AlignVectors::createAlignedStore(IRBuilderBase &Builder, Value *Val,
610e8d8bef9SDimitry Andric                                       Value *Ptr, int Alignment,
611e8d8bef9SDimitry Andric                                       Value *Mask) const -> Value * {
612e8d8bef9SDimitry Andric   if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
613e8d8bef9SDimitry Andric     return UndefValue::get(Val->getType());
614e8d8bef9SDimitry Andric   if (Mask == ConstantInt::getTrue(Mask->getType()))
615e8d8bef9SDimitry Andric     return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
616e8d8bef9SDimitry Andric   return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
617e8d8bef9SDimitry Andric }
618e8d8bef9SDimitry Andric 
619*bdd1243dSDimitry Andric auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
620*bdd1243dSDimitry Andric     -> DepList {
621*bdd1243dSDimitry Andric   BasicBlock *Parent = Base->getParent();
622*bdd1243dSDimitry Andric   assert(In->getParent() == Parent &&
623*bdd1243dSDimitry Andric          "Base and In should be in the same block");
624*bdd1243dSDimitry Andric   assert(Base->comesBefore(In) && "Base should come before In");
625*bdd1243dSDimitry Andric 
626*bdd1243dSDimitry Andric   DepList Deps;
627*bdd1243dSDimitry Andric   std::deque<Instruction *> WorkQ = {In};
628*bdd1243dSDimitry Andric   while (!WorkQ.empty()) {
629*bdd1243dSDimitry Andric     Instruction *D = WorkQ.front();
630*bdd1243dSDimitry Andric     WorkQ.pop_front();
631*bdd1243dSDimitry Andric     Deps.insert(D);
632*bdd1243dSDimitry Andric     for (Value *Op : D->operands()) {
633*bdd1243dSDimitry Andric       if (auto *I = dyn_cast<Instruction>(Op)) {
634*bdd1243dSDimitry Andric         if (I->getParent() == Parent && Base->comesBefore(I))
635*bdd1243dSDimitry Andric           WorkQ.push_back(I);
636*bdd1243dSDimitry Andric       }
637*bdd1243dSDimitry Andric     }
638*bdd1243dSDimitry Andric   }
639*bdd1243dSDimitry Andric   return Deps;
640*bdd1243dSDimitry Andric }
641*bdd1243dSDimitry Andric 
642e8d8bef9SDimitry Andric auto AlignVectors::createAddressGroups() -> bool {
643e8d8bef9SDimitry Andric   // An address group created here may contain instructions spanning
644e8d8bef9SDimitry Andric   // multiple basic blocks.
645e8d8bef9SDimitry Andric   AddrList WorkStack;
646e8d8bef9SDimitry Andric 
647e8d8bef9SDimitry Andric   auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
648e8d8bef9SDimitry Andric     for (AddrInfo &W : WorkStack) {
649e8d8bef9SDimitry Andric       if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
650e8d8bef9SDimitry Andric         return std::make_pair(W.Inst, *D);
651e8d8bef9SDimitry Andric     }
652e8d8bef9SDimitry Andric     return std::make_pair(nullptr, 0);
653e8d8bef9SDimitry Andric   };
654e8d8bef9SDimitry Andric 
655e8d8bef9SDimitry Andric   auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
656e8d8bef9SDimitry Andric     BasicBlock &Block = *DomN->getBlock();
657e8d8bef9SDimitry Andric     for (Instruction &I : Block) {
658e8d8bef9SDimitry Andric       auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
659e8d8bef9SDimitry Andric       if (!AI)
660e8d8bef9SDimitry Andric         continue;
661e8d8bef9SDimitry Andric       auto F = findBaseAndOffset(*AI);
662e8d8bef9SDimitry Andric       Instruction *GroupInst;
663e8d8bef9SDimitry Andric       if (Instruction *BI = F.first) {
664e8d8bef9SDimitry Andric         AI->Offset = F.second;
665e8d8bef9SDimitry Andric         GroupInst = BI;
666e8d8bef9SDimitry Andric       } else {
667e8d8bef9SDimitry Andric         WorkStack.push_back(*AI);
668e8d8bef9SDimitry Andric         GroupInst = AI->Inst;
669e8d8bef9SDimitry Andric       }
670e8d8bef9SDimitry Andric       AddrGroups[GroupInst].push_back(*AI);
671e8d8bef9SDimitry Andric     }
672e8d8bef9SDimitry Andric 
673e8d8bef9SDimitry Andric     for (DomTreeNode *C : DomN->children())
674e8d8bef9SDimitry Andric       Visit(C, Visit);
675e8d8bef9SDimitry Andric 
676e8d8bef9SDimitry Andric     while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
677e8d8bef9SDimitry Andric       WorkStack.pop_back();
678e8d8bef9SDimitry Andric   };
679e8d8bef9SDimitry Andric 
680e8d8bef9SDimitry Andric   traverseBlock(HVC.DT.getRootNode(), traverseBlock);
681e8d8bef9SDimitry Andric   assert(WorkStack.empty());
682e8d8bef9SDimitry Andric 
683e8d8bef9SDimitry Andric   // AddrGroups are formed.
684e8d8bef9SDimitry Andric 
685e8d8bef9SDimitry Andric   // Remove groups of size 1.
686e8d8bef9SDimitry Andric   erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
687e8d8bef9SDimitry Andric   // Remove groups that don't use HVX types.
688e8d8bef9SDimitry Andric   erase_if(AddrGroups, [&](auto &G) {
6890eae32dcSDimitry Andric     return llvm::none_of(
690e8d8bef9SDimitry Andric         G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
691e8d8bef9SDimitry Andric   });
692e8d8bef9SDimitry Andric 
693e8d8bef9SDimitry Andric   return !AddrGroups.empty();
694e8d8bef9SDimitry Andric }
695e8d8bef9SDimitry Andric 
696e8d8bef9SDimitry Andric auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
697e8d8bef9SDimitry Andric   // Form load groups.
698e8d8bef9SDimitry Andric   // To avoid complications with moving code across basic blocks, only form
699e8d8bef9SDimitry Andric   // groups that are contained within a single basic block.
700e8d8bef9SDimitry Andric 
701e8d8bef9SDimitry Andric   auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
702e8d8bef9SDimitry Andric     assert(!Move.Main.empty() && "Move group should have non-empty Main");
703e8d8bef9SDimitry Andric     // Don't mix HVX and non-HVX instructions.
704e8d8bef9SDimitry Andric     if (Move.IsHvx != isHvx(Info))
705e8d8bef9SDimitry Andric       return false;
706e8d8bef9SDimitry Andric     // Leading instruction in the load group.
707e8d8bef9SDimitry Andric     Instruction *Base = Move.Main.front();
708e8d8bef9SDimitry Andric     if (Base->getParent() != Info.Inst->getParent())
709e8d8bef9SDimitry Andric       return false;
710e8d8bef9SDimitry Andric 
711e8d8bef9SDimitry Andric     auto isSafeToMoveToBase = [&](const Instruction *I) {
712e8d8bef9SDimitry Andric       return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
713e8d8bef9SDimitry Andric     };
714e8d8bef9SDimitry Andric     DepList Deps = getUpwardDeps(Info.Inst, Base);
715e8d8bef9SDimitry Andric     if (!llvm::all_of(Deps, isSafeToMoveToBase))
716e8d8bef9SDimitry Andric       return false;
717e8d8bef9SDimitry Andric 
718e8d8bef9SDimitry Andric     // The dependencies will be moved together with the load, so make sure
719e8d8bef9SDimitry Andric     // that none of them could be moved independently in another group.
720e8d8bef9SDimitry Andric     Deps.erase(Info.Inst);
721e8d8bef9SDimitry Andric     auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
722e8d8bef9SDimitry Andric     if (llvm::any_of(Deps, inAddrMap))
723e8d8bef9SDimitry Andric       return false;
724e8d8bef9SDimitry Andric     Move.Main.push_back(Info.Inst);
725e8d8bef9SDimitry Andric     llvm::append_range(Move.Deps, Deps);
726e8d8bef9SDimitry Andric     return true;
727e8d8bef9SDimitry Andric   };
728e8d8bef9SDimitry Andric 
729e8d8bef9SDimitry Andric   MoveList LoadGroups;
730e8d8bef9SDimitry Andric 
731e8d8bef9SDimitry Andric   for (const AddrInfo &Info : Group) {
732e8d8bef9SDimitry Andric     if (!Info.Inst->mayReadFromMemory())
733e8d8bef9SDimitry Andric       continue;
734e8d8bef9SDimitry Andric     if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
735e8d8bef9SDimitry Andric       LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
736e8d8bef9SDimitry Andric   }
737e8d8bef9SDimitry Andric 
738e8d8bef9SDimitry Andric   // Erase singleton groups.
739e8d8bef9SDimitry Andric   erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
740e8d8bef9SDimitry Andric   return LoadGroups;
741e8d8bef9SDimitry Andric }
742e8d8bef9SDimitry Andric 
743e8d8bef9SDimitry Andric auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
744e8d8bef9SDimitry Andric   // Form store groups.
745e8d8bef9SDimitry Andric   // To avoid complications with moving code across basic blocks, only form
746e8d8bef9SDimitry Andric   // groups that are contained within a single basic block.
747e8d8bef9SDimitry Andric 
748e8d8bef9SDimitry Andric   auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
749e8d8bef9SDimitry Andric     assert(!Move.Main.empty() && "Move group should have non-empty Main");
750e8d8bef9SDimitry Andric     // For stores with return values we'd have to collect downward depenencies.
751e8d8bef9SDimitry Andric     // There are no such stores that we handle at the moment, so omit that.
752e8d8bef9SDimitry Andric     assert(Info.Inst->getType()->isVoidTy() &&
753e8d8bef9SDimitry Andric            "Not handling stores with return values");
754e8d8bef9SDimitry Andric     // Don't mix HVX and non-HVX instructions.
755e8d8bef9SDimitry Andric     if (Move.IsHvx != isHvx(Info))
756e8d8bef9SDimitry Andric       return false;
757e8d8bef9SDimitry Andric     // For stores we need to be careful whether it's safe to move them.
758e8d8bef9SDimitry Andric     // Stores that are otherwise safe to move together may not appear safe
759e8d8bef9SDimitry Andric     // to move over one another (i.e. isSafeToMoveBefore may return false).
760e8d8bef9SDimitry Andric     Instruction *Base = Move.Main.front();
761e8d8bef9SDimitry Andric     if (Base->getParent() != Info.Inst->getParent())
762e8d8bef9SDimitry Andric       return false;
763e8d8bef9SDimitry Andric     if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
764e8d8bef9SDimitry Andric       return false;
765e8d8bef9SDimitry Andric     Move.Main.push_back(Info.Inst);
766e8d8bef9SDimitry Andric     return true;
767e8d8bef9SDimitry Andric   };
768e8d8bef9SDimitry Andric 
769e8d8bef9SDimitry Andric   MoveList StoreGroups;
770e8d8bef9SDimitry Andric 
771e8d8bef9SDimitry Andric   for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
772e8d8bef9SDimitry Andric     const AddrInfo &Info = *I;
773e8d8bef9SDimitry Andric     if (!Info.Inst->mayWriteToMemory())
774e8d8bef9SDimitry Andric       continue;
775e8d8bef9SDimitry Andric     if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
776e8d8bef9SDimitry Andric       StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
777e8d8bef9SDimitry Andric   }
778e8d8bef9SDimitry Andric 
779e8d8bef9SDimitry Andric   // Erase singleton groups.
780e8d8bef9SDimitry Andric   erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
781e8d8bef9SDimitry Andric   return StoreGroups;
782e8d8bef9SDimitry Andric }
783e8d8bef9SDimitry Andric 
784e8d8bef9SDimitry Andric auto AlignVectors::move(const MoveGroup &Move) const -> bool {
785e8d8bef9SDimitry Andric   assert(!Move.Main.empty() && "Move group should have non-empty Main");
786e8d8bef9SDimitry Andric   Instruction *Where = Move.Main.front();
787e8d8bef9SDimitry Andric 
788e8d8bef9SDimitry Andric   if (Move.IsLoad) {
789e8d8bef9SDimitry Andric     // Move all deps to before Where, keeping order.
790e8d8bef9SDimitry Andric     for (Instruction *D : Move.Deps)
791e8d8bef9SDimitry Andric       D->moveBefore(Where);
792e8d8bef9SDimitry Andric     // Move all main instructions to after Where, keeping order.
793e8d8bef9SDimitry Andric     ArrayRef<Instruction *> Main(Move.Main);
794e8d8bef9SDimitry Andric     for (Instruction *M : Main.drop_front(1)) {
795e8d8bef9SDimitry Andric       M->moveAfter(Where);
796e8d8bef9SDimitry Andric       Where = M;
797e8d8bef9SDimitry Andric     }
798e8d8bef9SDimitry Andric   } else {
799e8d8bef9SDimitry Andric     // NOTE: Deps are empty for "store" groups. If they need to be
800e8d8bef9SDimitry Andric     // non-empty, decide on the order.
801e8d8bef9SDimitry Andric     assert(Move.Deps.empty());
802e8d8bef9SDimitry Andric     // Move all main instructions to before Where, inverting order.
803e8d8bef9SDimitry Andric     ArrayRef<Instruction *> Main(Move.Main);
804e8d8bef9SDimitry Andric     for (Instruction *M : Main.drop_front(1)) {
805e8d8bef9SDimitry Andric       M->moveBefore(Where);
806e8d8bef9SDimitry Andric       Where = M;
807e8d8bef9SDimitry Andric     }
808e8d8bef9SDimitry Andric   }
809e8d8bef9SDimitry Andric 
810e8d8bef9SDimitry Andric   return Move.Main.size() + Move.Deps.size() > 1;
811e8d8bef9SDimitry Andric }
812e8d8bef9SDimitry Andric 
813*bdd1243dSDimitry Andric auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
814*bdd1243dSDimitry Andric                                     const ByteSpan &VSpan, int ScLen,
815*bdd1243dSDimitry Andric                                     Value *AlignVal, Value *AlignAddr) const
816*bdd1243dSDimitry Andric     -> void {
817*bdd1243dSDimitry Andric   Type *SecTy = HVC.getByteTy(ScLen);
818*bdd1243dSDimitry Andric   int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
819*bdd1243dSDimitry Andric   bool DoAlign = !HVC.isZero(AlignVal);
820*bdd1243dSDimitry Andric   BasicBlock::iterator BasePos = Builder.GetInsertPoint();
821*bdd1243dSDimitry Andric   BasicBlock *BaseBlock = Builder.GetInsertBlock();
822*bdd1243dSDimitry Andric 
823*bdd1243dSDimitry Andric   ByteSpan ASpan;
824*bdd1243dSDimitry Andric   auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
825*bdd1243dSDimitry Andric   auto *Undef = UndefValue::get(SecTy);
826*bdd1243dSDimitry Andric 
827*bdd1243dSDimitry Andric   SmallVector<Instruction *> Loads(NumSectors + DoAlign, nullptr);
828*bdd1243dSDimitry Andric 
829*bdd1243dSDimitry Andric   // We could create all of the aligned loads, and generate the valigns
830*bdd1243dSDimitry Andric   // at the location of the first load, but for large load groups, this
831*bdd1243dSDimitry Andric   // could create highly suboptimal code (there have been groups of 140+
832*bdd1243dSDimitry Andric   // loads in real code).
833*bdd1243dSDimitry Andric   // Instead, place the loads/valigns as close to the users as possible.
834*bdd1243dSDimitry Andric   // In any case we need to have a mapping from the blocks of VSpan (the
835*bdd1243dSDimitry Andric   // span covered by the pre-existing loads) to ASpan (the span covered
836*bdd1243dSDimitry Andric   // by the aligned loads). There is a small problem, though: ASpan needs
837*bdd1243dSDimitry Andric   // to have pointers to the loads/valigns, but we don't know where to put
838*bdd1243dSDimitry Andric   // them yet. We can't use nullptr, because when we create sections of
839*bdd1243dSDimitry Andric   // ASpan (corresponding to blocks from VSpan), for each block in the
840*bdd1243dSDimitry Andric   // section we need to know which blocks of ASpan they are a part of.
841*bdd1243dSDimitry Andric   // To have 1-1 mapping between blocks of ASpan and the temporary value
842*bdd1243dSDimitry Andric   // pointers, use the addresses of the blocks themselves.
843*bdd1243dSDimitry Andric 
844*bdd1243dSDimitry Andric   // Populate the blocks first, to avoid reallocations of the vector
845*bdd1243dSDimitry Andric   // interfering with generating the placeholder addresses.
846*bdd1243dSDimitry Andric   for (int Index = 0; Index != NumSectors; ++Index)
847*bdd1243dSDimitry Andric     ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
848*bdd1243dSDimitry Andric   for (int Index = 0; Index != NumSectors; ++Index) {
849*bdd1243dSDimitry Andric     ASpan.Blocks[Index].Seg.Val =
850*bdd1243dSDimitry Andric         reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
851*bdd1243dSDimitry Andric   }
852*bdd1243dSDimitry Andric 
853*bdd1243dSDimitry Andric   // Multiple values from VSpan can map to the same value in ASpan. Since we
854*bdd1243dSDimitry Andric   // try to create loads lazily, we need to find the earliest use for each
855*bdd1243dSDimitry Andric   // value from ASpan.
856*bdd1243dSDimitry Andric   DenseMap<void *, Instruction *> EarliestUser;
857*bdd1243dSDimitry Andric   auto isEarlier = [](Instruction *A, Instruction *B) {
858*bdd1243dSDimitry Andric     if (B == nullptr)
859*bdd1243dSDimitry Andric       return true;
860*bdd1243dSDimitry Andric     if (A == nullptr)
861*bdd1243dSDimitry Andric       return false;
862*bdd1243dSDimitry Andric     assert(A->getParent() == B->getParent());
863*bdd1243dSDimitry Andric     return A->comesBefore(B);
864*bdd1243dSDimitry Andric   };
865*bdd1243dSDimitry Andric   auto earliestUser = [&](const auto &Uses) {
866*bdd1243dSDimitry Andric     Instruction *User = nullptr;
867*bdd1243dSDimitry Andric     for (const Use &U : Uses) {
868*bdd1243dSDimitry Andric       auto *I = dyn_cast<Instruction>(U.getUser());
869*bdd1243dSDimitry Andric       assert(I != nullptr && "Load used in a non-instruction?");
870*bdd1243dSDimitry Andric       // Make sure we only consider at users in this block, but we need
871*bdd1243dSDimitry Andric       // to remember if there were users outside the block too. This is
872*bdd1243dSDimitry Andric       // because if there are no users, aligned loads will not be created.
873*bdd1243dSDimitry Andric       if (I->getParent() == BaseBlock) {
874*bdd1243dSDimitry Andric         if (!isa<PHINode>(I))
875*bdd1243dSDimitry Andric           User = std::min(User, I, isEarlier);
876*bdd1243dSDimitry Andric       } else {
877*bdd1243dSDimitry Andric         User = std::min(User, BaseBlock->getTerminator(), isEarlier);
878*bdd1243dSDimitry Andric       }
879*bdd1243dSDimitry Andric     }
880*bdd1243dSDimitry Andric     return User;
881*bdd1243dSDimitry Andric   };
882*bdd1243dSDimitry Andric 
883*bdd1243dSDimitry Andric   for (const ByteSpan::Block &B : VSpan) {
884*bdd1243dSDimitry Andric     ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
885*bdd1243dSDimitry Andric     for (const ByteSpan::Block &S : ASection) {
886*bdd1243dSDimitry Andric       EarliestUser[S.Seg.Val] = std::min(
887*bdd1243dSDimitry Andric           EarliestUser[S.Seg.Val], earliestUser(B.Seg.Val->uses()), isEarlier);
888*bdd1243dSDimitry Andric     }
889*bdd1243dSDimitry Andric   }
890*bdd1243dSDimitry Andric 
891*bdd1243dSDimitry Andric   auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
892*bdd1243dSDimitry Andric                         int Index) {
893*bdd1243dSDimitry Andric     Value *Ptr =
894*bdd1243dSDimitry Andric         createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
895*bdd1243dSDimitry Andric     // FIXME: generate a predicated load?
896*bdd1243dSDimitry Andric     Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
897*bdd1243dSDimitry Andric     // If vector shifting is potentially needed, accumulate metadata
898*bdd1243dSDimitry Andric     // from source sections of twice the load width.
899*bdd1243dSDimitry Andric     int Start = (Index - DoAlign) * ScLen;
900*bdd1243dSDimitry Andric     int Width = (1 + DoAlign) * ScLen;
901*bdd1243dSDimitry Andric     propagateMetadata(cast<Instruction>(Load),
902*bdd1243dSDimitry Andric                       VSpan.section(Start, Width).values());
903*bdd1243dSDimitry Andric     return cast<Instruction>(Load);
904*bdd1243dSDimitry Andric   };
905*bdd1243dSDimitry Andric 
906*bdd1243dSDimitry Andric   auto moveBefore = [this](Instruction *In, Instruction *To) {
907*bdd1243dSDimitry Andric     // Move In and its upward dependencies to before To.
908*bdd1243dSDimitry Andric     assert(In->getParent() == To->getParent());
909*bdd1243dSDimitry Andric     DepList Deps = getUpwardDeps(In, To);
910*bdd1243dSDimitry Andric     // DepList is sorted with respect to positions in the basic block.
911*bdd1243dSDimitry Andric     for (Instruction *I : Deps)
912*bdd1243dSDimitry Andric       I->moveBefore(To);
913*bdd1243dSDimitry Andric   };
914*bdd1243dSDimitry Andric 
915*bdd1243dSDimitry Andric   // Generate necessary loads at appropriate locations.
916*bdd1243dSDimitry Andric   for (int Index = 0; Index != NumSectors + 1; ++Index) {
917*bdd1243dSDimitry Andric     // In ASpan, each block will be either a single aligned load, or a
918*bdd1243dSDimitry Andric     // valign of a pair of loads. In the latter case, an aligned load j
919*bdd1243dSDimitry Andric     // will belong to the current valign, and the one in the previous
920*bdd1243dSDimitry Andric     // block (for j > 0).
921*bdd1243dSDimitry Andric     Instruction *PrevAt =
922*bdd1243dSDimitry Andric         DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
923*bdd1243dSDimitry Andric     Instruction *ThisAt =
924*bdd1243dSDimitry Andric         Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
925*bdd1243dSDimitry Andric     if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
926*bdd1243dSDimitry Andric       Builder.SetInsertPoint(Where);
927*bdd1243dSDimitry Andric       Loads[Index] = createLoad(Builder, VSpan, Index);
928*bdd1243dSDimitry Andric       // We know it's safe to put the load at BasePos, so if it's not safe
929*bdd1243dSDimitry Andric       // to move it from this location to BasePos, then the current location
930*bdd1243dSDimitry Andric       // is not valid.
931*bdd1243dSDimitry Andric       // We can't do this check proactively because we need the load to exist
932*bdd1243dSDimitry Andric       // in order to check legality.
933*bdd1243dSDimitry Andric       if (!HVC.isSafeToMoveBeforeInBB(*Loads[Index], BasePos))
934*bdd1243dSDimitry Andric         moveBefore(Loads[Index], &*BasePos);
935*bdd1243dSDimitry Andric     }
936*bdd1243dSDimitry Andric   }
937*bdd1243dSDimitry Andric   // Generate valigns if needed, and fill in proper values in ASpan
938*bdd1243dSDimitry Andric   for (int Index = 0; Index != NumSectors; ++Index) {
939*bdd1243dSDimitry Andric     ASpan[Index].Seg.Val = nullptr;
940*bdd1243dSDimitry Andric     if (auto *Where = EarliestUser[&ASpan[Index]]) {
941*bdd1243dSDimitry Andric       Builder.SetInsertPoint(Where);
942*bdd1243dSDimitry Andric       Value *Val = Loads[Index];
943*bdd1243dSDimitry Andric       assert(Val != nullptr);
944*bdd1243dSDimitry Andric       if (DoAlign) {
945*bdd1243dSDimitry Andric         Value *NextLoad = Loads[Index + 1];
946*bdd1243dSDimitry Andric         assert(NextLoad != nullptr);
947*bdd1243dSDimitry Andric         Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
948*bdd1243dSDimitry Andric       }
949*bdd1243dSDimitry Andric       ASpan[Index].Seg.Val = Val;
950*bdd1243dSDimitry Andric     }
951*bdd1243dSDimitry Andric   }
952*bdd1243dSDimitry Andric 
953*bdd1243dSDimitry Andric   for (const ByteSpan::Block &B : VSpan) {
954*bdd1243dSDimitry Andric     ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
955*bdd1243dSDimitry Andric     Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
956*bdd1243dSDimitry Andric     Builder.SetInsertPoint(cast<Instruction>(B.Seg.Val));
957*bdd1243dSDimitry Andric 
958*bdd1243dSDimitry Andric     for (ByteSpan::Block &S : ASection) {
959*bdd1243dSDimitry Andric       if (S.Seg.Val == nullptr)
960*bdd1243dSDimitry Andric         continue;
961*bdd1243dSDimitry Andric       // The processing of the data loaded by the aligned loads
962*bdd1243dSDimitry Andric       // needs to be inserted after the data is available.
963*bdd1243dSDimitry Andric       Instruction *SegI = cast<Instruction>(S.Seg.Val);
964*bdd1243dSDimitry Andric       Builder.SetInsertPoint(&*std::next(SegI->getIterator()));
965*bdd1243dSDimitry Andric       Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
966*bdd1243dSDimitry Andric       Accum = HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
967*bdd1243dSDimitry Andric     }
968*bdd1243dSDimitry Andric     // Instead of casting everything to bytes for the vselect, cast to the
969*bdd1243dSDimitry Andric     // original value type. This will avoid complications with casting masks.
970*bdd1243dSDimitry Andric     // For example, in cases when the original mask applied to i32, it could
971*bdd1243dSDimitry Andric     // be converted to a mask applicable to i8 via pred_typecast intrinsic,
972*bdd1243dSDimitry Andric     // but if the mask is not exactly of HVX length, extra handling would be
973*bdd1243dSDimitry Andric     // needed to make it work.
974*bdd1243dSDimitry Andric     Type *ValTy = getPayload(B.Seg.Val)->getType();
975*bdd1243dSDimitry Andric     Value *Cast = Builder.CreateBitCast(Accum, ValTy);
976*bdd1243dSDimitry Andric     Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
977*bdd1243dSDimitry Andric                                       getPassThrough(B.Seg.Val));
978*bdd1243dSDimitry Andric     B.Seg.Val->replaceAllUsesWith(Sel);
979*bdd1243dSDimitry Andric   }
980*bdd1243dSDimitry Andric }
981*bdd1243dSDimitry Andric 
982*bdd1243dSDimitry Andric auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
983*bdd1243dSDimitry Andric                                      const ByteSpan &VSpan, int ScLen,
984*bdd1243dSDimitry Andric                                      Value *AlignVal, Value *AlignAddr) const
985*bdd1243dSDimitry Andric     -> void {
986*bdd1243dSDimitry Andric   Type *SecTy = HVC.getByteTy(ScLen);
987*bdd1243dSDimitry Andric   int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
988*bdd1243dSDimitry Andric   bool DoAlign = !HVC.isZero(AlignVal);
989*bdd1243dSDimitry Andric 
990*bdd1243dSDimitry Andric   // Stores.
991*bdd1243dSDimitry Andric   ByteSpan ASpanV, ASpanM;
992*bdd1243dSDimitry Andric 
993*bdd1243dSDimitry Andric   // Return a vector value corresponding to the input value Val:
994*bdd1243dSDimitry Andric   // either <1 x Val> for scalar Val, or Val itself for vector Val.
995*bdd1243dSDimitry Andric   auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
996*bdd1243dSDimitry Andric     Type *Ty = Val->getType();
997*bdd1243dSDimitry Andric     if (Ty->isVectorTy())
998*bdd1243dSDimitry Andric       return Val;
999*bdd1243dSDimitry Andric     auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
1000*bdd1243dSDimitry Andric     return Builder.CreateBitCast(Val, VecTy);
1001*bdd1243dSDimitry Andric   };
1002*bdd1243dSDimitry Andric 
1003*bdd1243dSDimitry Andric   // Create an extra "undef" sector at the beginning and at the end.
1004*bdd1243dSDimitry Andric   // They will be used as the left/right filler in the vlalign step.
1005*bdd1243dSDimitry Andric   for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
1006*bdd1243dSDimitry Andric     // For stores, the size of each section is an aligned vector length.
1007*bdd1243dSDimitry Andric     // Adjust the store offsets relative to the section start offset.
1008*bdd1243dSDimitry Andric     ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
1009*bdd1243dSDimitry Andric     Value *AccumV = UndefValue::get(SecTy);
1010*bdd1243dSDimitry Andric     Value *AccumM = HVC.getNullValue(SecTy);
1011*bdd1243dSDimitry Andric     for (ByteSpan::Block &S : VSection) {
1012*bdd1243dSDimitry Andric       Value *Pay = getPayload(S.Seg.Val);
1013*bdd1243dSDimitry Andric       Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
1014*bdd1243dSDimitry Andric                                 Pay->getType(), HVC.getByteTy());
1015*bdd1243dSDimitry Andric       AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
1016*bdd1243dSDimitry Andric                            S.Seg.Start, S.Seg.Size, S.Pos);
1017*bdd1243dSDimitry Andric       AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
1018*bdd1243dSDimitry Andric                            S.Seg.Start, S.Seg.Size, S.Pos);
1019*bdd1243dSDimitry Andric     }
1020*bdd1243dSDimitry Andric     ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
1021*bdd1243dSDimitry Andric     ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
1022*bdd1243dSDimitry Andric   }
1023*bdd1243dSDimitry Andric 
1024*bdd1243dSDimitry Andric   // vlalign
1025*bdd1243dSDimitry Andric   if (DoAlign) {
1026*bdd1243dSDimitry Andric     for (int j = 1; j != NumSectors + 2; ++j) {
1027*bdd1243dSDimitry Andric       Value *PrevV = ASpanV[j - 1].Seg.Val, *ThisV = ASpanV[j].Seg.Val;
1028*bdd1243dSDimitry Andric       Value *PrevM = ASpanM[j - 1].Seg.Val, *ThisM = ASpanM[j].Seg.Val;
1029*bdd1243dSDimitry Andric       assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
1030*bdd1243dSDimitry Andric       ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1031*bdd1243dSDimitry Andric       ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1032*bdd1243dSDimitry Andric     }
1033*bdd1243dSDimitry Andric   }
1034*bdd1243dSDimitry Andric 
1035*bdd1243dSDimitry Andric   for (int i = 0; i != NumSectors + DoAlign; ++i) {
1036*bdd1243dSDimitry Andric     Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
1037*bdd1243dSDimitry Andric     Value *Val = ASpanV[i].Seg.Val;
1038*bdd1243dSDimitry Andric     Value *Mask = ASpanM[i].Seg.Val; // bytes
1039*bdd1243dSDimitry Andric     if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
1040*bdd1243dSDimitry Andric       Value *Store =
1041*bdd1243dSDimitry Andric           createAlignedStore(Builder, Val, Ptr, ScLen, HVC.vlsb(Builder, Mask));
1042*bdd1243dSDimitry Andric       // If vector shifting is potentially needed, accumulate metadata
1043*bdd1243dSDimitry Andric       // from source sections of twice the store width.
1044*bdd1243dSDimitry Andric       int Start = (i - DoAlign) * ScLen;
1045*bdd1243dSDimitry Andric       int Width = (1 + DoAlign) * ScLen;
1046*bdd1243dSDimitry Andric       propagateMetadata(cast<Instruction>(Store),
1047*bdd1243dSDimitry Andric                         VSpan.section(Start, Width).values());
1048*bdd1243dSDimitry Andric     }
1049*bdd1243dSDimitry Andric   }
1050*bdd1243dSDimitry Andric }
1051*bdd1243dSDimitry Andric 
1052e8d8bef9SDimitry Andric auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
1053e8d8bef9SDimitry Andric   // TODO: Needs support for masked loads/stores of "scalar" vectors.
1054e8d8bef9SDimitry Andric   if (!Move.IsHvx)
1055e8d8bef9SDimitry Andric     return false;
1056e8d8bef9SDimitry Andric 
1057e8d8bef9SDimitry Andric   // Return the element with the maximum alignment from Range,
1058e8d8bef9SDimitry Andric   // where GetValue obtains the value to compare from an element.
1059e8d8bef9SDimitry Andric   auto getMaxOf = [](auto Range, auto GetValue) {
1060e8d8bef9SDimitry Andric     return *std::max_element(
1061e8d8bef9SDimitry Andric         Range.begin(), Range.end(),
1062e8d8bef9SDimitry Andric         [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
1063e8d8bef9SDimitry Andric   };
1064e8d8bef9SDimitry Andric 
1065e8d8bef9SDimitry Andric   const AddrList &BaseInfos = AddrGroups.at(Move.Base);
1066e8d8bef9SDimitry Andric 
1067e8d8bef9SDimitry Andric   // Conceptually, there is a vector of N bytes covering the addresses
1068e8d8bef9SDimitry Andric   // starting from the minimum offset (i.e. Base.Addr+Start). This vector
1069e8d8bef9SDimitry Andric   // represents a contiguous memory region that spans all accessed memory
1070e8d8bef9SDimitry Andric   // locations.
1071e8d8bef9SDimitry Andric   // The correspondence between loaded or stored values will be expressed
1072e8d8bef9SDimitry Andric   // in terms of this vector. For example, the 0th element of the vector
1073e8d8bef9SDimitry Andric   // from the Base address info will start at byte Start from the beginning
1074e8d8bef9SDimitry Andric   // of this conceptual vector.
1075e8d8bef9SDimitry Andric   //
1076e8d8bef9SDimitry Andric   // This vector will be loaded/stored starting at the nearest down-aligned
1077e8d8bef9SDimitry Andric   // address and the amount od the down-alignment will be AlignVal:
1078e8d8bef9SDimitry Andric   //   valign(load_vector(align_down(Base+Start)), AlignVal)
1079e8d8bef9SDimitry Andric 
1080e8d8bef9SDimitry Andric   std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1081e8d8bef9SDimitry Andric   AddrList MoveInfos;
1082e8d8bef9SDimitry Andric   llvm::copy_if(
1083e8d8bef9SDimitry Andric       BaseInfos, std::back_inserter(MoveInfos),
1084e8d8bef9SDimitry Andric       [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });
1085e8d8bef9SDimitry Andric 
1086e8d8bef9SDimitry Andric   // Maximum alignment present in the whole address group.
1087e8d8bef9SDimitry Andric   const AddrInfo &WithMaxAlign =
108804eeddc0SDimitry Andric       getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
1089e8d8bef9SDimitry Andric   Align MaxGiven = WithMaxAlign.HaveAlign;
1090e8d8bef9SDimitry Andric 
1091e8d8bef9SDimitry Andric   // Minimum alignment present in the move address group.
1092e8d8bef9SDimitry Andric   const AddrInfo &WithMinOffset =
1093e8d8bef9SDimitry Andric       getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
1094e8d8bef9SDimitry Andric 
1095e8d8bef9SDimitry Andric   const AddrInfo &WithMaxNeeded =
1096e8d8bef9SDimitry Andric       getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
1097e8d8bef9SDimitry Andric   Align MinNeeded = WithMaxNeeded.NeedAlign;
1098e8d8bef9SDimitry Andric 
1099*bdd1243dSDimitry Andric   // Set the builder's insertion point right before the load group, or
1100*bdd1243dSDimitry Andric   // immediately after the store group. (Instructions in a store group are
1101*bdd1243dSDimitry Andric   // listed in reverse order.)
1102*bdd1243dSDimitry Andric   Instruction *InsertAt = Move.Main.front();
1103*bdd1243dSDimitry Andric   if (!Move.IsLoad) {
1104*bdd1243dSDimitry Andric     // There should be a terminator (which store isn't, but check anyways).
1105*bdd1243dSDimitry Andric     assert(InsertAt->getIterator() != InsertAt->getParent()->end());
1106*bdd1243dSDimitry Andric     InsertAt = &*std::next(InsertAt->getIterator());
1107*bdd1243dSDimitry Andric   }
1108*bdd1243dSDimitry Andric 
1109*bdd1243dSDimitry Andric   IRBuilder Builder(InsertAt->getParent(), InsertAt->getIterator(),
1110*bdd1243dSDimitry Andric                     InstSimplifyFolder(HVC.DL));
1111e8d8bef9SDimitry Andric   Value *AlignAddr = nullptr; // Actual aligned address.
1112e8d8bef9SDimitry Andric   Value *AlignVal = nullptr;  // Right-shift amount (for valign).
1113e8d8bef9SDimitry Andric 
1114e8d8bef9SDimitry Andric   if (MinNeeded <= MaxGiven) {
1115e8d8bef9SDimitry Andric     int Start = WithMinOffset.Offset;
1116e8d8bef9SDimitry Andric     int OffAtMax = WithMaxAlign.Offset;
1117e8d8bef9SDimitry Andric     // Shift the offset of the maximally aligned instruction (OffAtMax)
1118e8d8bef9SDimitry Andric     // back by just enough multiples of the required alignment to cover the
1119e8d8bef9SDimitry Andric     // distance from Start to OffAtMax.
1120e8d8bef9SDimitry Andric     // Calculate the address adjustment amount based on the address with the
1121e8d8bef9SDimitry Andric     // maximum alignment. This is to allow a simple gep instruction instead
1122e8d8bef9SDimitry Andric     // of potential bitcasts to i8*.
1123e8d8bef9SDimitry Andric     int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
1124e8d8bef9SDimitry Andric     AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1125e8d8bef9SDimitry Andric                                       WithMaxAlign.ValTy, Adjust);
1126e8d8bef9SDimitry Andric     int Diff = Start - (OffAtMax + Adjust);
1127e8d8bef9SDimitry Andric     AlignVal = HVC.getConstInt(Diff);
1128e8d8bef9SDimitry Andric     assert(Diff >= 0);
1129e8d8bef9SDimitry Andric     assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
1130e8d8bef9SDimitry Andric   } else {
1131e8d8bef9SDimitry Andric     // WithMinOffset is the lowest address in the group,
1132e8d8bef9SDimitry Andric     //   WithMinOffset.Addr = Base+Start.
1133e8d8bef9SDimitry Andric     // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
1134e8d8bef9SDimitry Andric     // mask off unnecessary bits, so it's ok to just the original pointer as
1135e8d8bef9SDimitry Andric     // the alignment amount.
1136e8d8bef9SDimitry Andric     // Do an explicit down-alignment of the address to avoid creating an
1137e8d8bef9SDimitry Andric     // aligned instruction with an address that is not really aligned.
1138e8d8bef9SDimitry Andric     AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
1139e8d8bef9SDimitry Andric                                      WithMinOffset.ValTy, MinNeeded.value());
1140e8d8bef9SDimitry Andric     AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
1141e8d8bef9SDimitry Andric   }
1142e8d8bef9SDimitry Andric 
1143e8d8bef9SDimitry Andric   ByteSpan VSpan;
1144e8d8bef9SDimitry Andric   for (const AddrInfo &AI : MoveInfos) {
1145e8d8bef9SDimitry Andric     VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1146e8d8bef9SDimitry Andric                               AI.Offset - WithMinOffset.Offset);
1147e8d8bef9SDimitry Andric   }
1148e8d8bef9SDimitry Andric 
1149e8d8bef9SDimitry Andric   // The aligned loads/stores will use blocks that are either scalars,
1150e8d8bef9SDimitry Andric   // or HVX vectors. Let "sector" be the unified term for such a block.
1151e8d8bef9SDimitry Andric   // blend(scalar, vector) -> sector...
1152e8d8bef9SDimitry Andric   int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
1153e8d8bef9SDimitry Andric                          : std::max<int>(MinNeeded.value(), 4);
1154e8d8bef9SDimitry Andric   assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1155e8d8bef9SDimitry Andric   assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1156e8d8bef9SDimitry Andric 
1157*bdd1243dSDimitry Andric   if (Move.IsLoad)
1158*bdd1243dSDimitry Andric     realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1159*bdd1243dSDimitry Andric   else
1160*bdd1243dSDimitry Andric     realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1161e8d8bef9SDimitry Andric 
1162e8d8bef9SDimitry Andric   for (auto *Inst : Move.Main)
1163e8d8bef9SDimitry Andric     Inst->eraseFromParent();
1164e8d8bef9SDimitry Andric 
1165e8d8bef9SDimitry Andric   return true;
1166e8d8bef9SDimitry Andric }
1167e8d8bef9SDimitry Andric 
1168*bdd1243dSDimitry Andric auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
1169*bdd1243dSDimitry Andric   if (!HVC.isByteVecTy(Ty))
1170*bdd1243dSDimitry Andric     return false;
1171*bdd1243dSDimitry Andric   int Size = HVC.getSizeOf(Ty);
1172*bdd1243dSDimitry Andric   if (HVC.HST.isTypeForHVX(Ty))
1173*bdd1243dSDimitry Andric     return Size == static_cast<int>(HVC.HST.getVectorLength());
1174*bdd1243dSDimitry Andric   return Size == 4 || Size == 8;
1175*bdd1243dSDimitry Andric }
1176*bdd1243dSDimitry Andric 
1177e8d8bef9SDimitry Andric auto AlignVectors::run() -> bool {
1178e8d8bef9SDimitry Andric   if (!createAddressGroups())
1179e8d8bef9SDimitry Andric     return false;
1180e8d8bef9SDimitry Andric 
1181e8d8bef9SDimitry Andric   bool Changed = false;
1182e8d8bef9SDimitry Andric   MoveList LoadGroups, StoreGroups;
1183e8d8bef9SDimitry Andric 
1184e8d8bef9SDimitry Andric   for (auto &G : AddrGroups) {
1185e8d8bef9SDimitry Andric     llvm::append_range(LoadGroups, createLoadGroups(G.second));
1186e8d8bef9SDimitry Andric     llvm::append_range(StoreGroups, createStoreGroups(G.second));
1187e8d8bef9SDimitry Andric   }
1188e8d8bef9SDimitry Andric 
1189e8d8bef9SDimitry Andric   for (auto &M : LoadGroups)
1190e8d8bef9SDimitry Andric     Changed |= move(M);
1191e8d8bef9SDimitry Andric   for (auto &M : StoreGroups)
1192e8d8bef9SDimitry Andric     Changed |= move(M);
1193e8d8bef9SDimitry Andric 
1194e8d8bef9SDimitry Andric   for (auto &M : LoadGroups)
1195e8d8bef9SDimitry Andric     Changed |= realignGroup(M);
1196e8d8bef9SDimitry Andric   for (auto &M : StoreGroups)
1197e8d8bef9SDimitry Andric     Changed |= realignGroup(M);
1198e8d8bef9SDimitry Andric 
1199e8d8bef9SDimitry Andric   return Changed;
1200e8d8bef9SDimitry Andric }
1201e8d8bef9SDimitry Andric 
1202e8d8bef9SDimitry Andric // --- End AlignVectors
1203e8d8bef9SDimitry Andric 
1204*bdd1243dSDimitry Andric // --- Begin HvxIdioms
1205*bdd1243dSDimitry Andric 
1206*bdd1243dSDimitry Andric auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
1207*bdd1243dSDimitry Andric     -> std::pair<unsigned, Signedness> {
1208*bdd1243dSDimitry Andric   unsigned Bits = HVC.getNumSignificantBits(V, In);
1209*bdd1243dSDimitry Andric   // The significant bits are calculated including the sign bit. This may
1210*bdd1243dSDimitry Andric   // add an extra bit for zero-extended values, e.g. (zext i32 to i64) may
1211*bdd1243dSDimitry Andric   // result in 33 significant bits. To avoid extra words, skip the extra
1212*bdd1243dSDimitry Andric   // sign bit, but keep information that the value is to be treated as
1213*bdd1243dSDimitry Andric   // unsigned.
1214*bdd1243dSDimitry Andric   KnownBits Known = HVC.getKnownBits(V, In);
1215*bdd1243dSDimitry Andric   Signedness Sign = Signed;
1216*bdd1243dSDimitry Andric   unsigned NumToTest = 0; // Number of bits used in test for unsignedness.
1217*bdd1243dSDimitry Andric   if (isPowerOf2_32(Bits))
1218*bdd1243dSDimitry Andric     NumToTest = Bits;
1219*bdd1243dSDimitry Andric   else if (Bits > 1 && isPowerOf2_32(Bits - 1))
1220*bdd1243dSDimitry Andric     NumToTest = Bits - 1;
1221*bdd1243dSDimitry Andric 
1222*bdd1243dSDimitry Andric   if (NumToTest != 0 && Known.Zero.ashr(NumToTest).isAllOnes()) {
1223*bdd1243dSDimitry Andric     Sign = Unsigned;
1224*bdd1243dSDimitry Andric     Bits = NumToTest;
1225*bdd1243dSDimitry Andric   }
1226*bdd1243dSDimitry Andric 
1227*bdd1243dSDimitry Andric   // If the top bit of the nearest power-of-2 is zero, this value is
1228*bdd1243dSDimitry Andric   // positive. It could be treated as either signed or unsigned.
1229*bdd1243dSDimitry Andric   if (unsigned Pow2 = PowerOf2Ceil(Bits); Pow2 != Bits) {
1230*bdd1243dSDimitry Andric     if (Known.Zero.ashr(Pow2 - 1).isAllOnes())
1231*bdd1243dSDimitry Andric       Sign = Positive;
1232*bdd1243dSDimitry Andric   }
1233*bdd1243dSDimitry Andric   return {Bits, Sign};
1234*bdd1243dSDimitry Andric }
1235*bdd1243dSDimitry Andric 
1236*bdd1243dSDimitry Andric auto HvxIdioms::canonSgn(SValue X, SValue Y) const
1237*bdd1243dSDimitry Andric     -> std::pair<SValue, SValue> {
1238*bdd1243dSDimitry Andric   // Canonicalize the signedness of X and Y, so that the result is one of:
1239*bdd1243dSDimitry Andric   //   S, S
1240*bdd1243dSDimitry Andric   //   U/P, S
1241*bdd1243dSDimitry Andric   //   U/P, U/P
1242*bdd1243dSDimitry Andric   if (X.Sgn == Signed && Y.Sgn != Signed)
1243*bdd1243dSDimitry Andric     std::swap(X, Y);
1244*bdd1243dSDimitry Andric   return {X, Y};
1245*bdd1243dSDimitry Andric }
1246*bdd1243dSDimitry Andric 
1247*bdd1243dSDimitry Andric // Match
1248*bdd1243dSDimitry Andric //   (X * Y) [>> N], or
1249*bdd1243dSDimitry Andric //   ((X * Y) + (1 << M)) >> N
1250*bdd1243dSDimitry Andric auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
1251*bdd1243dSDimitry Andric   using namespace PatternMatch;
1252*bdd1243dSDimitry Andric   auto *Ty = In.getType();
1253*bdd1243dSDimitry Andric 
1254*bdd1243dSDimitry Andric   if (!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy())
1255*bdd1243dSDimitry Andric     return std::nullopt;
1256*bdd1243dSDimitry Andric 
1257*bdd1243dSDimitry Andric   unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();
1258*bdd1243dSDimitry Andric 
1259*bdd1243dSDimitry Andric   FxpOp Op;
1260*bdd1243dSDimitry Andric   Value *Exp = &In;
1261*bdd1243dSDimitry Andric 
1262*bdd1243dSDimitry Andric   // Fixed-point multiplication is always shifted right (except when the
1263*bdd1243dSDimitry Andric   // fraction is 0 bits).
1264*bdd1243dSDimitry Andric   auto m_Shr = [](auto &&V, auto &&S) {
1265*bdd1243dSDimitry Andric     return m_CombineOr(m_LShr(V, S), m_AShr(V, S));
1266*bdd1243dSDimitry Andric   };
1267*bdd1243dSDimitry Andric 
1268*bdd1243dSDimitry Andric   const APInt *Qn = nullptr;
1269*bdd1243dSDimitry Andric   if (Value * T; match(Exp, m_Shr(m_Value(T), m_APInt(Qn)))) {
1270*bdd1243dSDimitry Andric     Op.Frac = Qn->getZExtValue();
1271*bdd1243dSDimitry Andric     Exp = T;
1272*bdd1243dSDimitry Andric   } else {
1273*bdd1243dSDimitry Andric     Op.Frac = 0;
1274*bdd1243dSDimitry Andric   }
1275*bdd1243dSDimitry Andric 
1276*bdd1243dSDimitry Andric   if (Op.Frac > Width)
1277*bdd1243dSDimitry Andric     return std::nullopt;
1278*bdd1243dSDimitry Andric 
1279*bdd1243dSDimitry Andric   // Check if there is rounding added.
1280*bdd1243dSDimitry Andric   const APInt *C = nullptr;
1281*bdd1243dSDimitry Andric   if (Value * T; Op.Frac > 0 && match(Exp, m_Add(m_Value(T), m_APInt(C)))) {
1282*bdd1243dSDimitry Andric     uint64_t CV = C->getZExtValue();
1283*bdd1243dSDimitry Andric     if (CV != 0 && !isPowerOf2_64(CV))
1284*bdd1243dSDimitry Andric       return std::nullopt;
1285*bdd1243dSDimitry Andric     if (CV != 0)
1286*bdd1243dSDimitry Andric       Op.RoundAt = Log2_64(CV);
1287*bdd1243dSDimitry Andric     Exp = T;
1288*bdd1243dSDimitry Andric   }
1289*bdd1243dSDimitry Andric 
1290*bdd1243dSDimitry Andric   // Check if the rest is a multiplication.
1291*bdd1243dSDimitry Andric   if (match(Exp, m_Mul(m_Value(Op.X.Val), m_Value(Op.Y.Val)))) {
1292*bdd1243dSDimitry Andric     Op.Opcode = Instruction::Mul;
1293*bdd1243dSDimitry Andric     // FIXME: The information below is recomputed.
1294*bdd1243dSDimitry Andric     Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
1295*bdd1243dSDimitry Andric     Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
1296*bdd1243dSDimitry Andric     Op.ResTy = cast<VectorType>(Ty);
1297*bdd1243dSDimitry Andric     return Op;
1298*bdd1243dSDimitry Andric   }
1299*bdd1243dSDimitry Andric 
1300*bdd1243dSDimitry Andric   return std::nullopt;
1301*bdd1243dSDimitry Andric }
1302*bdd1243dSDimitry Andric 
1303*bdd1243dSDimitry Andric auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
1304*bdd1243dSDimitry Andric     -> Value * {
1305*bdd1243dSDimitry Andric   assert(Op.X.Val->getType() == Op.Y.Val->getType());
1306*bdd1243dSDimitry Andric 
1307*bdd1243dSDimitry Andric   auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
1308*bdd1243dSDimitry Andric   if (VecTy == nullptr)
1309*bdd1243dSDimitry Andric     return nullptr;
1310*bdd1243dSDimitry Andric   auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
1311*bdd1243dSDimitry Andric   unsigned ElemWidth = ElemTy->getBitWidth();
1312*bdd1243dSDimitry Andric 
1313*bdd1243dSDimitry Andric   // TODO: This can be relaxed after legalization is done pre-isel.
1314*bdd1243dSDimitry Andric   if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
1315*bdd1243dSDimitry Andric     return nullptr;
1316*bdd1243dSDimitry Andric 
1317*bdd1243dSDimitry Andric   // There are no special intrinsics that should be used for multiplying
1318*bdd1243dSDimitry Andric   // signed 8-bit values, so just skip them. Normal codegen should handle
1319*bdd1243dSDimitry Andric   // this just fine.
1320*bdd1243dSDimitry Andric   if (ElemWidth <= 8)
1321*bdd1243dSDimitry Andric     return nullptr;
1322*bdd1243dSDimitry Andric   // Similarly, if this is just a multiplication that can be handled without
1323*bdd1243dSDimitry Andric   // intervention, then leave it alone.
1324*bdd1243dSDimitry Andric   if (ElemWidth <= 32 && Op.Frac == 0)
1325*bdd1243dSDimitry Andric     return nullptr;
1326*bdd1243dSDimitry Andric 
1327*bdd1243dSDimitry Andric   auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
1328*bdd1243dSDimitry Andric   auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
1329*bdd1243dSDimitry Andric 
1330*bdd1243dSDimitry Andric   // TODO: Add multiplication of vectors by scalar registers (up to 4 bytes).
1331*bdd1243dSDimitry Andric 
1332*bdd1243dSDimitry Andric   Value *X = Op.X.Val, *Y = Op.Y.Val;
1333*bdd1243dSDimitry Andric   IRBuilder Builder(In.getParent(), In.getIterator(),
1334*bdd1243dSDimitry Andric                     InstSimplifyFolder(HVC.DL));
1335*bdd1243dSDimitry Andric 
1336*bdd1243dSDimitry Andric   auto roundUpWidth = [](unsigned Width) -> unsigned {
1337*bdd1243dSDimitry Andric     if (Width <= 32 && !isPowerOf2_32(Width)) {
1338*bdd1243dSDimitry Andric       // If the element width is not a power of 2, round it up
1339*bdd1243dSDimitry Andric       // to the next one. Do this for widths not exceeding 32.
1340*bdd1243dSDimitry Andric       return PowerOf2Ceil(Width);
1341*bdd1243dSDimitry Andric     }
1342*bdd1243dSDimitry Andric     if (Width > 32 && Width % 32 != 0) {
1343*bdd1243dSDimitry Andric       // For wider elements, round it up to the multiple of 32.
1344*bdd1243dSDimitry Andric       return alignTo(Width, 32u);
1345*bdd1243dSDimitry Andric     }
1346*bdd1243dSDimitry Andric     return Width;
1347*bdd1243dSDimitry Andric   };
1348*bdd1243dSDimitry Andric 
1349*bdd1243dSDimitry Andric   BitsX = roundUpWidth(BitsX);
1350*bdd1243dSDimitry Andric   BitsY = roundUpWidth(BitsY);
1351*bdd1243dSDimitry Andric 
1352*bdd1243dSDimitry Andric   // For elementwise multiplication vectors must have the same lengths, so
1353*bdd1243dSDimitry Andric   // resize the elements of both inputs to the same width, the max of the
1354*bdd1243dSDimitry Andric   // calculated significant bits.
1355*bdd1243dSDimitry Andric   unsigned Width = std::max(BitsX, BitsY);
1356*bdd1243dSDimitry Andric 
1357*bdd1243dSDimitry Andric   auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1358*bdd1243dSDimitry Andric   if (Width < ElemWidth) {
1359*bdd1243dSDimitry Andric     X = Builder.CreateTrunc(X, ResizeTy);
1360*bdd1243dSDimitry Andric     Y = Builder.CreateTrunc(Y, ResizeTy);
1361*bdd1243dSDimitry Andric   } else if (Width > ElemWidth) {
1362*bdd1243dSDimitry Andric     X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy)
1363*bdd1243dSDimitry Andric                         : Builder.CreateZExt(X, ResizeTy);
1364*bdd1243dSDimitry Andric     Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy)
1365*bdd1243dSDimitry Andric                         : Builder.CreateZExt(Y, ResizeTy);
1366*bdd1243dSDimitry Andric   };
1367*bdd1243dSDimitry Andric 
1368*bdd1243dSDimitry Andric   assert(X->getType() == Y->getType() && X->getType() == ResizeTy);
1369*bdd1243dSDimitry Andric 
1370*bdd1243dSDimitry Andric   unsigned VecLen = HVC.length(ResizeTy);
1371*bdd1243dSDimitry Andric   unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
1372*bdd1243dSDimitry Andric 
1373*bdd1243dSDimitry Andric   SmallVector<Value *> Results;
1374*bdd1243dSDimitry Andric   FxpOp ChopOp = Op;
1375*bdd1243dSDimitry Andric   ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
1376*bdd1243dSDimitry Andric 
1377*bdd1243dSDimitry Andric   for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
1378*bdd1243dSDimitry Andric     ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
1379*bdd1243dSDimitry Andric     ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
1380*bdd1243dSDimitry Andric     Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1381*bdd1243dSDimitry Andric     if (Results.back() == nullptr)
1382*bdd1243dSDimitry Andric       break;
1383*bdd1243dSDimitry Andric   }
1384*bdd1243dSDimitry Andric 
1385*bdd1243dSDimitry Andric   if (Results.empty() || Results.back() == nullptr)
1386*bdd1243dSDimitry Andric     return nullptr;
1387*bdd1243dSDimitry Andric 
1388*bdd1243dSDimitry Andric   Value *Cat = HVC.concat(Builder, Results);
1389*bdd1243dSDimitry Andric   Value *Ext = SignX == Signed || SignY == Signed
1390*bdd1243dSDimitry Andric                    ? Builder.CreateSExt(Cat, VecTy)
1391*bdd1243dSDimitry Andric                    : Builder.CreateZExt(Cat, VecTy);
1392*bdd1243dSDimitry Andric   return Ext;
1393*bdd1243dSDimitry Andric }
1394*bdd1243dSDimitry Andric 
1395*bdd1243dSDimitry Andric auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
1396*bdd1243dSDimitry Andric                                      const FxpOp &Op) const -> Value * {
1397*bdd1243dSDimitry Andric   assert(Op.X.Val->getType() == Op.Y.Val->getType());
1398*bdd1243dSDimitry Andric   auto *InpTy = cast<VectorType>(Op.X.Val->getType());
1399*bdd1243dSDimitry Andric   unsigned Width = InpTy->getScalarSizeInBits();
1400*bdd1243dSDimitry Andric   bool Rounding = Op.RoundAt.has_value();
1401*bdd1243dSDimitry Andric 
1402*bdd1243dSDimitry Andric   if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
1403*bdd1243dSDimitry Andric     // The fixed-point intrinsics do signed multiplication.
1404*bdd1243dSDimitry Andric     if (Width == Op.Frac + 1 && Op.X.Sgn != Unsigned && Op.Y.Sgn != Unsigned) {
1405*bdd1243dSDimitry Andric       Value *QMul = nullptr;
1406*bdd1243dSDimitry Andric       if (Width == 16) {
1407*bdd1243dSDimitry Andric         QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
1408*bdd1243dSDimitry Andric       } else if (Width == 32) {
1409*bdd1243dSDimitry Andric         QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
1410*bdd1243dSDimitry Andric       }
1411*bdd1243dSDimitry Andric       if (QMul != nullptr)
1412*bdd1243dSDimitry Andric         return QMul;
1413*bdd1243dSDimitry Andric     }
1414*bdd1243dSDimitry Andric   }
1415*bdd1243dSDimitry Andric 
1416*bdd1243dSDimitry Andric   assert(Width >= 32 || isPowerOf2_32(Width)); // Width <= 32 => Width is 2^n
1417*bdd1243dSDimitry Andric   assert(Width < 32 || Width % 32 == 0);       // Width > 32 => Width is 32*k
1418*bdd1243dSDimitry Andric 
1419*bdd1243dSDimitry Andric   // If Width < 32, then it should really be 16.
1420*bdd1243dSDimitry Andric   if (Width < 32) {
1421*bdd1243dSDimitry Andric     if (Width < 16)
1422*bdd1243dSDimitry Andric       return nullptr;
1423*bdd1243dSDimitry Andric     // Getting here with Op.Frac == 0 isn't wrong, but suboptimal: here we
1424*bdd1243dSDimitry Andric     // generate a full precision products, which is unnecessary if there is
1425*bdd1243dSDimitry Andric     // no shift.
1426*bdd1243dSDimitry Andric     assert(Width == 16);
1427*bdd1243dSDimitry Andric     assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
1428*bdd1243dSDimitry Andric     if (Op.Frac == 16) {
1429*bdd1243dSDimitry Andric       // Multiply high
1430*bdd1243dSDimitry Andric       if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
1431*bdd1243dSDimitry Andric         return MulH;
1432*bdd1243dSDimitry Andric     }
1433*bdd1243dSDimitry Andric     // Do full-precision multiply and shift.
1434*bdd1243dSDimitry Andric     Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
1435*bdd1243dSDimitry Andric     if (Rounding) {
1436*bdd1243dSDimitry Andric       Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
1437*bdd1243dSDimitry Andric       Prod32 = Builder.CreateAdd(Prod32, RoundVal);
1438*bdd1243dSDimitry Andric     }
1439*bdd1243dSDimitry Andric 
1440*bdd1243dSDimitry Andric     Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);
1441*bdd1243dSDimitry Andric     Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed
1442*bdd1243dSDimitry Andric                ? Builder.CreateAShr(Prod32, ShiftAmt)
1443*bdd1243dSDimitry Andric                : Builder.CreateLShr(Prod32, ShiftAmt);
1444*bdd1243dSDimitry Andric     return Builder.CreateTrunc(Shifted, InpTy);
1445*bdd1243dSDimitry Andric   }
1446*bdd1243dSDimitry Andric 
1447*bdd1243dSDimitry Andric   // Width >= 32
1448*bdd1243dSDimitry Andric 
1449*bdd1243dSDimitry Andric   // Break up the arguments Op.X and Op.Y into vectors of smaller widths
1450*bdd1243dSDimitry Andric   // in preparation of doing the multiplication by 32-bit parts.
1451*bdd1243dSDimitry Andric   auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, /*ToWidth=*/32);
1452*bdd1243dSDimitry Andric   auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, /*ToWidth=*/32);
1453*bdd1243dSDimitry Andric   auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
1454*bdd1243dSDimitry Andric 
1455*bdd1243dSDimitry Andric   auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());
1456*bdd1243dSDimitry Andric 
1457*bdd1243dSDimitry Andric   // Add the optional rounding to the proper word.
1458*bdd1243dSDimitry Andric   if (Op.RoundAt.has_value()) {
1459*bdd1243dSDimitry Andric     Value *Zero = HVC.getNullValue(WordX[0]->getType());
1460*bdd1243dSDimitry Andric     SmallVector<Value *> RoundV(WordP.size(), Zero);
1461*bdd1243dSDimitry Andric     RoundV[*Op.RoundAt / 32] =
1462*bdd1243dSDimitry Andric         HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
1463*bdd1243dSDimitry Andric     WordP = createAddLong(Builder, WordP, RoundV);
1464*bdd1243dSDimitry Andric   }
1465*bdd1243dSDimitry Andric 
1466*bdd1243dSDimitry Andric   // createRightShiftLong?
1467*bdd1243dSDimitry Andric 
1468*bdd1243dSDimitry Andric   // Shift all products right by Op.Frac.
1469*bdd1243dSDimitry Andric   unsigned SkipWords = Op.Frac / 32;
1470*bdd1243dSDimitry Andric   Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);
1471*bdd1243dSDimitry Andric 
1472*bdd1243dSDimitry Andric   for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
1473*bdd1243dSDimitry Andric     int Src = Dst + SkipWords;
1474*bdd1243dSDimitry Andric     Value *Lo = WordP[Src];
1475*bdd1243dSDimitry Andric     if (Src + 1 < End) {
1476*bdd1243dSDimitry Andric       Value *Hi = WordP[Src + 1];
1477*bdd1243dSDimitry Andric       WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
1478*bdd1243dSDimitry Andric                                            {Hi, Lo, ShiftAmt});
1479*bdd1243dSDimitry Andric     } else {
1480*bdd1243dSDimitry Andric       // The shift of the most significant word.
1481*bdd1243dSDimitry Andric       WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt);
1482*bdd1243dSDimitry Andric     }
1483*bdd1243dSDimitry Andric   }
1484*bdd1243dSDimitry Andric   if (SkipWords != 0)
1485*bdd1243dSDimitry Andric     WordP.resize(WordP.size() - SkipWords);
1486*bdd1243dSDimitry Andric 
1487*bdd1243dSDimitry Andric   return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
1488*bdd1243dSDimitry Andric }
1489*bdd1243dSDimitry Andric 
1490*bdd1243dSDimitry Andric auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
1491*bdd1243dSDimitry Andric                              bool Rounding) const -> Value * {
1492*bdd1243dSDimitry Andric   assert(X.Val->getType() == Y.Val->getType());
1493*bdd1243dSDimitry Andric   assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
1494*bdd1243dSDimitry Andric   assert(HVC.HST.isHVXVectorType(EVT::getEVT(X.Val->getType(), false)));
1495*bdd1243dSDimitry Andric 
1496*bdd1243dSDimitry Andric   // There is no non-rounding intrinsic for i16.
1497*bdd1243dSDimitry Andric   if (!Rounding || X.Sgn == Unsigned || Y.Sgn == Unsigned)
1498*bdd1243dSDimitry Andric     return nullptr;
1499*bdd1243dSDimitry Andric 
1500*bdd1243dSDimitry Andric   auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
1501*bdd1243dSDimitry Andric   return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
1502*bdd1243dSDimitry Andric                                 {X.Val, Y.Val});
1503*bdd1243dSDimitry Andric }
1504*bdd1243dSDimitry Andric 
1505*bdd1243dSDimitry Andric auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
1506*bdd1243dSDimitry Andric                              bool Rounding) const -> Value * {
1507*bdd1243dSDimitry Andric   Type *InpTy = X.Val->getType();
1508*bdd1243dSDimitry Andric   assert(InpTy == Y.Val->getType());
1509*bdd1243dSDimitry Andric   assert(InpTy->getScalarType() == HVC.getIntTy(32));
1510*bdd1243dSDimitry Andric   assert(HVC.HST.isHVXVectorType(EVT::getEVT(InpTy, false)));
1511*bdd1243dSDimitry Andric 
1512*bdd1243dSDimitry Andric   if (X.Sgn == Unsigned || Y.Sgn == Unsigned)
1513*bdd1243dSDimitry Andric     return nullptr;
1514*bdd1243dSDimitry Andric 
1515*bdd1243dSDimitry Andric   auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
1516*bdd1243dSDimitry Andric   auto V6_vmpyo_acc = Rounding
1517*bdd1243dSDimitry Andric                           ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
1518*bdd1243dSDimitry Andric                           : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
1519*bdd1243dSDimitry Andric   Value *V1 =
1520*bdd1243dSDimitry Andric       HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
1521*bdd1243dSDimitry Andric   return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
1522*bdd1243dSDimitry Andric                                 {V1, X.Val, Y.Val});
1523*bdd1243dSDimitry Andric }
1524*bdd1243dSDimitry Andric 
1525*bdd1243dSDimitry Andric auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
1526*bdd1243dSDimitry Andric                                Value *CarryIn) const
1527*bdd1243dSDimitry Andric     -> std::pair<Value *, Value *> {
1528*bdd1243dSDimitry Andric   assert(X->getType() == Y->getType());
1529*bdd1243dSDimitry Andric   auto VecTy = cast<VectorType>(X->getType());
1530*bdd1243dSDimitry Andric   if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
1531*bdd1243dSDimitry Andric     SmallVector<Value *> Args = {X, Y};
1532*bdd1243dSDimitry Andric     Intrinsic::ID AddCarry;
1533*bdd1243dSDimitry Andric     if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
1534*bdd1243dSDimitry Andric       AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
1535*bdd1243dSDimitry Andric     } else {
1536*bdd1243dSDimitry Andric       AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
1537*bdd1243dSDimitry Andric       if (CarryIn == nullptr)
1538*bdd1243dSDimitry Andric         CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
1539*bdd1243dSDimitry Andric       Args.push_back(CarryIn);
1540*bdd1243dSDimitry Andric     }
1541*bdd1243dSDimitry Andric     Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
1542*bdd1243dSDimitry Andric                                         /*RetTy=*/nullptr, Args);
1543*bdd1243dSDimitry Andric     Value *Result = Builder.CreateExtractValue(Ret, {0});
1544*bdd1243dSDimitry Andric     Value *CarryOut = Builder.CreateExtractValue(Ret, {1});
1545*bdd1243dSDimitry Andric     return {Result, CarryOut};
1546*bdd1243dSDimitry Andric   }
1547*bdd1243dSDimitry Andric 
1548*bdd1243dSDimitry Andric   // In other cases, do a regular add, and unsigned compare-less-than.
1549*bdd1243dSDimitry Andric   // The carry-out can originate in two places: adding the carry-in or adding
1550*bdd1243dSDimitry Andric   // the two input values.
1551*bdd1243dSDimitry Andric   Value *Result1 = X; // Result1 = X + CarryIn
1552*bdd1243dSDimitry Andric   if (CarryIn != nullptr) {
1553*bdd1243dSDimitry Andric     unsigned Width = VecTy->getScalarSizeInBits();
1554*bdd1243dSDimitry Andric     uint32_t Mask = 1;
1555*bdd1243dSDimitry Andric     if (Width < 32) {
1556*bdd1243dSDimitry Andric       for (unsigned i = 0, e = 32 / Width; i != e; ++i)
1557*bdd1243dSDimitry Andric         Mask = (Mask << Width) | 1;
1558*bdd1243dSDimitry Andric     }
1559*bdd1243dSDimitry Andric     auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
1560*bdd1243dSDimitry Andric     Value *ValueIn =
1561*bdd1243dSDimitry Andric         HVC.createHvxIntrinsic(Builder, V6_vandqrt, /*RetTy=*/nullptr,
1562*bdd1243dSDimitry Andric                                {CarryIn, HVC.getConstInt(Mask)});
1563*bdd1243dSDimitry Andric     Result1 = Builder.CreateAdd(X, ValueIn);
1564*bdd1243dSDimitry Andric   }
1565*bdd1243dSDimitry Andric 
1566*bdd1243dSDimitry Andric   Value *CarryOut1 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result1, X);
1567*bdd1243dSDimitry Andric   Value *Result2 = Builder.CreateAdd(Result1, Y);
1568*bdd1243dSDimitry Andric   Value *CarryOut2 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result2, Y);
1569*bdd1243dSDimitry Andric   return {Result2, Builder.CreateOr(CarryOut1, CarryOut2)};
1570*bdd1243dSDimitry Andric }
1571*bdd1243dSDimitry Andric 
1572*bdd1243dSDimitry Andric auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
1573*bdd1243dSDimitry Andric     -> Value * {
1574*bdd1243dSDimitry Andric   Intrinsic::ID V6_vmpyh = 0;
1575*bdd1243dSDimitry Andric   std::tie(X, Y) = canonSgn(X, Y);
1576*bdd1243dSDimitry Andric 
1577*bdd1243dSDimitry Andric   if (X.Sgn == Signed) {
1578*bdd1243dSDimitry Andric     V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
1579*bdd1243dSDimitry Andric   } else if (Y.Sgn == Signed) {
1580*bdd1243dSDimitry Andric     // In vmpyhus the second operand is unsigned
1581*bdd1243dSDimitry Andric     V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
1582*bdd1243dSDimitry Andric   } else {
1583*bdd1243dSDimitry Andric     V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
1584*bdd1243dSDimitry Andric   }
1585*bdd1243dSDimitry Andric 
1586*bdd1243dSDimitry Andric   // i16*i16 -> i32 / interleaved
1587*bdd1243dSDimitry Andric   Value *P =
1588*bdd1243dSDimitry Andric       HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
1589*bdd1243dSDimitry Andric   // Deinterleave
1590*bdd1243dSDimitry Andric   return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
1591*bdd1243dSDimitry Andric }
1592*bdd1243dSDimitry Andric 
1593*bdd1243dSDimitry Andric auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
1594*bdd1243dSDimitry Andric     -> Value * {
1595*bdd1243dSDimitry Andric   Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false);
1596*bdd1243dSDimitry Andric 
1597*bdd1243dSDimitry Andric   if (HVC.HST.useHVXV69Ops()) {
1598*bdd1243dSDimitry Andric     if (X.Sgn != Signed && Y.Sgn != Signed) {
1599*bdd1243dSDimitry Andric       auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
1600*bdd1243dSDimitry Andric       return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
1601*bdd1243dSDimitry Andric                                     {X.Val, Y.Val});
1602*bdd1243dSDimitry Andric     }
1603*bdd1243dSDimitry Andric   }
1604*bdd1243dSDimitry Andric 
1605*bdd1243dSDimitry Andric   Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/true);
1606*bdd1243dSDimitry Andric   Value *Pair16 = Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty);
1607*bdd1243dSDimitry Andric   unsigned Len = HVC.length(HvxP16Ty) / 2;
1608*bdd1243dSDimitry Andric 
1609*bdd1243dSDimitry Andric   SmallVector<int, 128> PickOdd(Len);
1610*bdd1243dSDimitry Andric   for (int i = 0; i != static_cast<int>(Len); ++i)
1611*bdd1243dSDimitry Andric     PickOdd[i] = 2 * i + 1;
1612*bdd1243dSDimitry Andric 
1613*bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(HVC.sublo(Builder, Pair16),
1614*bdd1243dSDimitry Andric                                      HVC.subhi(Builder, Pair16), PickOdd);
1615*bdd1243dSDimitry Andric }
1616*bdd1243dSDimitry Andric 
1617*bdd1243dSDimitry Andric auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
1618*bdd1243dSDimitry Andric     -> std::pair<Value *, Value *> {
1619*bdd1243dSDimitry Andric   assert(X.Val->getType() == Y.Val->getType());
1620*bdd1243dSDimitry Andric   assert(X.Val->getType() == HvxI32Ty);
1621*bdd1243dSDimitry Andric 
1622*bdd1243dSDimitry Andric   Intrinsic::ID V6_vmpy_parts;
1623*bdd1243dSDimitry Andric   std::tie(X, Y) = canonSgn(X, Y);
1624*bdd1243dSDimitry Andric 
1625*bdd1243dSDimitry Andric   if (X.Sgn == Signed) {
1626*bdd1243dSDimitry Andric     V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
1627*bdd1243dSDimitry Andric   } else if (Y.Sgn == Signed) {
1628*bdd1243dSDimitry Andric     V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
1629*bdd1243dSDimitry Andric   } else {
1630*bdd1243dSDimitry Andric     V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
1631*bdd1243dSDimitry Andric   }
1632*bdd1243dSDimitry Andric 
1633*bdd1243dSDimitry Andric   Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
1634*bdd1243dSDimitry Andric                                         {X.Val, Y.Val}, {HvxI32Ty});
1635*bdd1243dSDimitry Andric   Value *Hi = Builder.CreateExtractValue(Parts, {0});
1636*bdd1243dSDimitry Andric   Value *Lo = Builder.CreateExtractValue(Parts, {1});
1637*bdd1243dSDimitry Andric   return {Lo, Hi};
1638*bdd1243dSDimitry Andric }
1639*bdd1243dSDimitry Andric 
1640*bdd1243dSDimitry Andric auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
1641*bdd1243dSDimitry Andric                               ArrayRef<Value *> WordY) const
1642*bdd1243dSDimitry Andric     -> SmallVector<Value *> {
1643*bdd1243dSDimitry Andric   assert(WordX.size() == WordY.size());
1644*bdd1243dSDimitry Andric   unsigned Idx = 0, Length = WordX.size();
1645*bdd1243dSDimitry Andric   SmallVector<Value *> Sum(Length);
1646*bdd1243dSDimitry Andric 
1647*bdd1243dSDimitry Andric   while (Idx != Length) {
1648*bdd1243dSDimitry Andric     if (HVC.isZero(WordX[Idx]))
1649*bdd1243dSDimitry Andric       Sum[Idx] = WordY[Idx];
1650*bdd1243dSDimitry Andric     else if (HVC.isZero(WordY[Idx]))
1651*bdd1243dSDimitry Andric       Sum[Idx] = WordX[Idx];
1652*bdd1243dSDimitry Andric     else
1653*bdd1243dSDimitry Andric       break;
1654*bdd1243dSDimitry Andric     ++Idx;
1655*bdd1243dSDimitry Andric   }
1656*bdd1243dSDimitry Andric 
1657*bdd1243dSDimitry Andric   Value *Carry = nullptr;
1658*bdd1243dSDimitry Andric   for (; Idx != Length; ++Idx) {
1659*bdd1243dSDimitry Andric     std::tie(Sum[Idx], Carry) =
1660*bdd1243dSDimitry Andric         createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
1661*bdd1243dSDimitry Andric   }
1662*bdd1243dSDimitry Andric 
1663*bdd1243dSDimitry Andric   // This drops the final carry beyond the highest word.
1664*bdd1243dSDimitry Andric   return Sum;
1665*bdd1243dSDimitry Andric }
1666*bdd1243dSDimitry Andric 
1667*bdd1243dSDimitry Andric auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
1668*bdd1243dSDimitry Andric                               Signedness SgnX, ArrayRef<Value *> WordY,
1669*bdd1243dSDimitry Andric                               Signedness SgnY) const -> SmallVector<Value *> {
1670*bdd1243dSDimitry Andric   SmallVector<SmallVector<Value *>> Products(WordX.size() + WordY.size());
1671*bdd1243dSDimitry Andric 
1672*bdd1243dSDimitry Andric   // WordX[i] * WordY[j] produces words i+j and i+j+1 of the results,
1673*bdd1243dSDimitry Andric   // that is halves 2(i+j), 2(i+j)+1, 2(i+j)+2, 2(i+j)+3.
1674*bdd1243dSDimitry Andric   for (int i = 0, e = WordX.size(); i != e; ++i) {
1675*bdd1243dSDimitry Andric     for (int j = 0, f = WordY.size(); j != f; ++j) {
1676*bdd1243dSDimitry Andric       // Check the 4 halves that this multiplication can generate.
1677*bdd1243dSDimitry Andric       Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
1678*bdd1243dSDimitry Andric       Signedness SY = (j + 1 == f) ? SgnY : Unsigned;
1679*bdd1243dSDimitry Andric       auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
1680*bdd1243dSDimitry Andric       Products[i + j + 0].push_back(Lo);
1681*bdd1243dSDimitry Andric       Products[i + j + 1].push_back(Hi);
1682*bdd1243dSDimitry Andric     }
1683*bdd1243dSDimitry Andric   }
1684*bdd1243dSDimitry Andric 
1685*bdd1243dSDimitry Andric   Value *Zero = HVC.getNullValue(WordX[0]->getType());
1686*bdd1243dSDimitry Andric 
1687*bdd1243dSDimitry Andric   auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {
1688*bdd1243dSDimitry Andric     if (Vector.empty())
1689*bdd1243dSDimitry Andric       return Zero;
1690*bdd1243dSDimitry Andric     auto Last = Vector.back();
1691*bdd1243dSDimitry Andric     Vector.pop_back();
1692*bdd1243dSDimitry Andric     return Last;
1693*bdd1243dSDimitry Andric   };
1694*bdd1243dSDimitry Andric 
1695*bdd1243dSDimitry Andric   for (int i = 0, e = Products.size(); i != e; ++i) {
1696*bdd1243dSDimitry Andric     while (Products[i].size() > 1) {
1697*bdd1243dSDimitry Andric       Value *Carry = nullptr; // no carry-in
1698*bdd1243dSDimitry Andric       for (int j = i; j != e; ++j) {
1699*bdd1243dSDimitry Andric         auto &ProdJ = Products[j];
1700*bdd1243dSDimitry Andric         auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
1701*bdd1243dSDimitry Andric                                               pop_back_or_zero(ProdJ), Carry);
1702*bdd1243dSDimitry Andric         ProdJ.insert(ProdJ.begin(), Sum);
1703*bdd1243dSDimitry Andric         Carry = CarryOut;
1704*bdd1243dSDimitry Andric       }
1705*bdd1243dSDimitry Andric     }
1706*bdd1243dSDimitry Andric   }
1707*bdd1243dSDimitry Andric 
1708*bdd1243dSDimitry Andric   SmallVector<Value *> WordP;
1709*bdd1243dSDimitry Andric   for (auto &P : Products) {
1710*bdd1243dSDimitry Andric     assert(P.size() == 1 && "Should have been added together");
1711*bdd1243dSDimitry Andric     WordP.push_back(P.front());
1712*bdd1243dSDimitry Andric   }
1713*bdd1243dSDimitry Andric 
1714*bdd1243dSDimitry Andric   return WordP;
1715*bdd1243dSDimitry Andric }
1716*bdd1243dSDimitry Andric 
1717*bdd1243dSDimitry Andric auto HvxIdioms::run() -> bool {
1718*bdd1243dSDimitry Andric   bool Changed = false;
1719*bdd1243dSDimitry Andric 
1720*bdd1243dSDimitry Andric   for (BasicBlock &B : HVC.F) {
1721*bdd1243dSDimitry Andric     for (auto It = B.rbegin(); It != B.rend(); ++It) {
1722*bdd1243dSDimitry Andric       if (auto Fxm = matchFxpMul(*It)) {
1723*bdd1243dSDimitry Andric         Value *New = processFxpMul(*It, *Fxm);
1724*bdd1243dSDimitry Andric         // Always report "changed" for now.
1725*bdd1243dSDimitry Andric         Changed = true;
1726*bdd1243dSDimitry Andric         if (!New)
1727*bdd1243dSDimitry Andric           continue;
1728*bdd1243dSDimitry Andric         bool StartOver = !isa<Instruction>(New);
1729*bdd1243dSDimitry Andric         It->replaceAllUsesWith(New);
1730*bdd1243dSDimitry Andric         RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
1731*bdd1243dSDimitry Andric         It = StartOver ? B.rbegin()
1732*bdd1243dSDimitry Andric                        : cast<Instruction>(New)->getReverseIterator();
1733*bdd1243dSDimitry Andric         Changed = true;
1734*bdd1243dSDimitry Andric       }
1735*bdd1243dSDimitry Andric     }
1736*bdd1243dSDimitry Andric   }
1737*bdd1243dSDimitry Andric 
1738*bdd1243dSDimitry Andric   return Changed;
1739*bdd1243dSDimitry Andric }
1740*bdd1243dSDimitry Andric 
1741*bdd1243dSDimitry Andric // --- End HvxIdioms
1742*bdd1243dSDimitry Andric 
1743e8d8bef9SDimitry Andric auto HexagonVectorCombine::run() -> bool {
1744e8d8bef9SDimitry Andric   if (!HST.useHVXOps())
1745e8d8bef9SDimitry Andric     return false;
1746e8d8bef9SDimitry Andric 
1747*bdd1243dSDimitry Andric   bool Changed = false;
1748*bdd1243dSDimitry Andric   Changed |= AlignVectors(*this).run();
1749*bdd1243dSDimitry Andric   Changed |= HvxIdioms(*this).run();
1750*bdd1243dSDimitry Andric 
1751e8d8bef9SDimitry Andric   return Changed;
1752e8d8bef9SDimitry Andric }
1753e8d8bef9SDimitry Andric 
1754*bdd1243dSDimitry Andric auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
1755*bdd1243dSDimitry Andric   return IntegerType::get(F.getContext(), Width);
1756e8d8bef9SDimitry Andric }
1757e8d8bef9SDimitry Andric 
1758e8d8bef9SDimitry Andric auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
1759e8d8bef9SDimitry Andric   assert(ElemCount >= 0);
1760e8d8bef9SDimitry Andric   IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
1761e8d8bef9SDimitry Andric   if (ElemCount == 0)
1762e8d8bef9SDimitry Andric     return ByteTy;
1763*bdd1243dSDimitry Andric   return VectorType::get(ByteTy, ElemCount, /*Scalable=*/false);
1764e8d8bef9SDimitry Andric }
1765e8d8bef9SDimitry Andric 
1766e8d8bef9SDimitry Andric auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
1767e8d8bef9SDimitry Andric   assert(ElemCount >= 0);
1768e8d8bef9SDimitry Andric   IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
1769e8d8bef9SDimitry Andric   if (ElemCount == 0)
1770e8d8bef9SDimitry Andric     return BoolTy;
1771*bdd1243dSDimitry Andric   return VectorType::get(BoolTy, ElemCount, /*Scalable=*/false);
1772e8d8bef9SDimitry Andric }
1773e8d8bef9SDimitry Andric 
1774*bdd1243dSDimitry Andric auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
1775*bdd1243dSDimitry Andric     -> ConstantInt * {
1776*bdd1243dSDimitry Andric   return ConstantInt::getSigned(getIntTy(Width), Val);
1777e8d8bef9SDimitry Andric }
1778e8d8bef9SDimitry Andric 
1779e8d8bef9SDimitry Andric auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
1780e8d8bef9SDimitry Andric   if (auto *C = dyn_cast<Constant>(Val))
1781e8d8bef9SDimitry Andric     return C->isZeroValue();
1782e8d8bef9SDimitry Andric   return false;
1783e8d8bef9SDimitry Andric }
1784e8d8bef9SDimitry Andric 
1785e8d8bef9SDimitry Andric auto HexagonVectorCombine::getIntValue(const Value *Val) const
1786*bdd1243dSDimitry Andric     -> std::optional<APInt> {
1787e8d8bef9SDimitry Andric   if (auto *CI = dyn_cast<ConstantInt>(Val))
1788e8d8bef9SDimitry Andric     return CI->getValue();
1789*bdd1243dSDimitry Andric   return std::nullopt;
1790e8d8bef9SDimitry Andric }
1791e8d8bef9SDimitry Andric 
1792e8d8bef9SDimitry Andric auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
1793e8d8bef9SDimitry Andric   return isa<UndefValue>(Val);
1794e8d8bef9SDimitry Andric }
1795e8d8bef9SDimitry Andric 
1796*bdd1243dSDimitry Andric auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
1797*bdd1243dSDimitry Andric     -> VectorType * {
1798*bdd1243dSDimitry Andric   EVT ETy = EVT::getEVT(ElemTy, false);
1799*bdd1243dSDimitry Andric   assert(ETy.isSimple() && "Invalid HVX element type");
1800*bdd1243dSDimitry Andric   // Do not allow boolean types here: they don't have a fixed length.
1801*bdd1243dSDimitry Andric   assert(HST.isHVXElementType(ETy.getSimpleVT(), /*IncludeBool=*/false) &&
1802*bdd1243dSDimitry Andric          "Invalid HVX element type");
1803*bdd1243dSDimitry Andric   unsigned HwLen = HST.getVectorLength();
1804*bdd1243dSDimitry Andric   unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();
1805*bdd1243dSDimitry Andric   return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
1806*bdd1243dSDimitry Andric                          /*Scalable=*/false);
1807e8d8bef9SDimitry Andric }
1808e8d8bef9SDimitry Andric 
1809*bdd1243dSDimitry Andric auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
1810*bdd1243dSDimitry Andric     -> int {
1811*bdd1243dSDimitry Andric   return getSizeOf(Val->getType(), Kind);
1812e8d8bef9SDimitry Andric }
1813e8d8bef9SDimitry Andric 
1814*bdd1243dSDimitry Andric auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
1815*bdd1243dSDimitry Andric     -> int {
1816*bdd1243dSDimitry Andric   auto *NcTy = const_cast<Type *>(Ty);
1817*bdd1243dSDimitry Andric   switch (Kind) {
1818*bdd1243dSDimitry Andric   case Store:
1819*bdd1243dSDimitry Andric     return DL.getTypeStoreSize(NcTy).getFixedValue();
1820*bdd1243dSDimitry Andric   case Alloc:
1821*bdd1243dSDimitry Andric     return DL.getTypeAllocSize(NcTy).getFixedValue();
1822*bdd1243dSDimitry Andric   }
1823*bdd1243dSDimitry Andric   llvm_unreachable("Unhandled SizeKind enum");
1824349cc55cSDimitry Andric }
1825349cc55cSDimitry Andric 
1826e8d8bef9SDimitry Andric auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
1827e8d8bef9SDimitry Andric   // The actual type may be shorter than the HVX vector, so determine
1828e8d8bef9SDimitry Andric   // the alignment based on subtarget info.
1829e8d8bef9SDimitry Andric   if (HST.isTypeForHVX(Ty))
1830e8d8bef9SDimitry Andric     return HST.getVectorLength();
1831e8d8bef9SDimitry Andric   return DL.getABITypeAlign(Ty).value();
1832e8d8bef9SDimitry Andric }
1833e8d8bef9SDimitry Andric 
1834*bdd1243dSDimitry Andric auto HexagonVectorCombine::length(Value *Val) const -> size_t {
1835*bdd1243dSDimitry Andric   return length(Val->getType());
1836*bdd1243dSDimitry Andric }
1837*bdd1243dSDimitry Andric 
1838*bdd1243dSDimitry Andric auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
1839*bdd1243dSDimitry Andric   auto *VecTy = dyn_cast<VectorType>(Ty);
1840*bdd1243dSDimitry Andric   assert(VecTy && "Must be a vector type");
1841*bdd1243dSDimitry Andric   return VecTy->getElementCount().getFixedValue();
1842*bdd1243dSDimitry Andric }
1843*bdd1243dSDimitry Andric 
1844e8d8bef9SDimitry Andric auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
1845e8d8bef9SDimitry Andric   assert(Ty->isIntOrIntVectorTy());
1846e8d8bef9SDimitry Andric   auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
1847e8d8bef9SDimitry Andric   if (auto *VecTy = dyn_cast<VectorType>(Ty))
1848e8d8bef9SDimitry Andric     return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
1849e8d8bef9SDimitry Andric   return Zero;
1850e8d8bef9SDimitry Andric }
1851e8d8bef9SDimitry Andric 
1852e8d8bef9SDimitry Andric auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
1853e8d8bef9SDimitry Andric   assert(Ty->isIntOrIntVectorTy());
1854e8d8bef9SDimitry Andric   auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
1855e8d8bef9SDimitry Andric   if (auto *VecTy = dyn_cast<VectorType>(Ty))
1856e8d8bef9SDimitry Andric     return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
1857e8d8bef9SDimitry Andric   return Minus1;
1858e8d8bef9SDimitry Andric }
1859e8d8bef9SDimitry Andric 
1860*bdd1243dSDimitry Andric auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
1861*bdd1243dSDimitry Andric     -> Constant * {
1862*bdd1243dSDimitry Andric   assert(Ty->isVectorTy());
1863*bdd1243dSDimitry Andric   auto VecTy = cast<VectorType>(Ty);
1864*bdd1243dSDimitry Andric   Type *ElemTy = VecTy->getElementType();
1865*bdd1243dSDimitry Andric   // Add support for floats if needed.
1866*bdd1243dSDimitry Andric   auto *Splat = ConstantVector::getSplat(VecTy->getElementCount(),
1867*bdd1243dSDimitry Andric                                          ConstantInt::get(ElemTy, Val));
1868*bdd1243dSDimitry Andric   return Splat;
1869*bdd1243dSDimitry Andric }
1870*bdd1243dSDimitry Andric 
1871*bdd1243dSDimitry Andric auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
1872*bdd1243dSDimitry Andric   if (auto *In = dyn_cast<Instruction>(V)) {
1873*bdd1243dSDimitry Andric     SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
1874*bdd1243dSDimitry Andric     return simplifyInstruction(In, Q);
1875*bdd1243dSDimitry Andric   }
1876*bdd1243dSDimitry Andric   return nullptr;
1877*bdd1243dSDimitry Andric }
1878*bdd1243dSDimitry Andric 
1879e8d8bef9SDimitry Andric // Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
1880*bdd1243dSDimitry Andric auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
1881*bdd1243dSDimitry Andric                                    Value *Src, int Start, int Length,
1882*bdd1243dSDimitry Andric                                    int Where) const -> Value * {
1883e8d8bef9SDimitry Andric   assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
1884e8d8bef9SDimitry Andric   int SrcLen = getSizeOf(Src);
1885e8d8bef9SDimitry Andric   int DstLen = getSizeOf(Dst);
1886e8d8bef9SDimitry Andric   assert(0 <= Start && Start + Length <= SrcLen);
1887e8d8bef9SDimitry Andric   assert(0 <= Where && Where + Length <= DstLen);
1888e8d8bef9SDimitry Andric 
1889e8d8bef9SDimitry Andric   int P2Len = PowerOf2Ceil(SrcLen | DstLen);
1890e8d8bef9SDimitry Andric   auto *Undef = UndefValue::get(getByteTy());
1891e8d8bef9SDimitry Andric   Value *P2Src = vresize(Builder, Src, P2Len, Undef);
1892e8d8bef9SDimitry Andric   Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);
1893e8d8bef9SDimitry Andric 
1894e8d8bef9SDimitry Andric   SmallVector<int, 256> SMask(P2Len);
1895e8d8bef9SDimitry Andric   for (int i = 0; i != P2Len; ++i) {
1896e8d8bef9SDimitry Andric     // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
1897e8d8bef9SDimitry Andric     // Otherwise, pick Dst[i];
1898e8d8bef9SDimitry Andric     SMask[i] =
1899e8d8bef9SDimitry Andric         (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
1900e8d8bef9SDimitry Andric   }
1901e8d8bef9SDimitry Andric 
1902e8d8bef9SDimitry Andric   Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
1903e8d8bef9SDimitry Andric   return vresize(Builder, P2Insert, DstLen, Undef);
1904e8d8bef9SDimitry Andric }
1905e8d8bef9SDimitry Andric 
1906*bdd1243dSDimitry Andric auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
1907*bdd1243dSDimitry Andric                                     Value *Hi, Value *Amt) const -> Value * {
1908e8d8bef9SDimitry Andric   assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1909e8d8bef9SDimitry Andric   if (isZero(Amt))
1910e8d8bef9SDimitry Andric     return Hi;
1911e8d8bef9SDimitry Andric   int VecLen = getSizeOf(Hi);
1912e8d8bef9SDimitry Andric   if (auto IntAmt = getIntValue(Amt))
1913e8d8bef9SDimitry Andric     return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
1914e8d8bef9SDimitry Andric                            VecLen);
1915e8d8bef9SDimitry Andric 
1916e8d8bef9SDimitry Andric   if (HST.isTypeForHVX(Hi->getType())) {
1917*bdd1243dSDimitry Andric     assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
1918*bdd1243dSDimitry Andric            "Expecting an exact HVX type");
1919*bdd1243dSDimitry Andric     return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
1920*bdd1243dSDimitry Andric                               Hi->getType(), {Hi, Lo, Amt});
1921e8d8bef9SDimitry Andric   }
1922e8d8bef9SDimitry Andric 
1923e8d8bef9SDimitry Andric   if (VecLen == 4) {
1924e8d8bef9SDimitry Andric     Value *Pair = concat(Builder, {Lo, Hi});
1925e8d8bef9SDimitry Andric     Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
1926e8d8bef9SDimitry Andric     Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
1927e8d8bef9SDimitry Andric     return Builder.CreateBitCast(Trunc, Hi->getType());
1928e8d8bef9SDimitry Andric   }
1929e8d8bef9SDimitry Andric   if (VecLen == 8) {
1930e8d8bef9SDimitry Andric     Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
1931e8d8bef9SDimitry Andric     return vralignb(Builder, Lo, Hi, Sub);
1932e8d8bef9SDimitry Andric   }
1933e8d8bef9SDimitry Andric   llvm_unreachable("Unexpected vector length");
1934e8d8bef9SDimitry Andric }
1935e8d8bef9SDimitry Andric 
1936*bdd1243dSDimitry Andric auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
1937*bdd1243dSDimitry Andric                                     Value *Hi, Value *Amt) const -> Value * {
1938e8d8bef9SDimitry Andric   assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1939e8d8bef9SDimitry Andric   if (isZero(Amt))
1940e8d8bef9SDimitry Andric     return Lo;
1941e8d8bef9SDimitry Andric   int VecLen = getSizeOf(Lo);
1942e8d8bef9SDimitry Andric   if (auto IntAmt = getIntValue(Amt))
1943e8d8bef9SDimitry Andric     return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
1944e8d8bef9SDimitry Andric 
1945e8d8bef9SDimitry Andric   if (HST.isTypeForHVX(Lo->getType())) {
1946*bdd1243dSDimitry Andric     assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
1947*bdd1243dSDimitry Andric            "Expecting an exact HVX type");
1948*bdd1243dSDimitry Andric     return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
1949*bdd1243dSDimitry Andric                               Lo->getType(), {Hi, Lo, Amt});
1950e8d8bef9SDimitry Andric   }
1951e8d8bef9SDimitry Andric 
1952e8d8bef9SDimitry Andric   if (VecLen == 4) {
1953e8d8bef9SDimitry Andric     Value *Pair = concat(Builder, {Lo, Hi});
1954e8d8bef9SDimitry Andric     Value *Shift = Builder.CreateLShr(Pair, Amt);
1955e8d8bef9SDimitry Andric     Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
1956e8d8bef9SDimitry Andric     return Builder.CreateBitCast(Trunc, Lo->getType());
1957e8d8bef9SDimitry Andric   }
1958e8d8bef9SDimitry Andric   if (VecLen == 8) {
1959e8d8bef9SDimitry Andric     Type *Int64Ty = Type::getInt64Ty(F.getContext());
1960e8d8bef9SDimitry Andric     Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
1961e8d8bef9SDimitry Andric     Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
1962e8d8bef9SDimitry Andric     Function *FI = Intrinsic::getDeclaration(F.getParent(),
1963e8d8bef9SDimitry Andric                                              Intrinsic::hexagon_S2_valignrb);
1964e8d8bef9SDimitry Andric     Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
1965e8d8bef9SDimitry Andric     return Builder.CreateBitCast(Call, Lo->getType());
1966e8d8bef9SDimitry Andric   }
1967e8d8bef9SDimitry Andric   llvm_unreachable("Unexpected vector length");
1968e8d8bef9SDimitry Andric }
1969e8d8bef9SDimitry Andric 
1970e8d8bef9SDimitry Andric // Concatenates a sequence of vectors of the same type.
1971*bdd1243dSDimitry Andric auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
1972e8d8bef9SDimitry Andric                                   ArrayRef<Value *> Vecs) const -> Value * {
1973e8d8bef9SDimitry Andric   assert(!Vecs.empty());
1974e8d8bef9SDimitry Andric   SmallVector<int, 256> SMask;
1975e8d8bef9SDimitry Andric   std::vector<Value *> Work[2];
1976e8d8bef9SDimitry Andric   int ThisW = 0, OtherW = 1;
1977e8d8bef9SDimitry Andric 
1978e8d8bef9SDimitry Andric   Work[ThisW].assign(Vecs.begin(), Vecs.end());
1979e8d8bef9SDimitry Andric   while (Work[ThisW].size() > 1) {
1980e8d8bef9SDimitry Andric     auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
1981*bdd1243dSDimitry Andric     SMask.resize(length(Ty) * 2);
1982e8d8bef9SDimitry Andric     std::iota(SMask.begin(), SMask.end(), 0);
1983e8d8bef9SDimitry Andric 
1984e8d8bef9SDimitry Andric     Work[OtherW].clear();
1985e8d8bef9SDimitry Andric     if (Work[ThisW].size() % 2 != 0)
1986e8d8bef9SDimitry Andric       Work[ThisW].push_back(UndefValue::get(Ty));
1987e8d8bef9SDimitry Andric     for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
1988e8d8bef9SDimitry Andric       Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
1989e8d8bef9SDimitry Andric                                                   Work[ThisW][i + 1], SMask);
1990e8d8bef9SDimitry Andric       Work[OtherW].push_back(Joined);
1991e8d8bef9SDimitry Andric     }
1992e8d8bef9SDimitry Andric     std::swap(ThisW, OtherW);
1993e8d8bef9SDimitry Andric   }
1994e8d8bef9SDimitry Andric 
1995e8d8bef9SDimitry Andric   // Since there may have been some undefs appended to make shuffle operands
1996e8d8bef9SDimitry Andric   // have the same type, perform the last shuffle to only pick the original
1997e8d8bef9SDimitry Andric   // elements.
1998*bdd1243dSDimitry Andric   SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
1999e8d8bef9SDimitry Andric   std::iota(SMask.begin(), SMask.end(), 0);
2000*bdd1243dSDimitry Andric   Value *Total = Work[ThisW].front();
2001e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(Total, SMask);
2002e8d8bef9SDimitry Andric }
2003e8d8bef9SDimitry Andric 
2004*bdd1243dSDimitry Andric auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
2005e8d8bef9SDimitry Andric                                    int NewSize, Value *Pad) const -> Value * {
2006e8d8bef9SDimitry Andric   assert(isa<VectorType>(Val->getType()));
2007e8d8bef9SDimitry Andric   auto *ValTy = cast<VectorType>(Val->getType());
2008e8d8bef9SDimitry Andric   assert(ValTy->getElementType() == Pad->getType());
2009e8d8bef9SDimitry Andric 
2010*bdd1243dSDimitry Andric   int CurSize = length(ValTy);
2011e8d8bef9SDimitry Andric   if (CurSize == NewSize)
2012e8d8bef9SDimitry Andric     return Val;
2013e8d8bef9SDimitry Andric   // Truncate?
2014e8d8bef9SDimitry Andric   if (CurSize > NewSize)
2015*bdd1243dSDimitry Andric     return getElementRange(Builder, Val, /*Ignored*/ Val, 0, NewSize);
2016e8d8bef9SDimitry Andric   // Extend.
2017e8d8bef9SDimitry Andric   SmallVector<int, 128> SMask(NewSize);
2018e8d8bef9SDimitry Andric   std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
2019e8d8bef9SDimitry Andric   std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
2020e8d8bef9SDimitry Andric   Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
2021e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(Val, PadVec, SMask);
2022e8d8bef9SDimitry Andric }
2023e8d8bef9SDimitry Andric 
2024*bdd1243dSDimitry Andric auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
2025e8d8bef9SDimitry Andric                                    Type *FromTy, Type *ToTy) const -> Value * {
2026e8d8bef9SDimitry Andric   // Mask is a vector <N x i1>, where each element corresponds to an
2027e8d8bef9SDimitry Andric   // element of FromTy. Remap it so that each element will correspond
2028e8d8bef9SDimitry Andric   // to an element of ToTy.
2029e8d8bef9SDimitry Andric   assert(isa<VectorType>(Mask->getType()));
2030e8d8bef9SDimitry Andric 
2031e8d8bef9SDimitry Andric   Type *FromSTy = FromTy->getScalarType();
2032e8d8bef9SDimitry Andric   Type *ToSTy = ToTy->getScalarType();
2033e8d8bef9SDimitry Andric   if (FromSTy == ToSTy)
2034e8d8bef9SDimitry Andric     return Mask;
2035e8d8bef9SDimitry Andric 
2036e8d8bef9SDimitry Andric   int FromSize = getSizeOf(FromSTy);
2037e8d8bef9SDimitry Andric   int ToSize = getSizeOf(ToSTy);
2038e8d8bef9SDimitry Andric   assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
2039e8d8bef9SDimitry Andric 
2040e8d8bef9SDimitry Andric   auto *MaskTy = cast<VectorType>(Mask->getType());
2041*bdd1243dSDimitry Andric   int FromCount = length(MaskTy);
2042e8d8bef9SDimitry Andric   int ToCount = (FromCount * FromSize) / ToSize;
2043e8d8bef9SDimitry Andric   assert((FromCount * FromSize) % ToSize == 0);
2044e8d8bef9SDimitry Andric 
2045*bdd1243dSDimitry Andric   auto *FromITy = getIntTy(FromSize * 8);
2046*bdd1243dSDimitry Andric   auto *ToITy = getIntTy(ToSize * 8);
204704eeddc0SDimitry Andric 
2048e8d8bef9SDimitry Andric   // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
2049e8d8bef9SDimitry Andric   // -> trunc to <M x i1>.
2050e8d8bef9SDimitry Andric   Value *Ext = Builder.CreateSExt(
2051*bdd1243dSDimitry Andric       Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false));
2052e8d8bef9SDimitry Andric   Value *Cast = Builder.CreateBitCast(
2053*bdd1243dSDimitry Andric       Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false));
2054e8d8bef9SDimitry Andric   return Builder.CreateTrunc(
2055*bdd1243dSDimitry Andric       Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false));
2056e8d8bef9SDimitry Andric }
2057e8d8bef9SDimitry Andric 
2058e8d8bef9SDimitry Andric // Bitcast to bytes, and return least significant bits.
2059*bdd1243dSDimitry Andric auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
2060e8d8bef9SDimitry Andric     -> Value * {
2061e8d8bef9SDimitry Andric   Type *ScalarTy = Val->getType()->getScalarType();
2062e8d8bef9SDimitry Andric   if (ScalarTy == getBoolTy())
2063e8d8bef9SDimitry Andric     return Val;
2064e8d8bef9SDimitry Andric 
2065e8d8bef9SDimitry Andric   Value *Bytes = vbytes(Builder, Val);
2066e8d8bef9SDimitry Andric   if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
2067e8d8bef9SDimitry Andric     return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
2068e8d8bef9SDimitry Andric   // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
2069e8d8bef9SDimitry Andric   // <1 x i1>.
2070e8d8bef9SDimitry Andric   return Builder.CreateTrunc(Bytes, getBoolTy());
2071e8d8bef9SDimitry Andric }
2072e8d8bef9SDimitry Andric 
2073e8d8bef9SDimitry Andric // Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
2074*bdd1243dSDimitry Andric auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
2075e8d8bef9SDimitry Andric     -> Value * {
2076e8d8bef9SDimitry Andric   Type *ScalarTy = Val->getType()->getScalarType();
2077e8d8bef9SDimitry Andric   if (ScalarTy == getByteTy())
2078e8d8bef9SDimitry Andric     return Val;
2079e8d8bef9SDimitry Andric 
2080e8d8bef9SDimitry Andric   if (ScalarTy != getBoolTy())
2081e8d8bef9SDimitry Andric     return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
2082e8d8bef9SDimitry Andric   // For bool, return a sext from i1 to i8.
2083e8d8bef9SDimitry Andric   if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
2084e8d8bef9SDimitry Andric     return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
2085e8d8bef9SDimitry Andric   return Builder.CreateSExt(Val, getByteTy());
2086e8d8bef9SDimitry Andric }
2087e8d8bef9SDimitry Andric 
2088*bdd1243dSDimitry Andric auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
2089*bdd1243dSDimitry Andric                                      unsigned Start, unsigned Length) const
2090e8d8bef9SDimitry Andric     -> Value * {
2091*bdd1243dSDimitry Andric   assert(Start + Length <= length(Val));
2092*bdd1243dSDimitry Andric   return getElementRange(Builder, Val, /*Ignored*/ Val, Start, Length);
2093e8d8bef9SDimitry Andric }
2094e8d8bef9SDimitry Andric 
2095*bdd1243dSDimitry Andric auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
2096*bdd1243dSDimitry Andric     -> Value * {
2097*bdd1243dSDimitry Andric   size_t Len = length(Val);
2098*bdd1243dSDimitry Andric   assert(Len % 2 == 0 && "Length should be even");
2099*bdd1243dSDimitry Andric   return subvector(Builder, Val, 0, Len / 2);
2100*bdd1243dSDimitry Andric }
2101*bdd1243dSDimitry Andric 
2102*bdd1243dSDimitry Andric auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
2103*bdd1243dSDimitry Andric     -> Value * {
2104*bdd1243dSDimitry Andric   size_t Len = length(Val);
2105*bdd1243dSDimitry Andric   assert(Len % 2 == 0 && "Length should be even");
2106*bdd1243dSDimitry Andric   return subvector(Builder, Val, Len / 2, Len / 2);
2107*bdd1243dSDimitry Andric }
2108*bdd1243dSDimitry Andric 
2109*bdd1243dSDimitry Andric auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
2110*bdd1243dSDimitry Andric                                  Value *Val1) const -> Value * {
2111*bdd1243dSDimitry Andric   assert(Val0->getType() == Val1->getType());
2112*bdd1243dSDimitry Andric   int Len = length(Val0);
2113*bdd1243dSDimitry Andric   SmallVector<int, 128> Mask(2 * Len);
2114*bdd1243dSDimitry Andric 
2115*bdd1243dSDimitry Andric   for (int i = 0; i != Len; ++i) {
2116*bdd1243dSDimitry Andric     Mask[i] = 2 * i;           // Even
2117*bdd1243dSDimitry Andric     Mask[i + Len] = 2 * i + 1; // Odd
2118*bdd1243dSDimitry Andric   }
2119*bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(Val0, Val1, Mask);
2120*bdd1243dSDimitry Andric }
2121*bdd1243dSDimitry Andric 
2122*bdd1243dSDimitry Andric auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
2123*bdd1243dSDimitry Andric                                   Value *Val1) const -> Value * { //
2124*bdd1243dSDimitry Andric   assert(Val0->getType() == Val1->getType());
2125*bdd1243dSDimitry Andric   int Len = length(Val0);
2126*bdd1243dSDimitry Andric   SmallVector<int, 128> Mask(2 * Len);
2127*bdd1243dSDimitry Andric 
2128*bdd1243dSDimitry Andric   for (int i = 0; i != Len; ++i) {
2129*bdd1243dSDimitry Andric     Mask[2 * i + 0] = i;       // Val0
2130*bdd1243dSDimitry Andric     Mask[2 * i + 1] = i + Len; // Val1
2131*bdd1243dSDimitry Andric   }
2132*bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(Val0, Val1, Mask);
2133*bdd1243dSDimitry Andric }
2134*bdd1243dSDimitry Andric 
2135*bdd1243dSDimitry Andric auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
2136*bdd1243dSDimitry Andric                                               Intrinsic::ID IntID, Type *RetTy,
2137*bdd1243dSDimitry Andric                                               ArrayRef<Value *> Args,
2138*bdd1243dSDimitry Andric                                               ArrayRef<Type *> ArgTys) const
2139*bdd1243dSDimitry Andric     -> Value * {
2140*bdd1243dSDimitry Andric   auto getCast = [&](IRBuilderBase &Builder, Value *Val,
2141e8d8bef9SDimitry Andric                      Type *DestTy) -> Value * {
2142e8d8bef9SDimitry Andric     Type *SrcTy = Val->getType();
2143e8d8bef9SDimitry Andric     if (SrcTy == DestTy)
2144e8d8bef9SDimitry Andric       return Val;
2145*bdd1243dSDimitry Andric 
2146e8d8bef9SDimitry Andric     // Non-HVX type. It should be a scalar, and it should already have
2147e8d8bef9SDimitry Andric     // a valid type.
2148*bdd1243dSDimitry Andric     assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));
2149*bdd1243dSDimitry Andric 
2150*bdd1243dSDimitry Andric     Type *BoolTy = Type::getInt1Ty(F.getContext());
2151*bdd1243dSDimitry Andric     if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
2152*bdd1243dSDimitry Andric       return Builder.CreateBitCast(Val, DestTy);
2153*bdd1243dSDimitry Andric 
2154*bdd1243dSDimitry Andric     // Predicate HVX vector.
2155*bdd1243dSDimitry Andric     unsigned HwLen = HST.getVectorLength();
2156*bdd1243dSDimitry Andric     Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
2157*bdd1243dSDimitry Andric                                    : Intrinsic::hexagon_V6_pred_typecast_128B;
2158*bdd1243dSDimitry Andric     Function *FI =
2159*bdd1243dSDimitry Andric         Intrinsic::getDeclaration(F.getParent(), TC, {DestTy, Val->getType()});
2160*bdd1243dSDimitry Andric     return Builder.CreateCall(FI, {Val});
2161e8d8bef9SDimitry Andric   };
2162e8d8bef9SDimitry Andric 
2163*bdd1243dSDimitry Andric   Function *IntrFn = Intrinsic::getDeclaration(F.getParent(), IntID, ArgTys);
2164*bdd1243dSDimitry Andric   FunctionType *IntrTy = IntrFn->getFunctionType();
2165*bdd1243dSDimitry Andric 
2166*bdd1243dSDimitry Andric   SmallVector<Value *, 4> IntrArgs;
2167*bdd1243dSDimitry Andric   for (int i = 0, e = Args.size(); i != e; ++i) {
2168*bdd1243dSDimitry Andric     Value *A = Args[i];
2169*bdd1243dSDimitry Andric     Type *T = IntrTy->getParamType(i);
2170*bdd1243dSDimitry Andric     if (A->getType() != T) {
2171*bdd1243dSDimitry Andric       IntrArgs.push_back(getCast(Builder, A, T));
2172*bdd1243dSDimitry Andric     } else {
2173*bdd1243dSDimitry Andric       IntrArgs.push_back(A);
2174*bdd1243dSDimitry Andric     }
2175*bdd1243dSDimitry Andric   }
2176*bdd1243dSDimitry Andric   Value *Call = Builder.CreateCall(IntrFn, IntrArgs);
2177e8d8bef9SDimitry Andric 
2178e8d8bef9SDimitry Andric   Type *CallTy = Call->getType();
2179*bdd1243dSDimitry Andric   if (RetTy == nullptr || CallTy == RetTy)
2180e8d8bef9SDimitry Andric     return Call;
2181e8d8bef9SDimitry Andric   // Scalar types should have RetTy matching the call return type.
2182*bdd1243dSDimitry Andric   assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
2183e8d8bef9SDimitry Andric   return getCast(Builder, Call, RetTy);
2184*bdd1243dSDimitry Andric }
2185*bdd1243dSDimitry Andric 
2186*bdd1243dSDimitry Andric auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
2187*bdd1243dSDimitry Andric                                                Value *Vec,
2188*bdd1243dSDimitry Andric                                                unsigned ToWidth) const
2189*bdd1243dSDimitry Andric     -> SmallVector<Value *> {
2190*bdd1243dSDimitry Andric   // Break a vector of wide elements into a series of vectors with narrow
2191*bdd1243dSDimitry Andric   // elements:
2192*bdd1243dSDimitry Andric   //   (...c0:b0:a0, ...c1:b1:a1, ...c2:b2:a2, ...)
2193*bdd1243dSDimitry Andric   // -->
2194*bdd1243dSDimitry Andric   //   (a0, a1, a2, ...)    // lowest "ToWidth" bits
2195*bdd1243dSDimitry Andric   //   (b0, b1, b2, ...)    // the next lowest...
2196*bdd1243dSDimitry Andric   //   (c0, c1, c2, ...)    // ...
2197*bdd1243dSDimitry Andric   //   ...
2198*bdd1243dSDimitry Andric   //
2199*bdd1243dSDimitry Andric   // The number of elements in each resulting vector is the same as
2200*bdd1243dSDimitry Andric   // in the original vector.
2201*bdd1243dSDimitry Andric 
2202*bdd1243dSDimitry Andric   auto *VecTy = cast<VectorType>(Vec->getType());
2203*bdd1243dSDimitry Andric   assert(VecTy->getElementType()->isIntegerTy());
2204*bdd1243dSDimitry Andric   unsigned FromWidth = VecTy->getScalarSizeInBits();
2205*bdd1243dSDimitry Andric   assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
2206*bdd1243dSDimitry Andric   assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
2207*bdd1243dSDimitry Andric   unsigned NumResults = FromWidth / ToWidth;
2208*bdd1243dSDimitry Andric 
2209*bdd1243dSDimitry Andric   SmallVector<Value *> Results(NumResults);
2210*bdd1243dSDimitry Andric   Results[0] = Vec;
2211*bdd1243dSDimitry Andric   unsigned Length = length(VecTy);
2212*bdd1243dSDimitry Andric 
2213*bdd1243dSDimitry Andric   // Do it by splitting in half, since those operations correspond to deal
2214*bdd1243dSDimitry Andric   // instructions.
2215*bdd1243dSDimitry Andric   auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
2216*bdd1243dSDimitry Andric     // Take V = Results[Begin], split it in L, H.
2217*bdd1243dSDimitry Andric     // Store Results[Begin] = L, Results[(Begin+End)/2] = H
2218*bdd1243dSDimitry Andric     // Call itself recursively split(Begin, Half), split(Half+1, End)
2219*bdd1243dSDimitry Andric     if (Begin + 1 == End)
2220*bdd1243dSDimitry Andric       return;
2221*bdd1243dSDimitry Andric 
2222*bdd1243dSDimitry Andric     Value *Val = Results[Begin];
2223*bdd1243dSDimitry Andric     unsigned Width = Val->getType()->getScalarSizeInBits();
2224*bdd1243dSDimitry Andric 
2225*bdd1243dSDimitry Andric     auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
2226*bdd1243dSDimitry Andric     Value *VVal = Builder.CreateBitCast(Val, VTy);
2227*bdd1243dSDimitry Andric 
2228*bdd1243dSDimitry Andric     Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
2229*bdd1243dSDimitry Andric 
2230*bdd1243dSDimitry Andric     unsigned Half = (Begin + End) / 2;
2231*bdd1243dSDimitry Andric     Results[Begin] = sublo(Builder, Res);
2232*bdd1243dSDimitry Andric     Results[Half] = subhi(Builder, Res);
2233*bdd1243dSDimitry Andric 
2234*bdd1243dSDimitry Andric     splitFunc(Begin, Half, splitFunc);
2235*bdd1243dSDimitry Andric     splitFunc(Half, End, splitFunc);
2236*bdd1243dSDimitry Andric   };
2237*bdd1243dSDimitry Andric 
2238*bdd1243dSDimitry Andric   splitInHalf(0, NumResults, splitInHalf);
2239*bdd1243dSDimitry Andric   return Results;
2240*bdd1243dSDimitry Andric }
2241*bdd1243dSDimitry Andric 
2242*bdd1243dSDimitry Andric auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
2243*bdd1243dSDimitry Andric                                               ArrayRef<Value *> Values,
2244*bdd1243dSDimitry Andric                                               VectorType *ToType) const
2245*bdd1243dSDimitry Andric     -> Value * {
2246*bdd1243dSDimitry Andric   assert(ToType->getElementType()->isIntegerTy());
2247*bdd1243dSDimitry Andric 
2248*bdd1243dSDimitry Andric   // If the list of values does not have power-of-2 elements, append copies
2249*bdd1243dSDimitry Andric   // of the sign bit to it, to make the size be 2^n.
2250*bdd1243dSDimitry Andric   // The reason for this is that the values will be joined in pairs, because
2251*bdd1243dSDimitry Andric   // otherwise the shuffles will result in convoluted code. With pairwise
2252*bdd1243dSDimitry Andric   // joins, the shuffles will hopefully be folded into a perfect shuffle.
2253*bdd1243dSDimitry Andric   // The output will need to be sign-extended to a type with element width
2254*bdd1243dSDimitry Andric   // being a power-of-2 anyways.
2255*bdd1243dSDimitry Andric   SmallVector<Value *> Inputs(Values.begin(), Values.end());
2256*bdd1243dSDimitry Andric 
2257*bdd1243dSDimitry Andric   unsigned ToWidth = ToType->getScalarSizeInBits();
2258*bdd1243dSDimitry Andric   unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
2259*bdd1243dSDimitry Andric   assert(Width <= ToWidth);
2260*bdd1243dSDimitry Andric   assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
2261*bdd1243dSDimitry Andric   unsigned Length = length(Inputs.front()->getType());
2262*bdd1243dSDimitry Andric 
2263*bdd1243dSDimitry Andric   unsigned NeedInputs = ToWidth / Width;
2264*bdd1243dSDimitry Andric   if (Inputs.size() != NeedInputs) {
2265*bdd1243dSDimitry Andric     // Having too many inputs is ok: drop the high bits (usual wrap-around).
2266*bdd1243dSDimitry Andric     // If there are too few, fill them with the sign bit.
2267*bdd1243dSDimitry Andric     Value *Last = Inputs.back();
2268*bdd1243dSDimitry Andric     Value *Sign =
2269*bdd1243dSDimitry Andric         Builder.CreateAShr(Last, getConstSplat(Last->getType(), Width - 1));
2270*bdd1243dSDimitry Andric     Inputs.resize(NeedInputs, Sign);
2271*bdd1243dSDimitry Andric   }
2272*bdd1243dSDimitry Andric 
2273*bdd1243dSDimitry Andric   while (Inputs.size() > 1) {
2274*bdd1243dSDimitry Andric     Width *= 2;
2275*bdd1243dSDimitry Andric     auto *VTy = VectorType::get(getIntTy(Width), Length, false);
2276*bdd1243dSDimitry Andric     for (int i = 0, e = Inputs.size(); i < e; i += 2) {
2277*bdd1243dSDimitry Andric       Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
2278*bdd1243dSDimitry Andric       Inputs[i / 2] = Builder.CreateBitCast(Res, VTy);
2279*bdd1243dSDimitry Andric     }
2280*bdd1243dSDimitry Andric     Inputs.resize(Inputs.size() / 2);
2281*bdd1243dSDimitry Andric   }
2282*bdd1243dSDimitry Andric 
2283*bdd1243dSDimitry Andric   assert(Inputs.front()->getType() == ToType);
2284*bdd1243dSDimitry Andric   return Inputs.front();
2285e8d8bef9SDimitry Andric }
2286e8d8bef9SDimitry Andric 
2287e8d8bef9SDimitry Andric auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
2288e8d8bef9SDimitry Andric                                                       Value *Ptr1) const
2289*bdd1243dSDimitry Andric     -> std::optional<int> {
2290e8d8bef9SDimitry Andric   struct Builder : IRBuilder<> {
2291*bdd1243dSDimitry Andric     Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
2292e8d8bef9SDimitry Andric     ~Builder() {
2293e8d8bef9SDimitry Andric       for (Instruction *I : llvm::reverse(ToErase))
2294e8d8bef9SDimitry Andric         I->eraseFromParent();
2295e8d8bef9SDimitry Andric     }
2296e8d8bef9SDimitry Andric     SmallVector<Instruction *, 8> ToErase;
2297e8d8bef9SDimitry Andric   };
2298e8d8bef9SDimitry Andric 
2299e8d8bef9SDimitry Andric #define CallBuilder(B, F)                                                      \
2300e8d8bef9SDimitry Andric   [&](auto &B_) {                                                              \
2301e8d8bef9SDimitry Andric     Value *V = B_.F;                                                           \
2302e8d8bef9SDimitry Andric     if (auto *I = dyn_cast<Instruction>(V))                                    \
2303e8d8bef9SDimitry Andric       B_.ToErase.push_back(I);                                                 \
2304e8d8bef9SDimitry Andric     return V;                                                                  \
2305e8d8bef9SDimitry Andric   }(B)
2306e8d8bef9SDimitry Andric 
2307*bdd1243dSDimitry Andric   auto Simplify = [this](Value *V) {
2308*bdd1243dSDimitry Andric     if (Value *S = simplify(V))
2309e8d8bef9SDimitry Andric       return S;
2310e8d8bef9SDimitry Andric     return V;
2311e8d8bef9SDimitry Andric   };
2312e8d8bef9SDimitry Andric 
2313e8d8bef9SDimitry Andric   auto StripBitCast = [](Value *V) {
2314e8d8bef9SDimitry Andric     while (auto *C = dyn_cast<BitCastInst>(V))
2315e8d8bef9SDimitry Andric       V = C->getOperand(0);
2316e8d8bef9SDimitry Andric     return V;
2317e8d8bef9SDimitry Andric   };
2318e8d8bef9SDimitry Andric 
2319e8d8bef9SDimitry Andric   Ptr0 = StripBitCast(Ptr0);
2320e8d8bef9SDimitry Andric   Ptr1 = StripBitCast(Ptr1);
2321e8d8bef9SDimitry Andric   if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
2322*bdd1243dSDimitry Andric     return std::nullopt;
2323e8d8bef9SDimitry Andric 
2324e8d8bef9SDimitry Andric   auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
2325e8d8bef9SDimitry Andric   auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
2326e8d8bef9SDimitry Andric   if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
2327*bdd1243dSDimitry Andric     return std::nullopt;
2328*bdd1243dSDimitry Andric   if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
2329*bdd1243dSDimitry Andric     return std::nullopt;
2330e8d8bef9SDimitry Andric 
2331e8d8bef9SDimitry Andric   Builder B(Gep0->getParent());
2332*bdd1243dSDimitry Andric   int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
2333e8d8bef9SDimitry Andric 
2334e8d8bef9SDimitry Andric   // FIXME: for now only check GEPs with a single index.
2335e8d8bef9SDimitry Andric   if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
2336*bdd1243dSDimitry Andric     return std::nullopt;
2337e8d8bef9SDimitry Andric 
2338e8d8bef9SDimitry Andric   Value *Idx0 = Gep0->getOperand(1);
2339e8d8bef9SDimitry Andric   Value *Idx1 = Gep1->getOperand(1);
2340e8d8bef9SDimitry Andric 
2341e8d8bef9SDimitry Andric   // First, try to simplify the subtraction directly.
2342e8d8bef9SDimitry Andric   if (auto *Diff = dyn_cast<ConstantInt>(
2343e8d8bef9SDimitry Andric           Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
2344e8d8bef9SDimitry Andric     return Diff->getSExtValue() * Scale;
2345e8d8bef9SDimitry Andric 
2346*bdd1243dSDimitry Andric   KnownBits Known0 = getKnownBits(Idx0, Gep0);
2347*bdd1243dSDimitry Andric   KnownBits Known1 = getKnownBits(Idx1, Gep1);
2348e8d8bef9SDimitry Andric   APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
2349349cc55cSDimitry Andric   if (Unknown.isAllOnes())
2350*bdd1243dSDimitry Andric     return std::nullopt;
2351e8d8bef9SDimitry Andric 
2352e8d8bef9SDimitry Andric   Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
2353e8d8bef9SDimitry Andric   Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
2354e8d8bef9SDimitry Andric   Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
2355e8d8bef9SDimitry Andric   Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
2356e8d8bef9SDimitry Andric   int Diff0 = 0;
2357e8d8bef9SDimitry Andric   if (auto *C = dyn_cast<ConstantInt>(SubU)) {
2358e8d8bef9SDimitry Andric     Diff0 = C->getSExtValue();
2359e8d8bef9SDimitry Andric   } else {
2360*bdd1243dSDimitry Andric     return std::nullopt;
2361e8d8bef9SDimitry Andric   }
2362e8d8bef9SDimitry Andric 
2363e8d8bef9SDimitry Andric   Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
2364e8d8bef9SDimitry Andric   Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
2365e8d8bef9SDimitry Andric   Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
2366e8d8bef9SDimitry Andric   Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
2367e8d8bef9SDimitry Andric   int Diff1 = 0;
2368e8d8bef9SDimitry Andric   if (auto *C = dyn_cast<ConstantInt>(SubK)) {
2369e8d8bef9SDimitry Andric     Diff1 = C->getSExtValue();
2370e8d8bef9SDimitry Andric   } else {
2371*bdd1243dSDimitry Andric     return std::nullopt;
2372e8d8bef9SDimitry Andric   }
2373e8d8bef9SDimitry Andric 
2374e8d8bef9SDimitry Andric   return (Diff0 + Diff1) * Scale;
2375e8d8bef9SDimitry Andric 
2376e8d8bef9SDimitry Andric #undef CallBuilder
2377e8d8bef9SDimitry Andric }
2378e8d8bef9SDimitry Andric 
2379*bdd1243dSDimitry Andric auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
2380*bdd1243dSDimitry Andric                                                  const Instruction *CtxI) const
2381*bdd1243dSDimitry Andric     -> unsigned {
2382*bdd1243dSDimitry Andric   return ComputeMaxSignificantBits(V, DL, /*Depth=*/0, &AC, CtxI, &DT);
2383*bdd1243dSDimitry Andric }
2384*bdd1243dSDimitry Andric 
2385*bdd1243dSDimitry Andric auto HexagonVectorCombine::getKnownBits(const Value *V,
2386*bdd1243dSDimitry Andric                                         const Instruction *CtxI) const
2387*bdd1243dSDimitry Andric     -> KnownBits {
2388*bdd1243dSDimitry Andric   return computeKnownBits(V, DL, /*Depth=*/0, &AC, CtxI, &DT, /*ORE=*/nullptr,
2389*bdd1243dSDimitry Andric                           /*UseInstrInfo=*/true);
2390*bdd1243dSDimitry Andric }
2391*bdd1243dSDimitry Andric 
2392e8d8bef9SDimitry Andric template <typename T>
2393e8d8bef9SDimitry Andric auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
2394e8d8bef9SDimitry Andric                                                   BasicBlock::const_iterator To,
2395*bdd1243dSDimitry Andric                                                   const T &IgnoreInsts) const
2396e8d8bef9SDimitry Andric     -> bool {
2397*bdd1243dSDimitry Andric   auto getLocOrNone =
2398*bdd1243dSDimitry Andric       [this](const Instruction &I) -> std::optional<MemoryLocation> {
2399e8d8bef9SDimitry Andric     if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
2400e8d8bef9SDimitry Andric       switch (II->getIntrinsicID()) {
2401e8d8bef9SDimitry Andric       case Intrinsic::masked_load:
2402e8d8bef9SDimitry Andric         return MemoryLocation::getForArgument(II, 0, TLI);
2403e8d8bef9SDimitry Andric       case Intrinsic::masked_store:
2404e8d8bef9SDimitry Andric         return MemoryLocation::getForArgument(II, 1, TLI);
2405e8d8bef9SDimitry Andric       }
2406e8d8bef9SDimitry Andric     }
2407e8d8bef9SDimitry Andric     return MemoryLocation::getOrNone(&I);
2408e8d8bef9SDimitry Andric   };
2409e8d8bef9SDimitry Andric 
2410e8d8bef9SDimitry Andric   // The source and the destination must be in the same basic block.
2411e8d8bef9SDimitry Andric   const BasicBlock &Block = *In.getParent();
2412e8d8bef9SDimitry Andric   assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
2413e8d8bef9SDimitry Andric   // No PHIs.
2414e8d8bef9SDimitry Andric   if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
2415e8d8bef9SDimitry Andric     return false;
2416e8d8bef9SDimitry Andric 
241781ad6265SDimitry Andric   if (!mayHaveNonDefUseDependency(In))
2418e8d8bef9SDimitry Andric     return true;
2419e8d8bef9SDimitry Andric   bool MayWrite = In.mayWriteToMemory();
2420e8d8bef9SDimitry Andric   auto MaybeLoc = getLocOrNone(In);
2421e8d8bef9SDimitry Andric 
2422e8d8bef9SDimitry Andric   auto From = In.getIterator();
2423e8d8bef9SDimitry Andric   if (From == To)
2424e8d8bef9SDimitry Andric     return true;
2425e8d8bef9SDimitry Andric   bool MoveUp = (To != Block.end() && To->comesBefore(&In));
2426e8d8bef9SDimitry Andric   auto Range =
2427e8d8bef9SDimitry Andric       MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
2428e8d8bef9SDimitry Andric   for (auto It = Range.first; It != Range.second; ++It) {
2429e8d8bef9SDimitry Andric     const Instruction &I = *It;
2430*bdd1243dSDimitry Andric     if (llvm::is_contained(IgnoreInsts, &I))
2431e8d8bef9SDimitry Andric       continue;
2432fe6060f1SDimitry Andric     // assume intrinsic can be ignored
2433fe6060f1SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
2434fe6060f1SDimitry Andric       if (II->getIntrinsicID() == Intrinsic::assume)
2435fe6060f1SDimitry Andric         continue;
2436fe6060f1SDimitry Andric     }
2437e8d8bef9SDimitry Andric     // Parts based on isSafeToMoveBefore from CoveMoverUtils.cpp.
2438e8d8bef9SDimitry Andric     if (I.mayThrow())
2439e8d8bef9SDimitry Andric       return false;
2440e8d8bef9SDimitry Andric     if (auto *CB = dyn_cast<CallBase>(&I)) {
2441e8d8bef9SDimitry Andric       if (!CB->hasFnAttr(Attribute::WillReturn))
2442e8d8bef9SDimitry Andric         return false;
2443e8d8bef9SDimitry Andric       if (!CB->hasFnAttr(Attribute::NoSync))
2444e8d8bef9SDimitry Andric         return false;
2445e8d8bef9SDimitry Andric     }
2446e8d8bef9SDimitry Andric     if (I.mayReadOrWriteMemory()) {
2447e8d8bef9SDimitry Andric       auto MaybeLocI = getLocOrNone(I);
2448e8d8bef9SDimitry Andric       if (MayWrite || I.mayWriteToMemory()) {
2449e8d8bef9SDimitry Andric         if (!MaybeLoc || !MaybeLocI)
2450e8d8bef9SDimitry Andric           return false;
2451e8d8bef9SDimitry Andric         if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
2452e8d8bef9SDimitry Andric           return false;
2453e8d8bef9SDimitry Andric       }
2454e8d8bef9SDimitry Andric     }
2455e8d8bef9SDimitry Andric   }
2456e8d8bef9SDimitry Andric   return true;
2457e8d8bef9SDimitry Andric }
2458e8d8bef9SDimitry Andric 
2459e8d8bef9SDimitry Andric auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
2460e8d8bef9SDimitry Andric   if (auto *VecTy = dyn_cast<VectorType>(Ty))
2461e8d8bef9SDimitry Andric     return VecTy->getElementType() == getByteTy();
2462e8d8bef9SDimitry Andric   return false;
2463e8d8bef9SDimitry Andric }
2464e8d8bef9SDimitry Andric 
2465*bdd1243dSDimitry Andric auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
2466e8d8bef9SDimitry Andric                                            Value *Hi, int Start,
2467e8d8bef9SDimitry Andric                                            int Length) const -> Value * {
2468*bdd1243dSDimitry Andric   assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
2469e8d8bef9SDimitry Andric   SmallVector<int, 128> SMask(Length);
2470e8d8bef9SDimitry Andric   std::iota(SMask.begin(), SMask.end(), Start);
2471e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(Lo, Hi, SMask);
2472e8d8bef9SDimitry Andric }
2473e8d8bef9SDimitry Andric 
2474e8d8bef9SDimitry Andric // Pass management.
2475e8d8bef9SDimitry Andric 
2476e8d8bef9SDimitry Andric namespace llvm {
2477e8d8bef9SDimitry Andric void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
2478e8d8bef9SDimitry Andric FunctionPass *createHexagonVectorCombineLegacyPass();
2479e8d8bef9SDimitry Andric } // namespace llvm
2480e8d8bef9SDimitry Andric 
2481e8d8bef9SDimitry Andric namespace {
2482e8d8bef9SDimitry Andric class HexagonVectorCombineLegacy : public FunctionPass {
2483e8d8bef9SDimitry Andric public:
2484e8d8bef9SDimitry Andric   static char ID;
2485e8d8bef9SDimitry Andric 
2486e8d8bef9SDimitry Andric   HexagonVectorCombineLegacy() : FunctionPass(ID) {}
2487e8d8bef9SDimitry Andric 
2488e8d8bef9SDimitry Andric   StringRef getPassName() const override { return "Hexagon Vector Combine"; }
2489e8d8bef9SDimitry Andric 
2490e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
2491e8d8bef9SDimitry Andric     AU.setPreservesCFG();
2492e8d8bef9SDimitry Andric     AU.addRequired<AAResultsWrapperPass>();
2493e8d8bef9SDimitry Andric     AU.addRequired<AssumptionCacheTracker>();
2494e8d8bef9SDimitry Andric     AU.addRequired<DominatorTreeWrapperPass>();
2495e8d8bef9SDimitry Andric     AU.addRequired<TargetLibraryInfoWrapperPass>();
2496e8d8bef9SDimitry Andric     AU.addRequired<TargetPassConfig>();
2497e8d8bef9SDimitry Andric     FunctionPass::getAnalysisUsage(AU);
2498e8d8bef9SDimitry Andric   }
2499e8d8bef9SDimitry Andric 
2500e8d8bef9SDimitry Andric   bool runOnFunction(Function &F) override {
2501fe6060f1SDimitry Andric     if (skipFunction(F))
2502fe6060f1SDimitry Andric       return false;
2503e8d8bef9SDimitry Andric     AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2504e8d8bef9SDimitry Andric     AssumptionCache &AC =
2505e8d8bef9SDimitry Andric         getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2506e8d8bef9SDimitry Andric     DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2507e8d8bef9SDimitry Andric     TargetLibraryInfo &TLI =
2508e8d8bef9SDimitry Andric         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2509e8d8bef9SDimitry Andric     auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
2510e8d8bef9SDimitry Andric     HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
2511e8d8bef9SDimitry Andric     return HVC.run();
2512e8d8bef9SDimitry Andric   }
2513e8d8bef9SDimitry Andric };
2514e8d8bef9SDimitry Andric } // namespace
2515e8d8bef9SDimitry Andric 
// Definition of the static pass identifier that the constructor hands to
// FunctionPass.
2516e8d8bef9SDimitry Andric char HexagonVectorCombineLegacy::ID = 0;
2517e8d8bef9SDimitry Andric 
// Pass registration: the pass is registered under DEBUG_TYPE with the
// description "Hexagon Vector Combine". The dependencies listed between
// BEGIN and END are the same analyses that getAnalysisUsage() marks as
// required. NOTE(review): the trailing "false, false" are presumably the
// macro's CFG-only and is-analysis flags -- confirm against PassSupport.h.
2518e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
2519e8d8bef9SDimitry Andric                       "Hexagon Vector Combine", false, false)
2520e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2521e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2522e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2523e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2524e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
2525e8d8bef9SDimitry Andric INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
2526e8d8bef9SDimitry Andric                     "Hexagon Vector Combine", false, false)
2527e8d8bef9SDimitry Andric 
2528e8d8bef9SDimitry Andric FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
2529e8d8bef9SDimitry Andric   return new HexagonVectorCombineLegacy();
2530e8d8bef9SDimitry Andric }
2531