//===-- HexagonVectorCombine.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// HexagonVectorCombine is a utility class implementing a variety of functions
// that assist in vector-based optimizations.
//
// AlignVectors: replace unaligned vector loads and stores with aligned ones.
// HvxIdioms: recognize fixed-point multiplication idioms and rewrite them
// using HVX intrinsics.
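//
// As a rough illustration of AlignVectors (hypothetical IR, not taken from
// any particular test), two overlapping unaligned loads such as
//   %v0 = load <64 x i8>, ptr %p, align 1
//   %v1 = load <64 x i8>, ptr %q, align 1   ; %q = %p + 60
// become full-width loads from aligned addresses covering the same region,
// followed by valign operations that reconstruct %v0 and %v1 from the
// aligned sectors.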
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"

#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"

#include <algorithm>
#include <deque>
#include <map>
#include <optional>
#include <set>
#include <utility>
#include <vector>

#define DEBUG_TYPE "hexagon-vc"

using namespace llvm;

namespace {
class HexagonVectorCombine {
public:
  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
                       DominatorTree &DT_, TargetLibraryInfo &TLI_,
                       const TargetMachine &TM_)
      : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
        TLI(TLI_),
        HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}

  bool run();

  // Common integer type.
  IntegerType *getIntTy(unsigned Width = 32) const;
  // Byte type: either scalar (when ElemCount = 0), or vector with the
  // given element count.
  Type *getByteTy(int ElemCount = 0) const;
  // Boolean type: either scalar (when ElemCount = 0), or vector with the
  // given element count.
  Type *getBoolTy(int ElemCount = 0) const;
  // Create a ConstantInt of type returned by getIntTy with the value Val.
  ConstantInt *getConstInt(int Val) const;
  // Get the integer value of V, if it exists.
  std::optional<APInt> getIntValue(const Value *Val) const;
  // Is V a constant 0, or a vector of 0s?
  bool isZero(const Value *Val) const;
  // Is V an undef value?
  bool isUndef(const Value *Val) const;

  // Get HVX vector type with the given element type.
  VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;

  enum SizeKind {
    Store, // Store size
    Alloc, // Alloc size
  };
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;

  Constant *getNullValue(Type *Ty) const;
  Constant *getFullValue(Type *Ty) const;
  Constant *getConstSplat(Type *Ty, int Val) const;

  Value *simplify(Value *Val) const;

  Value *insertb(IRBuilderBase &Builder, Value *Dest, Value *Src, int Start,
                 int Length, int Where) const;
  Value *vlalignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                  Value *Amt) const;
  Value *vralignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                  Value *Amt) const;
  Value *concat(IRBuilderBase &Builder, ArrayRef<Value *> Vecs) const;
  Value *vresize(IRBuilderBase &Builder, Value *Val, int NewSize,
                 Value *Pad) const;
  Value *rescale(IRBuilderBase &Builder, Value *Mask, Type *FromTy,
                 Type *ToTy) const;
  Value *vlsb(IRBuilderBase &Builder, Value *Val) const;
  Value *vbytes(IRBuilderBase &Builder, Value *Val) const;
  Value *subvector(IRBuilderBase &Builder, Value *Val, unsigned Start,
                   unsigned Length) const;
  Value *sublo(IRBuilderBase &Builder, Value *Val) const;
  Value *subhi(IRBuilderBase &Builder, Value *Val) const;
  Value *vdeal(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;
  Value *vshuff(IRBuilderBase &Builder, Value *Val0, Value *Val1) const;

  Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID,
                            Type *RetTy, ArrayRef<Value *> Args) const;
  SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
                                           unsigned ToWidth) const;
  Value *joinVectorElements(IRBuilderBase &Builder, ArrayRef<Value *> Values,
                            VectorType *ToType) const;

  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;

  unsigned getNumSignificantBits(const Value *V,
                                 const Instruction *CtxI = nullptr) const;
  KnownBits getKnownBits(const Value *V,
                         const Instruction *CtxI = nullptr) const;

  template <typename T = std::vector<Instruction *>>
  bool isSafeToMoveBeforeInBB(const Instruction &In,
                              BasicBlock::const_iterator To,
                              const T &Ignore = {}) const;

  // This function is only used for assertions at the moment.
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;

  Function &F;
  const DataLayout &DL;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  TargetLibraryInfo &TLI;
  const HexagonSubtarget &HST;

private:
  Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                         int Start, int Length) const;
};

class AlignVectors {
public:
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  bool run();

private:
  using InstList = std::vector<Instruction *>;

  struct Segment {
    void *Data;
    int Start;
    int Size;
  };

  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
             Align H)
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
    AddrInfo &operator=(const AddrInfo &) = default;

    // XXX: add Size member?
    Instruction *Inst;
    Value *Addr;
    Type *ValTy;
    Align HaveAlign;
    Align NeedAlign;
    int Offset = 0; // Offset (in bytes) from the first member of the
                    // containing AddrList.
  };
  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    bool operator()(const Instruction *A, const Instruction *B) const {
      return A->comesBefore(B);
    }
  };
  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
    Instruction *Base; // Base instruction of the parent address group.
    InstList Main;     // Main group of instructions.
    InstList Deps;     // List of dependencies.
    bool IsHvx;        // Is this a group of HVX instructions?
    bool IsLoad;       // Is this a load group?
  };
  using MoveList = std::vector<MoveGroup>;

  struct ByteSpan {
    struct Segment {
      // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;
      Value *Val; // Value representable as a sequence of bytes.
      int Start;  // First byte of the value that belongs to the segment.
      int Size;   // Number of bytes in the segment.
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Block(const Block &Blk) = default;
      Block &operator=(const Block &Blk) = default;
      Segment Seg; // Value segment.
      int Pos;     // Position (offset) of the segment in the Block.
    };

    int extent() const;
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);
    SmallVector<Value *, 8> values() const;

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
    using const_iterator = decltype(Blocks)::const_iterator;
    const_iterator begin() const { return Blocks.begin(); }
    const_iterator end() const { return Blocks.end(); }
  };

  Align getAlignFromValue(const Value *V) const;
  std::optional<MemoryLocation> getLocation(const Instruction &In) const;
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  // This function is only used for assertions at the moment.
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;

  Value *getPayload(Value *Val) const;
  Value *getMask(Value *Val) const;
  Value *getPassThrough(Value *Val) const;

  Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                               int Adjust) const;
  Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                              int Alignment) const;
  Value *createAlignedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
                           int Alignment, Value *Mask, Value *PassThru) const;
  Value *createAlignedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
                            int Alignment, Value *Mask) const;

  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool move(const MoveGroup &Move) const;
  bool realignGroup(const MoveGroup &Move) const;

  friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
  friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
  friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);

  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << "  " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
  OS << "Main\n";
  for (Instruction *I : MG.Main)
    OS << "  " << *I << '\n';
  OS << "Deps\n";
  for (Instruction *I : MG.Deps)
    OS << "  " << *I << '\n';
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS) {
    OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
       << *B.Seg.Val << '\n';
  }
  OS << ']';
  return OS;
}

class HvxIdioms {
public:
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/true);
  }

  bool run();

private:
  struct FxpOp {
    unsigned Opcode;
    unsigned Frac; // Number of fraction bits
    Value *X, *Y;
    // If present, add 1 << RoundAt before shift:
    std::optional<unsigned> RoundAt;
  };

  // Value + sign
  // This is to distinguish multiplications: s*s, s*u, u*s, u*u.
  struct SValue {
    Value *Val;
    bool Signed;
  };

  std::optional<FxpOp> matchFxpMul(Instruction &In) const;
  Value *processFxpMul(Instruction &In, const FxpOp &Op) const;
  Value *createMulQ15(IRBuilderBase &Builder, Value *X, Value *Y,
                      bool Rounding) const;
  Value *createMulQ31(IRBuilderBase &Builder, Value *X, Value *Y,
                      bool Rounding) const;
  std::pair<Value *, Value *> createMul32(IRBuilderBase &Builder, SValue X,
                                          SValue Y) const;

  VectorType *HvxI32Ty;
  VectorType *HvxP32Ty;
  const HexagonVectorCombine &HVC;

  friend raw_ostream &operator<<(raw_ostream &, const FxpOp &);
};

[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
                                         const HvxIdioms::FxpOp &Op) {
  OS << Instruction::getOpcodeName(Op.Opcode) << '.' << Op.Frac;
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && Op.RoundAt.value() == Op.Frac - 1) {
      OS << ":rnd";
    } else {
      OS << " + 1<<" << Op.RoundAt.value();
    }
  }
  OS << "\n  X:" << *Op.X << "\n  Y:" << *Op.Y;
  return OS;
}

} // namespace

namespace {

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
template <typename T> T *isCandidate(Instruction *In) {
  return dyn_cast<T>(In);
}
template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<LoadInst>(In));
}
template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<StoreInst>(In));
}

#if !defined(_MSC_VER) || _MSC_VER >= 1926
// VS2017 and some versions of VS2019 have trouble compiling this:
// error C2976: 'std::map': too few template arguments
// VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925)
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  for (auto i = map.begin(), e = map.end(); i != e;) {
    if (p(*i))
      i = map.erase(i);
    else
      i = std::next(i);
  }
}

// Forward other erase_ifs to the LLVM implementations.
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
  llvm::erase_if(std::forward<T>(container), p);
}

} // namespace

// --- Begin AlignVectors

auto AlignVectors::ByteSpan::extent() const -> int {
  if (size() == 0)
    return 0;
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  }
  return Max - Min;
}

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  ByteSpan Section;
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);                       // Left end.
    int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
    if (L < R) {
      // How much to chop off the beginning of the segment:
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
    }
  }
  return Section;
}
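
// A small worked example of section(), assuming a span with two blocks:
// {Val=A, Start=0, Size=8, Pos=0} and {Val=B, Start=0, Size=8, Pos=8}.
// section(4, 8) keeps bytes [4..12) and yields the blocks
// {Val=A, Start=4, Size=4, Pos=4} and {Val=B, Start=0, Size=4, Pos=8}.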

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  for (Block &B : Blocks)
    B.Pos += Offset;
  return *this;
}

auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
  return Values;
}

auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
  const auto *C = dyn_cast<ConstantInt>(V);
  assert(C && "Alignment must be a compile-time constant integer");
  return C->getAlignValue();
}

auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
                    L->getAlign());
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    switch (ID) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      getAlignFromValue(II->getArgOperand(1)));
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      getAlignFromValue(II->getArgOperand(2)));
    }
  }
  return std::nullopt;
}

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
}

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    Intrinsic::ID ID = 0;
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
  }
  return Val;
}

auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(2);
    case Intrinsic::masked_store:
      return II->getArgOperand(3);
    }
  }

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy))
    return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
  return HVC.getFullValue(HVC.getBoolTy());
}

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(3);
  }
  return UndefValue::get(getPayload(Val)->getType());
}

auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust) const
    -> Value * {
  // The adjustment is in bytes, but if it's a multiple of the type size,
  // we don't need to do pointer casts.
  auto *PtrTy = cast<PointerType>(Ptr->getType());
  if (!PtrTy->isOpaque()) {
    Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
    int ElemSize = HVC.getSizeOf(ElemTy, HVC.Alloc);
    if (Adjust % ElemSize == 0 && Adjust != 0) {
      Value *Tmp0 =
          Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
      return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
    }
  }

  PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
  Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
  Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0,
                                  HVC.getConstInt(Adjust));
  return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment) const
    -> Value * {
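  // E.g. with Alignment = 128: 0x1234 & -128 = 0x1200, the nearest
  // 128-byte-aligned address at or below Ptr.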
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(AsInt, Mask);
  return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedLoad(IRBuilderBase &Builder, Type *ValTy,
                                     Value *Ptr, int Alignment, Value *Mask,
                                     Value *PassThru) const -> Value * {
  assert(!HVC.isUndef(Mask)); // Should this be allowed?
  if (HVC.isZero(Mask))
    return PassThru;
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
  return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);
}

auto AlignVectors::createAlignedStore(IRBuilderBase &Builder, Value *Val,
                                      Value *Ptr, int Alignment,
                                      Value *Mask) const -> Value * {
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
    return UndefValue::get(Val->getType());
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
  return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
}

auto AlignVectors::createAddressGroups() -> bool {
  // An address group created here may contain instructions spanning
  // multiple basic blocks.
  AddrList WorkStack;

  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    }
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    BasicBlock &Block = *DomN->getBlock();
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
      if (!AI)
        continue;
      auto F = findBaseAndOffset(*AI);
      Instruction *GroupInst;
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        GroupInst = BI;
      } else {
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      }
      AddrGroups[GroupInst].push_back(*AI);
    }

    for (DomTreeNode *C : DomN->children())
      Visit(C, Visit);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  // AddrGroups are formed.

  // Remove groups of size 1.
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // Remove groups that don't use HVX types.
  erase_if(AddrGroups, [&](auto &G) {
    return llvm::none_of(
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  });

  return !AddrGroups.empty();
}

auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // Form load groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto getUpwardDeps = [](Instruction *In, Instruction *Base) {
    BasicBlock *Parent = Base->getParent();
    assert(In->getParent() == Parent &&
           "Base and In should be in the same block");
    assert(Base->comesBefore(In) && "Base should come before In");

    DepList Deps;
    std::deque<Instruction *> WorkQ = {In};
    while (!WorkQ.empty()) {
      Instruction *D = WorkQ.front();
      WorkQ.pop_front();
      Deps.insert(D);
      for (Value *Op : D->operands()) {
        if (auto *I = dyn_cast<Instruction>(Op)) {
          if (I->getParent() == Parent && Base->comesBefore(I))
            WorkQ.push_back(I);
        }
      }
    }
    return Deps;
  };

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // Leading instruction in the load group.
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;

    auto isSafeToMoveToBase = [&](const Instruction *I) {
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
    };
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    if (!llvm::all_of(Deps, isSafeToMoveToBase))
      return false;

    // The dependencies will be moved together with the load, so make sure
    // that none of them could be moved independently in another group.
    Deps.erase(Info.Inst);
    auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
    if (llvm::any_of(Deps, inAddrMap))
      return false;
    Move.Main.push_back(Info.Inst);
    llvm::append_range(Move.Deps, Deps);
    return true;
  };

  MoveList LoadGroups;

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
      continue;
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  }

  // Erase singleton groups.
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return LoadGroups;
}

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // Form store groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // For stores with return values we'd have to collect downward dependencies.
    // There are no such stores that we handle at the moment, so omit that.
    assert(Info.Inst->getType()->isVoidTy() &&
           "Not handling stores with return values");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // For stores we need to be careful whether it's safe to move them.
    // Stores that are otherwise safe to move together may not appear safe
    // to move over one another (i.e. isSafeToMoveBefore may return false).
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
      return false;
    Move.Main.push_back(Info.Inst);
    return true;
  };

  MoveList StoreGroups;

  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
      continue;
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  }

  // Erase singleton groups.
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return StoreGroups;
}

auto AlignVectors::move(const MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  Instruction *Where = Move.Main.front();

  if (Move.IsLoad) {
    // Move all deps to before Where, keeping order.
    for (Instruction *D : Move.Deps)
      D->moveBefore(Where);
    // Move all main instructions to after Where, keeping order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveAfter(Where);
      Where = M;
    }
  } else {
    // NOTE: Deps are empty for "store" groups. If they need to be
    // non-empty, decide on the order.
    assert(Move.Deps.empty());
    // Move all main instructions to before Where, inverting order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveBefore(Where);
      Where = M;
    }
  }

  return Move.Main.size() + Move.Deps.size() > 1;
}

auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  // TODO: Needs support for masked loads/stores of "scalar" vectors.
  if (!Move.IsHvx)
    return false;

  // Return the element from Range for which GetValue returns the largest
  // value, where GetValue obtains the value to compare from an element.
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *std::max_element(
        Range.begin(), Range.end(),
        [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  // Conceptually, there is a vector of N bytes covering the addresses
  // starting from the minimum offset (i.e. Base.Addr+Start). This vector
  // represents a contiguous memory region that spans all accessed memory
  // locations.
  // The correspondence between loaded or stored values will be expressed
  // in terms of this vector. For example, the 0th element of the vector
  // from the Base address info will start at byte Start from the beginning
  // of this conceptual vector.
  //
  // This vector will be loaded/stored starting at the nearest down-aligned
  // address, and the amount of the down-alignment will be AlignVal:
  //   valign(load_vector(align_down(Base+Start)), AlignVal)
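  //
  // A hypothetical example for the "aligned" case below: with MinNeeded =
  // 128, lowest offset Start = -4, and the maximally aligned access at
  // OffAtMax = 256, Adjust = -alignTo(260, 128) = -384, so AlignAddr
  // corresponds to conceptual offset 256 - 384 = -128, and the shift amount
  // is Diff = -4 - (-128) = 124, satisfying 0 <= Diff < MinNeeded.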

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  AddrList MoveInfos;
  llvm::copy_if(
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  // Maximum alignment present in the move group.
  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  // Address with the minimum offset in the move group.
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  // Strictest alignment needed by any instruction in the move group.
  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  // Set the builder at the top instruction in the move group.
  Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back();
  IRBuilder<> Builder(TopIn);
  Value *AlignAddr = nullptr; // Actual aligned address.
  Value *AlignVal = nullptr;  // Right-shift amount (for valign).

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // Shift the offset of the maximally aligned instruction (OffAtMax)
    // back by just enough multiples of the required alignment to cover the
    // distance from Start to OffAtMax.
    // Calculate the address adjustment amount based on the address with the
    // maximum alignment. This is to allow a simple gep instruction instead
    // of potential bitcasts to i8*.
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(Diff >= 0);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  } else {
    // WithMinOffset is the lowest address in the group,
    //   WithMinOffset.Addr = Base+Start.
    // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
    // mask off unnecessary bits, so it's ok to just use the original pointer
    // as the alignment amount.
    // Do an explicit down-alignment of the address to avoid creating an
    // aligned instruction with an address that is not really aligned.
    AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
                                     WithMinOffset.ValTy, MinNeeded.value());
    AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
  }

  ByteSpan VSpan;
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  }

  // The aligned loads/stores will use blocks that are either scalars,
  // or HVX vectors. Let "sector" be the unified term for such a block.
  // blend(scalar, vector) -> sector...
  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  if (Move.IsLoad) {
    ByteSpan ASpan;
    auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
    auto *Undef = UndefValue::get(SecTy);

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      // FIXME: generate a predicated load?
      Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
      // If vector shifting is potentially needed, accumulate metadata
      // from source sections of twice the load width.
      int Start = (i - DoAlign) * ScLen;
      int Width = (1 + DoAlign) * ScLen;
      propagateMetadata(cast<Instruction>(Load),
                        VSpan.section(Start, Width).values());
      ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen);
    }

    if (DoAlign) {
      for (int j = 0; j != NumSectors; ++j) {
        assert(isSectorTy(ASpan[j].Seg.Val->getType()));
        ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val,
                                        ASpan[j + 1].Seg.Val, AlignVal);
      }
    }

    for (ByteSpan::Block &B : VSpan) {
      ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
      Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
      for (ByteSpan::Block &S : ASection) {
        Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
        Accum =
            HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
      }
      // Instead of casting everything to bytes for the vselect, cast to the
      // original value type. This will avoid complications with casting masks.
      // For example, in cases when the original mask applied to i32, it could
      // be converted to a mask applicable to i8 via pred_typecast intrinsic,
      // but if the mask is not exactly of HVX length, extra handling would be
      // needed to make it work.
      Type *ValTy = getPayload(B.Seg.Val)->getType();
      Value *Cast = Builder.CreateBitCast(Accum, ValTy);
      Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
                                        getPassThrough(B.Seg.Val));
      B.Seg.Val->replaceAllUsesWith(Sel);
    }
  } else {
    // Stores.
    ByteSpan ASpanV, ASpanM;

    // Return a vector value corresponding to the input value Val:
    // either <1 x Val> for scalar Val, or Val itself for vector Val.
    auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
      Type *Ty = Val->getType();
      if (Ty->isVectorTy())
        return Val;
      auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
      return Builder.CreateBitCast(Val, VecTy);
    };

    // Create an extra "undef" sector at the beginning and at the end.
    // They will be used as the left/right filler in the vlalign step.
    for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
      // For stores, the size of each section is an aligned vector length.
      // Adjust the store offsets relative to the section start offset.
      ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
      Value *AccumV = UndefValue::get(SecTy);
      Value *AccumM = HVC.getNullValue(SecTy);
      for (ByteSpan::Block &S : VSection) {
        Value *Pay = getPayload(S.Seg.Val);
        Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                  Pay->getType(), HVC.getByteTy());
        AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
                             S.Seg.Start, S.Seg.Size, S.Pos);
        AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
                             S.Seg.Start, S.Seg.Size, S.Pos);
      }
      ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
      ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
    }

    // vlalign
    if (DoAlign) {
      for (int j = 1; j != NumSectors + 2; ++j) {
        Value *PrevV = ASpanV[j - 1].Seg.Val, *ThisV = ASpanV[j].Seg.Val;
        Value *PrevM = ASpanM[j - 1].Seg.Val, *ThisM = ASpanM[j].Seg.Val;
        assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
        ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
        ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
      }
    }

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      Value *Val = ASpanV[i].Seg.Val;
      Value *Mask = ASpanM[i].Seg.Val; // bytes
      if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
        Value *Store = createAlignedStore(Builder, Val, Ptr, ScLen,
                                          HVC.vlsb(Builder, Mask));
        // If vector shifting is potentially needed, accumulate metadata
        // from source sections of twice the store width.
        int Start = (i - DoAlign) * ScLen;
        int Width = (1 + DoAlign) * ScLen;
        propagateMetadata(cast<Instruction>(Store),
                          VSpan.section(Start, Width).values());
      }
    }
  }

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

  return true;
}

auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
    return false;
  int Size = HVC.getSizeOf(Ty);
  if (HVC.HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HVC.HST.getVectorLength());
  return Size == 4 || Size == 8;
}

auto AlignVectors::run() -> bool {
  if (!createAddressGroups())
    return false;

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {
    llvm::append_range(LoadGroups, createLoadGroups(G.second));
    llvm::append_range(StoreGroups, createStoreGroups(G.second));
  }

  for (auto &M : LoadGroups)
    Changed |= move(M);
  for (auto &M : StoreGroups)
    Changed |= move(M);

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);

  return Changed;
}

// --- End AlignVectors

// --- Begin HvxIdioms

// Match
//   (X * Y) [>> N], or
//   ((X * Y) + (1 << (N-1))) >> N
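//
// For example (illustrative IR, not from any particular test), a Q15
// multiply with rounding on 128-byte HVX:
//   %m = mul <64 x i16> %x, %y
//   %a = add <64 x i16> %m, <splat of i16 16384>   ; 16384 = 1 << 14
//   %s = lshr <64 x i16> %a, <splat of i16 15>
// matches with Frac = 15 and RoundAt = 14.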
auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();

  if (!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy())
    return std::nullopt;

  unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();

  FxpOp Op;
  Value *Exp = &In;

  // Fixed-point multiplication is always shifted right (except when the
  // fraction is 0 bits).
  const APInt *Qn = nullptr;
  if (Value *T; match(Exp, m_LShr(m_Value(T), m_APInt(Qn)))) {
    Op.Frac = Qn->getZExtValue();
    Exp = T;
  } else {
    Op.Frac = 0;
  }

  if (Op.Frac > Width)
    return std::nullopt;

  // Check if there is rounding added.
  const APInt *C = nullptr;
  if (Value *T; Op.Frac > 0 && match(Exp, m_Add(m_Value(T), m_APInt(C)))) {
    unsigned CV = C->getZExtValue();
    if (CV != 0 && !isPowerOf2_32(CV))
      return std::nullopt;
    if (CV != 0)
      Op.RoundAt = Log2_32(CV);
    Exp = T;
  }

  // Check if the rest is a multiplication.
  if (match(Exp, m_Mul(m_Value(Op.X), m_Value(Op.Y)))) {
    Op.Opcode = Instruction::Mul;
    return Op;
  }

  return std::nullopt;
}

auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
    -> Value * {
  // FIXME: make this more elegant
  struct TempValues {
    void insert(Value *V) { Values.push_back(V); }
    void insert(ArrayRef<Value *> Vs) {
      Values.insert(Values.end(), Vs.begin(), Vs.end());
    }
    void clear() { Values.clear(); }
    ~TempValues() {
      for (Value *V : llvm::reverse(Values)) {
        if (auto *In = dyn_cast<Instruction>(V))
          In->eraseFromParent();
      }
    }
    SmallVector<Value *> Values;
  };
  TempValues DeleteOnFailure;

  // TODO: Make it general.
  if (Op.Frac != 15 && Op.Frac != 31)
    return nullptr;

  auto *OrigTy = dyn_cast<VectorType>(Op.X->getType());
  if (OrigTy == nullptr)
    return nullptr;

  unsigned BitsX = HVC.getNumSignificantBits(Op.X, &In);
  unsigned BitsY = HVC.getNumSignificantBits(Op.Y, &In);

  unsigned SigBits = std::max(BitsX, BitsY);
  unsigned Width = PowerOf2Ceil(SigBits);
  auto *TruncTy = VectorType::get(HVC.getIntTy(Width), OrigTy);

  IRBuilder<InstSimplifyFolder> Builder(In.getParent(), In.getIterator(),
                                        InstSimplifyFolder(HVC.DL));
  // These may end up dead, but should be removed in isel.
  Value *NewX = Builder.CreateTrunc(Op.X, TruncTy);
  Value *NewY = Builder.CreateTrunc(Op.Y, TruncTy);
  if (NewX != Op.X)
    DeleteOnFailure.insert(NewX);
  if (NewY != Op.Y)
    DeleteOnFailure.insert(NewY);

  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    bool Rounding = Op.RoundAt.has_value();
    if (Width == Op.Frac + 1) {
      Value *QMul = nullptr;
      if (Width == 16) {
        QMul = createMulQ15(Builder, NewX, NewY, Rounding);
      } else if (Width == 32) {
        QMul = createMulQ31(Builder, NewX, NewY, Rounding);
      }
      if (QMul != nullptr) {
        DeleteOnFailure.clear();
        return Builder.CreateSExt(QMul, OrigTy);
      }
    }
  }

  // FIXME: make it general, _64, addcarry
  if (!HVC.HST.useHVXV62Ops())
    return nullptr;

  // The check for Frac will make sure of this, but keep this check for when
  // this function handles all Frac cases.
  assert(Width > 32);
  if (Width > 64)
    return nullptr;

  // At this point, NewX and NewY may be truncated to different element
  // widths to save on the number of multiplications to perform.
  unsigned WidthX = PowerOf2Ceil(BitsX);
  unsigned WidthY = PowerOf2Ceil(BitsY);
  Value *OldX = NewX, *OldY = NewY;
  NewX = Builder.CreateTrunc(
      NewX, VectorType::get(HVC.getIntTy(WidthX), HVC.length(NewX), false));
  NewY = Builder.CreateTrunc(
      NewY, VectorType::get(HVC.getIntTy(WidthY), HVC.length(NewY), false));
  if (NewX != OldX)
    DeleteOnFailure.insert(NewX);
  if (NewY != OldY)
    DeleteOnFailure.insert(NewY);

  // Break up the arguments NewX and NewY into vectors of smaller widths
  // in preparation for doing the multiplication via HVX intrinsics.
  // TODO:
  // Make sure that the number of elements in NewX/NewY is 32. In the future,
  // add generic code that will break up a (presumably long) vector into
  // shorter pieces, pad the last one, then concatenate all the pieces back.
  if (HVC.length(NewX) != 32)
    return nullptr;
  auto WordX = HVC.splitVectorElements(Builder, NewX, /*ToWidth=*/32);
  auto WordY = HVC.splitVectorElements(Builder, NewY, /*ToWidth=*/32);
  auto HvxWordTy = WordX[0]->getType();

  SmallVector<SmallVector<Value *>> Products(WordX.size() + WordY.size());

  // WordX[i] * WordY[j] produces words i+j and i+j+1 of the result,
  // that is, halves 2(i+j), 2(i+j)+1, 2(i+j)+2, 2(i+j)+3.
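  // E.g. for a 64-bit product split into two 32-bit words per operand
  // (schoolbook multiplication in base 2^32): WordX[0]*WordY[0] feeds
  // result words 0-1, the two cross products feed words 1-2, and
  // WordX[1]*WordY[1] feeds words 2-3.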
  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      bool SgnX = (i + 1 == e), SgnY = (j + 1 == f);
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SgnX}, {WordY[j], SgnY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);
    }
  }

  // Add the optional rounding to the proper word.
  if (Op.RoundAt.has_value()) {
    Products[*Op.RoundAt / 32].push_back(
        HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32)));
  }

  auto V6_vaddcarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
  Value *NoCarry = HVC.getNullValue(HVC.getBoolTy(HVC.length(HvxWordTy)));
  auto pop_back_or_zero = [this, HvxWordTy](auto &Vector) -> Value * {
    if (Vector.empty())
      return HVC.getNullValue(HvxWordTy);
    auto Last = Vector.back();
    Vector.pop_back();
    return Last;
  };

  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = NoCarry;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        Value *Ret = HVC.createHvxIntrinsic(
            Builder, V6_vaddcarry, nullptr,
            {pop_back_or_zero(ProdJ), pop_back_or_zero(ProdJ), Carry});
        ProdJ.insert(ProdJ.begin(), Builder.CreateExtractValue(Ret, {0}));
        Carry = Builder.CreateExtractValue(Ret, {1});
      }
    }
  }

  SmallVector<Value *> WordP;
  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
    WordP.push_back(P.front());
  }

  // Shift all products right by Op.Frac.
  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);

  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    Value *Lo = WordP[Src];
    if (Src + 1 < End) {
      Value *Hi = WordP[Src + 1];
      WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
                                           {Hi, Lo, ShiftAmt});
    } else {
      // The shift of the most significant word.
      WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt);
    }
  }
  if (SkipWords != 0)
    WordP.resize(WordP.size() - SkipWords);

  DeleteOnFailure.clear();
  return HVC.joinVectorElements(Builder, WordP, OrigTy);
}

auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, Value *X, Value *Y,
                             bool Rounding) const -> Value * {
  assert(X->getType() == Y->getType());
  assert(X->getType()->getScalarType() == HVC.getIntTy(16));
  if (!HVC.HST.isHVXVectorType(EVT::getEVT(X->getType(), false)))
    return nullptr;

  unsigned HwLen = HVC.HST.getVectorLength();

  if (Rounding) {
    auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
    return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X->getType(), {X, Y});
  }
  // No rounding, do i16*i16 -> i32, << 1, take upper half.
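  // E.g. in Q15, 0.5 * 0.5: 0x4000 * 0x4000 = 0x10000000; shifted left by
  // one it is 0x20000000, whose upper 16 bits are 0x2000 = 0.25 in Q15.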
  auto V6_vmpyhv = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);

  // i16*i16 -> i32 / interleaved
  Value *V1 = HVC.createHvxIntrinsic(Builder, V6_vmpyhv, HvxP32Ty, {X, Y});
  // <<1
  Value *V2 = Builder.CreateAdd(V1, V1);
  // i32 -> i32 deinterleave
  Value *V3 =
      HVC.vdeal(Builder, HVC.sublo(Builder, V2), HVC.subhi(Builder, V2));
  // High halves: i32 -> i16
  SmallVector<int, 64> HighMask;
  for (int i = 0; i != static_cast<int>(HwLen) / 2; ++i) {
    HighMask.push_back(2 * i + 1);
  }
  auto *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/true);
  Value *V4 = Builder.CreateBitCast(V3, HvxP16Ty);
  return Builder.CreateShuffleVector(V4, HighMask);
}

auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, Value *X, Value *Y,
                             bool Rounding) const -> Value * {
  assert(X->getType() == Y->getType());
  assert(X->getType()->getScalarType() == HVC.getIntTy(32));
  if (!HVC.HST.isHVXVectorType(EVT::getEVT(X->getType(), false)))
    return nullptr;

  auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
  auto MpyOddAcc = Rounding
                       ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
                       : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
  Value *V1 =
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, X->getType(), {X, Y});
  return HVC.createHvxIntrinsic(Builder, MpyOddAcc, X->getType(), {V1, X, Y});
}

auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false));

  assert(HVC.HST.useHVXV62Ops());

  auto simplifyOrSame = [this](Value *V) {
    if (Value *S = HVC.simplify(V))
      return S;
    return V;
  };
  Value *VX = simplifyOrSame(X.Val);
  Value *VY = simplifyOrSame(Y.Val);

  if (isa<Constant>(VX) || isa<Constant>(VY)) {
    auto getSplatValue = [](Constant *CV) -> ConstantInt * {
      if (auto *T = dyn_cast<ConstantVector>(CV))
        return dyn_cast<ConstantInt>(T->getSplatValue());
      if (auto *T = dyn_cast<ConstantDataVector>(CV))
        return dyn_cast<ConstantInt>(T->getSplatValue());
      return nullptr;
    };

    if (isa<Constant>(VX) && isa<Constant>(VY)) {
      // Both are constants, fold the multiplication.
      auto *Ty = cast<VectorType>(VX->getType());
      auto *ExtTy = VectorType::getExtendedElementVectorType(Ty);
      Value *EX = X.Signed ? Builder.CreateSExt(VX, ExtTy)
                           : Builder.CreateZExt(VX, ExtTy);
      Value *EY = Y.Signed ? Builder.CreateSExt(VY, ExtTy)
                           : Builder.CreateZExt(VY, ExtTy);
      Value *EXY = simplifyOrSame(Builder.CreateMul(EX, EY));
      auto WordXY = HVC.splitVectorElements(Builder, EXY, /*ToWidth=*/32);
      return {simplifyOrSame(WordXY[0]), simplifyOrSame(WordXY[1])};
    }
    // Make VX = constant.
    if (isa<Constant>(VY))
      std::swap(VX, VY);

    if (auto *SplatX = getSplatValue(cast<Constant>(VX))) {
      APInt S = SplatX->getValue();
      if (S == 1) {
        if (!X.Signed && !Y.Signed)
          return {VY, HVC.getConstSplat(HvxI32Ty, 0)};
        return {VY, Builder.CreateAShr(VY, HVC.getConstSplat(HvxI32Ty, 31))};
      }
    }
  }

  auto V6_vmpyewuh_64 = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh_64);
  auto V6_vmpyowh_64_acc = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_64_acc);

  Value *Vxx =
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh_64, HvxP32Ty, {X.Val, Y.Val});
  Value *Vdd = HVC.createHvxIntrinsic(Builder, V6_vmpyowh_64_acc, HvxP32Ty,
                                      {Vxx, X.Val, Y.Val});

  return {HVC.sublo(Builder, Vdd), HVC.subhi(Builder, Vdd)};
}

auto HvxIdioms::run() -> bool {
  bool Changed = false;

  for (BasicBlock &B : HVC.F) {
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        if (!New)
          continue;
        bool StartOver = !isa<Instruction>(New);
        It->replaceAllUsesWith(New);
        RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
        It = StartOver ? B.rbegin()
                       : cast<Instruction>(New)->getReverseIterator();
        Changed = true;
      }
    }
  }

  return Changed;
}

// --- End HvxIdioms

auto HexagonVectorCombine::run() -> bool {
  if (!HST.useHVXOps())
    return false;

  bool Changed = false;
  Changed |= AlignVectors(*this).run();
  Changed |= HvxIdioms(*this).run();

  return Changed;
}

auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
  return IntegerType::get(F.getContext(), Width);
}

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  if (ElemCount == 0)
    return ByteTy;
  return VectorType::get(ByteTy, ElemCount, /*Scalable=*/false);
}

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  if (ElemCount == 0)
    return BoolTy;
  return VectorType::get(BoolTy, ElemCount, /*Scalable=*/false);
}

auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * {
  return ConstantInt::getSigned(getIntTy(), Val);
}

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();
  return false;
}

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();
  return std::nullopt;
}

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  return isa<UndefValue>(Val);
}

auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
    -> VectorType * {
  EVT ETy = EVT::getEVT(ElemTy, false);
  assert(ETy.isSimple() && "Invalid HVX element type");
  // Do not allow boolean types here: they don't have a fixed length.
  assert(HST.isHVXElementType(ETy.getSimpleVT(), /*IncludeBool=*/false) &&
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
1448   unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();
1449   return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
1450                          /*Scalable=*/false);
1451 }
1452 
1453 auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
1454     -> int {
1455   return getSizeOf(Val->getType(), Kind);
1456 }
1457 
1458 auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
1459     -> int {
1460   auto *NcTy = const_cast<Type *>(Ty);
1461   switch (Kind) {
1462   case Store:
1463     return DL.getTypeStoreSize(NcTy).getFixedValue();
1464   case Alloc:
1465     return DL.getTypeAllocSize(NcTy).getFixedValue();
1466   }
1467   llvm_unreachable("Unhandled SizeKind enum");
1468 }
1469 
1470 auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
1471   // The actual type may be shorter than the HVX vector, so determine
1472   // the alignment based on subtarget info.
1473   if (HST.isTypeForHVX(Ty))
1474     return HST.getVectorLength();
1475   return DL.getABITypeAlign(Ty).value();
1476 }
1477 
1478 auto HexagonVectorCombine::length(Value *Val) const -> size_t {
1479   return length(Val->getType());
1480 }
1481 
1482 auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
1483   auto *VecTy = dyn_cast<VectorType>(Ty);
1484   assert(VecTy && "Must be a vector type");
1485   return VecTy->getElementCount().getFixedValue();
1486 }
1487 
1488 auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
1489   assert(Ty->isIntOrIntVectorTy());
1490   auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
1491   if (auto *VecTy = dyn_cast<VectorType>(Ty))
1492     return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
1493   return Zero;
1494 }
1495 
1496 auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
1497   assert(Ty->isIntOrIntVectorTy());
1498   auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
1499   if (auto *VecTy = dyn_cast<VectorType>(Ty))
1500     return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
1501   return Minus1;
1502 }
1503 
1504 auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
1505     -> Constant * {
1506   assert(Ty->isVectorTy());
1507   auto VecTy = cast<VectorType>(Ty);
1508   Type *ElemTy = VecTy->getElementType();
1509   // Add support for floats if needed.
1510   auto *Splat = ConstantVector::getSplat(VecTy->getElementCount(),
1511                                          ConstantInt::get(ElemTy, Val));
1512   return Splat;
1513 }
1514 
1515 auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
1516   if (auto *In = dyn_cast<Instruction>(V)) {
1517     SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
1518     return simplifyInstruction(In, Q);
1519   }
1520   return nullptr;
1521 }
1522 
1523 // Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
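     // For example, with Dst = <8 x i8>, Src = <4 x i8>, Start = 1,
     // Length = 2, and Where = 5, the result is Dst with bytes [5,7)
     // replaced by Src bytes [1,3). Both vectors are first resized to a
     // common power-of-2 length so that a single shuffle can do the work.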
1524 auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
1525                                    Value *Src, int Start, int Length,
1526                                    int Where) const -> Value * {
1527   assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
1528   int SrcLen = getSizeOf(Src);
1529   int DstLen = getSizeOf(Dst);
1530   assert(0 <= Start && Start + Length <= SrcLen);
1531   assert(0 <= Where && Where + Length <= DstLen);
1532 
1533   int P2Len = PowerOf2Ceil(SrcLen | DstLen);
1534   auto *Undef = UndefValue::get(getByteTy());
1535   Value *P2Src = vresize(Builder, Src, P2Len, Undef);
1536   Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);
1537 
1538   SmallVector<int, 256> SMask(P2Len);
1539   for (int i = 0; i != P2Len; ++i) {
1540     // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
1541     // Otherwise, pick Dst[i].
1542     SMask[i] =
1543         (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
1544   }
1545 
1546   Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
1547   return vresize(Builder, P2Insert, DstLen, Undef);
1548 }
1549 
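     // Left-align the Lo:Hi byte pair by Amt: for a known Amt this is
     // bytes [VecLen-Amt, 2*VecLen-Amt) of the Lo:Hi concatenation (Lo in
     // the lower lanes), e.g. VecLen = 4, Amt = 1 gives
     // (Lo[3], Hi[0], Hi[1], Hi[2]).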
1550 auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
1551                                     Value *Hi, Value *Amt) const -> Value * {
1552   assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1553   if (isZero(Amt))
1554     return Hi;
1555   int VecLen = getSizeOf(Hi);
1556   if (auto IntAmt = getIntValue(Amt))
1557     return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
1558                            VecLen);
1559 
1560   if (HST.isTypeForHVX(Hi->getType())) {
1561     assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
1562            "Expecting an exact HVX type");
1563     return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
1564                               Hi->getType(), {Hi, Lo, Amt});
1565   }
1566 
1567   if (VecLen == 4) {
1568     Value *Pair = concat(Builder, {Lo, Hi});
1569     Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
1570     Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
1571     return Builder.CreateBitCast(Trunc, Hi->getType());
1572   }
1573   if (VecLen == 8) {
1574     Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
1575     return vralignb(Builder, Lo, Hi, Sub);
1576   }
1577   llvm_unreachable("Unexpected vector length");
1578 }
1579 
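     // Right-align the Lo:Hi byte pair by Amt: for a known Amt this is
     // bytes [Amt, Amt+VecLen) of the Lo:Hi concatenation, e.g.
     // VecLen = 4, Amt = 1 gives (Lo[1], Lo[2], Lo[3], Hi[0]).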
1580 auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
1581                                     Value *Hi, Value *Amt) const -> Value * {
1582   assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
1583   if (isZero(Amt))
1584     return Lo;
1585   int VecLen = getSizeOf(Lo);
1586   if (auto IntAmt = getIntValue(Amt))
1587     return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
1588 
1589   if (HST.isTypeForHVX(Lo->getType())) {
1590     assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
1591            "Expecting an exact HVX type");
1592     return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
1593                               Lo->getType(), {Hi, Lo, Amt});
1594   }
1595 
1596   if (VecLen == 4) {
1597     Value *Pair = concat(Builder, {Lo, Hi});
1598     Value *Shift = Builder.CreateLShr(Pair, Amt);
1599     Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
1600     return Builder.CreateBitCast(Trunc, Lo->getType());
1601   }
1602   if (VecLen == 8) {
1603     Type *Int64Ty = Type::getInt64Ty(F.getContext());
1604     Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
1605     Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
1606     Function *FI = Intrinsic::getDeclaration(F.getParent(),
1607                                              Intrinsic::hexagon_S2_valignrb);
1608     Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
1609     return Builder.CreateBitCast(Call, Lo->getType());
1610   }
1611   llvm_unreachable("Unexpected vector length");
1612 }
1613 
1614 // Concatenates a sequence of vectors of the same type.
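     // This takes log2(N) rounds of pairwise joins, e.g. three <4 x i8>
     // inputs A, B, C are padded with an undef D, joined into A:B and C:D,
     // then into A:B:C:D, and finally trimmed back to the 12 original
     // elements.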
1615 auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
1616                                   ArrayRef<Value *> Vecs) const -> Value * {
1617   assert(!Vecs.empty());
1618   SmallVector<int, 256> SMask;
1619   std::vector<Value *> Work[2];
1620   int ThisW = 0, OtherW = 1;
1621 
1622   Work[ThisW].assign(Vecs.begin(), Vecs.end());
1623   while (Work[ThisW].size() > 1) {
1624     auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
1625     SMask.resize(length(Ty) * 2);
1626     std::iota(SMask.begin(), SMask.end(), 0);
1627 
1628     Work[OtherW].clear();
1629     if (Work[ThisW].size() % 2 != 0)
1630       Work[ThisW].push_back(UndefValue::get(Ty));
1631     for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
1632       Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
1633                                                   Work[ThisW][i + 1], SMask);
1634       Work[OtherW].push_back(Joined);
1635     }
1636     std::swap(ThisW, OtherW);
1637   }
1638 
1639   // Undefs may have been appended above to make the number of shuffle
1640   // operands even, so the joined vector may be longer than needed;
1641   // perform a final shuffle that picks only the original elements.
1642   SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
1643   std::iota(SMask.begin(), SMask.end(), 0);
1644   Value *Total = Work[OtherW].front();
1645   return Builder.CreateShuffleVector(Total, SMask);
1646 }
1647 
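     // Resize Val to NewSize elements, either truncating it or extending
     // it with copies of Pad, e.g. vresize(<2 x i8> V, 4, i8 0) yields
     // (V[0], V[1], 0, 0).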
1648 auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
1649                                    int NewSize, Value *Pad) const -> Value * {
1650   assert(isa<VectorType>(Val->getType()));
1651   auto *ValTy = cast<VectorType>(Val->getType());
1652   assert(ValTy->getElementType() == Pad->getType());
1653 
1654   int CurSize = length(ValTy);
1655   if (CurSize == NewSize)
1656     return Val;
1657   // Truncate.
1658   if (CurSize > NewSize)
1659     return getElementRange(Builder, Val, /*Ignored*/ Val, 0, NewSize);
1660   // Extend.
1661   SmallVector<int, 128> SMask(NewSize);
1662   std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
1663   std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
1664   Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
1665   return Builder.CreateShuffleVector(Val, PadVec, SMask);
1666 }
1667 
1668 auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
1669                                    Type *FromTy, Type *ToTy) const -> Value * {
1670   // Mask is a vector <N x i1>, where each element corresponds to an
1671   // element of FromTy. Remap it so that each element will correspond
1672   // to an element of ToTy.
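       // For example, remapping an <8 x i1> mask from <8 x i16> to bytes
       // yields a <16 x i1> mask in which each original bit appears twice.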
1673   assert(isa<VectorType>(Mask->getType()));
1674 
1675   Type *FromSTy = FromTy->getScalarType();
1676   Type *ToSTy = ToTy->getScalarType();
1677   if (FromSTy == ToSTy)
1678     return Mask;
1679 
1680   int FromSize = getSizeOf(FromSTy);
1681   int ToSize = getSizeOf(ToSTy);
1682   assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
1683 
1684   auto *MaskTy = cast<VectorType>(Mask->getType());
1685   int FromCount = length(MaskTy);
1686   int ToCount = (FromCount * FromSize) / ToSize;
1687   assert((FromCount * FromSize) % ToSize == 0);
1688 
1689   auto *FromITy = getIntTy(FromSize * 8);
1690   auto *ToITy = getIntTy(ToSize * 8);
1691 
1692   // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
1693   // -> trunc to <M x i1>.
1694   Value *Ext = Builder.CreateSExt(
1695       Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false));
1696   Value *Cast = Builder.CreateBitCast(
1697       Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false));
1698   return Builder.CreateTrunc(
1699       Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false));
1700 }
1701 
1702 // Bitcast to bytes, and return least significant bits.
1703 auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
1704     -> Value * {
1705   Type *ScalarTy = Val->getType()->getScalarType();
1706   if (ScalarTy == getBoolTy())
1707     return Val;
1708 
1709   Value *Bytes = vbytes(Builder, Val);
1710   if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
1711     return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
1712   // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
1713   // <1 x i1>.
1714   return Builder.CreateTrunc(Bytes, getBoolTy());
1715 }
1716 
1717 // Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
1718 auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
1719     -> Value * {
1720   Type *ScalarTy = Val->getType()->getScalarType();
1721   if (ScalarTy == getByteTy())
1722     return Val;
1723 
1724   if (ScalarTy != getBoolTy())
1725     return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
1726   // For bool, return a sext from i1 to i8.
1727   if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
1728     return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
1729   return Builder.CreateSExt(Val, getByteTy());
1730 }
1731 
1732 auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
1733                                      unsigned Start, unsigned Length) const
1734     -> Value * {
1735   assert(Start + Length <= length(Val));
1736   return getElementRange(Builder, Val, /*Ignored*/ Val, Start, Length);
1737 }
1738 
1739 auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
1740     -> Value * {
1741   size_t Len = length(Val);
1742   assert(Len % 2 == 0 && "Length should be even");
1743   return subvector(Builder, Val, 0, Len / 2);
1744 }
1745 
1746 auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
1747     -> Value * {
1748   size_t Len = length(Val);
1749   assert(Len % 2 == 0 && "Length should be even");
1750   return subvector(Builder, Val, Len / 2, Len / 2);
1751 }
1752 
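     // Deinterleave a pair of vectors, e.g. (a0 a1 a2 a3), (b0 b1 b2 b3)
     // becomes (a0 a2 b0 b2 a1 a3 b1 b3): even-indexed elements in the
     // low half, odd-indexed elements in the high half.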
1753 auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
1754                                  Value *Val1) const -> Value * {
1755   assert(Val0->getType() == Val1->getType());
1756   int Len = length(Val0);
1757   SmallVector<int, 128> Mask(2 * Len);
1758 
1759   for (int i = 0; i != Len; ++i) {
1760     Mask[i] = 2 * i;           // Even
1761     Mask[i + Len] = 2 * i + 1; // Odd
1762   }
1763   return Builder.CreateShuffleVector(Val0, Val1, Mask);
1764 }
1765 
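     // Interleave a pair of vectors, e.g. (a0 a1 a2 a3), (b0 b1 b2 b3)
     // becomes (a0 b0 a1 b1 a2 b2 a3 b3).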
1766 auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
1767                                   Value *Val1) const -> Value * {
1768   assert(Val0->getType() == Val1->getType());
1769   int Len = length(Val0);
1770   SmallVector<int, 128> Mask(2 * Len);
1771 
1772   for (int i = 0; i != Len; ++i) {
1773     Mask[2 * i + 0] = i;       // Val0
1774     Mask[2 * i + 1] = i + Len; // Val1
1775   }
1776   return Builder.CreateShuffleVector(Val0, Val1, Mask);
1777 }
1778 
1779 auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
1780                                               Intrinsic::ID IntID, Type *RetTy,
1781                                               ArrayRef<Value *> Args) const
1782     -> Value * {
1783   auto getCast = [&](IRBuilderBase &Builder, Value *Val,
1784                      Type *DestTy) -> Value * {
1785     Type *SrcTy = Val->getType();
1786     if (SrcTy == DestTy)
1787       return Val;
1788 
1789     // A non-HVX source would have to be a scalar that already has the
1790     // correct type, so only HVX types are expected at this point.
1791     assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));
1792 
1793     Type *BoolTy = Type::getInt1Ty(F.getContext());
1794     if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
1795       return Builder.CreateBitCast(Val, DestTy);
1796 
1797     // Predicate HVX vector.
1798     unsigned HwLen = HST.getVectorLength();
1799     Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
1800                                    : Intrinsic::hexagon_V6_pred_typecast_128B;
1801     Function *FI =
1802         Intrinsic::getDeclaration(F.getParent(), TC, {DestTy, Val->getType()});
1803     return Builder.CreateCall(FI, {Val});
1804   };
1805 
1806   Function *IntrFn = Intrinsic::getDeclaration(F.getParent(), IntID);
1807   FunctionType *IntrTy = IntrFn->getFunctionType();
1808 
1809   SmallVector<Value *, 4> IntrArgs;
1810   for (int i = 0, e = Args.size(); i != e; ++i) {
1811     Value *A = Args[i];
1812     Type *T = IntrTy->getParamType(i);
1813     if (A->getType() != T) {
1814       IntrArgs.push_back(getCast(Builder, A, T));
1815     } else {
1816       IntrArgs.push_back(A);
1817     }
1818   }
1819   Value *Call = Builder.CreateCall(IntrFn, IntrArgs);
1820 
1821   Type *CallTy = Call->getType();
1822   if (RetTy == nullptr || CallTy == RetTy)
1823     return Call;
1824   // Scalar results must already match RetTy; only HVX results need a cast.
1825   assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
1826   return getCast(Builder, Call, RetTy);
1827 }
1828 
1829 auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
1830                                                Value *Vec,
1831                                                unsigned ToWidth) const
1832     -> SmallVector<Value *> {
1833   // Break a vector of wide elements into a series of vectors with narrow
1834   // elements:
1835   //   (...c0:b0:a0, ...c1:b1:a1, ...c2:b2:a2, ...)
1836   // -->
1837   //   (a0, a1, a2, ...)    // lowest "ToWidth" bits
1838   //   (b0, b1, b2, ...)    // the next lowest...
1839   //   (c0, c1, c2, ...)    // ...
1840   //   ...
1841   //
1842   // The number of elements in each resulting vector is the same as
1843   // in the original vector.
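       // For example, splitting <32 x i32> with ToWidth = 8 yields four
       // <32 x i8> vectors, where Results[i] holds bits [8*i, 8*i+8) of
       // each original element.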
1844 
1845   auto *VecTy = cast<VectorType>(Vec->getType());
1846   assert(VecTy->getElementType()->isIntegerTy());
1847   unsigned FromWidth = VecTy->getScalarSizeInBits();
1848   assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
1849 
1850   assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
1851   unsigned NumResults = FromWidth / ToWidth;
1852 
1853   SmallVector<Value *> Results(NumResults);
1854   Results[0] = Vec;
1855   unsigned Length = length(VecTy);
1856 
1857   // Do it by splitting in half, since those operations correspond to deal
1858   // instructions.
1859   auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
1860     // Take V = Results[Begin] and split it into halves L and H.
1861     // Store Results[Begin] = L, Results[(Begin+End)/2] = H.
1862     // Recurse with split(Begin, Half) and split(Half, End).
1863     if (Begin + 1 == End)
1864       return;
1865 
1866     Value *Val = Results[Begin];
1867     unsigned Width = Val->getType()->getScalarSizeInBits();
1868 
1869     auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
1870     Value *VVal = Builder.CreateBitCast(Val, VTy);
1871 
1872     Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
1873 
1874     unsigned Half = (Begin + End) / 2;
1875     Results[Begin] = sublo(Builder, Res);
1876     Results[Half] = subhi(Builder, Res);
1877 
1878     splitFunc(Begin, Half, splitFunc);
1879     splitFunc(Half, End, splitFunc);
1880   };
1881 
1882   splitInHalf(0, NumResults, splitInHalf);
1883   return Results;
1884 }
1885 
1886 auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
1887                                               ArrayRef<Value *> Values,
1888                                               VectorType *ToType) const
1889     -> Value * {
1890   assert(ToType->getElementType()->isIntegerTy());
1891 
1892   // If the number of values is not a power of 2, append copies of the
1893   // last value's sign bits until it is.
1894   // The reason for this is that the values will be joined in pairs, because
1895   // otherwise the shuffles will result in convoluted code. With pairwise
1896   // joins, the shuffles will hopefully be folded into a perfect shuffle.
1897   // The output will need to be sign-extended to a type with element width
1898   // being a power of 2 anyway.
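       // For example, joining four <64 x i8> slices into <64 x i32> takes
       // two rounds of pairwise vshuff + bitcast; a single <64 x i8> input
       // would first be padded with three copies of its sign bits.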
1899   SmallVector<Value *> Inputs(Values.begin(), Values.end());
1900 
1901   unsigned ToWidth = ToType->getScalarSizeInBits();
1902   unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
1903   assert(Width <= ToWidth);
1904   assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
1905   unsigned Length = length(Inputs.front()->getType());
1906 
1907   unsigned NeedInputs = ToWidth / Width;
1908   if (Inputs.size() != NeedInputs) {
1909     Value *Last = Inputs.back();
1910     Value *Sign =
1911         Builder.CreateAShr(Last, getConstSplat(Last->getType(), Width - 1));
1912     Inputs.resize(NeedInputs, Sign);
1913   }
1914 
1915   while (Inputs.size() > 1) {
1916     Width *= 2;
1917     auto *VTy = VectorType::get(getIntTy(Width), Length, false);
1918     for (int i = 0, e = Inputs.size(); i < e; i += 2) {
1919       Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
1920       Inputs[i / 2] = Builder.CreateBitCast(Res, VTy);
1921     }
1922     Inputs.resize(Inputs.size() / 2);
1923   }
1924 
1925   assert(Inputs.front()->getType() == ToType);
1926   return Inputs.front();
1927 }
1928 
1929 auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
1930                                                       Value *Ptr1) const
1931     -> std::optional<int> {
1932   struct Builder : IRBuilder<> {
1933     Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
1934     ~Builder() {
1935       for (Instruction *I : llvm::reverse(ToErase))
1936         I->eraseFromParent();
1937     }
1938     SmallVector<Instruction *, 8> ToErase;
1939   };
1940 
1941 #define CallBuilder(B, F)                                                      \
1942   [&](auto &B_) {                                                              \
1943     Value *V = B_.F;                                                           \
1944     if (auto *I = dyn_cast<Instruction>(V))                                    \
1945       B_.ToErase.push_back(I);                                                 \
1946     return V;                                                                  \
1947   }(B)
1948 
1949   auto Simplify = [&](Value *V) {
1950     if (auto *I = dyn_cast<Instruction>(V)) {
1951       SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
1952       if (Value *S = simplifyInstruction(I, Q))
1953         return S;
1954     }
1955     return V;
1956   };
1957 
1958   auto StripBitCast = [](Value *V) {
1959     while (auto *C = dyn_cast<BitCastInst>(V))
1960       V = C->getOperand(0);
1961     return V;
1962   };
1963 
1964   Ptr0 = StripBitCast(Ptr0);
1965   Ptr1 = StripBitCast(Ptr1);
1966   if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
1967     return std::nullopt;
1968 
1969   auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
1970   auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
1971   if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
1972     return std::nullopt;
1973 
1974   Builder B(Gep0->getParent());
1975   int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
1976 
1977   // FIXME: for now only check GEPs with a single index.
1978   if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
1979     return std::nullopt;
1980 
1981   Value *Idx0 = Gep0->getOperand(1);
1982   Value *Idx1 = Gep1->getOperand(1);
1983 
1984   // First, try to simplify the subtraction directly.
1985   if (auto *Diff = dyn_cast<ConstantInt>(
1986           Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
1987     return Diff->getSExtValue() * Scale;
1988 
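       // Otherwise split each index into its "unknown" bits U and its
       // known bits ~U. The masks are disjoint, so Idx0 - Idx1 =
       // ((Idx0 & U) - (Idx1 & U)) + ((Idx0 & ~U) - (Idx1 & ~U)), and
       // each partial difference must still fold to a constant.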
1989   KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
1990   KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
1991   APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
1992   if (Unknown.isAllOnes())
1993     return std::nullopt;
1994 
1995   Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
1996   Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
1997   Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
1998   Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
1999   int Diff0 = 0;
2000   if (auto *C = dyn_cast<ConstantInt>(SubU)) {
2001     Diff0 = C->getSExtValue();
2002   } else {
2003     return std::nullopt;
2004   }
2005 
2006   Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
2007   Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
2008   Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
2009   Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
2010   int Diff1 = 0;
2011   if (auto *C = dyn_cast<ConstantInt>(SubK)) {
2012     Diff1 = C->getSExtValue();
2013   } else {
2014     return std::nullopt;
2015   }
2016 
2017   return (Diff0 + Diff1) * Scale;
2018 
2019 #undef CallBuilder
2020 }
2021 
2022 auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
2023                                                  const Instruction *CtxI) const
2024     -> unsigned {
2025   return ComputeMaxSignificantBits(V, DL, /*Depth=*/0, &AC, CtxI, &DT);
2026 }
2027 
2028 auto HexagonVectorCombine::getKnownBits(const Value *V,
2029                                         const Instruction *CtxI) const
2030     -> KnownBits {
2031   return computeKnownBits(V, DL, /*Depth=*/0, &AC, CtxI, &DT, /*ORE=*/nullptr,
2032                           /*UseInstrInfo=*/true);
2033 }
2034 
2035 template <typename T>
2036 auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
2037                                                   BasicBlock::const_iterator To,
2038                                                   const T &Ignore) const
2039     -> bool {
2040   auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> {
2041     if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
2042       switch (II->getIntrinsicID()) {
2043       case Intrinsic::masked_load:
2044         return MemoryLocation::getForArgument(II, 0, TLI);
2045       case Intrinsic::masked_store:
2046         return MemoryLocation::getForArgument(II, 1, TLI);
2047       }
2048     }
2049     return MemoryLocation::getOrNone(&I);
2050   };
2051 
2052   // The source and the destination must be in the same basic block.
2053   const BasicBlock &Block = *In.getParent();
2054   assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
2055   // No PHIs.
2056   if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
2057     return false;
2058 
2059   if (!mayHaveNonDefUseDependency(In))
2060     return true;
2061   bool MayWrite = In.mayWriteToMemory();
2062   auto MaybeLoc = getLocOrNone(In);
2063 
2064   auto From = In.getIterator();
2065   if (From == To)
2066     return true;
2067   bool MoveUp = (To != Block.end() && To->comesBefore(&In));
2068   auto Range =
2069       MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
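       // When moving In up, scan [To, From); when moving it down, scan the
       // instructions strictly between From and To.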
2070   for (auto It = Range.first; It != Range.second; ++It) {
2071     const Instruction &I = *It;
2072     if (llvm::is_contained(Ignore, &I))
2073       continue;
2074     // The assume intrinsic can be ignored.
2075     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
2076       if (II->getIntrinsicID() == Intrinsic::assume)
2077         continue;
2078     }
2079     // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
2080     if (I.mayThrow())
2081       return false;
2082     if (auto *CB = dyn_cast<CallBase>(&I)) {
2083       if (!CB->hasFnAttr(Attribute::WillReturn))
2084         return false;
2085       if (!CB->hasFnAttr(Attribute::NoSync))
2086         return false;
2087     }
2088     if (I.mayReadOrWriteMemory()) {
2089       auto MaybeLocI = getLocOrNone(I);
2090       if (MayWrite || I.mayWriteToMemory()) {
2091         if (!MaybeLoc || !MaybeLocI)
2092           return false;
2093         if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
2094           return false;
2095       }
2096     }
2097   }
2098   return true;
2099 }
2100 
2101 auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
2102   if (auto *VecTy = dyn_cast<VectorType>(Ty))
2103     return VecTy->getElementType() == getByteTy();
2104   return false;
2105 }
2106 
2107 auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
2108                                            Value *Hi, int Start,
2109                                            int Length) const -> Value * {
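       // Pick Length consecutive elements starting at Start from the Lo:Hi
       // concatenation (Lo in the lower lanes), e.g. Start = 6, Length = 4
       // over two <8 x i8> vectors gives (Lo[6], Lo[7], Hi[0], Hi[1]).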
2110   assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
2111   SmallVector<int, 128> SMask(Length);
2112   std::iota(SMask.begin(), SMask.end(), Start);
2113   return Builder.CreateShuffleVector(Lo, Hi, SMask);
2114 }
2115 
2116 // Pass management.
2117 
2118 namespace llvm {
2119 void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
2120 FunctionPass *createHexagonVectorCombineLegacyPass();
2121 } // namespace llvm
2122 
2123 namespace {
2124 class HexagonVectorCombineLegacy : public FunctionPass {
2125 public:
2126   static char ID;
2127 
2128   HexagonVectorCombineLegacy() : FunctionPass(ID) {}
2129 
2130   StringRef getPassName() const override { return "Hexagon Vector Combine"; }
2131 
2132   void getAnalysisUsage(AnalysisUsage &AU) const override {
2133     AU.setPreservesCFG();
2134     AU.addRequired<AAResultsWrapperPass>();
2135     AU.addRequired<AssumptionCacheTracker>();
2136     AU.addRequired<DominatorTreeWrapperPass>();
2137     AU.addRequired<TargetLibraryInfoWrapperPass>();
2138     AU.addRequired<TargetPassConfig>();
2139     FunctionPass::getAnalysisUsage(AU);
2140   }
2141 
2142   bool runOnFunction(Function &F) override {
2143     if (skipFunction(F))
2144       return false;
2145     AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2146     AssumptionCache &AC =
2147         getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2148     DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2149     TargetLibraryInfo &TLI =
2150         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2151     auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
2152     HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
2153     return HVC.run();
2154   }
2155 };
2156 } // namespace
2157 
2158 char HexagonVectorCombineLegacy::ID = 0;
2159 
2160 INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
2161                       "Hexagon Vector Combine", false, false)
2162 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2163 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2164 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2165 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2166 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
2167 INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
2168                     "Hexagon Vector Combine", false, false)
2169 
2170 FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
2171   return new HexagonVectorCombineLegacy();
2172 }
2173