1*e8d8bef9SDimitry Andric //===-- HexagonVectorCombine.cpp ------------------------------------------===// 2*e8d8bef9SDimitry Andric // 3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*e8d8bef9SDimitry Andric // 7*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8*e8d8bef9SDimitry Andric // HexagonVectorCombine is a utility class implementing a variety of functions 9*e8d8bef9SDimitry Andric // that assist in vector-based optimizations. 10*e8d8bef9SDimitry Andric // 11*e8d8bef9SDimitry Andric // AlignVectors: replace unaligned vector loads and stores with aligned ones. 12*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 13*e8d8bef9SDimitry Andric 14*e8d8bef9SDimitry Andric #include "llvm/ADT/APInt.h" 15*e8d8bef9SDimitry Andric #include "llvm/ADT/ArrayRef.h" 16*e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h" 17*e8d8bef9SDimitry Andric #include "llvm/ADT/Optional.h" 18*e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h" 19*e8d8bef9SDimitry Andric #include "llvm/ADT/SmallVector.h" 20*e8d8bef9SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 21*e8d8bef9SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 22*e8d8bef9SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 23*e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 24*e8d8bef9SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 25*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 26*e8d8bef9SDimitry Andric #include "llvm/IR/Dominators.h" 27*e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h" 28*e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 29*e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h" 30*e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsHexagon.h" 31*e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 32*e8d8bef9SDimitry Andric #include "llvm/Pass.h" 33*e8d8bef9SDimitry Andric #include "llvm/Support/KnownBits.h" 34*e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h" 35*e8d8bef9SDimitry Andric #include "llvm/Support/raw_ostream.h" 36*e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 37*e8d8bef9SDimitry Andric 38*e8d8bef9SDimitry Andric #include "HexagonSubtarget.h" 39*e8d8bef9SDimitry Andric #include "HexagonTargetMachine.h" 40*e8d8bef9SDimitry Andric 41*e8d8bef9SDimitry Andric #include <algorithm> 42*e8d8bef9SDimitry Andric #include <deque> 43*e8d8bef9SDimitry Andric #include <map> 44*e8d8bef9SDimitry Andric #include <set> 45*e8d8bef9SDimitry Andric #include <utility> 46*e8d8bef9SDimitry Andric #include <vector> 47*e8d8bef9SDimitry Andric 48*e8d8bef9SDimitry Andric #define DEBUG_TYPE "hexagon-vc" 49*e8d8bef9SDimitry Andric 50*e8d8bef9SDimitry Andric using namespace llvm; 51*e8d8bef9SDimitry Andric 52*e8d8bef9SDimitry Andric namespace { 53*e8d8bef9SDimitry Andric class HexagonVectorCombine { 54*e8d8bef9SDimitry Andric public: 55*e8d8bef9SDimitry Andric HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_, 56*e8d8bef9SDimitry Andric DominatorTree &DT_, TargetLibraryInfo &TLI_, 57*e8d8bef9SDimitry Andric const TargetMachine &TM_) 58*e8d8bef9SDimitry Andric : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_), 59*e8d8bef9SDimitry Andric TLI(TLI_), 60*e8d8bef9SDimitry Andric HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {} 61*e8d8bef9SDimitry Andric 62*e8d8bef9SDimitry Andric bool run(); 63*e8d8bef9SDimitry Andric 64*e8d8bef9SDimitry Andric // Common integer type. 65*e8d8bef9SDimitry Andric IntegerType *getIntTy() const; 66*e8d8bef9SDimitry Andric // Byte type: either scalar (when Length = 0), or vector with given 67*e8d8bef9SDimitry Andric // element count. 68*e8d8bef9SDimitry Andric Type *getByteTy(int ElemCount = 0) const; 69*e8d8bef9SDimitry Andric // Boolean type: either scalar (when Length = 0), or vector with given 70*e8d8bef9SDimitry Andric // element count. 71*e8d8bef9SDimitry Andric Type *getBoolTy(int ElemCount = 0) const; 72*e8d8bef9SDimitry Andric // Create a ConstantInt of type returned by getIntTy with the value Val. 73*e8d8bef9SDimitry Andric ConstantInt *getConstInt(int Val) const; 74*e8d8bef9SDimitry Andric // Get the integer value of V, if it exists. 75*e8d8bef9SDimitry Andric Optional<APInt> getIntValue(const Value *Val) const; 76*e8d8bef9SDimitry Andric // Is V a constant 0, or a vector of 0s? 77*e8d8bef9SDimitry Andric bool isZero(const Value *Val) const; 78*e8d8bef9SDimitry Andric // Is V an undef value? 79*e8d8bef9SDimitry Andric bool isUndef(const Value *Val) const; 80*e8d8bef9SDimitry Andric 81*e8d8bef9SDimitry Andric int getSizeOf(const Value *Val) const; 82*e8d8bef9SDimitry Andric int getSizeOf(const Type *Ty) const; 83*e8d8bef9SDimitry Andric int getTypeAlignment(Type *Ty) const; 84*e8d8bef9SDimitry Andric 85*e8d8bef9SDimitry Andric VectorType *getByteVectorTy(int ScLen) const; 86*e8d8bef9SDimitry Andric Constant *getNullValue(Type *Ty) const; 87*e8d8bef9SDimitry Andric Constant *getFullValue(Type *Ty) const; 88*e8d8bef9SDimitry Andric 89*e8d8bef9SDimitry Andric Value *insertb(IRBuilder<> &Builder, Value *Dest, Value *Src, int Start, 90*e8d8bef9SDimitry Andric int Length, int Where) const; 91*e8d8bef9SDimitry Andric Value *vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const; 92*e8d8bef9SDimitry Andric Value *vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const; 93*e8d8bef9SDimitry Andric Value *concat(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) const; 94*e8d8bef9SDimitry Andric Value *vresize(IRBuilder<> &Builder, Value *Val, int NewSize, 95*e8d8bef9SDimitry Andric Value *Pad) const; 96*e8d8bef9SDimitry Andric Value *rescale(IRBuilder<> &Builder, Value *Mask, Type *FromTy, 97*e8d8bef9SDimitry Andric Type *ToTy) const; 98*e8d8bef9SDimitry Andric Value *vlsb(IRBuilder<> &Builder, Value *Val) const; 99*e8d8bef9SDimitry Andric Value *vbytes(IRBuilder<> &Builder, Value *Val) const; 100*e8d8bef9SDimitry Andric 101*e8d8bef9SDimitry Andric Value *createHvxIntrinsic(IRBuilder<> &Builder, Intrinsic::ID IntID, 102*e8d8bef9SDimitry Andric Type *RetTy, ArrayRef<Value *> Args) const; 103*e8d8bef9SDimitry Andric 104*e8d8bef9SDimitry Andric Optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const; 105*e8d8bef9SDimitry Andric 106*e8d8bef9SDimitry Andric template <typename T = std::vector<Instruction *>> 107*e8d8bef9SDimitry Andric bool isSafeToMoveBeforeInBB(const Instruction &In, 108*e8d8bef9SDimitry Andric BasicBlock::const_iterator To, 109*e8d8bef9SDimitry Andric const T &Ignore = {}) const; 110*e8d8bef9SDimitry Andric 111*e8d8bef9SDimitry Andric Function &F; 112*e8d8bef9SDimitry Andric const DataLayout &DL; 113*e8d8bef9SDimitry Andric AliasAnalysis &AA; 114*e8d8bef9SDimitry Andric AssumptionCache &AC; 115*e8d8bef9SDimitry Andric DominatorTree &DT; 116*e8d8bef9SDimitry Andric TargetLibraryInfo &TLI; 117*e8d8bef9SDimitry Andric const HexagonSubtarget &HST; 118*e8d8bef9SDimitry Andric 119*e8d8bef9SDimitry Andric private: 120*e8d8bef9SDimitry Andric #ifndef NDEBUG 121*e8d8bef9SDimitry Andric // These two functions are only used for assertions at the moment. 122*e8d8bef9SDimitry Andric bool isByteVecTy(Type *Ty) const; 123*e8d8bef9SDimitry Andric bool isSectorTy(Type *Ty) const; 124*e8d8bef9SDimitry Andric #endif 125*e8d8bef9SDimitry Andric Value *getElementRange(IRBuilder<> &Builder, Value *Lo, Value *Hi, int Start, 126*e8d8bef9SDimitry Andric int Length) const; 127*e8d8bef9SDimitry Andric }; 128*e8d8bef9SDimitry Andric 129*e8d8bef9SDimitry Andric class AlignVectors { 130*e8d8bef9SDimitry Andric public: 131*e8d8bef9SDimitry Andric AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {} 132*e8d8bef9SDimitry Andric 133*e8d8bef9SDimitry Andric bool run(); 134*e8d8bef9SDimitry Andric 135*e8d8bef9SDimitry Andric private: 136*e8d8bef9SDimitry Andric using InstList = std::vector<Instruction *>; 137*e8d8bef9SDimitry Andric 138*e8d8bef9SDimitry Andric struct Segment { 139*e8d8bef9SDimitry Andric void *Data; 140*e8d8bef9SDimitry Andric int Start; 141*e8d8bef9SDimitry Andric int Size; 142*e8d8bef9SDimitry Andric }; 143*e8d8bef9SDimitry Andric 144*e8d8bef9SDimitry Andric struct AddrInfo { 145*e8d8bef9SDimitry Andric AddrInfo(const AddrInfo &) = default; 146*e8d8bef9SDimitry Andric AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T, 147*e8d8bef9SDimitry Andric Align H) 148*e8d8bef9SDimitry Andric : Inst(I), Addr(A), ValTy(T), HaveAlign(H), 149*e8d8bef9SDimitry Andric NeedAlign(HVC.getTypeAlignment(ValTy)) {} 150*e8d8bef9SDimitry Andric 151*e8d8bef9SDimitry Andric // XXX: add Size member? 152*e8d8bef9SDimitry Andric Instruction *Inst; 153*e8d8bef9SDimitry Andric Value *Addr; 154*e8d8bef9SDimitry Andric Type *ValTy; 155*e8d8bef9SDimitry Andric Align HaveAlign; 156*e8d8bef9SDimitry Andric Align NeedAlign; 157*e8d8bef9SDimitry Andric int Offset = 0; // Offset (in bytes) from the first member of the 158*e8d8bef9SDimitry Andric // containing AddrList. 159*e8d8bef9SDimitry Andric }; 160*e8d8bef9SDimitry Andric using AddrList = std::vector<AddrInfo>; 161*e8d8bef9SDimitry Andric 162*e8d8bef9SDimitry Andric struct InstrLess { 163*e8d8bef9SDimitry Andric bool operator()(const Instruction *A, const Instruction *B) const { 164*e8d8bef9SDimitry Andric return A->comesBefore(B); 165*e8d8bef9SDimitry Andric } 166*e8d8bef9SDimitry Andric }; 167*e8d8bef9SDimitry Andric using DepList = std::set<Instruction *, InstrLess>; 168*e8d8bef9SDimitry Andric 169*e8d8bef9SDimitry Andric struct MoveGroup { 170*e8d8bef9SDimitry Andric MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load) 171*e8d8bef9SDimitry Andric : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {} 172*e8d8bef9SDimitry Andric Instruction *Base; // Base instruction of the parent address group. 173*e8d8bef9SDimitry Andric InstList Main; // Main group of instructions. 174*e8d8bef9SDimitry Andric InstList Deps; // List of dependencies. 175*e8d8bef9SDimitry Andric bool IsHvx; // Is this group of HVX instructions? 176*e8d8bef9SDimitry Andric bool IsLoad; // Is this a load group? 177*e8d8bef9SDimitry Andric }; 178*e8d8bef9SDimitry Andric using MoveList = std::vector<MoveGroup>; 179*e8d8bef9SDimitry Andric 180*e8d8bef9SDimitry Andric struct ByteSpan { 181*e8d8bef9SDimitry Andric struct Segment { 182*e8d8bef9SDimitry Andric Segment(Value *Val, int Begin, int Len) 183*e8d8bef9SDimitry Andric : Val(Val), Start(Begin), Size(Len) {} 184*e8d8bef9SDimitry Andric Segment(const Segment &Seg) = default; 185*e8d8bef9SDimitry Andric Value *Val; 186*e8d8bef9SDimitry Andric int Start; 187*e8d8bef9SDimitry Andric int Size; 188*e8d8bef9SDimitry Andric }; 189*e8d8bef9SDimitry Andric 190*e8d8bef9SDimitry Andric struct Block { 191*e8d8bef9SDimitry Andric Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {} 192*e8d8bef9SDimitry Andric Block(Value *Val, int Off, int Len, int Pos) 193*e8d8bef9SDimitry Andric : Seg(Val, Off, Len), Pos(Pos) {} 194*e8d8bef9SDimitry Andric Block(const Block &Blk) = default; 195*e8d8bef9SDimitry Andric Segment Seg; 196*e8d8bef9SDimitry Andric int Pos; 197*e8d8bef9SDimitry Andric }; 198*e8d8bef9SDimitry Andric 199*e8d8bef9SDimitry Andric int extent() const; 200*e8d8bef9SDimitry Andric ByteSpan section(int Start, int Length) const; 201*e8d8bef9SDimitry Andric ByteSpan &shift(int Offset); 202*e8d8bef9SDimitry Andric 203*e8d8bef9SDimitry Andric int size() const { return Blocks.size(); } 204*e8d8bef9SDimitry Andric Block &operator[](int i) { return Blocks[i]; } 205*e8d8bef9SDimitry Andric 206*e8d8bef9SDimitry Andric std::vector<Block> Blocks; 207*e8d8bef9SDimitry Andric 208*e8d8bef9SDimitry Andric using iterator = decltype(Blocks)::iterator; 209*e8d8bef9SDimitry Andric iterator begin() { return Blocks.begin(); } 210*e8d8bef9SDimitry Andric iterator end() { return Blocks.end(); } 211*e8d8bef9SDimitry Andric using const_iterator = decltype(Blocks)::const_iterator; 212*e8d8bef9SDimitry Andric const_iterator begin() const { return Blocks.begin(); } 213*e8d8bef9SDimitry Andric const_iterator end() const { return Blocks.end(); } 214*e8d8bef9SDimitry Andric }; 215*e8d8bef9SDimitry Andric 216*e8d8bef9SDimitry Andric Align getAlignFromValue(const Value *V) const; 217*e8d8bef9SDimitry Andric Optional<MemoryLocation> getLocation(const Instruction &In) const; 218*e8d8bef9SDimitry Andric Optional<AddrInfo> getAddrInfo(Instruction &In) const; 219*e8d8bef9SDimitry Andric bool isHvx(const AddrInfo &AI) const; 220*e8d8bef9SDimitry Andric 221*e8d8bef9SDimitry Andric Value *getPayload(Value *Val) const; 222*e8d8bef9SDimitry Andric Value *getMask(Value *Val) const; 223*e8d8bef9SDimitry Andric Value *getPassThrough(Value *Val) const; 224*e8d8bef9SDimitry Andric 225*e8d8bef9SDimitry Andric Value *createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy, 226*e8d8bef9SDimitry Andric int Adjust) const; 227*e8d8bef9SDimitry Andric Value *createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy, 228*e8d8bef9SDimitry Andric int Alignment) const; 229*e8d8bef9SDimitry Andric Value *createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, Value *Ptr, 230*e8d8bef9SDimitry Andric int Alignment, Value *Mask, Value *PassThru) const; 231*e8d8bef9SDimitry Andric Value *createAlignedStore(IRBuilder<> &Builder, Value *Val, Value *Ptr, 232*e8d8bef9SDimitry Andric int Alignment, Value *Mask) const; 233*e8d8bef9SDimitry Andric 234*e8d8bef9SDimitry Andric bool createAddressGroups(); 235*e8d8bef9SDimitry Andric MoveList createLoadGroups(const AddrList &Group) const; 236*e8d8bef9SDimitry Andric MoveList createStoreGroups(const AddrList &Group) const; 237*e8d8bef9SDimitry Andric bool move(const MoveGroup &Move) const; 238*e8d8bef9SDimitry Andric bool realignGroup(const MoveGroup &Move) const; 239*e8d8bef9SDimitry Andric 240*e8d8bef9SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI); 241*e8d8bef9SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG); 242*e8d8bef9SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS); 243*e8d8bef9SDimitry Andric 244*e8d8bef9SDimitry Andric std::map<Instruction *, AddrList> AddrGroups; 245*e8d8bef9SDimitry Andric HexagonVectorCombine &HVC; 246*e8d8bef9SDimitry Andric }; 247*e8d8bef9SDimitry Andric 248*e8d8bef9SDimitry Andric LLVM_ATTRIBUTE_UNUSED 249*e8d8bef9SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { 250*e8d8bef9SDimitry Andric OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; 251*e8d8bef9SDimitry Andric OS << "Addr: " << *AI.Addr << '\n'; 252*e8d8bef9SDimitry Andric OS << "Type: " << *AI.ValTy << '\n'; 253*e8d8bef9SDimitry Andric OS << "HaveAlign: " << AI.HaveAlign.value() << '\n'; 254*e8d8bef9SDimitry Andric OS << "NeedAlign: " << AI.NeedAlign.value() << '\n'; 255*e8d8bef9SDimitry Andric OS << "Offset: " << AI.Offset; 256*e8d8bef9SDimitry Andric return OS; 257*e8d8bef9SDimitry Andric } 258*e8d8bef9SDimitry Andric 259*e8d8bef9SDimitry Andric LLVM_ATTRIBUTE_UNUSED 260*e8d8bef9SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { 261*e8d8bef9SDimitry Andric OS << "Main\n"; 262*e8d8bef9SDimitry Andric for (Instruction *I : MG.Main) 263*e8d8bef9SDimitry Andric OS << " " << *I << '\n'; 264*e8d8bef9SDimitry Andric OS << "Deps\n"; 265*e8d8bef9SDimitry Andric for (Instruction *I : MG.Deps) 266*e8d8bef9SDimitry Andric OS << " " << *I << '\n'; 267*e8d8bef9SDimitry Andric return OS; 268*e8d8bef9SDimitry Andric } 269*e8d8bef9SDimitry Andric 270*e8d8bef9SDimitry Andric LLVM_ATTRIBUTE_UNUSED 271*e8d8bef9SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { 272*e8d8bef9SDimitry Andric OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; 273*e8d8bef9SDimitry Andric for (const AlignVectors::ByteSpan::Block &B : BS) { 274*e8d8bef9SDimitry Andric OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] " 275*e8d8bef9SDimitry Andric << *B.Seg.Val << '\n'; 276*e8d8bef9SDimitry Andric } 277*e8d8bef9SDimitry Andric OS << ']'; 278*e8d8bef9SDimitry Andric return OS; 279*e8d8bef9SDimitry Andric } 280*e8d8bef9SDimitry Andric 281*e8d8bef9SDimitry Andric } // namespace 282*e8d8bef9SDimitry Andric 283*e8d8bef9SDimitry Andric namespace { 284*e8d8bef9SDimitry Andric 285*e8d8bef9SDimitry Andric template <typename T> T *getIfUnordered(T *MaybeT) { 286*e8d8bef9SDimitry Andric return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr; 287*e8d8bef9SDimitry Andric } 288*e8d8bef9SDimitry Andric template <typename T> T *isCandidate(Instruction *In) { 289*e8d8bef9SDimitry Andric return dyn_cast<T>(In); 290*e8d8bef9SDimitry Andric } 291*e8d8bef9SDimitry Andric template <> LoadInst *isCandidate<LoadInst>(Instruction *In) { 292*e8d8bef9SDimitry Andric return getIfUnordered(dyn_cast<LoadInst>(In)); 293*e8d8bef9SDimitry Andric } 294*e8d8bef9SDimitry Andric template <> StoreInst *isCandidate<StoreInst>(Instruction *In) { 295*e8d8bef9SDimitry Andric return getIfUnordered(dyn_cast<StoreInst>(In)); 296*e8d8bef9SDimitry Andric } 297*e8d8bef9SDimitry Andric 298*e8d8bef9SDimitry Andric #if !defined(_MSC_VER) || _MSC_VER >= 1924 299*e8d8bef9SDimitry Andric // VS2017 has trouble compiling this: 300*e8d8bef9SDimitry Andric // error C2976: 'std::map': too few template arguments 301*e8d8bef9SDimitry Andric template <typename Pred, typename... Ts> 302*e8d8bef9SDimitry Andric void erase_if(std::map<Ts...> &map, Pred p) 303*e8d8bef9SDimitry Andric #else 304*e8d8bef9SDimitry Andric template <typename Pred, typename T, typename U> 305*e8d8bef9SDimitry Andric void erase_if(std::map<T, U> &map, Pred p) 306*e8d8bef9SDimitry Andric #endif 307*e8d8bef9SDimitry Andric { 308*e8d8bef9SDimitry Andric for (auto i = map.begin(), e = map.end(); i != e;) { 309*e8d8bef9SDimitry Andric if (p(*i)) 310*e8d8bef9SDimitry Andric i = map.erase(i); 311*e8d8bef9SDimitry Andric else 312*e8d8bef9SDimitry Andric i = std::next(i); 313*e8d8bef9SDimitry Andric } 314*e8d8bef9SDimitry Andric } 315*e8d8bef9SDimitry Andric 316*e8d8bef9SDimitry Andric // Forward other erase_ifs to the LLVM implementations. 317*e8d8bef9SDimitry Andric template <typename Pred, typename T> void erase_if(T &&container, Pred p) { 318*e8d8bef9SDimitry Andric llvm::erase_if(std::forward<T>(container), p); 319*e8d8bef9SDimitry Andric } 320*e8d8bef9SDimitry Andric 321*e8d8bef9SDimitry Andric } // namespace 322*e8d8bef9SDimitry Andric 323*e8d8bef9SDimitry Andric // --- Begin AlignVectors 324*e8d8bef9SDimitry Andric 325*e8d8bef9SDimitry Andric auto AlignVectors::ByteSpan::extent() const -> int { 326*e8d8bef9SDimitry Andric if (size() == 0) 327*e8d8bef9SDimitry Andric return 0; 328*e8d8bef9SDimitry Andric int Min = Blocks[0].Pos; 329*e8d8bef9SDimitry Andric int Max = Blocks[0].Pos + Blocks[0].Seg.Size; 330*e8d8bef9SDimitry Andric for (int i = 1, e = size(); i != e; ++i) { 331*e8d8bef9SDimitry Andric Min = std::min(Min, Blocks[i].Pos); 332*e8d8bef9SDimitry Andric Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size); 333*e8d8bef9SDimitry Andric } 334*e8d8bef9SDimitry Andric return Max - Min; 335*e8d8bef9SDimitry Andric } 336*e8d8bef9SDimitry Andric 337*e8d8bef9SDimitry Andric auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan { 338*e8d8bef9SDimitry Andric ByteSpan Section; 339*e8d8bef9SDimitry Andric for (const ByteSpan::Block &B : Blocks) { 340*e8d8bef9SDimitry Andric int L = std::max(B.Pos, Start); // Left end. 341*e8d8bef9SDimitry Andric int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1. 342*e8d8bef9SDimitry Andric if (L < R) { 343*e8d8bef9SDimitry Andric // How much to chop off the beginning of the segment: 344*e8d8bef9SDimitry Andric int Off = L > B.Pos ? L - B.Pos : 0; 345*e8d8bef9SDimitry Andric Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L); 346*e8d8bef9SDimitry Andric } 347*e8d8bef9SDimitry Andric } 348*e8d8bef9SDimitry Andric return Section; 349*e8d8bef9SDimitry Andric } 350*e8d8bef9SDimitry Andric 351*e8d8bef9SDimitry Andric auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & { 352*e8d8bef9SDimitry Andric for (Block &B : Blocks) 353*e8d8bef9SDimitry Andric B.Pos += Offset; 354*e8d8bef9SDimitry Andric return *this; 355*e8d8bef9SDimitry Andric } 356*e8d8bef9SDimitry Andric 357*e8d8bef9SDimitry Andric auto AlignVectors::getAlignFromValue(const Value *V) const -> Align { 358*e8d8bef9SDimitry Andric const auto *C = dyn_cast<ConstantInt>(V); 359*e8d8bef9SDimitry Andric assert(C && "Alignment must be a compile-time constant integer"); 360*e8d8bef9SDimitry Andric return C->getAlignValue(); 361*e8d8bef9SDimitry Andric } 362*e8d8bef9SDimitry Andric 363*e8d8bef9SDimitry Andric auto AlignVectors::getAddrInfo(Instruction &In) const -> Optional<AddrInfo> { 364*e8d8bef9SDimitry Andric if (auto *L = isCandidate<LoadInst>(&In)) 365*e8d8bef9SDimitry Andric return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(), 366*e8d8bef9SDimitry Andric L->getAlign()); 367*e8d8bef9SDimitry Andric if (auto *S = isCandidate<StoreInst>(&In)) 368*e8d8bef9SDimitry Andric return AddrInfo(HVC, S, S->getPointerOperand(), 369*e8d8bef9SDimitry Andric S->getValueOperand()->getType(), S->getAlign()); 370*e8d8bef9SDimitry Andric if (auto *II = isCandidate<IntrinsicInst>(&In)) { 371*e8d8bef9SDimitry Andric Intrinsic::ID ID = II->getIntrinsicID(); 372*e8d8bef9SDimitry Andric switch (ID) { 373*e8d8bef9SDimitry Andric case Intrinsic::masked_load: 374*e8d8bef9SDimitry Andric return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(), 375*e8d8bef9SDimitry Andric getAlignFromValue(II->getArgOperand(1))); 376*e8d8bef9SDimitry Andric case Intrinsic::masked_store: 377*e8d8bef9SDimitry Andric return AddrInfo(HVC, II, II->getArgOperand(1), 378*e8d8bef9SDimitry Andric II->getArgOperand(0)->getType(), 379*e8d8bef9SDimitry Andric getAlignFromValue(II->getArgOperand(2))); 380*e8d8bef9SDimitry Andric } 381*e8d8bef9SDimitry Andric } 382*e8d8bef9SDimitry Andric return Optional<AddrInfo>(); 383*e8d8bef9SDimitry Andric } 384*e8d8bef9SDimitry Andric 385*e8d8bef9SDimitry Andric auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool { 386*e8d8bef9SDimitry Andric return HVC.HST.isTypeForHVX(AI.ValTy); 387*e8d8bef9SDimitry Andric } 388*e8d8bef9SDimitry Andric 389*e8d8bef9SDimitry Andric auto AlignVectors::getPayload(Value *Val) const -> Value * { 390*e8d8bef9SDimitry Andric if (auto *In = dyn_cast<Instruction>(Val)) { 391*e8d8bef9SDimitry Andric Intrinsic::ID ID = 0; 392*e8d8bef9SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(In)) 393*e8d8bef9SDimitry Andric ID = II->getIntrinsicID(); 394*e8d8bef9SDimitry Andric if (isa<StoreInst>(In) || ID == Intrinsic::masked_store) 395*e8d8bef9SDimitry Andric return In->getOperand(0); 396*e8d8bef9SDimitry Andric } 397*e8d8bef9SDimitry Andric return Val; 398*e8d8bef9SDimitry Andric } 399*e8d8bef9SDimitry Andric 400*e8d8bef9SDimitry Andric auto AlignVectors::getMask(Value *Val) const -> Value * { 401*e8d8bef9SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(Val)) { 402*e8d8bef9SDimitry Andric switch (II->getIntrinsicID()) { 403*e8d8bef9SDimitry Andric case Intrinsic::masked_load: 404*e8d8bef9SDimitry Andric return II->getArgOperand(2); 405*e8d8bef9SDimitry Andric case Intrinsic::masked_store: 406*e8d8bef9SDimitry Andric return II->getArgOperand(3); 407*e8d8bef9SDimitry Andric } 408*e8d8bef9SDimitry Andric } 409*e8d8bef9SDimitry Andric 410*e8d8bef9SDimitry Andric Type *ValTy = getPayload(Val)->getType(); 411*e8d8bef9SDimitry Andric if (auto *VecTy = dyn_cast<VectorType>(ValTy)) { 412*e8d8bef9SDimitry Andric int ElemCount = VecTy->getElementCount().getFixedValue(); 413*e8d8bef9SDimitry Andric return HVC.getFullValue(HVC.getBoolTy(ElemCount)); 414*e8d8bef9SDimitry Andric } 415*e8d8bef9SDimitry Andric return HVC.getFullValue(HVC.getBoolTy()); 416*e8d8bef9SDimitry Andric } 417*e8d8bef9SDimitry Andric 418*e8d8bef9SDimitry Andric auto AlignVectors::getPassThrough(Value *Val) const -> Value * { 419*e8d8bef9SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(Val)) { 420*e8d8bef9SDimitry Andric if (II->getIntrinsicID() == Intrinsic::masked_load) 421*e8d8bef9SDimitry Andric return II->getArgOperand(3); 422*e8d8bef9SDimitry Andric } 423*e8d8bef9SDimitry Andric return UndefValue::get(getPayload(Val)->getType()); 424*e8d8bef9SDimitry Andric } 425*e8d8bef9SDimitry Andric 426*e8d8bef9SDimitry Andric auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, 427*e8d8bef9SDimitry Andric Type *ValTy, int Adjust) const 428*e8d8bef9SDimitry Andric -> Value * { 429*e8d8bef9SDimitry Andric // The adjustment is in bytes, but if it's a multiple of the type size, 430*e8d8bef9SDimitry Andric // we don't need to do pointer casts. 431*e8d8bef9SDimitry Andric Type *ElemTy = cast<PointerType>(Ptr->getType())->getElementType(); 432*e8d8bef9SDimitry Andric int ElemSize = HVC.getSizeOf(ElemTy); 433*e8d8bef9SDimitry Andric if (Adjust % ElemSize == 0) { 434*e8d8bef9SDimitry Andric Value *Tmp0 = Builder.CreateGEP(Ptr, HVC.getConstInt(Adjust / ElemSize)); 435*e8d8bef9SDimitry Andric return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo()); 436*e8d8bef9SDimitry Andric } 437*e8d8bef9SDimitry Andric 438*e8d8bef9SDimitry Andric PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext()); 439*e8d8bef9SDimitry Andric Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy); 440*e8d8bef9SDimitry Andric Value *Tmp1 = Builder.CreateGEP(Tmp0, HVC.getConstInt(Adjust)); 441*e8d8bef9SDimitry Andric return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo()); 442*e8d8bef9SDimitry Andric } 443*e8d8bef9SDimitry Andric 444*e8d8bef9SDimitry Andric auto AlignVectors::createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, 445*e8d8bef9SDimitry Andric Type *ValTy, int Alignment) const 446*e8d8bef9SDimitry Andric -> Value * { 447*e8d8bef9SDimitry Andric Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy()); 448*e8d8bef9SDimitry Andric Value *Mask = HVC.getConstInt(-Alignment); 449*e8d8bef9SDimitry Andric Value *And = Builder.CreateAnd(AsInt, Mask); 450*e8d8bef9SDimitry Andric return Builder.CreateIntToPtr(And, ValTy->getPointerTo()); 451*e8d8bef9SDimitry Andric } 452*e8d8bef9SDimitry Andric 453*e8d8bef9SDimitry Andric auto AlignVectors::createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, 454*e8d8bef9SDimitry Andric Value *Ptr, int Alignment, Value *Mask, 455*e8d8bef9SDimitry Andric Value *PassThru) const -> Value * { 456*e8d8bef9SDimitry Andric assert(!HVC.isUndef(Mask)); // Should this be allowed? 457*e8d8bef9SDimitry Andric if (HVC.isZero(Mask)) 458*e8d8bef9SDimitry Andric return PassThru; 459*e8d8bef9SDimitry Andric if (Mask == ConstantInt::getTrue(Mask->getType())) 460*e8d8bef9SDimitry Andric return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment)); 461*e8d8bef9SDimitry Andric return Builder.CreateMaskedLoad(Ptr, Align(Alignment), Mask, PassThru); 462*e8d8bef9SDimitry Andric } 463*e8d8bef9SDimitry Andric 464*e8d8bef9SDimitry Andric auto AlignVectors::createAlignedStore(IRBuilder<> &Builder, Value *Val, 465*e8d8bef9SDimitry Andric Value *Ptr, int Alignment, 466*e8d8bef9SDimitry Andric Value *Mask) const -> Value * { 467*e8d8bef9SDimitry Andric if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask)) 468*e8d8bef9SDimitry Andric return UndefValue::get(Val->getType()); 469*e8d8bef9SDimitry Andric if (Mask == ConstantInt::getTrue(Mask->getType())) 470*e8d8bef9SDimitry Andric return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment)); 471*e8d8bef9SDimitry Andric return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask); 472*e8d8bef9SDimitry Andric } 473*e8d8bef9SDimitry Andric 474*e8d8bef9SDimitry Andric auto AlignVectors::createAddressGroups() -> bool { 475*e8d8bef9SDimitry Andric // An address group created here may contain instructions spanning 476*e8d8bef9SDimitry Andric // multiple basic blocks. 477*e8d8bef9SDimitry Andric AddrList WorkStack; 478*e8d8bef9SDimitry Andric 479*e8d8bef9SDimitry Andric auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> { 480*e8d8bef9SDimitry Andric for (AddrInfo &W : WorkStack) { 481*e8d8bef9SDimitry Andric if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr)) 482*e8d8bef9SDimitry Andric return std::make_pair(W.Inst, *D); 483*e8d8bef9SDimitry Andric } 484*e8d8bef9SDimitry Andric return std::make_pair(nullptr, 0); 485*e8d8bef9SDimitry Andric }; 486*e8d8bef9SDimitry Andric 487*e8d8bef9SDimitry Andric auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void { 488*e8d8bef9SDimitry Andric BasicBlock &Block = *DomN->getBlock(); 489*e8d8bef9SDimitry Andric for (Instruction &I : Block) { 490*e8d8bef9SDimitry Andric auto AI = this->getAddrInfo(I); // Use this-> for gcc6. 491*e8d8bef9SDimitry Andric if (!AI) 492*e8d8bef9SDimitry Andric continue; 493*e8d8bef9SDimitry Andric auto F = findBaseAndOffset(*AI); 494*e8d8bef9SDimitry Andric Instruction *GroupInst; 495*e8d8bef9SDimitry Andric if (Instruction *BI = F.first) { 496*e8d8bef9SDimitry Andric AI->Offset = F.second; 497*e8d8bef9SDimitry Andric GroupInst = BI; 498*e8d8bef9SDimitry Andric } else { 499*e8d8bef9SDimitry Andric WorkStack.push_back(*AI); 500*e8d8bef9SDimitry Andric GroupInst = AI->Inst; 501*e8d8bef9SDimitry Andric } 502*e8d8bef9SDimitry Andric AddrGroups[GroupInst].push_back(*AI); 503*e8d8bef9SDimitry Andric } 504*e8d8bef9SDimitry Andric 505*e8d8bef9SDimitry Andric for (DomTreeNode *C : DomN->children()) 506*e8d8bef9SDimitry Andric Visit(C, Visit); 507*e8d8bef9SDimitry Andric 508*e8d8bef9SDimitry Andric while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block) 509*e8d8bef9SDimitry Andric WorkStack.pop_back(); 510*e8d8bef9SDimitry Andric }; 511*e8d8bef9SDimitry Andric 512*e8d8bef9SDimitry Andric traverseBlock(HVC.DT.getRootNode(), traverseBlock); 513*e8d8bef9SDimitry Andric assert(WorkStack.empty()); 514*e8d8bef9SDimitry Andric 515*e8d8bef9SDimitry Andric // AddrGroups are formed. 516*e8d8bef9SDimitry Andric 517*e8d8bef9SDimitry Andric // Remove groups of size 1. 518*e8d8bef9SDimitry Andric erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; }); 519*e8d8bef9SDimitry Andric // Remove groups that don't use HVX types. 520*e8d8bef9SDimitry Andric erase_if(AddrGroups, [&](auto &G) { 521*e8d8bef9SDimitry Andric return !llvm::any_of( 522*e8d8bef9SDimitry Andric G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); }); 523*e8d8bef9SDimitry Andric }); 524*e8d8bef9SDimitry Andric // Remove groups where everything is properly aligned. 525*e8d8bef9SDimitry Andric erase_if(AddrGroups, [&](auto &G) { 526*e8d8bef9SDimitry Andric return llvm::all_of(G.second, 527*e8d8bef9SDimitry Andric [&](auto &I) { return I.HaveAlign >= I.NeedAlign; }); 528*e8d8bef9SDimitry Andric }); 529*e8d8bef9SDimitry Andric 530*e8d8bef9SDimitry Andric return !AddrGroups.empty(); 531*e8d8bef9SDimitry Andric } 532*e8d8bef9SDimitry Andric 533*e8d8bef9SDimitry Andric auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList { 534*e8d8bef9SDimitry Andric // Form load groups. 535*e8d8bef9SDimitry Andric // To avoid complications with moving code across basic blocks, only form 536*e8d8bef9SDimitry Andric // groups that are contained within a single basic block. 537*e8d8bef9SDimitry Andric 538*e8d8bef9SDimitry Andric auto getUpwardDeps = [](Instruction *In, Instruction *Base) { 539*e8d8bef9SDimitry Andric BasicBlock *Parent = Base->getParent(); 540*e8d8bef9SDimitry Andric assert(In->getParent() == Parent && 541*e8d8bef9SDimitry Andric "Base and In should be in the same block"); 542*e8d8bef9SDimitry Andric assert(Base->comesBefore(In) && "Base should come before In"); 543*e8d8bef9SDimitry Andric 544*e8d8bef9SDimitry Andric DepList Deps; 545*e8d8bef9SDimitry Andric std::deque<Instruction *> WorkQ = {In}; 546*e8d8bef9SDimitry Andric while (!WorkQ.empty()) { 547*e8d8bef9SDimitry Andric Instruction *D = WorkQ.front(); 548*e8d8bef9SDimitry Andric WorkQ.pop_front(); 549*e8d8bef9SDimitry Andric Deps.insert(D); 550*e8d8bef9SDimitry Andric for (Value *Op : D->operands()) { 551*e8d8bef9SDimitry Andric if (auto *I = dyn_cast<Instruction>(Op)) { 552*e8d8bef9SDimitry Andric if (I->getParent() == Parent && Base->comesBefore(I)) 553*e8d8bef9SDimitry Andric WorkQ.push_back(I); 554*e8d8bef9SDimitry Andric } 555*e8d8bef9SDimitry Andric } 556*e8d8bef9SDimitry Andric } 557*e8d8bef9SDimitry Andric return Deps; 558*e8d8bef9SDimitry Andric }; 559*e8d8bef9SDimitry Andric 560*e8d8bef9SDimitry Andric auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) { 561*e8d8bef9SDimitry Andric assert(!Move.Main.empty() && "Move group should have non-empty Main"); 562*e8d8bef9SDimitry Andric // Don't mix HVX and non-HVX instructions. 563*e8d8bef9SDimitry Andric if (Move.IsHvx != isHvx(Info)) 564*e8d8bef9SDimitry Andric return false; 565*e8d8bef9SDimitry Andric // Leading instruction in the load group. 566*e8d8bef9SDimitry Andric Instruction *Base = Move.Main.front(); 567*e8d8bef9SDimitry Andric if (Base->getParent() != Info.Inst->getParent()) 568*e8d8bef9SDimitry Andric return false; 569*e8d8bef9SDimitry Andric 570*e8d8bef9SDimitry Andric auto isSafeToMoveToBase = [&](const Instruction *I) { 571*e8d8bef9SDimitry Andric return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()); 572*e8d8bef9SDimitry Andric }; 573*e8d8bef9SDimitry Andric DepList Deps = getUpwardDeps(Info.Inst, Base); 574*e8d8bef9SDimitry Andric if (!llvm::all_of(Deps, isSafeToMoveToBase)) 575*e8d8bef9SDimitry Andric return false; 576*e8d8bef9SDimitry Andric 577*e8d8bef9SDimitry Andric // The dependencies will be moved together with the load, so make sure 578*e8d8bef9SDimitry Andric // that none of them could be moved independently in another group. 579*e8d8bef9SDimitry Andric Deps.erase(Info.Inst); 580*e8d8bef9SDimitry Andric auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; }; 581*e8d8bef9SDimitry Andric if (llvm::any_of(Deps, inAddrMap)) 582*e8d8bef9SDimitry Andric return false; 583*e8d8bef9SDimitry Andric Move.Main.push_back(Info.Inst); 584*e8d8bef9SDimitry Andric llvm::append_range(Move.Deps, Deps); 585*e8d8bef9SDimitry Andric return true; 586*e8d8bef9SDimitry Andric }; 587*e8d8bef9SDimitry Andric 588*e8d8bef9SDimitry Andric MoveList LoadGroups; 589*e8d8bef9SDimitry Andric 590*e8d8bef9SDimitry Andric for (const AddrInfo &Info : Group) { 591*e8d8bef9SDimitry Andric if (!Info.Inst->mayReadFromMemory()) 592*e8d8bef9SDimitry Andric continue; 593*e8d8bef9SDimitry Andric if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back())) 594*e8d8bef9SDimitry Andric LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true); 595*e8d8bef9SDimitry Andric } 596*e8d8bef9SDimitry Andric 597*e8d8bef9SDimitry Andric // Erase singleton groups. 598*e8d8bef9SDimitry Andric erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; }); 599*e8d8bef9SDimitry Andric return LoadGroups; 600*e8d8bef9SDimitry Andric } 601*e8d8bef9SDimitry Andric 602*e8d8bef9SDimitry Andric auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList { 603*e8d8bef9SDimitry Andric // Form store groups. 604*e8d8bef9SDimitry Andric // To avoid complications with moving code across basic blocks, only form 605*e8d8bef9SDimitry Andric // groups that are contained within a single basic block. 606*e8d8bef9SDimitry Andric 607*e8d8bef9SDimitry Andric auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) { 608*e8d8bef9SDimitry Andric assert(!Move.Main.empty() && "Move group should have non-empty Main"); 609*e8d8bef9SDimitry Andric // For stores with return values we'd have to collect downward depenencies. 610*e8d8bef9SDimitry Andric // There are no such stores that we handle at the moment, so omit that. 611*e8d8bef9SDimitry Andric assert(Info.Inst->getType()->isVoidTy() && 612*e8d8bef9SDimitry Andric "Not handling stores with return values"); 613*e8d8bef9SDimitry Andric // Don't mix HVX and non-HVX instructions. 614*e8d8bef9SDimitry Andric if (Move.IsHvx != isHvx(Info)) 615*e8d8bef9SDimitry Andric return false; 616*e8d8bef9SDimitry Andric // For stores we need to be careful whether it's safe to move them. 617*e8d8bef9SDimitry Andric // Stores that are otherwise safe to move together may not appear safe 618*e8d8bef9SDimitry Andric // to move over one another (i.e. isSafeToMoveBefore may return false). 619*e8d8bef9SDimitry Andric Instruction *Base = Move.Main.front(); 620*e8d8bef9SDimitry Andric if (Base->getParent() != Info.Inst->getParent()) 621*e8d8bef9SDimitry Andric return false; 622*e8d8bef9SDimitry Andric if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main)) 623*e8d8bef9SDimitry Andric return false; 624*e8d8bef9SDimitry Andric Move.Main.push_back(Info.Inst); 625*e8d8bef9SDimitry Andric return true; 626*e8d8bef9SDimitry Andric }; 627*e8d8bef9SDimitry Andric 628*e8d8bef9SDimitry Andric MoveList StoreGroups; 629*e8d8bef9SDimitry Andric 630*e8d8bef9SDimitry Andric for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) { 631*e8d8bef9SDimitry Andric const AddrInfo &Info = *I; 632*e8d8bef9SDimitry Andric if (!Info.Inst->mayWriteToMemory()) 633*e8d8bef9SDimitry Andric continue; 634*e8d8bef9SDimitry Andric if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back())) 635*e8d8bef9SDimitry Andric StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false); 636*e8d8bef9SDimitry Andric } 637*e8d8bef9SDimitry Andric 638*e8d8bef9SDimitry Andric // Erase singleton groups. 639*e8d8bef9SDimitry Andric erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; }); 640*e8d8bef9SDimitry Andric return StoreGroups; 641*e8d8bef9SDimitry Andric } 642*e8d8bef9SDimitry Andric 643*e8d8bef9SDimitry Andric auto AlignVectors::move(const MoveGroup &Move) const -> bool { 644*e8d8bef9SDimitry Andric assert(!Move.Main.empty() && "Move group should have non-empty Main"); 645*e8d8bef9SDimitry Andric Instruction *Where = Move.Main.front(); 646*e8d8bef9SDimitry Andric 647*e8d8bef9SDimitry Andric if (Move.IsLoad) { 648*e8d8bef9SDimitry Andric // Move all deps to before Where, keeping order. 649*e8d8bef9SDimitry Andric for (Instruction *D : Move.Deps) 650*e8d8bef9SDimitry Andric D->moveBefore(Where); 651*e8d8bef9SDimitry Andric // Move all main instructions to after Where, keeping order. 652*e8d8bef9SDimitry Andric ArrayRef<Instruction *> Main(Move.Main); 653*e8d8bef9SDimitry Andric for (Instruction *M : Main.drop_front(1)) { 654*e8d8bef9SDimitry Andric M->moveAfter(Where); 655*e8d8bef9SDimitry Andric Where = M; 656*e8d8bef9SDimitry Andric } 657*e8d8bef9SDimitry Andric } else { 658*e8d8bef9SDimitry Andric // NOTE: Deps are empty for "store" groups. If they need to be 659*e8d8bef9SDimitry Andric // non-empty, decide on the order. 660*e8d8bef9SDimitry Andric assert(Move.Deps.empty()); 661*e8d8bef9SDimitry Andric // Move all main instructions to before Where, inverting order. 662*e8d8bef9SDimitry Andric ArrayRef<Instruction *> Main(Move.Main); 663*e8d8bef9SDimitry Andric for (Instruction *M : Main.drop_front(1)) { 664*e8d8bef9SDimitry Andric M->moveBefore(Where); 665*e8d8bef9SDimitry Andric Where = M; 666*e8d8bef9SDimitry Andric } 667*e8d8bef9SDimitry Andric } 668*e8d8bef9SDimitry Andric 669*e8d8bef9SDimitry Andric return Move.Main.size() + Move.Deps.size() > 1; 670*e8d8bef9SDimitry Andric } 671*e8d8bef9SDimitry Andric 672*e8d8bef9SDimitry Andric auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool { 673*e8d8bef9SDimitry Andric // TODO: Needs support for masked loads/stores of "scalar" vectors. 674*e8d8bef9SDimitry Andric if (!Move.IsHvx) 675*e8d8bef9SDimitry Andric return false; 676*e8d8bef9SDimitry Andric 677*e8d8bef9SDimitry Andric // Return the element with the maximum alignment from Range, 678*e8d8bef9SDimitry Andric // where GetValue obtains the value to compare from an element. 679*e8d8bef9SDimitry Andric auto getMaxOf = [](auto Range, auto GetValue) { 680*e8d8bef9SDimitry Andric return *std::max_element( 681*e8d8bef9SDimitry Andric Range.begin(), Range.end(), 682*e8d8bef9SDimitry Andric [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); }); 683*e8d8bef9SDimitry Andric }; 684*e8d8bef9SDimitry Andric 685*e8d8bef9SDimitry Andric const AddrList &BaseInfos = AddrGroups.at(Move.Base); 686*e8d8bef9SDimitry Andric 687*e8d8bef9SDimitry Andric // Conceptually, there is a vector of N bytes covering the addresses 688*e8d8bef9SDimitry Andric // starting from the minimum offset (i.e. Base.Addr+Start). This vector 689*e8d8bef9SDimitry Andric // represents a contiguous memory region that spans all accessed memory 690*e8d8bef9SDimitry Andric // locations. 691*e8d8bef9SDimitry Andric // The correspondence between loaded or stored values will be expressed 692*e8d8bef9SDimitry Andric // in terms of this vector. For example, the 0th element of the vector 693*e8d8bef9SDimitry Andric // from the Base address info will start at byte Start from the beginning 694*e8d8bef9SDimitry Andric // of this conceptual vector. 695*e8d8bef9SDimitry Andric // 696*e8d8bef9SDimitry Andric // This vector will be loaded/stored starting at the nearest down-aligned 697*e8d8bef9SDimitry Andric // address and the amount od the down-alignment will be AlignVal: 698*e8d8bef9SDimitry Andric // valign(load_vector(align_down(Base+Start)), AlignVal) 699*e8d8bef9SDimitry Andric 700*e8d8bef9SDimitry Andric std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end()); 701*e8d8bef9SDimitry Andric AddrList MoveInfos; 702*e8d8bef9SDimitry Andric llvm::copy_if( 703*e8d8bef9SDimitry Andric BaseInfos, std::back_inserter(MoveInfos), 704*e8d8bef9SDimitry Andric [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); }); 705*e8d8bef9SDimitry Andric 706*e8d8bef9SDimitry Andric // Maximum alignment present in the whole address group. 707*e8d8bef9SDimitry Andric const AddrInfo &WithMaxAlign = 708*e8d8bef9SDimitry Andric getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; }); 709*e8d8bef9SDimitry Andric Align MaxGiven = WithMaxAlign.HaveAlign; 710*e8d8bef9SDimitry Andric 711*e8d8bef9SDimitry Andric // Minimum alignment present in the move address group. 712*e8d8bef9SDimitry Andric const AddrInfo &WithMinOffset = 713*e8d8bef9SDimitry Andric getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; }); 714*e8d8bef9SDimitry Andric 715*e8d8bef9SDimitry Andric const AddrInfo &WithMaxNeeded = 716*e8d8bef9SDimitry Andric getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; }); 717*e8d8bef9SDimitry Andric Align MinNeeded = WithMaxNeeded.NeedAlign; 718*e8d8bef9SDimitry Andric 719*e8d8bef9SDimitry Andric // Set the builder at the top instruction in the move group. 720*e8d8bef9SDimitry Andric Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back(); 721*e8d8bef9SDimitry Andric IRBuilder<> Builder(TopIn); 722*e8d8bef9SDimitry Andric Value *AlignAddr = nullptr; // Actual aligned address. 723*e8d8bef9SDimitry Andric Value *AlignVal = nullptr; // Right-shift amount (for valign). 724*e8d8bef9SDimitry Andric 725*e8d8bef9SDimitry Andric if (MinNeeded <= MaxGiven) { 726*e8d8bef9SDimitry Andric int Start = WithMinOffset.Offset; 727*e8d8bef9SDimitry Andric int OffAtMax = WithMaxAlign.Offset; 728*e8d8bef9SDimitry Andric // Shift the offset of the maximally aligned instruction (OffAtMax) 729*e8d8bef9SDimitry Andric // back by just enough multiples of the required alignment to cover the 730*e8d8bef9SDimitry Andric // distance from Start to OffAtMax. 731*e8d8bef9SDimitry Andric // Calculate the address adjustment amount based on the address with the 732*e8d8bef9SDimitry Andric // maximum alignment. This is to allow a simple gep instruction instead 733*e8d8bef9SDimitry Andric // of potential bitcasts to i8*. 734*e8d8bef9SDimitry Andric int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value()); 735*e8d8bef9SDimitry Andric AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr, 736*e8d8bef9SDimitry Andric WithMaxAlign.ValTy, Adjust); 737*e8d8bef9SDimitry Andric int Diff = Start - (OffAtMax + Adjust); 738*e8d8bef9SDimitry Andric AlignVal = HVC.getConstInt(Diff); 739*e8d8bef9SDimitry Andric // Sanity. 740*e8d8bef9SDimitry Andric assert(Diff >= 0); 741*e8d8bef9SDimitry Andric assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value()); 742*e8d8bef9SDimitry Andric } else { 743*e8d8bef9SDimitry Andric // WithMinOffset is the lowest address in the group, 744*e8d8bef9SDimitry Andric // WithMinOffset.Addr = Base+Start. 745*e8d8bef9SDimitry Andric // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb) 746*e8d8bef9SDimitry Andric // mask off unnecessary bits, so it's ok to just the original pointer as 747*e8d8bef9SDimitry Andric // the alignment amount. 748*e8d8bef9SDimitry Andric // Do an explicit down-alignment of the address to avoid creating an 749*e8d8bef9SDimitry Andric // aligned instruction with an address that is not really aligned. 750*e8d8bef9SDimitry Andric AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr, 751*e8d8bef9SDimitry Andric WithMinOffset.ValTy, MinNeeded.value()); 752*e8d8bef9SDimitry Andric AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy()); 753*e8d8bef9SDimitry Andric } 754*e8d8bef9SDimitry Andric 755*e8d8bef9SDimitry Andric ByteSpan VSpan; 756*e8d8bef9SDimitry Andric for (const AddrInfo &AI : MoveInfos) { 757*e8d8bef9SDimitry Andric VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy), 758*e8d8bef9SDimitry Andric AI.Offset - WithMinOffset.Offset); 759*e8d8bef9SDimitry Andric } 760*e8d8bef9SDimitry Andric 761*e8d8bef9SDimitry Andric // The aligned loads/stores will use blocks that are either scalars, 762*e8d8bef9SDimitry Andric // or HVX vectors. Let "sector" be the unified term for such a block. 763*e8d8bef9SDimitry Andric // blend(scalar, vector) -> sector... 764*e8d8bef9SDimitry Andric int ScLen = Move.IsHvx ? HVC.HST.getVectorLength() 765*e8d8bef9SDimitry Andric : std::max<int>(MinNeeded.value(), 4); 766*e8d8bef9SDimitry Andric assert(!Move.IsHvx || ScLen == 64 || ScLen == 128); 767*e8d8bef9SDimitry Andric assert(Move.IsHvx || ScLen == 4 || ScLen == 8); 768*e8d8bef9SDimitry Andric 769*e8d8bef9SDimitry Andric Type *SecTy = HVC.getByteTy(ScLen); 770*e8d8bef9SDimitry Andric int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen; 771*e8d8bef9SDimitry Andric 772*e8d8bef9SDimitry Andric if (Move.IsLoad) { 773*e8d8bef9SDimitry Andric ByteSpan ASpan; 774*e8d8bef9SDimitry Andric auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen)); 775*e8d8bef9SDimitry Andric auto *Undef = UndefValue::get(SecTy); 776*e8d8bef9SDimitry Andric 777*e8d8bef9SDimitry Andric for (int i = 0; i != NumSectors + 1; ++i) { 778*e8d8bef9SDimitry Andric Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen); 779*e8d8bef9SDimitry Andric // FIXME: generate a predicated load? 780*e8d8bef9SDimitry Andric Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef); 781*e8d8bef9SDimitry Andric ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen); 782*e8d8bef9SDimitry Andric } 783*e8d8bef9SDimitry Andric 784*e8d8bef9SDimitry Andric for (int j = 0; j != NumSectors; ++j) { 785*e8d8bef9SDimitry Andric ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val, 786*e8d8bef9SDimitry Andric ASpan[j + 1].Seg.Val, AlignVal); 787*e8d8bef9SDimitry Andric } 788*e8d8bef9SDimitry Andric 789*e8d8bef9SDimitry Andric for (ByteSpan::Block &B : VSpan) { 790*e8d8bef9SDimitry Andric ByteSpan Section = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos); 791*e8d8bef9SDimitry Andric Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size)); 792*e8d8bef9SDimitry Andric for (ByteSpan::Block &S : Section) { 793*e8d8bef9SDimitry Andric Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val)); 794*e8d8bef9SDimitry Andric Accum = 795*e8d8bef9SDimitry Andric HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos); 796*e8d8bef9SDimitry Andric } 797*e8d8bef9SDimitry Andric // Instead of casting everything to bytes for the vselect, cast to the 798*e8d8bef9SDimitry Andric // original value type. This will avoid complications with casting masks. 799*e8d8bef9SDimitry Andric // For example, in cases when the original mask applied to i32, it could 800*e8d8bef9SDimitry Andric // be converted to a mask applicable to i8 via pred_typecast intrinsic, 801*e8d8bef9SDimitry Andric // but if the mask is not exactly of HVX length, extra handling would be 802*e8d8bef9SDimitry Andric // needed to make it work. 803*e8d8bef9SDimitry Andric Type *ValTy = getPayload(B.Seg.Val)->getType(); 804*e8d8bef9SDimitry Andric Value *Cast = Builder.CreateBitCast(Accum, ValTy); 805*e8d8bef9SDimitry Andric Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast, 806*e8d8bef9SDimitry Andric getPassThrough(B.Seg.Val)); 807*e8d8bef9SDimitry Andric B.Seg.Val->replaceAllUsesWith(Sel); 808*e8d8bef9SDimitry Andric } 809*e8d8bef9SDimitry Andric } else { 810*e8d8bef9SDimitry Andric // Stores. 811*e8d8bef9SDimitry Andric ByteSpan ASpanV, ASpanM; 812*e8d8bef9SDimitry Andric 813*e8d8bef9SDimitry Andric // Return a vector value corresponding to the input value Val: 814*e8d8bef9SDimitry Andric // either <1 x Val> for scalar Val, or Val itself for vector Val. 815*e8d8bef9SDimitry Andric auto MakeVec = [](IRBuilder<> &Builder, Value *Val) -> Value * { 816*e8d8bef9SDimitry Andric Type *Ty = Val->getType(); 817*e8d8bef9SDimitry Andric if (Ty->isVectorTy()) 818*e8d8bef9SDimitry Andric return Val; 819*e8d8bef9SDimitry Andric auto *VecTy = VectorType::get(Ty, 1, /*Scalable*/ false); 820*e8d8bef9SDimitry Andric return Builder.CreateBitCast(Val, VecTy); 821*e8d8bef9SDimitry Andric }; 822*e8d8bef9SDimitry Andric 823*e8d8bef9SDimitry Andric // Create an extra "undef" sector at the beginning and at the end. 824*e8d8bef9SDimitry Andric // They will be used as the left/right filler in the vlalign step. 825*e8d8bef9SDimitry Andric for (int i = -1; i != NumSectors + 1; ++i) { 826*e8d8bef9SDimitry Andric // For stores, the size of each section is an aligned vector length. 827*e8d8bef9SDimitry Andric // Adjust the store offsets relative to the section start offset. 828*e8d8bef9SDimitry Andric ByteSpan Section = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen); 829*e8d8bef9SDimitry Andric Value *AccumV = UndefValue::get(SecTy); 830*e8d8bef9SDimitry Andric Value *AccumM = HVC.getNullValue(SecTy); 831*e8d8bef9SDimitry Andric for (ByteSpan::Block &S : Section) { 832*e8d8bef9SDimitry Andric Value *Pay = getPayload(S.Seg.Val); 833*e8d8bef9SDimitry Andric Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)), 834*e8d8bef9SDimitry Andric Pay->getType(), HVC.getByteTy()); 835*e8d8bef9SDimitry Andric AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask), 836*e8d8bef9SDimitry Andric S.Seg.Start, S.Seg.Size, S.Pos); 837*e8d8bef9SDimitry Andric AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay), 838*e8d8bef9SDimitry Andric S.Seg.Start, S.Seg.Size, S.Pos); 839*e8d8bef9SDimitry Andric } 840*e8d8bef9SDimitry Andric ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen); 841*e8d8bef9SDimitry Andric ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen); 842*e8d8bef9SDimitry Andric } 843*e8d8bef9SDimitry Andric 844*e8d8bef9SDimitry Andric // vlalign 845*e8d8bef9SDimitry Andric for (int j = 1; j != NumSectors + 2; ++j) { 846*e8d8bef9SDimitry Andric ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val, 847*e8d8bef9SDimitry Andric ASpanV[j].Seg.Val, AlignVal); 848*e8d8bef9SDimitry Andric ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val, 849*e8d8bef9SDimitry Andric ASpanM[j].Seg.Val, AlignVal); 850*e8d8bef9SDimitry Andric } 851*e8d8bef9SDimitry Andric 852*e8d8bef9SDimitry Andric for (int i = 0; i != NumSectors + 1; ++i) { 853*e8d8bef9SDimitry Andric Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen); 854*e8d8bef9SDimitry Andric Value *Val = ASpanV[i].Seg.Val; 855*e8d8bef9SDimitry Andric Value *Mask = ASpanM[i].Seg.Val; // bytes 856*e8d8bef9SDimitry Andric if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) 857*e8d8bef9SDimitry Andric createAlignedStore(Builder, Val, Ptr, ScLen, HVC.vlsb(Builder, Mask)); 858*e8d8bef9SDimitry Andric } 859*e8d8bef9SDimitry Andric } 860*e8d8bef9SDimitry Andric 861*e8d8bef9SDimitry Andric for (auto *Inst : Move.Main) 862*e8d8bef9SDimitry Andric Inst->eraseFromParent(); 863*e8d8bef9SDimitry Andric 864*e8d8bef9SDimitry Andric return true; 865*e8d8bef9SDimitry Andric } 866*e8d8bef9SDimitry Andric 867*e8d8bef9SDimitry Andric auto AlignVectors::run() -> bool { 868*e8d8bef9SDimitry Andric if (!createAddressGroups()) 869*e8d8bef9SDimitry Andric return false; 870*e8d8bef9SDimitry Andric 871*e8d8bef9SDimitry Andric bool Changed = false; 872*e8d8bef9SDimitry Andric MoveList LoadGroups, StoreGroups; 873*e8d8bef9SDimitry Andric 874*e8d8bef9SDimitry Andric for (auto &G : AddrGroups) { 875*e8d8bef9SDimitry Andric llvm::append_range(LoadGroups, createLoadGroups(G.second)); 876*e8d8bef9SDimitry Andric llvm::append_range(StoreGroups, createStoreGroups(G.second)); 877*e8d8bef9SDimitry Andric } 878*e8d8bef9SDimitry Andric 879*e8d8bef9SDimitry Andric for (auto &M : LoadGroups) 880*e8d8bef9SDimitry Andric Changed |= move(M); 881*e8d8bef9SDimitry Andric for (auto &M : StoreGroups) 882*e8d8bef9SDimitry Andric Changed |= move(M); 883*e8d8bef9SDimitry Andric 884*e8d8bef9SDimitry Andric for (auto &M : LoadGroups) 885*e8d8bef9SDimitry Andric Changed |= realignGroup(M); 886*e8d8bef9SDimitry Andric for (auto &M : StoreGroups) 887*e8d8bef9SDimitry Andric Changed |= realignGroup(M); 888*e8d8bef9SDimitry Andric 889*e8d8bef9SDimitry Andric return Changed; 890*e8d8bef9SDimitry Andric } 891*e8d8bef9SDimitry Andric 892*e8d8bef9SDimitry Andric // --- End AlignVectors 893*e8d8bef9SDimitry Andric 894*e8d8bef9SDimitry Andric auto HexagonVectorCombine::run() -> bool { 895*e8d8bef9SDimitry Andric if (!HST.useHVXOps()) 896*e8d8bef9SDimitry Andric return false; 897*e8d8bef9SDimitry Andric 898*e8d8bef9SDimitry Andric bool Changed = AlignVectors(*this).run(); 899*e8d8bef9SDimitry Andric return Changed; 900*e8d8bef9SDimitry Andric } 901*e8d8bef9SDimitry Andric 902*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getIntTy() const -> IntegerType * { 903*e8d8bef9SDimitry Andric return Type::getInt32Ty(F.getContext()); 904*e8d8bef9SDimitry Andric } 905*e8d8bef9SDimitry Andric 906*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * { 907*e8d8bef9SDimitry Andric assert(ElemCount >= 0); 908*e8d8bef9SDimitry Andric IntegerType *ByteTy = Type::getInt8Ty(F.getContext()); 909*e8d8bef9SDimitry Andric if (ElemCount == 0) 910*e8d8bef9SDimitry Andric return ByteTy; 911*e8d8bef9SDimitry Andric return VectorType::get(ByteTy, ElemCount, /*Scalable*/ false); 912*e8d8bef9SDimitry Andric } 913*e8d8bef9SDimitry Andric 914*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * { 915*e8d8bef9SDimitry Andric assert(ElemCount >= 0); 916*e8d8bef9SDimitry Andric IntegerType *BoolTy = Type::getInt1Ty(F.getContext()); 917*e8d8bef9SDimitry Andric if (ElemCount == 0) 918*e8d8bef9SDimitry Andric return BoolTy; 919*e8d8bef9SDimitry Andric return VectorType::get(BoolTy, ElemCount, /*Scalable*/ false); 920*e8d8bef9SDimitry Andric } 921*e8d8bef9SDimitry Andric 922*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * { 923*e8d8bef9SDimitry Andric return ConstantInt::getSigned(getIntTy(), Val); 924*e8d8bef9SDimitry Andric } 925*e8d8bef9SDimitry Andric 926*e8d8bef9SDimitry Andric auto HexagonVectorCombine::isZero(const Value *Val) const -> bool { 927*e8d8bef9SDimitry Andric if (auto *C = dyn_cast<Constant>(Val)) 928*e8d8bef9SDimitry Andric return C->isZeroValue(); 929*e8d8bef9SDimitry Andric return false; 930*e8d8bef9SDimitry Andric } 931*e8d8bef9SDimitry Andric 932*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getIntValue(const Value *Val) const 933*e8d8bef9SDimitry Andric -> Optional<APInt> { 934*e8d8bef9SDimitry Andric if (auto *CI = dyn_cast<ConstantInt>(Val)) 935*e8d8bef9SDimitry Andric return CI->getValue(); 936*e8d8bef9SDimitry Andric return None; 937*e8d8bef9SDimitry Andric } 938*e8d8bef9SDimitry Andric 939*e8d8bef9SDimitry Andric auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool { 940*e8d8bef9SDimitry Andric return isa<UndefValue>(Val); 941*e8d8bef9SDimitry Andric } 942*e8d8bef9SDimitry Andric 943*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int { 944*e8d8bef9SDimitry Andric return getSizeOf(Val->getType()); 945*e8d8bef9SDimitry Andric } 946*e8d8bef9SDimitry Andric 947*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int { 948*e8d8bef9SDimitry Andric return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue(); 949*e8d8bef9SDimitry Andric } 950*e8d8bef9SDimitry Andric 951*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int { 952*e8d8bef9SDimitry Andric // The actual type may be shorter than the HVX vector, so determine 953*e8d8bef9SDimitry Andric // the alignment based on subtarget info. 954*e8d8bef9SDimitry Andric if (HST.isTypeForHVX(Ty)) 955*e8d8bef9SDimitry Andric return HST.getVectorLength(); 956*e8d8bef9SDimitry Andric return DL.getABITypeAlign(Ty).value(); 957*e8d8bef9SDimitry Andric } 958*e8d8bef9SDimitry Andric 959*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * { 960*e8d8bef9SDimitry Andric assert(Ty->isIntOrIntVectorTy()); 961*e8d8bef9SDimitry Andric auto Zero = ConstantInt::get(Ty->getScalarType(), 0); 962*e8d8bef9SDimitry Andric if (auto *VecTy = dyn_cast<VectorType>(Ty)) 963*e8d8bef9SDimitry Andric return ConstantVector::getSplat(VecTy->getElementCount(), Zero); 964*e8d8bef9SDimitry Andric return Zero; 965*e8d8bef9SDimitry Andric } 966*e8d8bef9SDimitry Andric 967*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * { 968*e8d8bef9SDimitry Andric assert(Ty->isIntOrIntVectorTy()); 969*e8d8bef9SDimitry Andric auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1); 970*e8d8bef9SDimitry Andric if (auto *VecTy = dyn_cast<VectorType>(Ty)) 971*e8d8bef9SDimitry Andric return ConstantVector::getSplat(VecTy->getElementCount(), Minus1); 972*e8d8bef9SDimitry Andric return Minus1; 973*e8d8bef9SDimitry Andric } 974*e8d8bef9SDimitry Andric 975*e8d8bef9SDimitry Andric // Insert bytes [Start..Start+Length) of Src into Dst at byte Where. 976*e8d8bef9SDimitry Andric auto HexagonVectorCombine::insertb(IRBuilder<> &Builder, Value *Dst, Value *Src, 977*e8d8bef9SDimitry Andric int Start, int Length, int Where) const 978*e8d8bef9SDimitry Andric -> Value * { 979*e8d8bef9SDimitry Andric assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType())); 980*e8d8bef9SDimitry Andric int SrcLen = getSizeOf(Src); 981*e8d8bef9SDimitry Andric int DstLen = getSizeOf(Dst); 982*e8d8bef9SDimitry Andric assert(0 <= Start && Start + Length <= SrcLen); 983*e8d8bef9SDimitry Andric assert(0 <= Where && Where + Length <= DstLen); 984*e8d8bef9SDimitry Andric 985*e8d8bef9SDimitry Andric int P2Len = PowerOf2Ceil(SrcLen | DstLen); 986*e8d8bef9SDimitry Andric auto *Undef = UndefValue::get(getByteTy()); 987*e8d8bef9SDimitry Andric Value *P2Src = vresize(Builder, Src, P2Len, Undef); 988*e8d8bef9SDimitry Andric Value *P2Dst = vresize(Builder, Dst, P2Len, Undef); 989*e8d8bef9SDimitry Andric 990*e8d8bef9SDimitry Andric SmallVector<int, 256> SMask(P2Len); 991*e8d8bef9SDimitry Andric for (int i = 0; i != P2Len; ++i) { 992*e8d8bef9SDimitry Andric // If i is in [Where, Where+Length), pick Src[Start+(i-Where)]. 993*e8d8bef9SDimitry Andric // Otherwise, pick Dst[i]; 994*e8d8bef9SDimitry Andric SMask[i] = 995*e8d8bef9SDimitry Andric (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i; 996*e8d8bef9SDimitry Andric } 997*e8d8bef9SDimitry Andric 998*e8d8bef9SDimitry Andric Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask); 999*e8d8bef9SDimitry Andric return vresize(Builder, P2Insert, DstLen, Undef); 1000*e8d8bef9SDimitry Andric } 1001*e8d8bef9SDimitry Andric 1002*e8d8bef9SDimitry Andric auto HexagonVectorCombine::vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, 1003*e8d8bef9SDimitry Andric Value *Amt) const -> Value * { 1004*e8d8bef9SDimitry Andric assert(Lo->getType() == Hi->getType() && "Argument type mismatch"); 1005*e8d8bef9SDimitry Andric assert(isSectorTy(Hi->getType())); 1006*e8d8bef9SDimitry Andric if (isZero(Amt)) 1007*e8d8bef9SDimitry Andric return Hi; 1008*e8d8bef9SDimitry Andric int VecLen = getSizeOf(Hi); 1009*e8d8bef9SDimitry Andric if (auto IntAmt = getIntValue(Amt)) 1010*e8d8bef9SDimitry Andric return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(), 1011*e8d8bef9SDimitry Andric VecLen); 1012*e8d8bef9SDimitry Andric 1013*e8d8bef9SDimitry Andric if (HST.isTypeForHVX(Hi->getType())) { 1014*e8d8bef9SDimitry Andric int HwLen = HST.getVectorLength(); 1015*e8d8bef9SDimitry Andric assert(VecLen == HwLen && "Expecting an exact HVX type"); 1016*e8d8bef9SDimitry Andric Intrinsic::ID V6_vlalignb = HwLen == 64 1017*e8d8bef9SDimitry Andric ? Intrinsic::hexagon_V6_vlalignb 1018*e8d8bef9SDimitry Andric : Intrinsic::hexagon_V6_vlalignb_128B; 1019*e8d8bef9SDimitry Andric return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(), 1020*e8d8bef9SDimitry Andric {Hi, Lo, Amt}); 1021*e8d8bef9SDimitry Andric } 1022*e8d8bef9SDimitry Andric 1023*e8d8bef9SDimitry Andric if (VecLen == 4) { 1024*e8d8bef9SDimitry Andric Value *Pair = concat(Builder, {Lo, Hi}); 1025*e8d8bef9SDimitry Andric Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32); 1026*e8d8bef9SDimitry Andric Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext())); 1027*e8d8bef9SDimitry Andric return Builder.CreateBitCast(Trunc, Hi->getType()); 1028*e8d8bef9SDimitry Andric } 1029*e8d8bef9SDimitry Andric if (VecLen == 8) { 1030*e8d8bef9SDimitry Andric Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt); 1031*e8d8bef9SDimitry Andric return vralignb(Builder, Lo, Hi, Sub); 1032*e8d8bef9SDimitry Andric } 1033*e8d8bef9SDimitry Andric llvm_unreachable("Unexpected vector length"); 1034*e8d8bef9SDimitry Andric } 1035*e8d8bef9SDimitry Andric 1036*e8d8bef9SDimitry Andric auto HexagonVectorCombine::vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, 1037*e8d8bef9SDimitry Andric Value *Amt) const -> Value * { 1038*e8d8bef9SDimitry Andric assert(Lo->getType() == Hi->getType() && "Argument type mismatch"); 1039*e8d8bef9SDimitry Andric assert(isSectorTy(Lo->getType())); 1040*e8d8bef9SDimitry Andric if (isZero(Amt)) 1041*e8d8bef9SDimitry Andric return Lo; 1042*e8d8bef9SDimitry Andric int VecLen = getSizeOf(Lo); 1043*e8d8bef9SDimitry Andric if (auto IntAmt = getIntValue(Amt)) 1044*e8d8bef9SDimitry Andric return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen); 1045*e8d8bef9SDimitry Andric 1046*e8d8bef9SDimitry Andric if (HST.isTypeForHVX(Lo->getType())) { 1047*e8d8bef9SDimitry Andric int HwLen = HST.getVectorLength(); 1048*e8d8bef9SDimitry Andric assert(VecLen == HwLen && "Expecting an exact HVX type"); 1049*e8d8bef9SDimitry Andric Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb 1050*e8d8bef9SDimitry Andric : Intrinsic::hexagon_V6_valignb_128B; 1051*e8d8bef9SDimitry Andric return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(), 1052*e8d8bef9SDimitry Andric {Hi, Lo, Amt}); 1053*e8d8bef9SDimitry Andric } 1054*e8d8bef9SDimitry Andric 1055*e8d8bef9SDimitry Andric if (VecLen == 4) { 1056*e8d8bef9SDimitry Andric Value *Pair = concat(Builder, {Lo, Hi}); 1057*e8d8bef9SDimitry Andric Value *Shift = Builder.CreateLShr(Pair, Amt); 1058*e8d8bef9SDimitry Andric Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext())); 1059*e8d8bef9SDimitry Andric return Builder.CreateBitCast(Trunc, Lo->getType()); 1060*e8d8bef9SDimitry Andric } 1061*e8d8bef9SDimitry Andric if (VecLen == 8) { 1062*e8d8bef9SDimitry Andric Type *Int64Ty = Type::getInt64Ty(F.getContext()); 1063*e8d8bef9SDimitry Andric Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty); 1064*e8d8bef9SDimitry Andric Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty); 1065*e8d8bef9SDimitry Andric Function *FI = Intrinsic::getDeclaration(F.getParent(), 1066*e8d8bef9SDimitry Andric Intrinsic::hexagon_S2_valignrb); 1067*e8d8bef9SDimitry Andric Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt}); 1068*e8d8bef9SDimitry Andric return Builder.CreateBitCast(Call, Lo->getType()); 1069*e8d8bef9SDimitry Andric } 1070*e8d8bef9SDimitry Andric llvm_unreachable("Unexpected vector length"); 1071*e8d8bef9SDimitry Andric } 1072*e8d8bef9SDimitry Andric 1073*e8d8bef9SDimitry Andric // Concatenates a sequence of vectors of the same type. 1074*e8d8bef9SDimitry Andric auto HexagonVectorCombine::concat(IRBuilder<> &Builder, 1075*e8d8bef9SDimitry Andric ArrayRef<Value *> Vecs) const -> Value * { 1076*e8d8bef9SDimitry Andric assert(!Vecs.empty()); 1077*e8d8bef9SDimitry Andric SmallVector<int, 256> SMask; 1078*e8d8bef9SDimitry Andric std::vector<Value *> Work[2]; 1079*e8d8bef9SDimitry Andric int ThisW = 0, OtherW = 1; 1080*e8d8bef9SDimitry Andric 1081*e8d8bef9SDimitry Andric Work[ThisW].assign(Vecs.begin(), Vecs.end()); 1082*e8d8bef9SDimitry Andric while (Work[ThisW].size() > 1) { 1083*e8d8bef9SDimitry Andric auto *Ty = cast<VectorType>(Work[ThisW].front()->getType()); 1084*e8d8bef9SDimitry Andric int ElemCount = Ty->getElementCount().getFixedValue(); 1085*e8d8bef9SDimitry Andric SMask.resize(ElemCount * 2); 1086*e8d8bef9SDimitry Andric std::iota(SMask.begin(), SMask.end(), 0); 1087*e8d8bef9SDimitry Andric 1088*e8d8bef9SDimitry Andric Work[OtherW].clear(); 1089*e8d8bef9SDimitry Andric if (Work[ThisW].size() % 2 != 0) 1090*e8d8bef9SDimitry Andric Work[ThisW].push_back(UndefValue::get(Ty)); 1091*e8d8bef9SDimitry Andric for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) { 1092*e8d8bef9SDimitry Andric Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i], 1093*e8d8bef9SDimitry Andric Work[ThisW][i + 1], SMask); 1094*e8d8bef9SDimitry Andric Work[OtherW].push_back(Joined); 1095*e8d8bef9SDimitry Andric } 1096*e8d8bef9SDimitry Andric std::swap(ThisW, OtherW); 1097*e8d8bef9SDimitry Andric } 1098*e8d8bef9SDimitry Andric 1099*e8d8bef9SDimitry Andric // Since there may have been some undefs appended to make shuffle operands 1100*e8d8bef9SDimitry Andric // have the same type, perform the last shuffle to only pick the original 1101*e8d8bef9SDimitry Andric // elements. 1102*e8d8bef9SDimitry Andric SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType())); 1103*e8d8bef9SDimitry Andric std::iota(SMask.begin(), SMask.end(), 0); 1104*e8d8bef9SDimitry Andric Value *Total = Work[OtherW].front(); 1105*e8d8bef9SDimitry Andric return Builder.CreateShuffleVector(Total, SMask); 1106*e8d8bef9SDimitry Andric } 1107*e8d8bef9SDimitry Andric 1108*e8d8bef9SDimitry Andric auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val, 1109*e8d8bef9SDimitry Andric int NewSize, Value *Pad) const -> Value * { 1110*e8d8bef9SDimitry Andric assert(isa<VectorType>(Val->getType())); 1111*e8d8bef9SDimitry Andric auto *ValTy = cast<VectorType>(Val->getType()); 1112*e8d8bef9SDimitry Andric assert(ValTy->getElementType() == Pad->getType()); 1113*e8d8bef9SDimitry Andric 1114*e8d8bef9SDimitry Andric int CurSize = ValTy->getElementCount().getFixedValue(); 1115*e8d8bef9SDimitry Andric if (CurSize == NewSize) 1116*e8d8bef9SDimitry Andric return Val; 1117*e8d8bef9SDimitry Andric // Truncate? 1118*e8d8bef9SDimitry Andric if (CurSize > NewSize) 1119*e8d8bef9SDimitry Andric return getElementRange(Builder, Val, /*Unused*/ Val, 0, NewSize); 1120*e8d8bef9SDimitry Andric // Extend. 1121*e8d8bef9SDimitry Andric SmallVector<int, 128> SMask(NewSize); 1122*e8d8bef9SDimitry Andric std::iota(SMask.begin(), SMask.begin() + CurSize, 0); 1123*e8d8bef9SDimitry Andric std::fill(SMask.begin() + CurSize, SMask.end(), CurSize); 1124*e8d8bef9SDimitry Andric Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad); 1125*e8d8bef9SDimitry Andric return Builder.CreateShuffleVector(Val, PadVec, SMask); 1126*e8d8bef9SDimitry Andric } 1127*e8d8bef9SDimitry Andric 1128*e8d8bef9SDimitry Andric auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask, 1129*e8d8bef9SDimitry Andric Type *FromTy, Type *ToTy) const -> Value * { 1130*e8d8bef9SDimitry Andric // Mask is a vector <N x i1>, where each element corresponds to an 1131*e8d8bef9SDimitry Andric // element of FromTy. Remap it so that each element will correspond 1132*e8d8bef9SDimitry Andric // to an element of ToTy. 1133*e8d8bef9SDimitry Andric assert(isa<VectorType>(Mask->getType())); 1134*e8d8bef9SDimitry Andric 1135*e8d8bef9SDimitry Andric Type *FromSTy = FromTy->getScalarType(); 1136*e8d8bef9SDimitry Andric Type *ToSTy = ToTy->getScalarType(); 1137*e8d8bef9SDimitry Andric if (FromSTy == ToSTy) 1138*e8d8bef9SDimitry Andric return Mask; 1139*e8d8bef9SDimitry Andric 1140*e8d8bef9SDimitry Andric int FromSize = getSizeOf(FromSTy); 1141*e8d8bef9SDimitry Andric int ToSize = getSizeOf(ToSTy); 1142*e8d8bef9SDimitry Andric assert(FromSize % ToSize == 0 || ToSize % FromSize == 0); 1143*e8d8bef9SDimitry Andric 1144*e8d8bef9SDimitry Andric auto *MaskTy = cast<VectorType>(Mask->getType()); 1145*e8d8bef9SDimitry Andric int FromCount = MaskTy->getElementCount().getFixedValue(); 1146*e8d8bef9SDimitry Andric int ToCount = (FromCount * FromSize) / ToSize; 1147*e8d8bef9SDimitry Andric assert((FromCount * FromSize) % ToSize == 0); 1148*e8d8bef9SDimitry Andric 1149*e8d8bef9SDimitry Andric // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> -> 1150*e8d8bef9SDimitry Andric // -> trunc to <M x i1>. 1151*e8d8bef9SDimitry Andric Value *Ext = Builder.CreateSExt( 1152*e8d8bef9SDimitry Andric Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false)); 1153*e8d8bef9SDimitry Andric Value *Cast = Builder.CreateBitCast( 1154*e8d8bef9SDimitry Andric Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false)); 1155*e8d8bef9SDimitry Andric return Builder.CreateTrunc( 1156*e8d8bef9SDimitry Andric Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false)); 1157*e8d8bef9SDimitry Andric } 1158*e8d8bef9SDimitry Andric 1159*e8d8bef9SDimitry Andric // Bitcast to bytes, and return least significant bits. 1160*e8d8bef9SDimitry Andric auto HexagonVectorCombine::vlsb(IRBuilder<> &Builder, Value *Val) const 1161*e8d8bef9SDimitry Andric -> Value * { 1162*e8d8bef9SDimitry Andric Type *ScalarTy = Val->getType()->getScalarType(); 1163*e8d8bef9SDimitry Andric if (ScalarTy == getBoolTy()) 1164*e8d8bef9SDimitry Andric return Val; 1165*e8d8bef9SDimitry Andric 1166*e8d8bef9SDimitry Andric Value *Bytes = vbytes(Builder, Val); 1167*e8d8bef9SDimitry Andric if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType())) 1168*e8d8bef9SDimitry Andric return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy))); 1169*e8d8bef9SDimitry Andric // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not 1170*e8d8bef9SDimitry Andric // <1 x i1>. 1171*e8d8bef9SDimitry Andric return Builder.CreateTrunc(Bytes, getBoolTy()); 1172*e8d8bef9SDimitry Andric } 1173*e8d8bef9SDimitry Andric 1174*e8d8bef9SDimitry Andric // Bitcast to bytes for non-bool. For bool, convert i1 -> i8. 1175*e8d8bef9SDimitry Andric auto HexagonVectorCombine::vbytes(IRBuilder<> &Builder, Value *Val) const 1176*e8d8bef9SDimitry Andric -> Value * { 1177*e8d8bef9SDimitry Andric Type *ScalarTy = Val->getType()->getScalarType(); 1178*e8d8bef9SDimitry Andric if (ScalarTy == getByteTy()) 1179*e8d8bef9SDimitry Andric return Val; 1180*e8d8bef9SDimitry Andric 1181*e8d8bef9SDimitry Andric if (ScalarTy != getBoolTy()) 1182*e8d8bef9SDimitry Andric return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val))); 1183*e8d8bef9SDimitry Andric // For bool, return a sext from i1 to i8. 1184*e8d8bef9SDimitry Andric if (auto *VecTy = dyn_cast<VectorType>(Val->getType())) 1185*e8d8bef9SDimitry Andric return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy)); 1186*e8d8bef9SDimitry Andric return Builder.CreateSExt(Val, getByteTy()); 1187*e8d8bef9SDimitry Andric } 1188*e8d8bef9SDimitry Andric 1189*e8d8bef9SDimitry Andric auto HexagonVectorCombine::createHvxIntrinsic(IRBuilder<> &Builder, 1190*e8d8bef9SDimitry Andric Intrinsic::ID IntID, Type *RetTy, 1191*e8d8bef9SDimitry Andric ArrayRef<Value *> Args) const 1192*e8d8bef9SDimitry Andric -> Value * { 1193*e8d8bef9SDimitry Andric int HwLen = HST.getVectorLength(); 1194*e8d8bef9SDimitry Andric Type *BoolTy = Type::getInt1Ty(F.getContext()); 1195*e8d8bef9SDimitry Andric Type *Int32Ty = Type::getInt32Ty(F.getContext()); 1196*e8d8bef9SDimitry Andric // HVX vector -> v16i32/v32i32 1197*e8d8bef9SDimitry Andric // HVX vector predicate -> v512i1/v1024i1 1198*e8d8bef9SDimitry Andric auto getTypeForIntrin = [&](Type *Ty) -> Type * { 1199*e8d8bef9SDimitry Andric if (HST.isTypeForHVX(Ty, /*IncludeBool*/ true)) { 1200*e8d8bef9SDimitry Andric Type *ElemTy = cast<VectorType>(Ty)->getElementType(); 1201*e8d8bef9SDimitry Andric if (ElemTy == Int32Ty) 1202*e8d8bef9SDimitry Andric return Ty; 1203*e8d8bef9SDimitry Andric if (ElemTy == BoolTy) 1204*e8d8bef9SDimitry Andric return VectorType::get(BoolTy, 8 * HwLen, /*Scalable*/ false); 1205*e8d8bef9SDimitry Andric return VectorType::get(Int32Ty, HwLen / 4, /*Scalable*/ false); 1206*e8d8bef9SDimitry Andric } 1207*e8d8bef9SDimitry Andric // Non-HVX type. It should be a scalar. 1208*e8d8bef9SDimitry Andric assert(Ty == Int32Ty || Ty->isIntegerTy(64)); 1209*e8d8bef9SDimitry Andric return Ty; 1210*e8d8bef9SDimitry Andric }; 1211*e8d8bef9SDimitry Andric 1212*e8d8bef9SDimitry Andric auto getCast = [&](IRBuilder<> &Builder, Value *Val, 1213*e8d8bef9SDimitry Andric Type *DestTy) -> Value * { 1214*e8d8bef9SDimitry Andric Type *SrcTy = Val->getType(); 1215*e8d8bef9SDimitry Andric if (SrcTy == DestTy) 1216*e8d8bef9SDimitry Andric return Val; 1217*e8d8bef9SDimitry Andric if (HST.isTypeForHVX(SrcTy, /*IncludeBool*/ true)) { 1218*e8d8bef9SDimitry Andric if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) { 1219*e8d8bef9SDimitry Andric // This should take care of casts the other way too, for example 1220*e8d8bef9SDimitry Andric // v1024i1 -> v32i1. 1221*e8d8bef9SDimitry Andric Intrinsic::ID TC = HwLen == 64 1222*e8d8bef9SDimitry Andric ? Intrinsic::hexagon_V6_pred_typecast 1223*e8d8bef9SDimitry Andric : Intrinsic::hexagon_V6_pred_typecast_128B; 1224*e8d8bef9SDimitry Andric Function *FI = Intrinsic::getDeclaration(F.getParent(), TC, 1225*e8d8bef9SDimitry Andric {DestTy, Val->getType()}); 1226*e8d8bef9SDimitry Andric return Builder.CreateCall(FI, {Val}); 1227*e8d8bef9SDimitry Andric } 1228*e8d8bef9SDimitry Andric // Non-predicate HVX vector. 1229*e8d8bef9SDimitry Andric return Builder.CreateBitCast(Val, DestTy); 1230*e8d8bef9SDimitry Andric } 1231*e8d8bef9SDimitry Andric // Non-HVX type. It should be a scalar, and it should already have 1232*e8d8bef9SDimitry Andric // a valid type. 1233*e8d8bef9SDimitry Andric llvm_unreachable("Unexpected type"); 1234*e8d8bef9SDimitry Andric }; 1235*e8d8bef9SDimitry Andric 1236*e8d8bef9SDimitry Andric SmallVector<Value *, 4> IntOps; 1237*e8d8bef9SDimitry Andric for (Value *A : Args) 1238*e8d8bef9SDimitry Andric IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType()))); 1239*e8d8bef9SDimitry Andric Function *FI = Intrinsic::getDeclaration(F.getParent(), IntID); 1240*e8d8bef9SDimitry Andric Value *Call = Builder.CreateCall(FI, IntOps); 1241*e8d8bef9SDimitry Andric 1242*e8d8bef9SDimitry Andric Type *CallTy = Call->getType(); 1243*e8d8bef9SDimitry Andric if (CallTy == RetTy) 1244*e8d8bef9SDimitry Andric return Call; 1245*e8d8bef9SDimitry Andric // Scalar types should have RetTy matching the call return type. 1246*e8d8bef9SDimitry Andric assert(HST.isTypeForHVX(CallTy, /*IncludeBool*/ true)); 1247*e8d8bef9SDimitry Andric if (cast<VectorType>(CallTy)->getElementType() == BoolTy) 1248*e8d8bef9SDimitry Andric return getCast(Builder, Call, RetTy); 1249*e8d8bef9SDimitry Andric return Builder.CreateBitCast(Call, RetTy); 1250*e8d8bef9SDimitry Andric } 1251*e8d8bef9SDimitry Andric 1252*e8d8bef9SDimitry Andric auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0, 1253*e8d8bef9SDimitry Andric Value *Ptr1) const 1254*e8d8bef9SDimitry Andric -> Optional<int> { 1255*e8d8bef9SDimitry Andric struct Builder : IRBuilder<> { 1256*e8d8bef9SDimitry Andric Builder(BasicBlock *B) : IRBuilder<>(B) {} 1257*e8d8bef9SDimitry Andric ~Builder() { 1258*e8d8bef9SDimitry Andric for (Instruction *I : llvm::reverse(ToErase)) 1259*e8d8bef9SDimitry Andric I->eraseFromParent(); 1260*e8d8bef9SDimitry Andric } 1261*e8d8bef9SDimitry Andric SmallVector<Instruction *, 8> ToErase; 1262*e8d8bef9SDimitry Andric }; 1263*e8d8bef9SDimitry Andric 1264*e8d8bef9SDimitry Andric #define CallBuilder(B, F) \ 1265*e8d8bef9SDimitry Andric [&](auto &B_) { \ 1266*e8d8bef9SDimitry Andric Value *V = B_.F; \ 1267*e8d8bef9SDimitry Andric if (auto *I = dyn_cast<Instruction>(V)) \ 1268*e8d8bef9SDimitry Andric B_.ToErase.push_back(I); \ 1269*e8d8bef9SDimitry Andric return V; \ 1270*e8d8bef9SDimitry Andric }(B) 1271*e8d8bef9SDimitry Andric 1272*e8d8bef9SDimitry Andric auto Simplify = [&](Value *V) { 1273*e8d8bef9SDimitry Andric if (auto *I = dyn_cast<Instruction>(V)) { 1274*e8d8bef9SDimitry Andric SimplifyQuery Q(DL, &TLI, &DT, &AC, I); 1275*e8d8bef9SDimitry Andric if (Value *S = SimplifyInstruction(I, Q)) 1276*e8d8bef9SDimitry Andric return S; 1277*e8d8bef9SDimitry Andric } 1278*e8d8bef9SDimitry Andric return V; 1279*e8d8bef9SDimitry Andric }; 1280*e8d8bef9SDimitry Andric 1281*e8d8bef9SDimitry Andric auto StripBitCast = [](Value *V) { 1282*e8d8bef9SDimitry Andric while (auto *C = dyn_cast<BitCastInst>(V)) 1283*e8d8bef9SDimitry Andric V = C->getOperand(0); 1284*e8d8bef9SDimitry Andric return V; 1285*e8d8bef9SDimitry Andric }; 1286*e8d8bef9SDimitry Andric 1287*e8d8bef9SDimitry Andric Ptr0 = StripBitCast(Ptr0); 1288*e8d8bef9SDimitry Andric Ptr1 = StripBitCast(Ptr1); 1289*e8d8bef9SDimitry Andric if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1)) 1290*e8d8bef9SDimitry Andric return None; 1291*e8d8bef9SDimitry Andric 1292*e8d8bef9SDimitry Andric auto *Gep0 = cast<GetElementPtrInst>(Ptr0); 1293*e8d8bef9SDimitry Andric auto *Gep1 = cast<GetElementPtrInst>(Ptr1); 1294*e8d8bef9SDimitry Andric if (Gep0->getPointerOperand() != Gep1->getPointerOperand()) 1295*e8d8bef9SDimitry Andric return None; 1296*e8d8bef9SDimitry Andric 1297*e8d8bef9SDimitry Andric Builder B(Gep0->getParent()); 1298*e8d8bef9SDimitry Andric Value *BasePtr = Gep0->getPointerOperand(); 1299*e8d8bef9SDimitry Andric int Scale = DL.getTypeStoreSize(BasePtr->getType()->getPointerElementType()); 1300*e8d8bef9SDimitry Andric 1301*e8d8bef9SDimitry Andric // FIXME: for now only check GEPs with a single index. 1302*e8d8bef9SDimitry Andric if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2) 1303*e8d8bef9SDimitry Andric return None; 1304*e8d8bef9SDimitry Andric 1305*e8d8bef9SDimitry Andric Value *Idx0 = Gep0->getOperand(1); 1306*e8d8bef9SDimitry Andric Value *Idx1 = Gep1->getOperand(1); 1307*e8d8bef9SDimitry Andric 1308*e8d8bef9SDimitry Andric // First, try to simplify the subtraction directly. 1309*e8d8bef9SDimitry Andric if (auto *Diff = dyn_cast<ConstantInt>( 1310*e8d8bef9SDimitry Andric Simplify(CallBuilder(B, CreateSub(Idx0, Idx1))))) 1311*e8d8bef9SDimitry Andric return Diff->getSExtValue() * Scale; 1312*e8d8bef9SDimitry Andric 1313*e8d8bef9SDimitry Andric KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT); 1314*e8d8bef9SDimitry Andric KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT); 1315*e8d8bef9SDimitry Andric APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One); 1316*e8d8bef9SDimitry Andric if (Unknown.isAllOnesValue()) 1317*e8d8bef9SDimitry Andric return None; 1318*e8d8bef9SDimitry Andric 1319*e8d8bef9SDimitry Andric Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown); 1320*e8d8bef9SDimitry Andric Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU))); 1321*e8d8bef9SDimitry Andric Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU))); 1322*e8d8bef9SDimitry Andric Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1))); 1323*e8d8bef9SDimitry Andric int Diff0 = 0; 1324*e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(SubU)) { 1325*e8d8bef9SDimitry Andric Diff0 = C->getSExtValue(); 1326*e8d8bef9SDimitry Andric } else { 1327*e8d8bef9SDimitry Andric return None; 1328*e8d8bef9SDimitry Andric } 1329*e8d8bef9SDimitry Andric 1330*e8d8bef9SDimitry Andric Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown); 1331*e8d8bef9SDimitry Andric Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK))); 1332*e8d8bef9SDimitry Andric Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK))); 1333*e8d8bef9SDimitry Andric Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1))); 1334*e8d8bef9SDimitry Andric int Diff1 = 0; 1335*e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(SubK)) { 1336*e8d8bef9SDimitry Andric Diff1 = C->getSExtValue(); 1337*e8d8bef9SDimitry Andric } else { 1338*e8d8bef9SDimitry Andric return None; 1339*e8d8bef9SDimitry Andric } 1340*e8d8bef9SDimitry Andric 1341*e8d8bef9SDimitry Andric return (Diff0 + Diff1) * Scale; 1342*e8d8bef9SDimitry Andric 1343*e8d8bef9SDimitry Andric #undef CallBuilder 1344*e8d8bef9SDimitry Andric } 1345*e8d8bef9SDimitry Andric 1346*e8d8bef9SDimitry Andric template <typename T> 1347*e8d8bef9SDimitry Andric auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In, 1348*e8d8bef9SDimitry Andric BasicBlock::const_iterator To, 1349*e8d8bef9SDimitry Andric const T &Ignore) const 1350*e8d8bef9SDimitry Andric -> bool { 1351*e8d8bef9SDimitry Andric auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> { 1352*e8d8bef9SDimitry Andric if (const auto *II = dyn_cast<IntrinsicInst>(&I)) { 1353*e8d8bef9SDimitry Andric switch (II->getIntrinsicID()) { 1354*e8d8bef9SDimitry Andric case Intrinsic::masked_load: 1355*e8d8bef9SDimitry Andric return MemoryLocation::getForArgument(II, 0, TLI); 1356*e8d8bef9SDimitry Andric case Intrinsic::masked_store: 1357*e8d8bef9SDimitry Andric return MemoryLocation::getForArgument(II, 1, TLI); 1358*e8d8bef9SDimitry Andric } 1359*e8d8bef9SDimitry Andric } 1360*e8d8bef9SDimitry Andric return MemoryLocation::getOrNone(&I); 1361*e8d8bef9SDimitry Andric }; 1362*e8d8bef9SDimitry Andric 1363*e8d8bef9SDimitry Andric // The source and the destination must be in the same basic block. 1364*e8d8bef9SDimitry Andric const BasicBlock &Block = *In.getParent(); 1365*e8d8bef9SDimitry Andric assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block); 1366*e8d8bef9SDimitry Andric // No PHIs. 1367*e8d8bef9SDimitry Andric if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To))) 1368*e8d8bef9SDimitry Andric return false; 1369*e8d8bef9SDimitry Andric 1370*e8d8bef9SDimitry Andric if (!mayBeMemoryDependent(In)) 1371*e8d8bef9SDimitry Andric return true; 1372*e8d8bef9SDimitry Andric bool MayWrite = In.mayWriteToMemory(); 1373*e8d8bef9SDimitry Andric auto MaybeLoc = getLocOrNone(In); 1374*e8d8bef9SDimitry Andric 1375*e8d8bef9SDimitry Andric auto From = In.getIterator(); 1376*e8d8bef9SDimitry Andric if (From == To) 1377*e8d8bef9SDimitry Andric return true; 1378*e8d8bef9SDimitry Andric bool MoveUp = (To != Block.end() && To->comesBefore(&In)); 1379*e8d8bef9SDimitry Andric auto Range = 1380*e8d8bef9SDimitry Andric MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To); 1381*e8d8bef9SDimitry Andric for (auto It = Range.first; It != Range.second; ++It) { 1382*e8d8bef9SDimitry Andric const Instruction &I = *It; 1383*e8d8bef9SDimitry Andric if (llvm::is_contained(Ignore, &I)) 1384*e8d8bef9SDimitry Andric continue; 1385*e8d8bef9SDimitry Andric // Parts based on isSafeToMoveBefore from CoveMoverUtils.cpp. 1386*e8d8bef9SDimitry Andric if (I.mayThrow()) 1387*e8d8bef9SDimitry Andric return false; 1388*e8d8bef9SDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) { 1389*e8d8bef9SDimitry Andric if (!CB->hasFnAttr(Attribute::WillReturn)) 1390*e8d8bef9SDimitry Andric return false; 1391*e8d8bef9SDimitry Andric if (!CB->hasFnAttr(Attribute::NoSync)) 1392*e8d8bef9SDimitry Andric return false; 1393*e8d8bef9SDimitry Andric } 1394*e8d8bef9SDimitry Andric if (I.mayReadOrWriteMemory()) { 1395*e8d8bef9SDimitry Andric auto MaybeLocI = getLocOrNone(I); 1396*e8d8bef9SDimitry Andric if (MayWrite || I.mayWriteToMemory()) { 1397*e8d8bef9SDimitry Andric if (!MaybeLoc || !MaybeLocI) 1398*e8d8bef9SDimitry Andric return false; 1399*e8d8bef9SDimitry Andric if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI)) 1400*e8d8bef9SDimitry Andric return false; 1401*e8d8bef9SDimitry Andric } 1402*e8d8bef9SDimitry Andric } 1403*e8d8bef9SDimitry Andric } 1404*e8d8bef9SDimitry Andric return true; 1405*e8d8bef9SDimitry Andric } 1406*e8d8bef9SDimitry Andric 1407*e8d8bef9SDimitry Andric #ifndef NDEBUG 1408*e8d8bef9SDimitry Andric auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool { 1409*e8d8bef9SDimitry Andric if (auto *VecTy = dyn_cast<VectorType>(Ty)) 1410*e8d8bef9SDimitry Andric return VecTy->getElementType() == getByteTy(); 1411*e8d8bef9SDimitry Andric return false; 1412*e8d8bef9SDimitry Andric } 1413*e8d8bef9SDimitry Andric 1414*e8d8bef9SDimitry Andric auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool { 1415*e8d8bef9SDimitry Andric if (!isByteVecTy(Ty)) 1416*e8d8bef9SDimitry Andric return false; 1417*e8d8bef9SDimitry Andric int Size = getSizeOf(Ty); 1418*e8d8bef9SDimitry Andric if (HST.isTypeForHVX(Ty)) 1419*e8d8bef9SDimitry Andric return Size == static_cast<int>(HST.getVectorLength()); 1420*e8d8bef9SDimitry Andric return Size == 4 || Size == 8; 1421*e8d8bef9SDimitry Andric } 1422*e8d8bef9SDimitry Andric #endif 1423*e8d8bef9SDimitry Andric 1424*e8d8bef9SDimitry Andric auto HexagonVectorCombine::getElementRange(IRBuilder<> &Builder, Value *Lo, 1425*e8d8bef9SDimitry Andric Value *Hi, int Start, 1426*e8d8bef9SDimitry Andric int Length) const -> Value * { 1427*e8d8bef9SDimitry Andric assert(0 <= Start && Start < Length); 1428*e8d8bef9SDimitry Andric SmallVector<int, 128> SMask(Length); 1429*e8d8bef9SDimitry Andric std::iota(SMask.begin(), SMask.end(), Start); 1430*e8d8bef9SDimitry Andric return Builder.CreateShuffleVector(Lo, Hi, SMask); 1431*e8d8bef9SDimitry Andric } 1432*e8d8bef9SDimitry Andric 1433*e8d8bef9SDimitry Andric // Pass management. 1434*e8d8bef9SDimitry Andric 1435*e8d8bef9SDimitry Andric namespace llvm { 1436*e8d8bef9SDimitry Andric void initializeHexagonVectorCombineLegacyPass(PassRegistry &); 1437*e8d8bef9SDimitry Andric FunctionPass *createHexagonVectorCombineLegacyPass(); 1438*e8d8bef9SDimitry Andric } // namespace llvm 1439*e8d8bef9SDimitry Andric 1440*e8d8bef9SDimitry Andric namespace { 1441*e8d8bef9SDimitry Andric class HexagonVectorCombineLegacy : public FunctionPass { 1442*e8d8bef9SDimitry Andric public: 1443*e8d8bef9SDimitry Andric static char ID; 1444*e8d8bef9SDimitry Andric 1445*e8d8bef9SDimitry Andric HexagonVectorCombineLegacy() : FunctionPass(ID) {} 1446*e8d8bef9SDimitry Andric 1447*e8d8bef9SDimitry Andric StringRef getPassName() const override { return "Hexagon Vector Combine"; } 1448*e8d8bef9SDimitry Andric 1449*e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1450*e8d8bef9SDimitry Andric AU.setPreservesCFG(); 1451*e8d8bef9SDimitry Andric AU.addRequired<AAResultsWrapperPass>(); 1452*e8d8bef9SDimitry Andric AU.addRequired<AssumptionCacheTracker>(); 1453*e8d8bef9SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 1454*e8d8bef9SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 1455*e8d8bef9SDimitry Andric AU.addRequired<TargetPassConfig>(); 1456*e8d8bef9SDimitry Andric FunctionPass::getAnalysisUsage(AU); 1457*e8d8bef9SDimitry Andric } 1458*e8d8bef9SDimitry Andric 1459*e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override { 1460*e8d8bef9SDimitry Andric AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1461*e8d8bef9SDimitry Andric AssumptionCache &AC = 1462*e8d8bef9SDimitry Andric getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 1463*e8d8bef9SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1464*e8d8bef9SDimitry Andric TargetLibraryInfo &TLI = 1465*e8d8bef9SDimitry Andric getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1466*e8d8bef9SDimitry Andric auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>(); 1467*e8d8bef9SDimitry Andric HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM); 1468*e8d8bef9SDimitry Andric return HVC.run(); 1469*e8d8bef9SDimitry Andric } 1470*e8d8bef9SDimitry Andric }; 1471*e8d8bef9SDimitry Andric } // namespace 1472*e8d8bef9SDimitry Andric 1473*e8d8bef9SDimitry Andric char HexagonVectorCombineLegacy::ID = 0; 1474*e8d8bef9SDimitry Andric 1475*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE, 1476*e8d8bef9SDimitry Andric "Hexagon Vector Combine", false, false) 1477*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1478*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 1479*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1480*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 1481*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 1482*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE, 1483*e8d8bef9SDimitry Andric "Hexagon Vector Combine", false, false) 1484*e8d8bef9SDimitry Andric 1485*e8d8bef9SDimitry Andric FunctionPass *llvm::createHexagonVectorCombineLegacyPass() { 1486*e8d8bef9SDimitry Andric return new HexagonVectorCombineLegacy(); 1487*e8d8bef9SDimitry Andric } 1488