//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass converts vector operations into scalar operations (or, optionally,
// operations on smaller vector widths), in order to expose optimization
// opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 17480093f4SDimitry Andric #include "llvm/Transforms/Scalar/Scalarizer.h" 180b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h" 190b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 200b57cec5SDimitry Andric #include "llvm/ADT/Twine.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h" 220b57cec5SDimitry Andric #include "llvm/IR/Argument.h" 230b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 240b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 250b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 260b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 275ffd83dbSDimitry Andric #include "llvm/IR/Dominators.h" 280b57cec5SDimitry Andric #include "llvm/IR/Function.h" 290b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 300b57cec5SDimitry Andric #include "llvm/IR/InstVisitor.h" 310b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 320b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 330b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 340b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 350b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 360b57cec5SDimitry Andric #include "llvm/IR/Module.h" 370b57cec5SDimitry Andric #include "llvm/IR/Type.h" 380b57cec5SDimitry Andric #include "llvm/IR/Value.h" 390b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 40480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 415ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/Local.h" 420b57cec5SDimitry Andric #include <cassert> 430b57cec5SDimitry Andric #include <cstdint> 440b57cec5SDimitry Andric #include <iterator> 450b57cec5SDimitry Andric #include <map> 460b57cec5SDimitry Andric #include <utility> 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric using namespace llvm; 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric #define 
DEBUG_TYPE "scalarizer" 510b57cec5SDimitry Andric 5281ad6265SDimitry Andric static cl::opt<bool> ClScalarizeVariableInsertExtract( 535ffd83dbSDimitry Andric "scalarize-variable-insert-extract", cl::init(true), cl::Hidden, 545ffd83dbSDimitry Andric cl::desc("Allow the scalarizer pass to scalarize " 555ffd83dbSDimitry Andric "insertelement/extractelement with variable index")); 565ffd83dbSDimitry Andric 570b57cec5SDimitry Andric // This is disabled by default because having separate loads and stores 580b57cec5SDimitry Andric // makes it more likely that the -combiner-alias-analysis limits will be 590b57cec5SDimitry Andric // reached. 6081ad6265SDimitry Andric static cl::opt<bool> ClScalarizeLoadStore( 6181ad6265SDimitry Andric "scalarize-load-store", cl::init(false), cl::Hidden, 620b57cec5SDimitry Andric cl::desc("Allow the scalarizer pass to scalarize loads and store")); 630b57cec5SDimitry Andric 6406c3fb27SDimitry Andric // Split vectors larger than this size into fragments, where each fragment is 6506c3fb27SDimitry Andric // either a vector no larger than this size or a scalar. 6606c3fb27SDimitry Andric // 6706c3fb27SDimitry Andric // Instructions with operands or results of different sizes that would be split 6806c3fb27SDimitry Andric // into a different number of fragments are currently left as-is. 
6906c3fb27SDimitry Andric static cl::opt<unsigned> ClScalarizeMinBits( 7006c3fb27SDimitry Andric "scalarize-min-bits", cl::init(0), cl::Hidden, 7106c3fb27SDimitry Andric cl::desc("Instruct the scalarizer pass to attempt to keep values of a " 7206c3fb27SDimitry Andric "minimum number of bits")); 7306c3fb27SDimitry Andric 740b57cec5SDimitry Andric namespace { 750b57cec5SDimitry Andric 76349cc55cSDimitry Andric BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { 77349cc55cSDimitry Andric BasicBlock *BB = Itr->getParent(); 78349cc55cSDimitry Andric if (isa<PHINode>(Itr)) 79349cc55cSDimitry Andric Itr = BB->getFirstInsertionPt(); 80349cc55cSDimitry Andric if (Itr != BB->end()) 81349cc55cSDimitry Andric Itr = skipDebugIntrinsics(Itr); 82349cc55cSDimitry Andric return Itr; 83349cc55cSDimitry Andric } 84349cc55cSDimitry Andric 850b57cec5SDimitry Andric // Used to store the scattered form of a vector. 860b57cec5SDimitry Andric using ValueVector = SmallVector<Value *, 8>; 870b57cec5SDimitry Andric 88bdd1243dSDimitry Andric // Used to map a vector Value and associated type to its scattered form. 89bdd1243dSDimitry Andric // The associated type is only non-null for pointer values that are "scattered" 90bdd1243dSDimitry Andric // when used as pointer operands to load or store. 91bdd1243dSDimitry Andric // 92bdd1243dSDimitry Andric // We use std::map because we want iterators to persist across insertion and 93bdd1243dSDimitry Andric // because the values are relatively large. 94bdd1243dSDimitry Andric using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>; 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric // Lists Instructions that have been replaced with scalar implementations, 970b57cec5SDimitry Andric // along with a pointer to their scattered forms. 
980b57cec5SDimitry Andric using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>; 990b57cec5SDimitry Andric 10006c3fb27SDimitry Andric struct VectorSplit { 10106c3fb27SDimitry Andric // The type of the vector. 10206c3fb27SDimitry Andric FixedVectorType *VecTy = nullptr; 10306c3fb27SDimitry Andric 10406c3fb27SDimitry Andric // The number of elements packed in a fragment (other than the remainder). 10506c3fb27SDimitry Andric unsigned NumPacked = 0; 10606c3fb27SDimitry Andric 10706c3fb27SDimitry Andric // The number of fragments (scalars or smaller vectors) into which the vector 10806c3fb27SDimitry Andric // shall be split. 10906c3fb27SDimitry Andric unsigned NumFragments = 0; 11006c3fb27SDimitry Andric 11106c3fb27SDimitry Andric // The type of each complete fragment. 11206c3fb27SDimitry Andric Type *SplitTy = nullptr; 11306c3fb27SDimitry Andric 11406c3fb27SDimitry Andric // The type of the remainder (last) fragment; null if all fragments are 11506c3fb27SDimitry Andric // complete. 11606c3fb27SDimitry Andric Type *RemainderTy = nullptr; 11706c3fb27SDimitry Andric 11806c3fb27SDimitry Andric Type *getFragmentType(unsigned I) const { 11906c3fb27SDimitry Andric return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy; 12006c3fb27SDimitry Andric } 12106c3fb27SDimitry Andric }; 12206c3fb27SDimitry Andric 1230b57cec5SDimitry Andric // Provides a very limited vector-like interface for lazily accessing one 1240b57cec5SDimitry Andric // component of a scattered vector or vector pointer. 1250b57cec5SDimitry Andric class Scatterer { 1260b57cec5SDimitry Andric public: 1270b57cec5SDimitry Andric Scatterer() = default; 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric // Scatter V into Size components. If new instructions are needed, 1300b57cec5SDimitry Andric // insert them before BBI in BB. If Cache is nonnull, use it to cache 1310b57cec5SDimitry Andric // the results. 
13206c3fb27SDimitry Andric Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, 13306c3fb27SDimitry Andric const VectorSplit &VS, ValueVector *cachePtr = nullptr); 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric // Return component I, creating a new Value for it if necessary. 1360b57cec5SDimitry Andric Value *operator[](unsigned I); 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric // Return the number of components. 13906c3fb27SDimitry Andric unsigned size() const { return VS.NumFragments; } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric private: 1420b57cec5SDimitry Andric BasicBlock *BB; 1430b57cec5SDimitry Andric BasicBlock::iterator BBI; 1440b57cec5SDimitry Andric Value *V; 14506c3fb27SDimitry Andric VectorSplit VS; 14606c3fb27SDimitry Andric bool IsPointer; 1470b57cec5SDimitry Andric ValueVector *CachePtr; 1480b57cec5SDimitry Andric ValueVector Tmp; 1490b57cec5SDimitry Andric }; 1500b57cec5SDimitry Andric 151bdd1243dSDimitry Andric // FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp 1520b57cec5SDimitry Andric // called Name that compares X and Y in the same way as FCI. 1530b57cec5SDimitry Andric struct FCmpSplitter { 1540b57cec5SDimitry Andric FCmpSplitter(FCmpInst &fci) : FCI(fci) {} 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 1570b57cec5SDimitry Andric const Twine &Name) const { 1580b57cec5SDimitry Andric return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric FCmpInst &FCI; 1620b57cec5SDimitry Andric }; 1630b57cec5SDimitry Andric 164bdd1243dSDimitry Andric // ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp 1650b57cec5SDimitry Andric // called Name that compares X and Y in the same way as ICI. 
1660b57cec5SDimitry Andric struct ICmpSplitter { 1670b57cec5SDimitry Andric ICmpSplitter(ICmpInst &ici) : ICI(ici) {} 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 1700b57cec5SDimitry Andric const Twine &Name) const { 1710b57cec5SDimitry Andric return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name); 1720b57cec5SDimitry Andric } 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric ICmpInst &ICI; 1750b57cec5SDimitry Andric }; 1760b57cec5SDimitry Andric 177bdd1243dSDimitry Andric // UnarySplitter(UO)(Builder, X, Name) uses Builder to create 1780b57cec5SDimitry Andric // a unary operator like UO called Name with operand X. 1790b57cec5SDimitry Andric struct UnarySplitter { 1800b57cec5SDimitry Andric UnarySplitter(UnaryOperator &uo) : UO(uo) {} 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const { 1830b57cec5SDimitry Andric return Builder.CreateUnOp(UO.getOpcode(), Op, Name); 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric UnaryOperator &UO; 1870b57cec5SDimitry Andric }; 1880b57cec5SDimitry Andric 189bdd1243dSDimitry Andric // BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create 1900b57cec5SDimitry Andric // a binary operator like BO called Name with operands X and Y. 
1910b57cec5SDimitry Andric struct BinarySplitter { 1920b57cec5SDimitry Andric BinarySplitter(BinaryOperator &bo) : BO(bo) {} 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 1950b57cec5SDimitry Andric const Twine &Name) const { 1960b57cec5SDimitry Andric return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric BinaryOperator &BO; 2000b57cec5SDimitry Andric }; 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric // Information about a load or store that we're scalarizing. 2030b57cec5SDimitry Andric struct VectorLayout { 2040b57cec5SDimitry Andric VectorLayout() = default; 2050b57cec5SDimitry Andric 20606c3fb27SDimitry Andric // Return the alignment of fragment Frag. 20706c3fb27SDimitry Andric Align getFragmentAlign(unsigned Frag) { 20806c3fb27SDimitry Andric return commonAlignment(VecAlign, Frag * SplitSize); 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 21106c3fb27SDimitry Andric // The split of the underlying vector type. 21206c3fb27SDimitry Andric VectorSplit VS; 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric // The alignment of the vector. 2155ffd83dbSDimitry Andric Align VecAlign; 2160b57cec5SDimitry Andric 21706c3fb27SDimitry Andric // The size of each (non-remainder) fragment in bytes. 21806c3fb27SDimitry Andric uint64_t SplitSize = 0; 2190b57cec5SDimitry Andric }; 2200b57cec5SDimitry Andric 22106c3fb27SDimitry Andric /// Concatenate the given fragments to a single vector value of the type 22206c3fb27SDimitry Andric /// described in @p VS. 
22306c3fb27SDimitry Andric static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments, 22406c3fb27SDimitry Andric const VectorSplit &VS, Twine Name) { 22506c3fb27SDimitry Andric unsigned NumElements = VS.VecTy->getNumElements(); 22606c3fb27SDimitry Andric SmallVector<int> ExtendMask; 22706c3fb27SDimitry Andric SmallVector<int> InsertMask; 22806c3fb27SDimitry Andric 22906c3fb27SDimitry Andric if (VS.NumPacked > 1) { 23006c3fb27SDimitry Andric // Prepare the shufflevector masks once and re-use them for all 23106c3fb27SDimitry Andric // fragments. 23206c3fb27SDimitry Andric ExtendMask.resize(NumElements, -1); 23306c3fb27SDimitry Andric for (unsigned I = 0; I < VS.NumPacked; ++I) 23406c3fb27SDimitry Andric ExtendMask[I] = I; 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric InsertMask.resize(NumElements); 23706c3fb27SDimitry Andric for (unsigned I = 0; I < NumElements; ++I) 23806c3fb27SDimitry Andric InsertMask[I] = I; 23906c3fb27SDimitry Andric } 24006c3fb27SDimitry Andric 24106c3fb27SDimitry Andric Value *Res = PoisonValue::get(VS.VecTy); 24206c3fb27SDimitry Andric for (unsigned I = 0; I < VS.NumFragments; ++I) { 24306c3fb27SDimitry Andric Value *Fragment = Fragments[I]; 24406c3fb27SDimitry Andric 24506c3fb27SDimitry Andric unsigned NumPacked = VS.NumPacked; 24606c3fb27SDimitry Andric if (I == VS.NumFragments - 1 && VS.RemainderTy) { 24706c3fb27SDimitry Andric if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy)) 24806c3fb27SDimitry Andric NumPacked = RemVecTy->getNumElements(); 24906c3fb27SDimitry Andric else 25006c3fb27SDimitry Andric NumPacked = 1; 25106c3fb27SDimitry Andric } 25206c3fb27SDimitry Andric 25306c3fb27SDimitry Andric if (NumPacked == 1) { 25406c3fb27SDimitry Andric Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked, 25506c3fb27SDimitry Andric Name + ".upto" + Twine(I)); 25606c3fb27SDimitry Andric } else { 25706c3fb27SDimitry Andric Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask); 
25806c3fb27SDimitry Andric if (I == 0) { 25906c3fb27SDimitry Andric Res = Fragment; 26006c3fb27SDimitry Andric } else { 26106c3fb27SDimitry Andric for (unsigned J = 0; J < NumPacked; ++J) 26206c3fb27SDimitry Andric InsertMask[I * VS.NumPacked + J] = NumElements + J; 26306c3fb27SDimitry Andric Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask, 26406c3fb27SDimitry Andric Name + ".upto" + Twine(I)); 26506c3fb27SDimitry Andric for (unsigned J = 0; J < NumPacked; ++J) 26606c3fb27SDimitry Andric InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J; 26706c3fb27SDimitry Andric } 26806c3fb27SDimitry Andric } 26906c3fb27SDimitry Andric } 27006c3fb27SDimitry Andric 27106c3fb27SDimitry Andric return Res; 27206c3fb27SDimitry Andric } 27306c3fb27SDimitry Andric 27481ad6265SDimitry Andric template <typename T> 27581ad6265SDimitry Andric T getWithDefaultOverride(const cl::opt<T> &ClOption, 276bdd1243dSDimitry Andric const std::optional<T> &DefaultOverride) { 27781ad6265SDimitry Andric return ClOption.getNumOccurrences() ? 
ClOption 27881ad6265SDimitry Andric : DefaultOverride.value_or(ClOption); 27981ad6265SDimitry Andric } 28081ad6265SDimitry Andric 2810b57cec5SDimitry Andric class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { 2820b57cec5SDimitry Andric public: 2835f757f3fSDimitry Andric ScalarizerVisitor(DominatorTree *DT, ScalarizerPassOptions Options) 2845f757f3fSDimitry Andric : DT(DT), ScalarizeVariableInsertExtract(getWithDefaultOverride( 2855f757f3fSDimitry Andric ClScalarizeVariableInsertExtract, 28681ad6265SDimitry Andric Options.ScalarizeVariableInsertExtract)), 28781ad6265SDimitry Andric ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore, 28806c3fb27SDimitry Andric Options.ScalarizeLoadStore)), 28906c3fb27SDimitry Andric ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits, 29006c3fb27SDimitry Andric Options.ScalarizeMinBits)) {} 2910b57cec5SDimitry Andric 2920b57cec5SDimitry Andric bool visit(Function &F); 2930b57cec5SDimitry Andric 2940b57cec5SDimitry Andric // InstVisitor methods. They return true if the instruction was scalarized, 2950b57cec5SDimitry Andric // false if nothing changed. 
2960b57cec5SDimitry Andric bool visitInstruction(Instruction &I) { return false; } 2970b57cec5SDimitry Andric bool visitSelectInst(SelectInst &SI); 2980b57cec5SDimitry Andric bool visitICmpInst(ICmpInst &ICI); 2990b57cec5SDimitry Andric bool visitFCmpInst(FCmpInst &FCI); 3000b57cec5SDimitry Andric bool visitUnaryOperator(UnaryOperator &UO); 3010b57cec5SDimitry Andric bool visitBinaryOperator(BinaryOperator &BO); 3020b57cec5SDimitry Andric bool visitGetElementPtrInst(GetElementPtrInst &GEPI); 3030b57cec5SDimitry Andric bool visitCastInst(CastInst &CI); 3040b57cec5SDimitry Andric bool visitBitCastInst(BitCastInst &BCI); 3055ffd83dbSDimitry Andric bool visitInsertElementInst(InsertElementInst &IEI); 3065ffd83dbSDimitry Andric bool visitExtractElementInst(ExtractElementInst &EEI); 3070b57cec5SDimitry Andric bool visitShuffleVectorInst(ShuffleVectorInst &SVI); 3080b57cec5SDimitry Andric bool visitPHINode(PHINode &PHI); 3090b57cec5SDimitry Andric bool visitLoadInst(LoadInst &LI); 3100b57cec5SDimitry Andric bool visitStoreInst(StoreInst &SI); 3110b57cec5SDimitry Andric bool visitCallInst(CallInst &ICI); 31206c3fb27SDimitry Andric bool visitFreezeInst(FreezeInst &FI); 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric private: 31506c3fb27SDimitry Andric Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS); 31606c3fb27SDimitry Andric void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS); 31781ad6265SDimitry Andric void replaceUses(Instruction *Op, Value *CV); 3180b57cec5SDimitry Andric bool canTransferMetadata(unsigned Kind); 3190b57cec5SDimitry Andric void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV); 32006c3fb27SDimitry Andric std::optional<VectorSplit> getVectorSplit(Type *Ty); 321bdd1243dSDimitry Andric std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment, 3220b57cec5SDimitry Andric const DataLayout &DL); 3230b57cec5SDimitry Andric bool finish(); 3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric template<typename T> bool splitUnary(Instruction &, const T &); 3260b57cec5SDimitry Andric template<typename T> bool splitBinary(Instruction &, const T &); 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric bool splitCall(CallInst &CI); 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric ScatterMap Scattered; 3310b57cec5SDimitry Andric GatherList Gathered; 33281ad6265SDimitry Andric bool Scalarized; 3330b57cec5SDimitry Andric 3345ffd83dbSDimitry Andric SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs; 3355ffd83dbSDimitry Andric 336480093f4SDimitry Andric DominatorTree *DT; 33781ad6265SDimitry Andric 33881ad6265SDimitry Andric const bool ScalarizeVariableInsertExtract; 33981ad6265SDimitry Andric const bool ScalarizeLoadStore; 34006c3fb27SDimitry Andric const unsigned ScalarizeMinBits; 3410b57cec5SDimitry Andric }; 3420b57cec5SDimitry Andric 3430b57cec5SDimitry Andric } // end anonymous namespace 3440b57cec5SDimitry Andric 3450b57cec5SDimitry Andric Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, 34606c3fb27SDimitry Andric const VectorSplit &VS, ValueVector *cachePtr) 34706c3fb27SDimitry Andric : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) { 34806c3fb27SDimitry Andric IsPointer = V->getType()->isPointerTy(); 34906c3fb27SDimitry Andric if (!CachePtr) { 35006c3fb27SDimitry Andric Tmp.resize(VS.NumFragments, nullptr); 35106c3fb27SDimitry Andric } else { 35206c3fb27SDimitry Andric assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() || 35306c3fb27SDimitry Andric IsPointer) && 35406c3fb27SDimitry Andric "Inconsistent vector sizes"); 35506c3fb27SDimitry Andric if (VS.NumFragments > CachePtr->size()) 35606c3fb27SDimitry Andric CachePtr->resize(VS.NumFragments, nullptr); 35781ad6265SDimitry Andric } 3580b57cec5SDimitry Andric } 3590b57cec5SDimitry Andric 36006c3fb27SDimitry Andric // Return fragment Frag, creating a new Value for it if necessary. 
36106c3fb27SDimitry Andric Value *Scatterer::operator[](unsigned Frag) { 36206c3fb27SDimitry Andric ValueVector &CV = CachePtr ? *CachePtr : Tmp; 3630b57cec5SDimitry Andric // Try to reuse a previous value. 36406c3fb27SDimitry Andric if (CV[Frag]) 36506c3fb27SDimitry Andric return CV[Frag]; 3660b57cec5SDimitry Andric IRBuilder<> Builder(BB, BBI); 36706c3fb27SDimitry Andric if (IsPointer) { 36806c3fb27SDimitry Andric if (Frag == 0) 36906c3fb27SDimitry Andric CV[Frag] = V; 37006c3fb27SDimitry Andric else 37106c3fb27SDimitry Andric CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag, 37206c3fb27SDimitry Andric V->getName() + ".i" + Twine(Frag)); 37306c3fb27SDimitry Andric return CV[Frag]; 3740b57cec5SDimitry Andric } 37506c3fb27SDimitry Andric 37606c3fb27SDimitry Andric Type *FragmentTy = VS.getFragmentType(Frag); 37706c3fb27SDimitry Andric 37806c3fb27SDimitry Andric if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) { 37906c3fb27SDimitry Andric SmallVector<int> Mask; 38006c3fb27SDimitry Andric for (unsigned J = 0; J < VecTy->getNumElements(); ++J) 38106c3fb27SDimitry Andric Mask.push_back(Frag * VS.NumPacked + J); 38206c3fb27SDimitry Andric CV[Frag] = 38306c3fb27SDimitry Andric Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, 38406c3fb27SDimitry Andric V->getName() + ".i" + Twine(Frag)); 3850b57cec5SDimitry Andric } else { 38606c3fb27SDimitry Andric // Search through a chain of InsertElementInsts looking for element Frag. 3870b57cec5SDimitry Andric // Record other elements in the cache. The new V is still suitable 3880b57cec5SDimitry Andric // for all uncached indices. 
3890b57cec5SDimitry Andric while (true) { 3900b57cec5SDimitry Andric InsertElementInst *Insert = dyn_cast<InsertElementInst>(V); 3910b57cec5SDimitry Andric if (!Insert) 3920b57cec5SDimitry Andric break; 3930b57cec5SDimitry Andric ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2)); 3940b57cec5SDimitry Andric if (!Idx) 3950b57cec5SDimitry Andric break; 3960b57cec5SDimitry Andric unsigned J = Idx->getZExtValue(); 3970b57cec5SDimitry Andric V = Insert->getOperand(0); 39806c3fb27SDimitry Andric if (Frag * VS.NumPacked == J) { 39906c3fb27SDimitry Andric CV[Frag] = Insert->getOperand(1); 40006c3fb27SDimitry Andric return CV[Frag]; 40106c3fb27SDimitry Andric } 40206c3fb27SDimitry Andric 40306c3fb27SDimitry Andric if (VS.NumPacked == 1 && !CV[J]) { 4040b57cec5SDimitry Andric // Only cache the first entry we find for each index we're not actively 4050b57cec5SDimitry Andric // searching for. This prevents us from going too far up the chain and 4060b57cec5SDimitry Andric // caching incorrect entries. 4070b57cec5SDimitry Andric CV[J] = Insert->getOperand(1); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric } 41006c3fb27SDimitry Andric CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked, 41106c3fb27SDimitry Andric V->getName() + ".i" + Twine(Frag)); 4120b57cec5SDimitry Andric } 41306c3fb27SDimitry Andric 41406c3fb27SDimitry Andric return CV[Frag]; 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric bool ScalarizerVisitor::visit(Function &F) { 4180b57cec5SDimitry Andric assert(Gathered.empty() && Scattered.empty()); 4190b57cec5SDimitry Andric 42081ad6265SDimitry Andric Scalarized = false; 42181ad6265SDimitry Andric 4220b57cec5SDimitry Andric // To ensure we replace gathered components correctly we need to do an ordered 4230b57cec5SDimitry Andric // traversal of the basic blocks in the function. 
4240b57cec5SDimitry Andric ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock()); 4250b57cec5SDimitry Andric for (BasicBlock *BB : RPOT) { 4260b57cec5SDimitry Andric for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { 4270b57cec5SDimitry Andric Instruction *I = &*II; 4280b57cec5SDimitry Andric bool Done = InstVisitor::visit(I); 4290b57cec5SDimitry Andric ++II; 4300b57cec5SDimitry Andric if (Done && I->getType()->isVoidTy()) 4310b57cec5SDimitry Andric I->eraseFromParent(); 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric } 4340b57cec5SDimitry Andric return finish(); 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric // Return a scattered form of V that can be accessed by Point. V must be a 4380b57cec5SDimitry Andric // vector or a pointer to a vector. 43981ad6265SDimitry Andric Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V, 44006c3fb27SDimitry Andric const VectorSplit &VS) { 4410b57cec5SDimitry Andric if (Argument *VArg = dyn_cast<Argument>(V)) { 4420b57cec5SDimitry Andric // Put the scattered form of arguments in the entry block, 4430b57cec5SDimitry Andric // so that it can be used everywhere. 4440b57cec5SDimitry Andric Function *F = VArg->getParent(); 4450b57cec5SDimitry Andric BasicBlock *BB = &F->getEntryBlock(); 44606c3fb27SDimitry Andric return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric if (Instruction *VOp = dyn_cast<Instruction>(V)) { 449480093f4SDimitry Andric // When scalarizing PHI nodes we might try to examine/rewrite InsertElement 450480093f4SDimitry Andric // nodes in predecessors. If those predecessors are unreachable from entry, 451480093f4SDimitry Andric // then the IR in those blocks could have unexpected properties resulting in 452480093f4SDimitry Andric // infinite loops in Scatterer::operator[]. 
By simply treating values 453480093f4SDimitry Andric // originating from instructions in unreachable blocks as undef we do not 454480093f4SDimitry Andric // need to analyse them further. 455480093f4SDimitry Andric if (!DT->isReachableFromEntry(VOp->getParent())) 456480093f4SDimitry Andric return Scatterer(Point->getParent(), Point->getIterator(), 45706c3fb27SDimitry Andric PoisonValue::get(V->getType()), VS); 4580b57cec5SDimitry Andric // Put the scattered form of an instruction directly after the 459349cc55cSDimitry Andric // instruction, skipping over PHI nodes and debug intrinsics. 4600b57cec5SDimitry Andric BasicBlock *BB = VOp->getParent(); 461349cc55cSDimitry Andric return Scatterer( 46206c3fb27SDimitry Andric BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS, 46306c3fb27SDimitry Andric &Scattered[{V, VS.SplitTy}]); 4640b57cec5SDimitry Andric } 4650b57cec5SDimitry Andric // In the fallback case, just put the scattered before Point and 4660b57cec5SDimitry Andric // keep the result local to Point. 46706c3fb27SDimitry Andric return Scatterer(Point->getParent(), Point->getIterator(), V, VS); 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric // Replace Op with the gathered form of the components in CV. Defer the 4710b57cec5SDimitry Andric // deletion of Op and creation of the gathered form to the end of the pass, 4720b57cec5SDimitry Andric // so that we can avoid creating the gathered form if all uses of Op are 4730b57cec5SDimitry Andric // replaced with uses of CV. 47406c3fb27SDimitry Andric void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV, 47506c3fb27SDimitry Andric const VectorSplit &VS) { 4760b57cec5SDimitry Andric transferMetadataAndIRFlags(Op, CV); 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric // If we already have a scattered form of Op (created from ExtractElements 4790b57cec5SDimitry Andric // of Op itself), replace them with the new form. 
48006c3fb27SDimitry Andric ValueVector &SV = Scattered[{Op, VS.SplitTy}]; 4810b57cec5SDimitry Andric if (!SV.empty()) { 4820b57cec5SDimitry Andric for (unsigned I = 0, E = SV.size(); I != E; ++I) { 4830b57cec5SDimitry Andric Value *V = SV[I]; 4845ffd83dbSDimitry Andric if (V == nullptr || SV[I] == CV[I]) 4850b57cec5SDimitry Andric continue; 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric Instruction *Old = cast<Instruction>(V); 488e8d8bef9SDimitry Andric if (isa<Instruction>(CV[I])) 4890b57cec5SDimitry Andric CV[I]->takeName(Old); 4900b57cec5SDimitry Andric Old->replaceAllUsesWith(CV[I]); 4915ffd83dbSDimitry Andric PotentiallyDeadInstrs.emplace_back(Old); 4920b57cec5SDimitry Andric } 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric SV = CV; 4950b57cec5SDimitry Andric Gathered.push_back(GatherList::value_type(Op, &SV)); 4960b57cec5SDimitry Andric } 4970b57cec5SDimitry Andric 49881ad6265SDimitry Andric // Replace Op with CV and collect Op has a potentially dead instruction. 49981ad6265SDimitry Andric void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) { 50081ad6265SDimitry Andric if (CV != Op) { 50181ad6265SDimitry Andric Op->replaceAllUsesWith(CV); 50281ad6265SDimitry Andric PotentiallyDeadInstrs.emplace_back(Op); 50381ad6265SDimitry Andric Scalarized = true; 50481ad6265SDimitry Andric } 50581ad6265SDimitry Andric } 50681ad6265SDimitry Andric 5070b57cec5SDimitry Andric // Return true if it is safe to transfer the given metadata tag from 5080b57cec5SDimitry Andric // vector to scalar instructions. 
5090b57cec5SDimitry Andric bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) { 5100b57cec5SDimitry Andric return (Tag == LLVMContext::MD_tbaa 5110b57cec5SDimitry Andric || Tag == LLVMContext::MD_fpmath 5120b57cec5SDimitry Andric || Tag == LLVMContext::MD_tbaa_struct 5130b57cec5SDimitry Andric || Tag == LLVMContext::MD_invariant_load 5140b57cec5SDimitry Andric || Tag == LLVMContext::MD_alias_scope 5150b57cec5SDimitry Andric || Tag == LLVMContext::MD_noalias 5165f757f3fSDimitry Andric || Tag == LLVMContext::MD_mem_parallel_loop_access 5170b57cec5SDimitry Andric || Tag == LLVMContext::MD_access_group); 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric // Transfer metadata from Op to the instructions in CV if it is known 5210b57cec5SDimitry Andric // to be safe to do so. 5220b57cec5SDimitry Andric void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op, 5230b57cec5SDimitry Andric const ValueVector &CV) { 5240b57cec5SDimitry Andric SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; 5250b57cec5SDimitry Andric Op->getAllMetadataOtherThanDebugLoc(MDs); 526*0fca6ea1SDimitry Andric for (Value *V : CV) { 527*0fca6ea1SDimitry Andric if (Instruction *New = dyn_cast<Instruction>(V)) { 5280b57cec5SDimitry Andric for (const auto &MD : MDs) 5290b57cec5SDimitry Andric if (canTransferMetadata(MD.first)) 5300b57cec5SDimitry Andric New->setMetadata(MD.first, MD.second); 5310b57cec5SDimitry Andric New->copyIRFlags(Op); 5320b57cec5SDimitry Andric if (Op->getDebugLoc() && !New->getDebugLoc()) 5330b57cec5SDimitry Andric New->setDebugLoc(Op->getDebugLoc()); 5340b57cec5SDimitry Andric } 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric } 5370b57cec5SDimitry Andric 53806c3fb27SDimitry Andric // Determine how Ty is split, if at all. 
53906c3fb27SDimitry Andric std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) { 54006c3fb27SDimitry Andric VectorSplit Split; 54106c3fb27SDimitry Andric Split.VecTy = dyn_cast<FixedVectorType>(Ty); 54206c3fb27SDimitry Andric if (!Split.VecTy) 54306c3fb27SDimitry Andric return {}; 54406c3fb27SDimitry Andric 54506c3fb27SDimitry Andric unsigned NumElems = Split.VecTy->getNumElements(); 54606c3fb27SDimitry Andric Type *ElemTy = Split.VecTy->getElementType(); 54706c3fb27SDimitry Andric 54806c3fb27SDimitry Andric if (NumElems == 1 || ElemTy->isPointerTy() || 54906c3fb27SDimitry Andric 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) { 55006c3fb27SDimitry Andric Split.NumPacked = 1; 55106c3fb27SDimitry Andric Split.NumFragments = NumElems; 55206c3fb27SDimitry Andric Split.SplitTy = ElemTy; 55306c3fb27SDimitry Andric } else { 55406c3fb27SDimitry Andric Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits(); 55506c3fb27SDimitry Andric if (Split.NumPacked >= NumElems) 55606c3fb27SDimitry Andric return {}; 55706c3fb27SDimitry Andric 55806c3fb27SDimitry Andric Split.NumFragments = divideCeil(NumElems, Split.NumPacked); 55906c3fb27SDimitry Andric Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked); 56006c3fb27SDimitry Andric 56106c3fb27SDimitry Andric unsigned RemainderElems = NumElems % Split.NumPacked; 56206c3fb27SDimitry Andric if (RemainderElems > 1) 56306c3fb27SDimitry Andric Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems); 56406c3fb27SDimitry Andric else if (RemainderElems == 1) 56506c3fb27SDimitry Andric Split.RemainderTy = ElemTy; 56606c3fb27SDimitry Andric } 56706c3fb27SDimitry Andric 56806c3fb27SDimitry Andric return Split; 56906c3fb27SDimitry Andric } 57006c3fb27SDimitry Andric 5710b57cec5SDimitry Andric // Try to fill in Layout from Ty, returning true on success. 
Alignment is 572bdd1243dSDimitry Andric // the alignment of the vector, or std::nullopt if the ABI default should be 573bdd1243dSDimitry Andric // used. 574bdd1243dSDimitry Andric std::optional<VectorLayout> 5755ffd83dbSDimitry Andric ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment, 5765ffd83dbSDimitry Andric const DataLayout &DL) { 57706c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(Ty); 57806c3fb27SDimitry Andric if (!VS) 57906c3fb27SDimitry Andric return {}; 58006c3fb27SDimitry Andric 5815ffd83dbSDimitry Andric VectorLayout Layout; 58206c3fb27SDimitry Andric Layout.VS = *VS; 58306c3fb27SDimitry Andric // Check that we're dealing with full-byte fragments. 58406c3fb27SDimitry Andric if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) || 58506c3fb27SDimitry Andric (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy))) 58606c3fb27SDimitry Andric return {}; 5870b57cec5SDimitry Andric Layout.VecAlign = Alignment; 58806c3fb27SDimitry Andric Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy); 5895ffd83dbSDimitry Andric return Layout; 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric // Scalarize one-operand instruction I, using Split(Builder, X, Name) 5930b57cec5SDimitry Andric // to create an instruction like I with operand X and name Name. 
5940b57cec5SDimitry Andric template<typename Splitter> 5950b57cec5SDimitry Andric bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) { 59606c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(I.getType()); 59706c3fb27SDimitry Andric if (!VS) 5980b57cec5SDimitry Andric return false; 5990b57cec5SDimitry Andric 60006c3fb27SDimitry Andric std::optional<VectorSplit> OpVS; 60106c3fb27SDimitry Andric if (I.getOperand(0)->getType() == I.getType()) { 60206c3fb27SDimitry Andric OpVS = VS; 60306c3fb27SDimitry Andric } else { 60406c3fb27SDimitry Andric OpVS = getVectorSplit(I.getOperand(0)->getType()); 60506c3fb27SDimitry Andric if (!OpVS || VS->NumPacked != OpVS->NumPacked) 60606c3fb27SDimitry Andric return false; 60706c3fb27SDimitry Andric } 60806c3fb27SDimitry Andric 6090b57cec5SDimitry Andric IRBuilder<> Builder(&I); 61006c3fb27SDimitry Andric Scatterer Op = scatter(&I, I.getOperand(0), *OpVS); 61106c3fb27SDimitry Andric assert(Op.size() == VS->NumFragments && "Mismatched unary operation"); 6120b57cec5SDimitry Andric ValueVector Res; 61306c3fb27SDimitry Andric Res.resize(VS->NumFragments); 61406c3fb27SDimitry Andric for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) 61506c3fb27SDimitry Andric Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag)); 61606c3fb27SDimitry Andric gather(&I, Res, *VS); 6170b57cec5SDimitry Andric return true; 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name) 6210b57cec5SDimitry Andric // to create an instruction like I with operands X and Y and name Name. 
6220b57cec5SDimitry Andric template<typename Splitter> 6230b57cec5SDimitry Andric bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { 62406c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(I.getType()); 62506c3fb27SDimitry Andric if (!VS) 6260b57cec5SDimitry Andric return false; 6270b57cec5SDimitry Andric 62806c3fb27SDimitry Andric std::optional<VectorSplit> OpVS; 62906c3fb27SDimitry Andric if (I.getOperand(0)->getType() == I.getType()) { 63006c3fb27SDimitry Andric OpVS = VS; 63106c3fb27SDimitry Andric } else { 63206c3fb27SDimitry Andric OpVS = getVectorSplit(I.getOperand(0)->getType()); 63306c3fb27SDimitry Andric if (!OpVS || VS->NumPacked != OpVS->NumPacked) 63406c3fb27SDimitry Andric return false; 6355ffd83dbSDimitry Andric } 63606c3fb27SDimitry Andric 63706c3fb27SDimitry Andric IRBuilder<> Builder(&I); 63806c3fb27SDimitry Andric Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS); 63906c3fb27SDimitry Andric Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS); 64006c3fb27SDimitry Andric assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation"); 64106c3fb27SDimitry Andric assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation"); 64206c3fb27SDimitry Andric ValueVector Res; 64306c3fb27SDimitry Andric Res.resize(VS->NumFragments); 64406c3fb27SDimitry Andric for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) { 64506c3fb27SDimitry Andric Value *Op0 = VOp0[Frag]; 64606c3fb27SDimitry Andric Value *Op1 = VOp1[Frag]; 64706c3fb27SDimitry Andric Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag)); 64806c3fb27SDimitry Andric } 64906c3fb27SDimitry Andric gather(&I, Res, *VS); 6500b57cec5SDimitry Andric return true; 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric static bool isTriviallyScalariable(Intrinsic::ID ID) { 6540b57cec5SDimitry Andric return isTriviallyVectorizable(ID); 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 
6570b57cec5SDimitry Andric /// If a call to a vector typed intrinsic function, split into a scalar call per 6580b57cec5SDimitry Andric /// element if possible for the intrinsic. 6590b57cec5SDimitry Andric bool ScalarizerVisitor::splitCall(CallInst &CI) { 66006c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(CI.getType()); 66106c3fb27SDimitry Andric if (!VS) 6620b57cec5SDimitry Andric return false; 6630b57cec5SDimitry Andric 6640b57cec5SDimitry Andric Function *F = CI.getCalledFunction(); 6650b57cec5SDimitry Andric if (!F) 6660b57cec5SDimitry Andric return false; 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric Intrinsic::ID ID = F->getIntrinsicID(); 6690b57cec5SDimitry Andric if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID)) 6700b57cec5SDimitry Andric return false; 6710b57cec5SDimitry Andric 67206c3fb27SDimitry Andric // unsigned NumElems = VT->getNumElements(); 673349cc55cSDimitry Andric unsigned NumArgs = CI.arg_size(); 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric ValueVector ScalarOperands(NumArgs); 6760b57cec5SDimitry Andric SmallVector<Scatterer, 8> Scattered(NumArgs); 67706c3fb27SDimitry Andric SmallVector<int> OverloadIdx(NumArgs, -1); 6780b57cec5SDimitry Andric 679fe6060f1SDimitry Andric SmallVector<llvm::Type *, 3> Tys; 68006c3fb27SDimitry Andric // Add return type if intrinsic is overloaded on it. 68106c3fb27SDimitry Andric if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) 68206c3fb27SDimitry Andric Tys.push_back(VS->SplitTy); 683fe6060f1SDimitry Andric 6840b57cec5SDimitry Andric // Assumes that any vector type has the same number of elements as the return 6850b57cec5SDimitry Andric // vector type, which is true for all current intrinsics. 
6860b57cec5SDimitry Andric for (unsigned I = 0; I != NumArgs; ++I) { 6870b57cec5SDimitry Andric Value *OpI = CI.getOperand(I); 6885f757f3fSDimitry Andric if ([[maybe_unused]] auto *OpVecTy = 6895f757f3fSDimitry Andric dyn_cast<FixedVectorType>(OpI->getType())) { 69006c3fb27SDimitry Andric assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements()); 69106c3fb27SDimitry Andric std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType()); 69206c3fb27SDimitry Andric if (!OpVS || OpVS->NumPacked != VS->NumPacked) { 69306c3fb27SDimitry Andric // The natural split of the operand doesn't match the result. This could 69406c3fb27SDimitry Andric // happen if the vector elements are different and the ScalarizeMinBits 69506c3fb27SDimitry Andric // option is used. 69606c3fb27SDimitry Andric // 69706c3fb27SDimitry Andric // We could in principle handle this case as well, at the cost of 69806c3fb27SDimitry Andric // complicating the scattering machinery to support multiple scattering 69906c3fb27SDimitry Andric // granularities for a single value. 
70006c3fb27SDimitry Andric return false; 70106c3fb27SDimitry Andric } 70206c3fb27SDimitry Andric 70306c3fb27SDimitry Andric Scattered[I] = scatter(&CI, OpI, *OpVS); 70406c3fb27SDimitry Andric if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) { 70506c3fb27SDimitry Andric OverloadIdx[I] = Tys.size(); 70606c3fb27SDimitry Andric Tys.push_back(OpVS->SplitTy); 70706c3fb27SDimitry Andric } 7080b57cec5SDimitry Andric } else { 7090b57cec5SDimitry Andric ScalarOperands[I] = OpI; 71081ad6265SDimitry Andric if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) 711fe6060f1SDimitry Andric Tys.push_back(OpI->getType()); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric } 7140b57cec5SDimitry Andric 71506c3fb27SDimitry Andric ValueVector Res(VS->NumFragments); 7160b57cec5SDimitry Andric ValueVector ScalarCallOps(NumArgs); 7170b57cec5SDimitry Andric 71806c3fb27SDimitry Andric Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys); 7190b57cec5SDimitry Andric IRBuilder<> Builder(&CI); 7200b57cec5SDimitry Andric 7210b57cec5SDimitry Andric // Perform actual scalarization, taking care to preserve any scalar operands. 
72206c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 72306c3fb27SDimitry Andric bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy; 7240b57cec5SDimitry Andric ScalarCallOps.clear(); 7250b57cec5SDimitry Andric 72606c3fb27SDimitry Andric if (IsRemainder) 72706c3fb27SDimitry Andric Tys[0] = VS->RemainderTy; 72806c3fb27SDimitry Andric 7290b57cec5SDimitry Andric for (unsigned J = 0; J != NumArgs; ++J) { 73006c3fb27SDimitry Andric if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) { 7310b57cec5SDimitry Andric ScalarCallOps.push_back(ScalarOperands[J]); 73206c3fb27SDimitry Andric } else { 73306c3fb27SDimitry Andric ScalarCallOps.push_back(Scattered[J][I]); 73406c3fb27SDimitry Andric if (IsRemainder && OverloadIdx[J] >= 0) 73506c3fb27SDimitry Andric Tys[OverloadIdx[J]] = Scattered[J][I]->getType(); 73606c3fb27SDimitry Andric } 7370b57cec5SDimitry Andric } 7380b57cec5SDimitry Andric 73906c3fb27SDimitry Andric if (IsRemainder) 74006c3fb27SDimitry Andric NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys); 74106c3fb27SDimitry Andric 74206c3fb27SDimitry Andric Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps, 74306c3fb27SDimitry Andric CI.getName() + ".i" + Twine(I)); 7440b57cec5SDimitry Andric } 7450b57cec5SDimitry Andric 74606c3fb27SDimitry Andric gather(&CI, Res, *VS); 7470b57cec5SDimitry Andric return true; 7480b57cec5SDimitry Andric } 7490b57cec5SDimitry Andric 7500b57cec5SDimitry Andric bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) { 75106c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(SI.getType()); 75206c3fb27SDimitry Andric if (!VS) 7530b57cec5SDimitry Andric return false; 7540b57cec5SDimitry Andric 75506c3fb27SDimitry Andric std::optional<VectorSplit> CondVS; 75606c3fb27SDimitry Andric if (isa<FixedVectorType>(SI.getCondition()->getType())) { 75706c3fb27SDimitry Andric CondVS = getVectorSplit(SI.getCondition()->getType()); 75806c3fb27SDimitry Andric if (!CondVS || CondVS->NumPacked != 
VS->NumPacked) { 75906c3fb27SDimitry Andric // This happens when ScalarizeMinBits is used. 76006c3fb27SDimitry Andric return false; 76106c3fb27SDimitry Andric } 76206c3fb27SDimitry Andric } 7630b57cec5SDimitry Andric 76406c3fb27SDimitry Andric IRBuilder<> Builder(&SI); 76506c3fb27SDimitry Andric Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS); 76606c3fb27SDimitry Andric Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS); 76706c3fb27SDimitry Andric assert(VOp1.size() == VS->NumFragments && "Mismatched select"); 76806c3fb27SDimitry Andric assert(VOp2.size() == VS->NumFragments && "Mismatched select"); 76906c3fb27SDimitry Andric ValueVector Res; 77006c3fb27SDimitry Andric Res.resize(VS->NumFragments); 77106c3fb27SDimitry Andric 77206c3fb27SDimitry Andric if (CondVS) { 77306c3fb27SDimitry Andric Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS); 77406c3fb27SDimitry Andric assert(VOp0.size() == CondVS->NumFragments && "Mismatched select"); 77506c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 7765ffd83dbSDimitry Andric Value *Op0 = VOp0[I]; 7775ffd83dbSDimitry Andric Value *Op1 = VOp1[I]; 7785ffd83dbSDimitry Andric Value *Op2 = VOp2[I]; 7795ffd83dbSDimitry Andric Res[I] = Builder.CreateSelect(Op0, Op1, Op2, 7800b57cec5SDimitry Andric SI.getName() + ".i" + Twine(I)); 7815ffd83dbSDimitry Andric } 7820b57cec5SDimitry Andric } else { 7830b57cec5SDimitry Andric Value *Op0 = SI.getOperand(0); 78406c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 7855ffd83dbSDimitry Andric Value *Op1 = VOp1[I]; 7865ffd83dbSDimitry Andric Value *Op2 = VOp2[I]; 7875ffd83dbSDimitry Andric Res[I] = Builder.CreateSelect(Op0, Op1, Op2, 7880b57cec5SDimitry Andric SI.getName() + ".i" + Twine(I)); 7890b57cec5SDimitry Andric } 7905ffd83dbSDimitry Andric } 79106c3fb27SDimitry Andric gather(&SI, Res, *VS); 7920b57cec5SDimitry Andric return true; 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric bool 
ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) { 7960b57cec5SDimitry Andric return splitBinary(ICI, ICmpSplitter(ICI)); 7970b57cec5SDimitry Andric } 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) { 8000b57cec5SDimitry Andric return splitBinary(FCI, FCmpSplitter(FCI)); 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) { 8040b57cec5SDimitry Andric return splitUnary(UO, UnarySplitter(UO)); 8050b57cec5SDimitry Andric } 8060b57cec5SDimitry Andric 8070b57cec5SDimitry Andric bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) { 8080b57cec5SDimitry Andric return splitBinary(BO, BinarySplitter(BO)); 8090b57cec5SDimitry Andric } 8100b57cec5SDimitry Andric 8110b57cec5SDimitry Andric bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { 81206c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType()); 81306c3fb27SDimitry Andric if (!VS) 8140b57cec5SDimitry Andric return false; 8150b57cec5SDimitry Andric 8160b57cec5SDimitry Andric IRBuilder<> Builder(&GEPI); 8170b57cec5SDimitry Andric unsigned NumIndices = GEPI.getNumIndices(); 8180b57cec5SDimitry Andric 81906c3fb27SDimitry Andric // The base pointer and indices might be scalar even if it's a vector GEP. 
82006c3fb27SDimitry Andric SmallVector<Value *, 8> ScalarOps{1 + NumIndices}; 82106c3fb27SDimitry Andric SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices}; 8220b57cec5SDimitry Andric 82306c3fb27SDimitry Andric for (unsigned I = 0; I < 1 + NumIndices; ++I) { 82406c3fb27SDimitry Andric if (auto *VecTy = 82506c3fb27SDimitry Andric dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) { 82606c3fb27SDimitry Andric std::optional<VectorSplit> OpVS = getVectorSplit(VecTy); 82706c3fb27SDimitry Andric if (!OpVS || OpVS->NumPacked != VS->NumPacked) { 82806c3fb27SDimitry Andric // This can happen when ScalarizeMinBits is used. 82906c3fb27SDimitry Andric return false; 83006c3fb27SDimitry Andric } 83106c3fb27SDimitry Andric ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS); 83206c3fb27SDimitry Andric } else { 83306c3fb27SDimitry Andric ScalarOps[I] = GEPI.getOperand(I); 83406c3fb27SDimitry Andric } 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric ValueVector Res; 83806c3fb27SDimitry Andric Res.resize(VS->NumFragments); 83906c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 84006c3fb27SDimitry Andric SmallVector<Value *, 8> SplitOps; 84106c3fb27SDimitry Andric SplitOps.resize(1 + NumIndices); 84206c3fb27SDimitry Andric for (unsigned J = 0; J < 1 + NumIndices; ++J) { 84306c3fb27SDimitry Andric if (ScalarOps[J]) 84406c3fb27SDimitry Andric SplitOps[J] = ScalarOps[J]; 84506c3fb27SDimitry Andric else 84606c3fb27SDimitry Andric SplitOps[J] = ScatterOps[J][I]; 84706c3fb27SDimitry Andric } 84806c3fb27SDimitry Andric Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0], 84906c3fb27SDimitry Andric ArrayRef(SplitOps).drop_front(), 8500b57cec5SDimitry Andric GEPI.getName() + ".i" + Twine(I)); 8510b57cec5SDimitry Andric if (GEPI.isInBounds()) 8520b57cec5SDimitry Andric if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I])) 8530b57cec5SDimitry Andric NewGEPI->setIsInBounds(); 
8540b57cec5SDimitry Andric } 85506c3fb27SDimitry Andric gather(&GEPI, Res, *VS); 8560b57cec5SDimitry Andric return true; 8570b57cec5SDimitry Andric } 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric bool ScalarizerVisitor::visitCastInst(CastInst &CI) { 86006c3fb27SDimitry Andric std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy()); 86106c3fb27SDimitry Andric if (!DestVS) 8620b57cec5SDimitry Andric return false; 8630b57cec5SDimitry Andric 86406c3fb27SDimitry Andric std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy()); 86506c3fb27SDimitry Andric if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked) 86606c3fb27SDimitry Andric return false; 86706c3fb27SDimitry Andric 8680b57cec5SDimitry Andric IRBuilder<> Builder(&CI); 86906c3fb27SDimitry Andric Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS); 87006c3fb27SDimitry Andric assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast"); 8710b57cec5SDimitry Andric ValueVector Res; 87206c3fb27SDimitry Andric Res.resize(DestVS->NumFragments); 87306c3fb27SDimitry Andric for (unsigned I = 0; I < DestVS->NumFragments; ++I) 87406c3fb27SDimitry Andric Res[I] = 87506c3fb27SDimitry Andric Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I), 8760b57cec5SDimitry Andric CI.getName() + ".i" + Twine(I)); 87706c3fb27SDimitry Andric gather(&CI, Res, *DestVS); 8780b57cec5SDimitry Andric return true; 8790b57cec5SDimitry Andric } 8800b57cec5SDimitry Andric 8810b57cec5SDimitry Andric bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) { 88206c3fb27SDimitry Andric std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy()); 88306c3fb27SDimitry Andric std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy()); 88406c3fb27SDimitry Andric if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy) 8850b57cec5SDimitry Andric return false; 8860b57cec5SDimitry Andric 88706c3fb27SDimitry Andric const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy(); 
8880b57cec5SDimitry Andric 88906c3fb27SDimitry Andric // Vectors of pointers are always fully scalarized. 89006c3fb27SDimitry Andric assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1)); 89106c3fb27SDimitry Andric 89206c3fb27SDimitry Andric IRBuilder<> Builder(&BCI); 89306c3fb27SDimitry Andric Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS); 89406c3fb27SDimitry Andric ValueVector Res; 89506c3fb27SDimitry Andric Res.resize(DstVS->NumFragments); 89606c3fb27SDimitry Andric 89706c3fb27SDimitry Andric unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits(); 89806c3fb27SDimitry Andric unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits(); 89906c3fb27SDimitry Andric 90006c3fb27SDimitry Andric if (isPointerTy || DstSplitBits == SrcSplitBits) { 90106c3fb27SDimitry Andric assert(DstVS->NumFragments == SrcVS->NumFragments); 90206c3fb27SDimitry Andric for (unsigned I = 0; I < DstVS->NumFragments; ++I) { 90306c3fb27SDimitry Andric Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I), 9040b57cec5SDimitry Andric BCI.getName() + ".i" + Twine(I)); 90506c3fb27SDimitry Andric } 90606c3fb27SDimitry Andric } else if (SrcSplitBits % DstSplitBits == 0) { 90706c3fb27SDimitry Andric // Convert each source fragment to the same-sized destination vector and 90806c3fb27SDimitry Andric // then scatter the result to the destination. 
90906c3fb27SDimitry Andric VectorSplit MidVS; 91006c3fb27SDimitry Andric MidVS.NumPacked = DstVS->NumPacked; 91106c3fb27SDimitry Andric MidVS.NumFragments = SrcSplitBits / DstSplitBits; 91206c3fb27SDimitry Andric MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(), 91306c3fb27SDimitry Andric MidVS.NumPacked * MidVS.NumFragments); 91406c3fb27SDimitry Andric MidVS.SplitTy = DstVS->SplitTy; 91506c3fb27SDimitry Andric 9160b57cec5SDimitry Andric unsigned ResI = 0; 91706c3fb27SDimitry Andric for (unsigned I = 0; I < SrcVS->NumFragments; ++I) { 91806c3fb27SDimitry Andric Value *V = Op0[I]; 91906c3fb27SDimitry Andric 9200b57cec5SDimitry Andric // Look through any existing bitcasts before converting to <N x t2>. 9210b57cec5SDimitry Andric // In the best case, the resulting conversion might be a no-op. 92206c3fb27SDimitry Andric Instruction *VI; 9230b57cec5SDimitry Andric while ((VI = dyn_cast<Instruction>(V)) && 9240b57cec5SDimitry Andric VI->getOpcode() == Instruction::BitCast) 9250b57cec5SDimitry Andric V = VI->getOperand(0); 92606c3fb27SDimitry Andric 92706c3fb27SDimitry Andric V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast"); 92806c3fb27SDimitry Andric 92906c3fb27SDimitry Andric Scatterer Mid = scatter(&BCI, V, MidVS); 93006c3fb27SDimitry Andric for (unsigned J = 0; J < MidVS.NumFragments; ++J) 93106c3fb27SDimitry Andric Res[ResI++] = Mid[J]; 93206c3fb27SDimitry Andric } 93306c3fb27SDimitry Andric } else if (DstSplitBits % SrcSplitBits == 0) { 93406c3fb27SDimitry Andric // Gather enough source fragments to make up a destination fragment and 93506c3fb27SDimitry Andric // then convert to the destination type. 
93606c3fb27SDimitry Andric VectorSplit MidVS; 93706c3fb27SDimitry Andric MidVS.NumFragments = DstSplitBits / SrcSplitBits; 93806c3fb27SDimitry Andric MidVS.NumPacked = SrcVS->NumPacked; 93906c3fb27SDimitry Andric MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(), 94006c3fb27SDimitry Andric MidVS.NumPacked * MidVS.NumFragments); 94106c3fb27SDimitry Andric MidVS.SplitTy = SrcVS->SplitTy; 94206c3fb27SDimitry Andric 94306c3fb27SDimitry Andric unsigned SrcI = 0; 94406c3fb27SDimitry Andric SmallVector<Value *, 8> ConcatOps; 94506c3fb27SDimitry Andric ConcatOps.resize(MidVS.NumFragments); 94606c3fb27SDimitry Andric for (unsigned I = 0; I < DstVS->NumFragments; ++I) { 94706c3fb27SDimitry Andric for (unsigned J = 0; J < MidVS.NumFragments; ++J) 94806c3fb27SDimitry Andric ConcatOps[J] = Op0[SrcI++]; 94906c3fb27SDimitry Andric Value *V = concatenate(Builder, ConcatOps, MidVS, 95006c3fb27SDimitry Andric BCI.getName() + ".i" + Twine(I)); 95106c3fb27SDimitry Andric Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I), 95206c3fb27SDimitry Andric BCI.getName() + ".i" + Twine(I)); 9530b57cec5SDimitry Andric } 9540b57cec5SDimitry Andric } else { 95506c3fb27SDimitry Andric return false; 9560b57cec5SDimitry Andric } 95706c3fb27SDimitry Andric 95806c3fb27SDimitry Andric gather(&BCI, Res, *DstVS); 9590b57cec5SDimitry Andric return true; 9600b57cec5SDimitry Andric } 9610b57cec5SDimitry Andric 9625ffd83dbSDimitry Andric bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) { 96306c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(IEI.getType()); 96406c3fb27SDimitry Andric if (!VS) 9655ffd83dbSDimitry Andric return false; 9665ffd83dbSDimitry Andric 9675ffd83dbSDimitry Andric IRBuilder<> Builder(&IEI); 96806c3fb27SDimitry Andric Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS); 9695ffd83dbSDimitry Andric Value *NewElt = IEI.getOperand(1); 9705ffd83dbSDimitry Andric Value *InsIdx = IEI.getOperand(2); 9715ffd83dbSDimitry Andric 
9725ffd83dbSDimitry Andric ValueVector Res; 97306c3fb27SDimitry Andric Res.resize(VS->NumFragments); 9745ffd83dbSDimitry Andric 9755ffd83dbSDimitry Andric if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) { 97606c3fb27SDimitry Andric unsigned Idx = CI->getZExtValue(); 97706c3fb27SDimitry Andric unsigned Fragment = Idx / VS->NumPacked; 97806c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 97906c3fb27SDimitry Andric if (I == Fragment) { 98006c3fb27SDimitry Andric bool IsPacked = VS->NumPacked > 1; 98106c3fb27SDimitry Andric if (Fragment == VS->NumFragments - 1 && VS->RemainderTy && 98206c3fb27SDimitry Andric !VS->RemainderTy->isVectorTy()) 98306c3fb27SDimitry Andric IsPacked = false; 98406c3fb27SDimitry Andric if (IsPacked) { 98506c3fb27SDimitry Andric Res[I] = 98606c3fb27SDimitry Andric Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked); 9875ffd83dbSDimitry Andric } else { 98806c3fb27SDimitry Andric Res[I] = NewElt; 98906c3fb27SDimitry Andric } 99006c3fb27SDimitry Andric } else { 99106c3fb27SDimitry Andric Res[I] = Op0[I]; 99206c3fb27SDimitry Andric } 99306c3fb27SDimitry Andric } 99406c3fb27SDimitry Andric } else { 99506c3fb27SDimitry Andric // Never split a variable insertelement that isn't fully scalarized. 99606c3fb27SDimitry Andric if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1) 9975ffd83dbSDimitry Andric return false; 9985ffd83dbSDimitry Andric 99906c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 10005ffd83dbSDimitry Andric Value *ShouldReplace = 10015ffd83dbSDimitry Andric Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I), 10025ffd83dbSDimitry Andric InsIdx->getName() + ".is." 
+ Twine(I)); 10035ffd83dbSDimitry Andric Value *OldElt = Op0[I]; 10045ffd83dbSDimitry Andric Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt, 10055ffd83dbSDimitry Andric IEI.getName() + ".i" + Twine(I)); 10065ffd83dbSDimitry Andric } 10075ffd83dbSDimitry Andric } 10085ffd83dbSDimitry Andric 100906c3fb27SDimitry Andric gather(&IEI, Res, *VS); 10105ffd83dbSDimitry Andric return true; 10115ffd83dbSDimitry Andric } 10125ffd83dbSDimitry Andric 10135ffd83dbSDimitry Andric bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { 101406c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType()); 101506c3fb27SDimitry Andric if (!VS) 10165ffd83dbSDimitry Andric return false; 10175ffd83dbSDimitry Andric 10185ffd83dbSDimitry Andric IRBuilder<> Builder(&EEI); 101906c3fb27SDimitry Andric Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS); 10205ffd83dbSDimitry Andric Value *ExtIdx = EEI.getOperand(1); 10215ffd83dbSDimitry Andric 10225ffd83dbSDimitry Andric if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) { 102306c3fb27SDimitry Andric unsigned Idx = CI->getZExtValue(); 102406c3fb27SDimitry Andric unsigned Fragment = Idx / VS->NumPacked; 102506c3fb27SDimitry Andric Value *Res = Op0[Fragment]; 102606c3fb27SDimitry Andric bool IsPacked = VS->NumPacked > 1; 102706c3fb27SDimitry Andric if (Fragment == VS->NumFragments - 1 && VS->RemainderTy && 102806c3fb27SDimitry Andric !VS->RemainderTy->isVectorTy()) 102906c3fb27SDimitry Andric IsPacked = false; 103006c3fb27SDimitry Andric if (IsPacked) 103106c3fb27SDimitry Andric Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked); 103281ad6265SDimitry Andric replaceUses(&EEI, Res); 10335ffd83dbSDimitry Andric return true; 10345ffd83dbSDimitry Andric } 10355ffd83dbSDimitry Andric 103606c3fb27SDimitry Andric // Never split a variable extractelement that isn't fully scalarized. 
103706c3fb27SDimitry Andric if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1) 10385ffd83dbSDimitry Andric return false; 10395ffd83dbSDimitry Andric 104006c3fb27SDimitry Andric Value *Res = PoisonValue::get(VS->VecTy->getElementType()); 104106c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 10425ffd83dbSDimitry Andric Value *ShouldExtract = 10435ffd83dbSDimitry Andric Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I), 10445ffd83dbSDimitry Andric ExtIdx->getName() + ".is." + Twine(I)); 10455ffd83dbSDimitry Andric Value *Elt = Op0[I]; 10465ffd83dbSDimitry Andric Res = Builder.CreateSelect(ShouldExtract, Elt, Res, 10475ffd83dbSDimitry Andric EEI.getName() + ".upto" + Twine(I)); 10485ffd83dbSDimitry Andric } 104981ad6265SDimitry Andric replaceUses(&EEI, Res); 10505ffd83dbSDimitry Andric return true; 10515ffd83dbSDimitry Andric } 10525ffd83dbSDimitry Andric 10530b57cec5SDimitry Andric bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 105406c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(SVI.getType()); 105506c3fb27SDimitry Andric std::optional<VectorSplit> VSOp = 105606c3fb27SDimitry Andric getVectorSplit(SVI.getOperand(0)->getType()); 105706c3fb27SDimitry Andric if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1) 10580b57cec5SDimitry Andric return false; 10590b57cec5SDimitry Andric 106006c3fb27SDimitry Andric Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp); 106106c3fb27SDimitry Andric Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp); 10620b57cec5SDimitry Andric ValueVector Res; 106306c3fb27SDimitry Andric Res.resize(VS->NumFragments); 10640b57cec5SDimitry Andric 106506c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 10660b57cec5SDimitry Andric int Selector = SVI.getMaskValue(I); 10670b57cec5SDimitry Andric if (Selector < 0) 106806c3fb27SDimitry Andric Res[I] = PoisonValue::get(VS->VecTy->getElementType()); 10690b57cec5SDimitry 
Andric else if (unsigned(Selector) < Op0.size()) 10700b57cec5SDimitry Andric Res[I] = Op0[Selector]; 10710b57cec5SDimitry Andric else 10720b57cec5SDimitry Andric Res[I] = Op1[Selector - Op0.size()]; 10730b57cec5SDimitry Andric } 107406c3fb27SDimitry Andric gather(&SVI, Res, *VS); 10750b57cec5SDimitry Andric return true; 10760b57cec5SDimitry Andric } 10770b57cec5SDimitry Andric 10780b57cec5SDimitry Andric bool ScalarizerVisitor::visitPHINode(PHINode &PHI) { 107906c3fb27SDimitry Andric std::optional<VectorSplit> VS = getVectorSplit(PHI.getType()); 108006c3fb27SDimitry Andric if (!VS) 10810b57cec5SDimitry Andric return false; 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric IRBuilder<> Builder(&PHI); 10840b57cec5SDimitry Andric ValueVector Res; 108506c3fb27SDimitry Andric Res.resize(VS->NumFragments); 10860b57cec5SDimitry Andric 10870b57cec5SDimitry Andric unsigned NumOps = PHI.getNumOperands(); 108806c3fb27SDimitry Andric for (unsigned I = 0; I < VS->NumFragments; ++I) { 108906c3fb27SDimitry Andric Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps, 10900b57cec5SDimitry Andric PHI.getName() + ".i" + Twine(I)); 109106c3fb27SDimitry Andric } 10920b57cec5SDimitry Andric 10930b57cec5SDimitry Andric for (unsigned I = 0; I < NumOps; ++I) { 109406c3fb27SDimitry Andric Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS); 10950b57cec5SDimitry Andric BasicBlock *IncomingBlock = PHI.getIncomingBlock(I); 109606c3fb27SDimitry Andric for (unsigned J = 0; J < VS->NumFragments; ++J) 10970b57cec5SDimitry Andric cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock); 10980b57cec5SDimitry Andric } 109906c3fb27SDimitry Andric gather(&PHI, Res, *VS); 11000b57cec5SDimitry Andric return true; 11010b57cec5SDimitry Andric } 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) { 11040b57cec5SDimitry Andric if (!ScalarizeLoadStore) 11050b57cec5SDimitry Andric return false; 11060b57cec5SDimitry Andric if 
(!LI.isSimple()) 11070b57cec5SDimitry Andric return false; 11080b57cec5SDimitry Andric 1109bdd1243dSDimitry Andric std::optional<VectorLayout> Layout = getVectorLayout( 1110*0fca6ea1SDimitry Andric LI.getType(), LI.getAlign(), LI.getDataLayout()); 11115ffd83dbSDimitry Andric if (!Layout) 11120b57cec5SDimitry Andric return false; 11130b57cec5SDimitry Andric 11140b57cec5SDimitry Andric IRBuilder<> Builder(&LI); 111506c3fb27SDimitry Andric Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS); 11160b57cec5SDimitry Andric ValueVector Res; 111706c3fb27SDimitry Andric Res.resize(Layout->VS.NumFragments); 11180b57cec5SDimitry Andric 111906c3fb27SDimitry Andric for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) { 112006c3fb27SDimitry Andric Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I], 112106c3fb27SDimitry Andric Align(Layout->getFragmentAlign(I)), 11220b57cec5SDimitry Andric LI.getName() + ".i" + Twine(I)); 112306c3fb27SDimitry Andric } 112406c3fb27SDimitry Andric gather(&LI, Res, Layout->VS); 11250b57cec5SDimitry Andric return true; 11260b57cec5SDimitry Andric } 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) { 11290b57cec5SDimitry Andric if (!ScalarizeLoadStore) 11300b57cec5SDimitry Andric return false; 11310b57cec5SDimitry Andric if (!SI.isSimple()) 11320b57cec5SDimitry Andric return false; 11330b57cec5SDimitry Andric 11340b57cec5SDimitry Andric Value *FullValue = SI.getValueOperand(); 1135bdd1243dSDimitry Andric std::optional<VectorLayout> Layout = getVectorLayout( 1136*0fca6ea1SDimitry Andric FullValue->getType(), SI.getAlign(), SI.getDataLayout()); 11375ffd83dbSDimitry Andric if (!Layout) 11380b57cec5SDimitry Andric return false; 11390b57cec5SDimitry Andric 11400b57cec5SDimitry Andric IRBuilder<> Builder(&SI); 114106c3fb27SDimitry Andric Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS); 114206c3fb27SDimitry Andric Scatterer VVal = scatter(&SI, 
FullValue, Layout->VS); 11430b57cec5SDimitry Andric 11440b57cec5SDimitry Andric ValueVector Stores; 114506c3fb27SDimitry Andric Stores.resize(Layout->VS.NumFragments); 114606c3fb27SDimitry Andric for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) { 11475ffd83dbSDimitry Andric Value *Val = VVal[I]; 11485ffd83dbSDimitry Andric Value *Ptr = VPtr[I]; 114906c3fb27SDimitry Andric Stores[I] = 115006c3fb27SDimitry Andric Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I)); 11510b57cec5SDimitry Andric } 11520b57cec5SDimitry Andric transferMetadataAndIRFlags(&SI, Stores); 11530b57cec5SDimitry Andric return true; 11540b57cec5SDimitry Andric } 11550b57cec5SDimitry Andric 11560b57cec5SDimitry Andric bool ScalarizerVisitor::visitCallInst(CallInst &CI) { 11570b57cec5SDimitry Andric return splitCall(CI); 11580b57cec5SDimitry Andric } 11590b57cec5SDimitry Andric 116006c3fb27SDimitry Andric bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) { 116106c3fb27SDimitry Andric return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) { 116206c3fb27SDimitry Andric return Builder.CreateFreeze(Op, Name); 116306c3fb27SDimitry Andric }); 116406c3fb27SDimitry Andric } 116506c3fb27SDimitry Andric 11660b57cec5SDimitry Andric // Delete the instructions that we scalarized. If a full vector result 11670b57cec5SDimitry Andric // is still needed, recreate it using InsertElements. 11680b57cec5SDimitry Andric bool ScalarizerVisitor::finish() { 11690b57cec5SDimitry Andric // The presence of data in Gathered or Scattered indicates changes 11700b57cec5SDimitry Andric // made to the Function. 
117181ad6265SDimitry Andric if (Gathered.empty() && Scattered.empty() && !Scalarized) 11720b57cec5SDimitry Andric return false; 11730b57cec5SDimitry Andric for (const auto &GMI : Gathered) { 11740b57cec5SDimitry Andric Instruction *Op = GMI.first; 11750b57cec5SDimitry Andric ValueVector &CV = *GMI.second; 11760b57cec5SDimitry Andric if (!Op->use_empty()) { 11770b57cec5SDimitry Andric // The value is still needed, so recreate it using a series of 117806c3fb27SDimitry Andric // insertelements and/or shufflevectors. 117906c3fb27SDimitry Andric Value *Res; 1180bdd1243dSDimitry Andric if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) { 11810b57cec5SDimitry Andric BasicBlock *BB = Op->getParent(); 11820b57cec5SDimitry Andric IRBuilder<> Builder(Op); 11830b57cec5SDimitry Andric if (isa<PHINode>(Op)) 11840b57cec5SDimitry Andric Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 118506c3fb27SDimitry Andric 118606c3fb27SDimitry Andric VectorSplit VS = *getVectorSplit(Ty); 118706c3fb27SDimitry Andric assert(VS.NumFragments == CV.size()); 118806c3fb27SDimitry Andric 118906c3fb27SDimitry Andric Res = concatenate(Builder, CV, VS, Op->getName()); 119006c3fb27SDimitry Andric 1191e8d8bef9SDimitry Andric Res->takeName(Op); 11925ffd83dbSDimitry Andric } else { 11935ffd83dbSDimitry Andric assert(CV.size() == 1 && Op->getType() == CV[0]->getType()); 11945ffd83dbSDimitry Andric Res = CV[0]; 11955ffd83dbSDimitry Andric if (Op == Res) 11965ffd83dbSDimitry Andric continue; 11975ffd83dbSDimitry Andric } 11980b57cec5SDimitry Andric Op->replaceAllUsesWith(Res); 11990b57cec5SDimitry Andric } 12005ffd83dbSDimitry Andric PotentiallyDeadInstrs.emplace_back(Op); 12010b57cec5SDimitry Andric } 12020b57cec5SDimitry Andric Gathered.clear(); 12030b57cec5SDimitry Andric Scattered.clear(); 120481ad6265SDimitry Andric Scalarized = false; 12055ffd83dbSDimitry Andric 12065ffd83dbSDimitry Andric RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs); 12075ffd83dbSDimitry 
Andric 12080b57cec5SDimitry Andric return true; 12090b57cec5SDimitry Andric } 12100b57cec5SDimitry Andric 12110b57cec5SDimitry Andric PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) { 1212480093f4SDimitry Andric DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 12135f757f3fSDimitry Andric ScalarizerVisitor Impl(DT, Options); 12140b57cec5SDimitry Andric bool Changed = Impl.visit(F); 1215480093f4SDimitry Andric PreservedAnalyses PA; 1216480093f4SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 1217480093f4SDimitry Andric return Changed ? PA : PreservedAnalyses::all(); 12180b57cec5SDimitry Andric } 1219