139f2d2f1Svporpo //===- BottomUpVec.cpp - A bottom-up vectorizer pass ----------------------===// 239f2d2f1Svporpo // 339f2d2f1Svporpo // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 439f2d2f1Svporpo // See https://llvm.org/LICENSE.txt for license information. 539f2d2f1Svporpo // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 639f2d2f1Svporpo // 739f2d2f1Svporpo //===----------------------------------------------------------------------===// 839f2d2f1Svporpo 939f2d2f1Svporpo #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h" 1042c5a301Svporpo #include "llvm/ADT/SmallVector.h" 116312beefSvporpo #include "llvm/Analysis/TargetTransformInfo.h" 12e22b07e7Svporpo #include "llvm/SandboxIR/Function.h" 13eba106d4Svporpo #include "llvm/SandboxIR/Instruction.h" 14083369fdSvporpo #include "llvm/SandboxIR/Module.h" 15320389d4Svporpo #include "llvm/SandboxIR/Utils.h" 162e8ad49eSJorge Gorbe Moya #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h" 176312beefSvporpo #include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" 18320389d4Svporpo #include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" 1939f2d2f1Svporpo 206312beefSvporpo namespace llvm { 216312beefSvporpo 226312beefSvporpo static cl::opt<unsigned> 236312beefSvporpo OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden, 246312beefSvporpo cl::desc("Override the vector register size in bits, " 256312beefSvporpo "which is otherwise found by querying TTI.")); 266312beefSvporpo static cl::opt<bool> 276312beefSvporpo AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden, 286312beefSvporpo cl::desc("Allow non-power-of-2 vectorization.")); 296312beefSvporpo 30cff7ad56Svporpo #ifndef NDEBUG 31cff7ad56Svporpo static cl::opt<bool> 32cff7ad56Svporpo AlwaysVerify("sbvec-always-verify", cl::init(false), cl::Hidden, 33cff7ad56Svporpo cl::desc("Helps find bugs by verifying the IR whenever we " 34cff7ad56Svporpo "emit new instructions (*very* expensive).")); 35cff7ad56Svporpo #endif // NDEBUG 36cff7ad56Svporpo 376312beefSvporpo namespace sandboxir { 38756ec99cSJorge Gorbe Moya 392e8ad49eSJorge Gorbe Moya BottomUpVec::BottomUpVec(StringRef Pipeline) 402e8ad49eSJorge Gorbe Moya : FunctionPass("bottom-up-vec"), 412e8ad49eSJorge Gorbe Moya RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {} 42756ec99cSJorge Gorbe Moya 4342c5a301Svporpo static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl, 4442c5a301Svporpo unsigned OpIdx) { 4542c5a301Svporpo SmallVector<Value *, 4> Operands; 4642c5a301Svporpo for (Value *BndlV : Bndl) { 4742c5a301Svporpo auto *BndlI = cast<Instruction>(BndlV); 4842c5a301Svporpo Operands.push_back(BndlI->getOperand(OpIdx)); 4942c5a301Svporpo } 5042c5a301Svporpo return Operands; 5142c5a301Svporpo } 5242c5a301Svporpo 538110af75Svporpo /// \Returns the BB iterator after the lowest instruction in \p Vals, or the top 548110af75Svporpo /// of BB if no instruction found in \p Vals. 558110af75Svporpo static BasicBlock::iterator getInsertPointAfterInstrs(ArrayRef<Value *> Vals, 568110af75Svporpo BasicBlock *BB) { 57*ac75d322Svporpo auto *BotI = VecUtils::getLastPHIOrSelf(VecUtils::getLowest(Vals, BB)); 588110af75Svporpo if (BotI == nullptr) 59d2234ca1Svporpo // We are using BB->begin() (or after PHIs) as the fallback insert point. 60d2234ca1Svporpo return BB->empty() 61d2234ca1Svporpo ? BB->begin() 62d2234ca1Svporpo : std::next( 63d2234ca1Svporpo VecUtils::getLastPHIOrSelf(&*BB->begin())->getIterator()); 64320389d4Svporpo return std::next(BotI->getIterator()); 65320389d4Svporpo } 66320389d4Svporpo 67320389d4Svporpo Value *BottomUpVec::createVectorInstr(ArrayRef<Value *> Bndl, 68320389d4Svporpo ArrayRef<Value *> Operands) { 69e902c696Svporpo auto CreateVectorInstr = [](ArrayRef<Value *> Bndl, 70e902c696Svporpo ArrayRef<Value *> Operands) -> Value * { 71320389d4Svporpo assert(all_of(Bndl, [](auto *V) { return isa<Instruction>(V); }) && 72320389d4Svporpo "Expect Instructions!"); 73320389d4Svporpo auto &Ctx = Bndl[0]->getContext(); 74320389d4Svporpo 75320389d4Svporpo Type *ScalarTy = VecUtils::getElementType(Utils::getExpectedType(Bndl[0])); 76320389d4Svporpo auto *VecTy = VecUtils::getWideType(ScalarTy, VecUtils::getNumLanes(Bndl)); 77320389d4Svporpo 788110af75Svporpo BasicBlock::iterator WhereIt = getInsertPointAfterInstrs( 798110af75Svporpo Bndl, cast<Instruction>(Bndl[0])->getParent()); 80320389d4Svporpo 81320389d4Svporpo auto Opcode = cast<Instruction>(Bndl[0])->getOpcode(); 82320389d4Svporpo switch (Opcode) { 83320389d4Svporpo case Instruction::Opcode::ZExt: 84320389d4Svporpo case Instruction::Opcode::SExt: 85320389d4Svporpo case Instruction::Opcode::FPToUI: 86320389d4Svporpo case Instruction::Opcode::FPToSI: 87320389d4Svporpo case Instruction::Opcode::FPExt: 88320389d4Svporpo case Instruction::Opcode::PtrToInt: 89320389d4Svporpo case Instruction::Opcode::IntToPtr: 90320389d4Svporpo case Instruction::Opcode::SIToFP: 91320389d4Svporpo case Instruction::Opcode::UIToFP: 92320389d4Svporpo case Instruction::Opcode::Trunc: 93320389d4Svporpo case Instruction::Opcode::FPTrunc: 94320389d4Svporpo case Instruction::Opcode::BitCast: { 95320389d4Svporpo assert(Operands.size() == 1u && "Casts are unary!"); 96e902c696Svporpo return CastInst::create(VecTy, Opcode, Operands[0], WhereIt, Ctx, 97e902c696Svporpo "VCast"); 98320389d4Svporpo } 99320389d4Svporpo case Instruction::Opcode::FCmp: 100320389d4Svporpo case Instruction::Opcode::ICmp: { 101320389d4Svporpo auto Pred = cast<CmpInst>(Bndl[0])->getPredicate(); 102320389d4Svporpo assert(all_of(drop_begin(Bndl), 103320389d4Svporpo [Pred](auto *SBV) { 104320389d4Svporpo return cast<CmpInst>(SBV)->getPredicate() == Pred; 105320389d4Svporpo }) && 106320389d4Svporpo "Expected same predicate across bundle."); 107320389d4Svporpo return CmpInst::create(Pred, Operands[0], Operands[1], WhereIt, Ctx, 108320389d4Svporpo "VCmp"); 109320389d4Svporpo } 110320389d4Svporpo case Instruction::Opcode::Select: { 111320389d4Svporpo return SelectInst::create(Operands[0], Operands[1], Operands[2], WhereIt, 112320389d4Svporpo Ctx, "Vec"); 113320389d4Svporpo } 114320389d4Svporpo case Instruction::Opcode::FNeg: { 115320389d4Svporpo auto *UOp0 = cast<UnaryOperator>(Bndl[0]); 116320389d4Svporpo auto OpC = UOp0->getOpcode(); 117e902c696Svporpo return UnaryOperator::createWithCopiedFlags(OpC, Operands[0], UOp0, 118e902c696Svporpo WhereIt, Ctx, "Vec"); 119320389d4Svporpo } 120320389d4Svporpo case Instruction::Opcode::Add: 121320389d4Svporpo case Instruction::Opcode::FAdd: 122320389d4Svporpo case Instruction::Opcode::Sub: 123320389d4Svporpo case Instruction::Opcode::FSub: 124320389d4Svporpo case Instruction::Opcode::Mul: 125320389d4Svporpo case Instruction::Opcode::FMul: 126320389d4Svporpo case Instruction::Opcode::UDiv: 127320389d4Svporpo case Instruction::Opcode::SDiv: 128320389d4Svporpo case Instruction::Opcode::FDiv: 129320389d4Svporpo case Instruction::Opcode::URem: 130320389d4Svporpo case Instruction::Opcode::SRem: 131320389d4Svporpo case Instruction::Opcode::FRem: 132320389d4Svporpo case Instruction::Opcode::Shl: 133320389d4Svporpo case Instruction::Opcode::LShr: 134320389d4Svporpo case Instruction::Opcode::AShr: 135320389d4Svporpo case Instruction::Opcode::And: 136320389d4Svporpo case Instruction::Opcode::Or: 137320389d4Svporpo case Instruction::Opcode::Xor: { 138320389d4Svporpo auto *BinOp0 = cast<BinaryOperator>(Bndl[0]); 139320389d4Svporpo auto *LHS = Operands[0]; 140320389d4Svporpo auto *RHS = Operands[1]; 141e902c696Svporpo return BinaryOperator::createWithCopiedFlags( 142e902c696Svporpo BinOp0->getOpcode(), LHS, RHS, BinOp0, WhereIt, Ctx, "Vec"); 143320389d4Svporpo } 144320389d4Svporpo case Instruction::Opcode::Load: { 145320389d4Svporpo auto *Ld0 = cast<LoadInst>(Bndl[0]); 146320389d4Svporpo Value *Ptr = Ld0->getPointerOperand(); 147e902c696Svporpo return LoadInst::create(VecTy, Ptr, Ld0->getAlign(), WhereIt, Ctx, 148e902c696Svporpo "VecL"); 149320389d4Svporpo } 150320389d4Svporpo case Instruction::Opcode::Store: { 151320389d4Svporpo auto Align = cast<StoreInst>(Bndl[0])->getAlign(); 152320389d4Svporpo Value *Val = Operands[0]; 153320389d4Svporpo Value *Ptr = Operands[1]; 154320389d4Svporpo return StoreInst::create(Val, Ptr, Align, WhereIt, Ctx); 155320389d4Svporpo } 156320389d4Svporpo case Instruction::Opcode::Br: 157320389d4Svporpo case Instruction::Opcode::Ret: 158320389d4Svporpo case Instruction::Opcode::PHI: 159320389d4Svporpo case Instruction::Opcode::AddrSpaceCast: 160320389d4Svporpo case Instruction::Opcode::Call: 161320389d4Svporpo case Instruction::Opcode::GetElementPtr: 162320389d4Svporpo llvm_unreachable("Unimplemented"); 163320389d4Svporpo break; 164320389d4Svporpo default: 165320389d4Svporpo llvm_unreachable("Unimplemented"); 166320389d4Svporpo break; 167320389d4Svporpo } 168320389d4Svporpo llvm_unreachable("Missing switch case!"); 169320389d4Svporpo // TODO: Propagate debug info. 170e902c696Svporpo }; 171e902c696Svporpo 172e902c696Svporpo auto *VecI = CreateVectorInstr(Bndl, Operands); 173e902c696Svporpo if (VecI != nullptr) { 174e902c696Svporpo Change = true; 175d6315affSvporpo IMaps->registerVector(Bndl, VecI); 176e902c696Svporpo } 177e902c696Svporpo return VecI; 178320389d4Svporpo } 179320389d4Svporpo 1807dffc96aSvporpo void BottomUpVec::tryEraseDeadInstrs() { 181c7053ac2Svporpo DenseMap<BasicBlock *, SmallVector<Instruction *>> SortedDeadInstrCandidates; 182c7053ac2Svporpo // The dead instrs could span BBs, so we need to collect and sort them per BB. 183c7053ac2Svporpo for (auto *DeadI : DeadInstrCandidates) 184c7053ac2Svporpo SortedDeadInstrCandidates[DeadI->getParent()].push_back(DeadI); 185c7053ac2Svporpo for (auto &Pair : SortedDeadInstrCandidates) 186c7053ac2Svporpo sort(Pair.second, 1877dffc96aSvporpo [](Instruction *I1, Instruction *I2) { return I1->comesBefore(I2); }); 188c7053ac2Svporpo for (const auto &Pair : SortedDeadInstrCandidates) { 189c7053ac2Svporpo for (Instruction *I : reverse(Pair.second)) { 1907dffc96aSvporpo if (I->hasNUses(0)) 191c7053ac2Svporpo // Erase the dead instructions bottom-to-top. 1927dffc96aSvporpo I->eraseFromParent(); 1937dffc96aSvporpo } 194c7053ac2Svporpo } 1957dffc96aSvporpo DeadInstrCandidates.clear(); 1967dffc96aSvporpo } 1977dffc96aSvporpo 1988110af75Svporpo Value *BottomUpVec::createShuffle(Value *VecOp, const ShuffleMask &Mask, 1998110af75Svporpo BasicBlock *UserBB) { 2008110af75Svporpo BasicBlock::iterator WhereIt = getInsertPointAfterInstrs({VecOp}, UserBB); 20187e4b681Svporpo return ShuffleVectorInst::create(VecOp, VecOp, Mask, WhereIt, 20287e4b681Svporpo VecOp->getContext(), "VShuf"); 20387e4b681Svporpo } 20487e4b681Svporpo 2058110af75Svporpo Value *BottomUpVec::createPack(ArrayRef<Value *> ToPack, BasicBlock *UserBB) { 2068110af75Svporpo BasicBlock::iterator WhereIt = getInsertPointAfterInstrs(ToPack, UserBB); 2073be3b33eSvporpo 2083be3b33eSvporpo Type *ScalarTy = VecUtils::getCommonScalarType(ToPack); 2093be3b33eSvporpo unsigned Lanes = VecUtils::getNumLanes(ToPack); 2103be3b33eSvporpo Type *VecTy = VecUtils::getWideType(ScalarTy, Lanes); 2113be3b33eSvporpo 2123be3b33eSvporpo // Create a series of pack instructions. 2133be3b33eSvporpo Value *LastInsert = PoisonValue::get(VecTy); 2143be3b33eSvporpo 2153be3b33eSvporpo Context &Ctx = ToPack[0]->getContext(); 2163be3b33eSvporpo 2173be3b33eSvporpo unsigned InsertIdx = 0; 2183be3b33eSvporpo for (Value *Elm : ToPack) { 2193be3b33eSvporpo // An element can be either scalar or vector. We need to generate different 2203be3b33eSvporpo // IR for each case. 2213be3b33eSvporpo if (Elm->getType()->isVectorTy()) { 2221be98277Svporpo unsigned NumElms = 2231be98277Svporpo cast<FixedVectorType>(Elm->getType())->getNumElements(); 2241be98277Svporpo for (auto ExtrLane : seq<int>(0, NumElms)) { 2251be98277Svporpo // We generate extract-insert pairs, for each lane in `Elm`. 2261be98277Svporpo Constant *ExtrLaneC = 2271be98277Svporpo ConstantInt::getSigned(Type::getInt32Ty(Ctx), ExtrLane); 2281be98277Svporpo // This may return a Constant if Elm is a Constant. 2291be98277Svporpo auto *ExtrI = 2301be98277Svporpo ExtractElementInst::create(Elm, ExtrLaneC, WhereIt, Ctx, "VPack"); 2311be98277Svporpo if (!isa<Constant>(ExtrI)) 2321be98277Svporpo WhereIt = std::next(cast<Instruction>(ExtrI)->getIterator()); 2331be98277Svporpo Constant *InsertLaneC = 2341be98277Svporpo ConstantInt::getSigned(Type::getInt32Ty(Ctx), InsertIdx++); 2351be98277Svporpo // This may also return a Constant if ExtrI is a Constant. 2361be98277Svporpo auto *InsertI = InsertElementInst::create( 2371be98277Svporpo LastInsert, ExtrI, InsertLaneC, WhereIt, Ctx, "VPack"); 2381be98277Svporpo if (!isa<Constant>(InsertI)) { 2391be98277Svporpo LastInsert = InsertI; 2401be98277Svporpo WhereIt = std::next(cast<Instruction>(LastInsert)->getIterator()); 2411be98277Svporpo } 2421be98277Svporpo } 2433be3b33eSvporpo } else { 2443be3b33eSvporpo Constant *InsertLaneC = 2453be3b33eSvporpo ConstantInt::getSigned(Type::getInt32Ty(Ctx), InsertIdx++); 2461be98277Svporpo // This may be folded into a Constant if LastInsert is a Constant. In 2471be98277Svporpo // that case we only collect the last constant. 2483be3b33eSvporpo LastInsert = InsertElementInst::create(LastInsert, Elm, InsertLaneC, 2493be3b33eSvporpo WhereIt, Ctx, "Pack"); 2503be3b33eSvporpo if (auto *NewI = dyn_cast<Instruction>(LastInsert)) 2513be3b33eSvporpo WhereIt = std::next(NewI->getIterator()); 2523be3b33eSvporpo } 2533be3b33eSvporpo } 2543be3b33eSvporpo return LastInsert; 2553be3b33eSvporpo } 2563be3b33eSvporpo 2577c51c310Svporpo void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl) { 2587c51c310Svporpo for (Value *V : Bndl) 2597c51c310Svporpo DeadInstrCandidates.insert(cast<Instruction>(V)); 2607c51c310Svporpo // Also collect the GEPs of vectorized loads and stores. 2617c51c310Svporpo auto Opcode = cast<Instruction>(Bndl[0])->getOpcode(); 2627c51c310Svporpo switch (Opcode) { 2637c51c310Svporpo case Instruction::Opcode::Load: { 2647c51c310Svporpo for (Value *V : drop_begin(Bndl)) 2657c51c310Svporpo if (auto *Ptr = 2667c51c310Svporpo dyn_cast<Instruction>(cast<LoadInst>(V)->getPointerOperand())) 2677c51c310Svporpo DeadInstrCandidates.insert(Ptr); 2687c51c310Svporpo break; 2697c51c310Svporpo } 2707c51c310Svporpo case Instruction::Opcode::Store: { 2717c51c310Svporpo for (Value *V : drop_begin(Bndl)) 2727c51c310Svporpo if (auto *Ptr = 2737c51c310Svporpo dyn_cast<Instruction>(cast<StoreInst>(V)->getPointerOperand())) 2747c51c310Svporpo DeadInstrCandidates.insert(Ptr); 2757c51c310Svporpo break; 2767c51c310Svporpo } 2777c51c310Svporpo default: 2787c51c310Svporpo break; 2797c51c310Svporpo } 2807c51c310Svporpo } 2817c51c310Svporpo 2828110af75Svporpo Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, 2838110af75Svporpo ArrayRef<Value *> UserBndl, unsigned Depth) { 284320389d4Svporpo Value *NewVec = nullptr; 2858110af75Svporpo auto *UserBB = !UserBndl.empty() 2868110af75Svporpo ? cast<Instruction>(UserBndl.front())->getParent() 2878110af75Svporpo : cast<Instruction>(Bndl[0])->getParent(); 288083369fdSvporpo const auto &LegalityRes = Legality->canVectorize(Bndl); 28942c5a301Svporpo switch (LegalityRes.getSubclassID()) { 29042c5a301Svporpo case LegalityResultID::Widen: { 29142c5a301Svporpo auto *I = cast<Instruction>(Bndl[0]); 292320389d4Svporpo SmallVector<Value *, 2> VecOperands; 293320389d4Svporpo switch (I->getOpcode()) { 294320389d4Svporpo case Instruction::Opcode::Load: 295320389d4Svporpo // Don't recurse towards the pointer operand. 296320389d4Svporpo VecOperands.push_back(cast<LoadInst>(I)->getPointerOperand()); 297320389d4Svporpo break; 298320389d4Svporpo case Instruction::Opcode::Store: { 299320389d4Svporpo // Don't recurse towards the pointer operand. 3008110af75Svporpo auto *VecOp = vectorizeRec(getOperand(Bndl, 0), Bndl, Depth + 1); 301320389d4Svporpo VecOperands.push_back(VecOp); 302320389d4Svporpo VecOperands.push_back(cast<StoreInst>(I)->getPointerOperand()); 303320389d4Svporpo break; 30442c5a301Svporpo } 305320389d4Svporpo default: 306320389d4Svporpo // Visit all operands. 307320389d4Svporpo for (auto OpIdx : seq<unsigned>(I->getNumOperands())) { 3088110af75Svporpo auto *VecOp = vectorizeRec(getOperand(Bndl, OpIdx), Bndl, Depth + 1); 309320389d4Svporpo VecOperands.push_back(VecOp); 310320389d4Svporpo } 311320389d4Svporpo break; 312320389d4Svporpo } 313320389d4Svporpo NewVec = createVectorInstr(Bndl, VecOperands); 314320389d4Svporpo 3157c51c310Svporpo // Collect any potentially dead scalar instructions, including the original 3167c51c310Svporpo // scalars and pointer operands of loads/stores. 3177c51c310Svporpo if (NewVec != nullptr) 3187c51c310Svporpo collectPotentiallyDeadInstrs(Bndl); 31942c5a301Svporpo break; 32042c5a301Svporpo } 321e902c696Svporpo case LegalityResultID::DiamondReuse: { 322e902c696Svporpo NewVec = cast<DiamondReuse>(LegalityRes).getVector(); 323e902c696Svporpo break; 324e902c696Svporpo } 32587e4b681Svporpo case LegalityResultID::DiamondReuseWithShuffle: { 32687e4b681Svporpo auto *VecOp = cast<DiamondReuseWithShuffle>(LegalityRes).getVector(); 32787e4b681Svporpo const ShuffleMask &Mask = 32887e4b681Svporpo cast<DiamondReuseWithShuffle>(LegalityRes).getMask(); 3298110af75Svporpo NewVec = createShuffle(VecOp, Mask, UserBB); 33087e4b681Svporpo break; 33187e4b681Svporpo } 332fd087135Svporpo case LegalityResultID::DiamondReuseMultiInput: { 333fd087135Svporpo const auto &Descr = 334fd087135Svporpo cast<DiamondReuseMultiInput>(LegalityRes).getCollectDescr(); 335fd087135Svporpo Type *ResTy = FixedVectorType::get(Bndl[0]->getType(), Bndl.size()); 336fd087135Svporpo 337fd087135Svporpo // TODO: Try to get WhereIt without creating a vector. 338fd087135Svporpo SmallVector<Value *, 4> DescrInstrs; 339fd087135Svporpo for (const auto &ElmDescr : Descr.getDescrs()) { 340fd087135Svporpo if (auto *I = dyn_cast<Instruction>(ElmDescr.getValue())) 341fd087135Svporpo DescrInstrs.push_back(I); 342fd087135Svporpo } 3438110af75Svporpo BasicBlock::iterator WhereIt = 3448110af75Svporpo getInsertPointAfterInstrs(DescrInstrs, UserBB); 345fd087135Svporpo 346fd087135Svporpo Value *LastV = PoisonValue::get(ResTy); 347fd087135Svporpo for (auto [Lane, ElmDescr] : enumerate(Descr.getDescrs())) { 348fd087135Svporpo Value *VecOp = ElmDescr.getValue(); 349fd087135Svporpo Context &Ctx = VecOp->getContext(); 350fd087135Svporpo Value *ValueToInsert; 351fd087135Svporpo if (ElmDescr.needsExtract()) { 352fd087135Svporpo ConstantInt *IdxC = 353fd087135Svporpo ConstantInt::get(Type::getInt32Ty(Ctx), ElmDescr.getExtractIdx()); 354fd087135Svporpo ValueToInsert = ExtractElementInst::create(VecOp, IdxC, WhereIt, 355fd087135Svporpo VecOp->getContext(), "VExt"); 356fd087135Svporpo } else { 357fd087135Svporpo ValueToInsert = VecOp; 358fd087135Svporpo } 359fd087135Svporpo ConstantInt *LaneC = ConstantInt::get(Type::getInt32Ty(Ctx), Lane); 360fd087135Svporpo Value *Ins = InsertElementInst::create(LastV, ValueToInsert, LaneC, 361fd087135Svporpo WhereIt, Ctx, "VIns"); 362fd087135Svporpo LastV = Ins; 363fd087135Svporpo } 364fd087135Svporpo NewVec = LastV; 365fd087135Svporpo break; 366fd087135Svporpo } 36754c93aabSvporpo case LegalityResultID::Pack: { 3683be3b33eSvporpo // If we can't vectorize the seeds then just return. 3693be3b33eSvporpo if (Depth == 0) 3703be3b33eSvporpo return nullptr; 3718110af75Svporpo NewVec = createPack(Bndl, UserBB); 3723be3b33eSvporpo break; 37354c93aabSvporpo } 37442c5a301Svporpo } 375cff7ad56Svporpo #ifndef NDEBUG 376cff7ad56Svporpo if (AlwaysVerify) { 377cff7ad56Svporpo // This helps find broken IR by constantly verifying the function. Note that 378cff7ad56Svporpo // this is very expensive and should only be used for debugging. 379cff7ad56Svporpo Instruction *I0 = isa<Instruction>(Bndl[0]) 380cff7ad56Svporpo ? cast<Instruction>(Bndl[0]) 381cff7ad56Svporpo : cast<Instruction>(UserBndl[0]); 382cff7ad56Svporpo assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) && 383cff7ad56Svporpo "Broken function!"); 384cff7ad56Svporpo } 385cff7ad56Svporpo #endif 386320389d4Svporpo return NewVec; 38742c5a301Svporpo } 38842c5a301Svporpo 38911b768afSVasileios Porpodas bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) { 3907dffc96aSvporpo DeadInstrCandidates.clear(); 3916312beefSvporpo Legality->clear(); 3928110af75Svporpo vectorizeRec(Bndl, {}, /*Depth=*/0); 3937dffc96aSvporpo tryEraseDeadInstrs(); 39411b768afSVasileios Porpodas return Change; 39511b768afSVasileios Porpodas } 39642c5a301Svporpo 397a461869dSvporpo bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) { 398d6315affSvporpo IMaps = std::make_unique<InstrMaps>(F.getContext()); 399ce0d0858Svporpo Legality = std::make_unique<LegalityAnalysis>( 4005942a99fSvporpo A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(), 401d6315affSvporpo F.getContext(), *IMaps); 40242c5a301Svporpo Change = false; 4036312beefSvporpo const auto &DL = F.getParent()->getDataLayout(); 4046312beefSvporpo unsigned VecRegBits = 4056312beefSvporpo OverrideVecRegBits != 0 4066312beefSvporpo ? OverrideVecRegBits 4076312beefSvporpo : A.getTTI() 4086312beefSvporpo .getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) 4096312beefSvporpo .getFixedValue(); 4106312beefSvporpo 41142c5a301Svporpo // TODO: Start from innermost BBs first 41242c5a301Svporpo for (auto &BB : F) { 4136312beefSvporpo SeedCollector SC(&BB, A.getScalarEvolution()); 4146312beefSvporpo for (SeedBundle &Seeds : SC.getStoreSeeds()) { 4156312beefSvporpo unsigned ElmBits = 4166312beefSvporpo Utils::getNumBits(VecUtils::getElementType(Utils::getExpectedType( 4176312beefSvporpo Seeds[Seeds.getFirstUnusedElementIdx()])), 4186312beefSvporpo DL); 4196312beefSvporpo 4206312beefSvporpo auto DivideBy2 = [](unsigned Num) { 4216312beefSvporpo auto Floor = VecUtils::getFloorPowerOf2(Num); 4226312beefSvporpo if (Floor == Num) 4236312beefSvporpo return Floor / 2; 4246312beefSvporpo return Floor; 4256312beefSvporpo }; 4266312beefSvporpo // Try to create the largest vector supported by the target. If it fails 4276312beefSvporpo // reduce the vector size by half. 4286312beefSvporpo for (unsigned SliceElms = std::min(VecRegBits / ElmBits, 4296312beefSvporpo Seeds.getNumUnusedBits() / ElmBits); 4306312beefSvporpo SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) { 4316312beefSvporpo if (Seeds.allUsed()) 4326312beefSvporpo break; 4336312beefSvporpo // Keep trying offsets after FirstUnusedElementIdx, until we vectorize 4346312beefSvporpo // the slice. This could be quite expensive, so we enforce a limit. 4356312beefSvporpo for (unsigned Offset = Seeds.getFirstUnusedElementIdx(), 4366312beefSvporpo OE = Seeds.size(); 4376312beefSvporpo Offset + 1 < OE; Offset += 1) { 4386312beefSvporpo // Seeds are getting used as we vectorize, so skip them. 4396312beefSvporpo if (Seeds.isUsed(Offset)) 4406312beefSvporpo continue; 4416312beefSvporpo if (Seeds.allUsed()) 4426312beefSvporpo break; 4436312beefSvporpo 4446312beefSvporpo auto SeedSlice = 4456312beefSvporpo Seeds.getSlice(Offset, SliceElms * ElmBits, !AllowNonPow2); 4466312beefSvporpo if (SeedSlice.empty()) 4476312beefSvporpo continue; 4486312beefSvporpo 4496312beefSvporpo assert(SeedSlice.size() >= 2 && "Should have been rejected!"); 4506312beefSvporpo 451756ec99cSJorge Gorbe Moya // TODO: If vectorization succeeds, run the RegionPassManager on the 452756ec99cSJorge Gorbe Moya // resulting region. 4536312beefSvporpo 4546312beefSvporpo // TODO: Refactor to remove the unnecessary copy to SeedSliceVals. 4556312beefSvporpo SmallVector<Value *> SeedSliceVals(SeedSlice.begin(), 4566312beefSvporpo SeedSlice.end()); 4576312beefSvporpo Change |= tryVectorize(SeedSliceVals); 4586312beefSvporpo } 4596312beefSvporpo } 4606312beefSvporpo } 46142c5a301Svporpo } 46242c5a301Svporpo return Change; 46342c5a301Svporpo } 464756ec99cSJorge Gorbe Moya 4656312beefSvporpo } // namespace sandboxir 4666312beefSvporpo } // namespace llvm 467