10b57cec5SDimitry Andric //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// This file implements SLP analysis based on VPlan. The analysis is based on 90b57cec5SDimitry Andric /// the ideas described in 100b57cec5SDimitry Andric /// 110b57cec5SDimitry Andric /// Look-ahead SLP: auto-vectorization in the presence of commutative 120b57cec5SDimitry Andric /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha, 130b57cec5SDimitry Andric /// Luís F. W. Góes 140b57cec5SDimitry Andric /// 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "VPlan.h" 1881ad6265SDimitry Andric #include "VPlanValue.h" 1981ad6265SDimitry Andric #include "llvm/ADT/DenseMap.h" 200b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h" 220b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 230b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 240b57cec5SDimitry Andric #include "llvm/IR/Type.h" 250b57cec5SDimitry Andric #include "llvm/IR/Value.h" 260b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 270b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 280b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 290b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 30fe6060f1SDimitry Andric #include <algorithm> 310b57cec5SDimitry Andric #include <cassert> 32*bdd1243dSDimitry Andric #include <optional> 33fe6060f1SDimitry Andric #include <utility> 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric using namespace llvm; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric #define DEBUG_TYPE "vplan-slp" 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric // Number of levels to look ahead when re-ordering multi node operands. 400b57cec5SDimitry Andric static unsigned LookaheadMaxDepth = 5; 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric VPInstruction *VPlanSlp::markFailed() { 430b57cec5SDimitry Andric // FIXME: Currently this is used to signal we hit instructions we cannot 440b57cec5SDimitry Andric // trivially SLP'ize. 450b57cec5SDimitry Andric CompletelySLP = false; 460b57cec5SDimitry Andric return nullptr; 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) { 500b57cec5SDimitry Andric if (all_of(Operands, [](VPValue *V) { 510b57cec5SDimitry Andric return cast<VPInstruction>(V)->getUnderlyingInstr(); 520b57cec5SDimitry Andric })) { 530b57cec5SDimitry Andric unsigned BundleSize = 0; 540b57cec5SDimitry Andric for (VPValue *V : Operands) { 550b57cec5SDimitry Andric Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType(); 560b57cec5SDimitry Andric assert(!T->isVectorTy() && "Only scalar types supported for now"); 570b57cec5SDimitry Andric BundleSize += T->getScalarSizeInBits(); 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric WidestBundleBits = std::max(WidestBundleBits, BundleSize); 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New); 630b57cec5SDimitry Andric assert(Res.second && 640b57cec5SDimitry Andric "Already created a combined instruction for the operand bundle"); 650b57cec5SDimitry Andric (void)Res; 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const { 690b57cec5SDimitry Andric // Currently we only support VPInstructions. 700b57cec5SDimitry Andric if (!all_of(Operands, [](VPValue *Op) { 710b57cec5SDimitry Andric return Op && isa<VPInstruction>(Op) && 720b57cec5SDimitry Andric cast<VPInstruction>(Op)->getUnderlyingInstr(); 730b57cec5SDimitry Andric })) { 740b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n"); 750b57cec5SDimitry Andric return false; 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric // Check if opcodes and type width agree for all instructions in the bundle. 790b57cec5SDimitry Andric // FIXME: Differing widths/opcodes can be handled by inserting additional 800b57cec5SDimitry Andric // instructions. 810b57cec5SDimitry Andric // FIXME: Deal with non-primitive types. 820b57cec5SDimitry Andric const Instruction *OriginalInstr = 830b57cec5SDimitry Andric cast<VPInstruction>(Operands[0])->getUnderlyingInstr(); 840b57cec5SDimitry Andric unsigned Opcode = OriginalInstr->getOpcode(); 850b57cec5SDimitry Andric unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits(); 860b57cec5SDimitry Andric if (!all_of(Operands, [Opcode, Width](VPValue *Op) { 870b57cec5SDimitry Andric const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr(); 880b57cec5SDimitry Andric return I->getOpcode() == Opcode && 890b57cec5SDimitry Andric I->getType()->getPrimitiveSizeInBits() == Width; 900b57cec5SDimitry Andric })) { 910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n"); 920b57cec5SDimitry Andric return false; 930b57cec5SDimitry Andric } 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric // For now, all operands must be defined in the same BB. 960b57cec5SDimitry Andric if (any_of(Operands, [this](VPValue *Op) { 970b57cec5SDimitry Andric return cast<VPInstruction>(Op)->getParent() != &this->BB; 980b57cec5SDimitry Andric })) { 990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n"); 1000b57cec5SDimitry Andric return false; 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric if (any_of(Operands, 1040b57cec5SDimitry Andric [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) { 1050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n"); 1060b57cec5SDimitry Andric return false; 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric // For loads, check that there are no instructions writing to memory in 1100b57cec5SDimitry Andric // between them. 1110b57cec5SDimitry Andric // TODO: we only have to forbid instructions writing to memory that could 1120b57cec5SDimitry Andric // interfere with any of the loads in the bundle 1130b57cec5SDimitry Andric if (Opcode == Instruction::Load) { 1140b57cec5SDimitry Andric unsigned LoadsSeen = 0; 1150b57cec5SDimitry Andric VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent(); 1160b57cec5SDimitry Andric for (auto &I : *Parent) { 117fe6060f1SDimitry Andric auto *VPI = dyn_cast<VPInstruction>(&I); 118fe6060f1SDimitry Andric if (!VPI) 119fe6060f1SDimitry Andric break; 1200b57cec5SDimitry Andric if (VPI->getOpcode() == Instruction::Load && 121e8d8bef9SDimitry Andric llvm::is_contained(Operands, VPI)) 1220b57cec5SDimitry Andric LoadsSeen++; 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric if (LoadsSeen == Operands.size()) 1250b57cec5SDimitry Andric break; 1260b57cec5SDimitry Andric if (LoadsSeen > 0 && VPI->mayWriteToMemory()) { 1270b57cec5SDimitry Andric LLVM_DEBUG( 1280b57cec5SDimitry Andric dbgs() << "VPSLP: instruction modifying memory between loads\n"); 1290b57cec5SDimitry Andric return false; 1300b57cec5SDimitry Andric } 1310b57cec5SDimitry Andric } 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric if (!all_of(Operands, [](VPValue *Op) { 1340b57cec5SDimitry Andric return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr()) 1350b57cec5SDimitry Andric ->isSimple(); 1360b57cec5SDimitry Andric })) { 1370b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n"); 1380b57cec5SDimitry Andric return false; 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric if (Opcode == Instruction::Store) 1430b57cec5SDimitry Andric if (!all_of(Operands, [](VPValue *Op) { 1440b57cec5SDimitry Andric return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr()) 1450b57cec5SDimitry Andric ->isSimple(); 1460b57cec5SDimitry Andric })) { 1470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n"); 1480b57cec5SDimitry Andric return false; 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric return true; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values, 1550b57cec5SDimitry Andric unsigned OperandIndex) { 1560b57cec5SDimitry Andric SmallVector<VPValue *, 4> Operands; 1570b57cec5SDimitry Andric for (VPValue *V : Values) { 158e8d8bef9SDimitry Andric // Currently we only support VPInstructions. 159e8d8bef9SDimitry Andric auto *U = cast<VPInstruction>(V); 1600b57cec5SDimitry Andric Operands.push_back(U->getOperand(OperandIndex)); 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric return Operands; 1630b57cec5SDimitry Andric } 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric static bool areCommutative(ArrayRef<VPValue *> Values) { 1660b57cec5SDimitry Andric return Instruction::isCommutative( 1670b57cec5SDimitry Andric cast<VPInstruction>(Values[0])->getOpcode()); 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric static SmallVector<SmallVector<VPValue *, 4>, 4> 1710b57cec5SDimitry Andric getOperands(ArrayRef<VPValue *> Values) { 1720b57cec5SDimitry Andric SmallVector<SmallVector<VPValue *, 4>, 4> Result; 1730b57cec5SDimitry Andric auto *VPI = cast<VPInstruction>(Values[0]); 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric switch (VPI->getOpcode()) { 1760b57cec5SDimitry Andric case Instruction::Load: 1770b57cec5SDimitry Andric llvm_unreachable("Loads terminate a tree, no need to get operands"); 1780b57cec5SDimitry Andric case Instruction::Store: 1790b57cec5SDimitry Andric Result.push_back(getOperands(Values, 0)); 1800b57cec5SDimitry Andric break; 1810b57cec5SDimitry Andric default: 1820b57cec5SDimitry Andric for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I) 1830b57cec5SDimitry Andric Result.push_back(getOperands(Values, I)); 1840b57cec5SDimitry Andric break; 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric return Result; 1880b57cec5SDimitry Andric } 1890b57cec5SDimitry Andric 1900b57cec5SDimitry Andric /// Returns the opcode of Values or ~0 if they do not all agree. 191*bdd1243dSDimitry Andric static std::optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) { 1920b57cec5SDimitry Andric unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode(); 1930b57cec5SDimitry Andric if (any_of(Values, [Opcode](VPValue *V) { 1940b57cec5SDimitry Andric return cast<VPInstruction>(V)->getOpcode() != Opcode; 1950b57cec5SDimitry Andric })) 196*bdd1243dSDimitry Andric return std::nullopt; 1970b57cec5SDimitry Andric return {Opcode}; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric /// Returns true if A and B access sequential memory if they are loads or 2010b57cec5SDimitry Andric /// stores or if they have identical opcodes otherwise. 2020b57cec5SDimitry Andric static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, 2030b57cec5SDimitry Andric VPInterleavedAccessInfo &IAI) { 2040b57cec5SDimitry Andric if (A->getOpcode() != B->getOpcode()) 2050b57cec5SDimitry Andric return false; 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric if (A->getOpcode() != Instruction::Load && 2080b57cec5SDimitry Andric A->getOpcode() != Instruction::Store) 2090b57cec5SDimitry Andric return true; 2100b57cec5SDimitry Andric auto *GA = IAI.getInterleaveGroup(A); 2110b57cec5SDimitry Andric auto *GB = IAI.getInterleaveGroup(B); 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B); 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric /// Implements getLAScore from Listing 7 in the paper. 2170b57cec5SDimitry Andric /// Traverses and compares operands of V1 and V2 to MaxLevel. 2180b57cec5SDimitry Andric static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, 2190b57cec5SDimitry Andric VPInterleavedAccessInfo &IAI) { 220e8d8bef9SDimitry Andric auto *I1 = dyn_cast<VPInstruction>(V1); 221e8d8bef9SDimitry Andric auto *I2 = dyn_cast<VPInstruction>(V2); 222e8d8bef9SDimitry Andric // Currently we only support VPInstructions. 223e8d8bef9SDimitry Andric if (!I1 || !I2) 2240b57cec5SDimitry Andric return 0; 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric if (MaxLevel == 0) 227e8d8bef9SDimitry Andric return (unsigned)areConsecutiveOrMatch(I1, I2, IAI); 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric unsigned Score = 0; 230e8d8bef9SDimitry Andric for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I) 231e8d8bef9SDimitry Andric for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J) 232e8d8bef9SDimitry Andric Score += 233e8d8bef9SDimitry Andric getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI); 2340b57cec5SDimitry Andric return Score; 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric 2370b57cec5SDimitry Andric std::pair<VPlanSlp::OpMode, VPValue *> 2380b57cec5SDimitry Andric VPlanSlp::getBest(OpMode Mode, VPValue *Last, 2390b57cec5SDimitry Andric SmallPtrSetImpl<VPValue *> &Candidates, 2400b57cec5SDimitry Andric VPInterleavedAccessInfo &IAI) { 2410b57cec5SDimitry Andric assert((Mode == OpMode::Load || Mode == OpMode::Opcode) && 2420b57cec5SDimitry Andric "Currently we only handle load and commutative opcodes"); 2430b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " getBest\n"); 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric SmallVector<VPValue *, 4> BestCandidates; 2460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Candidates for " 2470b57cec5SDimitry Andric << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " "); 2480b57cec5SDimitry Andric for (auto *Candidate : Candidates) { 2490b57cec5SDimitry Andric auto *LastI = cast<VPInstruction>(Last); 2500b57cec5SDimitry Andric auto *CandidateI = cast<VPInstruction>(Candidate); 2510b57cec5SDimitry Andric if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) { 2520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr() 2530b57cec5SDimitry Andric << " "); 2540b57cec5SDimitry Andric BestCandidates.push_back(Candidate); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric if (BestCandidates.empty()) 2600b57cec5SDimitry Andric return {OpMode::Failed, nullptr}; 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric if (BestCandidates.size() == 1) 2630b57cec5SDimitry Andric return {Mode, BestCandidates[0]}; 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric VPValue *Best = nullptr; 2660b57cec5SDimitry Andric unsigned BestScore = 0; 2670b57cec5SDimitry Andric for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) { 2680b57cec5SDimitry Andric unsigned PrevScore = ~0u; 2690b57cec5SDimitry Andric bool AllSame = true; 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric // FIXME: Avoid visiting the same operands multiple times. 2720b57cec5SDimitry Andric for (auto *Candidate : BestCandidates) { 2730b57cec5SDimitry Andric unsigned Score = getLAScore(Last, Candidate, Depth, IAI); 2740b57cec5SDimitry Andric if (PrevScore == ~0u) 2750b57cec5SDimitry Andric PrevScore = Score; 2760b57cec5SDimitry Andric if (PrevScore != Score) 2770b57cec5SDimitry Andric AllSame = false; 2780b57cec5SDimitry Andric PrevScore = Score; 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric if (Score > BestScore) { 2810b57cec5SDimitry Andric BestScore = Score; 2820b57cec5SDimitry Andric Best = Candidate; 2830b57cec5SDimitry Andric } 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric if (!AllSame) 2860b57cec5SDimitry Andric break; 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found best " 2890b57cec5SDimitry Andric << *cast<VPInstruction>(Best)->getUnderlyingInstr() 2900b57cec5SDimitry Andric << "\n"); 2910b57cec5SDimitry Andric Candidates.erase(Best); 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric return {Mode, Best}; 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() { 2970b57cec5SDimitry Andric SmallVector<MultiNodeOpTy, 4> FinalOrder; 2980b57cec5SDimitry Andric SmallVector<OpMode, 4> Mode; 2990b57cec5SDimitry Andric FinalOrder.reserve(MultiNodeOps.size()); 3000b57cec5SDimitry Andric Mode.reserve(MultiNodeOps.size()); 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Reordering multinode\n"); 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric for (auto &Operands : MultiNodeOps) { 3050b57cec5SDimitry Andric FinalOrder.push_back({Operands.first, {Operands.second[0]}}); 3060b57cec5SDimitry Andric if (cast<VPInstruction>(Operands.second[0])->getOpcode() == 3070b57cec5SDimitry Andric Instruction::Load) 3080b57cec5SDimitry Andric Mode.push_back(OpMode::Load); 3090b57cec5SDimitry Andric else 3100b57cec5SDimitry Andric Mode.push_back(OpMode::Opcode); 3110b57cec5SDimitry Andric } 3120b57cec5SDimitry Andric 3130b57cec5SDimitry Andric for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) { 3140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n"); 3150b57cec5SDimitry Andric SmallPtrSet<VPValue *, 4> Candidates; 3160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Candidates "); 3170b57cec5SDimitry Andric for (auto Ops : MultiNodeOps) { 3180b57cec5SDimitry Andric LLVM_DEBUG( 3190b57cec5SDimitry Andric dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr() 3200b57cec5SDimitry Andric << " "); 3210b57cec5SDimitry Andric Candidates.insert(Ops.second[Lane]); 3220b57cec5SDimitry Andric } 3230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) { 3260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Checking " << Op << "\n"); 3270b57cec5SDimitry Andric if (Mode[Op] == OpMode::Failed) 3280b57cec5SDimitry Andric continue; 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric VPValue *Last = FinalOrder[Op].second[Lane - 1]; 3310b57cec5SDimitry Andric std::pair<OpMode, VPValue *> Res = 3320b57cec5SDimitry Andric getBest(Mode[Op], Last, Candidates, IAI); 3330b57cec5SDimitry Andric if (Res.second) 3340b57cec5SDimitry Andric FinalOrder[Op].second.push_back(Res.second); 3350b57cec5SDimitry Andric else 3360b57cec5SDimitry Andric // TODO: handle this case 3370b57cec5SDimitry Andric FinalOrder[Op].second.push_back(markFailed()); 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric } 3400b57cec5SDimitry Andric 3410b57cec5SDimitry Andric return FinalOrder; 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 344fe6060f1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3450b57cec5SDimitry Andric void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) { 3460b57cec5SDimitry Andric dbgs() << " Ops: "; 347*bdd1243dSDimitry Andric for (auto *Op : Values) { 3488bcb0991SDimitry Andric if (auto *VPInstr = cast_or_null<VPInstruction>(Op)) 3498bcb0991SDimitry Andric if (auto *Instr = VPInstr->getUnderlyingInstr()) { 3500b57cec5SDimitry Andric dbgs() << *Instr << " | "; 3518bcb0991SDimitry Andric continue; 3528bcb0991SDimitry Andric } 3530b57cec5SDimitry Andric dbgs() << " nullptr | "; 3548bcb0991SDimitry Andric } 3550b57cec5SDimitry Andric dbgs() << "\n"; 3560b57cec5SDimitry Andric } 357fe6060f1SDimitry Andric #endif 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) { 3600b57cec5SDimitry Andric assert(!Values.empty() && "Need some operands!"); 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric // If we already visited this instruction bundle, re-use the existing node 3630b57cec5SDimitry Andric auto I = BundleToCombined.find(to_vector<4>(Values)); 3640b57cec5SDimitry Andric if (I != BundleToCombined.end()) { 3650b57cec5SDimitry Andric #ifndef NDEBUG 3660b57cec5SDimitry Andric // Check that the resulting graph is a tree. If we re-use a node, this means 3670b57cec5SDimitry Andric // its values have multiple users. We only allow this, if all users of each 3680b57cec5SDimitry Andric // value are the same instruction. 3690b57cec5SDimitry Andric for (auto *V : Values) { 3700b57cec5SDimitry Andric auto UI = V->user_begin(); 3710b57cec5SDimitry Andric auto *FirstUser = *UI++; 3720b57cec5SDimitry Andric while (UI != V->user_end()) { 3730b57cec5SDimitry Andric assert(*UI == FirstUser && "Currently we only support SLP trees."); 3740b57cec5SDimitry Andric UI++; 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric #endif 3780b57cec5SDimitry Andric return I->second; 3790b57cec5SDimitry Andric } 3800b57cec5SDimitry Andric 3810b57cec5SDimitry Andric // Dump inputs 3820b57cec5SDimitry Andric LLVM_DEBUG({ 3830b57cec5SDimitry Andric dbgs() << "buildGraph: "; 3840b57cec5SDimitry Andric dumpBundle(Values); 3850b57cec5SDimitry Andric }); 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric if (!areVectorizable(Values)) 3880b57cec5SDimitry Andric return markFailed(); 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric assert(getOpcode(Values) && "Opcodes for all values must match"); 39181ad6265SDimitry Andric unsigned ValuesOpcode = *getOpcode(Values); 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric SmallVector<VPValue *, 4> CombinedOperands; 3940b57cec5SDimitry Andric if (areCommutative(Values)) { 3950b57cec5SDimitry Andric bool MultiNodeRoot = !MultiNodeActive; 3960b57cec5SDimitry Andric MultiNodeActive = true; 3970b57cec5SDimitry Andric for (auto &Operands : getOperands(Values)) { 3980b57cec5SDimitry Andric LLVM_DEBUG({ 3990b57cec5SDimitry Andric dbgs() << " Visiting Commutative"; 4000b57cec5SDimitry Andric dumpBundle(Operands); 4010b57cec5SDimitry Andric }); 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric auto OperandsOpcode = getOpcode(Operands); 4040b57cec5SDimitry Andric if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) { 4050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Same opcode, continue building\n"); 4060b57cec5SDimitry Andric CombinedOperands.push_back(buildGraph(Operands)); 4070b57cec5SDimitry Andric } else { 4080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Adding multinode Ops\n"); 4090b57cec5SDimitry Andric // Create dummy VPInstruction, which will we replace later by the 4100b57cec5SDimitry Andric // re-ordered operand. 4110b57cec5SDimitry Andric VPInstruction *Op = new VPInstruction(0, {}); 4120b57cec5SDimitry Andric CombinedOperands.push_back(Op); 4130b57cec5SDimitry Andric MultiNodeOps.emplace_back(Op, Operands); 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric if (MultiNodeRoot) { 4180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Reorder \n"); 4190b57cec5SDimitry Andric MultiNodeActive = false; 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric auto FinalOrder = reorderMultiNodeOps(); 4220b57cec5SDimitry Andric 4230b57cec5SDimitry Andric MultiNodeOps.clear(); 4240b57cec5SDimitry Andric for (auto &Ops : FinalOrder) { 4250b57cec5SDimitry Andric VPInstruction *NewOp = buildGraph(Ops.second); 4260b57cec5SDimitry Andric Ops.first->replaceAllUsesWith(NewOp); 4270b57cec5SDimitry Andric for (unsigned i = 0; i < CombinedOperands.size(); i++) 4280b57cec5SDimitry Andric if (CombinedOperands[i] == Ops.first) 4290b57cec5SDimitry Andric CombinedOperands[i] = NewOp; 4300b57cec5SDimitry Andric delete Ops.first; 4310b57cec5SDimitry Andric Ops.first = NewOp; 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found final order\n"); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric } else { 4360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " NonCommuntative\n"); 4370b57cec5SDimitry Andric if (ValuesOpcode == Instruction::Load) 4380b57cec5SDimitry Andric for (VPValue *V : Values) 4390b57cec5SDimitry Andric CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0)); 4400b57cec5SDimitry Andric else 4410b57cec5SDimitry Andric for (auto &Operands : getOperands(Values)) 4420b57cec5SDimitry Andric CombinedOperands.push_back(buildGraph(Operands)); 4430b57cec5SDimitry Andric } 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric unsigned Opcode; 4460b57cec5SDimitry Andric switch (ValuesOpcode) { 4470b57cec5SDimitry Andric case Instruction::Load: 4480b57cec5SDimitry Andric Opcode = VPInstruction::SLPLoad; 4490b57cec5SDimitry Andric break; 4500b57cec5SDimitry Andric case Instruction::Store: 4510b57cec5SDimitry Andric Opcode = VPInstruction::SLPStore; 4520b57cec5SDimitry Andric break; 4530b57cec5SDimitry Andric default: 4540b57cec5SDimitry Andric Opcode = ValuesOpcode; 4550b57cec5SDimitry Andric break; 4560b57cec5SDimitry Andric } 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric if (!CompletelySLP) 4590b57cec5SDimitry Andric return markFailed(); 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric assert(CombinedOperands.size() > 0 && "Need more some operands"); 4620eae32dcSDimitry Andric auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr(); 4630eae32dcSDimitry Andric auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc()); 4640b57cec5SDimitry Andric 465e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " " 466e8d8bef9SDimitry Andric << *cast<VPInstruction>(Values[0]) << "\n"); 4670b57cec5SDimitry Andric addCombined(Values, VPI); 4680b57cec5SDimitry Andric return VPI; 4690b57cec5SDimitry Andric } 470