xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// This file implements SLP analysis based on VPlan. The analysis is based on
90b57cec5SDimitry Andric /// the ideas described in
100b57cec5SDimitry Andric ///
110b57cec5SDimitry Andric ///   Look-ahead SLP: auto-vectorization in the presence of commutative
120b57cec5SDimitry Andric ///   operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
130b57cec5SDimitry Andric ///   Luís F. W. Góes
140b57cec5SDimitry Andric ///
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "VPlan.h"
1881ad6265SDimitry Andric #include "VPlanValue.h"
1981ad6265SDimitry Andric #include "llvm/ADT/DenseMap.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
220b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
230b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
240b57cec5SDimitry Andric #include "llvm/IR/Type.h"
250b57cec5SDimitry Andric #include "llvm/IR/Value.h"
260b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
270b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
280b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
290b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
30fe6060f1SDimitry Andric #include <algorithm>
310b57cec5SDimitry Andric #include <cassert>
32*bdd1243dSDimitry Andric #include <optional>
33fe6060f1SDimitry Andric #include <utility>
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric using namespace llvm;
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric #define DEBUG_TYPE "vplan-slp"
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric // Number of levels to look ahead when re-ordering multi node operands.
400b57cec5SDimitry Andric static unsigned LookaheadMaxDepth = 5;
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric VPInstruction *VPlanSlp::markFailed() {
430b57cec5SDimitry Andric   // FIXME: Currently this is used to signal we hit instructions we cannot
440b57cec5SDimitry Andric   //        trivially SLP'ize.
450b57cec5SDimitry Andric   CompletelySLP = false;
460b57cec5SDimitry Andric   return nullptr;
470b57cec5SDimitry Andric }
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
500b57cec5SDimitry Andric   if (all_of(Operands, [](VPValue *V) {
510b57cec5SDimitry Andric         return cast<VPInstruction>(V)->getUnderlyingInstr();
520b57cec5SDimitry Andric       })) {
530b57cec5SDimitry Andric     unsigned BundleSize = 0;
540b57cec5SDimitry Andric     for (VPValue *V : Operands) {
550b57cec5SDimitry Andric       Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
560b57cec5SDimitry Andric       assert(!T->isVectorTy() && "Only scalar types supported for now");
570b57cec5SDimitry Andric       BundleSize += T->getScalarSizeInBits();
580b57cec5SDimitry Andric     }
590b57cec5SDimitry Andric     WidestBundleBits = std::max(WidestBundleBits, BundleSize);
600b57cec5SDimitry Andric   }
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric   auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
630b57cec5SDimitry Andric   assert(Res.second &&
640b57cec5SDimitry Andric          "Already created a combined instruction for the operand bundle");
650b57cec5SDimitry Andric   (void)Res;
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
690b57cec5SDimitry Andric   // Currently we only support VPInstructions.
700b57cec5SDimitry Andric   if (!all_of(Operands, [](VPValue *Op) {
710b57cec5SDimitry Andric         return Op && isa<VPInstruction>(Op) &&
720b57cec5SDimitry Andric                cast<VPInstruction>(Op)->getUnderlyingInstr();
730b57cec5SDimitry Andric       })) {
740b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
750b57cec5SDimitry Andric     return false;
760b57cec5SDimitry Andric   }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   // Check if opcodes and type width agree for all instructions in the bundle.
790b57cec5SDimitry Andric   // FIXME: Differing widths/opcodes can be handled by inserting additional
800b57cec5SDimitry Andric   //        instructions.
810b57cec5SDimitry Andric   // FIXME: Deal with non-primitive types.
820b57cec5SDimitry Andric   const Instruction *OriginalInstr =
830b57cec5SDimitry Andric       cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
840b57cec5SDimitry Andric   unsigned Opcode = OriginalInstr->getOpcode();
850b57cec5SDimitry Andric   unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
860b57cec5SDimitry Andric   if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
870b57cec5SDimitry Andric         const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
880b57cec5SDimitry Andric         return I->getOpcode() == Opcode &&
890b57cec5SDimitry Andric                I->getType()->getPrimitiveSizeInBits() == Width;
900b57cec5SDimitry Andric       })) {
910b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
920b57cec5SDimitry Andric     return false;
930b57cec5SDimitry Andric   }
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric   // For now, all operands must be defined in the same BB.
960b57cec5SDimitry Andric   if (any_of(Operands, [this](VPValue *Op) {
970b57cec5SDimitry Andric         return cast<VPInstruction>(Op)->getParent() != &this->BB;
980b57cec5SDimitry Andric       })) {
990b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
1000b57cec5SDimitry Andric     return false;
1010b57cec5SDimitry Andric   }
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric   if (any_of(Operands,
1040b57cec5SDimitry Andric              [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
1050b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
1060b57cec5SDimitry Andric     return false;
1070b57cec5SDimitry Andric   }
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric   // For loads, check that there are no instructions writing to memory in
1100b57cec5SDimitry Andric   // between them.
1110b57cec5SDimitry Andric   // TODO: we only have to forbid instructions writing to memory that could
1120b57cec5SDimitry Andric   //       interfere with any of the loads in the bundle
1130b57cec5SDimitry Andric   if (Opcode == Instruction::Load) {
1140b57cec5SDimitry Andric     unsigned LoadsSeen = 0;
1150b57cec5SDimitry Andric     VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
1160b57cec5SDimitry Andric     for (auto &I : *Parent) {
117fe6060f1SDimitry Andric       auto *VPI = dyn_cast<VPInstruction>(&I);
118fe6060f1SDimitry Andric       if (!VPI)
119fe6060f1SDimitry Andric         break;
1200b57cec5SDimitry Andric       if (VPI->getOpcode() == Instruction::Load &&
121e8d8bef9SDimitry Andric           llvm::is_contained(Operands, VPI))
1220b57cec5SDimitry Andric         LoadsSeen++;
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric       if (LoadsSeen == Operands.size())
1250b57cec5SDimitry Andric         break;
1260b57cec5SDimitry Andric       if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
1270b57cec5SDimitry Andric         LLVM_DEBUG(
1280b57cec5SDimitry Andric             dbgs() << "VPSLP: instruction modifying memory between loads\n");
1290b57cec5SDimitry Andric         return false;
1300b57cec5SDimitry Andric       }
1310b57cec5SDimitry Andric     }
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric     if (!all_of(Operands, [](VPValue *Op) {
1340b57cec5SDimitry Andric           return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
1350b57cec5SDimitry Andric               ->isSimple();
1360b57cec5SDimitry Andric         })) {
1370b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
1380b57cec5SDimitry Andric       return false;
1390b57cec5SDimitry Andric     }
1400b57cec5SDimitry Andric   }
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric   if (Opcode == Instruction::Store)
1430b57cec5SDimitry Andric     if (!all_of(Operands, [](VPValue *Op) {
1440b57cec5SDimitry Andric           return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
1450b57cec5SDimitry Andric               ->isSimple();
1460b57cec5SDimitry Andric         })) {
1470b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
1480b57cec5SDimitry Andric       return false;
1490b57cec5SDimitry Andric     }
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric   return true;
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
1550b57cec5SDimitry Andric                                              unsigned OperandIndex) {
1560b57cec5SDimitry Andric   SmallVector<VPValue *, 4> Operands;
1570b57cec5SDimitry Andric   for (VPValue *V : Values) {
158e8d8bef9SDimitry Andric     // Currently we only support VPInstructions.
159e8d8bef9SDimitry Andric     auto *U = cast<VPInstruction>(V);
1600b57cec5SDimitry Andric     Operands.push_back(U->getOperand(OperandIndex));
1610b57cec5SDimitry Andric   }
1620b57cec5SDimitry Andric   return Operands;
1630b57cec5SDimitry Andric }
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric static bool areCommutative(ArrayRef<VPValue *> Values) {
1660b57cec5SDimitry Andric   return Instruction::isCommutative(
1670b57cec5SDimitry Andric       cast<VPInstruction>(Values[0])->getOpcode());
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric static SmallVector<SmallVector<VPValue *, 4>, 4>
1710b57cec5SDimitry Andric getOperands(ArrayRef<VPValue *> Values) {
1720b57cec5SDimitry Andric   SmallVector<SmallVector<VPValue *, 4>, 4> Result;
1730b57cec5SDimitry Andric   auto *VPI = cast<VPInstruction>(Values[0]);
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   switch (VPI->getOpcode()) {
1760b57cec5SDimitry Andric   case Instruction::Load:
1770b57cec5SDimitry Andric     llvm_unreachable("Loads terminate a tree, no need to get operands");
1780b57cec5SDimitry Andric   case Instruction::Store:
1790b57cec5SDimitry Andric     Result.push_back(getOperands(Values, 0));
1800b57cec5SDimitry Andric     break;
1810b57cec5SDimitry Andric   default:
1820b57cec5SDimitry Andric     for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
1830b57cec5SDimitry Andric       Result.push_back(getOperands(Values, I));
1840b57cec5SDimitry Andric     break;
1850b57cec5SDimitry Andric   }
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric   return Result;
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric 
1900b57cec5SDimitry Andric /// Returns the opcode of Values or ~0 if they do not all agree.
191*bdd1243dSDimitry Andric static std::optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
1920b57cec5SDimitry Andric   unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
1930b57cec5SDimitry Andric   if (any_of(Values, [Opcode](VPValue *V) {
1940b57cec5SDimitry Andric         return cast<VPInstruction>(V)->getOpcode() != Opcode;
1950b57cec5SDimitry Andric       }))
196*bdd1243dSDimitry Andric     return std::nullopt;
1970b57cec5SDimitry Andric   return {Opcode};
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric /// Returns true if A and B access sequential memory if they are loads or
2010b57cec5SDimitry Andric /// stores or if they have identical opcodes otherwise.
2020b57cec5SDimitry Andric static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
2030b57cec5SDimitry Andric                                   VPInterleavedAccessInfo &IAI) {
2040b57cec5SDimitry Andric   if (A->getOpcode() != B->getOpcode())
2050b57cec5SDimitry Andric     return false;
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric   if (A->getOpcode() != Instruction::Load &&
2080b57cec5SDimitry Andric       A->getOpcode() != Instruction::Store)
2090b57cec5SDimitry Andric     return true;
2100b57cec5SDimitry Andric   auto *GA = IAI.getInterleaveGroup(A);
2110b57cec5SDimitry Andric   auto *GB = IAI.getInterleaveGroup(B);
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric   return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
2140b57cec5SDimitry Andric }
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric /// Implements getLAScore from Listing 7 in the paper.
2170b57cec5SDimitry Andric /// Traverses and compares operands of V1 and V2 to MaxLevel.
2180b57cec5SDimitry Andric static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
2190b57cec5SDimitry Andric                            VPInterleavedAccessInfo &IAI) {
220e8d8bef9SDimitry Andric   auto *I1 = dyn_cast<VPInstruction>(V1);
221e8d8bef9SDimitry Andric   auto *I2 = dyn_cast<VPInstruction>(V2);
222e8d8bef9SDimitry Andric   // Currently we only support VPInstructions.
223e8d8bef9SDimitry Andric   if (!I1 || !I2)
2240b57cec5SDimitry Andric     return 0;
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   if (MaxLevel == 0)
227e8d8bef9SDimitry Andric     return (unsigned)areConsecutiveOrMatch(I1, I2, IAI);
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric   unsigned Score = 0;
230e8d8bef9SDimitry Andric   for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
231e8d8bef9SDimitry Andric     for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
232e8d8bef9SDimitry Andric       Score +=
233e8d8bef9SDimitry Andric           getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
2340b57cec5SDimitry Andric   return Score;
2350b57cec5SDimitry Andric }
2360b57cec5SDimitry Andric 
2370b57cec5SDimitry Andric std::pair<VPlanSlp::OpMode, VPValue *>
2380b57cec5SDimitry Andric VPlanSlp::getBest(OpMode Mode, VPValue *Last,
2390b57cec5SDimitry Andric                   SmallPtrSetImpl<VPValue *> &Candidates,
2400b57cec5SDimitry Andric                   VPInterleavedAccessInfo &IAI) {
2410b57cec5SDimitry Andric   assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
2420b57cec5SDimitry Andric          "Currently we only handle load and commutative opcodes");
2430b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "      getBest\n");
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   SmallVector<VPValue *, 4> BestCandidates;
2460b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "        Candidates  for "
2470b57cec5SDimitry Andric                     << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
2480b57cec5SDimitry Andric   for (auto *Candidate : Candidates) {
2490b57cec5SDimitry Andric     auto *LastI = cast<VPInstruction>(Last);
2500b57cec5SDimitry Andric     auto *CandidateI = cast<VPInstruction>(Candidate);
2510b57cec5SDimitry Andric     if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
2520b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
2530b57cec5SDimitry Andric                         << " ");
2540b57cec5SDimitry Andric       BestCandidates.push_back(Candidate);
2550b57cec5SDimitry Andric     }
2560b57cec5SDimitry Andric   }
2570b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\n");
2580b57cec5SDimitry Andric 
2590b57cec5SDimitry Andric   if (BestCandidates.empty())
2600b57cec5SDimitry Andric     return {OpMode::Failed, nullptr};
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric   if (BestCandidates.size() == 1)
2630b57cec5SDimitry Andric     return {Mode, BestCandidates[0]};
2640b57cec5SDimitry Andric 
2650b57cec5SDimitry Andric   VPValue *Best = nullptr;
2660b57cec5SDimitry Andric   unsigned BestScore = 0;
2670b57cec5SDimitry Andric   for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
2680b57cec5SDimitry Andric     unsigned PrevScore = ~0u;
2690b57cec5SDimitry Andric     bool AllSame = true;
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric     // FIXME: Avoid visiting the same operands multiple times.
2720b57cec5SDimitry Andric     for (auto *Candidate : BestCandidates) {
2730b57cec5SDimitry Andric       unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
2740b57cec5SDimitry Andric       if (PrevScore == ~0u)
2750b57cec5SDimitry Andric         PrevScore = Score;
2760b57cec5SDimitry Andric       if (PrevScore != Score)
2770b57cec5SDimitry Andric         AllSame = false;
2780b57cec5SDimitry Andric       PrevScore = Score;
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric       if (Score > BestScore) {
2810b57cec5SDimitry Andric         BestScore = Score;
2820b57cec5SDimitry Andric         Best = Candidate;
2830b57cec5SDimitry Andric       }
2840b57cec5SDimitry Andric     }
2850b57cec5SDimitry Andric     if (!AllSame)
2860b57cec5SDimitry Andric       break;
2870b57cec5SDimitry Andric   }
2880b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Found best "
2890b57cec5SDimitry Andric                     << *cast<VPInstruction>(Best)->getUnderlyingInstr()
2900b57cec5SDimitry Andric                     << "\n");
2910b57cec5SDimitry Andric   Candidates.erase(Best);
2920b57cec5SDimitry Andric 
2930b57cec5SDimitry Andric   return {Mode, Best};
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
2970b57cec5SDimitry Andric   SmallVector<MultiNodeOpTy, 4> FinalOrder;
2980b57cec5SDimitry Andric   SmallVector<OpMode, 4> Mode;
2990b57cec5SDimitry Andric   FinalOrder.reserve(MultiNodeOps.size());
3000b57cec5SDimitry Andric   Mode.reserve(MultiNodeOps.size());
3010b57cec5SDimitry Andric 
3020b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Reordering multinode\n");
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric   for (auto &Operands : MultiNodeOps) {
3050b57cec5SDimitry Andric     FinalOrder.push_back({Operands.first, {Operands.second[0]}});
3060b57cec5SDimitry Andric     if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
3070b57cec5SDimitry Andric         Instruction::Load)
3080b57cec5SDimitry Andric       Mode.push_back(OpMode::Load);
3090b57cec5SDimitry Andric     else
3100b57cec5SDimitry Andric       Mode.push_back(OpMode::Opcode);
3110b57cec5SDimitry Andric   }
3120b57cec5SDimitry Andric 
3130b57cec5SDimitry Andric   for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
3140b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Finding best value for lane " << Lane << "\n");
3150b57cec5SDimitry Andric     SmallPtrSet<VPValue *, 4> Candidates;
3160b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Candidates  ");
3170b57cec5SDimitry Andric     for (auto Ops : MultiNodeOps) {
3180b57cec5SDimitry Andric       LLVM_DEBUG(
3190b57cec5SDimitry Andric           dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
3200b57cec5SDimitry Andric                  << " ");
3210b57cec5SDimitry Andric       Candidates.insert(Ops.second[Lane]);
3220b57cec5SDimitry Andric     }
3230b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "\n");
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric     for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
3260b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Checking " << Op << "\n");
3270b57cec5SDimitry Andric       if (Mode[Op] == OpMode::Failed)
3280b57cec5SDimitry Andric         continue;
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric       VPValue *Last = FinalOrder[Op].second[Lane - 1];
3310b57cec5SDimitry Andric       std::pair<OpMode, VPValue *> Res =
3320b57cec5SDimitry Andric           getBest(Mode[Op], Last, Candidates, IAI);
3330b57cec5SDimitry Andric       if (Res.second)
3340b57cec5SDimitry Andric         FinalOrder[Op].second.push_back(Res.second);
3350b57cec5SDimitry Andric       else
3360b57cec5SDimitry Andric         // TODO: handle this case
3370b57cec5SDimitry Andric         FinalOrder[Op].second.push_back(markFailed());
3380b57cec5SDimitry Andric     }
3390b57cec5SDimitry Andric   }
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric   return FinalOrder;
3420b57cec5SDimitry Andric }
3430b57cec5SDimitry Andric 
344fe6060f1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3450b57cec5SDimitry Andric void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
3460b57cec5SDimitry Andric   dbgs() << " Ops: ";
347*bdd1243dSDimitry Andric   for (auto *Op : Values) {
3488bcb0991SDimitry Andric     if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
3498bcb0991SDimitry Andric       if (auto *Instr = VPInstr->getUnderlyingInstr()) {
3500b57cec5SDimitry Andric         dbgs() << *Instr << " | ";
3518bcb0991SDimitry Andric         continue;
3528bcb0991SDimitry Andric       }
3530b57cec5SDimitry Andric     dbgs() << " nullptr | ";
3548bcb0991SDimitry Andric   }
3550b57cec5SDimitry Andric   dbgs() << "\n";
3560b57cec5SDimitry Andric }
357fe6060f1SDimitry Andric #endif
3580b57cec5SDimitry Andric 
3590b57cec5SDimitry Andric VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
3600b57cec5SDimitry Andric   assert(!Values.empty() && "Need some operands!");
3610b57cec5SDimitry Andric 
3620b57cec5SDimitry Andric   // If we already visited this instruction bundle, re-use the existing node
3630b57cec5SDimitry Andric   auto I = BundleToCombined.find(to_vector<4>(Values));
3640b57cec5SDimitry Andric   if (I != BundleToCombined.end()) {
3650b57cec5SDimitry Andric #ifndef NDEBUG
3660b57cec5SDimitry Andric     // Check that the resulting graph is a tree. If we re-use a node, this means
3670b57cec5SDimitry Andric     // its values have multiple users. We only allow this, if all users of each
3680b57cec5SDimitry Andric     // value are the same instruction.
3690b57cec5SDimitry Andric     for (auto *V : Values) {
3700b57cec5SDimitry Andric       auto UI = V->user_begin();
3710b57cec5SDimitry Andric       auto *FirstUser = *UI++;
3720b57cec5SDimitry Andric       while (UI != V->user_end()) {
3730b57cec5SDimitry Andric         assert(*UI == FirstUser && "Currently we only support SLP trees.");
3740b57cec5SDimitry Andric         UI++;
3750b57cec5SDimitry Andric       }
3760b57cec5SDimitry Andric     }
3770b57cec5SDimitry Andric #endif
3780b57cec5SDimitry Andric     return I->second;
3790b57cec5SDimitry Andric   }
3800b57cec5SDimitry Andric 
3810b57cec5SDimitry Andric   // Dump inputs
3820b57cec5SDimitry Andric   LLVM_DEBUG({
3830b57cec5SDimitry Andric     dbgs() << "buildGraph: ";
3840b57cec5SDimitry Andric     dumpBundle(Values);
3850b57cec5SDimitry Andric   });
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric   if (!areVectorizable(Values))
3880b57cec5SDimitry Andric     return markFailed();
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric   assert(getOpcode(Values) && "Opcodes for all values must match");
39181ad6265SDimitry Andric   unsigned ValuesOpcode = *getOpcode(Values);
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric   SmallVector<VPValue *, 4> CombinedOperands;
3940b57cec5SDimitry Andric   if (areCommutative(Values)) {
3950b57cec5SDimitry Andric     bool MultiNodeRoot = !MultiNodeActive;
3960b57cec5SDimitry Andric     MultiNodeActive = true;
3970b57cec5SDimitry Andric     for (auto &Operands : getOperands(Values)) {
3980b57cec5SDimitry Andric       LLVM_DEBUG({
3990b57cec5SDimitry Andric         dbgs() << "  Visiting Commutative";
4000b57cec5SDimitry Andric         dumpBundle(Operands);
4010b57cec5SDimitry Andric       });
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric       auto OperandsOpcode = getOpcode(Operands);
4040b57cec5SDimitry Andric       if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
4050b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "    Same opcode, continue building\n");
4060b57cec5SDimitry Andric         CombinedOperands.push_back(buildGraph(Operands));
4070b57cec5SDimitry Andric       } else {
4080b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "    Adding multinode Ops\n");
4090b57cec5SDimitry Andric         // Create dummy VPInstruction, which will we replace later by the
4100b57cec5SDimitry Andric         // re-ordered operand.
4110b57cec5SDimitry Andric         VPInstruction *Op = new VPInstruction(0, {});
4120b57cec5SDimitry Andric         CombinedOperands.push_back(Op);
4130b57cec5SDimitry Andric         MultiNodeOps.emplace_back(Op, Operands);
4140b57cec5SDimitry Andric       }
4150b57cec5SDimitry Andric     }
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric     if (MultiNodeRoot) {
4180b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Reorder \n");
4190b57cec5SDimitry Andric       MultiNodeActive = false;
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric       auto FinalOrder = reorderMultiNodeOps();
4220b57cec5SDimitry Andric 
4230b57cec5SDimitry Andric       MultiNodeOps.clear();
4240b57cec5SDimitry Andric       for (auto &Ops : FinalOrder) {
4250b57cec5SDimitry Andric         VPInstruction *NewOp = buildGraph(Ops.second);
4260b57cec5SDimitry Andric         Ops.first->replaceAllUsesWith(NewOp);
4270b57cec5SDimitry Andric         for (unsigned i = 0; i < CombinedOperands.size(); i++)
4280b57cec5SDimitry Andric           if (CombinedOperands[i] == Ops.first)
4290b57cec5SDimitry Andric             CombinedOperands[i] = NewOp;
4300b57cec5SDimitry Andric         delete Ops.first;
4310b57cec5SDimitry Andric         Ops.first = NewOp;
4320b57cec5SDimitry Andric       }
4330b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Found final order\n");
4340b57cec5SDimitry Andric     }
4350b57cec5SDimitry Andric   } else {
4360b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  NonCommuntative\n");
4370b57cec5SDimitry Andric     if (ValuesOpcode == Instruction::Load)
4380b57cec5SDimitry Andric       for (VPValue *V : Values)
4390b57cec5SDimitry Andric         CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
4400b57cec5SDimitry Andric     else
4410b57cec5SDimitry Andric       for (auto &Operands : getOperands(Values))
4420b57cec5SDimitry Andric         CombinedOperands.push_back(buildGraph(Operands));
4430b57cec5SDimitry Andric   }
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric   unsigned Opcode;
4460b57cec5SDimitry Andric   switch (ValuesOpcode) {
4470b57cec5SDimitry Andric   case Instruction::Load:
4480b57cec5SDimitry Andric     Opcode = VPInstruction::SLPLoad;
4490b57cec5SDimitry Andric     break;
4500b57cec5SDimitry Andric   case Instruction::Store:
4510b57cec5SDimitry Andric     Opcode = VPInstruction::SLPStore;
4520b57cec5SDimitry Andric     break;
4530b57cec5SDimitry Andric   default:
4540b57cec5SDimitry Andric     Opcode = ValuesOpcode;
4550b57cec5SDimitry Andric     break;
4560b57cec5SDimitry Andric   }
4570b57cec5SDimitry Andric 
4580b57cec5SDimitry Andric   if (!CompletelySLP)
4590b57cec5SDimitry Andric     return markFailed();
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric   assert(CombinedOperands.size() > 0 && "Need more some operands");
4620eae32dcSDimitry Andric   auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
4630eae32dcSDimitry Andric   auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
4640b57cec5SDimitry Andric 
465e8d8bef9SDimitry Andric   LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
466e8d8bef9SDimitry Andric                     << *cast<VPInstruction>(Values[0]) << "\n");
4670b57cec5SDimitry Andric   addCombined(Values, VPI);
4680b57cec5SDimitry Andric   return VPI;
4690b57cec5SDimitry Andric }
470