10b57cec5SDimitry Andric //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass removes the computation of provably redundant expressions that have
100b57cec5SDimitry Andric // been computed earlier in a previous iteration. It relies on the use of PHIs
110b57cec5SDimitry Andric // to identify loop carried dependences. This is scalar replacement for vector
120b57cec5SDimitry Andric // types.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric
16e8d8bef9SDimitry Andric #include "HexagonVectorLoopCarriedReuse.h"
170b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h"
180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
220b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
230b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
240b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
250b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
260b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
270b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
280b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
29480093f4SDimitry Andric #include "llvm/IR/IntrinsicsHexagon.h"
300b57cec5SDimitry Andric #include "llvm/IR/Use.h"
310b57cec5SDimitry Andric #include "llvm/IR/User.h"
320b57cec5SDimitry Andric #include "llvm/IR/Value.h"
33480093f4SDimitry Andric #include "llvm/InitializePasses.h"
340b57cec5SDimitry Andric #include "llvm/Pass.h"
350b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
360b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
370b57cec5SDimitry Andric #include "llvm/Support/Compiler.h"
380b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
400b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
410b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h"
420b57cec5SDimitry Andric #include <algorithm>
430b57cec5SDimitry Andric #include <cassert>
440b57cec5SDimitry Andric #include <cstddef>
450b57cec5SDimitry Andric #include <map>
460b57cec5SDimitry Andric #include <memory>
470b57cec5SDimitry Andric #include <set>
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric using namespace llvm;
500b57cec5SDimitry Andric
510b57cec5SDimitry Andric #define DEBUG_TYPE "hexagon-vlcr"
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric STATISTIC(HexagonNumVectorLoopCarriedReuse,
540b57cec5SDimitry Andric "Number of values that were reused from a previous iteration.");
550b57cec5SDimitry Andric
5681ad6265SDimitry Andric static cl::opt<int> HexagonVLCRIterationLim(
5781ad6265SDimitry Andric "hexagon-vlcr-iteration-lim", cl::Hidden,
580b57cec5SDimitry Andric cl::desc("Maximum distance of loop carried dependences that are handled"),
5981ad6265SDimitry Andric cl::init(2));
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric namespace llvm {
620b57cec5SDimitry Andric
63e8d8bef9SDimitry Andric void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
64e8d8bef9SDimitry Andric Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
650b57cec5SDimitry Andric
660b57cec5SDimitry Andric } // end namespace llvm
670b57cec5SDimitry Andric
680b57cec5SDimitry Andric namespace {
690b57cec5SDimitry Andric
700b57cec5SDimitry Andric // See info about DepChain in the comments at the top of this file.
710b57cec5SDimitry Andric using ChainOfDependences = SmallVector<Instruction *, 4>;
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric class DepChain {
740b57cec5SDimitry Andric ChainOfDependences Chain;
750b57cec5SDimitry Andric
760b57cec5SDimitry Andric public:
isIdentical(DepChain & Other) const770b57cec5SDimitry Andric bool isIdentical(DepChain &Other) const {
780b57cec5SDimitry Andric if (Other.size() != size())
790b57cec5SDimitry Andric return false;
800b57cec5SDimitry Andric ChainOfDependences &OtherChain = Other.getChain();
810b57cec5SDimitry Andric for (int i = 0; i < size(); ++i) {
820b57cec5SDimitry Andric if (Chain[i] != OtherChain[i])
830b57cec5SDimitry Andric return false;
840b57cec5SDimitry Andric }
850b57cec5SDimitry Andric return true;
860b57cec5SDimitry Andric }
870b57cec5SDimitry Andric
getChain()880b57cec5SDimitry Andric ChainOfDependences &getChain() {
890b57cec5SDimitry Andric return Chain;
900b57cec5SDimitry Andric }
910b57cec5SDimitry Andric
size() const920b57cec5SDimitry Andric int size() const {
930b57cec5SDimitry Andric return Chain.size();
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric
clear()960b57cec5SDimitry Andric void clear() {
970b57cec5SDimitry Andric Chain.clear();
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric
push_back(Instruction * I)1000b57cec5SDimitry Andric void push_back(Instruction *I) {
1010b57cec5SDimitry Andric Chain.push_back(I);
1020b57cec5SDimitry Andric }
1030b57cec5SDimitry Andric
iterations() const1040b57cec5SDimitry Andric int iterations() const {
1050b57cec5SDimitry Andric return size() - 1;
1060b57cec5SDimitry Andric }
1070b57cec5SDimitry Andric
front() const1080b57cec5SDimitry Andric Instruction *front() const {
1090b57cec5SDimitry Andric return Chain.front();
1100b57cec5SDimitry Andric }
1110b57cec5SDimitry Andric
back() const1120b57cec5SDimitry Andric Instruction *back() const {
1130b57cec5SDimitry Andric return Chain.back();
1140b57cec5SDimitry Andric }
1150b57cec5SDimitry Andric
operator [](const int index)1160b57cec5SDimitry Andric Instruction *&operator[](const int index) {
1170b57cec5SDimitry Andric return Chain[index];
1180b57cec5SDimitry Andric }
1190b57cec5SDimitry Andric
1200b57cec5SDimitry Andric friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
1210b57cec5SDimitry Andric };
1220b57cec5SDimitry Andric
1230b57cec5SDimitry Andric LLVM_ATTRIBUTE_UNUSED
operator <<(raw_ostream & OS,const DepChain & D)1240b57cec5SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
1250b57cec5SDimitry Andric const ChainOfDependences &CD = D.Chain;
1260b57cec5SDimitry Andric int ChainSize = CD.size();
1270b57cec5SDimitry Andric OS << "**DepChain Start::**\n";
1280b57cec5SDimitry Andric for (int i = 0; i < ChainSize -1; ++i) {
1290b57cec5SDimitry Andric OS << *(CD[i]) << " -->\n";
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric OS << *CD[ChainSize-1] << "\n";
1320b57cec5SDimitry Andric return OS;
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric
1350b57cec5SDimitry Andric struct ReuseValue {
1360b57cec5SDimitry Andric Instruction *Inst2Replace = nullptr;
1370b57cec5SDimitry Andric
1380b57cec5SDimitry Andric // In the new PHI node that we'll construct this is the value that'll be
139480093f4SDimitry Andric // used over the backedge. This is the value that gets reused from a
1400b57cec5SDimitry Andric // previous iteration.
1410b57cec5SDimitry Andric Instruction *BackedgeInst = nullptr;
1420b57cec5SDimitry Andric std::map<Instruction *, DepChain *> DepChains;
1430b57cec5SDimitry Andric int Iterations = -1;
1440b57cec5SDimitry Andric
1450b57cec5SDimitry Andric ReuseValue() = default;
1460b57cec5SDimitry Andric
reset__anon21964c660111::ReuseValue1470b57cec5SDimitry Andric void reset() {
1480b57cec5SDimitry Andric Inst2Replace = nullptr;
1490b57cec5SDimitry Andric BackedgeInst = nullptr;
1500b57cec5SDimitry Andric DepChains.clear();
1510b57cec5SDimitry Andric Iterations = -1;
1520b57cec5SDimitry Andric }
isDefined__anon21964c660111::ReuseValue1530b57cec5SDimitry Andric bool isDefined() { return Inst2Replace != nullptr; }
1540b57cec5SDimitry Andric };
1550b57cec5SDimitry Andric
1560b57cec5SDimitry Andric LLVM_ATTRIBUTE_UNUSED
operator <<(raw_ostream & OS,const ReuseValue & RU)1570b57cec5SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
1580b57cec5SDimitry Andric OS << "** ReuseValue ***\n";
1590b57cec5SDimitry Andric OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
1600b57cec5SDimitry Andric OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
1610b57cec5SDimitry Andric return OS;
1620b57cec5SDimitry Andric }
1630b57cec5SDimitry Andric
164e8d8bef9SDimitry Andric class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
1650b57cec5SDimitry Andric public:
1660b57cec5SDimitry Andric static char ID;
1670b57cec5SDimitry Andric
HexagonVectorLoopCarriedReuseLegacyPass()168e8d8bef9SDimitry Andric explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {
1690b57cec5SDimitry Andric PassRegistry *PR = PassRegistry::getPassRegistry();
170e8d8bef9SDimitry Andric initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR);
1710b57cec5SDimitry Andric }
1720b57cec5SDimitry Andric
getPassName() const1730b57cec5SDimitry Andric StringRef getPassName() const override {
1740b57cec5SDimitry Andric return "Hexagon-specific loop carried reuse for HVX vectors";
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const1770b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
1780b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID);
1790b57cec5SDimitry Andric AU.addRequiredID(LCSSAID);
1800b57cec5SDimitry Andric AU.addPreservedID(LCSSAID);
1810b57cec5SDimitry Andric AU.setPreservesCFG();
1820b57cec5SDimitry Andric }
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override;
185e8d8bef9SDimitry Andric };
186e8d8bef9SDimitry Andric
187e8d8bef9SDimitry Andric class HexagonVectorLoopCarriedReuse {
188e8d8bef9SDimitry Andric public:
HexagonVectorLoopCarriedReuse(Loop * L)189e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};
190e8d8bef9SDimitry Andric
191e8d8bef9SDimitry Andric bool run();
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric private:
1940b57cec5SDimitry Andric SetVector<DepChain *> Dependences;
1950b57cec5SDimitry Andric std::set<Instruction *> ReplacedInsts;
1960b57cec5SDimitry Andric Loop *CurLoop;
1970b57cec5SDimitry Andric ReuseValue ReuseCandidate;
1980b57cec5SDimitry Andric
1990b57cec5SDimitry Andric bool doVLCR();
2000b57cec5SDimitry Andric void findLoopCarriedDeps();
2010b57cec5SDimitry Andric void findValueToReuse();
2020b57cec5SDimitry Andric void findDepChainFromPHI(Instruction *I, DepChain &D);
2030b57cec5SDimitry Andric void reuseValue();
2040b57cec5SDimitry Andric Value *findValueInBlock(Value *Op, BasicBlock *BB);
2050b57cec5SDimitry Andric DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
2060b57cec5SDimitry Andric bool isEquivalentOperation(Instruction *I1, Instruction *I2);
2070b57cec5SDimitry Andric bool canReplace(Instruction *I);
2080b57cec5SDimitry Andric bool isCallInstCommutative(CallInst *C);
2090b57cec5SDimitry Andric };
2100b57cec5SDimitry Andric
2110b57cec5SDimitry Andric } // end anonymous namespace
2120b57cec5SDimitry Andric
213e8d8bef9SDimitry Andric char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
2140b57cec5SDimitry Andric
215e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
216e8d8bef9SDimitry Andric "Hexagon-specific predictive commoning for HVX vectors",
217e8d8bef9SDimitry Andric false, false)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)2180b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
2190b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
220e8d8bef9SDimitry Andric INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
221e8d8bef9SDimitry Andric "Hexagon-specific predictive commoning for HVX vectors",
222e8d8bef9SDimitry Andric false, false)
2230b57cec5SDimitry Andric
224e8d8bef9SDimitry Andric PreservedAnalyses
225e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReusePass::run(Loop &L, LoopAnalysisManager &LAM,
226e8d8bef9SDimitry Andric LoopStandardAnalysisResults &AR,
227e8d8bef9SDimitry Andric LPMUpdater &U) {
228e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReuse Vlcr(&L);
229e8d8bef9SDimitry Andric if (!Vlcr.run())
230e8d8bef9SDimitry Andric return PreservedAnalyses::all();
231e8d8bef9SDimitry Andric PreservedAnalyses PA;
232e8d8bef9SDimitry Andric PA.preserveSet<CFGAnalyses>();
233e8d8bef9SDimitry Andric return PA;
234e8d8bef9SDimitry Andric }
235e8d8bef9SDimitry Andric
runOnLoop(Loop * L,LPPassManager & LPM)236e8d8bef9SDimitry Andric bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
237e8d8bef9SDimitry Andric LPPassManager &LPM) {
2380b57cec5SDimitry Andric if (skipLoop(L))
2390b57cec5SDimitry Andric return false;
240e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReuse Vlcr(L);
241e8d8bef9SDimitry Andric return Vlcr.run();
242e8d8bef9SDimitry Andric }
2430b57cec5SDimitry Andric
run()244e8d8bef9SDimitry Andric bool HexagonVectorLoopCarriedReuse::run() {
245e8d8bef9SDimitry Andric if (!CurLoop->getLoopPreheader())
2460b57cec5SDimitry Andric return false;
2470b57cec5SDimitry Andric
2480b57cec5SDimitry Andric // Work only on innermost loops.
249e8d8bef9SDimitry Andric if (!CurLoop->getSubLoops().empty())
2500b57cec5SDimitry Andric return false;
2510b57cec5SDimitry Andric
2520b57cec5SDimitry Andric // Work only on single basic blocks loops.
253e8d8bef9SDimitry Andric if (CurLoop->getNumBlocks() != 1)
2540b57cec5SDimitry Andric return false;
2550b57cec5SDimitry Andric
2560b57cec5SDimitry Andric return doVLCR();
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric
isCallInstCommutative(CallInst * C)2590b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
2600b57cec5SDimitry Andric switch (C->getCalledFunction()->getIntrinsicID()) {
2610b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddb:
2620b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddb_128B:
2630b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddh:
2640b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddh_128B:
2650b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddw:
2660b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddw_128B:
2670b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddubh:
2680b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddubh_128B:
2690b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vadduhw:
2700b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vadduhw_128B:
2710b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddhw:
2720b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddhw_128B:
2730b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxb:
2740b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxb_128B:
2750b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxh:
2760b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxh_128B:
2770b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxw:
2780b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxw_128B:
2790b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxub:
2800b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxub_128B:
2810b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxuh:
2820b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxuh_128B:
2830b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminub:
2840b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminub_128B:
2850b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminuh:
2860b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminuh_128B:
2870b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminb:
2880b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminb_128B:
2890b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminh:
2900b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminh_128B:
2910b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminw:
2920b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminw_128B:
2930b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyub:
2940b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyub_128B:
2950b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyuh:
2960b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyuh_128B:
2970b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgub:
2980b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgub_128B:
2990b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgh:
3000b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgh_128B:
3010b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguh:
3020b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguh_128B:
3030b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgw:
3040b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgw_128B:
3050b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgb:
3060b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgb_128B:
3070b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguw:
3080b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguw_128B:
3090b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffh:
3100b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffh_128B:
3110b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffub:
3120b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffub_128B:
3130b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffuh:
3140b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffuh_128B:
3150b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffw:
3160b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffw_128B:
3170b57cec5SDimitry Andric return true;
3180b57cec5SDimitry Andric default:
3190b57cec5SDimitry Andric return false;
3200b57cec5SDimitry Andric }
3210b57cec5SDimitry Andric }
3220b57cec5SDimitry Andric
isEquivalentOperation(Instruction * I1,Instruction * I2)3230b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
3240b57cec5SDimitry Andric Instruction *I2) {
3250b57cec5SDimitry Andric if (!I1->isSameOperationAs(I2))
3260b57cec5SDimitry Andric return false;
3270b57cec5SDimitry Andric // This check is in place specifically for intrinsics. isSameOperationAs will
3280b57cec5SDimitry Andric // return two for any two hexagon intrinsics because they are essentially the
3290b57cec5SDimitry Andric // same instruciton (CallInst). We need to scratch the surface to see if they
3300b57cec5SDimitry Andric // are calls to the same function.
3310b57cec5SDimitry Andric if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
3320b57cec5SDimitry Andric if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
3330b57cec5SDimitry Andric if (C1->getCalledFunction() != C2->getCalledFunction())
3340b57cec5SDimitry Andric return false;
3350b57cec5SDimitry Andric }
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric
3380b57cec5SDimitry Andric // If both the Instructions are of Vector Type and any of the element
3390b57cec5SDimitry Andric // is integer constant, check their values too for equivalence.
3400b57cec5SDimitry Andric if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
3410b57cec5SDimitry Andric unsigned NumOperands = I1->getNumOperands();
3420b57cec5SDimitry Andric for (unsigned i = 0; i < NumOperands; ++i) {
3430b57cec5SDimitry Andric ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
3440b57cec5SDimitry Andric ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
3450b57cec5SDimitry Andric if(!C1) continue;
3460b57cec5SDimitry Andric assert(C2);
3470b57cec5SDimitry Andric if (C1->getSExtValue() != C2->getSExtValue())
3480b57cec5SDimitry Andric return false;
3490b57cec5SDimitry Andric }
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric
3520b57cec5SDimitry Andric return true;
3530b57cec5SDimitry Andric }
3540b57cec5SDimitry Andric
canReplace(Instruction * I)3550b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
3560b57cec5SDimitry Andric const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
3570b57cec5SDimitry Andric if (!II)
3580b57cec5SDimitry Andric return true;
3590b57cec5SDimitry Andric
3600b57cec5SDimitry Andric switch (II->getIntrinsicID()) {
3610b57cec5SDimitry Andric case Intrinsic::hexagon_V6_hi:
3620b57cec5SDimitry Andric case Intrinsic::hexagon_V6_lo:
3630b57cec5SDimitry Andric case Intrinsic::hexagon_V6_hi_128B:
3640b57cec5SDimitry Andric case Intrinsic::hexagon_V6_lo_128B:
3650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
3660b57cec5SDimitry Andric return false;
3670b57cec5SDimitry Andric default:
3680b57cec5SDimitry Andric return true;
3690b57cec5SDimitry Andric }
3700b57cec5SDimitry Andric }
findValueToReuse()3710b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::findValueToReuse() {
3720b57cec5SDimitry Andric for (auto *D : Dependences) {
3730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
3740b57cec5SDimitry Andric if (D->iterations() > HexagonVLCRIterationLim) {
3750b57cec5SDimitry Andric LLVM_DEBUG(
3760b57cec5SDimitry Andric dbgs()
3770b57cec5SDimitry Andric << ".. Skipping because number of iterations > than the limit\n");
3780b57cec5SDimitry Andric continue;
3790b57cec5SDimitry Andric }
3800b57cec5SDimitry Andric
3810b57cec5SDimitry Andric PHINode *PN = cast<PHINode>(D->front());
3820b57cec5SDimitry Andric Instruction *BEInst = D->back();
3830b57cec5SDimitry Andric int Iters = D->iterations();
3840b57cec5SDimitry Andric BasicBlock *BB = PN->getParent();
3850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
3860b57cec5SDimitry Andric << " can be reused\n");
3870b57cec5SDimitry Andric
3880b57cec5SDimitry Andric SmallVector<Instruction *, 4> PNUsers;
389349cc55cSDimitry Andric for (Use &U : PN->uses()) {
3900b57cec5SDimitry Andric Instruction *User = cast<Instruction>(U.getUser());
3910b57cec5SDimitry Andric
3920b57cec5SDimitry Andric if (User->getParent() != BB)
3930b57cec5SDimitry Andric continue;
3940b57cec5SDimitry Andric if (ReplacedInsts.count(User)) {
3950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << *User
3960b57cec5SDimitry Andric << " has already been replaced. Skipping...\n");
3970b57cec5SDimitry Andric continue;
3980b57cec5SDimitry Andric }
3990b57cec5SDimitry Andric if (isa<PHINode>(User))
4000b57cec5SDimitry Andric continue;
4010b57cec5SDimitry Andric if (User->mayHaveSideEffects())
4020b57cec5SDimitry Andric continue;
4030b57cec5SDimitry Andric if (!canReplace(User))
4040b57cec5SDimitry Andric continue;
4050b57cec5SDimitry Andric
4060b57cec5SDimitry Andric PNUsers.push_back(User);
4070b57cec5SDimitry Andric }
4080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
4090b57cec5SDimitry Andric
4100b57cec5SDimitry Andric // For each interesting use I of PN, find an Instruction BEUser that
4110b57cec5SDimitry Andric // performs the same operation as I on BEInst and whose other operands,
4120b57cec5SDimitry Andric // if any, can also be rematerialized in OtherBB. We stop when we find the
4130b57cec5SDimitry Andric // first such Instruction BEUser. This is because once BEUser is
4140b57cec5SDimitry Andric // rematerialized in OtherBB, we may find more such "fixup" opportunities
4150b57cec5SDimitry Andric // in this block. So, we'll start over again.
4160b57cec5SDimitry Andric for (Instruction *I : PNUsers) {
417349cc55cSDimitry Andric for (Use &U : BEInst->uses()) {
4180b57cec5SDimitry Andric Instruction *BEUser = cast<Instruction>(U.getUser());
4190b57cec5SDimitry Andric
4200b57cec5SDimitry Andric if (BEUser->getParent() != BB)
4210b57cec5SDimitry Andric continue;
4220b57cec5SDimitry Andric if (!isEquivalentOperation(I, BEUser))
4230b57cec5SDimitry Andric continue;
4240b57cec5SDimitry Andric
4250b57cec5SDimitry Andric int NumOperands = I->getNumOperands();
4260b57cec5SDimitry Andric
4270b57cec5SDimitry Andric // Take operands of each PNUser one by one and try to find DepChain
4280b57cec5SDimitry Andric // with every operand of the BEUser. If any of the operands of BEUser
4290b57cec5SDimitry Andric // has DepChain with current operand of the PNUser, break the matcher
4300b57cec5SDimitry Andric // loop. Keep doing this for Every PNUser operand. If PNUser operand
4310b57cec5SDimitry Andric // does not have DepChain with any of the BEUser operand, break the
4320b57cec5SDimitry Andric // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
4330b57cec5SDimitry Andric // This ensures that DepChain exist for all the PNUser operand with
4340b57cec5SDimitry Andric // BEUser operand. This also ensures that DepChains are independent of
4350b57cec5SDimitry Andric // the positions in PNUser and BEUser.
4360b57cec5SDimitry Andric std::map<Instruction *, DepChain *> DepChains;
4370b57cec5SDimitry Andric CallInst *C1 = dyn_cast<CallInst>(I);
4380b57cec5SDimitry Andric if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
4390b57cec5SDimitry Andric bool Found = false;
4400b57cec5SDimitry Andric for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
4410b57cec5SDimitry Andric Value *Op = I->getOperand(OpNo);
4420b57cec5SDimitry Andric Instruction *OpInst = dyn_cast<Instruction>(Op);
4430b57cec5SDimitry Andric Found = false;
4440b57cec5SDimitry Andric for (int T = 0; T < NumOperands; ++T) {
4450b57cec5SDimitry Andric Value *BEOp = BEUser->getOperand(T);
4460b57cec5SDimitry Andric Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
4470b57cec5SDimitry Andric if (!OpInst && !BEOpInst) {
4480b57cec5SDimitry Andric if (Op == BEOp) {
4490b57cec5SDimitry Andric Found = true;
4500b57cec5SDimitry Andric break;
4510b57cec5SDimitry Andric }
4520b57cec5SDimitry Andric }
4530b57cec5SDimitry Andric
4540b57cec5SDimitry Andric if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
4550b57cec5SDimitry Andric continue;
4560b57cec5SDimitry Andric
4570b57cec5SDimitry Andric DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andric if (D) {
4600b57cec5SDimitry Andric Found = true;
4610b57cec5SDimitry Andric DepChains[OpInst] = D;
4620b57cec5SDimitry Andric break;
4630b57cec5SDimitry Andric }
4640b57cec5SDimitry Andric }
4650b57cec5SDimitry Andric if (!Found) {
4660b57cec5SDimitry Andric BEUser = nullptr;
4670b57cec5SDimitry Andric break;
4680b57cec5SDimitry Andric }
4690b57cec5SDimitry Andric }
4700b57cec5SDimitry Andric } else {
4710b57cec5SDimitry Andric
4720b57cec5SDimitry Andric for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
4730b57cec5SDimitry Andric Value *Op = I->getOperand(OpNo);
4740b57cec5SDimitry Andric Value *BEOp = BEUser->getOperand(OpNo);
4750b57cec5SDimitry Andric
4760b57cec5SDimitry Andric Instruction *OpInst = dyn_cast<Instruction>(Op);
4770b57cec5SDimitry Andric if (!OpInst) {
4780b57cec5SDimitry Andric if (Op == BEOp)
4790b57cec5SDimitry Andric continue;
4800b57cec5SDimitry Andric // Do not allow reuse to occur when the operands may be different
4810b57cec5SDimitry Andric // values.
4820b57cec5SDimitry Andric BEUser = nullptr;
4830b57cec5SDimitry Andric break;
4840b57cec5SDimitry Andric }
4850b57cec5SDimitry Andric
4860b57cec5SDimitry Andric Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
4870b57cec5SDimitry Andric DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
4880b57cec5SDimitry Andric
4890b57cec5SDimitry Andric if (D) {
4900b57cec5SDimitry Andric DepChains[OpInst] = D;
4910b57cec5SDimitry Andric } else {
4920b57cec5SDimitry Andric BEUser = nullptr;
4930b57cec5SDimitry Andric break;
4940b57cec5SDimitry Andric }
4950b57cec5SDimitry Andric }
4960b57cec5SDimitry Andric }
4970b57cec5SDimitry Andric if (BEUser) {
4980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
4990b57cec5SDimitry Andric ReuseCandidate.Inst2Replace = I;
5000b57cec5SDimitry Andric ReuseCandidate.BackedgeInst = BEUser;
5010b57cec5SDimitry Andric ReuseCandidate.DepChains = DepChains;
5020b57cec5SDimitry Andric ReuseCandidate.Iterations = Iters;
5030b57cec5SDimitry Andric return;
5040b57cec5SDimitry Andric }
5050b57cec5SDimitry Andric ReuseCandidate.reset();
5060b57cec5SDimitry Andric }
5070b57cec5SDimitry Andric }
5080b57cec5SDimitry Andric }
5090b57cec5SDimitry Andric ReuseCandidate.reset();
5100b57cec5SDimitry Andric }
5110b57cec5SDimitry Andric
findValueInBlock(Value * Op,BasicBlock * BB)5120b57cec5SDimitry Andric Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
5130b57cec5SDimitry Andric BasicBlock *BB) {
5140b57cec5SDimitry Andric PHINode *PN = dyn_cast<PHINode>(Op);
5150b57cec5SDimitry Andric assert(PN);
5160b57cec5SDimitry Andric Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
5170b57cec5SDimitry Andric return ValueInBlock;
5180b57cec5SDimitry Andric }
5190b57cec5SDimitry Andric
reuseValue()5200b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::reuseValue() {
5210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ReuseCandidate);
5220b57cec5SDimitry Andric Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
5230b57cec5SDimitry Andric Instruction *BEInst = ReuseCandidate.BackedgeInst;
5240b57cec5SDimitry Andric int NumOperands = Inst2Replace->getNumOperands();
5250b57cec5SDimitry Andric std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
5260b57cec5SDimitry Andric int Iterations = ReuseCandidate.Iterations;
5270b57cec5SDimitry Andric BasicBlock *LoopPH = CurLoop->getLoopPreheader();
5280b57cec5SDimitry Andric assert(!DepChains.empty() && "No DepChains");
5290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
5300b57cec5SDimitry Andric
5310b57cec5SDimitry Andric SmallVector<Instruction *, 4> InstsInPreheader;
5320b57cec5SDimitry Andric for (int i = 0; i < Iterations; ++i) {
5330b57cec5SDimitry Andric Instruction *InstInPreheader = Inst2Replace->clone();
5340b57cec5SDimitry Andric SmallVector<Value *, 4> Ops;
5350b57cec5SDimitry Andric for (int j = 0; j < NumOperands; ++j) {
5360b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
5370b57cec5SDimitry Andric if (!I)
5380b57cec5SDimitry Andric continue;
5390b57cec5SDimitry Andric // Get the DepChain corresponding to this operand.
5400b57cec5SDimitry Andric DepChain &D = *DepChains[I];
5410b57cec5SDimitry Andric // Get the PHI for the iteration number and find
5420b57cec5SDimitry Andric // the incoming value from the Loop Preheader for
5430b57cec5SDimitry Andric // that PHI.
5440b57cec5SDimitry Andric Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
5450b57cec5SDimitry Andric InstInPreheader->setOperand(j, ValInPreheader);
5460b57cec5SDimitry Andric }
5470b57cec5SDimitry Andric InstsInPreheader.push_back(InstInPreheader);
5480b57cec5SDimitry Andric InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
5490b57cec5SDimitry Andric InstInPreheader->insertBefore(LoopPH->getTerminator());
5500b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
5510b57cec5SDimitry Andric << LoopPH->getName() << "\n");
5520b57cec5SDimitry Andric }
5530b57cec5SDimitry Andric BasicBlock *BB = BEInst->getParent();
5540b57cec5SDimitry Andric IRBuilder<> IRB(BB);
555*5f757f3fSDimitry Andric IRB.SetInsertPoint(BB, BB->getFirstNonPHIIt());
5560b57cec5SDimitry Andric Value *BEVal = BEInst;
5570b57cec5SDimitry Andric PHINode *NewPhi;
5580b57cec5SDimitry Andric for (int i = Iterations-1; i >=0 ; --i) {
5590b57cec5SDimitry Andric Instruction *InstInPreheader = InstsInPreheader[i];
5600b57cec5SDimitry Andric NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
5610b57cec5SDimitry Andric NewPhi->addIncoming(InstInPreheader, LoopPH);
5620b57cec5SDimitry Andric NewPhi->addIncoming(BEVal, BB);
5630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
5640b57cec5SDimitry Andric << "\n");
5650b57cec5SDimitry Andric BEVal = NewPhi;
5660b57cec5SDimitry Andric }
5670b57cec5SDimitry Andric // We are in LCSSA form. So, a value defined inside the Loop is used only
5680b57cec5SDimitry Andric // inside the loop. So, the following is safe.
5690b57cec5SDimitry Andric Inst2Replace->replaceAllUsesWith(NewPhi);
5700b57cec5SDimitry Andric ReplacedInsts.insert(Inst2Replace);
5710b57cec5SDimitry Andric ++HexagonNumVectorLoopCarriedReuse;
5720b57cec5SDimitry Andric }
5730b57cec5SDimitry Andric
doVLCR()5740b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::doVLCR() {
5750b57cec5SDimitry Andric assert(CurLoop->getSubLoops().empty() &&
5760b57cec5SDimitry Andric "Can do VLCR on the innermost loop only");
5770b57cec5SDimitry Andric assert((CurLoop->getNumBlocks() == 1) &&
5780b57cec5SDimitry Andric "Can do VLCR only on single block loops");
5790b57cec5SDimitry Andric
5800b57cec5SDimitry Andric bool Changed = false;
5810b57cec5SDimitry Andric bool Continue;
5820b57cec5SDimitry Andric
5830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
5840b57cec5SDimitry Andric do {
5850b57cec5SDimitry Andric // Reset datastructures.
5860b57cec5SDimitry Andric Dependences.clear();
5870b57cec5SDimitry Andric Continue = false;
5880b57cec5SDimitry Andric
5890b57cec5SDimitry Andric findLoopCarriedDeps();
5900b57cec5SDimitry Andric findValueToReuse();
5910b57cec5SDimitry Andric if (ReuseCandidate.isDefined()) {
5920b57cec5SDimitry Andric reuseValue();
5930b57cec5SDimitry Andric Changed = true;
5940b57cec5SDimitry Andric Continue = true;
5950b57cec5SDimitry Andric }
5960b57cec5SDimitry Andric llvm::for_each(Dependences, std::default_delete<DepChain>());
5970b57cec5SDimitry Andric } while (Continue);
5980b57cec5SDimitry Andric return Changed;
5990b57cec5SDimitry Andric }
6000b57cec5SDimitry Andric
findDepChainFromPHI(Instruction * I,DepChain & D)6010b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
6020b57cec5SDimitry Andric DepChain &D) {
6030b57cec5SDimitry Andric PHINode *PN = dyn_cast<PHINode>(I);
6040b57cec5SDimitry Andric if (!PN) {
6050b57cec5SDimitry Andric D.push_back(I);
6060b57cec5SDimitry Andric return;
6070b57cec5SDimitry Andric } else {
6080b57cec5SDimitry Andric auto NumIncomingValues = PN->getNumIncomingValues();
6090b57cec5SDimitry Andric if (NumIncomingValues != 2) {
6100b57cec5SDimitry Andric D.clear();
6110b57cec5SDimitry Andric return;
6120b57cec5SDimitry Andric }
6130b57cec5SDimitry Andric
6140b57cec5SDimitry Andric BasicBlock *BB = PN->getParent();
6150b57cec5SDimitry Andric if (BB != CurLoop->getHeader()) {
6160b57cec5SDimitry Andric D.clear();
6170b57cec5SDimitry Andric return;
6180b57cec5SDimitry Andric }
6190b57cec5SDimitry Andric
6200b57cec5SDimitry Andric Value *BEVal = PN->getIncomingValueForBlock(BB);
6210b57cec5SDimitry Andric Instruction *BEInst = dyn_cast<Instruction>(BEVal);
6220b57cec5SDimitry Andric // This is a single block loop with a preheader, so at least
6230b57cec5SDimitry Andric // one value should come over the backedge.
6240b57cec5SDimitry Andric assert(BEInst && "There should be a value over the backedge");
6250b57cec5SDimitry Andric
6260b57cec5SDimitry Andric Value *PreHdrVal =
6270b57cec5SDimitry Andric PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
6280b57cec5SDimitry Andric if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
6290b57cec5SDimitry Andric D.clear();
6300b57cec5SDimitry Andric return;
6310b57cec5SDimitry Andric }
6320b57cec5SDimitry Andric D.push_back(PN);
6330b57cec5SDimitry Andric findDepChainFromPHI(BEInst, D);
6340b57cec5SDimitry Andric }
6350b57cec5SDimitry Andric }
6360b57cec5SDimitry Andric
getDepChainBtwn(Instruction * I1,Instruction * I2,int Iters)6370b57cec5SDimitry Andric DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
6380b57cec5SDimitry Andric Instruction *I2,
6390b57cec5SDimitry Andric int Iters) {
6400b57cec5SDimitry Andric for (auto *D : Dependences) {
6410b57cec5SDimitry Andric if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
6420b57cec5SDimitry Andric return D;
6430b57cec5SDimitry Andric }
6440b57cec5SDimitry Andric return nullptr;
6450b57cec5SDimitry Andric }
6460b57cec5SDimitry Andric
findLoopCarriedDeps()6470b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
6480b57cec5SDimitry Andric BasicBlock *BB = CurLoop->getHeader();
6490b57cec5SDimitry Andric for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
6500b57cec5SDimitry Andric auto *PN = cast<PHINode>(I);
6510b57cec5SDimitry Andric if (!isa<VectorType>(PN->getType()))
6520b57cec5SDimitry Andric continue;
6530b57cec5SDimitry Andric
6540b57cec5SDimitry Andric DepChain *D = new DepChain();
6550b57cec5SDimitry Andric findDepChainFromPHI(PN, *D);
6560b57cec5SDimitry Andric if (D->size() != 0)
6570b57cec5SDimitry Andric Dependences.insert(D);
6580b57cec5SDimitry Andric else
6590b57cec5SDimitry Andric delete D;
6600b57cec5SDimitry Andric }
6610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
66204eeddc0SDimitry Andric LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);
6630b57cec5SDimitry Andric }
6640b57cec5SDimitry Andric
createHexagonVectorLoopCarriedReuseLegacyPass()665e8d8bef9SDimitry Andric Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
666e8d8bef9SDimitry Andric return new HexagonVectorLoopCarriedReuseLegacyPass();
6670b57cec5SDimitry Andric }
668