17330f729Sjoerg //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg /// \file
97330f729Sjoerg /// Insert hardware loop intrinsics into loops which are deemed profitable by
107330f729Sjoerg /// the target, by querying TargetTransformInfo. A hardware loop comprises of
117330f729Sjoerg /// two intrinsics: one, outside the loop, to set the loop iteration count and
127330f729Sjoerg /// another, in the exit block, to decrement the counter. The decremented value
137330f729Sjoerg /// can either be carried through the loop via a phi or handled in some opaque
147330f729Sjoerg /// way by the target.
157330f729Sjoerg ///
167330f729Sjoerg //===----------------------------------------------------------------------===//
177330f729Sjoerg
187330f729Sjoerg #include "llvm/ADT/Statistic.h"
197330f729Sjoerg #include "llvm/Analysis/AssumptionCache.h"
207330f729Sjoerg #include "llvm/Analysis/LoopInfo.h"
21*82d56013Sjoerg #include "llvm/Analysis/OptimizationRemarkEmitter.h"
227330f729Sjoerg #include "llvm/Analysis/ScalarEvolution.h"
23*82d56013Sjoerg #include "llvm/Analysis/TargetLibraryInfo.h"
247330f729Sjoerg #include "llvm/Analysis/TargetTransformInfo.h"
257330f729Sjoerg #include "llvm/CodeGen/Passes.h"
267330f729Sjoerg #include "llvm/CodeGen/TargetPassConfig.h"
277330f729Sjoerg #include "llvm/IR/BasicBlock.h"
28*82d56013Sjoerg #include "llvm/IR/Constants.h"
297330f729Sjoerg #include "llvm/IR/DataLayout.h"
307330f729Sjoerg #include "llvm/IR/Dominators.h"
317330f729Sjoerg #include "llvm/IR/IRBuilder.h"
327330f729Sjoerg #include "llvm/IR/Instructions.h"
337330f729Sjoerg #include "llvm/IR/IntrinsicInst.h"
347330f729Sjoerg #include "llvm/IR/Value.h"
35*82d56013Sjoerg #include "llvm/InitializePasses.h"
36*82d56013Sjoerg #include "llvm/Pass.h"
37*82d56013Sjoerg #include "llvm/PassRegistry.h"
38*82d56013Sjoerg #include "llvm/Support/CommandLine.h"
397330f729Sjoerg #include "llvm/Support/Debug.h"
407330f729Sjoerg #include "llvm/Transforms/Scalar.h"
417330f729Sjoerg #include "llvm/Transforms/Utils.h"
427330f729Sjoerg #include "llvm/Transforms/Utils/BasicBlockUtils.h"
437330f729Sjoerg #include "llvm/Transforms/Utils/Local.h"
447330f729Sjoerg #include "llvm/Transforms/Utils/LoopUtils.h"
45*82d56013Sjoerg #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
467330f729Sjoerg
477330f729Sjoerg #define DEBUG_TYPE "hardware-loops"
487330f729Sjoerg
497330f729Sjoerg #define HW_LOOPS_NAME "Hardware Loop Insertion"
507330f729Sjoerg
517330f729Sjoerg using namespace llvm;
527330f729Sjoerg
537330f729Sjoerg static cl::opt<bool>
547330f729Sjoerg ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
557330f729Sjoerg cl::desc("Force hardware loops intrinsics to be inserted"));
567330f729Sjoerg
577330f729Sjoerg static cl::opt<bool>
587330f729Sjoerg ForceHardwareLoopPHI(
597330f729Sjoerg "force-hardware-loop-phi", cl::Hidden, cl::init(false),
607330f729Sjoerg cl::desc("Force hardware loop counter to be updated through a phi"));
617330f729Sjoerg
627330f729Sjoerg static cl::opt<bool>
637330f729Sjoerg ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
647330f729Sjoerg cl::desc("Force allowance of nested hardware loops"));
657330f729Sjoerg
667330f729Sjoerg static cl::opt<unsigned>
677330f729Sjoerg LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
687330f729Sjoerg cl::desc("Set the loop decrement value"));
697330f729Sjoerg
707330f729Sjoerg static cl::opt<unsigned>
717330f729Sjoerg CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
727330f729Sjoerg cl::desc("Set the loop counter bitwidth"));
737330f729Sjoerg
747330f729Sjoerg static cl::opt<bool>
757330f729Sjoerg ForceGuardLoopEntry(
767330f729Sjoerg "force-hardware-loop-guard", cl::Hidden, cl::init(false),
777330f729Sjoerg cl::desc("Force generation of loop guard intrinsic"));
787330f729Sjoerg
797330f729Sjoerg STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
807330f729Sjoerg
81*82d56013Sjoerg #ifndef NDEBUG
debugHWLoopFailure(const StringRef DebugMsg,Instruction * I)82*82d56013Sjoerg static void debugHWLoopFailure(const StringRef DebugMsg,
83*82d56013Sjoerg Instruction *I) {
84*82d56013Sjoerg dbgs() << "HWLoops: " << DebugMsg;
85*82d56013Sjoerg if (I)
86*82d56013Sjoerg dbgs() << ' ' << *I;
87*82d56013Sjoerg else
88*82d56013Sjoerg dbgs() << '.';
89*82d56013Sjoerg dbgs() << '\n';
90*82d56013Sjoerg }
91*82d56013Sjoerg #endif
92*82d56013Sjoerg
93*82d56013Sjoerg static OptimizationRemarkAnalysis
createHWLoopAnalysis(StringRef RemarkName,Loop * L,Instruction * I)94*82d56013Sjoerg createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
95*82d56013Sjoerg Value *CodeRegion = L->getHeader();
96*82d56013Sjoerg DebugLoc DL = L->getStartLoc();
97*82d56013Sjoerg
98*82d56013Sjoerg if (I) {
99*82d56013Sjoerg CodeRegion = I->getParent();
100*82d56013Sjoerg // If there is no debug location attached to the instruction, revert back to
101*82d56013Sjoerg // using the loop's.
102*82d56013Sjoerg if (I->getDebugLoc())
103*82d56013Sjoerg DL = I->getDebugLoc();
104*82d56013Sjoerg }
105*82d56013Sjoerg
106*82d56013Sjoerg OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
107*82d56013Sjoerg R << "hardware-loop not created: ";
108*82d56013Sjoerg return R;
109*82d56013Sjoerg }
110*82d56013Sjoerg
1117330f729Sjoerg namespace {
1127330f729Sjoerg
reportHWLoopFailure(const StringRef Msg,const StringRef ORETag,OptimizationRemarkEmitter * ORE,Loop * TheLoop,Instruction * I=nullptr)113*82d56013Sjoerg void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
114*82d56013Sjoerg OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
115*82d56013Sjoerg LLVM_DEBUG(debugHWLoopFailure(Msg, I));
116*82d56013Sjoerg ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
117*82d56013Sjoerg }
118*82d56013Sjoerg
1197330f729Sjoerg using TTI = TargetTransformInfo;
1207330f729Sjoerg
1217330f729Sjoerg class HardwareLoops : public FunctionPass {
1227330f729Sjoerg public:
1237330f729Sjoerg static char ID;
1247330f729Sjoerg
HardwareLoops()1257330f729Sjoerg HardwareLoops() : FunctionPass(ID) {
1267330f729Sjoerg initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
1277330f729Sjoerg }
1287330f729Sjoerg
1297330f729Sjoerg bool runOnFunction(Function &F) override;
1307330f729Sjoerg
getAnalysisUsage(AnalysisUsage & AU) const1317330f729Sjoerg void getAnalysisUsage(AnalysisUsage &AU) const override {
1327330f729Sjoerg AU.addRequired<LoopInfoWrapperPass>();
1337330f729Sjoerg AU.addPreserved<LoopInfoWrapperPass>();
1347330f729Sjoerg AU.addRequired<DominatorTreeWrapperPass>();
1357330f729Sjoerg AU.addPreserved<DominatorTreeWrapperPass>();
1367330f729Sjoerg AU.addRequired<ScalarEvolutionWrapperPass>();
1377330f729Sjoerg AU.addRequired<AssumptionCacheTracker>();
1387330f729Sjoerg AU.addRequired<TargetTransformInfoWrapperPass>();
139*82d56013Sjoerg AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1407330f729Sjoerg }
1417330f729Sjoerg
1427330f729Sjoerg // Try to convert the given Loop into a hardware loop.
1437330f729Sjoerg bool TryConvertLoop(Loop *L);
1447330f729Sjoerg
1457330f729Sjoerg // Given that the target believes the loop to be profitable, try to
1467330f729Sjoerg // convert it.
1477330f729Sjoerg bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
1487330f729Sjoerg
1497330f729Sjoerg private:
1507330f729Sjoerg ScalarEvolution *SE = nullptr;
1517330f729Sjoerg LoopInfo *LI = nullptr;
1527330f729Sjoerg const DataLayout *DL = nullptr;
153*82d56013Sjoerg OptimizationRemarkEmitter *ORE = nullptr;
1547330f729Sjoerg const TargetTransformInfo *TTI = nullptr;
1557330f729Sjoerg DominatorTree *DT = nullptr;
1567330f729Sjoerg bool PreserveLCSSA = false;
1577330f729Sjoerg AssumptionCache *AC = nullptr;
1587330f729Sjoerg TargetLibraryInfo *LibInfo = nullptr;
1597330f729Sjoerg Module *M = nullptr;
1607330f729Sjoerg bool MadeChange = false;
1617330f729Sjoerg };
1627330f729Sjoerg
1637330f729Sjoerg class HardwareLoop {
1647330f729Sjoerg // Expand the trip count scev into a value that we can use.
1657330f729Sjoerg Value *InitLoopCount();
1667330f729Sjoerg
1677330f729Sjoerg // Insert the set_loop_iteration intrinsic.
168*82d56013Sjoerg Value *InsertIterationSetup(Value *LoopCountInit);
1697330f729Sjoerg
1707330f729Sjoerg // Insert the loop_decrement intrinsic.
1717330f729Sjoerg void InsertLoopDec();
1727330f729Sjoerg
1737330f729Sjoerg // Insert the loop_decrement_reg intrinsic.
1747330f729Sjoerg Instruction *InsertLoopRegDec(Value *EltsRem);
1757330f729Sjoerg
1767330f729Sjoerg // If the target requires the counter value to be updated in the loop,
1777330f729Sjoerg // insert a phi to hold the value. The intended purpose is for use by
1787330f729Sjoerg // loop_decrement_reg.
1797330f729Sjoerg PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
1807330f729Sjoerg
1817330f729Sjoerg // Create a new cmp, that checks the returned value of loop_decrement*,
1827330f729Sjoerg // and update the exit branch to use it.
1837330f729Sjoerg void UpdateBranch(Value *EltsRem);
1847330f729Sjoerg
1857330f729Sjoerg public:
HardwareLoop(HardwareLoopInfo & Info,ScalarEvolution & SE,const DataLayout & DL,OptimizationRemarkEmitter * ORE)1867330f729Sjoerg HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
187*82d56013Sjoerg const DataLayout &DL,
188*82d56013Sjoerg OptimizationRemarkEmitter *ORE) :
189*82d56013Sjoerg SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
190*82d56013Sjoerg TripCount(Info.TripCount),
1917330f729Sjoerg CountType(Info.CountType),
1927330f729Sjoerg ExitBranch(Info.ExitBranch),
1937330f729Sjoerg LoopDecrement(Info.LoopDecrement),
1947330f729Sjoerg UsePHICounter(Info.CounterInReg),
1957330f729Sjoerg UseLoopGuard(Info.PerformEntryTest) { }
1967330f729Sjoerg
1977330f729Sjoerg void Create();
1987330f729Sjoerg
1997330f729Sjoerg private:
2007330f729Sjoerg ScalarEvolution &SE;
2017330f729Sjoerg const DataLayout &DL;
202*82d56013Sjoerg OptimizationRemarkEmitter *ORE = nullptr;
2037330f729Sjoerg Loop *L = nullptr;
2047330f729Sjoerg Module *M = nullptr;
205*82d56013Sjoerg const SCEV *TripCount = nullptr;
2067330f729Sjoerg Type *CountType = nullptr;
2077330f729Sjoerg BranchInst *ExitBranch = nullptr;
2087330f729Sjoerg Value *LoopDecrement = nullptr;
2097330f729Sjoerg bool UsePHICounter = false;
2107330f729Sjoerg bool UseLoopGuard = false;
2117330f729Sjoerg BasicBlock *BeginBB = nullptr;
2127330f729Sjoerg };
2137330f729Sjoerg }
2147330f729Sjoerg
2157330f729Sjoerg char HardwareLoops::ID = 0;
2167330f729Sjoerg
runOnFunction(Function & F)2177330f729Sjoerg bool HardwareLoops::runOnFunction(Function &F) {
2187330f729Sjoerg if (skipFunction(F))
2197330f729Sjoerg return false;
2207330f729Sjoerg
2217330f729Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
2227330f729Sjoerg
2237330f729Sjoerg LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2247330f729Sjoerg SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2257330f729Sjoerg DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2267330f729Sjoerg TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2277330f729Sjoerg DL = &F.getParent()->getDataLayout();
228*82d56013Sjoerg ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2297330f729Sjoerg auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2307330f729Sjoerg LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
2317330f729Sjoerg PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
2327330f729Sjoerg AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2337330f729Sjoerg M = F.getParent();
2347330f729Sjoerg
235*82d56013Sjoerg for (Loop *L : *LI)
236*82d56013Sjoerg if (L->isOutermost())
2377330f729Sjoerg TryConvertLoop(L);
2387330f729Sjoerg
2397330f729Sjoerg return MadeChange;
2407330f729Sjoerg }
2417330f729Sjoerg
2427330f729Sjoerg // Return true if the search should stop, which will be when an inner loop is
2437330f729Sjoerg // converted and the parent loop doesn't support containing a hardware loop.
TryConvertLoop(Loop * L)2447330f729Sjoerg bool HardwareLoops::TryConvertLoop(Loop *L) {
2457330f729Sjoerg // Process nested loops first.
246*82d56013Sjoerg bool AnyChanged = false;
247*82d56013Sjoerg for (Loop *SL : *L)
248*82d56013Sjoerg AnyChanged |= TryConvertLoop(SL);
249*82d56013Sjoerg if (AnyChanged) {
250*82d56013Sjoerg reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
251*82d56013Sjoerg ORE, L);
2527330f729Sjoerg return true; // Stop search.
253*82d56013Sjoerg }
254*82d56013Sjoerg
255*82d56013Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
2567330f729Sjoerg
2577330f729Sjoerg HardwareLoopInfo HWLoopInfo(L);
258*82d56013Sjoerg if (!HWLoopInfo.canAnalyze(*LI)) {
259*82d56013Sjoerg reportHWLoopFailure("cannot analyze loop, irreducible control flow",
260*82d56013Sjoerg "HWLoopCannotAnalyze", ORE, L);
2617330f729Sjoerg return false;
262*82d56013Sjoerg }
2637330f729Sjoerg
264*82d56013Sjoerg if (!ForceHardwareLoops &&
265*82d56013Sjoerg !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
266*82d56013Sjoerg reportHWLoopFailure("it's not profitable to create a hardware-loop",
267*82d56013Sjoerg "HWLoopNotProfitable", ORE, L);
268*82d56013Sjoerg return false;
269*82d56013Sjoerg }
2707330f729Sjoerg
2717330f729Sjoerg // Allow overriding of the counter width and loop decrement value.
2727330f729Sjoerg if (CounterBitWidth.getNumOccurrences())
2737330f729Sjoerg HWLoopInfo.CountType =
2747330f729Sjoerg IntegerType::get(M->getContext(), CounterBitWidth);
2757330f729Sjoerg
2767330f729Sjoerg if (LoopDecrement.getNumOccurrences())
2777330f729Sjoerg HWLoopInfo.LoopDecrement =
2787330f729Sjoerg ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
2797330f729Sjoerg
2807330f729Sjoerg MadeChange |= TryConvertLoop(HWLoopInfo);
2817330f729Sjoerg return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
2827330f729Sjoerg }
2837330f729Sjoerg
TryConvertLoop(HardwareLoopInfo & HWLoopInfo)2847330f729Sjoerg bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
2857330f729Sjoerg
2867330f729Sjoerg Loop *L = HWLoopInfo.L;
2877330f729Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
2887330f729Sjoerg
2897330f729Sjoerg if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
290*82d56013Sjoerg ForceHardwareLoopPHI)) {
291*82d56013Sjoerg // TODO: there can be many reasons a loop is not considered a
292*82d56013Sjoerg // candidate, so we should let isHardwareLoopCandidate fill in the
293*82d56013Sjoerg // reason and then report a better message here.
294*82d56013Sjoerg reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
2957330f729Sjoerg return false;
296*82d56013Sjoerg }
2977330f729Sjoerg
2987330f729Sjoerg assert(
299*82d56013Sjoerg (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) &&
3007330f729Sjoerg "Hardware Loop must have set exit info.");
3017330f729Sjoerg
3027330f729Sjoerg BasicBlock *Preheader = L->getLoopPreheader();
3037330f729Sjoerg
3047330f729Sjoerg // If we don't have a preheader, then insert one.
3057330f729Sjoerg if (!Preheader)
3067330f729Sjoerg Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
3077330f729Sjoerg if (!Preheader)
3087330f729Sjoerg return false;
3097330f729Sjoerg
310*82d56013Sjoerg HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
3117330f729Sjoerg HWLoop.Create();
3127330f729Sjoerg ++NumHWLoops;
3137330f729Sjoerg return true;
3147330f729Sjoerg }
3157330f729Sjoerg
Create()3167330f729Sjoerg void HardwareLoop::Create() {
3177330f729Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
3187330f729Sjoerg
3197330f729Sjoerg Value *LoopCountInit = InitLoopCount();
320*82d56013Sjoerg if (!LoopCountInit) {
321*82d56013Sjoerg reportHWLoopFailure("could not safely create a loop count expression",
322*82d56013Sjoerg "HWLoopNotSafe", ORE, L);
3237330f729Sjoerg return;
324*82d56013Sjoerg }
3257330f729Sjoerg
326*82d56013Sjoerg Value *Setup = InsertIterationSetup(LoopCountInit);
3277330f729Sjoerg
3287330f729Sjoerg if (UsePHICounter || ForceHardwareLoopPHI) {
3297330f729Sjoerg Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
330*82d56013Sjoerg Value *EltsRem = InsertPHICounter(Setup, LoopDec);
3317330f729Sjoerg LoopDec->setOperand(0, EltsRem);
3327330f729Sjoerg UpdateBranch(LoopDec);
3337330f729Sjoerg } else
3347330f729Sjoerg InsertLoopDec();
3357330f729Sjoerg
3367330f729Sjoerg // Run through the basic blocks of the loop and see if any of them have dead
3377330f729Sjoerg // PHIs that can be removed.
3387330f729Sjoerg for (auto I : L->blocks())
3397330f729Sjoerg DeleteDeadPHIs(I);
3407330f729Sjoerg }
3417330f729Sjoerg
CanGenerateTest(Loop * L,Value * Count)3427330f729Sjoerg static bool CanGenerateTest(Loop *L, Value *Count) {
3437330f729Sjoerg BasicBlock *Preheader = L->getLoopPreheader();
3447330f729Sjoerg if (!Preheader->getSinglePredecessor())
3457330f729Sjoerg return false;
3467330f729Sjoerg
3477330f729Sjoerg BasicBlock *Pred = Preheader->getSinglePredecessor();
3487330f729Sjoerg if (!isa<BranchInst>(Pred->getTerminator()))
3497330f729Sjoerg return false;
3507330f729Sjoerg
3517330f729Sjoerg auto *BI = cast<BranchInst>(Pred->getTerminator());
3527330f729Sjoerg if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
3537330f729Sjoerg return false;
3547330f729Sjoerg
3557330f729Sjoerg // Check that the icmp is checking for equality of Count and zero and that
3567330f729Sjoerg // a non-zero value results in entering the loop.
3577330f729Sjoerg auto ICmp = cast<ICmpInst>(BI->getCondition());
3587330f729Sjoerg LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
3597330f729Sjoerg if (!ICmp->isEquality())
3607330f729Sjoerg return false;
3617330f729Sjoerg
3627330f729Sjoerg auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
3637330f729Sjoerg if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
3647330f729Sjoerg return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
3657330f729Sjoerg return false;
3667330f729Sjoerg };
3677330f729Sjoerg
3687330f729Sjoerg if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
3697330f729Sjoerg return false;
3707330f729Sjoerg
3717330f729Sjoerg unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
3727330f729Sjoerg if (BI->getSuccessor(SuccIdx) != Preheader)
3737330f729Sjoerg return false;
3747330f729Sjoerg
3757330f729Sjoerg return true;
3767330f729Sjoerg }
3777330f729Sjoerg
InitLoopCount()3787330f729Sjoerg Value *HardwareLoop::InitLoopCount() {
3797330f729Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
3807330f729Sjoerg // Can we replace a conditional branch with an intrinsic that sets the
3817330f729Sjoerg // loop counter and tests that is not zero?
3827330f729Sjoerg
3837330f729Sjoerg SCEVExpander SCEVE(SE, DL, "loopcnt");
3847330f729Sjoerg
3857330f729Sjoerg // If we're trying to use the 'test and set' form of the intrinsic, we need
3867330f729Sjoerg // to replace a conditional branch that is controlling entry to the loop. It
3877330f729Sjoerg // is likely (guaranteed?) that the preheader has an unconditional branch to
3887330f729Sjoerg // the loop header, so also check if it has a single predecessor.
389*82d56013Sjoerg if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount,
390*82d56013Sjoerg SE.getZero(TripCount->getType()))) {
3917330f729Sjoerg LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
3927330f729Sjoerg UseLoopGuard |= ForceGuardLoopEntry;
3937330f729Sjoerg } else
3947330f729Sjoerg UseLoopGuard = false;
3957330f729Sjoerg
3967330f729Sjoerg BasicBlock *BB = L->getLoopPreheader();
3977330f729Sjoerg if (UseLoopGuard && BB->getSinglePredecessor() &&
398*82d56013Sjoerg cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
399*82d56013Sjoerg BasicBlock *Predecessor = BB->getSinglePredecessor();
400*82d56013Sjoerg // If it's not safe to create a while loop then don't force it and create a
401*82d56013Sjoerg // do-while loop instead
402*82d56013Sjoerg if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE))
403*82d56013Sjoerg UseLoopGuard = false;
404*82d56013Sjoerg else
405*82d56013Sjoerg BB = Predecessor;
406*82d56013Sjoerg }
4077330f729Sjoerg
408*82d56013Sjoerg if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
409*82d56013Sjoerg LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount "
410*82d56013Sjoerg << *TripCount << "\n");
4117330f729Sjoerg return nullptr;
4127330f729Sjoerg }
4137330f729Sjoerg
414*82d56013Sjoerg Value *Count = SCEVE.expandCodeFor(TripCount, CountType,
4157330f729Sjoerg BB->getTerminator());
4167330f729Sjoerg
4177330f729Sjoerg // FIXME: We've expanded Count where we hope to insert the counter setting
4187330f729Sjoerg // intrinsic. But, in the case of the 'test and set' form, we may fallback to
4197330f729Sjoerg // the just 'set' form and in which case the insertion block is most likely
4207330f729Sjoerg // different. It means there will be instruction(s) in a block that possibly
4217330f729Sjoerg // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
4227330f729Sjoerg // but it's doesn't appear to work in all cases.
4237330f729Sjoerg
4247330f729Sjoerg UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
4257330f729Sjoerg BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
4267330f729Sjoerg LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
4277330f729Sjoerg << " - Expanded Count in " << BB->getName() << "\n"
4287330f729Sjoerg << " - Will insert set counter intrinsic into: "
4297330f729Sjoerg << BeginBB->getName() << "\n");
4307330f729Sjoerg return Count;
4317330f729Sjoerg }
4327330f729Sjoerg
InsertIterationSetup(Value * LoopCountInit)433*82d56013Sjoerg Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
4347330f729Sjoerg IRBuilder<> Builder(BeginBB->getTerminator());
4357330f729Sjoerg Type *Ty = LoopCountInit->getType();
436*82d56013Sjoerg bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
437*82d56013Sjoerg Intrinsic::ID ID = UseLoopGuard
438*82d56013Sjoerg ? (UsePhi ? Intrinsic::test_start_loop_iterations
439*82d56013Sjoerg : Intrinsic::test_set_loop_iterations)
440*82d56013Sjoerg : (UsePhi ? Intrinsic::start_loop_iterations
441*82d56013Sjoerg : Intrinsic::set_loop_iterations);
4427330f729Sjoerg Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
443*82d56013Sjoerg Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
4447330f729Sjoerg
4457330f729Sjoerg // Use the return value of the intrinsic to control the entry of the loop.
4467330f729Sjoerg if (UseLoopGuard) {
4477330f729Sjoerg assert((isa<BranchInst>(BeginBB->getTerminator()) &&
4487330f729Sjoerg cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
4497330f729Sjoerg "Expected conditional branch");
450*82d56013Sjoerg
451*82d56013Sjoerg Value *SetCount =
452*82d56013Sjoerg UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
4537330f729Sjoerg auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
4547330f729Sjoerg LoopGuard->setCondition(SetCount);
4557330f729Sjoerg if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
4567330f729Sjoerg LoopGuard->swapSuccessors();
4577330f729Sjoerg }
458*82d56013Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
459*82d56013Sjoerg << "\n");
460*82d56013Sjoerg if (UsePhi && UseLoopGuard)
461*82d56013Sjoerg LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
462*82d56013Sjoerg return !UsePhi ? LoopCountInit : LoopSetup;
4637330f729Sjoerg }
4647330f729Sjoerg
InsertLoopDec()4657330f729Sjoerg void HardwareLoop::InsertLoopDec() {
4667330f729Sjoerg IRBuilder<> CondBuilder(ExitBranch);
4677330f729Sjoerg
4687330f729Sjoerg Function *DecFunc =
4697330f729Sjoerg Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
4707330f729Sjoerg LoopDecrement->getType());
4717330f729Sjoerg Value *Ops[] = { LoopDecrement };
4727330f729Sjoerg Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
4737330f729Sjoerg Value *OldCond = ExitBranch->getCondition();
4747330f729Sjoerg ExitBranch->setCondition(NewCond);
4757330f729Sjoerg
4767330f729Sjoerg // The false branch must exit the loop.
4777330f729Sjoerg if (!L->contains(ExitBranch->getSuccessor(0)))
4787330f729Sjoerg ExitBranch->swapSuccessors();
4797330f729Sjoerg
4807330f729Sjoerg // The old condition may be dead now, and may have even created a dead PHI
4817330f729Sjoerg // (the original induction variable).
4827330f729Sjoerg RecursivelyDeleteTriviallyDeadInstructions(OldCond);
4837330f729Sjoerg
4847330f729Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
4857330f729Sjoerg }
4867330f729Sjoerg
InsertLoopRegDec(Value * EltsRem)4877330f729Sjoerg Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
4887330f729Sjoerg IRBuilder<> CondBuilder(ExitBranch);
4897330f729Sjoerg
4907330f729Sjoerg Function *DecFunc =
4917330f729Sjoerg Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
492*82d56013Sjoerg { EltsRem->getType() });
4937330f729Sjoerg Value *Ops[] = { EltsRem, LoopDecrement };
4947330f729Sjoerg Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
4957330f729Sjoerg
4967330f729Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
4977330f729Sjoerg return cast<Instruction>(Call);
4987330f729Sjoerg }
4997330f729Sjoerg
InsertPHICounter(Value * NumElts,Value * EltsRem)5007330f729Sjoerg PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
5017330f729Sjoerg BasicBlock *Preheader = L->getLoopPreheader();
5027330f729Sjoerg BasicBlock *Header = L->getHeader();
5037330f729Sjoerg BasicBlock *Latch = ExitBranch->getParent();
5047330f729Sjoerg IRBuilder<> Builder(Header->getFirstNonPHI());
5057330f729Sjoerg PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
5067330f729Sjoerg Index->addIncoming(NumElts, Preheader);
5077330f729Sjoerg Index->addIncoming(EltsRem, Latch);
5087330f729Sjoerg LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
5097330f729Sjoerg return Index;
5107330f729Sjoerg }
5117330f729Sjoerg
UpdateBranch(Value * EltsRem)5127330f729Sjoerg void HardwareLoop::UpdateBranch(Value *EltsRem) {
5137330f729Sjoerg IRBuilder<> CondBuilder(ExitBranch);
5147330f729Sjoerg Value *NewCond =
5157330f729Sjoerg CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
5167330f729Sjoerg Value *OldCond = ExitBranch->getCondition();
5177330f729Sjoerg ExitBranch->setCondition(NewCond);
5187330f729Sjoerg
5197330f729Sjoerg // The false branch must exit the loop.
5207330f729Sjoerg if (!L->contains(ExitBranch->getSuccessor(0)))
5217330f729Sjoerg ExitBranch->swapSuccessors();
5227330f729Sjoerg
5237330f729Sjoerg // The old condition may be dead now, and may have even created a dead PHI
5247330f729Sjoerg // (the original induction variable).
5257330f729Sjoerg RecursivelyDeleteTriviallyDeadInstructions(OldCond);
5267330f729Sjoerg }
5277330f729Sjoerg
INITIALIZE_PASS_BEGIN(HardwareLoops,DEBUG_TYPE,HW_LOOPS_NAME,false,false)5287330f729Sjoerg INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
5297330f729Sjoerg INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5307330f729Sjoerg INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
5317330f729Sjoerg INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
532*82d56013Sjoerg INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
5337330f729Sjoerg INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
5347330f729Sjoerg
5357330f729Sjoerg FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
536