1*06c3fb27SDimitry Andric //===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
2*06c3fb27SDimitry Andric //
3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
// \file
// \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
// there is a long branch. Branch size at this point is difficult to track since
// we have no idea what spills will be inserted later on. We just assume 8 bytes
// per instruction to compute approximations without computing the actual
// instruction size to see if we're in the neighborhood of the maximum branch
// distance threshold. Tuning of what is considered "long" is handled through
// the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
//===----------------------------------------------------------------------===//
17*06c3fb27SDimitry Andric #include "AMDGPU.h"
18*06c3fb27SDimitry Andric #include "GCNSubtarget.h"
19*06c3fb27SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20*06c3fb27SDimitry Andric #include "SIMachineFunctionInfo.h"
21*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
22*06c3fb27SDimitry Andric #include "llvm/InitializePasses.h"
23*06c3fb27SDimitry Andric
24*06c3fb27SDimitry Andric using namespace llvm;
25*06c3fb27SDimitry Andric
26*06c3fb27SDimitry Andric #define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
27*06c3fb27SDimitry Andric
28*06c3fb27SDimitry Andric namespace {
29*06c3fb27SDimitry Andric
30*06c3fb27SDimitry Andric static cl::opt<double> LongBranchFactor(
31*06c3fb27SDimitry Andric "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden,
32*06c3fb27SDimitry Andric cl::desc("Factor to apply to what qualifies as a long branch "
33*06c3fb27SDimitry Andric "to reserve a pair of scalar registers. If this value "
34*06c3fb27SDimitry Andric "is 0 the long branch registers are never reserved. As this "
35*06c3fb27SDimitry Andric "value grows the greater chance the branch distance will fall "
36*06c3fb27SDimitry Andric "within the threshold and the registers will be marked to be "
37*06c3fb27SDimitry Andric "reserved. We lean towards always reserving a register for "
38*06c3fb27SDimitry Andric "long jumps"));
39*06c3fb27SDimitry Andric
40*06c3fb27SDimitry Andric class GCNPreRALongBranchReg : public MachineFunctionPass {
41*06c3fb27SDimitry Andric
42*06c3fb27SDimitry Andric struct BasicBlockInfo {
43*06c3fb27SDimitry Andric // Offset - Distance from the beginning of the function to the beginning
44*06c3fb27SDimitry Andric // of this basic block.
45*06c3fb27SDimitry Andric uint64_t Offset = 0;
46*06c3fb27SDimitry Andric // Size - Size of the basic block in bytes
47*06c3fb27SDimitry Andric uint64_t Size = 0;
48*06c3fb27SDimitry Andric };
49*06c3fb27SDimitry Andric void generateBlockInfo(MachineFunction &MF,
50*06c3fb27SDimitry Andric SmallVectorImpl<BasicBlockInfo> &BlockInfo);
51*06c3fb27SDimitry Andric
52*06c3fb27SDimitry Andric public:
53*06c3fb27SDimitry Andric static char ID;
GCNPreRALongBranchReg()54*06c3fb27SDimitry Andric GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
55*06c3fb27SDimitry Andric initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
56*06c3fb27SDimitry Andric }
57*06c3fb27SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const58*06c3fb27SDimitry Andric StringRef getPassName() const override {
59*06c3fb27SDimitry Andric return "AMDGPU Pre-RA Long Branch Reg";
60*06c3fb27SDimitry Andric }
getAnalysisUsage(AnalysisUsage & AU) const61*06c3fb27SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
62*06c3fb27SDimitry Andric AU.setPreservesAll();
63*06c3fb27SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
64*06c3fb27SDimitry Andric }
65*06c3fb27SDimitry Andric };
66*06c3fb27SDimitry Andric } // End anonymous namespace.
67*06c3fb27SDimitry Andric char GCNPreRALongBranchReg::ID = 0;
68*06c3fb27SDimitry Andric
69*06c3fb27SDimitry Andric INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
70*06c3fb27SDimitry Andric "AMDGPU Pre-RA Long Branch Reg", false, false)
71*06c3fb27SDimitry Andric
72*06c3fb27SDimitry Andric char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
generateBlockInfo(MachineFunction & MF,SmallVectorImpl<BasicBlockInfo> & BlockInfo)73*06c3fb27SDimitry Andric void GCNPreRALongBranchReg::generateBlockInfo(
74*06c3fb27SDimitry Andric MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
75*06c3fb27SDimitry Andric
76*06c3fb27SDimitry Andric BlockInfo.resize(MF.getNumBlockIDs());
77*06c3fb27SDimitry Andric
78*06c3fb27SDimitry Andric // Approximate the size of all basic blocks by just
79*06c3fb27SDimitry Andric // assuming 8 bytes per instruction
80*06c3fb27SDimitry Andric for (const MachineBasicBlock &MBB : MF) {
81*06c3fb27SDimitry Andric uint64_t NumInstr = 0;
82*06c3fb27SDimitry Andric // Loop through the basic block and add up all non-debug
83*06c3fb27SDimitry Andric // non-meta instructions
84*06c3fb27SDimitry Andric for (const MachineInstr &MI : MBB) {
85*06c3fb27SDimitry Andric // isMetaInstruction is a superset of isDebugIstr
86*06c3fb27SDimitry Andric if (MI.isMetaInstruction())
87*06c3fb27SDimitry Andric continue;
88*06c3fb27SDimitry Andric NumInstr += 1;
89*06c3fb27SDimitry Andric }
90*06c3fb27SDimitry Andric // Approximate size as just 8 bytes per instruction
91*06c3fb27SDimitry Andric BlockInfo[MBB.getNumber()].Size = 8 * NumInstr;
92*06c3fb27SDimitry Andric }
93*06c3fb27SDimitry Andric uint64_t PrevNum = (&MF)->begin()->getNumber();
94*06c3fb27SDimitry Andric for (auto &MBB :
95*06c3fb27SDimitry Andric make_range(std::next(MachineFunction::iterator((&MF)->begin())),
96*06c3fb27SDimitry Andric (&MF)->end())) {
97*06c3fb27SDimitry Andric uint64_t Num = MBB.getNumber();
98*06c3fb27SDimitry Andric // Compute the offset immediately following this block.
99*06c3fb27SDimitry Andric BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size;
100*06c3fb27SDimitry Andric PrevNum = Num;
101*06c3fb27SDimitry Andric }
102*06c3fb27SDimitry Andric }
runOnMachineFunction(MachineFunction & MF)103*06c3fb27SDimitry Andric bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
104*06c3fb27SDimitry Andric const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
105*06c3fb27SDimitry Andric const SIInstrInfo *TII = STM.getInstrInfo();
106*06c3fb27SDimitry Andric const SIRegisterInfo *TRI = STM.getRegisterInfo();
107*06c3fb27SDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
108*06c3fb27SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
109*06c3fb27SDimitry Andric
110*06c3fb27SDimitry Andric // For now, reserve highest available SGPR pair. After RA,
111*06c3fb27SDimitry Andric // shift down to a lower unused pair of SGPRs
112*06c3fb27SDimitry Andric // If all registers are used, then findUnusedRegister will return
113*06c3fb27SDimitry Andric // AMDGPU::NoRegister.
114*06c3fb27SDimitry Andric constexpr bool ReserveHighestRegister = true;
115*06c3fb27SDimitry Andric Register LongBranchReservedReg = TRI->findUnusedRegister(
116*06c3fb27SDimitry Andric MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister);
117*06c3fb27SDimitry Andric if (!LongBranchReservedReg)
118*06c3fb27SDimitry Andric return false;
119*06c3fb27SDimitry Andric
120*06c3fb27SDimitry Andric // Approximate code size and offsets of each basic block
121*06c3fb27SDimitry Andric SmallVector<BasicBlockInfo, 16> BlockInfo;
122*06c3fb27SDimitry Andric generateBlockInfo(MF, BlockInfo);
123*06c3fb27SDimitry Andric
124*06c3fb27SDimitry Andric for (const MachineBasicBlock &MBB : MF) {
125*06c3fb27SDimitry Andric MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
126*06c3fb27SDimitry Andric if (Last == MBB.end() || !Last->isUnconditionalBranch())
127*06c3fb27SDimitry Andric continue;
128*06c3fb27SDimitry Andric MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last);
129*06c3fb27SDimitry Andric uint64_t BlockDistance = static_cast<uint64_t>(
130*06c3fb27SDimitry Andric LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset);
131*06c3fb27SDimitry Andric // If the distance falls outside the threshold assume it is a long branch
132*06c3fb27SDimitry Andric // and we need to reserve the registers
133*06c3fb27SDimitry Andric if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) {
134*06c3fb27SDimitry Andric MFI->setLongBranchReservedReg(LongBranchReservedReg);
135*06c3fb27SDimitry Andric return true;
136*06c3fb27SDimitry Andric }
137*06c3fb27SDimitry Andric }
138*06c3fb27SDimitry Andric return false;
139*06c3fb27SDimitry Andric }
140