xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //   mov %SPL, %depot
130b57cec5SDimitry Andric //   cvta.local %SP, %SPL
140b57cec5SDimitry Andric //
// Because a frame index is a generic address and alloca can only return a
// generic pointer, without this pass the instructions producing an alloca'ed
// address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
// and loads on this address with their .local versions, but this may introduce
// a lot of cvta.to.local instructions. Performance can be improved if we avoid
// casting the address back and forth and directly calculate the local address
// based on %SPL. This peephole pass optimizes these cases.
//
// For example, it will transform the following pattern
24349cc55cSDimitry Andric //    %0 = LEA_ADDRi64 %VRFrame64, 4
25*0fca6ea1SDimitry Andric //    %1 = cvta_to_local_64 %0
260b57cec5SDimitry Andric //
270b57cec5SDimitry Andric // into
28349cc55cSDimitry Andric //    %1 = LEA_ADDRi64 %VRFrameLocal64, 4
290b57cec5SDimitry Andric //
30349cc55cSDimitry Andric // %VRFrameLocal64 is the virtual register name of %SPL
310b57cec5SDimitry Andric //
320b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric #include "NVPTX.h"
35349cc55cSDimitry Andric #include "NVPTXRegisterInfo.h"
36349cc55cSDimitry Andric #include "NVPTXSubtarget.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
380b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
390b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
400b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
410b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric using namespace llvm;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric #define DEBUG_TYPE "nvptx-peephole"
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric namespace llvm {
480b57cec5SDimitry Andric void initializeNVPTXPeepholePass(PassRegistry &);
490b57cec5SDimitry Andric }
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric namespace {
520b57cec5SDimitry Andric struct NVPTXPeephole : public MachineFunctionPass {
530b57cec5SDimitry Andric  public:
540b57cec5SDimitry Andric   static char ID;
550b57cec5SDimitry Andric   NVPTXPeephole() : MachineFunctionPass(ID) {
560b57cec5SDimitry Andric     initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
570b57cec5SDimitry Andric   }
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric   StringRef getPassName() const override {
620b57cec5SDimitry Andric     return "NVPTX optimize redundant cvta.to.local instruction";
630b57cec5SDimitry Andric   }
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
660b57cec5SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
670b57cec5SDimitry Andric   }
680b57cec5SDimitry Andric };
690b57cec5SDimitry Andric }
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric char NVPTXPeephole::ID = 0;
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
760b57cec5SDimitry Andric   auto &MBB = *Root.getParent();
770b57cec5SDimitry Andric   auto &MF = *MBB.getParent();
780b57cec5SDimitry Andric   // Check current instruction is cvta.to.local
79*0fca6ea1SDimitry Andric   if (Root.getOpcode() != NVPTX::cvta_to_local_64 &&
80*0fca6ea1SDimitry Andric       Root.getOpcode() != NVPTX::cvta_to_local)
810b57cec5SDimitry Andric     return false;
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric   auto &Op = Root.getOperand(1);
840b57cec5SDimitry Andric   const auto &MRI = MF.getRegInfo();
850b57cec5SDimitry Andric   MachineInstr *GenericAddrDef = nullptr;
86bdd1243dSDimitry Andric   if (Op.isReg() && Op.getReg().isVirtual()) {
870b57cec5SDimitry Andric     GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
880b57cec5SDimitry Andric   }
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   // Check the register operand is uniquely defined by LEA_ADDRi instruction
910b57cec5SDimitry Andric   if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
920b57cec5SDimitry Andric       (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
930b57cec5SDimitry Andric        GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
940b57cec5SDimitry Andric     return false;
950b57cec5SDimitry Andric   }
960b57cec5SDimitry Andric 
97349cc55cSDimitry Andric   const NVPTXRegisterInfo *NRI =
98349cc55cSDimitry Andric       MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
99349cc55cSDimitry Andric 
1000b57cec5SDimitry Andric   // Check the LEA_ADDRi operand is Frame index
1010b57cec5SDimitry Andric   auto &BaseAddrOp = GenericAddrDef->getOperand(1);
102349cc55cSDimitry Andric   if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
1030b57cec5SDimitry Andric     return true;
1040b57cec5SDimitry Andric   }
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric   return false;
1070b57cec5SDimitry Andric }
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric static void CombineCVTAToLocal(MachineInstr &Root) {
1100b57cec5SDimitry Andric   auto &MBB = *Root.getParent();
1110b57cec5SDimitry Andric   auto &MF = *MBB.getParent();
1120b57cec5SDimitry Andric   const auto &MRI = MF.getRegInfo();
1130b57cec5SDimitry Andric   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1140b57cec5SDimitry Andric   auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
1150b57cec5SDimitry Andric 
116349cc55cSDimitry Andric   const NVPTXRegisterInfo *NRI =
117349cc55cSDimitry Andric       MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
118349cc55cSDimitry Andric 
1190b57cec5SDimitry Andric   MachineInstrBuilder MIB =
1200b57cec5SDimitry Andric       BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
1210b57cec5SDimitry Andric               Root.getOperand(0).getReg())
122349cc55cSDimitry Andric           .addReg(NRI->getFrameLocalRegister(MF))
1230b57cec5SDimitry Andric           .add(Prev.getOperand(2));
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric   MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
1260b57cec5SDimitry Andric 
1270b57cec5SDimitry Andric   // Check if MRI has only one non dbg use, which is Root
1280b57cec5SDimitry Andric   if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
1290eae32dcSDimitry Andric     Prev.eraseFromParent();
1300b57cec5SDimitry Andric   }
1310eae32dcSDimitry Andric   Root.eraseFromParent();
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
1350b57cec5SDimitry Andric   if (skipFunction(MF.getFunction()))
1360b57cec5SDimitry Andric     return false;
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   bool Changed = false;
1390b57cec5SDimitry Andric   // Loop over all of the basic blocks.
1400b57cec5SDimitry Andric   for (auto &MBB : MF) {
1410b57cec5SDimitry Andric     // Traverse the basic block.
1420b57cec5SDimitry Andric     auto BlockIter = MBB.begin();
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric     while (BlockIter != MBB.end()) {
1450b57cec5SDimitry Andric       auto &MI = *BlockIter++;
1460b57cec5SDimitry Andric       if (isCVTAToLocalCombinationCandidate(MI)) {
1470b57cec5SDimitry Andric         CombineCVTAToLocal(MI);
1480b57cec5SDimitry Andric         Changed = true;
1490b57cec5SDimitry Andric       }
1500b57cec5SDimitry Andric     }  // Instruction
1510b57cec5SDimitry Andric   }    // Basic Block
1520b57cec5SDimitry Andric 
153349cc55cSDimitry Andric   const NVPTXRegisterInfo *NRI =
154349cc55cSDimitry Andric       MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
155349cc55cSDimitry Andric 
1560b57cec5SDimitry Andric   // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
1570b57cec5SDimitry Andric   const auto &MRI = MF.getRegInfo();
158349cc55cSDimitry Andric   if (MRI.use_empty(NRI->getFrameRegister(MF))) {
159349cc55cSDimitry Andric     if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
1600eae32dcSDimitry Andric       MI->eraseFromParent();
1610b57cec5SDimitry Andric     }
1620b57cec5SDimitry Andric   }
1630b57cec5SDimitry Andric 
1640b57cec5SDimitry Andric   return Changed;
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
168