15f757f3fSDimitry Andric //===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===// 25f757f3fSDimitry Andric // 35f757f3fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45f757f3fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55f757f3fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65f757f3fSDimitry Andric // 75f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 85f757f3fSDimitry Andric // 95f757f3fSDimitry Andric /// \file 105f757f3fSDimitry Andric /// Lowering the WWM_COPY instructions for various register classes. 115f757f3fSDimitry Andric /// AMDGPU target generates WWM_COPY instruction to differentiate WWM 125f757f3fSDimitry Andric /// copy from COPY. This pass generates the necessary exec mask manipulation 135f757f3fSDimitry Andric /// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to 145f757f3fSDimitry Andric /// COPY. 155f757f3fSDimitry Andric // 165f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 175f757f3fSDimitry Andric 185f757f3fSDimitry Andric #include "AMDGPU.h" 195f757f3fSDimitry Andric #include "GCNSubtarget.h" 205f757f3fSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 215f757f3fSDimitry Andric #include "SIMachineFunctionInfo.h" 225f757f3fSDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 235f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 245f757f3fSDimitry Andric #include "llvm/CodeGen/VirtRegMap.h" 255f757f3fSDimitry Andric #include "llvm/InitializePasses.h" 265f757f3fSDimitry Andric 275f757f3fSDimitry Andric using namespace llvm; 285f757f3fSDimitry Andric 295f757f3fSDimitry Andric #define DEBUG_TYPE "si-lower-wwm-copies" 305f757f3fSDimitry Andric 315f757f3fSDimitry Andric namespace { 325f757f3fSDimitry Andric 335f757f3fSDimitry Andric class SILowerWWMCopies : public MachineFunctionPass { 345f757f3fSDimitry Andric public: 355f757f3fSDimitry Andric static char ID; 365f757f3fSDimitry Andric 375f757f3fSDimitry Andric SILowerWWMCopies() : MachineFunctionPass(ID) { 385f757f3fSDimitry Andric initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry()); 395f757f3fSDimitry Andric } 405f757f3fSDimitry Andric 415f757f3fSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 425f757f3fSDimitry Andric 435f757f3fSDimitry Andric StringRef getPassName() const override { return "SI Lower WWM Copies"; } 445f757f3fSDimitry Andric 455f757f3fSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 465f757f3fSDimitry Andric AU.setPreservesAll(); 475f757f3fSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 485f757f3fSDimitry Andric } 495f757f3fSDimitry Andric 505f757f3fSDimitry Andric private: 515f757f3fSDimitry Andric bool isSCCLiveAtMI(const MachineInstr &MI); 525f757f3fSDimitry Andric void addToWWMSpills(MachineFunction &MF, Register Reg); 535f757f3fSDimitry Andric 545f757f3fSDimitry Andric LiveIntervals *LIS; 555f757f3fSDimitry Andric SlotIndexes *Indexes; 565f757f3fSDimitry Andric VirtRegMap *VRM; 575f757f3fSDimitry Andric const SIRegisterInfo *TRI; 585f757f3fSDimitry Andric const MachineRegisterInfo *MRI; 595f757f3fSDimitry Andric SIMachineFunctionInfo *MFI; 605f757f3fSDimitry Andric }; 615f757f3fSDimitry Andric 625f757f3fSDimitry Andric } // End anonymous namespace. 635f757f3fSDimitry Andric 645f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", 655f757f3fSDimitry Andric false, false) 66*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 675f757f3fSDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 685f757f3fSDimitry Andric INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false, 695f757f3fSDimitry Andric false) 705f757f3fSDimitry Andric 715f757f3fSDimitry Andric char SILowerWWMCopies::ID = 0; 725f757f3fSDimitry Andric 735f757f3fSDimitry Andric char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID; 745f757f3fSDimitry Andric 755f757f3fSDimitry Andric bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { 765f757f3fSDimitry Andric // We can't determine the liveness info if LIS isn't available. Early return 775f757f3fSDimitry Andric // in that case and always assume SCC is live. 785f757f3fSDimitry Andric if (!LIS) 795f757f3fSDimitry Andric return true; 805f757f3fSDimitry Andric 815f757f3fSDimitry Andric LiveRange &LR = 825f757f3fSDimitry Andric LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI)); 835f757f3fSDimitry Andric SlotIndex Idx = LIS->getInstructionIndex(MI); 845f757f3fSDimitry Andric return LR.liveAt(Idx); 855f757f3fSDimitry Andric } 865f757f3fSDimitry Andric 875f757f3fSDimitry Andric // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills 885f757f3fSDimitry Andric // for preserving its entire lanes at function prolog/epilog. 895f757f3fSDimitry Andric void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { 905f757f3fSDimitry Andric if (Reg.isPhysical()) 915f757f3fSDimitry Andric return; 925f757f3fSDimitry Andric 935f757f3fSDimitry Andric Register PhysReg = VRM->getPhys(Reg); 945f757f3fSDimitry Andric assert(PhysReg != VirtRegMap::NO_PHYS_REG && 955f757f3fSDimitry Andric "should have allocated a physical register"); 965f757f3fSDimitry Andric 975f757f3fSDimitry Andric MFI->allocateWWMSpill(MF, PhysReg); 985f757f3fSDimitry Andric } 995f757f3fSDimitry Andric 1005f757f3fSDimitry Andric bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) { 1015f757f3fSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1025f757f3fSDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1035f757f3fSDimitry Andric 1045f757f3fSDimitry Andric MFI = MF.getInfo<SIMachineFunctionInfo>(); 105*0fca6ea1SDimitry Andric auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); 106*0fca6ea1SDimitry Andric LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; 107*0fca6ea1SDimitry Andric auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); 108*0fca6ea1SDimitry Andric Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; 1095f757f3fSDimitry Andric VRM = getAnalysisIfAvailable<VirtRegMap>(); 1105f757f3fSDimitry Andric TRI = ST.getRegisterInfo(); 1115f757f3fSDimitry Andric MRI = &MF.getRegInfo(); 1125f757f3fSDimitry Andric 1135f757f3fSDimitry Andric if (!MFI->hasVRegFlags()) 1145f757f3fSDimitry Andric return false; 1155f757f3fSDimitry Andric 1165f757f3fSDimitry Andric bool Changed = false; 1175f757f3fSDimitry Andric for (MachineBasicBlock &MBB : MF) { 1185f757f3fSDimitry Andric for (MachineInstr &MI : MBB) { 1195f757f3fSDimitry Andric if (MI.getOpcode() != AMDGPU::WWM_COPY) 1205f757f3fSDimitry Andric continue; 1215f757f3fSDimitry Andric 1225f757f3fSDimitry Andric // TODO: Club adjacent WWM ops between same exec save/restore 1235f757f3fSDimitry Andric assert(TII->isVGPRCopy(MI)); 1245f757f3fSDimitry Andric 1255f757f3fSDimitry Andric // For WWM vector copies, manipulate the exec mask around the copy 1265f757f3fSDimitry Andric // instruction. 1275f757f3fSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 1285f757f3fSDimitry Andric MachineBasicBlock::iterator InsertPt = MI.getIterator(); 1295f757f3fSDimitry Andric Register RegForExecCopy = MFI->getSGPRForEXECCopy(); 1305f757f3fSDimitry Andric TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy, 1315f757f3fSDimitry Andric isSCCLiveAtMI(MI), Indexes); 1325f757f3fSDimitry Andric TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes); 1335f757f3fSDimitry Andric addToWWMSpills(MF, MI.getOperand(0).getReg()); 1345f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); 1355f757f3fSDimitry Andric 1365f757f3fSDimitry Andric // Lower WWM_COPY back to COPY 1375f757f3fSDimitry Andric MI.setDesc(TII->get(AMDGPU::COPY)); 1385f757f3fSDimitry Andric Changed |= true; 1395f757f3fSDimitry Andric } 1405f757f3fSDimitry Andric } 1415f757f3fSDimitry Andric 1425f757f3fSDimitry Andric return Changed; 1435f757f3fSDimitry Andric } 144