//===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// RenameIndependentSubregs pass leaves large partially used super registers,
/// for example:
///   undef %0.sub4:VReg_1024 = ...
///   %0.sub5:VReg_1024 = ...
///   %0.sub6:VReg_1024 = ...
///   %0.sub7:VReg_1024 = ...
///   use %0.sub4_sub5_sub6_sub7
///   use %0.sub6_sub7
///
/// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and
/// rewrites such partially used super registers with registers of minimal size:
///   undef %0.sub0:VReg_128 = ...
///   %0.sub1:VReg_128 = ...
///   %0.sub2:VReg_128 = ...
///   %0.sub3:VReg_128 = ...
2406c3fb27SDimitry Andric /// use %0.sub0_sub1_sub2_sub3 2506c3fb27SDimitry Andric /// use %0.sub2_sub3 2606c3fb27SDimitry Andric /// 2706c3fb27SDimitry Andric /// This allows to avoid subreg lanemasks tracking during register pressure 2806c3fb27SDimitry Andric /// calculation and creates more possibilities for the code unaware of lanemasks 2906c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 3006c3fb27SDimitry Andric 3106c3fb27SDimitry Andric #include "AMDGPU.h" 3206c3fb27SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 3306c3fb27SDimitry Andric #include "SIRegisterInfo.h" 3406c3fb27SDimitry Andric #include "llvm/CodeGen/LiveInterval.h" 3506c3fb27SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 3606c3fb27SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 3706c3fb27SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 3806c3fb27SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 3906c3fb27SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 4006c3fb27SDimitry Andric #include "llvm/InitializePasses.h" 4106c3fb27SDimitry Andric #include "llvm/Pass.h" 4206c3fb27SDimitry Andric 4306c3fb27SDimitry Andric using namespace llvm; 4406c3fb27SDimitry Andric 4506c3fb27SDimitry Andric #define DEBUG_TYPE "rewrite-partial-reg-uses" 4606c3fb27SDimitry Andric 4706c3fb27SDimitry Andric namespace { 4806c3fb27SDimitry Andric 4906c3fb27SDimitry Andric class GCNRewritePartialRegUses : public MachineFunctionPass { 5006c3fb27SDimitry Andric public: 5106c3fb27SDimitry Andric static char ID; 5206c3fb27SDimitry Andric GCNRewritePartialRegUses() : MachineFunctionPass(ID) {} 5306c3fb27SDimitry Andric 5406c3fb27SDimitry Andric StringRef getPassName() const override { 5506c3fb27SDimitry Andric return "Rewrite Partial Register Uses"; 5606c3fb27SDimitry Andric } 5706c3fb27SDimitry Andric 5806c3fb27SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 5906c3fb27SDimitry 
Andric AU.setPreservesCFG(); 60*0fca6ea1SDimitry Andric AU.addPreserved<LiveIntervalsWrapperPass>(); 61*0fca6ea1SDimitry Andric AU.addPreserved<SlotIndexesWrapperPass>(); 6206c3fb27SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 6306c3fb27SDimitry Andric } 6406c3fb27SDimitry Andric 6506c3fb27SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 6606c3fb27SDimitry Andric 6706c3fb27SDimitry Andric private: 6806c3fb27SDimitry Andric MachineRegisterInfo *MRI; 6906c3fb27SDimitry Andric const SIRegisterInfo *TRI; 7006c3fb27SDimitry Andric const TargetInstrInfo *TII; 7106c3fb27SDimitry Andric LiveIntervals *LIS; 7206c3fb27SDimitry Andric 7306c3fb27SDimitry Andric /// Rewrite partially used register Reg by shifting all its subregisters to 7406c3fb27SDimitry Andric /// the right and replacing the original register with a register of minimal 7506c3fb27SDimitry Andric /// size. Return true if the change has been made. 7606c3fb27SDimitry Andric bool rewriteReg(Register Reg) const; 7706c3fb27SDimitry Andric 7806c3fb27SDimitry Andric /// Value type for SubRegMap below. 7906c3fb27SDimitry Andric struct SubRegInfo { 8006c3fb27SDimitry Andric /// Register class required to hold the value stored in the SubReg. 8106c3fb27SDimitry Andric const TargetRegisterClass *RC; 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric /// Index for the right-shifted subregister. If 0 this is the "covering" 8406c3fb27SDimitry Andric /// subreg i.e. subreg that covers all others. Covering subreg becomes the 8506c3fb27SDimitry Andric /// whole register after the replacement. 8606c3fb27SDimitry Andric unsigned SubReg = AMDGPU::NoSubRegister; 8706c3fb27SDimitry Andric SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {} 8806c3fb27SDimitry Andric }; 8906c3fb27SDimitry Andric 9006c3fb27SDimitry Andric /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container. 
91*0fca6ea1SDimitry Andric using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>; 9206c3fb27SDimitry Andric 9306c3fb27SDimitry Andric /// Given register class RC and the set of used subregs as keys in the SubRegs 9406c3fb27SDimitry Andric /// map return new register class and indexes of right-shifted subregs as 9506c3fb27SDimitry Andric /// values in SubRegs map such that the resulting regclass would contain 9606c3fb27SDimitry Andric /// registers of minimal size. 9706c3fb27SDimitry Andric const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC, 9806c3fb27SDimitry Andric SubRegMap &SubRegs) const; 9906c3fb27SDimitry Andric 10006c3fb27SDimitry Andric /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to 10106c3fb27SDimitry Andric /// find new regclass such that: 10206c3fb27SDimitry Andric /// 1. It has subregs obtained by shifting each OldSubReg by RShift number 10306c3fb27SDimitry Andric /// of bits to the right. Every "shifted" subreg should have the same 1045f757f3fSDimitry Andric /// SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers" 1055f757f3fSDimitry Andric /// all other subregs in pairs. Basically such subreg becomes a whole 1065f757f3fSDimitry Andric /// register. 10706c3fb27SDimitry Andric /// 2. Resulting register class contains registers of minimal size but not 10806c3fb27SDimitry Andric /// less than RegNumBits. 10906c3fb27SDimitry Andric /// 11006c3fb27SDimitry Andric /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out 11106c3fb27SDimitry Andric /// parameter: 11206c3fb27SDimitry Andric /// OldSubReg - input parameter, 1135f757f3fSDimitry Andric /// SubRegRC - input parameter (cannot be null), 11406c3fb27SDimitry Andric /// NewSubReg - output, contains shifted subregs on return. 
11506c3fb27SDimitry Andric const TargetRegisterClass * 11606c3fb27SDimitry Andric getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift, 11706c3fb27SDimitry Andric unsigned RegNumBits, unsigned CoverSubregIdx, 11806c3fb27SDimitry Andric SubRegMap &SubRegs) const; 11906c3fb27SDimitry Andric 12006c3fb27SDimitry Andric /// Update live intervals after rewriting OldReg to NewReg with SubRegs map 12106c3fb27SDimitry Andric /// describing OldSubReg -> NewSubReg mapping. 12206c3fb27SDimitry Andric void updateLiveIntervals(Register OldReg, Register NewReg, 12306c3fb27SDimitry Andric SubRegMap &SubRegs) const; 12406c3fb27SDimitry Andric 12506c3fb27SDimitry Andric /// Helper methods. 12606c3fb27SDimitry Andric 12706c3fb27SDimitry Andric /// Return reg class expected by a MO's parent instruction for a given MO. 12806c3fb27SDimitry Andric const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const; 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric /// Find right-shifted by RShift amount version of the SubReg if it exists, 13106c3fb27SDimitry Andric /// return 0 otherwise. 13206c3fb27SDimitry Andric unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const; 13306c3fb27SDimitry Andric 13406c3fb27SDimitry Andric /// Find subreg index with a given Offset and Size, return 0 if there is no 13506c3fb27SDimitry Andric /// such subregister index. The result is cached in SubRegs data-member. 13606c3fb27SDimitry Andric unsigned getSubReg(unsigned Offset, unsigned Size) const; 13706c3fb27SDimitry Andric 13806c3fb27SDimitry Andric /// Cache for getSubReg method: {Offset, Size} -> SubReg index. 13906c3fb27SDimitry Andric mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs; 14006c3fb27SDimitry Andric 14106c3fb27SDimitry Andric /// Return bit mask that contains all register classes that are projected into 14206c3fb27SDimitry Andric /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member. 
14306c3fb27SDimitry Andric const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC, 14406c3fb27SDimitry Andric unsigned SubRegIdx) const; 14506c3fb27SDimitry Andric 14606c3fb27SDimitry Andric /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask. 14706c3fb27SDimitry Andric mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>, 14806c3fb27SDimitry Andric const uint32_t *> 14906c3fb27SDimitry Andric SuperRegMasks; 15006c3fb27SDimitry Andric 15106c3fb27SDimitry Andric /// Return bitmask containing all allocatable register classes with registers 15206c3fb27SDimitry Andric /// aligned at AlignNumBits. The result is cached in 15306c3fb27SDimitry Andric /// AllocatableAndAlignedRegClassMasks data-member. 15406c3fb27SDimitry Andric const BitVector & 15506c3fb27SDimitry Andric getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const; 15606c3fb27SDimitry Andric 15706c3fb27SDimitry Andric /// Cache for getAllocatableAndAlignedRegClassMask method: 15806c3fb27SDimitry Andric /// AlignNumBits -> Class bitmask. 15906c3fb27SDimitry Andric mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks; 16006c3fb27SDimitry Andric }; 16106c3fb27SDimitry Andric 16206c3fb27SDimitry Andric } // end anonymous namespace 16306c3fb27SDimitry Andric 16406c3fb27SDimitry Andric // TODO: move this to the tablegen and use binary search by Offset. 
16506c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset, 16606c3fb27SDimitry Andric unsigned Size) const { 16706c3fb27SDimitry Andric const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0); 16806c3fb27SDimitry Andric if (Inserted) { 16906c3fb27SDimitry Andric for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) { 17006c3fb27SDimitry Andric if (TRI->getSubRegIdxOffset(Idx) == Offset && 17106c3fb27SDimitry Andric TRI->getSubRegIdxSize(Idx) == Size) { 17206c3fb27SDimitry Andric I->second = Idx; 17306c3fb27SDimitry Andric break; 17406c3fb27SDimitry Andric } 17506c3fb27SDimitry Andric } 17606c3fb27SDimitry Andric } 17706c3fb27SDimitry Andric return I->second; 17806c3fb27SDimitry Andric } 17906c3fb27SDimitry Andric 18006c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg, 18106c3fb27SDimitry Andric unsigned RShift) const { 18206c3fb27SDimitry Andric unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift; 18306c3fb27SDimitry Andric return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg)); 18406c3fb27SDimitry Andric } 18506c3fb27SDimitry Andric 18606c3fb27SDimitry Andric const uint32_t * 18706c3fb27SDimitry Andric GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC, 18806c3fb27SDimitry Andric unsigned SubRegIdx) const { 18906c3fb27SDimitry Andric const auto [I, Inserted] = 19006c3fb27SDimitry Andric SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr); 19106c3fb27SDimitry Andric if (Inserted) { 19206c3fb27SDimitry Andric for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) { 19306c3fb27SDimitry Andric if (RCI.getSubReg() == SubRegIdx) { 19406c3fb27SDimitry Andric I->second = RCI.getMask(); 19506c3fb27SDimitry Andric break; 19606c3fb27SDimitry Andric } 19706c3fb27SDimitry Andric } 19806c3fb27SDimitry Andric } 19906c3fb27SDimitry Andric return I->second; 20006c3fb27SDimitry Andric } 20106c3fb27SDimitry Andric 20206c3fb27SDimitry Andric const 
BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask( 20306c3fb27SDimitry Andric unsigned AlignNumBits) const { 20406c3fb27SDimitry Andric const auto [I, Inserted] = 20506c3fb27SDimitry Andric AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits); 20606c3fb27SDimitry Andric if (Inserted) { 20706c3fb27SDimitry Andric BitVector &BV = I->second; 20806c3fb27SDimitry Andric BV.resize(TRI->getNumRegClasses()); 20906c3fb27SDimitry Andric for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) { 21006c3fb27SDimitry Andric auto *RC = TRI->getRegClass(ClassID); 21106c3fb27SDimitry Andric if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits)) 21206c3fb27SDimitry Andric BV.set(ClassID); 21306c3fb27SDimitry Andric } 21406c3fb27SDimitry Andric } 21506c3fb27SDimitry Andric return I->second; 21606c3fb27SDimitry Andric } 21706c3fb27SDimitry Andric 21806c3fb27SDimitry Andric const TargetRegisterClass * 21906c3fb27SDimitry Andric GCNRewritePartialRegUses::getRegClassWithShiftedSubregs( 22006c3fb27SDimitry Andric const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits, 22106c3fb27SDimitry Andric unsigned CoverSubregIdx, SubRegMap &SubRegs) const { 22206c3fb27SDimitry Andric 22306c3fb27SDimitry Andric unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC); 22406c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign 22506c3fb27SDimitry Andric << '\n'); 22606c3fb27SDimitry Andric 22706c3fb27SDimitry Andric BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign)); 22806c3fb27SDimitry Andric for (auto &[OldSubReg, SRI] : SubRegs) { 22906c3fb27SDimitry Andric auto &[SubRegRC, NewSubReg] = SRI; 2305f757f3fSDimitry Andric assert(SubRegRC); 23106c3fb27SDimitry Andric 23206c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':' 23306c3fb27SDimitry Andric << TRI->getRegClassName(SubRegRC) 23406c3fb27SDimitry Andric << 
(SubRegRC->isAllocatable() ? "" : " not alloc") 23506c3fb27SDimitry Andric << " -> "); 23606c3fb27SDimitry Andric 23706c3fb27SDimitry Andric if (OldSubReg == CoverSubregIdx) { 2385f757f3fSDimitry Andric // Covering subreg will become a full register, RC should be allocatable. 2395f757f3fSDimitry Andric assert(SubRegRC->isAllocatable()); 24006c3fb27SDimitry Andric NewSubReg = AMDGPU::NoSubRegister; 24106c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "whole reg"); 24206c3fb27SDimitry Andric } else { 24306c3fb27SDimitry Andric NewSubReg = shiftSubReg(OldSubReg, RShift); 24406c3fb27SDimitry Andric if (!NewSubReg) { 24506c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "none\n"); 24606c3fb27SDimitry Andric return nullptr; 24706c3fb27SDimitry Andric } 24806c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg)); 24906c3fb27SDimitry Andric } 25006c3fb27SDimitry Andric 25106c3fb27SDimitry Andric const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg) 25206c3fb27SDimitry Andric : SubRegRC->getSubClassMask(); 25306c3fb27SDimitry Andric if (!Mask) 25406c3fb27SDimitry Andric llvm_unreachable("no register class mask?"); 25506c3fb27SDimitry Andric 25606c3fb27SDimitry Andric ClassMask.clearBitsNotInMask(Mask); 25706c3fb27SDimitry Andric // Don't try to early exit because checking if ClassMask has set bits isn't 25806c3fb27SDimitry Andric // that cheap and we expect it to pass in most cases. 25906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n'); 26006c3fb27SDimitry Andric } 26106c3fb27SDimitry Andric 26206c3fb27SDimitry Andric // ClassMask is the set of all register classes such that each class is 26306c3fb27SDimitry Andric // allocatable, aligned, has all shifted subregs and each subreg has required 26406c3fb27SDimitry Andric // register class (see SubRegRC above). Now select first (that is largest) 26506c3fb27SDimitry Andric // register class with registers of minimal but not less than RegNumBits size. 
26606c3fb27SDimitry Andric // We have to check register size because we may encounter classes of smaller 26706c3fb27SDimitry Andric // registers like VReg_1 in some situations. 26806c3fb27SDimitry Andric const TargetRegisterClass *MinRC = nullptr; 26906c3fb27SDimitry Andric unsigned MinNumBits = std::numeric_limits<unsigned>::max(); 27006c3fb27SDimitry Andric for (unsigned ClassID : ClassMask.set_bits()) { 27106c3fb27SDimitry Andric auto *RC = TRI->getRegClass(ClassID); 27206c3fb27SDimitry Andric unsigned NumBits = TRI->getRegSizeInBits(*RC); 27306c3fb27SDimitry Andric if (NumBits < MinNumBits && NumBits >= RegNumBits) { 27406c3fb27SDimitry Andric MinNumBits = NumBits; 27506c3fb27SDimitry Andric MinRC = RC; 27606c3fb27SDimitry Andric } 27706c3fb27SDimitry Andric if (MinNumBits == RegNumBits) 27806c3fb27SDimitry Andric break; 27906c3fb27SDimitry Andric } 28006c3fb27SDimitry Andric #ifndef NDEBUG 28106c3fb27SDimitry Andric if (MinRC) { 28206c3fb27SDimitry Andric assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign)); 28306c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) 28406c3fb27SDimitry Andric // Check that all registers in MinRC support SRI.SubReg subregister. 28506c3fb27SDimitry Andric assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg)); 28606c3fb27SDimitry Andric } 28706c3fb27SDimitry Andric #endif 28806c3fb27SDimitry Andric // There might be zero RShift - in this case we just trying to find smaller 28906c3fb27SDimitry Andric // register. 29006c3fb27SDimitry Andric return (MinRC != RC || RShift != 0) ? 
MinRC : nullptr; 29106c3fb27SDimitry Andric } 29206c3fb27SDimitry Andric 29306c3fb27SDimitry Andric const TargetRegisterClass * 29406c3fb27SDimitry Andric GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC, 29506c3fb27SDimitry Andric SubRegMap &SubRegs) const { 29606c3fb27SDimitry Andric unsigned CoverSubreg = AMDGPU::NoSubRegister; 29706c3fb27SDimitry Andric unsigned Offset = std::numeric_limits<unsigned>::max(); 29806c3fb27SDimitry Andric unsigned End = 0; 29906c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) { 30006c3fb27SDimitry Andric unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg); 30106c3fb27SDimitry Andric unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg); 30206c3fb27SDimitry Andric if (SubRegOffset < Offset) { 30306c3fb27SDimitry Andric Offset = SubRegOffset; 30406c3fb27SDimitry Andric CoverSubreg = AMDGPU::NoSubRegister; 30506c3fb27SDimitry Andric } 30606c3fb27SDimitry Andric if (SubRegEnd > End) { 30706c3fb27SDimitry Andric End = SubRegEnd; 30806c3fb27SDimitry Andric CoverSubreg = AMDGPU::NoSubRegister; 30906c3fb27SDimitry Andric } 31006c3fb27SDimitry Andric if (SubRegOffset == Offset && SubRegEnd == End) 31106c3fb27SDimitry Andric CoverSubreg = SubReg; 31206c3fb27SDimitry Andric } 31306c3fb27SDimitry Andric // If covering subreg is found shift everything so the covering subreg would 31406c3fb27SDimitry Andric // be in the rightmost position. 31506c3fb27SDimitry Andric if (CoverSubreg != AMDGPU::NoSubRegister) 31606c3fb27SDimitry Andric return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg, 31706c3fb27SDimitry Andric SubRegs); 31806c3fb27SDimitry Andric 31906c3fb27SDimitry Andric // Otherwise find subreg with maximum required alignment and shift it and all 32006c3fb27SDimitry Andric // other subregs to the rightmost possible position with respect to the 32106c3fb27SDimitry Andric // alignment. 
32206c3fb27SDimitry Andric unsigned MaxAlign = 0; 32306c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) 32406c3fb27SDimitry Andric MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg)); 32506c3fb27SDimitry Andric 32606c3fb27SDimitry Andric unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max(); 32706c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) { 32806c3fb27SDimitry Andric if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign) 32906c3fb27SDimitry Andric continue; 33006c3fb27SDimitry Andric FirstMaxAlignedSubRegOffset = 33106c3fb27SDimitry Andric std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg)); 33206c3fb27SDimitry Andric if (FirstMaxAlignedSubRegOffset == Offset) 33306c3fb27SDimitry Andric break; 33406c3fb27SDimitry Andric } 33506c3fb27SDimitry Andric 33606c3fb27SDimitry Andric unsigned NewOffsetOfMaxAlignedSubReg = 33706c3fb27SDimitry Andric alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign); 33806c3fb27SDimitry Andric 33906c3fb27SDimitry Andric if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset) 34006c3fb27SDimitry Andric llvm_unreachable("misaligned subreg"); 34106c3fb27SDimitry Andric 34206c3fb27SDimitry Andric unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg; 34306c3fb27SDimitry Andric return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs); 34406c3fb27SDimitry Andric } 34506c3fb27SDimitry Andric 34606c3fb27SDimitry Andric // Only the subrange's lanemasks of the original interval need to be modified. 34706c3fb27SDimitry Andric // Subrange for a covering subreg becomes the main range. 
34806c3fb27SDimitry Andric void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg, 34906c3fb27SDimitry Andric Register NewReg, 35006c3fb27SDimitry Andric SubRegMap &SubRegs) const { 35106c3fb27SDimitry Andric if (!LIS->hasInterval(OldReg)) 35206c3fb27SDimitry Andric return; 35306c3fb27SDimitry Andric 35406c3fb27SDimitry Andric auto &OldLI = LIS->getInterval(OldReg); 35506c3fb27SDimitry Andric auto &NewLI = LIS->createEmptyInterval(NewReg); 35606c3fb27SDimitry Andric 35706c3fb27SDimitry Andric auto &Allocator = LIS->getVNInfoAllocator(); 35806c3fb27SDimitry Andric NewLI.setWeight(OldLI.weight()); 35906c3fb27SDimitry Andric 36006c3fb27SDimitry Andric for (auto &SR : OldLI.subranges()) { 36106c3fb27SDimitry Andric auto I = find_if(SubRegs, [&](auto &P) { 36206c3fb27SDimitry Andric return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first); 36306c3fb27SDimitry Andric }); 36406c3fb27SDimitry Andric 36506c3fb27SDimitry Andric if (I == SubRegs.end()) { 36606c3fb27SDimitry Andric // There might be a situation when subranges don't exactly match used 36706c3fb27SDimitry Andric // subregs, for example: 36806c3fb27SDimitry Andric // %120 [160r,1392r:0) 0@160r 36906c3fb27SDimitry Andric // L000000000000C000 [160r,1392r:0) 0@160r 37006c3fb27SDimitry Andric // L0000000000003000 [160r,1392r:0) 0@160r 37106c3fb27SDimitry Andric // L0000000000000C00 [160r,1392r:0) 0@160r 37206c3fb27SDimitry Andric // L0000000000000300 [160r,1392r:0) 0@160r 37306c3fb27SDimitry Andric // L0000000000000003 [160r,1104r:0) 0@160r 37406c3fb27SDimitry Andric // L000000000000000C [160r,1104r:0) 0@160r 37506c3fb27SDimitry Andric // L0000000000000030 [160r,1104r:0) 0@160r 37606c3fb27SDimitry Andric // L00000000000000C0 [160r,1104r:0) 0@160r 37706c3fb27SDimitry Andric // but used subregs are: 37806c3fb27SDimitry Andric // sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF 37906c3fb27SDimitry Andric // sub0_sub1_sub2_sub3, L00000000000000FF 38006c3fb27SDimitry Andric // sub4_sub5_sub6_sub7, 
L000000000000FF00 38106c3fb27SDimitry Andric // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7 38206c3fb27SDimitry Andric // have several subranges with the same lifetime. For such cases just 38306c3fb27SDimitry Andric // recreate the interval. 38406c3fb27SDimitry Andric LIS->removeInterval(OldReg); 38506c3fb27SDimitry Andric LIS->removeInterval(NewReg); 38606c3fb27SDimitry Andric LIS->createAndComputeVirtRegInterval(NewReg); 38706c3fb27SDimitry Andric return; 38806c3fb27SDimitry Andric } 38906c3fb27SDimitry Andric 39006c3fb27SDimitry Andric if (unsigned NewSubReg = I->second.SubReg) 39106c3fb27SDimitry Andric NewLI.createSubRangeFrom(Allocator, 39206c3fb27SDimitry Andric TRI->getSubRegIndexLaneMask(NewSubReg), SR); 39306c3fb27SDimitry Andric else // This is the covering subreg (0 index) - set it as main range. 39406c3fb27SDimitry Andric NewLI.assign(SR, Allocator); 39506c3fb27SDimitry Andric 39606c3fb27SDimitry Andric SubRegs.erase(I); 39706c3fb27SDimitry Andric } 39806c3fb27SDimitry Andric if (NewLI.empty()) 39906c3fb27SDimitry Andric NewLI.assign(OldLI, Allocator); 40006c3fb27SDimitry Andric NewLI.verify(MRI); 40106c3fb27SDimitry Andric LIS->removeInterval(OldReg); 40206c3fb27SDimitry Andric } 40306c3fb27SDimitry Andric 40406c3fb27SDimitry Andric const TargetRegisterClass * 40506c3fb27SDimitry Andric GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const { 40606c3fb27SDimitry Andric MachineInstr *MI = MO.getParent(); 40706c3fb27SDimitry Andric return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI, 40806c3fb27SDimitry Andric *MI->getParent()->getParent()); 40906c3fb27SDimitry Andric } 41006c3fb27SDimitry Andric 41106c3fb27SDimitry Andric bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const { 41206c3fb27SDimitry Andric auto Range = MRI->reg_nodbg_operands(Reg); 4135f757f3fSDimitry Andric if (Range.empty() || any_of(Range, [](MachineOperand &MO) { 4145f757f3fSDimitry Andric return MO.getSubReg() 
== AMDGPU::NoSubRegister; // Whole reg used. [1] 4155f757f3fSDimitry Andric })) 41606c3fb27SDimitry Andric return false; 41706c3fb27SDimitry Andric 41806c3fb27SDimitry Andric auto *RC = MRI->getRegClass(Reg); 41906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI) 42006c3fb27SDimitry Andric << ':' << TRI->getRegClassName(RC) << '\n'); 42106c3fb27SDimitry Andric 4225f757f3fSDimitry Andric // Collect used subregs and their reg classes infered from instruction 42306c3fb27SDimitry Andric // operands. 42406c3fb27SDimitry Andric SubRegMap SubRegs; 4255f757f3fSDimitry Andric for (MachineOperand &MO : Range) { 4265f757f3fSDimitry Andric const unsigned SubReg = MO.getSubReg(); 4275f757f3fSDimitry Andric assert(SubReg != AMDGPU::NoSubRegister); // Due to [1]. 4285f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(SubReg) << ':'); 4295f757f3fSDimitry Andric 4305f757f3fSDimitry Andric const auto [I, Inserted] = SubRegs.try_emplace(SubReg); 4315f757f3fSDimitry Andric const TargetRegisterClass *&SubRegRC = I->second.RC; 4325f757f3fSDimitry Andric 4335f757f3fSDimitry Andric if (Inserted) 4345f757f3fSDimitry Andric SubRegRC = TRI->getSubRegisterClass(RC, SubReg); 4355f757f3fSDimitry Andric 4365f757f3fSDimitry Andric if (SubRegRC) { 4375f757f3fSDimitry Andric if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) { 4385f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & " 4395f757f3fSDimitry Andric << TRI->getRegClassName(OpDescRC) << " = "); 4405f757f3fSDimitry Andric SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC); 4415f757f3fSDimitry Andric } 4425f757f3fSDimitry Andric } 4435f757f3fSDimitry Andric 4445f757f3fSDimitry Andric if (!SubRegRC) { 4455f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "couldn't find target regclass\n"); 44606c3fb27SDimitry Andric return false; 44706c3fb27SDimitry Andric } 4485f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << 
TRI->getRegClassName(SubRegRC) << '\n'); 44906c3fb27SDimitry Andric } 45006c3fb27SDimitry Andric 45106c3fb27SDimitry Andric auto *NewRC = getMinSizeReg(RC, SubRegs); 45206c3fb27SDimitry Andric if (!NewRC) { 45306c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " No improvement achieved\n"); 45406c3fb27SDimitry Andric return false; 45506c3fb27SDimitry Andric } 45606c3fb27SDimitry Andric 45706c3fb27SDimitry Andric Register NewReg = MRI->createVirtualRegister(NewRC); 45806c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " Success " << printReg(Reg, TRI) << ':' 45906c3fb27SDimitry Andric << TRI->getRegClassName(RC) << " -> " 46006c3fb27SDimitry Andric << printReg(NewReg, TRI) << ':' 46106c3fb27SDimitry Andric << TRI->getRegClassName(NewRC) << '\n'); 46206c3fb27SDimitry Andric 46306c3fb27SDimitry Andric for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) { 46406c3fb27SDimitry Andric MO.setReg(NewReg); 46506c3fb27SDimitry Andric // Debug info can refer to the whole reg, just leave it as it is for now. 46606c3fb27SDimitry Andric // TODO: create some DI shift expression? 
46706c3fb27SDimitry Andric if (MO.isDebug() && MO.getSubReg() == 0) 46806c3fb27SDimitry Andric continue; 46906c3fb27SDimitry Andric unsigned SubReg = SubRegs[MO.getSubReg()].SubReg; 47006c3fb27SDimitry Andric MO.setSubReg(SubReg); 47106c3fb27SDimitry Andric if (SubReg == AMDGPU::NoSubRegister && MO.isDef()) 47206c3fb27SDimitry Andric MO.setIsUndef(false); 47306c3fb27SDimitry Andric } 47406c3fb27SDimitry Andric 47506c3fb27SDimitry Andric if (LIS) 47606c3fb27SDimitry Andric updateLiveIntervals(Reg, NewReg, SubRegs); 47706c3fb27SDimitry Andric 47806c3fb27SDimitry Andric return true; 47906c3fb27SDimitry Andric } 48006c3fb27SDimitry Andric 48106c3fb27SDimitry Andric bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) { 48206c3fb27SDimitry Andric MRI = &MF.getRegInfo(); 48306c3fb27SDimitry Andric TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo()); 48406c3fb27SDimitry Andric TII = MF.getSubtarget().getInstrInfo(); 485*0fca6ea1SDimitry Andric auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); 486*0fca6ea1SDimitry Andric LIS = LISWrapper ? 
&LISWrapper->getLIS() : nullptr; 48706c3fb27SDimitry Andric bool Changed = false; 48806c3fb27SDimitry Andric for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { 48906c3fb27SDimitry Andric Changed |= rewriteReg(Register::index2VirtReg(I)); 49006c3fb27SDimitry Andric } 49106c3fb27SDimitry Andric return Changed; 49206c3fb27SDimitry Andric } 49306c3fb27SDimitry Andric 49406c3fb27SDimitry Andric char GCNRewritePartialRegUses::ID; 49506c3fb27SDimitry Andric 49606c3fb27SDimitry Andric char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID; 49706c3fb27SDimitry Andric 49806c3fb27SDimitry Andric INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE, 49906c3fb27SDimitry Andric "Rewrite Partial Register Uses", false, false) 50006c3fb27SDimitry Andric INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE, 50106c3fb27SDimitry Andric "Rewrite Partial Register Uses", false, false) 502