18d0412ceSValery Pykhtin //===-------------- GCNRewritePartialRegUses.cpp --------------------------===// 28d0412ceSValery Pykhtin // 38d0412ceSValery Pykhtin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48d0412ceSValery Pykhtin // See https://llvm.org/LICENSE.txt for license information. 58d0412ceSValery Pykhtin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68d0412ceSValery Pykhtin // 78d0412ceSValery Pykhtin //===----------------------------------------------------------------------===// 88d0412ceSValery Pykhtin /// \file 98d0412ceSValery Pykhtin /// RenameIndependentSubregs pass leaves large partially used super registers, 108d0412ceSValery Pykhtin /// for example: 118d0412ceSValery Pykhtin /// undef %0.sub4:VReg_1024 = ... 128d0412ceSValery Pykhtin /// %0.sub5:VReg_1024 = ... 138d0412ceSValery Pykhtin /// %0.sub6:VReg_1024 = ... 148d0412ceSValery Pykhtin /// %0.sub7:VReg_1024 = ... 158d0412ceSValery Pykhtin /// use %0.sub4_sub5_sub6_sub7 168d0412ceSValery Pykhtin /// use %0.sub6_sub7 178d0412ceSValery Pykhtin /// 188d0412ceSValery Pykhtin /// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and 198d0412ceSValery Pykhtin /// rewrites such partially used super registers with registers of minimal size: 208d0412ceSValery Pykhtin /// undef %0.sub0:VReg_128 = ... 218d0412ceSValery Pykhtin /// %0.sub1:VReg_128 = ... 228d0412ceSValery Pykhtin /// %0.sub2:VReg_128 = ... 238d0412ceSValery Pykhtin /// %0.sub3:VReg_128 = ... 248d0412ceSValery Pykhtin /// use %0.sub0_sub1_sub2_sub3 258d0412ceSValery Pykhtin /// use %0.sub2_sub3 268d0412ceSValery Pykhtin /// 278d0412ceSValery Pykhtin /// This allows to avoid subreg lanemasks tracking during register pressure 288d0412ceSValery Pykhtin /// calculation and creates more possibilities for the code unaware of lanemasks 298d0412ceSValery Pykhtin //===----------------------------------------------------------------------===// 308d0412ceSValery Pykhtin 318d0412ceSValery Pykhtin #include "AMDGPU.h" 328d0412ceSValery Pykhtin #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 338d0412ceSValery Pykhtin #include "SIRegisterInfo.h" 348d0412ceSValery Pykhtin #include "llvm/CodeGen/LiveInterval.h" 358d0412ceSValery Pykhtin #include "llvm/CodeGen/LiveIntervals.h" 368d0412ceSValery Pykhtin #include "llvm/CodeGen/MachineFunctionPass.h" 378d0412ceSValery Pykhtin #include "llvm/CodeGen/MachineRegisterInfo.h" 388d0412ceSValery Pykhtin #include "llvm/CodeGen/TargetInstrInfo.h" 398d0412ceSValery Pykhtin #include "llvm/Pass.h" 408d0412ceSValery Pykhtin 418d0412ceSValery Pykhtin using namespace llvm; 428d0412ceSValery Pykhtin 438d0412ceSValery Pykhtin #define DEBUG_TYPE "rewrite-partial-reg-uses" 448d0412ceSValery Pykhtin 458d0412ceSValery Pykhtin namespace { 468d0412ceSValery Pykhtin 478d0412ceSValery Pykhtin class GCNRewritePartialRegUses : public MachineFunctionPass { 488d0412ceSValery Pykhtin public: 498d0412ceSValery Pykhtin static char ID; 508d0412ceSValery Pykhtin GCNRewritePartialRegUses() : MachineFunctionPass(ID) {} 518d0412ceSValery Pykhtin 528d0412ceSValery Pykhtin StringRef getPassName() const override { 538d0412ceSValery Pykhtin return "Rewrite Partial Register Uses"; 548d0412ceSValery Pykhtin } 558d0412ceSValery Pykhtin 568d0412ceSValery Pykhtin void getAnalysisUsage(AnalysisUsage &AU) const override { 578d0412ceSValery Pykhtin AU.setPreservesCFG(); 58abde52aaSpaperchalice AU.addPreserved<LiveIntervalsWrapperPass>(); 594010f894Spaperchalice AU.addPreserved<SlotIndexesWrapperPass>(); 608d0412ceSValery Pykhtin MachineFunctionPass::getAnalysisUsage(AU); 618d0412ceSValery Pykhtin } 628d0412ceSValery Pykhtin 638d0412ceSValery Pykhtin bool runOnMachineFunction(MachineFunction &MF) override; 648d0412ceSValery Pykhtin 658d0412ceSValery Pykhtin private: 668d0412ceSValery Pykhtin MachineRegisterInfo *MRI; 678d0412ceSValery Pykhtin const SIRegisterInfo *TRI; 688d0412ceSValery Pykhtin const TargetInstrInfo *TII; 698d0412ceSValery Pykhtin LiveIntervals *LIS; 708d0412ceSValery Pykhtin 718d0412ceSValery Pykhtin /// Rewrite partially used register Reg by shifting all its subregisters to 728d0412ceSValery Pykhtin /// the right and replacing the original register with a register of minimal 738d0412ceSValery Pykhtin /// size. Return true if the change has been made. 748d0412ceSValery Pykhtin bool rewriteReg(Register Reg) const; 758d0412ceSValery Pykhtin 768d0412ceSValery Pykhtin /// Value type for SubRegMap below. 778d0412ceSValery Pykhtin struct SubRegInfo { 788d0412ceSValery Pykhtin /// Register class required to hold the value stored in the SubReg. 798d0412ceSValery Pykhtin const TargetRegisterClass *RC; 808d0412ceSValery Pykhtin 818d0412ceSValery Pykhtin /// Index for the right-shifted subregister. If 0 this is the "covering" 828d0412ceSValery Pykhtin /// subreg i.e. subreg that covers all others. Covering subreg becomes the 838d0412ceSValery Pykhtin /// whole register after the replacement. 848d0412ceSValery Pykhtin unsigned SubReg = AMDGPU::NoSubRegister; 858d0412ceSValery Pykhtin SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {} 868d0412ceSValery Pykhtin }; 878d0412ceSValery Pykhtin 888d0412ceSValery Pykhtin /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container. 89aeafdc21SJay Foad using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>; 908d0412ceSValery Pykhtin 918d0412ceSValery Pykhtin /// Given register class RC and the set of used subregs as keys in the SubRegs 928d0412ceSValery Pykhtin /// map return new register class and indexes of right-shifted subregs as 938d0412ceSValery Pykhtin /// values in SubRegs map such that the resulting regclass would contain 948d0412ceSValery Pykhtin /// registers of minimal size. 958d0412ceSValery Pykhtin const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC, 968d0412ceSValery Pykhtin SubRegMap &SubRegs) const; 978d0412ceSValery Pykhtin 9898aa8439SValery Pykhtin /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to 9998aa8439SValery Pykhtin /// find new regclass such that: 10098aa8439SValery Pykhtin /// 1. It has subregs obtained by shifting each OldSubReg by RShift number 10198aa8439SValery Pykhtin /// of bits to the right. Every "shifted" subreg should have the same 102667ba7f8SValery Pykhtin /// SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers" 103667ba7f8SValery Pykhtin /// all other subregs in pairs. Basically such subreg becomes a whole 104667ba7f8SValery Pykhtin /// register. 10598aa8439SValery Pykhtin /// 2. Resulting register class contains registers of minimal size but not 10698aa8439SValery Pykhtin /// less than RegNumBits. 10798aa8439SValery Pykhtin /// 10898aa8439SValery Pykhtin /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out 10998aa8439SValery Pykhtin /// parameter: 11098aa8439SValery Pykhtin /// OldSubReg - input parameter, 111667ba7f8SValery Pykhtin /// SubRegRC - input parameter (cannot be null), 11298aa8439SValery Pykhtin /// NewSubReg - output, contains shifted subregs on return. 1138d0412ceSValery Pykhtin const TargetRegisterClass * 1148d0412ceSValery Pykhtin getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift, 11598aa8439SValery Pykhtin unsigned RegNumBits, unsigned CoverSubregIdx, 1168d0412ceSValery Pykhtin SubRegMap &SubRegs) const; 1178d0412ceSValery Pykhtin 1188d0412ceSValery Pykhtin /// Update live intervals after rewriting OldReg to NewReg with SubRegs map 1198d0412ceSValery Pykhtin /// describing OldSubReg -> NewSubReg mapping. 1208d0412ceSValery Pykhtin void updateLiveIntervals(Register OldReg, Register NewReg, 1218d0412ceSValery Pykhtin SubRegMap &SubRegs) const; 1228d0412ceSValery Pykhtin 1238d0412ceSValery Pykhtin /// Helper methods. 1248d0412ceSValery Pykhtin 1258d0412ceSValery Pykhtin /// Return reg class expected by a MO's parent instruction for a given MO. 1268d0412ceSValery Pykhtin const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const; 1278d0412ceSValery Pykhtin 1288d0412ceSValery Pykhtin /// Find right-shifted by RShift amount version of the SubReg if it exists, 1298d0412ceSValery Pykhtin /// return 0 otherwise. 1308d0412ceSValery Pykhtin unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const; 1318d0412ceSValery Pykhtin 1328d0412ceSValery Pykhtin /// Find subreg index with a given Offset and Size, return 0 if there is no 1338d0412ceSValery Pykhtin /// such subregister index. The result is cached in SubRegs data-member. 1348d0412ceSValery Pykhtin unsigned getSubReg(unsigned Offset, unsigned Size) const; 1358d0412ceSValery Pykhtin 1368d0412ceSValery Pykhtin /// Cache for getSubReg method: {Offset, Size} -> SubReg index. 1378d0412ceSValery Pykhtin mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs; 1388d0412ceSValery Pykhtin 1398d0412ceSValery Pykhtin /// Return bit mask that contains all register classes that are projected into 1408d0412ceSValery Pykhtin /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member. 1418d0412ceSValery Pykhtin const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC, 1428d0412ceSValery Pykhtin unsigned SubRegIdx) const; 1438d0412ceSValery Pykhtin 1448d0412ceSValery Pykhtin /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask. 1458d0412ceSValery Pykhtin mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>, 1468d0412ceSValery Pykhtin const uint32_t *> 1478d0412ceSValery Pykhtin SuperRegMasks; 1488d0412ceSValery Pykhtin 1498d0412ceSValery Pykhtin /// Return bitmask containing all allocatable register classes with registers 1508d0412ceSValery Pykhtin /// aligned at AlignNumBits. The result is cached in 1518d0412ceSValery Pykhtin /// AllocatableAndAlignedRegClassMasks data-member. 1528d0412ceSValery Pykhtin const BitVector & 1538d0412ceSValery Pykhtin getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const; 1548d0412ceSValery Pykhtin 1558d0412ceSValery Pykhtin /// Cache for getAllocatableAndAlignedRegClassMask method: 1568d0412ceSValery Pykhtin /// AlignNumBits -> Class bitmask. 1578d0412ceSValery Pykhtin mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks; 1588d0412ceSValery Pykhtin }; 1598d0412ceSValery Pykhtin 1608d0412ceSValery Pykhtin } // end anonymous namespace 1618d0412ceSValery Pykhtin 1628d0412ceSValery Pykhtin // TODO: move this to the tablegen and use binary search by Offset. 1638d0412ceSValery Pykhtin unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset, 1648d0412ceSValery Pykhtin unsigned Size) const { 1658d0412ceSValery Pykhtin const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0); 1668d0412ceSValery Pykhtin if (Inserted) { 1678d0412ceSValery Pykhtin for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) { 1688d0412ceSValery Pykhtin if (TRI->getSubRegIdxOffset(Idx) == Offset && 1698d0412ceSValery Pykhtin TRI->getSubRegIdxSize(Idx) == Size) { 1708d0412ceSValery Pykhtin I->second = Idx; 1718d0412ceSValery Pykhtin break; 1728d0412ceSValery Pykhtin } 1738d0412ceSValery Pykhtin } 1748d0412ceSValery Pykhtin } 1758d0412ceSValery Pykhtin return I->second; 1768d0412ceSValery Pykhtin } 1778d0412ceSValery Pykhtin 1788d0412ceSValery Pykhtin unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg, 1798d0412ceSValery Pykhtin unsigned RShift) const { 1808d0412ceSValery Pykhtin unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift; 1818d0412ceSValery Pykhtin return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg)); 1828d0412ceSValery Pykhtin } 1838d0412ceSValery Pykhtin 1848d0412ceSValery Pykhtin const uint32_t * 1858d0412ceSValery Pykhtin GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC, 1868d0412ceSValery Pykhtin unsigned SubRegIdx) const { 1878d0412ceSValery Pykhtin const auto [I, Inserted] = 1888d0412ceSValery Pykhtin SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr); 1898d0412ceSValery Pykhtin if (Inserted) { 1908d0412ceSValery Pykhtin for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) { 1918d0412ceSValery Pykhtin if (RCI.getSubReg() == SubRegIdx) { 1928d0412ceSValery Pykhtin I->second = RCI.getMask(); 1938d0412ceSValery Pykhtin break; 1948d0412ceSValery Pykhtin } 1958d0412ceSValery Pykhtin } 1968d0412ceSValery Pykhtin } 1978d0412ceSValery Pykhtin return I->second; 1988d0412ceSValery Pykhtin } 1998d0412ceSValery Pykhtin 2008d0412ceSValery Pykhtin const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask( 2018d0412ceSValery Pykhtin unsigned AlignNumBits) const { 2028d0412ceSValery Pykhtin const auto [I, Inserted] = 2038d0412ceSValery Pykhtin AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits); 2048d0412ceSValery Pykhtin if (Inserted) { 2058d0412ceSValery Pykhtin BitVector &BV = I->second; 2068d0412ceSValery Pykhtin BV.resize(TRI->getNumRegClasses()); 2078d0412ceSValery Pykhtin for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) { 2088d0412ceSValery Pykhtin auto *RC = TRI->getRegClass(ClassID); 2098d0412ceSValery Pykhtin if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits)) 2108d0412ceSValery Pykhtin BV.set(ClassID); 2118d0412ceSValery Pykhtin } 2128d0412ceSValery Pykhtin } 2138d0412ceSValery Pykhtin return I->second; 2148d0412ceSValery Pykhtin } 2158d0412ceSValery Pykhtin 2168d0412ceSValery Pykhtin const TargetRegisterClass * 2178d0412ceSValery Pykhtin GCNRewritePartialRegUses::getRegClassWithShiftedSubregs( 21898aa8439SValery Pykhtin const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits, 21998aa8439SValery Pykhtin unsigned CoverSubregIdx, SubRegMap &SubRegs) const { 2208d0412ceSValery Pykhtin 2218d0412ceSValery Pykhtin unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC); 2228d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign 2238d0412ceSValery Pykhtin << '\n'); 2248d0412ceSValery Pykhtin 2258d0412ceSValery Pykhtin BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign)); 2268d0412ceSValery Pykhtin for (auto &[OldSubReg, SRI] : SubRegs) { 2278d0412ceSValery Pykhtin auto &[SubRegRC, NewSubReg] = SRI; 228667ba7f8SValery Pykhtin assert(SubRegRC); 2298d0412ceSValery Pykhtin 2308d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':' 2318d0412ceSValery Pykhtin << TRI->getRegClassName(SubRegRC) 2328d0412ceSValery Pykhtin << (SubRegRC->isAllocatable() ? "" : " not alloc") 2338d0412ceSValery Pykhtin << " -> "); 2348d0412ceSValery Pykhtin 2358d0412ceSValery Pykhtin if (OldSubReg == CoverSubregIdx) { 236667ba7f8SValery Pykhtin // Covering subreg will become a full register, RC should be allocatable. 237667ba7f8SValery Pykhtin assert(SubRegRC->isAllocatable()); 2388d0412ceSValery Pykhtin NewSubReg = AMDGPU::NoSubRegister; 2398d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << "whole reg"); 2408d0412ceSValery Pykhtin } else { 2418d0412ceSValery Pykhtin NewSubReg = shiftSubReg(OldSubReg, RShift); 2428d0412ceSValery Pykhtin if (!NewSubReg) { 2438d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << "none\n"); 2448d0412ceSValery Pykhtin return nullptr; 2458d0412ceSValery Pykhtin } 2468d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg)); 2478d0412ceSValery Pykhtin } 2488d0412ceSValery Pykhtin 2498d0412ceSValery Pykhtin const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg) 2508d0412ceSValery Pykhtin : SubRegRC->getSubClassMask(); 2518d0412ceSValery Pykhtin if (!Mask) 2528d0412ceSValery Pykhtin llvm_unreachable("no register class mask?"); 2538d0412ceSValery Pykhtin 2548d0412ceSValery Pykhtin ClassMask.clearBitsNotInMask(Mask); 2558d0412ceSValery Pykhtin // Don't try to early exit because checking if ClassMask has set bits isn't 2568d0412ceSValery Pykhtin // that cheap and we expect it to pass in most cases. 2578d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n'); 2588d0412ceSValery Pykhtin } 2598d0412ceSValery Pykhtin 2608d0412ceSValery Pykhtin // ClassMask is the set of all register classes such that each class is 2618d0412ceSValery Pykhtin // allocatable, aligned, has all shifted subregs and each subreg has required 2628d0412ceSValery Pykhtin // register class (see SubRegRC above). Now select first (that is largest) 26398aa8439SValery Pykhtin // register class with registers of minimal but not less than RegNumBits size. 26498aa8439SValery Pykhtin // We have to check register size because we may encounter classes of smaller 26598aa8439SValery Pykhtin // registers like VReg_1 in some situations. 2668d0412ceSValery Pykhtin const TargetRegisterClass *MinRC = nullptr; 2678d0412ceSValery Pykhtin unsigned MinNumBits = std::numeric_limits<unsigned>::max(); 2688d0412ceSValery Pykhtin for (unsigned ClassID : ClassMask.set_bits()) { 2698d0412ceSValery Pykhtin auto *RC = TRI->getRegClass(ClassID); 2708d0412ceSValery Pykhtin unsigned NumBits = TRI->getRegSizeInBits(*RC); 27198aa8439SValery Pykhtin if (NumBits < MinNumBits && NumBits >= RegNumBits) { 2728d0412ceSValery Pykhtin MinNumBits = NumBits; 2738d0412ceSValery Pykhtin MinRC = RC; 2748d0412ceSValery Pykhtin } 27598aa8439SValery Pykhtin if (MinNumBits == RegNumBits) 27698aa8439SValery Pykhtin break; 2778d0412ceSValery Pykhtin } 2788d0412ceSValery Pykhtin #ifndef NDEBUG 2798d0412ceSValery Pykhtin if (MinRC) { 2808d0412ceSValery Pykhtin assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign)); 2818d0412ceSValery Pykhtin for (auto [SubReg, SRI] : SubRegs) 28298aa8439SValery Pykhtin // Check that all registers in MinRC support SRI.SubReg subregister. 2838d0412ceSValery Pykhtin assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg)); 2848d0412ceSValery Pykhtin } 2858d0412ceSValery Pykhtin #endif 2868d0412ceSValery Pykhtin // There might be zero RShift - in this case we just trying to find smaller 2878d0412ceSValery Pykhtin // register. 2888d0412ceSValery Pykhtin return (MinRC != RC || RShift != 0) ? MinRC : nullptr; 2898d0412ceSValery Pykhtin } 2908d0412ceSValery Pykhtin 2918d0412ceSValery Pykhtin const TargetRegisterClass * 2928d0412ceSValery Pykhtin GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC, 2938d0412ceSValery Pykhtin SubRegMap &SubRegs) const { 2948d0412ceSValery Pykhtin unsigned CoverSubreg = AMDGPU::NoSubRegister; 2958d0412ceSValery Pykhtin unsigned Offset = std::numeric_limits<unsigned>::max(); 2968d0412ceSValery Pykhtin unsigned End = 0; 2978d0412ceSValery Pykhtin for (auto [SubReg, SRI] : SubRegs) { 2988d0412ceSValery Pykhtin unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg); 2998d0412ceSValery Pykhtin unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg); 3008d0412ceSValery Pykhtin if (SubRegOffset < Offset) { 3018d0412ceSValery Pykhtin Offset = SubRegOffset; 3028d0412ceSValery Pykhtin CoverSubreg = AMDGPU::NoSubRegister; 3038d0412ceSValery Pykhtin } 3048d0412ceSValery Pykhtin if (SubRegEnd > End) { 3058d0412ceSValery Pykhtin End = SubRegEnd; 3068d0412ceSValery Pykhtin CoverSubreg = AMDGPU::NoSubRegister; 3078d0412ceSValery Pykhtin } 3088d0412ceSValery Pykhtin if (SubRegOffset == Offset && SubRegEnd == End) 3098d0412ceSValery Pykhtin CoverSubreg = SubReg; 3108d0412ceSValery Pykhtin } 3118d0412ceSValery Pykhtin // If covering subreg is found shift everything so the covering subreg would 3128d0412ceSValery Pykhtin // be in the rightmost position. 3138d0412ceSValery Pykhtin if (CoverSubreg != AMDGPU::NoSubRegister) 31498aa8439SValery Pykhtin return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg, 31598aa8439SValery Pykhtin SubRegs); 3168d0412ceSValery Pykhtin 3178d0412ceSValery Pykhtin // Otherwise find subreg with maximum required alignment and shift it and all 3188d0412ceSValery Pykhtin // other subregs to the rightmost possible position with respect to the 3198d0412ceSValery Pykhtin // alignment. 3208d0412ceSValery Pykhtin unsigned MaxAlign = 0; 3218d0412ceSValery Pykhtin for (auto [SubReg, SRI] : SubRegs) 3228d0412ceSValery Pykhtin MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg)); 3238d0412ceSValery Pykhtin 3248d0412ceSValery Pykhtin unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max(); 3258d0412ceSValery Pykhtin for (auto [SubReg, SRI] : SubRegs) { 3268d0412ceSValery Pykhtin if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign) 3278d0412ceSValery Pykhtin continue; 3288d0412ceSValery Pykhtin FirstMaxAlignedSubRegOffset = 3298d0412ceSValery Pykhtin std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg)); 3308d0412ceSValery Pykhtin if (FirstMaxAlignedSubRegOffset == Offset) 3318d0412ceSValery Pykhtin break; 3328d0412ceSValery Pykhtin } 3338d0412ceSValery Pykhtin 3348d0412ceSValery Pykhtin unsigned NewOffsetOfMaxAlignedSubReg = 3358d0412ceSValery Pykhtin alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign); 3368d0412ceSValery Pykhtin 3378d0412ceSValery Pykhtin if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset) 3388d0412ceSValery Pykhtin llvm_unreachable("misaligned subreg"); 3398d0412ceSValery Pykhtin 3408d0412ceSValery Pykhtin unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg; 34198aa8439SValery Pykhtin return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs); 3428d0412ceSValery Pykhtin } 3438d0412ceSValery Pykhtin 3448d0412ceSValery Pykhtin // Only the subrange's lanemasks of the original interval need to be modified. 3458d0412ceSValery Pykhtin // Subrange for a covering subreg becomes the main range. 3468d0412ceSValery Pykhtin void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg, 3478d0412ceSValery Pykhtin Register NewReg, 3488d0412ceSValery Pykhtin SubRegMap &SubRegs) const { 3498d0412ceSValery Pykhtin if (!LIS->hasInterval(OldReg)) 3508d0412ceSValery Pykhtin return; 3518d0412ceSValery Pykhtin 3528d0412ceSValery Pykhtin auto &OldLI = LIS->getInterval(OldReg); 3538d0412ceSValery Pykhtin auto &NewLI = LIS->createEmptyInterval(NewReg); 3548d0412ceSValery Pykhtin 3558d0412ceSValery Pykhtin auto &Allocator = LIS->getVNInfoAllocator(); 3568d0412ceSValery Pykhtin NewLI.setWeight(OldLI.weight()); 3578d0412ceSValery Pykhtin 3588d0412ceSValery Pykhtin for (auto &SR : OldLI.subranges()) { 3598d0412ceSValery Pykhtin auto I = find_if(SubRegs, [&](auto &P) { 3608d0412ceSValery Pykhtin return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first); 3618d0412ceSValery Pykhtin }); 3628d0412ceSValery Pykhtin 3638d0412ceSValery Pykhtin if (I == SubRegs.end()) { 3648d0412ceSValery Pykhtin // There might be a situation when subranges don't exactly match used 3658d0412ceSValery Pykhtin // subregs, for example: 3668d0412ceSValery Pykhtin // %120 [160r,1392r:0) 0@160r 3678d0412ceSValery Pykhtin // L000000000000C000 [160r,1392r:0) 0@160r 3688d0412ceSValery Pykhtin // L0000000000003000 [160r,1392r:0) 0@160r 3698d0412ceSValery Pykhtin // L0000000000000C00 [160r,1392r:0) 0@160r 3708d0412ceSValery Pykhtin // L0000000000000300 [160r,1392r:0) 0@160r 3718d0412ceSValery Pykhtin // L0000000000000003 [160r,1104r:0) 0@160r 3728d0412ceSValery Pykhtin // L000000000000000C [160r,1104r:0) 0@160r 3738d0412ceSValery Pykhtin // L0000000000000030 [160r,1104r:0) 0@160r 3748d0412ceSValery Pykhtin // L00000000000000C0 [160r,1104r:0) 0@160r 3758d0412ceSValery Pykhtin // but used subregs are: 3768d0412ceSValery Pykhtin // sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF 3778d0412ceSValery Pykhtin // sub0_sub1_sub2_sub3, L00000000000000FF 3788d0412ceSValery Pykhtin // sub4_sub5_sub6_sub7, L000000000000FF00 3798d0412ceSValery Pykhtin // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7 3808d0412ceSValery Pykhtin // have several subranges with the same lifetime. For such cases just 3818d0412ceSValery Pykhtin // recreate the interval. 3828d0412ceSValery Pykhtin LIS->removeInterval(OldReg); 3838d0412ceSValery Pykhtin LIS->removeInterval(NewReg); 3848d0412ceSValery Pykhtin LIS->createAndComputeVirtRegInterval(NewReg); 3858d0412ceSValery Pykhtin return; 3868d0412ceSValery Pykhtin } 3878d0412ceSValery Pykhtin 3888d0412ceSValery Pykhtin if (unsigned NewSubReg = I->second.SubReg) 3898d0412ceSValery Pykhtin NewLI.createSubRangeFrom(Allocator, 3908d0412ceSValery Pykhtin TRI->getSubRegIndexLaneMask(NewSubReg), SR); 3918d0412ceSValery Pykhtin else // This is the covering subreg (0 index) - set it as main range. 3928d0412ceSValery Pykhtin NewLI.assign(SR, Allocator); 3938d0412ceSValery Pykhtin 3948d0412ceSValery Pykhtin SubRegs.erase(I); 3958d0412ceSValery Pykhtin } 3968d0412ceSValery Pykhtin if (NewLI.empty()) 3978d0412ceSValery Pykhtin NewLI.assign(OldLI, Allocator); 398*b30b9eb7SMatt Arsenault assert(NewLI.verify(MRI)); 3998d0412ceSValery Pykhtin LIS->removeInterval(OldReg); 4008d0412ceSValery Pykhtin } 4018d0412ceSValery Pykhtin 4028d0412ceSValery Pykhtin const TargetRegisterClass * 4038d0412ceSValery Pykhtin GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const { 4048d0412ceSValery Pykhtin MachineInstr *MI = MO.getParent(); 4058d0412ceSValery Pykhtin return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI, 4068d0412ceSValery Pykhtin *MI->getParent()->getParent()); 4078d0412ceSValery Pykhtin } 4088d0412ceSValery Pykhtin 4098d0412ceSValery Pykhtin bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const { 4108d0412ceSValery Pykhtin auto Range = MRI->reg_nodbg_operands(Reg); 411667ba7f8SValery Pykhtin if (Range.empty() || any_of(Range, [](MachineOperand &MO) { 412667ba7f8SValery Pykhtin return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1] 413667ba7f8SValery Pykhtin })) 4148d0412ceSValery Pykhtin return false; 4158d0412ceSValery Pykhtin 41698aa8439SValery Pykhtin auto *RC = MRI->getRegClass(Reg); 41798aa8439SValery Pykhtin LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI) 41898aa8439SValery Pykhtin << ':' << TRI->getRegClassName(RC) << '\n'); 41998aa8439SValery Pykhtin 420667ba7f8SValery Pykhtin // Collect used subregs and their reg classes infered from instruction 4218d0412ceSValery Pykhtin // operands. 4228d0412ceSValery Pykhtin SubRegMap SubRegs; 423667ba7f8SValery Pykhtin for (MachineOperand &MO : Range) { 424667ba7f8SValery Pykhtin const unsigned SubReg = MO.getSubReg(); 425667ba7f8SValery Pykhtin assert(SubReg != AMDGPU::NoSubRegister); // Due to [1]. 426667ba7f8SValery Pykhtin LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(SubReg) << ':'); 427667ba7f8SValery Pykhtin 428667ba7f8SValery Pykhtin const auto [I, Inserted] = SubRegs.try_emplace(SubReg); 429667ba7f8SValery Pykhtin const TargetRegisterClass *&SubRegRC = I->second.RC; 430667ba7f8SValery Pykhtin 431667ba7f8SValery Pykhtin if (Inserted) 432667ba7f8SValery Pykhtin SubRegRC = TRI->getSubRegisterClass(RC, SubReg); 433667ba7f8SValery Pykhtin 434667ba7f8SValery Pykhtin if (SubRegRC) { 435667ba7f8SValery Pykhtin if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) { 436667ba7f8SValery Pykhtin LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & " 437667ba7f8SValery Pykhtin << TRI->getRegClassName(OpDescRC) << " = "); 438667ba7f8SValery Pykhtin SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC); 439667ba7f8SValery Pykhtin } 440667ba7f8SValery Pykhtin } 441667ba7f8SValery Pykhtin 442667ba7f8SValery Pykhtin if (!SubRegRC) { 443667ba7f8SValery Pykhtin LLVM_DEBUG(dbgs() << "couldn't find target regclass\n"); 44498aa8439SValery Pykhtin return false; 4458d0412ceSValery Pykhtin } 446667ba7f8SValery Pykhtin LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n'); 44798aa8439SValery Pykhtin } 4488d0412ceSValery Pykhtin 4498d0412ceSValery Pykhtin auto *NewRC = getMinSizeReg(RC, SubRegs); 4508d0412ceSValery Pykhtin if (!NewRC) { 4518d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << " No improvement achieved\n"); 4528d0412ceSValery Pykhtin return false; 4538d0412ceSValery Pykhtin } 4548d0412ceSValery Pykhtin 4558d0412ceSValery Pykhtin Register NewReg = MRI->createVirtualRegister(NewRC); 4568d0412ceSValery Pykhtin LLVM_DEBUG(dbgs() << " Success " << printReg(Reg, TRI) << ':' 4578d0412ceSValery Pykhtin << TRI->getRegClassName(RC) << " -> " 4588d0412ceSValery Pykhtin << printReg(NewReg, TRI) << ':' 4598d0412ceSValery Pykhtin << TRI->getRegClassName(NewRC) << '\n'); 4608d0412ceSValery Pykhtin 4618d0412ceSValery Pykhtin for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) { 4628d0412ceSValery Pykhtin MO.setReg(NewReg); 4638d0412ceSValery Pykhtin // Debug info can refer to the whole reg, just leave it as it is for now. 4648d0412ceSValery Pykhtin // TODO: create some DI shift expression? 4658d0412ceSValery Pykhtin if (MO.isDebug() && MO.getSubReg() == 0) 4668d0412ceSValery Pykhtin continue; 4678d0412ceSValery Pykhtin unsigned SubReg = SubRegs[MO.getSubReg()].SubReg; 4688d0412ceSValery Pykhtin MO.setSubReg(SubReg); 4698d0412ceSValery Pykhtin if (SubReg == AMDGPU::NoSubRegister && MO.isDef()) 4708d0412ceSValery Pykhtin MO.setIsUndef(false); 4718d0412ceSValery Pykhtin } 4728d0412ceSValery Pykhtin 4738d0412ceSValery Pykhtin if (LIS) 4748d0412ceSValery Pykhtin updateLiveIntervals(Reg, NewReg, SubRegs); 4758d0412ceSValery Pykhtin 4768d0412ceSValery Pykhtin return true; 4778d0412ceSValery Pykhtin } 4788d0412ceSValery Pykhtin 4798d0412ceSValery Pykhtin bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) { 4808d0412ceSValery Pykhtin MRI = &MF.getRegInfo(); 4818d0412ceSValery Pykhtin TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo()); 4828d0412ceSValery Pykhtin TII = MF.getSubtarget().getInstrInfo(); 483abde52aaSpaperchalice auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); 484abde52aaSpaperchalice LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; 4858d0412ceSValery Pykhtin bool Changed = false; 4868d0412ceSValery Pykhtin for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { 4878d0412ceSValery Pykhtin Changed |= rewriteReg(Register::index2VirtReg(I)); 4888d0412ceSValery Pykhtin } 4898d0412ceSValery Pykhtin return Changed; 4908d0412ceSValery Pykhtin } 4918d0412ceSValery Pykhtin 4928d0412ceSValery Pykhtin char GCNRewritePartialRegUses::ID; 4938d0412ceSValery Pykhtin 4948d0412ceSValery Pykhtin char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID; 4958d0412ceSValery Pykhtin 4968d0412ceSValery Pykhtin INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE, 4978d0412ceSValery Pykhtin "Rewrite Partial Register Uses", false, false) 4988d0412ceSValery Pykhtin INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE, 4998d0412ceSValery Pykhtin "Rewrite Partial Register Uses", false, false) 500