xref: /llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp (revision be187369a03bf2df8bdbc76ecd381377b3bb6074)
//===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// The RenameIndependentSubregs pass leaves large, partially used super
/// registers, for example:
///   undef %0.sub4:VReg_1024 = ...
///   %0.sub5:VReg_1024 = ...
///   %0.sub6:VReg_1024 = ...
///   %0.sub7:VReg_1024 = ...
///   use %0.sub4_sub5_sub6_sub7
///   use %0.sub6_sub7
///
/// GCNRewritePartialRegUses runs right after RenameIndependentSubregs and
/// rewrites such partially used super registers with registers of minimal size:
///   undef %0.sub0:VReg_128 = ...
///   %0.sub1:VReg_128 = ...
///   %0.sub2:VReg_128 = ...
///   %0.sub3:VReg_128 = ...
///   use %0.sub0_sub1_sub2_sub3
///   use %0.sub2_sub3
///
/// This avoids tracking subregister lanemasks during register pressure
/// calculation and creates more opportunities for code that is unaware of
/// lanemasks.
//===----------------------------------------------------------------------===//
308d0412ceSValery Pykhtin 
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "rewrite-partial-reg-uses"
448d0412ceSValery Pykhtin 
458d0412ceSValery Pykhtin namespace {
468d0412ceSValery Pykhtin 
478d0412ceSValery Pykhtin class GCNRewritePartialRegUses : public MachineFunctionPass {
488d0412ceSValery Pykhtin public:
498d0412ceSValery Pykhtin   static char ID;
508d0412ceSValery Pykhtin   GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
518d0412ceSValery Pykhtin 
528d0412ceSValery Pykhtin   StringRef getPassName() const override {
538d0412ceSValery Pykhtin     return "Rewrite Partial Register Uses";
548d0412ceSValery Pykhtin   }
558d0412ceSValery Pykhtin 
568d0412ceSValery Pykhtin   void getAnalysisUsage(AnalysisUsage &AU) const override {
578d0412ceSValery Pykhtin     AU.setPreservesCFG();
58abde52aaSpaperchalice     AU.addPreserved<LiveIntervalsWrapperPass>();
594010f894Spaperchalice     AU.addPreserved<SlotIndexesWrapperPass>();
608d0412ceSValery Pykhtin     MachineFunctionPass::getAnalysisUsage(AU);
618d0412ceSValery Pykhtin   }
628d0412ceSValery Pykhtin 
638d0412ceSValery Pykhtin   bool runOnMachineFunction(MachineFunction &MF) override;
648d0412ceSValery Pykhtin 
658d0412ceSValery Pykhtin private:
668d0412ceSValery Pykhtin   MachineRegisterInfo *MRI;
678d0412ceSValery Pykhtin   const SIRegisterInfo *TRI;
688d0412ceSValery Pykhtin   const TargetInstrInfo *TII;
698d0412ceSValery Pykhtin   LiveIntervals *LIS;
708d0412ceSValery Pykhtin 
718d0412ceSValery Pykhtin   /// Rewrite partially used register Reg by shifting all its subregisters to
728d0412ceSValery Pykhtin   /// the right and replacing the original register with a register of minimal
738d0412ceSValery Pykhtin   /// size. Return true if the change has been made.
748d0412ceSValery Pykhtin   bool rewriteReg(Register Reg) const;
758d0412ceSValery Pykhtin 
768d0412ceSValery Pykhtin   /// Value type for SubRegMap below.
778d0412ceSValery Pykhtin   struct SubRegInfo {
788d0412ceSValery Pykhtin     /// Register class required to hold the value stored in the SubReg.
798d0412ceSValery Pykhtin     const TargetRegisterClass *RC;
808d0412ceSValery Pykhtin 
818d0412ceSValery Pykhtin     /// Index for the right-shifted subregister. If 0 this is the "covering"
828d0412ceSValery Pykhtin     /// subreg i.e. subreg that covers all others. Covering subreg becomes the
838d0412ceSValery Pykhtin     /// whole register after the replacement.
848d0412ceSValery Pykhtin     unsigned SubReg = AMDGPU::NoSubRegister;
858d0412ceSValery Pykhtin     SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
868d0412ceSValery Pykhtin   };
878d0412ceSValery Pykhtin 
888d0412ceSValery Pykhtin   /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
89aeafdc21SJay Foad   using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;
908d0412ceSValery Pykhtin 
918d0412ceSValery Pykhtin   /// Given register class RC and the set of used subregs as keys in the SubRegs
928d0412ceSValery Pykhtin   /// map return new register class and indexes of right-shifted subregs as
938d0412ceSValery Pykhtin   /// values in SubRegs map such that the resulting regclass would contain
948d0412ceSValery Pykhtin   /// registers of minimal size.
958d0412ceSValery Pykhtin   const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
968d0412ceSValery Pykhtin                                            SubRegMap &SubRegs) const;
978d0412ceSValery Pykhtin 
9898aa8439SValery Pykhtin   /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
9998aa8439SValery Pykhtin   /// find new regclass such that:
10098aa8439SValery Pykhtin   ///   1. It has subregs obtained by shifting each OldSubReg by RShift number
10198aa8439SValery Pykhtin   ///      of bits to the right. Every "shifted" subreg should have the same
102667ba7f8SValery Pykhtin   ///      SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
103667ba7f8SValery Pykhtin   ///      all other subregs in pairs. Basically such subreg becomes a whole
104667ba7f8SValery Pykhtin   ///      register.
10598aa8439SValery Pykhtin   ///   2. Resulting register class contains registers of minimal size but not
10698aa8439SValery Pykhtin   ///      less than RegNumBits.
10798aa8439SValery Pykhtin   ///
10898aa8439SValery Pykhtin   /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
10998aa8439SValery Pykhtin   /// parameter:
11098aa8439SValery Pykhtin   ///   OldSubReg - input parameter,
111667ba7f8SValery Pykhtin   ///   SubRegRC  - input parameter (cannot be null),
11298aa8439SValery Pykhtin   ///   NewSubReg - output, contains shifted subregs on return.
1138d0412ceSValery Pykhtin   const TargetRegisterClass *
1148d0412ceSValery Pykhtin   getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
11598aa8439SValery Pykhtin                                 unsigned RegNumBits, unsigned CoverSubregIdx,
1168d0412ceSValery Pykhtin                                 SubRegMap &SubRegs) const;
1178d0412ceSValery Pykhtin 
1188d0412ceSValery Pykhtin   /// Update live intervals after rewriting OldReg to NewReg with SubRegs map
1198d0412ceSValery Pykhtin   /// describing OldSubReg -> NewSubReg mapping.
1208d0412ceSValery Pykhtin   void updateLiveIntervals(Register OldReg, Register NewReg,
1218d0412ceSValery Pykhtin                            SubRegMap &SubRegs) const;
1228d0412ceSValery Pykhtin 
1238d0412ceSValery Pykhtin   /// Helper methods.
1248d0412ceSValery Pykhtin 
1258d0412ceSValery Pykhtin   /// Return reg class expected by a MO's parent instruction for a given MO.
1268d0412ceSValery Pykhtin   const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
1278d0412ceSValery Pykhtin 
1288d0412ceSValery Pykhtin   /// Find right-shifted by RShift amount version of the SubReg if it exists,
1298d0412ceSValery Pykhtin   /// return 0 otherwise.
1308d0412ceSValery Pykhtin   unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
1318d0412ceSValery Pykhtin 
1328d0412ceSValery Pykhtin   /// Find subreg index with a given Offset and Size, return 0 if there is no
1338d0412ceSValery Pykhtin   /// such subregister index. The result is cached in SubRegs data-member.
1348d0412ceSValery Pykhtin   unsigned getSubReg(unsigned Offset, unsigned Size) const;
1358d0412ceSValery Pykhtin 
1368d0412ceSValery Pykhtin   /// Cache for getSubReg method: {Offset, Size} -> SubReg index.
1378d0412ceSValery Pykhtin   mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;
1388d0412ceSValery Pykhtin 
1398d0412ceSValery Pykhtin   /// Return bit mask that contains all register classes that are projected into
1408d0412ceSValery Pykhtin   /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member.
1418d0412ceSValery Pykhtin   const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
1428d0412ceSValery Pykhtin                                        unsigned SubRegIdx) const;
1438d0412ceSValery Pykhtin 
1448d0412ceSValery Pykhtin   /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask.
1458d0412ceSValery Pykhtin   mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
1468d0412ceSValery Pykhtin                         const uint32_t *>
1478d0412ceSValery Pykhtin       SuperRegMasks;
1488d0412ceSValery Pykhtin 
1498d0412ceSValery Pykhtin   /// Return bitmask containing all allocatable register classes with registers
1508d0412ceSValery Pykhtin   /// aligned at AlignNumBits. The result is cached in
1518d0412ceSValery Pykhtin   /// AllocatableAndAlignedRegClassMasks data-member.
1528d0412ceSValery Pykhtin   const BitVector &
1538d0412ceSValery Pykhtin   getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
1548d0412ceSValery Pykhtin 
1558d0412ceSValery Pykhtin   /// Cache for getAllocatableAndAlignedRegClassMask method:
1568d0412ceSValery Pykhtin   ///   AlignNumBits -> Class bitmask.
1578d0412ceSValery Pykhtin   mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
1588d0412ceSValery Pykhtin };
1598d0412ceSValery Pykhtin 
1608d0412ceSValery Pykhtin } // end anonymous namespace
1618d0412ceSValery Pykhtin 
1628d0412ceSValery Pykhtin // TODO: move this to the tablegen and use binary search by Offset.
1638d0412ceSValery Pykhtin unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
1648d0412ceSValery Pykhtin                                              unsigned Size) const {
1658d0412ceSValery Pykhtin   const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
1668d0412ceSValery Pykhtin   if (Inserted) {
1678d0412ceSValery Pykhtin     for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
1688d0412ceSValery Pykhtin       if (TRI->getSubRegIdxOffset(Idx) == Offset &&
1698d0412ceSValery Pykhtin           TRI->getSubRegIdxSize(Idx) == Size) {
1708d0412ceSValery Pykhtin         I->second = Idx;
1718d0412ceSValery Pykhtin         break;
1728d0412ceSValery Pykhtin       }
1738d0412ceSValery Pykhtin     }
1748d0412ceSValery Pykhtin   }
1758d0412ceSValery Pykhtin   return I->second;
1768d0412ceSValery Pykhtin }
1778d0412ceSValery Pykhtin 
1788d0412ceSValery Pykhtin unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
1798d0412ceSValery Pykhtin                                                unsigned RShift) const {
1808d0412ceSValery Pykhtin   unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
1818d0412ceSValery Pykhtin   return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
1828d0412ceSValery Pykhtin }
1838d0412ceSValery Pykhtin 
1848d0412ceSValery Pykhtin const uint32_t *
1858d0412ceSValery Pykhtin GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
1868d0412ceSValery Pykhtin                                                unsigned SubRegIdx) const {
1878d0412ceSValery Pykhtin   const auto [I, Inserted] =
1888d0412ceSValery Pykhtin       SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
1898d0412ceSValery Pykhtin   if (Inserted) {
1908d0412ceSValery Pykhtin     for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
1918d0412ceSValery Pykhtin       if (RCI.getSubReg() == SubRegIdx) {
1928d0412ceSValery Pykhtin         I->second = RCI.getMask();
1938d0412ceSValery Pykhtin         break;
1948d0412ceSValery Pykhtin       }
1958d0412ceSValery Pykhtin     }
1968d0412ceSValery Pykhtin   }
1978d0412ceSValery Pykhtin   return I->second;
1988d0412ceSValery Pykhtin }
1998d0412ceSValery Pykhtin 
2008d0412ceSValery Pykhtin const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
2018d0412ceSValery Pykhtin     unsigned AlignNumBits) const {
2028d0412ceSValery Pykhtin   const auto [I, Inserted] =
2038d0412ceSValery Pykhtin       AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
2048d0412ceSValery Pykhtin   if (Inserted) {
2058d0412ceSValery Pykhtin     BitVector &BV = I->second;
2068d0412ceSValery Pykhtin     BV.resize(TRI->getNumRegClasses());
2078d0412ceSValery Pykhtin     for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
2088d0412ceSValery Pykhtin       auto *RC = TRI->getRegClass(ClassID);
2098d0412ceSValery Pykhtin       if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
2108d0412ceSValery Pykhtin         BV.set(ClassID);
2118d0412ceSValery Pykhtin     }
2128d0412ceSValery Pykhtin   }
2138d0412ceSValery Pykhtin   return I->second;
2148d0412ceSValery Pykhtin }
2158d0412ceSValery Pykhtin 
2168d0412ceSValery Pykhtin const TargetRegisterClass *
2178d0412ceSValery Pykhtin GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
21898aa8439SValery Pykhtin     const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
21998aa8439SValery Pykhtin     unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
2208d0412ceSValery Pykhtin 
2218d0412ceSValery Pykhtin   unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
2228d0412ceSValery Pykhtin   LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
2238d0412ceSValery Pykhtin                     << '\n');
2248d0412ceSValery Pykhtin 
2258d0412ceSValery Pykhtin   BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
2268d0412ceSValery Pykhtin   for (auto &[OldSubReg, SRI] : SubRegs) {
2278d0412ceSValery Pykhtin     auto &[SubRegRC, NewSubReg] = SRI;
228667ba7f8SValery Pykhtin     assert(SubRegRC);
2298d0412ceSValery Pykhtin 
2308d0412ceSValery Pykhtin     LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
2318d0412ceSValery Pykhtin                       << TRI->getRegClassName(SubRegRC)
2328d0412ceSValery Pykhtin                       << (SubRegRC->isAllocatable() ? "" : " not alloc")
2338d0412ceSValery Pykhtin                       << " -> ");
2348d0412ceSValery Pykhtin 
2358d0412ceSValery Pykhtin     if (OldSubReg == CoverSubregIdx) {
236667ba7f8SValery Pykhtin       // Covering subreg will become a full register, RC should be allocatable.
237667ba7f8SValery Pykhtin       assert(SubRegRC->isAllocatable());
2388d0412ceSValery Pykhtin       NewSubReg = AMDGPU::NoSubRegister;
2398d0412ceSValery Pykhtin       LLVM_DEBUG(dbgs() << "whole reg");
2408d0412ceSValery Pykhtin     } else {
2418d0412ceSValery Pykhtin       NewSubReg = shiftSubReg(OldSubReg, RShift);
2428d0412ceSValery Pykhtin       if (!NewSubReg) {
2438d0412ceSValery Pykhtin         LLVM_DEBUG(dbgs() << "none\n");
2448d0412ceSValery Pykhtin         return nullptr;
2458d0412ceSValery Pykhtin       }
2468d0412ceSValery Pykhtin       LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
2478d0412ceSValery Pykhtin     }
2488d0412ceSValery Pykhtin 
2498d0412ceSValery Pykhtin     const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
2508d0412ceSValery Pykhtin                                      : SubRegRC->getSubClassMask();
2518d0412ceSValery Pykhtin     if (!Mask)
2528d0412ceSValery Pykhtin       llvm_unreachable("no register class mask?");
2538d0412ceSValery Pykhtin 
2548d0412ceSValery Pykhtin     ClassMask.clearBitsNotInMask(Mask);
2558d0412ceSValery Pykhtin     // Don't try to early exit because checking if ClassMask has set bits isn't
2568d0412ceSValery Pykhtin     // that cheap and we expect it to pass in most cases.
2578d0412ceSValery Pykhtin     LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
2588d0412ceSValery Pykhtin   }
2598d0412ceSValery Pykhtin 
2608d0412ceSValery Pykhtin   // ClassMask is the set of all register classes such that each class is
2618d0412ceSValery Pykhtin   // allocatable, aligned, has all shifted subregs and each subreg has required
2628d0412ceSValery Pykhtin   // register class (see SubRegRC above). Now select first (that is largest)
26398aa8439SValery Pykhtin   // register class with registers of minimal but not less than RegNumBits size.
26498aa8439SValery Pykhtin   // We have to check register size because we may encounter classes of smaller
26598aa8439SValery Pykhtin   // registers like VReg_1 in some situations.
2668d0412ceSValery Pykhtin   const TargetRegisterClass *MinRC = nullptr;
2678d0412ceSValery Pykhtin   unsigned MinNumBits = std::numeric_limits<unsigned>::max();
2688d0412ceSValery Pykhtin   for (unsigned ClassID : ClassMask.set_bits()) {
2698d0412ceSValery Pykhtin     auto *RC = TRI->getRegClass(ClassID);
2708d0412ceSValery Pykhtin     unsigned NumBits = TRI->getRegSizeInBits(*RC);
27198aa8439SValery Pykhtin     if (NumBits < MinNumBits && NumBits >= RegNumBits) {
2728d0412ceSValery Pykhtin       MinNumBits = NumBits;
2738d0412ceSValery Pykhtin       MinRC = RC;
2748d0412ceSValery Pykhtin     }
27598aa8439SValery Pykhtin     if (MinNumBits == RegNumBits)
27698aa8439SValery Pykhtin       break;
2778d0412ceSValery Pykhtin   }
2788d0412ceSValery Pykhtin #ifndef NDEBUG
2798d0412ceSValery Pykhtin   if (MinRC) {
2808d0412ceSValery Pykhtin     assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
2818d0412ceSValery Pykhtin     for (auto [SubReg, SRI] : SubRegs)
28298aa8439SValery Pykhtin       // Check that all registers in MinRC support SRI.SubReg subregister.
2838d0412ceSValery Pykhtin       assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
2848d0412ceSValery Pykhtin   }
2858d0412ceSValery Pykhtin #endif
2868d0412ceSValery Pykhtin   // There might be zero RShift - in this case we just trying to find smaller
2878d0412ceSValery Pykhtin   // register.
2888d0412ceSValery Pykhtin   return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
2898d0412ceSValery Pykhtin }
2908d0412ceSValery Pykhtin 
2918d0412ceSValery Pykhtin const TargetRegisterClass *
2928d0412ceSValery Pykhtin GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
2938d0412ceSValery Pykhtin                                         SubRegMap &SubRegs) const {
2948d0412ceSValery Pykhtin   unsigned CoverSubreg = AMDGPU::NoSubRegister;
2958d0412ceSValery Pykhtin   unsigned Offset = std::numeric_limits<unsigned>::max();
2968d0412ceSValery Pykhtin   unsigned End = 0;
2978d0412ceSValery Pykhtin   for (auto [SubReg, SRI] : SubRegs) {
2988d0412ceSValery Pykhtin     unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
2998d0412ceSValery Pykhtin     unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
3008d0412ceSValery Pykhtin     if (SubRegOffset < Offset) {
3018d0412ceSValery Pykhtin       Offset = SubRegOffset;
3028d0412ceSValery Pykhtin       CoverSubreg = AMDGPU::NoSubRegister;
3038d0412ceSValery Pykhtin     }
3048d0412ceSValery Pykhtin     if (SubRegEnd > End) {
3058d0412ceSValery Pykhtin       End = SubRegEnd;
3068d0412ceSValery Pykhtin       CoverSubreg = AMDGPU::NoSubRegister;
3078d0412ceSValery Pykhtin     }
3088d0412ceSValery Pykhtin     if (SubRegOffset == Offset && SubRegEnd == End)
3098d0412ceSValery Pykhtin       CoverSubreg = SubReg;
3108d0412ceSValery Pykhtin   }
3118d0412ceSValery Pykhtin   // If covering subreg is found shift everything so the covering subreg would
3128d0412ceSValery Pykhtin   // be in the rightmost position.
3138d0412ceSValery Pykhtin   if (CoverSubreg != AMDGPU::NoSubRegister)
31498aa8439SValery Pykhtin     return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
31598aa8439SValery Pykhtin                                          SubRegs);
3168d0412ceSValery Pykhtin 
3178d0412ceSValery Pykhtin   // Otherwise find subreg with maximum required alignment and shift it and all
3188d0412ceSValery Pykhtin   // other subregs to the rightmost possible position with respect to the
3198d0412ceSValery Pykhtin   // alignment.
3208d0412ceSValery Pykhtin   unsigned MaxAlign = 0;
3218d0412ceSValery Pykhtin   for (auto [SubReg, SRI] : SubRegs)
3228d0412ceSValery Pykhtin     MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));
3238d0412ceSValery Pykhtin 
3248d0412ceSValery Pykhtin   unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
3258d0412ceSValery Pykhtin   for (auto [SubReg, SRI] : SubRegs) {
3268d0412ceSValery Pykhtin     if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
3278d0412ceSValery Pykhtin       continue;
3288d0412ceSValery Pykhtin     FirstMaxAlignedSubRegOffset =
3298d0412ceSValery Pykhtin         std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
3308d0412ceSValery Pykhtin     if (FirstMaxAlignedSubRegOffset == Offset)
3318d0412ceSValery Pykhtin       break;
3328d0412ceSValery Pykhtin   }
3338d0412ceSValery Pykhtin 
3348d0412ceSValery Pykhtin   unsigned NewOffsetOfMaxAlignedSubReg =
3358d0412ceSValery Pykhtin       alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
3368d0412ceSValery Pykhtin 
3378d0412ceSValery Pykhtin   if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
3388d0412ceSValery Pykhtin     llvm_unreachable("misaligned subreg");
3398d0412ceSValery Pykhtin 
3408d0412ceSValery Pykhtin   unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
34198aa8439SValery Pykhtin   return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
3428d0412ceSValery Pykhtin }
3438d0412ceSValery Pykhtin 
3448d0412ceSValery Pykhtin // Only the subrange's lanemasks of the original interval need to be modified.
3458d0412ceSValery Pykhtin // Subrange for a covering subreg becomes the main range.
3468d0412ceSValery Pykhtin void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
3478d0412ceSValery Pykhtin                                                    Register NewReg,
3488d0412ceSValery Pykhtin                                                    SubRegMap &SubRegs) const {
3498d0412ceSValery Pykhtin   if (!LIS->hasInterval(OldReg))
3508d0412ceSValery Pykhtin     return;
3518d0412ceSValery Pykhtin 
3528d0412ceSValery Pykhtin   auto &OldLI = LIS->getInterval(OldReg);
3538d0412ceSValery Pykhtin   auto &NewLI = LIS->createEmptyInterval(NewReg);
3548d0412ceSValery Pykhtin 
3558d0412ceSValery Pykhtin   auto &Allocator = LIS->getVNInfoAllocator();
3568d0412ceSValery Pykhtin   NewLI.setWeight(OldLI.weight());
3578d0412ceSValery Pykhtin 
3588d0412ceSValery Pykhtin   for (auto &SR : OldLI.subranges()) {
3598d0412ceSValery Pykhtin     auto I = find_if(SubRegs, [&](auto &P) {
3608d0412ceSValery Pykhtin       return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
3618d0412ceSValery Pykhtin     });
3628d0412ceSValery Pykhtin 
3638d0412ceSValery Pykhtin     if (I == SubRegs.end()) {
3648d0412ceSValery Pykhtin       // There might be a situation when subranges don't exactly match used
3658d0412ceSValery Pykhtin       // subregs, for example:
3668d0412ceSValery Pykhtin       // %120 [160r,1392r:0) 0@160r
3678d0412ceSValery Pykhtin       //    L000000000000C000 [160r,1392r:0) 0@160r
3688d0412ceSValery Pykhtin       //    L0000000000003000 [160r,1392r:0) 0@160r
3698d0412ceSValery Pykhtin       //    L0000000000000C00 [160r,1392r:0) 0@160r
3708d0412ceSValery Pykhtin       //    L0000000000000300 [160r,1392r:0) 0@160r
3718d0412ceSValery Pykhtin       //    L0000000000000003 [160r,1104r:0) 0@160r
3728d0412ceSValery Pykhtin       //    L000000000000000C [160r,1104r:0) 0@160r
3738d0412ceSValery Pykhtin       //    L0000000000000030 [160r,1104r:0) 0@160r
3748d0412ceSValery Pykhtin       //    L00000000000000C0 [160r,1104r:0) 0@160r
3758d0412ceSValery Pykhtin       // but used subregs are:
3768d0412ceSValery Pykhtin       //    sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF
3778d0412ceSValery Pykhtin       //    sub0_sub1_sub2_sub3, L00000000000000FF
3788d0412ceSValery Pykhtin       //    sub4_sub5_sub6_sub7, L000000000000FF00
3798d0412ceSValery Pykhtin       // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7
3808d0412ceSValery Pykhtin       // have several subranges with the same lifetime. For such cases just
3818d0412ceSValery Pykhtin       // recreate the interval.
3828d0412ceSValery Pykhtin       LIS->removeInterval(OldReg);
3838d0412ceSValery Pykhtin       LIS->removeInterval(NewReg);
3848d0412ceSValery Pykhtin       LIS->createAndComputeVirtRegInterval(NewReg);
3858d0412ceSValery Pykhtin       return;
3868d0412ceSValery Pykhtin     }
3878d0412ceSValery Pykhtin 
3888d0412ceSValery Pykhtin     if (unsigned NewSubReg = I->second.SubReg)
3898d0412ceSValery Pykhtin       NewLI.createSubRangeFrom(Allocator,
3908d0412ceSValery Pykhtin                                TRI->getSubRegIndexLaneMask(NewSubReg), SR);
3918d0412ceSValery Pykhtin     else // This is the covering subreg (0 index) - set it as main range.
3928d0412ceSValery Pykhtin       NewLI.assign(SR, Allocator);
3938d0412ceSValery Pykhtin 
3948d0412ceSValery Pykhtin     SubRegs.erase(I);
3958d0412ceSValery Pykhtin   }
3968d0412ceSValery Pykhtin   if (NewLI.empty())
3978d0412ceSValery Pykhtin     NewLI.assign(OldLI, Allocator);
398*b30b9eb7SMatt Arsenault   assert(NewLI.verify(MRI));
3998d0412ceSValery Pykhtin   LIS->removeInterval(OldReg);
4008d0412ceSValery Pykhtin }
4018d0412ceSValery Pykhtin 
4028d0412ceSValery Pykhtin const TargetRegisterClass *
4038d0412ceSValery Pykhtin GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
4048d0412ceSValery Pykhtin   MachineInstr *MI = MO.getParent();
4058d0412ceSValery Pykhtin   return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
4068d0412ceSValery Pykhtin                           *MI->getParent()->getParent());
4078d0412ceSValery Pykhtin }
4088d0412ceSValery Pykhtin 
4098d0412ceSValery Pykhtin bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
4108d0412ceSValery Pykhtin   auto Range = MRI->reg_nodbg_operands(Reg);
411667ba7f8SValery Pykhtin   if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
412667ba7f8SValery Pykhtin         return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
413667ba7f8SValery Pykhtin       }))
4148d0412ceSValery Pykhtin     return false;
4158d0412ceSValery Pykhtin 
41698aa8439SValery Pykhtin   auto *RC = MRI->getRegClass(Reg);
41798aa8439SValery Pykhtin   LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
41898aa8439SValery Pykhtin                     << ':' << TRI->getRegClassName(RC) << '\n');
41998aa8439SValery Pykhtin 
420667ba7f8SValery Pykhtin   // Collect used subregs and their reg classes infered from instruction
4218d0412ceSValery Pykhtin   // operands.
4228d0412ceSValery Pykhtin   SubRegMap SubRegs;
423667ba7f8SValery Pykhtin   for (MachineOperand &MO : Range) {
424667ba7f8SValery Pykhtin     const unsigned SubReg = MO.getSubReg();
425667ba7f8SValery Pykhtin     assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
426667ba7f8SValery Pykhtin     LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(SubReg) << ':');
427667ba7f8SValery Pykhtin 
428667ba7f8SValery Pykhtin     const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
429667ba7f8SValery Pykhtin     const TargetRegisterClass *&SubRegRC = I->second.RC;
430667ba7f8SValery Pykhtin 
431667ba7f8SValery Pykhtin     if (Inserted)
432667ba7f8SValery Pykhtin       SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
433667ba7f8SValery Pykhtin 
434667ba7f8SValery Pykhtin     if (SubRegRC) {
435667ba7f8SValery Pykhtin       if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
436667ba7f8SValery Pykhtin         LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
437667ba7f8SValery Pykhtin                           << TRI->getRegClassName(OpDescRC) << " = ");
438667ba7f8SValery Pykhtin         SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
439667ba7f8SValery Pykhtin       }
440667ba7f8SValery Pykhtin     }
441667ba7f8SValery Pykhtin 
442667ba7f8SValery Pykhtin     if (!SubRegRC) {
443667ba7f8SValery Pykhtin       LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
44498aa8439SValery Pykhtin       return false;
4458d0412ceSValery Pykhtin     }
446667ba7f8SValery Pykhtin     LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
44798aa8439SValery Pykhtin   }
4488d0412ceSValery Pykhtin 
4498d0412ceSValery Pykhtin   auto *NewRC = getMinSizeReg(RC, SubRegs);
4508d0412ceSValery Pykhtin   if (!NewRC) {
4518d0412ceSValery Pykhtin     LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
4528d0412ceSValery Pykhtin     return false;
4538d0412ceSValery Pykhtin   }
4548d0412ceSValery Pykhtin 
4558d0412ceSValery Pykhtin   Register NewReg = MRI->createVirtualRegister(NewRC);
4568d0412ceSValery Pykhtin   LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
4578d0412ceSValery Pykhtin                     << TRI->getRegClassName(RC) << " -> "
4588d0412ceSValery Pykhtin                     << printReg(NewReg, TRI) << ':'
4598d0412ceSValery Pykhtin                     << TRI->getRegClassName(NewRC) << '\n');
4608d0412ceSValery Pykhtin 
4618d0412ceSValery Pykhtin   for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
4628d0412ceSValery Pykhtin     MO.setReg(NewReg);
4638d0412ceSValery Pykhtin     // Debug info can refer to the whole reg, just leave it as it is for now.
4648d0412ceSValery Pykhtin     // TODO: create some DI shift expression?
4658d0412ceSValery Pykhtin     if (MO.isDebug() && MO.getSubReg() == 0)
4668d0412ceSValery Pykhtin       continue;
4678d0412ceSValery Pykhtin     unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
4688d0412ceSValery Pykhtin     MO.setSubReg(SubReg);
4698d0412ceSValery Pykhtin     if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
4708d0412ceSValery Pykhtin       MO.setIsUndef(false);
4718d0412ceSValery Pykhtin   }
4728d0412ceSValery Pykhtin 
4738d0412ceSValery Pykhtin   if (LIS)
4748d0412ceSValery Pykhtin     updateLiveIntervals(Reg, NewReg, SubRegs);
4758d0412ceSValery Pykhtin 
4768d0412ceSValery Pykhtin   return true;
4778d0412ceSValery Pykhtin }
4788d0412ceSValery Pykhtin 
4798d0412ceSValery Pykhtin bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
4808d0412ceSValery Pykhtin   MRI = &MF.getRegInfo();
4818d0412ceSValery Pykhtin   TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
4828d0412ceSValery Pykhtin   TII = MF.getSubtarget().getInstrInfo();
483abde52aaSpaperchalice   auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
484abde52aaSpaperchalice   LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
4858d0412ceSValery Pykhtin   bool Changed = false;
4868d0412ceSValery Pykhtin   for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
4878d0412ceSValery Pykhtin     Changed |= rewriteReg(Register::index2VirtReg(I));
4888d0412ceSValery Pykhtin   }
4898d0412ceSValery Pykhtin   return Changed;
4908d0412ceSValery Pykhtin }
4918d0412ceSValery Pykhtin 
4928d0412ceSValery Pykhtin char GCNRewritePartialRegUses::ID;
4938d0412ceSValery Pykhtin 
4948d0412ceSValery Pykhtin char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
4958d0412ceSValery Pykhtin 
4968d0412ceSValery Pykhtin INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
4978d0412ceSValery Pykhtin                       "Rewrite Partial Register Uses", false, false)
4988d0412ceSValery Pykhtin INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
4998d0412ceSValery Pykhtin                     "Rewrite Partial Register Uses", false, false)
500