xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric //===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric /// \file
906c3fb27SDimitry Andric /// RenameIndependentSubregs pass leaves large partially used super registers,
1006c3fb27SDimitry Andric /// for example:
1106c3fb27SDimitry Andric ///   undef %0.sub4:VReg_1024 = ...
1206c3fb27SDimitry Andric ///   %0.sub5:VReg_1024 = ...
1306c3fb27SDimitry Andric ///   %0.sub6:VReg_1024 = ...
1406c3fb27SDimitry Andric ///   %0.sub7:VReg_1024 = ...
1506c3fb27SDimitry Andric ///   use %0.sub4_sub5_sub6_sub7
1606c3fb27SDimitry Andric ///   use %0.sub6_sub7
1706c3fb27SDimitry Andric ///
1806c3fb27SDimitry Andric /// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and
1906c3fb27SDimitry Andric /// rewrites such partially used super registers with registers of minimal size:
2006c3fb27SDimitry Andric ///   undef %0.sub0:VReg_128 = ...
2106c3fb27SDimitry Andric ///   %0.sub1:VReg_128 = ...
2206c3fb27SDimitry Andric ///   %0.sub2:VReg_128 = ...
2306c3fb27SDimitry Andric ///   %0.sub3:VReg_128 = ...
2406c3fb27SDimitry Andric ///   use %0.sub0_sub1_sub2_sub3
2506c3fb27SDimitry Andric ///   use %0.sub2_sub3
2606c3fb27SDimitry Andric ///
/// This avoids having to track subreg lanemasks during register pressure
/// calculation and creates more opportunities for code that is unaware of
/// lanemasks.
2906c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
3006c3fb27SDimitry Andric 
3106c3fb27SDimitry Andric #include "AMDGPU.h"
3206c3fb27SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
3306c3fb27SDimitry Andric #include "SIRegisterInfo.h"
3406c3fb27SDimitry Andric #include "llvm/CodeGen/LiveInterval.h"
3506c3fb27SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
3606c3fb27SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
3706c3fb27SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
3806c3fb27SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
3906c3fb27SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
4006c3fb27SDimitry Andric #include "llvm/InitializePasses.h"
4106c3fb27SDimitry Andric #include "llvm/Pass.h"
4206c3fb27SDimitry Andric 
4306c3fb27SDimitry Andric using namespace llvm;
4406c3fb27SDimitry Andric 
4506c3fb27SDimitry Andric #define DEBUG_TYPE "rewrite-partial-reg-uses"
4606c3fb27SDimitry Andric 
4706c3fb27SDimitry Andric namespace {
4806c3fb27SDimitry Andric 
4906c3fb27SDimitry Andric class GCNRewritePartialRegUses : public MachineFunctionPass {
5006c3fb27SDimitry Andric public:
5106c3fb27SDimitry Andric   static char ID;
5206c3fb27SDimitry Andric   GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
5306c3fb27SDimitry Andric 
5406c3fb27SDimitry Andric   StringRef getPassName() const override {
5506c3fb27SDimitry Andric     return "Rewrite Partial Register Uses";
5606c3fb27SDimitry Andric   }
5706c3fb27SDimitry Andric 
5806c3fb27SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
5906c3fb27SDimitry Andric     AU.setPreservesCFG();
60*0fca6ea1SDimitry Andric     AU.addPreserved<LiveIntervalsWrapperPass>();
61*0fca6ea1SDimitry Andric     AU.addPreserved<SlotIndexesWrapperPass>();
6206c3fb27SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
6306c3fb27SDimitry Andric   }
6406c3fb27SDimitry Andric 
6506c3fb27SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
6606c3fb27SDimitry Andric 
6706c3fb27SDimitry Andric private:
6806c3fb27SDimitry Andric   MachineRegisterInfo *MRI;
6906c3fb27SDimitry Andric   const SIRegisterInfo *TRI;
7006c3fb27SDimitry Andric   const TargetInstrInfo *TII;
7106c3fb27SDimitry Andric   LiveIntervals *LIS;
7206c3fb27SDimitry Andric 
7306c3fb27SDimitry Andric   /// Rewrite partially used register Reg by shifting all its subregisters to
7406c3fb27SDimitry Andric   /// the right and replacing the original register with a register of minimal
7506c3fb27SDimitry Andric   /// size. Return true if the change has been made.
7606c3fb27SDimitry Andric   bool rewriteReg(Register Reg) const;
7706c3fb27SDimitry Andric 
7806c3fb27SDimitry Andric   /// Value type for SubRegMap below.
7906c3fb27SDimitry Andric   struct SubRegInfo {
8006c3fb27SDimitry Andric     /// Register class required to hold the value stored in the SubReg.
8106c3fb27SDimitry Andric     const TargetRegisterClass *RC;
8206c3fb27SDimitry Andric 
8306c3fb27SDimitry Andric     /// Index for the right-shifted subregister. If 0 this is the "covering"
8406c3fb27SDimitry Andric     /// subreg i.e. subreg that covers all others. Covering subreg becomes the
8506c3fb27SDimitry Andric     /// whole register after the replacement.
8606c3fb27SDimitry Andric     unsigned SubReg = AMDGPU::NoSubRegister;
8706c3fb27SDimitry Andric     SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
8806c3fb27SDimitry Andric   };
8906c3fb27SDimitry Andric 
9006c3fb27SDimitry Andric   /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
91*0fca6ea1SDimitry Andric   using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;
9206c3fb27SDimitry Andric 
9306c3fb27SDimitry Andric   /// Given register class RC and the set of used subregs as keys in the SubRegs
9406c3fb27SDimitry Andric   /// map return new register class and indexes of right-shifted subregs as
9506c3fb27SDimitry Andric   /// values in SubRegs map such that the resulting regclass would contain
9606c3fb27SDimitry Andric   /// registers of minimal size.
9706c3fb27SDimitry Andric   const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
9806c3fb27SDimitry Andric                                            SubRegMap &SubRegs) const;
9906c3fb27SDimitry Andric 
10006c3fb27SDimitry Andric   /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
10106c3fb27SDimitry Andric   /// find new regclass such that:
10206c3fb27SDimitry Andric   ///   1. It has subregs obtained by shifting each OldSubReg by RShift number
10306c3fb27SDimitry Andric   ///      of bits to the right. Every "shifted" subreg should have the same
1045f757f3fSDimitry Andric   ///      SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
1055f757f3fSDimitry Andric   ///      all other subregs in pairs. Basically such subreg becomes a whole
1065f757f3fSDimitry Andric   ///      register.
10706c3fb27SDimitry Andric   ///   2. Resulting register class contains registers of minimal size but not
10806c3fb27SDimitry Andric   ///      less than RegNumBits.
10906c3fb27SDimitry Andric   ///
11006c3fb27SDimitry Andric   /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
11106c3fb27SDimitry Andric   /// parameter:
11206c3fb27SDimitry Andric   ///   OldSubReg - input parameter,
1135f757f3fSDimitry Andric   ///   SubRegRC  - input parameter (cannot be null),
11406c3fb27SDimitry Andric   ///   NewSubReg - output, contains shifted subregs on return.
11506c3fb27SDimitry Andric   const TargetRegisterClass *
11606c3fb27SDimitry Andric   getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
11706c3fb27SDimitry Andric                                 unsigned RegNumBits, unsigned CoverSubregIdx,
11806c3fb27SDimitry Andric                                 SubRegMap &SubRegs) const;
11906c3fb27SDimitry Andric 
12006c3fb27SDimitry Andric   /// Update live intervals after rewriting OldReg to NewReg with SubRegs map
12106c3fb27SDimitry Andric   /// describing OldSubReg -> NewSubReg mapping.
12206c3fb27SDimitry Andric   void updateLiveIntervals(Register OldReg, Register NewReg,
12306c3fb27SDimitry Andric                            SubRegMap &SubRegs) const;
12406c3fb27SDimitry Andric 
12506c3fb27SDimitry Andric   /// Helper methods.
12606c3fb27SDimitry Andric 
12706c3fb27SDimitry Andric   /// Return reg class expected by a MO's parent instruction for a given MO.
12806c3fb27SDimitry Andric   const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
12906c3fb27SDimitry Andric 
13006c3fb27SDimitry Andric   /// Find right-shifted by RShift amount version of the SubReg if it exists,
13106c3fb27SDimitry Andric   /// return 0 otherwise.
13206c3fb27SDimitry Andric   unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
13306c3fb27SDimitry Andric 
13406c3fb27SDimitry Andric   /// Find subreg index with a given Offset and Size, return 0 if there is no
13506c3fb27SDimitry Andric   /// such subregister index. The result is cached in SubRegs data-member.
13606c3fb27SDimitry Andric   unsigned getSubReg(unsigned Offset, unsigned Size) const;
13706c3fb27SDimitry Andric 
13806c3fb27SDimitry Andric   /// Cache for getSubReg method: {Offset, Size} -> SubReg index.
13906c3fb27SDimitry Andric   mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;
14006c3fb27SDimitry Andric 
14106c3fb27SDimitry Andric   /// Return bit mask that contains all register classes that are projected into
14206c3fb27SDimitry Andric   /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member.
14306c3fb27SDimitry Andric   const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
14406c3fb27SDimitry Andric                                        unsigned SubRegIdx) const;
14506c3fb27SDimitry Andric 
14606c3fb27SDimitry Andric   /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask.
14706c3fb27SDimitry Andric   mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
14806c3fb27SDimitry Andric                         const uint32_t *>
14906c3fb27SDimitry Andric       SuperRegMasks;
15006c3fb27SDimitry Andric 
15106c3fb27SDimitry Andric   /// Return bitmask containing all allocatable register classes with registers
15206c3fb27SDimitry Andric   /// aligned at AlignNumBits. The result is cached in
15306c3fb27SDimitry Andric   /// AllocatableAndAlignedRegClassMasks data-member.
15406c3fb27SDimitry Andric   const BitVector &
15506c3fb27SDimitry Andric   getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
15606c3fb27SDimitry Andric 
15706c3fb27SDimitry Andric   /// Cache for getAllocatableAndAlignedRegClassMask method:
15806c3fb27SDimitry Andric   ///   AlignNumBits -> Class bitmask.
15906c3fb27SDimitry Andric   mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
16006c3fb27SDimitry Andric };
16106c3fb27SDimitry Andric 
16206c3fb27SDimitry Andric } // end anonymous namespace
16306c3fb27SDimitry Andric 
16406c3fb27SDimitry Andric // TODO: move this to the tablegen and use binary search by Offset.
16506c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
16606c3fb27SDimitry Andric                                              unsigned Size) const {
16706c3fb27SDimitry Andric   const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
16806c3fb27SDimitry Andric   if (Inserted) {
16906c3fb27SDimitry Andric     for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
17006c3fb27SDimitry Andric       if (TRI->getSubRegIdxOffset(Idx) == Offset &&
17106c3fb27SDimitry Andric           TRI->getSubRegIdxSize(Idx) == Size) {
17206c3fb27SDimitry Andric         I->second = Idx;
17306c3fb27SDimitry Andric         break;
17406c3fb27SDimitry Andric       }
17506c3fb27SDimitry Andric     }
17606c3fb27SDimitry Andric   }
17706c3fb27SDimitry Andric   return I->second;
17806c3fb27SDimitry Andric }
17906c3fb27SDimitry Andric 
18006c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
18106c3fb27SDimitry Andric                                                unsigned RShift) const {
18206c3fb27SDimitry Andric   unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
18306c3fb27SDimitry Andric   return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
18406c3fb27SDimitry Andric }
18506c3fb27SDimitry Andric 
18606c3fb27SDimitry Andric const uint32_t *
18706c3fb27SDimitry Andric GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
18806c3fb27SDimitry Andric                                                unsigned SubRegIdx) const {
18906c3fb27SDimitry Andric   const auto [I, Inserted] =
19006c3fb27SDimitry Andric       SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
19106c3fb27SDimitry Andric   if (Inserted) {
19206c3fb27SDimitry Andric     for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
19306c3fb27SDimitry Andric       if (RCI.getSubReg() == SubRegIdx) {
19406c3fb27SDimitry Andric         I->second = RCI.getMask();
19506c3fb27SDimitry Andric         break;
19606c3fb27SDimitry Andric       }
19706c3fb27SDimitry Andric     }
19806c3fb27SDimitry Andric   }
19906c3fb27SDimitry Andric   return I->second;
20006c3fb27SDimitry Andric }
20106c3fb27SDimitry Andric 
20206c3fb27SDimitry Andric const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
20306c3fb27SDimitry Andric     unsigned AlignNumBits) const {
20406c3fb27SDimitry Andric   const auto [I, Inserted] =
20506c3fb27SDimitry Andric       AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
20606c3fb27SDimitry Andric   if (Inserted) {
20706c3fb27SDimitry Andric     BitVector &BV = I->second;
20806c3fb27SDimitry Andric     BV.resize(TRI->getNumRegClasses());
20906c3fb27SDimitry Andric     for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
21006c3fb27SDimitry Andric       auto *RC = TRI->getRegClass(ClassID);
21106c3fb27SDimitry Andric       if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
21206c3fb27SDimitry Andric         BV.set(ClassID);
21306c3fb27SDimitry Andric     }
21406c3fb27SDimitry Andric   }
21506c3fb27SDimitry Andric   return I->second;
21606c3fb27SDimitry Andric }
21706c3fb27SDimitry Andric 
21806c3fb27SDimitry Andric const TargetRegisterClass *
21906c3fb27SDimitry Andric GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
22006c3fb27SDimitry Andric     const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
22106c3fb27SDimitry Andric     unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
22206c3fb27SDimitry Andric 
22306c3fb27SDimitry Andric   unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
22406c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
22506c3fb27SDimitry Andric                     << '\n');
22606c3fb27SDimitry Andric 
22706c3fb27SDimitry Andric   BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
22806c3fb27SDimitry Andric   for (auto &[OldSubReg, SRI] : SubRegs) {
22906c3fb27SDimitry Andric     auto &[SubRegRC, NewSubReg] = SRI;
2305f757f3fSDimitry Andric     assert(SubRegRC);
23106c3fb27SDimitry Andric 
23206c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
23306c3fb27SDimitry Andric                       << TRI->getRegClassName(SubRegRC)
23406c3fb27SDimitry Andric                       << (SubRegRC->isAllocatable() ? "" : " not alloc")
23506c3fb27SDimitry Andric                       << " -> ");
23606c3fb27SDimitry Andric 
23706c3fb27SDimitry Andric     if (OldSubReg == CoverSubregIdx) {
2385f757f3fSDimitry Andric       // Covering subreg will become a full register, RC should be allocatable.
2395f757f3fSDimitry Andric       assert(SubRegRC->isAllocatable());
24006c3fb27SDimitry Andric       NewSubReg = AMDGPU::NoSubRegister;
24106c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << "whole reg");
24206c3fb27SDimitry Andric     } else {
24306c3fb27SDimitry Andric       NewSubReg = shiftSubReg(OldSubReg, RShift);
24406c3fb27SDimitry Andric       if (!NewSubReg) {
24506c3fb27SDimitry Andric         LLVM_DEBUG(dbgs() << "none\n");
24606c3fb27SDimitry Andric         return nullptr;
24706c3fb27SDimitry Andric       }
24806c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
24906c3fb27SDimitry Andric     }
25006c3fb27SDimitry Andric 
25106c3fb27SDimitry Andric     const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
25206c3fb27SDimitry Andric                                      : SubRegRC->getSubClassMask();
25306c3fb27SDimitry Andric     if (!Mask)
25406c3fb27SDimitry Andric       llvm_unreachable("no register class mask?");
25506c3fb27SDimitry Andric 
25606c3fb27SDimitry Andric     ClassMask.clearBitsNotInMask(Mask);
25706c3fb27SDimitry Andric     // Don't try to early exit because checking if ClassMask has set bits isn't
25806c3fb27SDimitry Andric     // that cheap and we expect it to pass in most cases.
25906c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
26006c3fb27SDimitry Andric   }
26106c3fb27SDimitry Andric 
26206c3fb27SDimitry Andric   // ClassMask is the set of all register classes such that each class is
26306c3fb27SDimitry Andric   // allocatable, aligned, has all shifted subregs and each subreg has required
26406c3fb27SDimitry Andric   // register class (see SubRegRC above). Now select first (that is largest)
26506c3fb27SDimitry Andric   // register class with registers of minimal but not less than RegNumBits size.
26606c3fb27SDimitry Andric   // We have to check register size because we may encounter classes of smaller
26706c3fb27SDimitry Andric   // registers like VReg_1 in some situations.
26806c3fb27SDimitry Andric   const TargetRegisterClass *MinRC = nullptr;
26906c3fb27SDimitry Andric   unsigned MinNumBits = std::numeric_limits<unsigned>::max();
27006c3fb27SDimitry Andric   for (unsigned ClassID : ClassMask.set_bits()) {
27106c3fb27SDimitry Andric     auto *RC = TRI->getRegClass(ClassID);
27206c3fb27SDimitry Andric     unsigned NumBits = TRI->getRegSizeInBits(*RC);
27306c3fb27SDimitry Andric     if (NumBits < MinNumBits && NumBits >= RegNumBits) {
27406c3fb27SDimitry Andric       MinNumBits = NumBits;
27506c3fb27SDimitry Andric       MinRC = RC;
27606c3fb27SDimitry Andric     }
27706c3fb27SDimitry Andric     if (MinNumBits == RegNumBits)
27806c3fb27SDimitry Andric       break;
27906c3fb27SDimitry Andric   }
28006c3fb27SDimitry Andric #ifndef NDEBUG
28106c3fb27SDimitry Andric   if (MinRC) {
28206c3fb27SDimitry Andric     assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
28306c3fb27SDimitry Andric     for (auto [SubReg, SRI] : SubRegs)
28406c3fb27SDimitry Andric       // Check that all registers in MinRC support SRI.SubReg subregister.
28506c3fb27SDimitry Andric       assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
28606c3fb27SDimitry Andric   }
28706c3fb27SDimitry Andric #endif
28806c3fb27SDimitry Andric   // There might be zero RShift - in this case we just trying to find smaller
28906c3fb27SDimitry Andric   // register.
29006c3fb27SDimitry Andric   return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
29106c3fb27SDimitry Andric }
29206c3fb27SDimitry Andric 
29306c3fb27SDimitry Andric const TargetRegisterClass *
29406c3fb27SDimitry Andric GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
29506c3fb27SDimitry Andric                                         SubRegMap &SubRegs) const {
29606c3fb27SDimitry Andric   unsigned CoverSubreg = AMDGPU::NoSubRegister;
29706c3fb27SDimitry Andric   unsigned Offset = std::numeric_limits<unsigned>::max();
29806c3fb27SDimitry Andric   unsigned End = 0;
29906c3fb27SDimitry Andric   for (auto [SubReg, SRI] : SubRegs) {
30006c3fb27SDimitry Andric     unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
30106c3fb27SDimitry Andric     unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
30206c3fb27SDimitry Andric     if (SubRegOffset < Offset) {
30306c3fb27SDimitry Andric       Offset = SubRegOffset;
30406c3fb27SDimitry Andric       CoverSubreg = AMDGPU::NoSubRegister;
30506c3fb27SDimitry Andric     }
30606c3fb27SDimitry Andric     if (SubRegEnd > End) {
30706c3fb27SDimitry Andric       End = SubRegEnd;
30806c3fb27SDimitry Andric       CoverSubreg = AMDGPU::NoSubRegister;
30906c3fb27SDimitry Andric     }
31006c3fb27SDimitry Andric     if (SubRegOffset == Offset && SubRegEnd == End)
31106c3fb27SDimitry Andric       CoverSubreg = SubReg;
31206c3fb27SDimitry Andric   }
31306c3fb27SDimitry Andric   // If covering subreg is found shift everything so the covering subreg would
31406c3fb27SDimitry Andric   // be in the rightmost position.
31506c3fb27SDimitry Andric   if (CoverSubreg != AMDGPU::NoSubRegister)
31606c3fb27SDimitry Andric     return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
31706c3fb27SDimitry Andric                                          SubRegs);
31806c3fb27SDimitry Andric 
31906c3fb27SDimitry Andric   // Otherwise find subreg with maximum required alignment and shift it and all
32006c3fb27SDimitry Andric   // other subregs to the rightmost possible position with respect to the
32106c3fb27SDimitry Andric   // alignment.
32206c3fb27SDimitry Andric   unsigned MaxAlign = 0;
32306c3fb27SDimitry Andric   for (auto [SubReg, SRI] : SubRegs)
32406c3fb27SDimitry Andric     MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));
32506c3fb27SDimitry Andric 
32606c3fb27SDimitry Andric   unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
32706c3fb27SDimitry Andric   for (auto [SubReg, SRI] : SubRegs) {
32806c3fb27SDimitry Andric     if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
32906c3fb27SDimitry Andric       continue;
33006c3fb27SDimitry Andric     FirstMaxAlignedSubRegOffset =
33106c3fb27SDimitry Andric         std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
33206c3fb27SDimitry Andric     if (FirstMaxAlignedSubRegOffset == Offset)
33306c3fb27SDimitry Andric       break;
33406c3fb27SDimitry Andric   }
33506c3fb27SDimitry Andric 
33606c3fb27SDimitry Andric   unsigned NewOffsetOfMaxAlignedSubReg =
33706c3fb27SDimitry Andric       alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
33806c3fb27SDimitry Andric 
33906c3fb27SDimitry Andric   if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
34006c3fb27SDimitry Andric     llvm_unreachable("misaligned subreg");
34106c3fb27SDimitry Andric 
34206c3fb27SDimitry Andric   unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
34306c3fb27SDimitry Andric   return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
34406c3fb27SDimitry Andric }
34506c3fb27SDimitry Andric 
34606c3fb27SDimitry Andric // Only the subrange's lanemasks of the original interval need to be modified.
34706c3fb27SDimitry Andric // Subrange for a covering subreg becomes the main range.
34806c3fb27SDimitry Andric void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
34906c3fb27SDimitry Andric                                                    Register NewReg,
35006c3fb27SDimitry Andric                                                    SubRegMap &SubRegs) const {
35106c3fb27SDimitry Andric   if (!LIS->hasInterval(OldReg))
35206c3fb27SDimitry Andric     return;
35306c3fb27SDimitry Andric 
35406c3fb27SDimitry Andric   auto &OldLI = LIS->getInterval(OldReg);
35506c3fb27SDimitry Andric   auto &NewLI = LIS->createEmptyInterval(NewReg);
35606c3fb27SDimitry Andric 
35706c3fb27SDimitry Andric   auto &Allocator = LIS->getVNInfoAllocator();
35806c3fb27SDimitry Andric   NewLI.setWeight(OldLI.weight());
35906c3fb27SDimitry Andric 
36006c3fb27SDimitry Andric   for (auto &SR : OldLI.subranges()) {
36106c3fb27SDimitry Andric     auto I = find_if(SubRegs, [&](auto &P) {
36206c3fb27SDimitry Andric       return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
36306c3fb27SDimitry Andric     });
36406c3fb27SDimitry Andric 
36506c3fb27SDimitry Andric     if (I == SubRegs.end()) {
36606c3fb27SDimitry Andric       // There might be a situation when subranges don't exactly match used
36706c3fb27SDimitry Andric       // subregs, for example:
36806c3fb27SDimitry Andric       // %120 [160r,1392r:0) 0@160r
36906c3fb27SDimitry Andric       //    L000000000000C000 [160r,1392r:0) 0@160r
37006c3fb27SDimitry Andric       //    L0000000000003000 [160r,1392r:0) 0@160r
37106c3fb27SDimitry Andric       //    L0000000000000C00 [160r,1392r:0) 0@160r
37206c3fb27SDimitry Andric       //    L0000000000000300 [160r,1392r:0) 0@160r
37306c3fb27SDimitry Andric       //    L0000000000000003 [160r,1104r:0) 0@160r
37406c3fb27SDimitry Andric       //    L000000000000000C [160r,1104r:0) 0@160r
37506c3fb27SDimitry Andric       //    L0000000000000030 [160r,1104r:0) 0@160r
37606c3fb27SDimitry Andric       //    L00000000000000C0 [160r,1104r:0) 0@160r
37706c3fb27SDimitry Andric       // but used subregs are:
37806c3fb27SDimitry Andric       //    sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF
37906c3fb27SDimitry Andric       //    sub0_sub1_sub2_sub3, L00000000000000FF
38006c3fb27SDimitry Andric       //    sub4_sub5_sub6_sub7, L000000000000FF00
38106c3fb27SDimitry Andric       // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7
38206c3fb27SDimitry Andric       // have several subranges with the same lifetime. For such cases just
38306c3fb27SDimitry Andric       // recreate the interval.
38406c3fb27SDimitry Andric       LIS->removeInterval(OldReg);
38506c3fb27SDimitry Andric       LIS->removeInterval(NewReg);
38606c3fb27SDimitry Andric       LIS->createAndComputeVirtRegInterval(NewReg);
38706c3fb27SDimitry Andric       return;
38806c3fb27SDimitry Andric     }
38906c3fb27SDimitry Andric 
39006c3fb27SDimitry Andric     if (unsigned NewSubReg = I->second.SubReg)
39106c3fb27SDimitry Andric       NewLI.createSubRangeFrom(Allocator,
39206c3fb27SDimitry Andric                                TRI->getSubRegIndexLaneMask(NewSubReg), SR);
39306c3fb27SDimitry Andric     else // This is the covering subreg (0 index) - set it as main range.
39406c3fb27SDimitry Andric       NewLI.assign(SR, Allocator);
39506c3fb27SDimitry Andric 
39606c3fb27SDimitry Andric     SubRegs.erase(I);
39706c3fb27SDimitry Andric   }
39806c3fb27SDimitry Andric   if (NewLI.empty())
39906c3fb27SDimitry Andric     NewLI.assign(OldLI, Allocator);
40006c3fb27SDimitry Andric   NewLI.verify(MRI);
40106c3fb27SDimitry Andric   LIS->removeInterval(OldReg);
40206c3fb27SDimitry Andric }
40306c3fb27SDimitry Andric 
40406c3fb27SDimitry Andric const TargetRegisterClass *
40506c3fb27SDimitry Andric GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
40606c3fb27SDimitry Andric   MachineInstr *MI = MO.getParent();
40706c3fb27SDimitry Andric   return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
40806c3fb27SDimitry Andric                           *MI->getParent()->getParent());
40906c3fb27SDimitry Andric }
41006c3fb27SDimitry Andric 
41106c3fb27SDimitry Andric bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
41206c3fb27SDimitry Andric   auto Range = MRI->reg_nodbg_operands(Reg);
4135f757f3fSDimitry Andric   if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
4145f757f3fSDimitry Andric         return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
4155f757f3fSDimitry Andric       }))
41606c3fb27SDimitry Andric     return false;
41706c3fb27SDimitry Andric 
41806c3fb27SDimitry Andric   auto *RC = MRI->getRegClass(Reg);
41906c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
42006c3fb27SDimitry Andric                     << ':' << TRI->getRegClassName(RC) << '\n');
42106c3fb27SDimitry Andric 
4225f757f3fSDimitry Andric   // Collect used subregs and their reg classes infered from instruction
42306c3fb27SDimitry Andric   // operands.
42406c3fb27SDimitry Andric   SubRegMap SubRegs;
4255f757f3fSDimitry Andric   for (MachineOperand &MO : Range) {
4265f757f3fSDimitry Andric     const unsigned SubReg = MO.getSubReg();
4275f757f3fSDimitry Andric     assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
4285f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(SubReg) << ':');
4295f757f3fSDimitry Andric 
4305f757f3fSDimitry Andric     const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
4315f757f3fSDimitry Andric     const TargetRegisterClass *&SubRegRC = I->second.RC;
4325f757f3fSDimitry Andric 
4335f757f3fSDimitry Andric     if (Inserted)
4345f757f3fSDimitry Andric       SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
4355f757f3fSDimitry Andric 
4365f757f3fSDimitry Andric     if (SubRegRC) {
4375f757f3fSDimitry Andric       if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
4385f757f3fSDimitry Andric         LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
4395f757f3fSDimitry Andric                           << TRI->getRegClassName(OpDescRC) << " = ");
4405f757f3fSDimitry Andric         SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
4415f757f3fSDimitry Andric       }
4425f757f3fSDimitry Andric     }
4435f757f3fSDimitry Andric 
4445f757f3fSDimitry Andric     if (!SubRegRC) {
4455f757f3fSDimitry Andric       LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
44606c3fb27SDimitry Andric       return false;
44706c3fb27SDimitry Andric     }
4485f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
44906c3fb27SDimitry Andric   }
45006c3fb27SDimitry Andric 
45106c3fb27SDimitry Andric   auto *NewRC = getMinSizeReg(RC, SubRegs);
45206c3fb27SDimitry Andric   if (!NewRC) {
45306c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
45406c3fb27SDimitry Andric     return false;
45506c3fb27SDimitry Andric   }
45606c3fb27SDimitry Andric 
45706c3fb27SDimitry Andric   Register NewReg = MRI->createVirtualRegister(NewRC);
45806c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
45906c3fb27SDimitry Andric                     << TRI->getRegClassName(RC) << " -> "
46006c3fb27SDimitry Andric                     << printReg(NewReg, TRI) << ':'
46106c3fb27SDimitry Andric                     << TRI->getRegClassName(NewRC) << '\n');
46206c3fb27SDimitry Andric 
46306c3fb27SDimitry Andric   for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
46406c3fb27SDimitry Andric     MO.setReg(NewReg);
46506c3fb27SDimitry Andric     // Debug info can refer to the whole reg, just leave it as it is for now.
46606c3fb27SDimitry Andric     // TODO: create some DI shift expression?
46706c3fb27SDimitry Andric     if (MO.isDebug() && MO.getSubReg() == 0)
46806c3fb27SDimitry Andric       continue;
46906c3fb27SDimitry Andric     unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
47006c3fb27SDimitry Andric     MO.setSubReg(SubReg);
47106c3fb27SDimitry Andric     if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
47206c3fb27SDimitry Andric       MO.setIsUndef(false);
47306c3fb27SDimitry Andric   }
47406c3fb27SDimitry Andric 
47506c3fb27SDimitry Andric   if (LIS)
47606c3fb27SDimitry Andric     updateLiveIntervals(Reg, NewReg, SubRegs);
47706c3fb27SDimitry Andric 
47806c3fb27SDimitry Andric   return true;
47906c3fb27SDimitry Andric }
48006c3fb27SDimitry Andric 
48106c3fb27SDimitry Andric bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
48206c3fb27SDimitry Andric   MRI = &MF.getRegInfo();
48306c3fb27SDimitry Andric   TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
48406c3fb27SDimitry Andric   TII = MF.getSubtarget().getInstrInfo();
485*0fca6ea1SDimitry Andric   auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
486*0fca6ea1SDimitry Andric   LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
48706c3fb27SDimitry Andric   bool Changed = false;
48806c3fb27SDimitry Andric   for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
48906c3fb27SDimitry Andric     Changed |= rewriteReg(Register::index2VirtReg(I));
49006c3fb27SDimitry Andric   }
49106c3fb27SDimitry Andric   return Changed;
49206c3fb27SDimitry Andric }
49306c3fb27SDimitry Andric 
49406c3fb27SDimitry Andric char GCNRewritePartialRegUses::ID;
49506c3fb27SDimitry Andric 
49606c3fb27SDimitry Andric char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
49706c3fb27SDimitry Andric 
49806c3fb27SDimitry Andric INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
49906c3fb27SDimitry Andric                       "Rewrite Partial Register Uses", false, false)
50006c3fb27SDimitry Andric INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
50106c3fb27SDimitry Andric                     "Rewrite Partial Register Uses", false, false)
502