//===- AMDGPUGlobalISelUtils.cpp ---------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegisterBankInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;
using namespace AMDGPU;
using namespace MIPatternMatch;

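// Try to split \p Reg into a base register plus a constant offset. If \p Reg
// is itself defined by a G_CONSTANT, the returned base register is empty; if
// no constant offset can be peeled off, the result is (\p Reg, 0).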
std::pair<Register, unsigned>
AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
                                  GISelKnownBits *KnownBits, bool CheckNUW) {
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
    unsigned Offset;
    const MachineOperand &Op = Def->getOperand(1);
    if (Op.isImm())
      Offset = Op.getImm();
    else
      Offset = Op.getCImm()->getZExtValue();

    return std::pair(Register(), Offset);
  }

  int64_t Offset;
  if (Def->getOpcode() == TargetOpcode::G_ADD) {
    // A 32-bit (address + offset) should not cause unsigned 32-bit integer
    // wraparound, because s_load instructions perform the addition in 64 bits.
    if (CheckNUW && !Def->getFlag(MachineInstr::NoUWrap)) {
      assert(MRI.getType(Reg).getScalarSizeInBits() == 32);
      return std::pair(Reg, 0);
    }
    // TODO: Handle G_OR used for add case
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
      return std::pair(Def->getOperand(1).getReg(), Offset);

    // FIXME: matcher should ignore copies
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
      return std::pair(Def->getOperand(1).getReg(), Offset);
  }

  Register Base;
  if (KnownBits && mi_match(Reg, MRI, m_GOr(m_Reg(Base), m_ICst(Offset))) &&
      KnownBits->maskedValueIsZero(Base, APInt(32, Offset, /*isSigned=*/true)))
    return std::pair(Base, Offset);

  // Handle the G_PTRTOINT (G_PTR_ADD base, const) case.
  if (Def->getOpcode() == TargetOpcode::G_PTRTOINT) {
    MachineInstr *Base;
    if (mi_match(Def->getOperand(1).getReg(), MRI,
                 m_GPtrAdd(m_MInstr(Base), m_ICst(Offset)))) {
      // If the base was an integer converted to a pointer, simply return the
      // integer and the offset.
      if (Base->getOpcode() == TargetOpcode::G_INTTOPTR)
        return std::pair(Base->getOperand(1).getReg(), Offset);

      // Otherwise the register returned here will be of pointer type.
      return std::pair(Base->getOperand(0).getReg(), Offset);
    }
  }

  return std::pair(Reg, 0);
}

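// The analyzer does a single pass over \p MF at construction time and caches
// every register known to hold a lane mask.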
IntrinsicLaneMaskAnalyzer::IntrinsicLaneMaskAnalyzer(MachineFunction &MF)
    : MRI(MF.getRegInfo()) {
  initLaneMaskIntrinsics(MF);
}

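// Return true if \p Reg was recorded as an s32/s64 lane mask: the mask
// operand or result of a lane-mask intrinsic, or an LCSSA PHI forwarding one.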
bool IntrinsicLaneMaskAnalyzer::isS32S64LaneMask(Register Reg) const {
  return S32S64LaneMask.contains(Reg);
}

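// Collect the registers holding lane masks: the mask operand and result of
// amdgcn.if.break, and the results of the SI_IF/SI_ELSE pseudos.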
void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
      if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
        // Operand 3 is the incoming lane-mask argument; operand 0 the result.
        S32S64LaneMask.insert(MI.getOperand(3).getReg());
        findLCSSAPhi(MI.getOperand(0).getReg());
      }

      if (MI.getOpcode() == AMDGPU::SI_IF ||
          MI.getOpcode() == AMDGPU::SI_ELSE) {
        findLCSSAPhi(MI.getOperand(0).getReg());
      }
    }
  }
}

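// Record \p Reg and the results of any PHIs using it directly (LCSSA PHIs),
// since those PHIs just forward the lane mask out of the loop.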
void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
  S32S64LaneMask.insert(Reg);
  for (const MachineInstr &LCSSAPhi : MRI.use_instructions(Reg)) {
    if (LCSSAPhi.isPHI())
      S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
  }
}

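// Pick the piece type a value is unmerged into before building per-piece
// readanylane instructions.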
static LLT getReadAnyLaneSplitTy(LLT Ty) {
  if (Ty.isVector()) {
    LLT ElTy = Ty.getElementType();
    if (ElTy.getSizeInBits() == 16)
      return LLT::fixed_vector(2, ElTy);
    // S32, S64 or pointer
    return ElTy;
  }

  // Large scalars and 64-bit pointers
  return LLT::scalar(32);
}

static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
                                 const RegisterBankInfo &RBI);

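// Unmerge \p VgprSrc into UnmergeTy pieces, read each piece into SGPRs, and
// collect the results in \p SgprDstParts.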
static void unmergeReadAnyLane(MachineIRBuilder &B,
                               SmallVectorImpl<Register> &SgprDstParts,
                               LLT UnmergeTy, Register VgprSrc,
                               const RegisterBankInfo &RBI) {
  const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
  auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
  for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
    SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
  }
}

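// Move \p VgprSrc into a new SGPR-bank register via G_AMDGPU_READANYLANE,
// splitting types wider than 32 bits into pieces and remerging the results.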
static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
                                 const RegisterBankInfo &RBI) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
  if (Ty.getSizeInBits() == 32) {
    return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}}, {VgprSrc})
        .getReg(0);
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);

  return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}

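// Same as buildReadAnyLane above, but writes into the caller-provided
// \p SgprDst instead of creating a new register.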
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
                              Register VgprSrc, const RegisterBankInfo &RBI) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  if (Ty.getSizeInBits() == 32) {
    B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
    return;
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);

  B.buildMergeLikeInstr(SgprDst, SgprDstParts);
}