1 //===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
//===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines an instruction selector for the AMDGPU target.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
16
17 #include "GCNSubtarget.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "llvm/CodeGen/SelectionDAGISel.h"
20 #include "llvm/Target/TargetMachine.h"
21
22 using namespace llvm;
23
24 namespace {
25
isNullConstantOrUndef(SDValue V)26 static inline bool isNullConstantOrUndef(SDValue V) {
27 if (V.isUndef())
28 return true;
29
30 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
31 return Const != nullptr && Const->isZero();
32 }
33
getConstantValue(SDValue N,uint32_t & Out)34 static inline bool getConstantValue(SDValue N, uint32_t &Out) {
35 // This is only used for packed vectors, where using 0 for undef should
36 // always be good.
37 if (N.isUndef()) {
38 Out = 0;
39 return true;
40 }
41
42 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
43 Out = C->getAPIntValue().getSExtValue();
44 return true;
45 }
46
47 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
48 Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
49 return true;
50 }
51
52 return false;
53 }
54
55 // TODO: Handle undef as zero
56 static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
57 bool Negate = false) {
58 assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
59 uint32_t LHSVal, RHSVal;
60 if (getConstantValue(N->getOperand(0), LHSVal) &&
61 getConstantValue(N->getOperand(1), RHSVal)) {
62 SDLoc SL(N);
63 uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
64 : (LHSVal & 0xffff) | (RHSVal << 16);
65 return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
66 DAG.getTargetConstant(K, SL, MVT::i32));
67 }
68
69 return nullptr;
70 }
71
/// Convenience wrapper: packConstantV2I16() with negation of both elements
/// enabled.
static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
  return packConstantV2I16(N, DAG, /*Negate=*/true);
}
75 } // namespace
76
77 /// AMDGPU specific code to select AMDGPU machine instructions for
78 /// SelectionDAG operations.
79 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
80 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
81 // make the right decision when generating code for different targets.
82 const GCNSubtarget *Subtarget;
83
84 // Default FP mode for the current function.
85 AMDGPU::SIModeRegisterDefaults Mode;
86
87 bool EnableLateStructurizeCFG;
88
89 // Instructions that will be lowered with a final instruction that zeros the
90 // high result bits.
91 bool fp16SrcZerosHighBits(unsigned Opc) const;
92
93 public:
94 static char ID;
95
96 AMDGPUDAGToDAGISel() = delete;
97
98 explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel);
99 ~AMDGPUDAGToDAGISel() override = default;
100
101 void getAnalysisUsage(AnalysisUsage &AU) const override;
102
103 bool matchLoadD16FromBuildVector(SDNode *N) const;
104
105 bool runOnMachineFunction(MachineFunction &MF) override;
106 void PreprocessISelDAG() override;
107 void Select(SDNode *N) override;
108 StringRef getPassName() const override;
109 void PostprocessISelDAG() override;
110
111 protected:
112 void SelectBuildVector(SDNode *N, unsigned RegClassID);
113
114 private:
115 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
116 bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
isNegInlineImmediate(const SDNode * N)117 bool isNegInlineImmediate(const SDNode *N) const {
118 return isInlineImmediate(N, true);
119 }
120
isInlineImmediate16(int64_t Imm)121 bool isInlineImmediate16(int64_t Imm) const {
122 return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
123 }
124
isInlineImmediate32(int64_t Imm)125 bool isInlineImmediate32(int64_t Imm) const {
126 return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
127 }
128
isInlineImmediate64(int64_t Imm)129 bool isInlineImmediate64(int64_t Imm) const {
130 return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
131 }
132
isInlineImmediate(const APFloat & Imm)133 bool isInlineImmediate(const APFloat &Imm) const {
134 return Subtarget->getInstrInfo()->isInlineConstant(Imm);
135 }
136
137 bool isVGPRImm(const SDNode *N) const;
138 bool isUniformLoad(const SDNode *N) const;
139 bool isUniformBr(const SDNode *N) const;
140
141 // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's
142 // `ShAmtBits` bits is unneeded.
143 bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;
144
145 bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
146 SDValue &RHS) const;
147
148 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
149
150 SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
151 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
152 SDNode *glueCopyToM0LDSInit(SDNode *N) const;
153
154 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
155 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
156 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
157 bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
158 bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
159 unsigned Size) const;
160 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
161 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
162 SDValue &Offset1) const;
163 bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
164 SDValue &Offset1) const;
165 bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
166 SDValue &Offset1, unsigned Size) const;
167 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
168 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
169 SDValue &Idxen, SDValue &Addr64) const;
170 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
171 SDValue &SOffset, SDValue &Offset) const;
172 bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
173 SDValue &VAddr, SDValue &SOffset,
174 SDValue &ImmOffset) const;
175 bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
176 SDValue &Soffset, SDValue &Offset) const;
177
178 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
179 SDValue &Offset) const;
180
181 bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
182 SDValue &Offset, uint64_t FlatVariant) const;
183 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
184 SDValue &Offset) const;
185 bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
186 SDValue &Offset) const;
187 bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
188 SDValue &Offset) const;
189 bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
190 SDValue &VOffset, SDValue &Offset) const;
191 bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
192 SDValue &Offset) const;
193 bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
194 uint64_t ImmOffset) const;
195 bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
196 SDValue &SAddr, SDValue &Offset) const;
197
198 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
199 SDValue *Offset, bool Imm32Only = false,
200 bool IsBuffer = false) const;
201 SDValue Expand32BitAddress(SDValue Addr) const;
202 bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
203 SDValue *Offset, bool Imm32Only = false,
204 bool IsBuffer = false) const;
205 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
206 SDValue *Offset, bool Imm32Only = false) const;
207 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
208 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
209 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
210 bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
211 SDValue &Offset) const;
212 bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
213 bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
214 bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
215 SDValue &Offset) const;
216 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
217
218 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
219 bool AllowAbs = true) const;
220 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
221 bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
222 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
223 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
224 SDValue &Clamp, SDValue &Omod) const;
225 bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
226 SDValue &Clamp, SDValue &Omod) const;
227 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
228 SDValue &Clamp, SDValue &Omod) const;
229
230 bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods,
231 bool OpSel) const;
232 bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
233 bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const;
234
235 bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
236 SDValue &Omod) const;
237
238 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
239 bool IsDOT = false) const;
240 bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
241
242 bool SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const;
243 bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
244
245 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
246
247 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
248 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
249 unsigned &Mods) const;
250 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
251
252 SDValue getHi16Elt(SDValue In) const;
253
254 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
255
256 void SelectADD_SUB_I64(SDNode *N);
257 void SelectAddcSubb(SDNode *N);
258 void SelectUADDO_USUBO(SDNode *N);
259 void SelectDIV_SCALE(SDNode *N);
260 void SelectMAD_64_32(SDNode *N);
261 void SelectMUL_LOHI(SDNode *N);
262 void SelectFMA_W_CHAIN(SDNode *N);
263 void SelectFMUL_W_CHAIN(SDNode *N);
264 SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
265 uint32_t Width);
266 void SelectS_BFEFromShifts(SDNode *N);
267 void SelectS_BFE(SDNode *N);
268 bool isCBranchSCC(const SDNode *N) const;
269 void SelectBRCOND(SDNode *N);
270 void SelectFMAD_FMA(SDNode *N);
271 void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
272 void SelectDSBvhStackIntrinsic(SDNode *N);
273 void SelectDS_GWS(SDNode *N, unsigned IntrID);
274 void SelectInterpP1F16(SDNode *N);
275 void SelectINTRINSIC_W_CHAIN(SDNode *N);
276 void SelectINTRINSIC_WO_CHAIN(SDNode *N);
277 void SelectINTRINSIC_VOID(SDNode *N);
278
279 protected:
280 // Include the pieces autogenerated from the target description.
281 #include "AMDGPUGenDAGISel.inc"
282 };
283
284 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
285