// xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (revision e28e93550a74752714db6fffe50233aa96e536a5)
//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
16 
17 #include "GCNSubtarget.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "SIModeRegisterDefaults.h"
20 #include "llvm/Analysis/ValueTracking.h"
21 #include "llvm/CodeGen/SelectionDAGISel.h"
22 #include "llvm/Target/TargetMachine.h"
23 
24 namespace llvm {
25 
26 static inline bool getConstantValue(SDValue N, uint32_t &Out) {
27   // This is only used for packed vectors, where using 0 for undef should
28   // always be good.
29   if (N.isUndef()) {
30     Out = 0;
31     return true;
32   }
33 
34   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
35     Out = C->getAPIntValue().getSExtValue();
36     return true;
37   }
38 
39   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
40     Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
41     return true;
42   }
43 
44   return false;
45 }
46 
47 // TODO: Handle undef as zero
48 static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
49   assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
50   uint32_t LHSVal, RHSVal;
51   if (getConstantValue(N->getOperand(0), LHSVal) &&
52       getConstantValue(N->getOperand(1), RHSVal)) {
53     SDLoc SL(N);
54     uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
55     return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
56                               DAG.getTargetConstant(K, SL, MVT::i32));
57   }
58 
59   return nullptr;
60 }
61 
62 /// AMDGPU specific code to select AMDGPU machine instructions for
63 /// SelectionDAG operations.
64 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
65   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
66   // make the right decision when generating code for different targets.
67   const GCNSubtarget *Subtarget;
68 
69   // Default FP mode for the current function.
70   SIModeRegisterDefaults Mode;
71 
72   // Instructions that will be lowered with a final instruction that zeros the
73   // high result bits.
74   bool fp16SrcZerosHighBits(unsigned Opc) const;
75 
76 public:
77   AMDGPUDAGToDAGISel() = delete;
78 
79   explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOptLevel OptLevel);
80 
81   bool runOnMachineFunction(MachineFunction &MF) override;
82   bool matchLoadD16FromBuildVector(SDNode *N) const;
83   void PreprocessISelDAG() override;
84   void Select(SDNode *N) override;
85   void PostprocessISelDAG() override;
86 
87 protected:
88   void SelectBuildVector(SDNode *N, unsigned RegClassID);
89   void SelectVectorShuffle(SDNode *N);
90 
91 private:
92   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
93 
94   bool isInlineImmediate(const SDNode *N) const;
95 
96   bool isInlineImmediate(const APInt &Imm) const {
97     return Subtarget->getInstrInfo()->isInlineConstant(Imm);
98   }
99 
100   bool isInlineImmediate(const APFloat &Imm) const {
101     return Subtarget->getInstrInfo()->isInlineConstant(Imm);
102   }
103 
104   bool isVGPRImm(const SDNode *N) const;
105   bool isUniformLoad(const SDNode *N) const;
106   bool isUniformBr(const SDNode *N) const;
107 
108   // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's
109   // `ShAmtBits` bits is unneeded.
110   bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;
111 
112   bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
113                                   SDValue &RHS) const;
114 
115   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
116 
117   SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
118   SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
119   SDNode *glueCopyToM0LDSInit(SDNode *N) const;
120 
121   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
122   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
123   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
124   bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
125   bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
126                         unsigned Size) const;
127 
128   bool isFlatScratchBaseLegal(SDValue Addr) const;
129   bool isFlatScratchBaseLegalSV(SDValue Addr) const;
130   bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;
131   bool isSOffsetLegalWithImmOffset(SDValue *SOffset, bool Imm32Only,
132                                    bool IsBuffer, int64_t ImmOffset = 0) const;
133 
134   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
135   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
136                                  SDValue &Offset1) const;
137   bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
138                                   SDValue &Offset1) const;
139   bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
140                           SDValue &Offset1, unsigned Size) const;
141   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
142                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
143                    SDValue &Idxen, SDValue &Addr64) const;
144   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
145                          SDValue &SOffset, SDValue &Offset) const;
146   bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
147                                SDValue &VAddr, SDValue &SOffset,
148                                SDValue &ImmOffset) const;
149   bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
150                                 SDValue &Soffset, SDValue &Offset) const;
151 
152   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
153                          SDValue &Offset) const;
154   bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;
155 
156   bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
157                             SDValue &Offset, uint64_t FlatVariant) const;
158   bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
159                         SDValue &Offset) const;
160   bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
161                           SDValue &Offset) const;
162   bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
163                            SDValue &Offset) const;
164   bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
165                          SDValue &VOffset, SDValue &Offset) const;
166   bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
167                           SDValue &Offset) const;
168   bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
169                                      uint64_t ImmOffset) const;
170   bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
171                            SDValue &SAddr, SDValue &Offset) const;
172 
173   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
174                         SDValue *Offset, bool Imm32Only = false,
175                         bool IsBuffer = false, bool HasSOffset = false,
176                         int64_t ImmOffset = 0) const;
177   SDValue Expand32BitAddress(SDValue Addr) const;
178   bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
179                             SDValue *Offset, bool Imm32Only = false,
180                             bool IsBuffer = false, bool HasSOffset = false,
181                             int64_t ImmOffset = 0) const;
182   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
183                   SDValue *Offset, bool Imm32Only = false) const;
184   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
185   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
186   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
187   bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
188                          SDValue &Offset) const;
189   bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
190   bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
191   bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
192                                SDValue &Offset) const;
193   bool SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase,
194                              SDValue &Offset) const;
195   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
196 
197   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
198                           bool IsCanonicalizing = true,
199                           bool AllowAbs = true) const;
200   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
201   bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src,
202                                        SDValue &SrcMods) const;
203   bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
204   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
205   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
206                        SDValue &Clamp, SDValue &Omod) const;
207   bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
208                         SDValue &Clamp, SDValue &Omod) const;
209   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
210                          SDValue &Clamp, SDValue &Omod) const;
211 
212   bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods,
213                              bool OpSel) const;
214   bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
215   bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const;
216 
217   bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
218                        SDValue &Omod) const;
219 
220   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
221                        bool IsDOT = false) const;
222   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
223 
224   bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
225   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
226 
227   bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
228                                SDValue &SrcMods) const;
229   bool SelectWMMAModsF16Neg(SDValue In, SDValue &Src, SDValue &SrcMods) const;
230   bool SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
231                                SDValue &SrcMods) const;
232   bool SelectWMMAVISrc(SDValue In, SDValue &Src) const;
233 
234   bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const;
235   bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const;
236 
237   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
238 
239   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
240   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
241                                  unsigned &Mods) const;
242   bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
243                                 SDValue &SrcMods) const;
244   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
245 
246   bool SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1, SDValue &Src2,
247                    SDValue &Tbl) const;
248 
249   SDValue getHi16Elt(SDValue In) const;
250 
251   SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
252 
253   void SelectADD_SUB_I64(SDNode *N);
254   void SelectAddcSubb(SDNode *N);
255   void SelectUADDO_USUBO(SDNode *N);
256   void SelectDIV_SCALE(SDNode *N);
257   void SelectMAD_64_32(SDNode *N);
258   void SelectMUL_LOHI(SDNode *N);
259   void SelectFMA_W_CHAIN(SDNode *N);
260   void SelectFMUL_W_CHAIN(SDNode *N);
261   SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
262                    uint32_t Width);
263   void SelectS_BFEFromShifts(SDNode *N);
264   void SelectS_BFE(SDNode *N);
265   bool isCBranchSCC(const SDNode *N) const;
266   void SelectBRCOND(SDNode *N);
267   void SelectFMAD_FMA(SDNode *N);
268   void SelectFP_EXTEND(SDNode *N);
269   void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
270   void SelectDSBvhStackIntrinsic(SDNode *N);
271   void SelectDS_GWS(SDNode *N, unsigned IntrID);
272   void SelectInterpP1F16(SDNode *N);
273   void SelectINTRINSIC_W_CHAIN(SDNode *N);
274   void SelectINTRINSIC_WO_CHAIN(SDNode *N);
275   void SelectINTRINSIC_VOID(SDNode *N);
276   void SelectWAVE_ADDRESS(SDNode *N);
277   void SelectSTACKRESTORE(SDNode *N);
278 
279 protected:
280   // Include the pieces autogenerated from the target description.
281 #include "AMDGPUGenDAGISel.inc"
282 };
283 
284 class AMDGPUISelDAGToDAGPass : public SelectionDAGISelPass {
285 public:
286   AMDGPUISelDAGToDAGPass(TargetMachine &TM);
287 
288   PreservedAnalyses run(MachineFunction &MF,
289                         MachineFunctionAnalysisManager &MFAM);
290 };
291 
292 class AMDGPUDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
293 public:
294   static char ID;
295 
296   AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel);
297 
298   bool runOnMachineFunction(MachineFunction &MF) override;
299   void getAnalysisUsage(AnalysisUsage &AU) const override;
300   StringRef getPassName() const override;
301 };
302 
303 } // namespace llvm
304 
305 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
306