//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

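// These values are assumed to mirror the AMDGPUAS address-space numbering used
// throughout the backend (and addrspace(N) in IR): 0 = flat, 1 = global,
// 2 = region, 3 = local, 4 = constant, 5 = private, 6 = 32-bit constant.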
class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

def AddrSpaces : AddressSpacesImpl;

class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but TableGen expects this field to exist or it fails
  // to build the decode table.
  field bits<128> SoftFail = 0; // FIXME: If this is smaller than the largest instruction, DecodeEmitter crashes
  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {
  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
  : Instruction {
  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

// Get the union of two Register lists.
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret = !listconcat(lstA, !listremove(lstB, lstA));
}
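// E.g. RegListUnion<[R0, R1], [R1, R2]>.ret evaluates to [R0, R1, R2]:
// lstA is kept as-is and lstA's elements are removed from lstB before
// appending.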

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl, GISelFlags;

let GIIgnoreCopies = 1 in
class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;

let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps<i32, (ops (i32 0))>;

def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;

class CustomOperandClass<string name, bit optional, string predicateMethod,
                         string parserMethod, string defaultMethod>
    : AsmOperandClass {
  let Name = name;
  let PredicateMethod = predicateMethod;
  let ParserMethod = parserMethod;
  let RenderMethod = "addImmOperands";
  let IsOptional = optional;
  let DefaultMethod = defaultMethod;
}

class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = "ImmTy"#name;
  string PredicateMethod = "is"#name;
  string ParserMethod = "parse"#name;
  string DefaultValue = "0";
  string DefaultMethod = "[this]() { return "#
    "AMDGPUOperand::CreateImm(this, "#DefaultValue#", SMLoc(), "#
    "AMDGPUOperand::"#ImmTy#"); }";
  string PrintMethod = "print"#name;
  AsmOperandClass ParserMatchClass =
    CustomOperandClass<name, optional, PredicateMethod, ParserMethod,
                       DefaultMethod>;
  string OperandType = "OPERAND_IMMEDIATE";
}
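// For a custom operand named Foo, the strings above are assumed to resolve to
// AMDGPUOperand::ImmTyFoo plus isFoo()/parseFoo()/printFoo() hooks in the
// AMDGPU AsmParser and InstPrinter.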

class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
  : Operand<type>, CustomOperandProps<optional, name>;

class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = "print"#name>
    : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}

class S16ImmOperand : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;

def s16imm : S16ImmOperand;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;

class ValuePredicatedOperand<CustomOperand op, string valuePredicate,
                             bit optional = 0>
    : CustomOperand<op.Type, optional> {
  let ImmTy = op.ImmTy;
  defvar OpPredicate = op.ParserMatchClass.PredicateMethod;
  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { "#
    "return Op."#OpPredicate#"() && "#valuePredicate#"; })";
  let ParserMethod = op.ParserMatchClass.ParserMethod;
  let DefaultValue = op.DefaultValue;
  let DefaultMethod = op.DefaultMethod;
  let PrintMethod = op.PrintMethod;
}
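// ValuePredicatedOperand wraps an existing custom operand and additionally
// checks valuePredicate against the parsed operand; e.g. a hypothetical
// ValuePredicatedOperand<some_op, "Op.getImm() != 0"> would reject zero values.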

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget   : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0)> {
  let HasOneUse = 1;
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1)> {
  let HasOneUse = 1;
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2)> {
  let HasOneUse = 1;
}

class is_canonicalized_1<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0));
  }]> {
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF);
  }];
}

class is_canonicalized_2<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
           Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {
  // TODO: Improve the GlobalISel legalizer for G_BUILD_VECTOR so this class
  // can be matched there as well.
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF) &&
           TLI->isCanonicalized(MI.getOperand(2).getReg(), MF);
  }];
}

class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op2 (op1 node:$src0, node:$src1), node:$src2)
>;

def imad : FoldTernaryOpPat<mul, add>;
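// imad matches the fused form (add (mul $src0, $src1), $src2).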

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def fminimum_oneuse : HasOneUseBinOp<fminimum>;
def fmaximum_oneuse : HasOneUseBinOp<fmaximum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags.

def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}

foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width
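// E.g. cshl_32 matches both (shl x, y) and (shl x, (and y, mask)) when the
// mask is known to cover at least the low 5 bits, on the assumption that the
// hardware only consumes log2(bitwidth) bits of the shift amount.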

def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;

def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;

def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for zero immediates
//===----------------------------------------------------------------------===//

def immzero : PatLeaf<(imm), [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O   : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO  : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason the R600 version prefers unordered for setne?
def COND_UNE_NE  : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

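// StoreHi16 fragments store the high half of a 32-bit value; e.g.
// StoreHi16<truncstorei16, i16> matches (truncstorei16 (srl $value, 16), $ptr)
// and backs the *_hi16 store fragments defined below.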
class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;

foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as  : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
}

def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext_8 node:$ptr)> {
  let IsAtomic = 1;
}

def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext_8 node:$ptr)> {
  let IsAtomic = 1;
}

def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext_16 node:$ptr)> {
  let IsAtomic = 1;
}

def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext_16 node:$ptr)> {
  let IsAtomic = 1;
}

} // End let AddressSpaces
} // End foreach as

foreach as = [ "global", "flat", "local", "private", "region" ] in {
let IsStore = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
} // End let IsStore = 1, AddressSpaces = ...

let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def atomic_store_8_#as : PatFrag<(ops node:$val, node:$ptr),
                                 (atomic_store_8 node:$val, node:$ptr)>;
def atomic_store_16_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_16 node:$val, node:$ptr)>;
def atomic_store_32_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_32 node:$val, node:$ptr)>;
def atomic_store_64_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_64 node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = ...
} // End foreach as

multiclass noret_op {
  let HasNoUse = true in
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}
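// For instance, 'defm foo : noret_op;' is expected to produce a fragment
// foo_noret that matches foo only when its result is unused (HasNoUse),
// letting patterns select the non-returning atomic encodings.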

multiclass global_addr_space_atomic_op {
  def "_noret_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass flat_addr_space_atomic_op {
  def "_noret_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass local_addr_space_atomic_op {
  def "_noret_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
}

defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
defm int_amdgcn_global_atomic_fmin_num : noret_op;
defm int_amdgcn_global_atomic_fmax_num : noret_op;
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;

multiclass noret_binary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op<atomic_op>;
}

multiclass noret_binary_atomic_op_fp<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op_fp<atomic_op>;
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : ternary_atomic_op<atomic_op>;
}

defvar atomic_addrspace_names = [ "global", "flat", "constant", "local", "private", "region" ];

multiclass binary_atomic_op_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op>;
      defm "_"#as : noret_binary_atomic_op<atomic_op>;
    }
  }
}

multiclass binary_atomic_op_fp_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op_fp<atomic_op>;
      defm "_"#as : noret_binary_atomic_op_fp<atomic_op>;
    }
  }
}
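// Roughly, 'defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;' below is
// expected to expand to per-size, per-address-space fragments such as
// atomic_swap_global_32, along with matching _noret variants.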

defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_fp_all_as<atomic_load_fadd>;
defm atomic_load_fmin : binary_atomic_op_fp_all_as<atomic_load_fmin>;
defm atomic_load_fmax : binary_atomic_op_fp_all_as<atomic_load_fmax>;
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;

def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                         Aligned<16> {
  let IsLoad = 1;
}

def store_align8_local : PatFrag<(ops node:$val, node:$ptr),
                                 (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
}

def store_align16_local : PatFrag<(ops node:$val, node:$ptr),
                                  (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
  int TWO_PI = 0x40c90fdb;
  int PI = 0x40490fdb;
  int TWO_PI_INV = 0x3e22f983;
  int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
  int FP16_ONE = 0x3C00;
  int FP16_NEG_ONE = 0xBC00;
  int FP32_ONE = 0x3f800000;
  int FP32_NEG_ONE = 0xbf800000;
  int FP64_ONE = 0x3ff0000000000000;
  int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
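// These are IEEE-754 bit patterns; e.g. TWO_PI_INV (0x3e22f983) is 1/(2*pi)
// in fp32, typically referenced from patterns as CONST.TWO_PI_INV.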

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;
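// A typical instantiation (e.g. in the per-target pattern files) is assumed
// to look like:
//   def Extract_Element_v2f32_1 : Extract_Element<f32, v2f32, 1, sub1>;
// rewriting (extractelt v2f32:$src, 1) into an EXTRACT_SUBREG of sub1.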

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
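// cvt_rpi_i32_f32 matches (i32)floor(x + 0.5) and cvt_flr_i32_f32 matches
// (i32)floor(x); both are only formed under NoNaNsFPMath, per the predicate.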

let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.

class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;
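// RcpPat folds an exact 1.0 / x divide into a reciprocal instruction, e.g. a
// hypothetical RcpPat<V_RCP_F32_e64, f32>.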

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N, 0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

def fminnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

def fmaxnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;
893