xref: /llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision 435609b70c8bbf7bc6b73b04ec8852a9c11376ec)
1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Leaf predicate on fpimm: true when the immediate, read through
// APFloat::convertToFloat(), compares equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Leaf predicate on fpimm: true when the immediate, read through
// APFloat::convertToFloat(), compares equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Leaf predicate on fpimm: true when the immediate, read through
// APFloat::convertToDouble(), compares equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Leaf predicate on fpimm: true when the immediate, read through
// APFloat::convertToDouble(), compares equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// Bundle of C++ predicate bodies, one per address space. Each body forwards
// the node N to ChkMemSDNodeAddressSpace together with the corresponding
// llvm::ADDRESS_SPACE_* constant.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];

  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];

  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Provides `ptx.version`, a dag that patterns can use to obtain the PTX
// version reported by the subtarget as an i32 immediate.
class PTX {
  // Transform applied to an immediate node: the node's own value is ignored
  // and an i32 immediate holding Subtarget->getPTXVersion() is returned.
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    SDLoc DL(N);
    return getI32Imm(Subtarget->getPTXVersion(), DL);
  }]>;

  // (i32 0) is only a placeholder operand for PTXVerXform to rewrite.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
// Generates a list of n sequentially numbered register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
//
// The list is built directly from !range(n) rather than by recursively
// instantiating RegSeq<n-1, ...>, so no chain of nested anonymous records is
// created and a non-positive n simply yields an empty list.
class RegSeq<int n, string prefix> {
  list<string> ret = !foreach(i, !range(n), prefix # i);
}
58
// Supplies the list of `threadmask_imm` settings the shfl generator iterates
// over: both 0 and 1 when `sync` is set, otherwise only 0.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync,
                      [0, 1],
                      [0]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// bar.sync with the literal operand 0; selected for int_nvvm_barrier0.
def INT_BARRIER0 :
  NVPTXInst<(outs), (ins),
            "bar.sync \t0;",
            [(int_nvvm_barrier0)]>;

// bar.sync with one register operand; selected for int_nvvm_barrier_n.
def INT_BARRIERN :
  NVPTXInst<(outs), (ins Int32Regs:$src1),
            "bar.sync \t$src1;",
            [(int_nvvm_barrier_n i32:$src1)]>;

// bar.sync with two register operands; selected for int_nvvm_barrier.
def INT_BARRIER :
  NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
            "bar.sync \t$src1, $src2;",
            [(int_nvvm_barrier i32:$src1, i32:$src2)]>;
// int_nvvm_barrier0_popc: emitted as a braced PTX sequence that declares a
// scratch predicate %p1, sets it with setp.ne.u32 ($pred against 0), and
// passes it to bar.red.popc.u32 with barrier operand 0, writing $dst.
def INT_BARRIER0_POPC :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t"
            # ".reg .pred \t%p1; \n\t"
            # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
            # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
            # "}}",
            [(set i32:$dst, (int_nvvm_barrier0_popc i32:$pred))]>;
// int_nvvm_barrier0_and: emitted as a braced PTX sequence. %p1 is set with
// setp.ne.u32 ($pred against 0), bar.red.and.pred writes %p2 from it using
// barrier operand 0, and selp.u32 turns %p2 into 1 or 0 in $dst.
def INT_BARRIER0_AND :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t"
            # ".reg .pred \t%p1; \n\t"
            # ".reg .pred \t%p2; \n\t"
            # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
            # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
            # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
            # "}}",
            [(set i32:$dst, (int_nvvm_barrier0_and i32:$pred))]>;
// int_nvvm_barrier0_or: emitted as a braced PTX sequence. %p1 is set with
// setp.ne.u32 ($pred against 0), bar.red.or.pred writes %p2 from it using
// barrier operand 0, and selp.u32 turns %p2 into 1 or 0 in $dst.
def INT_BARRIER0_OR :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t"
            # ".reg .pred \t%p1; \n\t"
            # ".reg .pred \t%p2; \n\t"
            # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
            # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
            # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
            # "}}",
            [(set i32:$dst, (int_nvvm_barrier0_or i32:$pred))]>;
101
// bar.sync with an immediate operand; selected for int_nvvm_bar_sync.
def INT_BAR_SYNC :
  NVPTXInst<(outs), (ins i32imm:$i),
            "bar.sync \t$i;",
            [(int_nvvm_bar_sync imm:$i)]>;
104
// bar.warp.sync for int_nvvm_bar_warp_sync, in an immediate-operand (_I) and
// a register-operand (_R) form. Both are gated on hasPTX<60> and hasSM<30>.
def INT_BAR_WARP_SYNC_I :
  NVPTXInst<(outs), (ins i32imm:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BAR_WARP_SYNC_R :
  NVPTXInst<(outs), (ins Int32Regs:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync i32:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
111
// barrier.sync for int_nvvm_barrier_sync, in an immediate-operand (_I) and a
// register-operand (_R) form. Both are gated on hasPTX<60> and hasSM<30>.
def INT_BARRIER_SYNC_I :
  NVPTXInst<(outs), (ins i32imm:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_R :
  NVPTXInst<(outs), (ins Int32Regs:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync i32:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
118
// barrier.sync with an id and a count operand, for int_nvvm_barrier_sync_cnt.
// The two-letter suffix gives the kind of $id then $cnt: R = Int32Regs
// register, I = i32imm immediate. All four are gated on hasPTX<60>/hasSM<30>.
def INT_BARRIER_SYNC_CNT_RR :
  NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt i32:$id, i32:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_RI :
  NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt i32:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_IR :
  NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, i32:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_II :
  NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
135
// Operand-less "barrier.cluster.<variant>;" instruction selected for Intr.
// Preds defaults to [hasPTX<78>, hasSM<90>] and may be overridden per def.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>
  : NVPTXInst<(outs), (ins),
              "barrier.cluster." # variant # ";",
              [(Intr)]>,
    Requires<Preds>;
140
// barrier.cluster.{arrive, arrive.relaxed, wait}. The relaxed form overrides
// the default predicates with [hasPTX<80>, hasSM<90>].
def barrier_cluster_arrive :
  INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed :
  INT_BARRIER_CLUSTER<"arrive.relaxed",
                      int_nvvm_barrier_cluster_arrive_relaxed,
                      [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait :
  INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// The same three instructions with the ".aligned" suffix.
def barrier_cluster_arrive_aligned :
  INT_BARRIER_CLUSTER<"arrive.aligned",
                      int_nvvm_barrier_cluster_arrive_aligned>;
def barrier_cluster_arrive_relaxed_aligned :
  INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
                      int_nvvm_barrier_cluster_arrive_relaxed_aligned,
                      [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait_aligned :
  INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;
157
// One shfl / shfl.sync instruction variant.
//
//   sync            - emit the ".sync" form and take a $threadmask operand.
//   mode            - mnemonic component placed before ".b32".
//   reg             - "i32" or "f32"; selects the value register class.
//   return_pred     - also produce an Int1Regs $pred result ("$dst|$pred").
//   offset_imm,
//   mask_imm,
//   threadmask_imm  - take that operand as i32imm instead of Int32Regs.
//
// The base NVPTXInst is created with placeholder operands, asm string and
// pattern; all four are then overridden with `let` below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Register class of the shuffled value.
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // Intrinsic record name: int_nvvm_shfl_[sync_]<mode>_<reg>[p].
  string IntrName = "int_nvvm_shfl_" # !if(sync, "sync_", "") # mode
                    # "_" # reg # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs, in order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
      !if(sync,
          !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
          (ins)),
      (ins rc:$src),
      !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
      !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  let AsmString = "shfl." # !if(sync, "sync.", "") # mode # ".b32\t$dst"
                  # !if(return_pred, "|$pred", "")
                  # ", $src, $offset, $mask"
                  # !if(sync, ", $threadmask", "")
                  # ";";

  // The selection pattern is derived from the operand lists: `outs` becomes
  // `set`, `ins` becomes the intrinsic, and i32imm is rewritten to imm.
  let Pattern = [!con(
      !foreach(op, OutOperandList,
               !subst(outs, set, !subst(i32imm, imm, op))),
      (set !foreach(op, InOperandList,
                    !subst(ins, Intr, !subst(i32imm, imm, op)))))];
}
197
// Instantiate an anonymous SHFL_INSTR for every combination of sync flag,
// mode, value type, predicate result and register/immediate operand kind.
// The threadmask kinds come from THREADMASK_INFO, so the non-sync form is
// only generated once per combination. Sync variants require
// hasSM<30>/hasPTX<60>; the others require hasSM<30>/hasSHFL.
foreach is_sync = [false, true] in {
  foreach shfl_mode = ["up", "down", "bfly", "idx"] in {
    foreach value_ty = ["i32", "f32"] in {
      foreach with_pred = [false, true] in {
        foreach imm_offset = [false, true] in {
          foreach imm_mask = [false, true] in {
            foreach imm_threadmask = THREADMASK_INFO<is_sync>.ret in {
              def : SHFL_INSTR<is_sync, shfl_mode, value_ty, with_pred,
                               imm_offset, imm_mask, imm_threadmask>,
                    Requires<!if(is_sync,
                                 [hasSM<30>, hasPTX<60>],
                                 [hasSM<30>, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}
215
// vote.{all,any,uni,ballot}
//
// One anonymous instruction per defm: "vote.<mode> $dest, $pred" with an
// Int1Regs predicate input and a result in `regclass`.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  defvar vote_asm = "vote." # mode # " \t$dest, $pred;";

  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  vote_asm,
                  [(set regclass:$dest, (IntOp i1:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
228
// vote.sync.{all,any,uni,ballot}
//
// Two instructions per defm, sharing one asm string: `i` takes $mask as an
// i32imm immediate, `r` takes it in an Int32Regs register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  defvar vote_sync_asm = "vote.sync." # mode # " \t$dest, $pred, $mask;";

  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    vote_sync_asm,
                    [(set regclass:$dest, (IntOp imm:$mask, i1:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;

  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
                    vote_sync_asm,
                    [(set regclass:$dest, (IntOp i32:$mask, i1:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL    : VOTE_SYNC<Int1Regs,  "all.pred",   int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY    : VOTE_SYNC<Int1Regs,  "any.pred",   int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI    : VOTE_SYNC<Int1Regs,  "uni.pred",   int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
245
// elect.sync
//
// Produces an i32 $dest and an i1 $pred from a membership $mask, which is an
// immediate in the _I form and a register in the _R form. Both forms are
// gated on hasPTX<80> and hasSM<90>.
def INT_ELECT_SYNC_I :
  NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask),
            "elect.sync \t$dest|$pred, $mask;",
            [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync imm:$mask))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def INT_ELECT_SYNC_R :
  NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask),
            "elect.sync \t$dest|$pred, $mask;",
            [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync i32:$mask))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;
255
// match.any.sync.<ptxtype>
//
// Four instructions sharing one asm string. Each def suffix is a two-letter
// code for the operand kinds (immediate vs. register) that def accepts; see
// the `ins` list of each def for the exact $mask / $value combination.
// `ImmOp` is the immediate operand type used for $value; `regclass` is its
// register class.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  defvar match_asm = "match.any.sync." # ptxtype # " \t$dest, $value, $mask;";

  def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
                     match_asm,
                     [(set i32:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
                     match_asm,
                     [(set i32:$dest, (IntOp i32:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
                     match_asm,
                     [(set i32:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
                     match_asm,
                     [(set i32:$dest, (IntOp i32:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
275
// activemask.b32
//
// No inputs; writes an i32 result. Gated on hasPTX<62> and hasSM<30>.
def ACTIVEMASK :
  NVPTXInst<(outs Int32Regs:$dest), (ins),
            "activemask.b32 \t$dest;",
            [(set i32:$dest, (int_nvvm_activemask))]>,
  Requires<[hasPTX<62>, hasSM<30>]>;
281
// 32-bit and 64-bit instantiations of match.any.sync.
defm MATCH_ANY_SYNC_32 :
  MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64 :
  MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
286
// match.all.sync.<ptxtype> with an additional i1 $pred result.
//
// Four instructions sharing one asm string and one result list. Each def
// suffix is a two-letter code for the operand kinds (immediate vs. register)
// that def accepts; see the `ins` list of each def for the exact
// $mask / $value combination.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  defvar match_asm =
    "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;";

  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     match_asm,
                     [(set i32:$dest, i1:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     match_asm,
                     [(set i32:$dest, i1:$pred, (IntOp i32:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     match_asm,
                     [(set i32:$dest, i1:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     match_asm,
                     [(set i32:$dest, i1:$pred, (IntOp i32:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ALLP_SYNC_32 :
  MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64 :
  MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
314
// redux.sync.<BinOp>.<PTXType>
//
// One anonymous instruction per defm, taking a 32-bit source and a 32-bit
// mask in registers. Gated on hasPTX<70> and hasSM<80>.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  defvar redux_asm =
    "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;";

  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
                  redux_asm,
                  [(set i32:$dst, (Intrin i32:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
330
331} // isConvergent = true
332
333//-----------------------------------
334// Explicit Memory Fence Functions
335//-----------------------------------
// Operand-less fence instruction: StrOp is emitted verbatim as the asm
// string and IntOP is the intrinsic it is selected for.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

// fence.sc.cluster reuses MEMBAR and adds hasPTX<78>/hasSM<90> predicates.
def INT_FENCE_SC_CLUSTER :
  MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
  Requires<[hasPTX<78>, hasSM<90>]>;
347
348// Proxy fence (uni-directional)
// fence.proxy.tensormap::generic.release.{cta,cluster,gpu,sys} variants
350
// Operand-less "fence.proxy.tensormap::generic.release.<Scope>;" selected
// for Intr. Gated on hasPTX<83> and hasSM<90>.
class FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<string Scope, Intrinsic Intr>
  : NVPTXInst<(outs), (ins),
              "fence.proxy.tensormap::generic.release." # Scope # ";",
              [(Intr)]>,
    Requires<[hasPTX<83>, hasSM<90>]>;

// One instruction per scope: cta, cluster, gpu, sys.
def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CTA :
  FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<
    "cta", int_nvvm_fence_proxy_tensormap_generic_release_cta>;
def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CLUSTER :
  FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<
    "cluster", int_nvvm_fence_proxy_tensormap_generic_release_cluster>;
def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_GPU :
  FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<
    "gpu", int_nvvm_fence_proxy_tensormap_generic_release_gpu>;
def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_SYS :
  FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<
    "sys", int_nvvm_fence_proxy_tensormap_generic_release_sys>;
368
// fence.proxy.tensormap::generic.acquire.{cta,cluster,gpu,sys} variants
370
// "fence.proxy.tensormap::generic.acquire.<Scope> [$addr], 128;" selected
// for Intr. The pattern only matches when the intrinsic's second argument is
// the constant (i32 128); that value is printed literally in the asm string.
// Gated on hasPTX<83> and hasSM<90>.
class FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<string Scope, Intrinsic Intr>
  : NVPTXInst<(outs), (ins Int64Regs:$addr),
              "fence.proxy.tensormap::generic.acquire." # Scope
                # " [$addr], 128;",
              [(Intr i64:$addr, (i32 128))]>,
    Requires<[hasPTX<83>, hasSM<90>]>;

// One instruction per scope: cta, cluster, gpu, sys.
def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CTA :
  FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<
    "cta", int_nvvm_fence_proxy_tensormap_generic_acquire_cta>;
def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CLUSTER :
  FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<
    "cluster", int_nvvm_fence_proxy_tensormap_generic_acquire_cluster>;
def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_GPU :
  FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<
    "gpu", int_nvvm_fence_proxy_tensormap_generic_acquire_gpu>;
def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_SYS :
  FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<
    "sys", int_nvvm_fence_proxy_tensormap_generic_acquire_sys>;
389
390//-----------------------------------
391// Async Copy Functions
392//-----------------------------------
393
// cp.async.mbarrier.arrive[.noinc][.shared].b64 [$addr]
//
// NoInc and AddrSpace are pasted verbatim after the mnemonic (pass "" to omit
// either). _32 takes the address in an Int32Regs register, _64 in Int64Regs.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  defvar arrive_asm =
    "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      arrive_asm,
                      [(Intrin i32:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      arrive_asm,
                      [(Intrin i64:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "",
                           int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                           int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
413
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size]
//
// Intrin selects the two-operand forms (_32 / _64, by address width).
// IntrinS selects the forms with the extra $src_size operand, which is a
// register in _32s / _64s and an immediate in _32si / _64si.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
  // Text common to every variant, up to and including the copy size.
  defvar asm_head =
    "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize;
  defvar asm_plain = asm_head # ";";
  defvar asm_sized = asm_head # ", $src_size;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      asm_plain,
                      [(Intrin i32:$dst, i32:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      asm_plain,
                      [(Intrin i64:$dst, i64:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  // Variants with the src_size parameter.
  def _32s : NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
                       asm_sized,
                       [(IntrinS i32:$dst, i32:$src, i32:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;

  def _32si: NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
                       asm_sized,
                       [(IntrinS i32:$dst, i32:$src, imm:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;

  def _64s : NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
                       asm_sized,
                       [(IntrinS i64:$dst, i64:$src, i32:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;

  def _64si: NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
                       asm_sized,
                       [(IntrinS i64:$dst, i64:$src, imm:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                           int_nvvm_cp_async_ca_shared_global_4,
                           int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                           int_nvvm_cp_async_ca_shared_global_8,
                           int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                           int_nvvm_cp_async_ca_shared_global_16,
                           int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                           int_nvvm_cp_async_cg_shared_global_16,
                           int_nvvm_cp_async_cg_shared_global_16_s>;
457
// cp.async group management. All three are gated on hasPTX<70>/hasSM<80>.

// cp.async.commit_group: no operands.
def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins),
            "cp.async.commit_group;",
            [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

// cp.async.wait_group: $n is matched as a target immediate (timm).
def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n),
            "cp.async.wait_group $n;",
            [(int_nvvm_cp_async_wait_group timm:$n)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

// cp.async.wait_all: no operands.
def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins),
            "cp.async.wait_all;",
            [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;
471
// cp.async.bulk variants of the commit/wait group instructions.
// All three are gated on hasPTX<80>/hasSM<90>; the wait forms match $n as a
// target immediate (timm).
def CP_ASYNC_BULK_COMMIT_GROUP :
  NVPTXInst<(outs), (ins),
            "cp.async.bulk.commit_group;",
            [(int_nvvm_cp_async_bulk_commit_group)]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n),
            "cp.async.bulk.wait_group $n;",
            [(int_nvvm_cp_async_bulk_wait_group timm:$n)]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP_READ :
  NVPTXInst<(outs), (ins i32imm:$n),
            "cp.async.bulk.wait_group.read $n;",
            [(int_nvvm_cp_async_bulk_wait_group_read timm:$n)]>,
  Requires<[hasPTX<80>, hasSM<90>]>;
487
488//------------------------------
489// TMA Async Bulk Copy Functions
490//------------------------------
491
// Mnemonic strings for the cp.async.bulk instructions.
//   mc - append ".multicast::cluster" (only used by G2S).
//   ch - append ".L2::cache_hint"     (used by S2G and G2S).
class CpAsyncBulkStr<bit mc, bit ch> {
  // Shared to Global memory
  string S2G = "cp.async.bulk.global.shared::cta.bulk_group"
               # !if(ch, ".L2::cache_hint", "");

  // Global to Shared cluster memory
  string G2S =
    "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes"
    # !if(mc, ".multicast::cluster", "")
    # !if(ch, ".L2::cache_hint", "");

  // Shared CTA to Cluster memory; neither flag affects this string.
  string C2C =
    "cp.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes";
}
505
// Bulk copy using the CpAsyncBulkStr S2G mnemonic. `rc` is the register class
// of the $src address. NAME is the plain form; NAME_CH adds a 64-bit $ch
// operand and the cache-hint mnemonic suffix. Neither def carries a
// selection pattern.
multiclass CP_ASYNC_BULK_S2G<NVPTXRegClass rc> {
  defvar s2g_ops = (ins Int64Regs:$dst, rc:$src, Int32Regs:$size);
  defvar s2g_args = " [$dst], [$src], $size";

  def NAME: NVPTXInst<(outs),
            s2g_ops,
            !strconcat(CpAsyncBulkStr<0, 0>.S2G, s2g_args, ";"), []>,
            Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _CH: NVPTXInst<(outs),
                  !con(s2g_ops, (ins Int64Regs:$ch)),
                  !strconcat(CpAsyncBulkStr<0, 1>.S2G, s2g_args, ", $ch;"), []>,
                  Requires<[hasPTX<80>, hasSM<90>]>;
}

defm CP_ASYNC_BULK_S2G          : CP_ASYNC_BULK_S2G<Int64Regs>;
defm CP_ASYNC_BULK_S2G_SHARED32 : CP_ASYNC_BULK_S2G<Int32Regs>;
518
// Bulk copy using the CpAsyncBulkStr G2S mnemonic. `rc` is the register class
// of the $dst and $mbar addresses. Four forms are generated:
//   NAME        - plain
//   NAME_MC     - adds a 16-bit $mc operand (multicast mnemonic suffix)
//   NAME_CH     - adds a 64-bit $ch operand (cache-hint mnemonic suffix)
//   NAME_MC_CH  - adds both, $mc before $ch
// None of the defs carries a selection pattern.
multiclass CP_ASYNC_BULK_G2S<NVPTXRegClass rc> {
  defvar g2s_ops = (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size);
  defvar g2s_args = " [$dst], [$src], $size, [$mbar]";

  def NAME: NVPTXInst<(outs),
            g2s_ops,
            !strconcat(CpAsyncBulkStr<0, 0>.G2S, g2s_args, ";"), []>,
            Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _MC: NVPTXInst<(outs),
                  !con(g2s_ops, (ins Int16Regs:$mc)),
                  !strconcat(CpAsyncBulkStr<1, 0>.G2S, g2s_args, ", $mc;"), []>,
                  Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _CH: NVPTXInst<(outs),
                  !con(g2s_ops, (ins Int64Regs:$ch)),
                  !strconcat(CpAsyncBulkStr<0, 1>.G2S, g2s_args, ", $ch;"), []>,
                  Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _MC_CH: NVPTXInst<(outs),
                     !con(g2s_ops, (ins Int16Regs:$mc, Int64Regs:$ch)),
                     !strconcat(CpAsyncBulkStr<1, 1>.G2S, g2s_args, ", $mc, $ch;"), []>,
                     Requires<[hasPTX<80>, hasSM<90>]>;
}

defm CP_ASYNC_BULK_G2S          : CP_ASYNC_BULK_G2S<Int64Regs>;
defm CP_ASYNC_BULK_G2S_SHARED32 : CP_ASYNC_BULK_G2S<Int32Regs>;
539
// Bulk copy using the CpAsyncBulkStr C2C mnemonic. `rc` is the register class
// shared by the $dst, $mbar and $src addresses. Unlike the S2G/G2S bulk
// copies above, this one is selected directly from
// int_nvvm_cp_async_bulk_shared_cta_to_cluster.
multiclass CP_ASYNC_BULK_CTA_TO_CLUSTER<NVPTXRegClass rc> {
  defvar c2c_asm =
    !strconcat(CpAsyncBulkStr<0, 0>.C2C, " [$dst], [$src], $size, [$mbar];");

  def NAME: NVPTXInst<(outs),
            (ins rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size),
            c2c_asm,
            [(int_nvvm_cp_async_bulk_shared_cta_to_cluster
                rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size)]>,
            Requires<[hasPTX<80>, hasSM<90>]>;
}

defm CP_ASYNC_BULK_CTA_TO_CLUSTER :
  CP_ASYNC_BULK_CTA_TO_CLUSTER<Int64Regs>;
defm CP_ASYNC_BULK_CTA_TO_CLUSTER_SHARED32 :
  CP_ASYNC_BULK_CTA_TO_CLUSTER<Int32Regs>;
549
550//------------------------------
551// Bulk Copy Prefetch Functions
552//------------------------------
// cp.async.bulk.prefetch.L2.global, without and with the L2::cache_hint
// qualifier. The _CH form takes an extra 64-bit $ch operand. Neither def
// carries a selection pattern; both are gated on hasPTX<80>/hasSM<90>.
def CP_ASYNC_BULK_PREFETCH :
  NVPTXInst<(outs),
            (ins Int64Regs:$src, Int32Regs:$size),
            "cp.async.bulk.prefetch.L2.global [$src], $size;", []>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_PREFETCH_CH :
  NVPTXInst<(outs),
            (ins Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch),
            "cp.async.bulk.prefetch.L2.global.L2::cache_hint [$src], $size, $ch;", []>,
  Requires<[hasPTX<80>, hasSM<90>]>;
562//-------------------------------------
563// TMA Async Bulk Tensor Copy Functions
564//-------------------------------------
565
// From Global to Shared memory (G2S)
//
// String helper for the tensor G2S copies.
//   inst_name - PTX mnemonic:
//               cp.async.bulk.tensor.<dim>d.<dir>.<mode>.<completion>
//               followed by the optional multicast (mc) and cache-hint (ch)
//               suffixes.
//   intr_name - record-name stem:
//               CP_ASYNC_BULK_TENSOR_G2S_<dim>D[_SHARED32]{_TILE|_IM2COL};
//               any mode other than "tile" yields _IM2COL.
class G2S_STRINGS<int dim, string mode, bit mc, bit ch, bit is_shared32 = 0> {
  string prefix = "cp.async.bulk.tensor";
  string dir = "shared::cluster.global";
  string completion = "mbarrier::complete_tx::bytes";

  string inst_name = prefix # "." # dim # "d." # dir # "." # mode
                     # "." # completion
                     # !if(mc, ".multicast::cluster", "")
                     # !if(ch, ".L2::cache_hint", "");

  string intr_name = "CP_ASYNC_BULK_TENSOR_G2S_" # dim # "D"
                     # !if(is_shared32, "_SHARED32", "")
                     # !if(!eq(mode, "tile"), "_TILE", "_IM2COL");
}
583
// Tensor G2S copy instructions for one (dim, address width, mode) triple.
//
// Operands, in order: $dst, $mbar (register class picked by is_shared32),
// $tmap, then `dim` 32-bit operands $d0.., then for mode "im2col" an extra
// (dim - 2) 16-bit operands $im2col0.. (none when dim < 3), then the optional
// $mc / $ch operands. Four forms are generated: NAME, NAME_MC, NAME_CH and
// NAME_MC_CH. None of the defs carries a selection pattern.
multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, bit is_shared32, string mode> {
  defvar addr_rc = !if(is_shared32, Int32Regs, Int64Regs);

  // $d0 .. $d<dim-1>
  defvar coord_ops = !dag(ins, !listsplat(Int32Regs, dim),
                          !foreach(i, !range(dim), "d" # i));
  defvar coord_asm = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");

  // $im2col0 .. $im2col<dim-3>, only present in im2col mode.
  defvar n_offsets = !if(!ge(dim, 3), !add(dim, -2), 0);
  defvar offset_ops = !if(!eq(mode, "im2col"),
    !dag(ins, !listsplat(Int16Regs, n_offsets),
         !foreach(i, !range(n_offsets), "im2col" # i)),
    (ins));
  defvar offset_asm =
    !interleave(!foreach(i, !range(n_offsets), "$im2col" # i), ", ");

  // Operand text shared by all four forms.
  defvar tile_args = " [$dst], [$tmap, {{" # coord_asm # "}}], [$mbar]";
  defvar args_asm = !if(!eq(mode, "im2col"),
    !strconcat(tile_args, ", {{" # offset_asm # "}}"),
    tile_args);

  // Operands shared by all four forms.
  defvar common_ops = !con((ins addr_rc:$dst, addr_rc:$mbar, Int64Regs:$tmap),
                           coord_ops, offset_ops);

  def NAME: NVPTXInst<(outs),
            common_ops,
            !strconcat(G2S_STRINGS<dim, mode, 0, 0>.inst_name, args_asm, ";"), []>,
            Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _MC: NVPTXInst<(outs),
                  !con(common_ops, (ins Int16Regs:$mc)),
                  !strconcat(G2S_STRINGS<dim, mode, 1, 0>.inst_name, args_asm, ", $mc;"), []>,
                  Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _CH: NVPTXInst<(outs),
                  !con(common_ops, (ins Int64Regs:$ch)),
                  !strconcat(G2S_STRINGS<dim, mode, 0, 1>.inst_name, args_asm, ", $ch;"), []>,
                  Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _MC_CH: NVPTXInst<(outs),
                     !con(common_ops, (ins Int16Regs:$mc, Int64Regs:$ch)),
                     !strconcat(G2S_STRINGS<dim, mode, 1, 1>.inst_name, args_asm, ", $mc, $ch;"), []>,
                     Requires<[hasPTX<80>, hasSM<90>]>;
}
617
// Instantiate the G2S tensor copies for 1D..5D, for both address widths.
// "im2col" mode is only generated for 3 or more dimensions. The record-name
// stem comes from G2S_STRINGS.intr_name.
foreach ndims = [1, 2, 3, 4, 5] in {
  foreach use_shared32 = [true, false] in {
    foreach copy_mode = !if(!ge(ndims, 3), ["tile", "im2col"], ["tile"]) in {
      defm G2S_STRINGS<ndims, copy_mode, 0, 0, use_shared32>.intr_name :
        CP_ASYNC_BULK_TENSOR_G2S_INTR<ndims, use_shared32, copy_mode>;
    }
  }
}
626
// From Shared to Global memory (S2G)
//
// String helper for the tensor S2G copies and reductions.
//   inst_name - PTX mnemonic:
//               cp[.reduce].async.bulk.tensor.<dim>d.<dir>.<mode>.<completion>
//               followed by the optional cache-hint (ch) suffix.
//   intr_name - record-name stem:
//               CP_ASYNC_BULK_TENSOR_{S2G|RED}_<dim>D[_SHARED32]{_TILE|_IM2COL};
//               any mode other than "tile" yields _IM2COL.
class S2G_STRINGS<int dim, string mode, bit ch,
                  bit is_shared32 = 0, bit is_reduce = 0> {
  string dir = "global.shared::cta";
  string completion = "bulk_group";

  string inst_name = !if(is_reduce, "cp.reduce", "cp")
                     # ".async.bulk.tensor." # dim # "d." # dir
                     # "." # mode # "." # completion
                     # !if(ch, ".L2::cache_hint", "");

  string intr_name = "CP_ASYNC_BULK_TENSOR_"
                     # !if(is_reduce, "RED_", "S2G_")
                     # dim # "D"
                     # !if(is_shared32, "_SHARED32", "")
                     # !if(!eq(mode, "tile"), "_TILE", "_IM2COL");
}
645
// Tensor S2G copy instructions for one (dim, address width, mode) triple.
//
// Operands, in order: $src (register class picked by shared32), $tmap, then
// `dim` 32-bit operands $d0.., then the optional 64-bit $ch. NAME is the
// plain form and NAME_CH the cache-hint form. Neither def carries a
// selection pattern.
multiclass CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, bit shared32, string mode> {
  defvar src_rc = !if(shared32, Int32Regs, Int64Regs);

  // $d0 .. $d<dim-1>
  defvar coord_ops = !dag(ins, !listsplat(Int32Regs, dim),
                          !foreach(i, !range(dim), "d" # i));
  defvar coord_asm = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");

  defvar args_asm = " [$tmap, {{" # coord_asm # "}}], [$src]";
  defvar common_ops = !con((ins src_rc:$src, Int64Regs:$tmap), coord_ops);

  def NAME: NVPTXInst<(outs),
            common_ops,
            !strconcat(S2G_STRINGS<dim, mode, 0>.inst_name, args_asm, ";"), []>,
            Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _CH: NVPTXInst<(outs),
                  !con(common_ops, (ins Int64Regs:$ch)),
                  !strconcat(S2G_STRINGS<dim, mode, 1>.inst_name, args_asm, ", $ch;"), []>,
                  Requires<[hasPTX<80>, hasSM<90>]>;
}
661
// i32 operand that is printed through the "printTmaReductionMode" method.
let PrintMethod = "printTmaReductionMode" in
def TMAReductionFlags : Operand<i32>;
665
// TMA Copy from Shared to Global memory with Reduction
//
// Defines NAME and NAME_CH. The reduction operation is the $red_op operand,
// printed in the middle of the opcode (between the direction and the mode).
// It is always the last input operand; in the _CH form it follows $ch.
// The instructions carry no selection pattern (empty pattern list).
multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, bit shared32, string mode> {
  // One 32-bit coordinate operand per dimension: $d0 .. $d<dim-1>.
  defvar coord_ids = !range(dim);
  defvar coords_ins = !dag(ins, !listsplat(Int32Regs, dim),
                           !foreach(i, coord_ids, "d" # i));
  defvar coords_asm = !interleave(!foreach(i, coord_ids, "$d" # i), ", ");
  defvar operands_asm = " [$tmap, {{" # coords_asm # "}}], [$src]";

  // $src is a 32-bit register when shared32 is set, 64-bit otherwise.
  defvar src_rc = !if(shared32, Int32Regs, Int64Regs);
  defvar common_ins = !con((ins src_rc:$src, Int64Regs:$tmap), coords_ins);

  // Opcode text on either side of the printed reduction mode.
  defvar opc_head = "cp.reduce.async.bulk.tensor." # dim # "d"
                    # ".global.shared::cta";
  defvar opc_tail = "." # mode # ".bulk_group";

  def NAME : NVPTXInst<(outs),
                       !con(common_ins, (ins TMAReductionFlags:$red_op)),
                       opc_head # "${red_op}" # opc_tail
                         # operands_asm # ";",
                       []>,
             Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _CH : NVPTXInst<(outs),
                             !con(common_ins,
                                  (ins Int64Regs:$ch,
                                       TMAReductionFlags:$red_op)),
                             opc_head # "${red_op}" # opc_tail
                               # ".L2::cache_hint"
                               # operands_asm # ", $ch;",
                             []>,
                   Requires<[hasPTX<80>, hasSM<90>]>;
}
685
// Instantiate the plain (S2G) and reducing (RED) shared->global tensor copies
// for every dimension, shared-address width and mode combination.
foreach dim = [1, 2, 3, 4, 5] in {
  // "im2col_no_offs" is only instantiated for three or more dimensions.
  defvar modes = !if(!ge(dim, 3), ["tile", "im2col_no_offs"], ["tile"]);
  foreach shared32 = [true, false] in
    foreach mode = modes in {
      defm S2G_STRINGS<dim, mode, 0, shared32>.intr_name :
        CP_ASYNC_BULK_TENSOR_S2G_INTR<dim, shared32, mode>;
      defm S2G_STRINGS<dim, mode, 0, shared32, 1>.intr_name :
        CP_ASYNC_BULK_TENSOR_REDUCE_INTR<dim, shared32, mode>;
    }
}
696
// TMA Prefetch from Global memory to L2 cache
//
// String helper for the tensor prefetch instructions:
//   inst_name - the PTX opcode, with ".L2::cache_hint" appended when ch is set
//   intr_name - the record-name prefix used when instantiating the multiclass
class PREFETCH_STRINGS<int dim, string mode, bit ch> {
  string prefix = "cp.async.bulk.prefetch.tensor";
  string dir = "L2.global";
  string inst_name = prefix # "." # dim # "d"
                     # "." # dir # "." # mode
                     # !if(ch, ".L2::cache_hint", "");
  // Any mode other than "tile" is named "_IM2COL".
  string intr_name = "CP_ASYNC_BULK_TENSOR_PREFETCH_"
                     # dim # "D"
                     # !if(!ne(mode, "tile"), "_IM2COL", "_TILE");
}
710
// Tensor prefetch. Defines two instructions per instantiation:
//   NAME     - plain form
//   NAME_CH  - form with a trailing 64-bit $ch operand and the
//              ".L2::cache_hint" opcode qualifier
// In "im2col" mode (dim - 2) extra 16-bit operands $im2col0.. are appended
// after the coordinates and printed as a second brace-enclosed list.
// The instructions carry no selection pattern (empty pattern list).
multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
  defvar is_im2col = !eq(mode, "im2col");

  // One 32-bit coordinate operand per dimension: $d0 .. $d<dim-1>.
  defvar coord_ids = !range(dim);
  defvar coords_ins = !dag(ins, !listsplat(Int32Regs, dim),
                           !foreach(i, coord_ids, "d" # i));
  defvar coords_asm = !interleave(!foreach(i, coord_ids, "$d" # i), ", ");
  defvar tile_asm = " [$tmap, {{" # coords_asm # "}}]";

  // Number of im2col operands: dim - 2 for dim >= 3, otherwise none.
  defvar offs_count = !if(!lt(dim, 3), 0, !sub(dim, 2));
  defvar offs_ids = !range(offs_count);
  defvar offs_ins = !if(is_im2col,
    !dag(ins, !listsplat(Int16Regs, offs_count),
         !foreach(i, offs_ids, "im2col" # i)),
    (ins));
  defvar offs_asm = !interleave(!foreach(i, offs_ids, "$im2col" # i), ", ");

  defvar operands_asm = !if(is_im2col,
    tile_asm # ", {{" # offs_asm # "}}",
    tile_asm);

  defvar common_ins = !con((ins Int64Regs:$tmap), coords_ins, offs_ins);

  def NAME : NVPTXInst<(outs), common_ins,
                       PREFETCH_STRINGS<dim, mode, 0>.inst_name
                         # operands_asm # ";",
                       []>,
             Requires<[hasPTX<80>, hasSM<90>]>;

  def NAME # _CH : NVPTXInst<(outs),
                             !con(common_ins, (ins Int64Regs:$ch)),
                             PREFETCH_STRINGS<dim, mode, 1>.inst_name
                               # operands_asm # ", $ch;",
                             []>,
                   Requires<[hasPTX<80>, hasSM<90>]>;
}
735
// Instantiate the tensor prefetch instructions for every dimension and mode.
foreach dim = [1, 2, 3, 4, 5] in {
  // "im2col" is only instantiated for tensors with three or more dimensions.
  defvar modes = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]);
  foreach mode = modes in
    defm PREFETCH_STRINGS<dim, mode, 0>.intr_name :
      CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<dim, mode>;
}
742
743//-----------------------------------
744// MBarrier Functions
745//-----------------------------------
746
// mbarrier.init: selects Intrin(addr, count) for an i32 address (_32) and an
// i64 address (_64). AddrSpace is spliced into the opcode as-is.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count), Asm,
                      [(Intrin i32:$addr, i32:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count), Asm,
                      [(Intrin i64:$addr, i32:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT
    : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
761
// mbarrier.inval: selects Intrin(addr) for an i32 address (_32) and an i64
// address (_64). AddrSpace is spliced into the opcode as-is.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.inval" # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr), Asm,
                      [(Intrin i32:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr), Asm,
                      [(Intrin i64:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL
    : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
776
// mbarrier.arrive: selects Intrin(addr) into a 64-bit $state result, for an
// i32 address (_32) and an i64 address (_64).
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr), Asm,
                      [(set i64:$state, (Intrin i32:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr), Asm,
                      [(set i64:$state, (Intrin i64:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE
    : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
791
// mbarrier.arrive.noComplete: selects Intrin(addr, count) into a 64-bit
// $state result, for an i32 address (_32) and an i64 address (_64).
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive.noComplete" # AddrSpace
               # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count), Asm,
                      [(set i64:$state, (Intrin i32:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count), Asm,
                      [(set i64:$state, (Intrin i64:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;
811
// mbarrier.arrive_drop: selects Intrin(addr) into a 64-bit $state result, for
// an i32 address (_32) and an i64 address (_64).
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive_drop" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr), Asm,
                      [(set i64:$state, (Intrin i32:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr), Asm,
                      [(set i64:$state, (Intrin i64:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
829
// mbarrier.arrive_drop.noComplete: selects Intrin(addr, count) into a 64-bit
// $state result, for an i32 address (_32) and an i64 address (_64).
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive_drop.noComplete" # AddrSpace
               # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count), Asm,
                      [(set i64:$state, (Intrin i32:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count), Asm,
                      [(set i64:$state, (Intrin i64:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
        "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
        ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
850
// mbarrier.test_wait: selects Intrin(addr, state) into an i1 $res result, for
// an i32 address (_32) and an i64 address (_64).
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.test_wait" # AddrSpace
               # ".b64 $res, [$addr], $state;";

  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state), Asm,
                      [(set i1:$res, (Intrin i32:$addr, i64:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state), Asm,
                      [(set i1:$res, (Intrin i64:$addr, i64:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
866
// mbarrier.pending_count: selects Intrin(state) on a 64-bit state value into
// a 32-bit $res result.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res),
                (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set i32:$res, (Intrin i64:$state))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
875
876//-----------------------------------
877// Math Functions
878//-----------------------------------
879
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x
// is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
885
// The four operand orderings of fmin(One, fmax(Zero, x)) for one
// floating-point type, each selected to Cvt with the CvtSAT modifier.
multiclass FMIN_FMAX_TO_SAT<Intrinsic FMin, Intrinsic FMax,
                            PatLeaf Zero, PatLeaf One,
                            ValueType VT, Instruction Cvt> {
  def : Pat<(FMin One, (FMax Zero, VT:$a)), (Cvt $a, CvtSAT)>;
  def : Pat<(FMin One, (FMax VT:$a, Zero)), (Cvt $a, CvtSAT)>;
  def : Pat<(FMin (FMax Zero, VT:$a), One), (Cvt $a, CvtSAT)>;
  def : Pat<(FMin (FMax VT:$a, Zero), One), (Cvt $a, CvtSAT)>;
}

// f32
defm : FMIN_FMAX_TO_SAT<int_nvvm_fmin_f, int_nvvm_fmax_f,
                        immFloat0, immFloat1, f32, CVT_f32_f32>;
// f64
defm : FMIN_FMAX_TO_SAT<int_nvvm_fmin_d, int_nvvm_fmax_d,
                        immDouble0, immDouble1, f64, CVT_f64_f64>;
911
912
// One-input math instruction: $dst = IntOP($src0).
// OpcStr must be the complete assembly string (opcode and operands) because
// some users, such as INT_PTX_RECIP, need a non-uniform string.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
      Requires<Preds>;
921
// Two-input math instruction: $dst = IntOP($src0, $src1).
// OpcStr must be the complete assembly string (opcode and operands) because
// some users, such as INT_PTX_NATIVE_POWR_F, need a non-uniform string.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP, list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
      Requires<Preds>;
932
// Three-input math instruction: $dst = IntOP($src0, $src1, $src2).
// OpcStr is the complete assembly string (opcode and operands).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1,
                     s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>,
      Requires<Preds>;
942
943//
944// MISC
945//
946
// prmt.b32 on three 32-bit register inputs.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

// nanosleep.u32 with an immediate (_I) or a register (_R) operand.
def INT_NVVM_NANOSLEEP_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "nanosleep.u32 \t$i;",
                [(int_nvvm_nanosleep imm:$i)]>,
      Requires<[hasPTX<63>, hasSM<70>]>;
def INT_NVVM_NANOSLEEP_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "nanosleep.u32 \t$i;",
                [(int_nvvm_nanosleep i32:$i)]>,
      Requires<[hasPTX<63>, hasSM<70>]>;
956//
957// Min Max
958//
959
// f32 min. The plain and .ftz forms carry no predicates; the .NaN forms
// require [hasPTX<70>, hasSM<80>] and the .xorsign.abs forms require
// [hasPTX<72>, hasSM<86>].
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMIN_NAN_F
    : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_FTZ_NAN_F
    : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F
    : F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
986
// f32 max. The plain and .ftz forms carry no predicates; the .NaN forms
// require [hasPTX<70>, hasSM<80>] and the .xorsign.abs forms require
// [hasPTX<72>, hasSM<86>].
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
def INT_NVVM_FMAX_NAN_F
    : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_FTZ_NAN_F
    : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_XORSIGN_ABS_F
    : F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
1013
// f64 min / max.
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
1018
1019//
1020// Min Max f16, f16x2, bf16, bf16x2
1021//
1022
// Describes one min/max variant. The template arguments are copied verbatim
// into fields so a list of tuples can be iterated with foreach.
// Preds defaults to [hasPTX<70>, hasSM<80>].
class MIN_MAX_TUPLE<string V,
                    Intrinsic I,
                    NVPTXRegClass RC,
                    list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
  // Name suffix of the variant, e.g. "_ftz_NaN_f16".
  string Variant = V;
  // Intrinsic associated with the variant.
  Intrinsic Intr = I;
  // Register class associated with the variant.
  NVPTXRegClass RegClass = RC;
  // Predicates associated with the variant.
  list<Predicate> Predicates = Preds;
}
1030
// Defines one F_MATH_2 instruction per f16/f16x2/bf16/bf16x2 min or max
// variant. IntName is the PTX mnemonic: "min" picks the fmin intrinsics, any
// other value picks the fmax ones. Each variant's record suffix doubles as
// its opcode qualifier list once '_' is replaced by '.'
// (e.g. "_ftz_NaN_f16" -> ".ftz.NaN.f16").
multiclass MIN_MAX<string IntName> {
  defvar IsMin = !eq(IntName, "min");

  foreach P = [
    // f16
    MIN_MAX_TUPLE<"_f16",
      !if(IsMin, int_nvvm_fmin_f16, int_nvvm_fmax_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16",
      !if(IsMin, int_nvvm_fmin_ftz_f16, int_nvvm_fmax_ftz_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16",
      !if(IsMin, int_nvvm_fmin_nan_f16, int_nvvm_fmax_nan_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16",
      !if(IsMin, int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16",
      !if(IsMin, int_nvvm_fmin_xorsign_abs_f16,
                 int_nvvm_fmax_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16",
      !if(IsMin, int_nvvm_fmin_ftz_xorsign_abs_f16,
                 int_nvvm_fmax_ftz_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16",
      !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_f16,
                 int_nvvm_fmax_nan_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16",
      !if(IsMin, int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
                 int_nvvm_fmax_ftz_nan_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // f16x2
    MIN_MAX_TUPLE<"_f16x2",
      !if(IsMin, int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2",
      !if(IsMin, int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2",
      !if(IsMin, int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2",
      !if(IsMin, int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2",
      !if(IsMin, int_nvvm_fmin_xorsign_abs_f16x2,
                 int_nvvm_fmax_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2",
      !if(IsMin, int_nvvm_fmin_ftz_xorsign_abs_f16x2,
                 int_nvvm_fmax_ftz_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2",
      !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_f16x2,
                 int_nvvm_fmax_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2",
      !if(IsMin, int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
                 int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16
    MIN_MAX_TUPLE<"_bf16",
      !if(IsMin, int_nvvm_fmin_bf16, int_nvvm_fmax_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16",
      !if(IsMin, int_nvvm_fmin_nan_bf16, int_nvvm_fmax_nan_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16",
      !if(IsMin, int_nvvm_fmin_xorsign_abs_bf16,
                 int_nvvm_fmax_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16",
      !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_bf16,
                 int_nvvm_fmax_nan_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16x2
    MIN_MAX_TUPLE<"_bf16x2",
      !if(IsMin, int_nvvm_fmin_bf16x2, int_nvvm_fmax_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2",
      !if(IsMin, int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2",
      !if(IsMin, int_nvvm_fmin_xorsign_abs_bf16x2,
                 int_nvvm_fmax_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2",
      !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_bf16x2,
                 int_nvvm_fmax_nan_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>
  ] in {
    def P.Variant
        : F_MATH_2<IntName # !subst("_", ".", P.Variant)
                     # " \t$dst, $src0, $src1;",
                   P.RegClass, P.RegClass, P.RegClass,
                   P.Intr, P.Predicates>;
  }
}

defm INT_NVVM_FMIN : MIN_MAX<"min">;
// NOTE(review): the prefix "INT_NVVM_FMAN" looks like a typo for
// "INT_NVVM_FMAX". It is kept as-is because renaming it would change the name
// of every record generated by this defm; confirm nothing depends on the
// current spelling before fixing it.
defm INT_NVVM_FMAN : MIN_MAX<"max">;
1103
1104//
1105// Multiplication
1106//
1107
// mul.hi for signed/unsigned 16-, 32- and 64-bit integers.
def INT_NVVM_MULHI_S
    : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;",
               Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
def INT_NVVM_MULHI_US
    : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;",
               Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
1120
// f32 mul with an explicit rounding qualifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
1137
// f64 mul with an explicit rounding qualifier (rn/rz/rm/rp).
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
1146
// mul24.lo for signed and unsigned 32-bit registers.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
1151
1152//
1153// Div
1154//
1155
// f32 div.approx, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F :
  F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F :
  F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
1161
// f32 div with an explicit rounding qualifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
1178
// f64 div with an explicit rounding qualifier (rn/rz/rm/rp).
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
1187
// div.full intrinsics are selected to the FDIV32 instructions, with a
// register (rr) or floating-point immediate (ri) divisor.
def : Pat<(int_nvvm_div_full f32:$a, f32:$b), (FDIV32rr $a, $b)>;
def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b), (FDIV32ri $a, f32imm:$b)>;

// Same for the .ftz intrinsics, using the _ftz instruction variants.
def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b), (FDIV32rr_ftz $a, $b)>;
def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b),
          (FDIV32ri_ftz $a, f32imm:$b)>;
1199
1200//
1201// Sad
1202//
1203
// sad for signed/unsigned 16-, 32- and 64-bit integers (three inputs).
def INT_NVVM_SAD_S
    : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
               Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
def INT_NVVM_SAD_US
    : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
               Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
def INT_NVVM_SAD_LL
    : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
               Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
def INT_NVVM_SAD_ULL
    : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
               Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
1216
1217//
1218// Floor  Ceil
1219//
1220
// floor: same-type CVT with the CvtRMI / CvtRMI_FTZ modifier.
def : Pat<(int_nvvm_floor_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f f32:$a), (CVT_f32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d f64:$a), (CVT_f64_f64 $a, CvtRMI)>;

// ceil: same-type CVT with the CvtRPI / CvtRPI_FTZ modifier.
def : Pat<(int_nvvm_ceil_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f f32:$a), (CVT_f32_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d f64:$a), (CVT_f64_f64 $a, CvtRPI)>;
1234
1235//
1236// Abs
1237//
1238
// abs for f32 (with and without .ftz) and f64.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
1246
1247//
1248// copysign
1249//
1250
// Target node for copysign, typed as a floating-point binary operation.
def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>;

// Note the operand order: the node's first operand is bound to $src1 and its
// second operand to $src0, so the two are swapped relative to the order in
// which they are printed.
def COPYSIGN_F
    : NVPTXInst<(outs Float32Regs:$dst),
                (ins Float32Regs:$src0, Float32Regs:$src1),
                "copysign.f32 \t$dst, $src0, $src1;",
                [(set f32:$dst, (fcopysign_nvptx f32:$src1, f32:$src0))]>;

def COPYSIGN_D
    : NVPTXInst<(outs Float64Regs:$dst),
                (ins Float64Regs:$src0, Float64Regs:$src1),
                "copysign.f64 \t$dst, $src0, $src1;",
                [(set f64:$dst, (fcopysign_nvptx f64:$src1, f64:$src0))]>;
1262
1263//
1264// Abs, Neg bf16, bf16x2
1265//
1266
// abs / neg for bf16 (16-bit integer registers) and bf16x2 (32-bit integer
// registers); all require [hasPTX<70>, hasSM<80>].
def INT_NVVM_ABS_BF16
    : F_MATH_1<"abs.bf16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_abs_bf16,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_ABS_BF16X2
    : F_MATH_1<"abs.bf16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_abs_bf16x2,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16
    : F_MATH_1<"neg.bf16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_neg_bf16,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16X2
    : F_MATH_1<"neg.bf16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_neg_bf16x2,
               [hasPTX<70>, hasSM<80>]>;
1275
1276//
1277// Round
1278//
1279
// round: same-type CVT with the CvtRNI / CvtRNI_FTZ modifier.
def : Pat<(int_nvvm_round_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f f32:$a), (CVT_f32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_round_d f64:$a), (CVT_f64_f64 $a, CvtRNI)>;
1286
1287//
1288// Trunc
1289//
1290
// trunc: same-type CVT with the CvtRZI / CvtRZI_FTZ modifier.
def : Pat<(int_nvvm_trunc_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f f32:$a), (CVT_f32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d f64:$a), (CVT_f64_f64 $a, CvtRZI)>;
1297
1298//
1299// Saturate
1300//
1301
// saturate: same-type CVT with the CvtSAT / CvtSAT_FTZ modifier.
def : Pat<(int_nvvm_saturate_ftz_f f32:$a), (CVT_f32_f32 $a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f f32:$a), (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 $a, CvtSAT)>;
1308
1309//
1310// Exp2  Log2
1311//
1312
// ex2.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// ex2.approx for f16 / f16x2; both require [hasPTX<70>, hasSM<75>].
def INT_NVVM_EX2_APPROX_F16
    : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16,
               [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2
    : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2,
               [hasPTX<70>, hasSM<75>]>;

// Generic fexp2 is selected to the approximate instructions above. The f32
// variant is chosen by the doF32FTZ / doNoF32FTZ predicates; the f16 and
// v2f16 patterns are gated on useFP16Math.
def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_FTZ_F $a)>,
      Requires<[doF32FTZ]>;
def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_F $a)>,
      Requires<[doNoF32FTZ]>;
def : Pat<(fexp2 f16:$a), (INT_NVVM_EX2_APPROX_F16 $a)>,
      Requires<[useFP16Math]>;
def : Pat<(fexp2 v2f16:$a), (INT_NVVM_EX2_APPROX_F16X2 $a)>,
      Requires<[useFP16Math]>;
1333
1334def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
1335  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
1336def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
1337  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
1338def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
1339  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
1340
1341def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_FTZ_F $a)>,
1342          Requires<[doF32FTZ]>;
1343def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_F $a)>,
1344          Requires<[doNoF32FTZ]>;
1345
//
// Sin  Cos
//
// Only f32 approximate forms are defined here, each with and without .ftz.

def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
1359
//
// Fma
//

// Description of one fma variant.
//   V     - record-name suffix, e.g. "_rn_f64"; also the source of the PTX
//           modifier string (see FMA_INST)
//   I     - intrinsic selected to this variant
//   RC    - register class used for the result and for all three sources
//   Preds - predicates required by the variant (none by default)
class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                list<Predicate> Preds = []> {
  string          Variant    = V;
  Intrinsic       Intr       = I;
  NVPTXRegClass   RegClass   = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_3 record per tuple below.  The record is named after the
// tuple's Variant, and the asm mnemonic is "fma" followed by Variant with every
// "_" replaced by "." (so "_rn_f64" prints as "fma.rn.f64").
multiclass FMA_INST {
  foreach Tup = [
    // f64
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // f32
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32",     int_nvvm_fma_rn_f,     Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32",     int_nvvm_fma_rz_f,     Float32Regs>,
    FMA_TUPLE<"_rm_f32",     int_nvvm_fma_rm_f,     Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32",     int_nvvm_fma_rp_f,     Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // f16 (held in Int16Regs)
    FMA_TUPLE<"_rn_f16",
              int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16",
              int_nvvm_fma_rn_ftz_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16",
              int_nvvm_fma_rn_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16",
              int_nvvm_fma_rn_ftz_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16",
              int_nvvm_fma_rn_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16",
              int_nvvm_fma_rn_ftz_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // bf16 (held in Int16Regs)
    FMA_TUPLE<"_rn_bf16",
              int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_bf16",
              int_nvvm_fma_rn_ftz_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_sat_bf16",
              int_nvvm_fma_rn_sat_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_sat_bf16",
              int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16",
              int_nvvm_fma_rn_relu_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_bf16",
              int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // f16x2 / bf16x2 (held in Int32Regs)
    FMA_TUPLE<"_rn_f16x2",
              int_nvvm_fma_rn_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16x2",
              int_nvvm_fma_rn_ftz_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16x2",
              int_nvvm_fma_rn_sat_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2",
              int_nvvm_fma_rn_ftz_sat_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16x2",
              int_nvvm_fma_rn_relu_f16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2",
              int_nvvm_fma_rn_ftz_relu_f16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_bf16x2",
              int_nvvm_fma_rn_bf16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16x2",
              int_nvvm_fma_rn_relu_bf16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>
  ] in {
    def Tup.Variant
      : F_MATH_3<"fma" # !subst("_", ".", Tup.Variant)
                       # " \t$dst, $src0, $src1, $src2;",
                 Tup.RegClass, Tup.RegClass, Tup.RegClass, Tup.RegClass,
                 Tup.Intr, Tup.Predicates>;
  }
}

defm INT_NVVM_FMA : FMA_INST;
1437
//
// Rcp
//

// f32 reciprocal: rn / rz / rm / rp, each with and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 reciprocal: rn / rz / rm / rp.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// Approximate reciprocal (.ftz forms only) for f32 and f64.
def INT_NVVM_RCP_APPROX_FTZ_F
  : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
1472
//
// Sqrt
//

// f32 square root: rn / rz / rm / rp and approx, each with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 square root: rn / rz / rm / rp.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
//
// int_nvvm_sqrt_f has four candidate selections that differ only in their
// predicates:
//   doF32FTZ + do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN            -> sqrt.rn.f32
//   doF32FTZ                 -> sqrt.approx.ftz.f32
//   (no predicate)           -> sqrt.approx.f32
// NOTE(review): the patterns are listed most-constrained first and the last
// one is unconditional; this order looks intentional, so keep it unless
// selection priority is confirmed not to depend on it.
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_FTZ_F $a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_F $a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_APPROX_FTZ_F $a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_APPROX_F $a)>;
1516
//
// Rsqrt
//

// Approximate reciprocal square root, f32 and f64, with and without .ftz.
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_FTZ_D
  : F_MATH_1<"rsqrt.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_ftz_d>;

def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

// 1.0f / sqrt_approx -> rsqrt_approx
// All folds in this section require doRsqrtOpt.
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_F $a)>,
      Requires<[doRsqrtOpt]>;
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
      Requires<[doRsqrtOpt]>;

// same for int_nvvm_sqrt_f when non-precision sqrt is requested
// (do_SQRTF32_APPROX); the FTZ / non-FTZ instruction follows doF32FTZ /
// doNoF32FTZ.
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;

// and for the target-independent fsqrt node, under the same predicates.
def : Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
          (INT_NVVM_RSQRT_APPROX_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def : Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
//
// Add
//

// f32 add with explicit rounding mode: rn / rz / rm / rp, each with and
// without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add with explicit rounding mode: rn / rz / rm / rp.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1583
//
// BFIND
//
// For every (type, signedness) combination this defines two records that
// select int_nvvm_flo_{s,u}:
//   BFIND_<sign><size>          - second intrinsic operand is 0
//   BFIND_SHIFTAMT_<sign><size> - second intrinsic operand is -1
// The result is always an i32 in Int32Regs; the source uses the register
// class and value type of the iterated type record.

foreach t = [I32RT, I64RT] in {
  foreach sign = ["s", "u"] in {
    // e.g. "s32", "u64": shared by the record names and the asm strings.
    defvar TySuffix = sign # t.Size;
    defvar FloIntr  = !cast<Intrinsic>("int_nvvm_flo_" # sign);

    def BFIND_ # TySuffix
      : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src),
                  "bfind." # TySuffix # " \t$dst, $src;",
                  [(set i32:$dst, (FloIntr t.Ty:$src, 0))]>;

    def BFIND_SHIFTAMT_ # TySuffix
      : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src),
                  "bfind.shiftamt." # TySuffix # " \t$dst, $src;",
                  [(set i32:$dst, (FloIntr t.Ty:$src, -1))]>;
  }
}
1602
//
// Convert
//
// The conversion intrinsics below have no dedicated instruction records; each
// is selected as an existing CVT_<dst>_<src> instruction plus a Cvt* mode
// operand that mirrors the intrinsic's suffix.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz f64:$a), (CVT_f32_f64 $a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn f64:$a),     (CVT_f32_f64 $a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz f64:$a), (CVT_f32_f64 $a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz f64:$a),     (CVT_f32_f64 $a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz f64:$a), (CVT_f32_f64 $a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm f64:$a),     (CVT_f32_f64 $a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz f64:$a), (CVT_f32_f64 $a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp f64:$a),     (CVT_f32_f64 $a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn f64:$a), (CVT_s32_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz f64:$a), (CVT_s32_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm f64:$a), (CVT_s32_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp f64:$a), (CVT_s32_f64 $a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn f64:$a), (CVT_u32_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz f64:$a), (CVT_u32_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm f64:$a), (CVT_u32_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp f64:$a), (CVT_u32_f64 $a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn i32:$a), (CVT_f64_s32 $a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz i32:$a), (CVT_f64_s32 $a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm i32:$a), (CVT_f64_s32 $a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp i32:$a), (CVT_f64_s32 $a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn i32:$a), (CVT_f64_u32 $a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz i32:$a), (CVT_f64_u32 $a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm i32:$a), (CVT_f64_u32 $a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp i32:$a), (CVT_f64_u32 $a, CvtRP)>;

// f32 -> s32
def : Pat<(int_nvvm_f2i_rn_ftz f32:$a), (CVT_s32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn f32:$a),     (CVT_s32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz f32:$a), (CVT_s32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz f32:$a),     (CVT_s32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz f32:$a), (CVT_s32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm f32:$a),     (CVT_s32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz f32:$a), (CVT_s32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp f32:$a),     (CVT_s32_f32 $a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz f32:$a), (CVT_u32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn f32:$a),     (CVT_u32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz f32:$a), (CVT_u32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz f32:$a),     (CVT_u32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz f32:$a), (CVT_u32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm f32:$a),     (CVT_u32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz f32:$a), (CVT_u32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp f32:$a),     (CVT_u32_f32 $a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn i32:$a), (CVT_f32_s32 $a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz i32:$a), (CVT_f32_s32 $a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm i32:$a), (CVT_f32_s32 $a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp i32:$a), (CVT_f32_s32 $a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn i32:$a), (CVT_f32_u32 $a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz i32:$a), (CVT_f32_u32 $a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm i32:$a), (CVT_f32_u32 $a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp i32:$a), (CVT_f32_u32 $a, CvtRP)>;

// (f32, f32) -> bf16x2
def : Pat<
  (int_nvvm_ff2bf16x2_rn f32:$a, f32:$b),
  (CVT_bf16x2_f32 $a, $b, CvtRN)>;
def : Pat<
  (int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b),
  (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<
  (int_nvvm_ff2bf16x2_rz f32:$a, f32:$b),
  (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
def : Pat<
  (int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b),
  (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;

// (f32, f32) -> f16x2
def : Pat<
  (int_nvvm_ff2f16x2_rn f32:$a, f32:$b),
  (CVT_f16x2_f32 $a, $b, CvtRN)>;
def : Pat<
  (int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b),
  (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<
  (int_nvvm_ff2f16x2_rz f32:$a, f32:$b),
  (CVT_f16x2_f32 $a, $b, CvtRZ)>;
def : Pat<
  (int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b),
  (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;

// f32 -> bf16
def : Pat<(int_nvvm_f2bf16_rn f32:$a),      (CVT_bf16_f32 $a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a), (CVT_bf16_f32 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz f32:$a),      (CVT_bf16_f32 $a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a), (CVT_bf16_f32 $a, CvtRZ_RELU)>;
1738
// Packs two Int32Regs sources into one Float64Regs result with a single
// mov.b64 whose source is the brace list {$src0, $src1}.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpacks a Float64Regs source with mov.b64 into a two-element list and keeps
// the FIRST element in $dst.  The other element goes to a scratch .b32
// register %temp declared inside the emitted { ... } scope.
def INT_NVVM_D2I_LO
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;

// Same as INT_NVVM_D2I_LO, but keeps the SECOND element of the unpacked list
// in $dst and discards the first into %temp.
def INT_NVVM_D2I_HI
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1754
// Conversions to and from 64-bit integers, selected as CVT_<dst>_<src>
// instructions with a mode operand taken from the intrinsic suffix.

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz f32:$a), (CVT_s64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn f32:$a),     (CVT_s64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz f32:$a), (CVT_s64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz f32:$a),     (CVT_s64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz f32:$a), (CVT_s64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm f32:$a),     (CVT_s64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz f32:$a), (CVT_s64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp f32:$a),     (CVT_s64_f32 $a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz f32:$a), (CVT_u64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn f32:$a),     (CVT_u64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz f32:$a), (CVT_u64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz f32:$a),     (CVT_u64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz f32:$a), (CVT_u64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm f32:$a),     (CVT_u64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz f32:$a), (CVT_u64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp f32:$a),     (CVT_u64_f32 $a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn f64:$a), (CVT_s64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz f64:$a), (CVT_s64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm f64:$a), (CVT_s64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp f64:$a), (CVT_s64_f64 $a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn f64:$a), (CVT_u64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz f64:$a), (CVT_u64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm f64:$a), (CVT_u64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp f64:$a), (CVT_u64_f64 $a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn i64:$a), (CVT_f32_s64 $a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz i64:$a), (CVT_f32_s64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm i64:$a), (CVT_f32_s64 $a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp i64:$a), (CVT_f32_s64 $a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn i64:$a), (CVT_f32_u64 $a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz i64:$a), (CVT_f32_u64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm i64:$a), (CVT_f32_u64 $a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp i64:$a), (CVT_f32_u64 $a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn i64:$a), (CVT_f64_s64 $a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz i64:$a), (CVT_f64_s64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm i64:$a), (CVT_f64_s64 $a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp i64:$a), (CVT_f64_s64 $a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn i64:$a), (CVT_f64_u64 $a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz i64:$a), (CVT_f64_u64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm i64:$a), (CVT_f64_u64 $a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp i64:$a), (CVT_f64_u64 $a, CvtRP)>;
1842
1843
// f32 -> f16
def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), (CVT_f16_f32 $a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn f32:$a),     (CVT_f16_f32 $a, CvtRN)>;

// (f32, f32) -> e4m3x2 / e5m2x2
def : Pat<
  (int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
  (CVT_e4m3x2_f32 $a, $b, CvtRN)>;
def : Pat<
  (int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
  (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<
  (int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
  (CVT_e5m2x2_f32 $a, $b, CvtRN)>;
def : Pat<
  (int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
  (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;

// f16x2 (in Int32Regs) -> e4m3x2 / e5m2x2
def : Pat<
  (int_nvvm_f16x2_to_e4m3x2_rn Int32Regs:$a),
  (CVT_e4m3x2_f16x2 $a, CvtRN)>;
def : Pat<
  (int_nvvm_f16x2_to_e4m3x2_rn_relu Int32Regs:$a),
  (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
def : Pat<
  (int_nvvm_f16x2_to_e5m2x2_rn Int32Regs:$a),
  (CVT_e5m2x2_f16x2 $a, CvtRN)>;
def : Pat<
  (int_nvvm_f16x2_to_e5m2x2_rn_relu Int32Regs:$a),
  (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;

// e4m3x2 / e5m2x2 (in Int16Regs) -> f16x2
def : Pat<
  (int_nvvm_e4m3x2_to_f16x2_rn Int16Regs:$a),
  (CVT_f16x2_e4m3x2 $a, CvtRN)>;
def : Pat<
  (int_nvvm_e4m3x2_to_f16x2_rn_relu Int16Regs:$a),
  (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
def : Pat<
  (int_nvvm_e5m2x2_to_f16x2_rn Int16Regs:$a),
  (CVT_f16x2_e5m2x2 $a, CvtRN)>;
def : Pat<
  (int_nvvm_e5m2x2_to_f16x2_rn_relu Int16Regs:$a),
  (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
1875
//
// FNS
//

// Common shape of every fns.b32 record.
//   ins      - the (ins ...) operand list; must name $mask, $base and $offset
//   Operands - the int_nvvm_fns dag used as the selection pattern source
// The result is an i32 in Int32Regs, and every record requires
// [hasPTX<60>, hasSM<30>].
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst), ins,
              "fns.b32 \t$dst, $mask, $base, $offset;",
              [(set i32:$dst, Operands)]>,
    Requires<[hasPTX<60>, hasSM<30>]>;

// One record for each register/immediate combination of (mask, base, offset).
// The three-letter suffix gives the operand kinds in that order:
// r = Int32Regs register, i = i32imm immediate.
def INT_FNS_rrr
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns i32:$mask, i32:$base, i32:$offset)>;
def INT_FNS_rri
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns i32:$mask, i32:$base, imm:$offset)>;
def INT_FNS_rir
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns i32:$mask, imm:$base, i32:$offset)>;
def INT_FNS_rii
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns i32:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>;
def INT_FNS_iri
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, i32:$base, imm:$offset)>;
def INT_FNS_iir
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, i32:$offset)>;
def INT_FNS_iii
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1902
//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach an address-space check to an atomic fragment.
// The predicate code comes from the AS_match record and calls
// ChkMemSDNodeAddressSpace(N, ...) with the GLOBAL, SHARED or GENERIC
// address-space constant respectively.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;

class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;

class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;
1913
// Two-operand atomic (address + one value) for a single pointer width.
//
//   ptrT / ptrclass  - value type and register class of the address operand
//   regT / regclass  - value type and register class of the data operand and
//                      of the result
//   SpaceStr, OpcStr, TypeStr - pasted after "atom" (in that order) to form
//                      the mnemonic
//   IntOp            - fragment matched by the selection pattern
//   IMMType / IMM    - operand type and node used by the immediate variant
//   Pred             - predicates required by the records
//
// Defines `reg` (data operand in a register) and `imm` (data operand as an
// immediate).  Both are marked mayLoad, mayStore and hasSideEffects.
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  // Both variants print exactly the same text.
  defvar AsmStr =
    "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;";

  let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
    def reg
      : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
                  AsmStr,
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
        Requires<Pred>;

    // For TypeStr ".f16" / ".bf16" the immediate variant is given the single
    // predicate Predicate<"false"> in place of Pred, so it is still defined
    // but its predicate can never hold.
    def imm
      : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
                  AsmStr,
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
        Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")),
                     [Predicate<"false">], Pred)>;
  }
}

// Instantiates F_ATOMIC_2_imp twice: `p32` with an i32 address in Int32Regs
// and `p64` with an i64 address in Int64Regs.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr,
                     OpcStr, IntOp, IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr,
                     OpcStr, IntOp, IMMType, IMM, Pred>;
}
1937
// has 2 operands, neg the second one
//
// Only a register variant (`reg`) exists.  The emitted text is a braced
// sequence that declares a scratch register `temp` of type .s<TypeStr>,
// writes neg.s<TypeStr> of $b into it, and then issues the atomic on
// [$addr] with `temp` as the data operand, typed .u<TypeStr>.  TypeStr is
// therefore pasted after ".s" / ".u" rather than used as a full type suffix.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  defvar AsmStr =
    "{{ \n\t" #
    ".reg \t.s" # TypeStr # " temp; \n\t" #
    "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
    "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
      " \t$dst, [$addr], temp; \n\t" #
    "}}";

  let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
    def reg
      : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
                  AsmStr,
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
        Requires<Pred>;
  }
}

// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (`p32`) and 64-bit (`p64`)
// address operands.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr,
                         OpcStr, IntOp, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr,
                         OpcStr, IntOp, Pred>;
}
1962
// has 3 operands
//
// Three-operand atomic (address + two values $b and $c) for a single pointer
// width.  Four records cover the register/immediate combinations of the two
// data operands:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands use operand type IMMType and are matched with `imm`.
// All four print the same text and share the predicate list Pred.
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  defvar AsmStr =
    "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;";

  let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
    def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b, regclass:$c),
                  AsmStr,
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr),
                               (regT regclass:$b), (regT regclass:$c)))]>,
        Requires<Pred>;

    def imm1
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                  AsmStr,
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr),
                               imm:$b, (regT regclass:$c)))]>,
        Requires<Pred>;

    def imm2
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                  AsmStr,
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr),
                               (regT regclass:$b), imm:$c))]>,
        Requires<Pred>;

    def imm3
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                  AsmStr,
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
        Requires<Pred>;
  }
}

// Instantiates F_ATOMIC_3_imp for 32-bit (`p32`) and 64-bit (`p64`) address
// operands.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr,
                     OpcStr, IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr,
                     OpcStr, IntOp, IMMType, Pred>;
}
2001
// atom_add
//
// Address-space-checked fragments for atomic add.  Suffixes: _g = global,
// _s = shared, _gen = generic.

// 32-bit integer add.
def atomic_load_add_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_i32 node:$a, node:$b)>;
def atomic_load_add_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_i32 node:$a, node:$b)>;
def atomic_load_add_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_i32 node:$a, node:$b)>;

// 64-bit integer add.
def atomic_load_add_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_i64 node:$a, node:$b)>;
def atomic_load_add_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_i64 node:$a, node:$b)>;
def atomic_load_add_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_i64 node:$a, node:$b)>;

// Untyped names: these wrap atomic_load_fadd, not an integer add.
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;
2022
// Atomic add instructions.  Arguments to F_ATOMIC_2 (declared outside this
// section) are, in order: value type, register class, address-space string,
// type string, opcode string, pattern fragment, immediate operand,
// immediate node, and an optional predicate list.
//
// The *_USE_G records pair the generic-space fragment with the ".global"
// space string.  Record order is kept as-is on purpose.

// 32-bit integer.
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
               atomic_load_add_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_i32_gen, i32imm, imm>;

// 64-bit integer.
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_i64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_i64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
               atomic_load_add_i64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_i64_gen, i64imm, imm>;

// f16: held in Int16Regs, gated on hasSM<70> and hasPTX<63>.
defm INT_PTX_ATOM_ADD_G_F16
  : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
               atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_ADD_S_F16
  : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
               atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_ADD_GEN_F16
  : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
               atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;

// bf16: held in Int16Regs, gated on hasSM<90> and hasPTX<78>.
defm INT_PTX_ATOM_ADD_G_BF16
  : F_ATOMIC_2<bf16, Int16Regs, ".global", ".bf16", ".add.noftz",
               atomic_load_add_g, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_S_BF16
  : F_ATOMIC_2<bf16, Int16Regs, ".shared", ".bf16", ".add.noftz",
               atomic_load_add_s, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_GEN_BF16
  : F_ATOMIC_2<bf16, Int16Regs, "", ".bf16", ".add.noftz",
               atomic_load_add_gen, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;

// f32: no extra predicates.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

// f64: gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
2068
2069// atom_sub
2070
// Address-space-qualified fragments for integer atomic subtract
// (global / shared / generic, for 32- and 64-bit values).

// 32-bit.
def atomic_load_sub_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_i32 node:$a, node:$b)>;
def atomic_load_sub_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_i32 node:$a, node:$b)>;
def atomic_load_sub_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_i32 node:$a, node:$b)>;

// 64-bit.
def atomic_load_sub_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_i64 node:$a, node:$b)>;
def atomic_load_sub_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_i64 node:$a, node:$b)>;
def atomic_load_sub_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_i64 node:$a, node:$b)>;
2083
// Atomic subtract instructions.  They are built with F_ATOMIC_2_NEG and the
// ".add" opcode string; the type string is the bare width ("32" / "64").
// NOTE(review): F_ATOMIC_2_NEG is declared outside this section -- by its
// name it presumably negates the operand and emits an add; confirm there.
// Record order is kept exactly as it was.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_i32_g>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_i64_g>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
                   atomic_load_sub_i32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_i32_gen>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_i32_s>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_i64_s>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
                   atomic_load_sub_i64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_i64_gen>;
2100
2101// atom_swap
2102
// Address-space-qualified fragments for atomic swap
// (global / shared / generic, for 32- and 64-bit values).

// 32-bit.
def atomic_swap_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_i32 node:$a, node:$b)>;
def atomic_swap_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_i32 node:$a, node:$b)>;
def atomic_swap_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_i32 node:$a, node:$b)>;

// 64-bit.
def atomic_swap_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_i64 node:$a, node:$b)>;
def atomic_swap_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_i64 node:$a, node:$b)>;
def atomic_swap_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_i64 node:$a, node:$b)>;
2115
// Atomic swap instructions: ".exch" opcode string with untyped ".b32" /
// ".b64" type strings.  The *_USE_G records pair the generic-space fragment
// with the ".global" space string.

// 32-bit.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
               atomic_swap_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_i32_gen, i32imm, imm>;

// 64-bit.
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_i64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_i64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
               atomic_swap_i64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_i64_gen, i64imm, imm>;
2132
2133// atom_max
2134
// Address-space-qualified fragments for atomic max, wrapping
// atomic_load_max_* and atomic_load_umax_* for 32- and 64-bit values.

// atomic_load_max, 32-bit.
def atomic_load_max_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_i32 node:$a, node:$b)>;
def atomic_load_max_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_i32 node:$a, node:$b)>;
def atomic_load_max_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_i32 node:$a, node:$b)>;

// atomic_load_max, 64-bit.
def atomic_load_max_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_i64 node:$a, node:$b)>;
def atomic_load_max_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_i64 node:$a, node:$b)>;
def atomic_load_max_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_i64 node:$a, node:$b)>;

// atomic_load_umax, 32-bit.
def atomic_load_umax_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_i32 node:$a, node:$b)>;
def atomic_load_umax_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_i32 node:$a, node:$b)>;
def atomic_load_umax_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_i32 node:$a, node:$b)>;

// atomic_load_umax, 64-bit.
def atomic_load_umax_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_i64 node:$a, node:$b)>;
def atomic_load_umax_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_i64 node:$a, node:$b)>;
def atomic_load_umax_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_i64 node:$a, node:$b)>;
2159
// Atomic max instructions (".max" opcode string).  The max fragments use the
// ".s32" / ".s64" type strings and the umax fragments use ".u32" / ".u64".
// All 64-bit records carry the hasSM<32> predicate.  The *_USE_G records
// pair the generic-space fragment with the ".global" space string.

// .s32
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
               atomic_load_max_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_i32_gen, i32imm, imm>;

// .s64
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
               atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;

// .u32
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
               atomic_load_umax_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_i32_gen, i32imm, imm>;

// .u64
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
               atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
2192
2193// atom_min
2194
// Address-space-qualified fragments for atomic min, wrapping
// atomic_load_min_* and atomic_load_umin_* for 32- and 64-bit values.

// atomic_load_min, 32-bit.
def atomic_load_min_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_i32 node:$a, node:$b)>;
def atomic_load_min_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_i32 node:$a, node:$b)>;
def atomic_load_min_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_i32 node:$a, node:$b)>;

// atomic_load_min, 64-bit.
def atomic_load_min_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_i64 node:$a, node:$b)>;
def atomic_load_min_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_i64 node:$a, node:$b)>;
def atomic_load_min_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_i64 node:$a, node:$b)>;

// atomic_load_umin, 32-bit.
def atomic_load_umin_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_i32 node:$a, node:$b)>;
def atomic_load_umin_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_i32 node:$a, node:$b)>;
def atomic_load_umin_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_i32 node:$a, node:$b)>;

// atomic_load_umin, 64-bit.
def atomic_load_umin_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_i64 node:$a, node:$b)>;
def atomic_load_umin_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_i64 node:$a, node:$b)>;
def atomic_load_umin_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_i64 node:$a, node:$b)>;
2219
// Atomic min instructions (".min" opcode string).  The min fragments use the
// ".s32" / ".s64" type strings and the umin fragments use ".u32" / ".u64".
// All 64-bit records carry the hasSM<32> predicate.  The *_USE_G records
// pair the generic-space fragment with the ".global" space string.

// .s32
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
               atomic_load_min_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_i32_gen, i32imm, imm>;

// .s64
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
               atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;

// .u32
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
               atomic_load_umin_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_i32_gen, i32imm, imm>;

// .u64
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
               atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
2252
2253// atom_inc  atom_dec
2254
// Address-space-qualified fragments for atomic increment / decrement.
// Unlike the neighbouring sections these wrap the NVVM intrinsics
// int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32; only 32-bit
// forms are defined here.

// Increment.
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;

// Decrement.
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
2267
// Atomic increment / decrement instructions: ".inc" and ".dec" opcode
// strings, ".u32" type string only.  The *_USE_G records pair the
// generic-space fragment with the ".global" space string.

// .inc
defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;

// .dec
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
2284
2285// atom_and
2286
// Address-space-qualified fragments for atomic bitwise AND
// (global / shared / generic, for 32- and 64-bit values).

// 32-bit.
def atomic_load_and_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i32 node:$a, node:$b)>;
def atomic_load_and_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i32 node:$a, node:$b)>;
def atomic_load_and_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_i32 node:$a, node:$b)>;

// 64-bit.
def atomic_load_and_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i64 node:$a, node:$b)>;
def atomic_load_and_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i64 node:$a, node:$b)>;
def atomic_load_and_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_i64 node:$a, node:$b)>;
2299
// Atomic AND instructions: ".and" opcode string with ".b32" / ".b64" type
// strings.  The 64-bit records carry the hasSM<32> predicate.  The *_USE_G
// records pair the generic-space fragment with the ".global" space string.

// 32-bit.
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
               atomic_load_and_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_i32_gen, i32imm, imm>;

// 64-bit.
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
               atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
2316
2317// atom_or
2318
// Address-space-qualified fragments for atomic bitwise OR
// (global / shared / generic, for 32- and 64-bit values).

// 32-bit.
def atomic_load_or_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i32 node:$a, node:$b)>;
def atomic_load_or_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i32 node:$a, node:$b)>;
def atomic_load_or_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_i32 node:$a, node:$b)>;

// 64-bit.
def atomic_load_or_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i64 node:$a, node:$b)>;
def atomic_load_or_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i64 node:$a, node:$b)>;
def atomic_load_or_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_i64 node:$a, node:$b)>;
2331
// Atomic OR instructions: ".or" opcode string with ".b32" / ".b64" type
// strings.  The 64-bit records carry the hasSM<32> predicate.  The *_USE_G
// records pair the generic-space fragment with the ".global" space string.
// Note the record order here (G, GEN, GEN_USE_G, S) differs from the other
// sections; it is kept exactly as it was.

// 32-bit.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
               atomic_load_or_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_i32_s, i32imm, imm>;

// 64-bit.
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
               atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_i64_s, i64imm, imm, [hasSM<32>]>;
2348
2349// atom_xor
2350
// Address-space-qualified fragments for atomic bitwise XOR
// (global / shared / generic, for 32- and 64-bit values).

// 32-bit.
def atomic_load_xor_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i32 node:$a, node:$b)>;
def atomic_load_xor_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i32 node:$a, node:$b)>;
def atomic_load_xor_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_i32 node:$a, node:$b)>;

// 64-bit.
def atomic_load_xor_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i64 node:$a, node:$b)>;
def atomic_load_xor_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i64 node:$a, node:$b)>;
def atomic_load_xor_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_i64 node:$a, node:$b)>;
2363
// Atomic XOR instructions: ".xor" opcode string with ".b32" / ".b64" type
// strings.  The 64-bit records carry the hasSM<32> predicate.  The *_USE_G
// records pair the generic-space fragment with the ".global" space string.

// 32-bit.
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_i32_gen, i32imm, imm>;

// 64-bit.
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
2380
2381// atom_cas
2382
// Address-space-qualified fragments for atomic compare-and-swap.  These take
// three operands ($a, $b, $c) and exist for 16-, 32- and 64-bit values.

// 16-bit.
def atomic_cmp_swap_i16_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i16_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i16_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;

// 32-bit.
def atomic_cmp_swap_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;

// 64-bit.
def atomic_cmp_swap_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2401
// Atomic compare-and-swap instructions, built with F_ATOMIC_3 (".cas" opcode
// string, ".b16" / ".b32" / ".b64" type strings).  The 16-bit records are
// gated on hasSM<70> and hasPTX<63>; the 32- and 64-bit records use the
// default (empty) predicate list.  The *_USE_G records pair the
// generic-space fragment with the ".global" space string.

// 16-bit.
defm INT_PTX_ATOM_CAS_G_16
  : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas",
               atomic_cmp_swap_i16_g, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_S_16
  : F_ATOMIC_3<i16, Int16Regs, ".shared", ".b16", ".cas",
               atomic_cmp_swap_i16_s, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_GEN_16
  : F_ATOMIC_3<i16, Int16Regs, "", ".b16", ".cas",
               atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_GEN_16_USE_G
  : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas",
               atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;

// 32-bit.
defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_i32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_i32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_i32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_i32_gen, i32imm>;

// 64-bit.
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_i64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_i64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_i64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_i64_gen, i64imm>;
2426
// Support for scoped atomic operations.  Matches
//   int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented:
//  - 'space' is limited to generic, as it's the only one needed to support
//    CUDA;
//  - 'scope' = 'gpu' is the default and is handled by the regular atomic
//    instructions.
//
// ATOM23_impl is the common instruction shape shared by the 2- and
// 3-operand scoped atomics: one $result output in `regclass`, the caller's
// `ins` dag as inputs, `AsmStr` as the assembly text, and a single pattern
// that sets $result (typed regT) from `Operands`.  `Preds` becomes the
// Requires<> list.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass,
                  list<Predicate> Preds, dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result),
              ins,
              AsmStr,
              [(set regT:$result, Operands)]>,
    Requires<Preds>;
2439
// Define instruction variants for all addressing modes.
//
// Emits the anonymous two-operand scoped-atomic instructions for one
// intrinsic.  For each pointer width (i32 in Int32Regs, i64 in Int64Regs)
// there is a form taking $b in a register and a form taking $b as an
// immediate.
//
//   AsmStr   - assembly string for every variant.
//   Intr     - intrinsic being matched.
//   regT     - value type of the result and of a register $b.
//   regclass - register class of the result and of a register $b.
//   ImmType  - operand class used for an immediate $b.
//   Imm      - node matched for an immediate $b (e.g. imm or fpimm).
//   ImmTy    - value type the immediate is cast to in the pattern.
//   Preds    - predicates required by every variant.
//
// Only 32- and 64-bit address operands are generated, the same set that
// ATOM3P_impl defines.  NVPTX pointers are never 16 bits wide, so variants
// taking the address in Int16Regs could not be selected and are not emitted.
multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register forms of $b carry AddedComplexity = 1; the immediate forms
  // below are left at the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr i32:$src, regT:$b)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr i64:$src, regT:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr i32:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr i64:$src, (ImmTy Imm:$b))>;
}
2469
// Emits the anonymous three-operand scoped-atomic instructions for one
// intrinsic.  For each pointer width (i32 in Int32Regs, i64 in Int64Regs)
// all four register/immediate combinations of $b and $c are defined.
// AddedComplexity is 2 for reg/reg, 1 when exactly one operand is an
// immediate, and the default for imm/imm.  Immediates are cast to ImmTy
// explicitly in the patterns.
multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // $b register, $c register.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr i32:$src, regT:$b, regT:$c)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr i64:$src, regT:$b, regT:$c)>;
  }

  let AddedComplexity = 1 in {
    // $b immediate, $c register.
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr i32:$src, (ImmTy Imm:$b), regT:$c)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr i64:$src, (ImmTy Imm:$b), regT:$c)>;

    // $b register, $c immediate.
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr i32:$src, regT:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr i64:$src, regT:$b, (ImmTy Imm:$c))>;
  }

  // $b immediate, $c immediate.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr i32:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr i64:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
2504
// Constructs intrinsic name and instruction asm strings.
//
// Two-operand flavour: builds the assembly text
//   atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;
// and looks up the intrinsic record
//   int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
// then hands both to ATOM2P_impl.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // The "gen" space and the "gpu" scope are left out of the mnemonic.
  defvar AsmSpace = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar AsmScope = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar AsmText  = "atom" # AsmSpace # AsmScope
                    # "." # OpStr # "." # TypeStr
                    # " \t$result, [$src], $b;";

  // An empty scope contributes no suffix to the intrinsic name.
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM2P_impl<AsmText, !cast<Intrinsic>(IntrName),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand flavour of the name/asm-string builder: builds the assembly
// text
//   atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b, $c;
// and looks up the intrinsic record
//   int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
// then hands both to ATOM3P_impl.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // The "gen" space and the "gpu" scope are left out of the mnemonic.
  defvar AsmSpace = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar AsmScope = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar AsmText  = "atom" # AsmSpace # AsmScope
                    # "." # OpStr # "." # TypeStr
                    # " \t$result, [$src], $b, $c;";

  // An empty scope contributes no suffix to the intrinsic name.
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM3P_impl<AsmText, !cast<Intrinsic>(IntrName),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2534
// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers, so the only
// instantiation is "_gen_", which forwards to ATOM2N_impl with the space
// string "gen".
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand address-space fan-out.  Only the generic space is
// instantiated: "_gen_" forwards to ATOM3N_impl with the space string "gen".
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2549
// Constructs variants for different scopes of atomic op.
//
// Two-operand flavour: instantiates ATOM2A_impl for the "cta" and "sys"
// scopes.  Both get hasAtomScope appended to the caller's predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Caller's predicates plus hasAtomScope, shared by both scopes.
  defvar ScopedPreds = !listconcat(Preds,[hasAtomScope]);

  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
// Scope layer for three-operand atomics: emits the "cta" and "sys" scoped
// variants. ".gpu"-scoped atomics are not defined because they do the same
// thing as the regular, non-scoped atomics defined elsewhere.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Every scoped variant needs hasAtomScope on top of the caller's predicates.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
2575
// atom.add
//
// One scoped-atomic family per operand type. The trailing list holds the
// predicates that the type itself requires; ATOM2S_impl appends the scope
// predicate.
multiclass ATOM2_add_impl<string OpStr> {
  // Integer operand types.
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          i32, Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          i32, Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          i64, Int64Regs, i64imm, imm, i64, []>;

  // Floating-point operand types.
  defm _bf16 : ATOM2S_impl<OpStr, "f", "bf16",
                           bf16, Int16Regs, bf16imm, fpimm, bf16,
                           [hasSM<90>, hasPTX<78>]>;
  defm _f16 : ATOM2S_impl<OpStr, "f", "f16",
                          f16, Int16Regs, f16imm, fpimm, f16,
                          [hasSM<70>, hasPTX<63>]>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32",
                          f32, Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64",
                          f64, Float64Regs, f64imm, fpimm, f64,
                          [hasAtomAddF64]>;
}
2590
// atom.{and,or,xor}
//
// Scoped bitwise atomics on untyped 32- and 64-bit values. The 64-bit form
// is additionally gated on hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          i32, Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomBitwise64]>;
}
2597
// atom.exch
//
// Scoped exchange atomics on untyped 32- and 64-bit values; neither width
// adds a type-specific predicate.
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          i32, Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          i64, Int64Regs, i64imm, imm, i64, []>;
}
2603
// atom.{min,max}
//
// Scoped signed/unsigned min and max atomics. Both 64-bit forms are
// additionally gated on hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
  // 32-bit forms.
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          i32, Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          i32, Int32Regs, i32imm, imm, i32, []>;

  // 64-bit forms.
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
}
2613
// atom.{inc,dec}
//
// Scoped increment/decrement atomics; only the u32 form is defined.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          i32, Int32Regs, i32imm, imm, i32, []>;
}
2618
// atom.cas
//
// Scoped compare-and-swap atomics (three-operand form) on untyped 16-, 32-
// and 64-bit values; no width adds a type-specific predicate.
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b16 : ATOM3S_impl<OpStr, "i", "b16",
                          i16, Int16Regs, i16imm, imm, i16, []>;
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          i32, Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          i64, Int64Regs, i64imm, imm, i64, []>;
}
2625
// Scoped atomic instruction families, one per PTX atom operation. The string
// argument is the operation name handed to the per-operation multiclass.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
2636
2637//-----------------------------------
2638// Support for ldu on sm_20 or later
2639//-----------------------------------
2640
2641// Don't annotate ldu instructions as mayLoad, as they load from memory that is
2642// read-only in a kernel.
2643
2644// Scalar
2645
// Scalar ldu.global. One instruction is defined per address-operand form
// (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64). TyStr supplies the rest of
// the assembly text after "ldu.global.". No variant carries a selection
// pattern, and every variant requires hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm     = "ldu.global." # TyStr;
  defvar ResultOuts = (outs regclass:$result);

  def areg   : NVPTXInst<ResultOuts, (ins Int32Regs:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<ResultOuts, (ins Int64Regs:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<ResultOuts, (ins imemAny:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<ResultOuts, (ins MEMri:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<ResultOuts, (ins MEMri64:$src), LduAsm, []>,
               Requires<[hasLDU]>;
}
2663
// Scalar ldu.global instantiations. i8 and i16 both use Int16Regs for the
// result register.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2670
2671// vector
2672
// Elementized vector ldu
//
// Two-element ldu.global: each instruction writes two separate `regclass`
// registers ($dst1, $dst2). One instruction is defined per address-operand
// form; none carries a selection pattern.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm  = "ldu.global." # TyStr;
  defvar DstOuts = (outs regclass:$dst1, regclass:$dst2);

  def _areg32 : NVPTXInst<DstOuts, (ins Int32Regs:$src), LduAsm, []>;
  def _areg64 : NVPTXInst<DstOuts, (ins Int64Regs:$src), LduAsm, []>;
  def _ari32  : NVPTXInst<DstOuts, (ins MEMri:$src), LduAsm, []>;
  def _ari64  : NVPTXInst<DstOuts, (ins MEMri64:$src), LduAsm, []>;
  def _avar   : NVPTXInst<DstOuts, (ins imemAny:$src), LduAsm, []>;
}
2691
// Four-element ldu.global: each instruction writes four separate `regclass`
// registers ($dst1..$dst4). One instruction is defined per address-operand
// form; none carries a selection pattern.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm  = "ldu.global." # TyStr;
  defvar DstOuts = (outs regclass:$dst1, regclass:$dst2,
                         regclass:$dst3, regclass:$dst4);

  def _areg32 : NVPTXInst<DstOuts, (ins Int32Regs:$src), LduAsm, []>;
  def _areg64 : NVPTXInst<DstOuts, (ins Int64Regs:$src), LduAsm, []>;
  def _ari32  : NVPTXInst<DstOuts, (ins MEMri:$src), LduAsm, []>;
  def _ari64  : NVPTXInst<DstOuts, (ins MEMri64:$src), LduAsm, []>;
  def _avar   : NVPTXInst<DstOuts, (ins imemAny:$src), LduAsm, []>;
}
2709
// Two-element vector ldu.global instantiations.
defm INT_PTX_LDU_G_v2i8_ELE :
  VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE :
  VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE :
  VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE :
  VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE :
  VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE :
  VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu.global instantiations. The f16 and f16x2 entries
// load untyped b16 / b32 elements into integer registers.
defm INT_PTX_LDU_G_v4i8_ELE :
  VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE :
  VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE :
  VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE :
  VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE :
  VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE :
  VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
2739
2740
2741//-----------------------------------
2742// Support for ldg on sm_35 or later
2743//-----------------------------------
2744
2745// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2746// non-coherent texture cache, and therefore the values read must be read-only
2747// during the lifetime of the kernel.
2748
// Scalar ld.global.nc. One instruction is defined per address-operand form
// (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64). TyStr supplies the rest of
// the assembly text after "ld.global.nc.". No variant carries a selection
// pattern, and every variant requires hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm     = "ld.global.nc." # TyStr;
  defvar ResultOuts = (outs regclass:$result);

  def areg   : NVPTXInst<ResultOuts, (ins Int32Regs:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<ResultOuts, (ins Int64Regs:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<ResultOuts, (ins imemAny:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<ResultOuts, (ins MEMri:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<ResultOuts, (ins MEMri64:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
}
2766
// Scalar ld.global.nc instantiations. i8 and i16 both use Int16Regs for the
// result register.
defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2779
2780// vector
2781
// Elementized vector ldg
//
// Two-element ld.global.nc: each instruction writes two separate `regclass`
// registers ($dst1, $dst2). One instruction is defined per address-operand
// form; none carries a selection pattern.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm  = "ld.global.nc." # TyStr;
  defvar DstOuts = (outs regclass:$dst1, regclass:$dst2);

  def _areg32 : NVPTXInst<DstOuts, (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64 : NVPTXInst<DstOuts, (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32  : NVPTXInst<DstOuts, (ins MEMri:$src), LdgAsm, []>;
  def _ari64  : NVPTXInst<DstOuts, (ins MEMri64:$src), LdgAsm, []>;
  def _avar   : NVPTXInst<DstOuts, (ins imemAny:$src), LdgAsm, []>;
}
2800
// Four-element ld.global.nc: each instruction writes four separate `regclass`
// registers ($dst1..$dst4). One instruction is defined per address-operand
// form; none carries a selection pattern.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm  = "ld.global.nc." # TyStr;
  defvar DstOuts = (outs regclass:$dst1, regclass:$dst2,
                         regclass:$dst3, regclass:$dst4);

  def _areg32 : NVPTXInst<DstOuts, (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64 : NVPTXInst<DstOuts, (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32  : NVPTXInst<DstOuts, (ins MEMri:$src), LdgAsm, []>;
  def _ari64  : NVPTXInst<DstOuts, (ins MEMri64:$src), LdgAsm, []>;
  def _avar   : NVPTXInst<DstOuts, (ins imemAny:$src), LdgAsm, []>;
}
2818
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper
// loads.

// Two-element vector ld.global.nc instantiations.
defm INT_PTX_LDG_G_v2i8_ELE :
  VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE :
  VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE :
  VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE :
  VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE :
  VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE :
  VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ld.global.nc instantiations.
defm INT_PTX_LDG_G_v4i8_ELE :
  VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE :
  VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE :
  VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE :
  VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
2840
2841
// "cvta.<Str>" instructions in a 32-bit and a 64-bit register form. The
// 32-bit def takes the bare defm name; the 64-bit def appends "_64". Neither
// carries a selection pattern.
multiclass NG_TO_G<string Str> {
  def "" : NVPTXInst<(outs Int32Regs:$result),
                     (ins Int32Regs:$src),
                     "cvta." # Str # ".u32 \t$result, $src;",
                     []>;
  def _64 : NVPTXInst<(outs Int64Regs:$result),
                      (ins Int64Regs:$src),
                      "cvta." # Str # ".u64 \t$result, $src;",
                      []>;
}
2848
// "cvta.to.<Str>" instructions in a 32-bit and a 64-bit register form. The
// 32-bit def takes the bare defm name; the 64-bit def appends "_64". Neither
// carries a selection pattern.
multiclass G_TO_NG<string Str> {
  def "" : NVPTXInst<(outs Int32Regs:$result),
                     (ins Int32Regs:$src),
                     "cvta.to." # Str # ".u32 \t$result, $src;",
                     []>;
  def _64 : NVPTXInst<(outs Int64Regs:$result),
                      (ins Int64Regs:$src),
                      "cvta.to." # Str # ".u64 \t$result, $src;",
                      []>;
}
2855
// cvta.<space> instructions for the local, shared, global and const spaces.
defm cvta_local     : NG_TO_G<"local">;
defm cvta_shared    : NG_TO_G<"shared">;
defm cvta_global    : NG_TO_G<"global">;
defm cvta_const     : NG_TO_G<"const">;

// cvta.to.<space> instructions for the same four spaces.
defm cvta_to_local  : G_TO_NG<"local">;
defm cvta_to_shared : G_TO_NG<"shared">;
defm cvta_to_global : G_TO_NG<"global">;
defm cvta_to_const  : G_TO_NG<"const">;
2865
// nvvm.ptr.param.to.gen: selected to cvta.param, in the 32-bit or 64-bit
// form according to the pointer width.
defm cvta_param : NG_TO_G<"param">;

def : Pat<(int_nvvm_ptr_param_to_gen i32:$src), (cvta_param $src)>;
def : Pat<(int_nvvm_ptr_param_to_gen i64:$src), (cvta_param_64 $src)>;

// nvvm.ptr.gen.to.param: no instruction is emitted; the result is the source
// register itself.
def : Pat<(int_nvvm_ptr_gen_to_param i32:$src), (i32 Int32Regs:$src)>;
def : Pat<(int_nvvm_ptr_gen_to_param i64:$src), (i64 Int64Regs:$src)>;
2881
// nvvm.move intrinsics
// Shared shape of the nvvm.move instructions: a register-to-register
// "mov.<TypeSuffix>" within register class RC, selected from intrinsic Intr
// applied to a value of type VT.
class NVVMMove<string TypeSuffix, NVPTXRegClass RC, ValueType VT,
               Intrinsic Intr>
    : NVPTXInst<(outs RC:$r), (ins RC:$s),
                "mov." # TypeSuffix # " \t$r, $s;",
                [(set VT:$r, (Intr VT:$s))]>;

// Integer moves use the untyped .b<N> suffixes.
def nvvm_move_i16    : NVVMMove<"b16", Int16Regs, i16, int_nvvm_move_i16>;
def nvvm_move_i32    : NVVMMove<"b32", Int32Regs, i32, int_nvvm_move_i32>;
def nvvm_move_i64    : NVVMMove<"b64", Int64Regs, i64, int_nvvm_move_i64>;

// Floating-point moves.
def nvvm_move_float  : NVVMMove<"f32", Float32Regs, f32, int_nvvm_move_float>;
def nvvm_move_double : NVVMMove<"f64", Float64Regs, f64, int_nvvm_move_double>;

// Pointer moves: one intrinsic, selected by pointer width.
def nvvm_move_ptr32  : NVVMMove<"u32", Int32Regs, i32, int_nvvm_move_ptr>;
def nvvm_move_ptr64  : NVVMMove<"u64", Int64Regs, i64, int_nvvm_move_ptr>;
2911
2912// @TODO: Are these actually needed, or will we always just see symbols
2913// copied to registers first?
2914/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2915                             "mov.u32 \t$r, $s;",
2916                             [(set Int32Regs:$r,
2917                             (int_nvvm_move_ptr texternalsym:$s))]>;
2918def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2919                             "mov.u64 \t$r, $s;",
2920                             [(set Int64Regs:$r,
2921                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2922
// Moves an `imem` operand into a 64-bit register with mov.u64. Has no
// selection pattern.
def texsurf_handles : NVPTXInst<(outs Int64Regs:$result),
                                (ins imem:$src),
                                "mov.u64 \t$result, $src;",
                                []>;
2926
2927//-----------------------------------
// Compiler error / warning intrinsics
// - Ignored in codegen: each one is emitted only as a PTX comment.
2930//-----------------------------------
2931
// Shared shape of the compiler warn/error instructions: no results, a single
// register operand $a, and assembly text that is only a "//" comment naming
// the intrinsic.
class NVVMCompilerDiag<string Kind, NVPTXRegClass RC, ValueType VT,
                       Intrinsic Intr>
    : NVPTXInst<(outs), (ins RC:$a),
                "// llvm.nvvm.compiler." # Kind # "()",
                [(Intr VT:$a)]>;

def INT_NVVM_COMPILER_WARN_32
  : NVVMCompilerDiag<"warn", Int32Regs, i32, int_nvvm_compiler_warn>;
def INT_NVVM_COMPILER_WARN_64
  : NVVMCompilerDiag<"warn", Int64Regs, i64, int_nvvm_compiler_warn>;
def INT_NVVM_COMPILER_ERROR_32
  : NVVMCompilerDiag<"error", Int32Regs, i32, int_nvvm_compiler_error>;
def INT_NVVM_COMPILER_ERROR_64
  : NVVMCompilerDiag<"error", Int64Regs, i64, int_nvvm_compiler_error>;
2944
2945
2946// isspacep
2947
// "isspacep.<suffix>" in a 32-bit and a 64-bit address form. Each def
// produces an i1 in Int1Regs, is selected from `Intr` applied to the address,
// and is gated on `Preds` (empty by default).
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
  defvar IsSpaceAsm = "isspacep." # suffix # "\t$d, $a;";

  def _32 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
                      IsSpaceAsm,
                      [(set i1:$d, (Intr i32:$a))]>,
            Requires<Preds>;
  def _64 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                      IsSpaceAsm,
                      [(set i1:$d, (Intr i64:$a))]>,
            Requires<Preds>;
}
2958
// isspacep instantiations. The const form requires PTX 3.1; the
// shared::cluster form requires PTX 7.8 and sm_90; the rest are ungated.
defm isspace_const
  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
defm isspace_global
  : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local
  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared
  : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster
  : ISSPACEP<"shared::cluster", int_nvvm_isspacep_shared_cluster,
             [hasPTX<78>, hasSM<90>]>;
2966
// Special register reads
//
// Copies a SpecialRegs register into a 32-bit integer register with mov.b32.
// It has no selection pattern of its own; separate Pat records select it.
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
                            "mov.b32 \t$d, $r;",
                            []>;
2971
// Select each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) to a
// MOV_SPECIAL of the matching ENVREG<N> register. Both records are looked up
// by name, so the loop yields the same 32 patterns, in the same order, as
// writing them out one by one.
foreach i = 0...31 in {
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;
}
3004
3005
// swap.lo.hi.b64: rebuild the 64-bit value with V2I32toI64, passing the
// I64toI32H half of the source first and the I64toI32L half second.
def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
          (V2I32toI64 (I64toI32H $src), (I64toI32L $src))>;
3009
3010//-----------------------------------
3011// Texture Intrinsics
3012//-----------------------------------
3013
3014// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
3015// also defined in NVPTXReplaceImageHandles.cpp
3016
3017// texmode_independent
3018let IsTex = true, IsTexModeUnified = false in {
3019// Texture fetch instructions using handles
3020
// Common shape of a 1D texture fetch: four `outtype` results ($r, $g, $b, $a)
// and one `intype` coordinate ($x). `texsamp` supplies the leading $t / $s
// handle operands, and `pattern` defaults to no selection pattern.
class TEX_1D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  dag texsamp, list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                     # "[$t, $s, \\{$x\\}];",
                pattern>;
3028
// Handle-form variants of a 1D texture fetch. In the def suffix, R means the
// handle operand is an Int64Regs register and I means it is an i64imm; the
// first letter describes $t and the second describes $s. Only _RR carries a
// selection pattern (for `intr`).
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  Intrinsic intr> {
  def _RR : TEX_1D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s),
                        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                              (intr i64:$t, i64:$s, intype:$x))]>;

  // Forms with at least one immediate handle; no selection pattern.
  def _RI : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3042
// 1D texture fetches. In each name the first type is the result element type
// and the second is the coordinate type.
defm TEX_1D_F32_S32
  : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs,
           int_nvvm_tex_1d_v4f32_s32>;
defm TEX_1D_F32_F32
  : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs,
           int_nvvm_tex_1d_v4f32_f32>;
defm TEX_1D_S32_S32
  : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs,
           int_nvvm_tex_1d_v4s32_s32>;
defm TEX_1D_S32_F32
  : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_1d_v4s32_f32>;
defm TEX_1D_U32_S32
  : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs,
           int_nvvm_tex_1d_v4u32_s32>;
defm TEX_1D_U32_F32
  : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_1d_v4u32_f32>;
3055
// Common shape of a 1D texture fetch with an explicit level of detail: four
// `outtype` results plus `intype` operands $x and $lod. `texsamp` supplies
// the leading $t / $s handle operands; `pattern` defaults to no selection
// pattern.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp,
                        list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                     # "[$t, $s, \\{$x\\}], $lod;",
                pattern>;
3064
// Handle-form variants of a 1D texture fetch with explicit LOD. In the def
// suffix, R means an Int64Regs handle and I means an i64imm handle; the first
// letter describes $t and the second describes $s. Only _RR carries a
// selection pattern (for `intr`).
multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_LEVEL_base<
                inst, outtype, intype,
                (ins Int64Regs:$t, Int64Regs:$s),
                [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                      (intr i64:$t, i64:$s, intype:$x, intype:$lod))]>;

  // Forms with at least one immediate handle; no selection pattern.
  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}
3078
// 1D texture fetches with explicit LOD; all take f32 coordinates.
defm TEX_1D_F32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs,
                 int_nvvm_tex_1d_level_v4f32_f32>;
defm TEX_1D_S32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_1d_level_v4s32_f32>;
defm TEX_1D_U32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_1d_level_v4u32_f32>;
3088
// Common shape of a 1D texture fetch with explicit gradients: four `outtype`
// results plus `intype` operands $x, $gradx and $grady. `texsamp` supplies
// the leading $t / $s handle operands; `pattern` defaults to no selection
// pattern.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp,
                       list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
                     # " \\{$gradx\\}, \\{$grady\\};",
                pattern>;
3098
// Handle-form variants of a 1D texture fetch with explicit gradients. In the
// def suffix, R means an Int64Regs handle and I means an i64imm handle; the
// first letter describes $t and the second describes $s. Only _RR carries a
// selection pattern (for `intr`).
multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_GRAD_base<
                inst, outtype, intype,
                (ins Int64Regs:$t, Int64Regs:$s),
                [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                      (intr i64:$t, i64:$s,
                            intype:$x, intype:$gradx, intype:$grady))]>;

  // Forms with at least one immediate handle; no selection pattern.
  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}
3112
// 1D texture fetches with explicit gradients; all take f32 coordinates.
defm TEX_1D_F32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs,
              int_nvvm_tex_1d_grad_v4f32_f32>;
defm TEX_1D_S32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs,
              int_nvvm_tex_1d_grad_v4s32_f32>;
defm TEX_1D_U32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs,
              int_nvvm_tex_1d_grad_v4u32_f32>;
3122
// Common shape of a 1D-array texture fetch: four `outtype` results, an
// Int32Regs operand $l and an `intype` coordinate $x. `texsamp` supplies the
// leading $t / $s handle operands; `pattern` defaults to no selection
// pattern.
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp,
                        list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                     # "[$t, $s, \\{$l, $x\\}];",
                pattern>;
3131
// Handle-form variants of a 1D-array texture fetch. In the def suffix, R
// means an Int64Regs handle and I means an i64imm handle; the first letter
// describes $t and the second describes $s. Only _RR carries a selection
// pattern (for `intr`).
multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_ARRAY_base<
                inst, outtype, intype,
                (ins Int64Regs:$t, Int64Regs:$s),
                [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                      (intr i64:$t, i64:$s, i32:$l, intype:$x))]>;

  // Forms with at least one immediate handle; no selection pattern.
  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}
3145
// 1D-array texture fetches. In each name the first type is the result
// element type and the second is the coordinate type.
defm TEX_1D_ARRAY_F32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs,
               int_nvvm_tex_1d_array_v4f32_f32>;
defm TEX_1D_ARRAY_F32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs,
               int_nvvm_tex_1d_array_v4f32_s32>;
defm TEX_1D_ARRAY_S32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs,
               int_nvvm_tex_1d_array_v4s32_s32>;
defm TEX_1D_ARRAY_S32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs,
               int_nvvm_tex_1d_array_v4s32_f32>;
defm TEX_1D_ARRAY_U32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs,
               int_nvvm_tex_1d_array_v4u32_s32>;
defm TEX_1D_ARRAY_U32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs,
               int_nvvm_tex_1d_array_v4u32_f32>;
3164
// Common shape of a 1D-array texture fetch with explicit LOD: four `outtype`
// results, an Int32Regs operand $l, and `intype` operands $x and $lod.
// `texsamp` supplies the leading $t / $s handle operands; `pattern` defaults
// to no selection pattern.
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp,
                              list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                     # " [$t, $s, \\{$l, $x\\}], $lod;",
                pattern>;
3174
// Handle-form variants of a 1D-array texture fetch with explicit LOD. In the
// def suffix, R means an Int64Regs handle and I means an i64imm handle; the
// first letter describes $t and the second describes $s. Only _RR carries a
// selection pattern (for `intr`).
multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_ARRAY_LEVEL_base<
                inst, outtype, intype,
                (ins Int64Regs:$t, Int64Regs:$s),
                [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                      (intr i64:$t, i64:$s, i32:$l,
                            intype:$x, intype:$lod))]>;

  // Forms with at least one immediate handle; no selection pattern.
  def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, i64imm:$s)>;
}
3188
// Instantiations of TEX_1D_ARRAY_LEVEL.  Only f32 coordinates are
// instantiated here; the s32 and u32 results both use Int32Regs.
defm TEX_1D_ARRAY_F32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs,
                     int_nvvm_tex_1d_array_level_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs,
                     int_nvvm_tex_1d_array_level_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs,
                     int_nvvm_tex_1d_array_level_v4u32_f32>;
3198
// Shared instruction shape for the TEX_1D_ARRAY_GRAD variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $gradx and $grady.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {l, x}], {gradx}, {grady};
class TEX_1D_ARRAY_GRAD_base<string inst,
                             NVPTXRegClass outtype,
                             NVPTXRegClass intype,
                             dag texsamp,
                             list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$l, $x\\}],",
                 " \\{$gradx\\}, \\{$grady\\};"),
      pattern>;
3209
// Produces the _RR/_RI/_IR/_II records of a TEX_1D_ARRAY_GRAD instruction.
// The suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only
// _RR carries a selection pattern (it matches `intr`).
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_1D_ARRAY_GRAD_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, i32:$l, intype:$x,
                    intype:$gradx, intype:$grady))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_1D_ARRAY_GRAD_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_1D_ARRAY_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_1D_ARRAY_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3224
// Instantiations of TEX_1D_ARRAY_GRAD.  Only f32 coordinates are
// instantiated here; the s32 and u32 results both use Int32Regs.
defm TEX_1D_ARRAY_F32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs,
                    int_nvvm_tex_1d_array_grad_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tex_1d_array_grad_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tex_1d_array_grad_v4u32_f32>;
3234
// Shared instruction shape for the TEX_2D variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x and $y.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {x, y}];
class TEX_2D_base<string inst,
                  NVPTXRegClass outtype,
                  NVPTXRegClass intype,
                  dag texsamp,
                  list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y\\}];"),
      pattern>;
3242
// Produces the _RR/_RI/_IR/_II records of a TEX_2D instruction.  The suffix
// letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR carries a
// selection pattern (it matches `intr`).
multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_2D_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3253
// Instantiations of TEX_2D, one per (result type, coordinate type) pair.
// The s32 and u32 results both use Int32Regs.
defm TEX_2D_F32_F32
  : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs,
           int_nvvm_tex_2d_v4f32_f32>;
defm TEX_2D_F32_S32
  : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs,
           int_nvvm_tex_2d_v4f32_s32>;
defm TEX_2D_S32_S32
  : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs,
           int_nvvm_tex_2d_v4s32_s32>;
defm TEX_2D_S32_F32
  : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_2d_v4s32_f32>;
defm TEX_2D_U32_S32
  : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs,
           int_nvvm_tex_2d_v4u32_s32>;
defm TEX_2D_U32_F32
  : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_2d_v4u32_f32>;
3266
// Shared instruction shape for the TEX_2D_LEVEL variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $lod.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {x, y}], lod;
class TEX_2D_LEVEL_base<string inst,
                        NVPTXRegClass outtype,
                        NVPTXRegClass intype,
                        dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y\\}], $lod;"),
      pattern>;
3276
// Produces the _RR/_RI/_IR/_II records of a TEX_2D_LEVEL instruction.  The
// suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR
// carries a selection pattern (it matches `intr`).
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_2D_LEVEL_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s,
                    intype:$x, intype:$y, intype:$lod))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_2D_LEVEL_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_2D_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_2D_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3290
// Instantiations of TEX_2D_LEVEL.  Only f32 coordinates are instantiated
// here; the s32 and u32 results both use Int32Regs.
defm TEX_2D_F32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs,
                 int_nvvm_tex_2d_level_v4f32_f32>;
defm TEX_2D_S32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_2d_level_v4s32_f32>;
defm TEX_2D_U32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_2d_level_v4u32_f32>;
3300
// Shared instruction shape for the TEX_2D_GRAD variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient operands.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form:
//   inst {r, g, b, a}, [t, s, {x, y}], {gradx0, gradx1}, {grady0, grady1};
class TEX_2D_GRAD_base<string inst,
                       NVPTXRegClass outtype,
                       NVPTXRegClass intype,
                       dag texsamp,
                       list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y\\}],",
                 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
      pattern>;
3312
// Produces the _RR/_RI/_IR/_II records of a TEX_2D_GRAD instruction.  The
// suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR
// carries a selection pattern (it matches `intr`).
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_2D_GRAD_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, intype:$x, intype:$y,
                    intype:$gradx0, intype:$gradx1,
                    intype:$grady0, intype:$grady1))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_2D_GRAD_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_2D_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_2D_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3328
// Instantiations of TEX_2D_GRAD.  Only f32 coordinates are instantiated
// here; the s32 and u32 results both use Int32Regs.
defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs,
                int_nvvm_tex_2d_grad_v4f32_f32>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs,
                int_nvvm_tex_2d_grad_v4s32_f32>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs,
                int_nvvm_tex_2d_grad_v4u32_f32>;
3338
// Shared instruction shape for the TEX_2D_ARRAY variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x and $y.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {l, x, y, y}];
// The bracketed vector is printed with four elements, so $y appears twice.
// NOTE(review): presumably the fourth slot is padding the hardware ignores --
// confirm against the PTX ISA before changing it.
class TEX_2D_ARRAY_base<string inst,
                        NVPTXRegClass outtype,
                        NVPTXRegClass intype,
                        dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$l, $x, $y, $y\\}];"),
      pattern>;
3348
// Produces the _RR/_RI/_IR/_II records of a TEX_2D_ARRAY instruction.  The
// suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR
// carries a selection pattern (it matches `intr`).
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_2D_ARRAY_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, i32:$l,
                    intype:$x, intype:$y))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_2D_ARRAY_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_2D_ARRAY_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_2D_ARRAY_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3362
// Instantiations of TEX_2D_ARRAY, one per (result type, coordinate type)
// pair.  The s32 and u32 results both use Int32Regs.
defm TEX_2D_ARRAY_F32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs,
               int_nvvm_tex_2d_array_v4f32_f32>;
defm TEX_2D_ARRAY_F32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs,
               int_nvvm_tex_2d_array_v4f32_s32>;
defm TEX_2D_ARRAY_S32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs,
               int_nvvm_tex_2d_array_v4s32_s32>;
defm TEX_2D_ARRAY_S32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs,
               int_nvvm_tex_2d_array_v4s32_f32>;
defm TEX_2D_ARRAY_U32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs,
               int_nvvm_tex_2d_array_v4u32_s32>;
defm TEX_2D_ARRAY_U32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs,
               int_nvvm_tex_2d_array_v4u32_f32>;
3381
// Shared instruction shape for the TEX_2D_ARRAY_LEVEL variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $lod.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {l, x, y, y}], lod;
// The bracketed vector is printed with four elements, so $y appears twice.
class TEX_2D_ARRAY_LEVEL_base<string inst,
                              NVPTXRegClass outtype,
                              NVPTXRegClass intype,
                              dag texsamp,
                              list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;"),
      pattern>;
3392
// Produces the _RR/_RI/_IR/_II records of a TEX_2D_ARRAY_LEVEL instruction.
// The suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only
// _RR carries a selection pattern (it matches `intr`).
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_2D_ARRAY_LEVEL_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, i32:$l,
                    intype:$x, intype:$y, intype:$lod))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_2D_ARRAY_LEVEL_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_2D_ARRAY_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_2D_ARRAY_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3406
// Instantiations of TEX_2D_ARRAY_LEVEL.  Only f32 coordinates are
// instantiated here; the s32 and u32 results both use Int32Regs.
defm TEX_2D_ARRAY_F32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs,
                     int_nvvm_tex_2d_array_level_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs,
                     int_nvvm_tex_2d_array_level_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs,
                     int_nvvm_tex_2d_array_level_v4u32_f32>;
3416
// Shared instruction shape for the TEX_2D_ARRAY_GRAD variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient operands.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form:
//   inst {r, g, b, a}, [t, s, {l, x, y, y}],
//        {gradx0, gradx1}, {grady0, grady1};
// The bracketed vector is printed with four elements, so $y appears twice.
class TEX_2D_ARRAY_GRAD_base<string inst,
                             NVPTXRegClass outtype,
                             NVPTXRegClass intype,
                             dag texsamp,
                             list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$l, $x, $y, $y\\}],",
                 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
      pattern>;
3429
// Produces the _RR/_RI/_IR/_II records of a TEX_2D_ARRAY_GRAD instruction.
// The suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only
// _RR carries a selection pattern (it matches `intr`).
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_2D_ARRAY_GRAD_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y,
                    intype:$gradx0, intype:$gradx1,
                    intype:$grady0, intype:$grady1))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_2D_ARRAY_GRAD_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_2D_ARRAY_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_2D_ARRAY_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3445
// Instantiations of TEX_2D_ARRAY_GRAD.  Only f32 coordinates are
// instantiated here; the s32 and u32 results both use Int32Regs.
defm TEX_2D_ARRAY_F32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs,
                    int_nvvm_tex_2d_array_grad_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tex_2d_array_grad_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tex_2d_array_grad_v4u32_f32>;
3455
// Shared instruction shape for the TEX_3D variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and $z.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {x, y, z, z}];
// The bracketed vector is printed with four elements, so $z appears twice.
// NOTE(review): presumably the fourth slot is padding the hardware ignores --
// confirm against the PTX ISA before changing it.
class TEX_3D_base<string inst,
                  NVPTXRegClass outtype,
                  NVPTXRegClass intype,
                  dag texsamp,
                  list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y, $z, $z\\}];"),
      pattern>;
3464
// Produces the _RR/_RI/_IR/_II records of a TEX_3D instruction.  The suffix
// letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR carries a
// selection pattern (it matches `intr`).
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_3D_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s,
                    intype:$x, intype:$y, intype:$z))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3478
// Instantiations of TEX_3D, one per (result type, coordinate type) pair.
// The s32 and u32 results both use Int32Regs.
defm TEX_3D_F32_F32
  : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs,
           int_nvvm_tex_3d_v4f32_f32>;
defm TEX_3D_F32_S32
  : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs,
           int_nvvm_tex_3d_v4f32_s32>;
defm TEX_3D_S32_S32
  : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs,
           int_nvvm_tex_3d_v4s32_s32>;
defm TEX_3D_S32_F32
  : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_3d_v4s32_f32>;
defm TEX_3D_U32_S32
  : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs,
           int_nvvm_tex_3d_v4u32_s32>;
defm TEX_3D_U32_F32
  : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_3d_v4u32_f32>;
3491
// Shared instruction shape for the TEX_3D_LEVEL variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {x, y, z, z}], lod;
// The bracketed vector is printed with four elements, so $z appears twice.
class TEX_3D_LEVEL_base<string inst,
                        NVPTXRegClass outtype,
                        NVPTXRegClass intype,
                        dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;"),
      pattern>;
3502
// Produces the _RR/_RI/_IR/_II records of a TEX_3D_LEVEL instruction.  The
// suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR
// carries a selection pattern (it matches `intr`).
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_3D_LEVEL_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s,
                    intype:$x, intype:$y, intype:$z, intype:$lod))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_3D_LEVEL_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_3D_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_3D_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3517
// Instantiations of TEX_3D_LEVEL.  Only f32 coordinates are instantiated
// here; the s32 and u32 results both use Int32Regs.
defm TEX_3D_F32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs,
               int_nvvm_tex_3d_level_v4f32_f32>;
defm TEX_3D_S32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs,
               int_nvvm_tex_3d_level_v4s32_f32>;
defm TEX_3D_U32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs,
               int_nvvm_tex_3d_level_v4u32_f32>;
3527
// Shared instruction shape for the TEX_3D_GRAD variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and the six gradient operands.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form:
//   inst {r, g, b, a}, [t, s, {x, y, z, z}],
//        {gradx0, gradx1, gradx2, gradx2},
//        {grady0, grady1, grady2, grady2};
// Each of the three vectors is printed with four elements, so its last
// operand ($z, $gradx2, $grady2) appears twice.
class TEX_3D_GRAD_base<string inst,
                       NVPTXRegClass outtype,
                       NVPTXRegClass intype,
                       dag texsamp,
                       list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y, intype:$z,
                intype:$gradx0, intype:$gradx1, intype:$gradx2,
                intype:$grady0, intype:$grady1, intype:$grady2)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y, $z, $z\\}],",
                 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                 " \\{$grady0, $grady1, $grady2, $grady2\\};"),
      pattern>;
3542
// Produces the _RR/_RI/_IR/_II records of a TEX_3D_GRAD instruction.  The
// suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR
// carries a selection pattern (it matches `intr`).
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_3D_GRAD_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s,
                    intype:$x, intype:$y, intype:$z,
                    intype:$gradx0, intype:$gradx1, intype:$gradx2,
                    intype:$grady0, intype:$grady1, intype:$grady2))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_3D_GRAD_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_3D_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_3D_GRAD_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3558
// Instantiations of TEX_3D_GRAD.  Only f32 coordinates are instantiated
// here; the s32 and u32 results both use Int32Regs.
defm TEX_3D_F32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs,
              int_nvvm_tex_3d_grad_v4f32_f32>;
defm TEX_3D_S32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs,
              int_nvvm_tex_3d_grad_v4s32_f32>;
defm TEX_3D_U32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs,
              int_nvvm_tex_3d_grad_v4u32_f32>;
3568
// Shared instruction shape for the TEX_CUBE variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and $z.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {x, y, z, z}];
// The bracketed vector is printed with four elements, so $z appears twice.
class TEX_CUBE_base<string inst,
                    NVPTXRegClass outtype,
                    NVPTXRegClass intype,
                    dag texsamp,
                    list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y, $z, $z\\}];"),
      pattern>;
3577
// Produces the _RR/_RI/_IR/_II records of a TEX_CUBE instruction.  The suffix
// letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR carries a
// selection pattern (it matches `intr`).
multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                    Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_CUBE_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s,
                    intype:$x, intype:$y, intype:$z))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_CUBE_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_CUBE_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3591
// Instantiations of TEX_CUBE.  Only f32 coordinates are instantiated here;
// the s32 and u32 results both use Int32Regs.
defm TEX_CUBE_F32_F32 :
  TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs,
           int_nvvm_tex_cube_v4f32_f32>;
defm TEX_CUBE_S32_F32 :
  TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_cube_v4s32_f32>;
defm TEX_CUBE_U32_F32 :
  TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs,
           int_nvvm_tex_cube_v4u32_f32>;
3601
// Shared instruction shape for the TEX_CUBE_LEVEL variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {x, y, z, z}], lod;
// The bracketed vector is printed with four elements, so $z appears twice.
class TEX_CUBE_LEVEL_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag texsamp,
                          list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;"),
      pattern>;
3612
// Produces the _RR/_RI/_IR/_II records of a TEX_CUBE_LEVEL instruction.  The
// suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR
// carries a selection pattern (it matches `intr`).
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_CUBE_LEVEL_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s,
                    intype:$x, intype:$y, intype:$z, intype:$lod))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_CUBE_LEVEL_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_CUBE_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_CUBE_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3627
// Instantiations of TEX_CUBE_LEVEL.  Only f32 coordinates are instantiated
// here; the s32 and u32 results both use Int32Regs.
defm TEX_CUBE_F32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs,
                 int_nvvm_tex_cube_level_v4f32_f32>;
defm TEX_CUBE_S32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_cube_level_v4s32_f32>;
defm TEX_CUBE_U32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_cube_level_v4u32_f32>;
3637
// Shared instruction shape for the TEX_CUBE_ARRAY variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and $z.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {l, x, y, z}];
// Here the i32 operand $l plus the three coordinates already fill all four
// vector slots, so no operand is repeated.
class TEX_CUBE_ARRAY_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag texsamp,
                          list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$l, $x, $y, $z\\}];"),
      pattern>;
3648
// Produces the _RR/_RI/_IR/_II records of a TEX_CUBE_ARRAY instruction.  The
// suffix letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR
// carries a selection pattern (it matches `intr`).
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_CUBE_ARRAY_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, i32:$l,
                    intype:$x, intype:$y, intype:$z))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_CUBE_ARRAY_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_CUBE_ARRAY_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_CUBE_ARRAY_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3662
// Instantiations of TEX_CUBE_ARRAY.  Only f32 coordinates are instantiated
// here; the s32 and u32 results both use Int32Regs.
defm TEX_CUBE_ARRAY_F32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs,
                 int_nvvm_tex_cube_array_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_cube_array_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs,
                 int_nvvm_tex_cube_array_v4u32_f32>;
3672
// Shared instruction shape for the TEX_CUBE_ARRAY_LEVEL variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, s, {l, x, y, z}], lod;
class TEX_CUBE_ARRAY_LEVEL_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag texsamp,
                                list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                intype:$lod)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;"),
      pattern>;
3683
// Produces the _RR/_RI/_IR/_II records of a TEX_CUBE_ARRAY_LEVEL
// instruction.  The suffix letters describe $t then $s: R = Int64Regs,
// I = i64imm.  Only _RR carries a selection pattern (it matches `intr`).
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TEX_CUBE_ARRAY_LEVEL_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, i64:$s, i32:$l,
                    intype:$x, intype:$y, intype:$z, intype:$lod))]>;
  // Register $t, immediate $s.
  def _RI
    : TEX_CUBE_ARRAY_LEVEL_base<
        inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TEX_CUBE_ARRAY_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TEX_CUBE_ARRAY_LEVEL_base<
        inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3698
// Instantiations of TEX_CUBE_ARRAY_LEVEL.  Only f32 coordinates are
// instantiated here; the s32 and u32 results both use Int32Regs.
defm TEX_CUBE_ARRAY_F32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                       Float32Regs, Float32Regs,
                       int_nvvm_tex_cube_array_level_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                       Int32Regs, Float32Regs,
                       int_nvvm_tex_cube_array_level_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                       Int32Regs, Float32Regs,
                       int_nvvm_tex_cube_array_level_v4u32_f32>;
3711
// Shared instruction shape for the TLD4_2D variants.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $v0 .. $v3.
//   intype  - register class of the coordinates $x and $y.
//   texsamp - (ins ...) dag supplying the $t and $s handle operands.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {v0, v1, v2, v3}, [t, s, {x, y}];
class TLD4_2D_base<string inst,
                   NVPTXRegClass outtype,
                   NVPTXRegClass intype,
                   dag texsamp,
                   list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(texsamp, (ins intype:$x, intype:$y)),
      !strconcat(inst, " \t\\{$v0, $v1, $v2, $v3\\},",
                 " [$t, $s, \\{$x, $y\\}];"),
      pattern>;
3719
// Produces the _RR/_RI/_IR/_II records of a TLD4_2D instruction.  The suffix
// letters describe $t then $s: R = Int64Regs, I = i64imm.  Only _RR carries a
// selection pattern (it matches `intr`).
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                   Intrinsic intr> {
  // Both handles in registers; selected from the intrinsic.
  def _RR
    : TLD4_2D_base<
        inst, outtype, intype,
        (ins Int64Regs:$t, Int64Regs:$s),
        [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
              (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
  // Register $t, immediate $s.
  def _RI
    : TLD4_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  // Immediate $t, register $s.
  def _IR
    : TLD4_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  // Both handles immediate.
  def _II
    : TLD4_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3733
// Instantiations of TLD4_2D.  There is one defm per component selector
// (r, g, b, a in the mnemonic) and per result type; all of them take f32
// coordinates.

// f32 results.
defm TLD4_R_2D_F32_F32 :
  TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
          int_nvvm_tld4_r_2d_v4f32_f32>;
defm TLD4_G_2D_F32_F32 :
  TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
          int_nvvm_tld4_g_2d_v4f32_f32>;
defm TLD4_B_2D_F32_F32 :
  TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
          int_nvvm_tld4_b_2d_v4f32_f32>;
defm TLD4_A_2D_F32_F32 :
  TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
          int_nvvm_tld4_a_2d_v4f32_f32>;

// s32 results (held in Int32Regs).
defm TLD4_R_2D_S32_F32 :
  TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_r_2d_v4s32_f32>;
defm TLD4_G_2D_S32_F32 :
  TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_g_2d_v4s32_f32>;
defm TLD4_B_2D_S32_F32 :
  TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_b_2d_v4s32_f32>;
defm TLD4_A_2D_S32_F32 :
  TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_a_2d_v4s32_f32>;

// u32 results (also held in Int32Regs).
defm TLD4_R_2D_U32_F32 :
  TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_r_2d_v4u32_f32>;
defm TLD4_G_2D_U32_F32 :
  TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_g_2d_v4u32_f32>;
defm TLD4_B_2D_U32_F32 :
  TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_b_2d_v4u32_f32>;
defm TLD4_A_2D_U32_F32 :
  TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
          int_nvvm_tld4_a_2d_v4u32_f32>;
3772
3773}
3774
3775
3776// texmode_unified
3777let IsTex = true, IsTexModeUnified = true in {
3778// Texture fetch instructions using handles
3779
// Shared instruction shape for the TEX_UNIFIED_1D variants.  Unlike the
// classes that take a `texsamp` dag, this one is given a single handle
// operand ($t) and prints no $s.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinate $x.
//   tex     - (ins ...) dag supplying the $t handle operand; it is prepended
//             to the remaining inputs with !con.
//   pattern - selection pattern; empty unless the instantiation provides one.
// Printed form: inst {r, g, b, a}, [t, {x}];
class TEX_UNIFIED_1D_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag tex,
                          list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                 " [$t, \\{$x\\}];"),
      pattern>;
3787
// Produces the _R and _I records of a TEX_UNIFIED_1D instruction.  The suffix
// describes the $t operand: R = Int64Regs, I = i64imm.  Only _R carries a
// selection pattern (it matches `intr`); _I uses the default empty pattern.
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, Intrinsic intr> {
  // Handle in a register; selected from the intrinsic.
  def _R
    : TEX_UNIFIED_1D_base<
        inst, outtype, intype,
        (ins Int64Regs:$t),
        [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
              (intr i64:$t, intype:$x))]>;
  // Handle as an immediate.
  def _I
    : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3796
// tex.1d: {f32, s32, u32} results, each with s32 and f32 coordinates.
defm TEX_UNIFIED_1D_F32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.f32.s32", Float32Regs, Int32Regs,
    int_nvvm_tex_unified_1d_v4f32_s32>;
defm TEX_UNIFIED_1D_F32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.s32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_1d_v4s32_s32>;
defm TEX_UNIFIED_1D_S32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.u32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_1d_v4u32_s32>;
defm TEX_UNIFIED_1D_U32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_v4u32_f32>;
3815
// Unified-mode 1D fetch that takes an extra $lod operand after the
// coordinate. $x and $lod share the `intype` register class; `tex` supplies
// the $t operand and `pattern` defaults to the empty list.
class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex,
                                list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;"),
                pattern>;
3824
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 1D fetch with an explicit $lod.
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_1D_LEVEL_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$lod))]>;
  def _I
    : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3833
// tex.level.1d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_1D_F32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_level_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_level_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_level_v4u32_f32>;
3843
// Unified-mode 1D fetch with two extra operands, $gradx and $grady, each
// printed as its own single-element brace group after the address. All
// three inputs use `intype`; `tex` supplies $t; `pattern` defaults to [].
class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex,
                               list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};"),
                pattern>;
3853
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 1D gradient fetch.
multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_1D_GRAD_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$gradx, intype:$grady))]>;
  def _I
    : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3862
// tex.grad.1d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_1D_F32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_grad_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_grad_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_grad_v4u32_f32>;
3872
// Unified-mode 1D array fetch. The coordinate group starts with $l, which
// is always an Int32Regs operand regardless of `intype` (presumably the
// array layer index -- confirm against the PTX ISA), followed by $x.
// `tex` supplies $t; `pattern` defaults to the empty list.
class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex,
                                list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];"),
                pattern>;
3881
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 1D array fetch. $l is matched as i32.
multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, i32:$l, intype:$x))]>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3890
// tex.a1d: {f32, s32, u32} results, each with s32 and f32 coordinates.
defm TEX_UNIFIED_1D_ARRAY_F32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.f32.s32", Float32Regs, Int32Regs,
    int_nvvm_tex_unified_1d_array_v4f32_s32>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.s32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_1d_array_v4s32_s32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.u32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_1d_array_v4u32_s32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_v4u32_f32>;
3909
// Unified-mode 1D array fetch with a trailing $lod operand. $l is always
// Int32Regs; $x and $lod use `intype`. `tex` supplies $t; `pattern`
// defaults to the empty list.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex,
                                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;"),
                pattern>;
3918
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 1D array fetch with an explicit $lod.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, i32:$l, intype:$x, intype:$lod))]>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3928
// tex.level.a1d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_level_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_level_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_level_v4u32_f32>;
3941
// Unified-mode 1D array fetch with $gradx / $grady operands. $l is always
// Int32Regs; the other inputs use `intype`. `tex` supplies $t; `pattern`
// defaults to the empty list.
// Note: the second string piece starts with two spaces; that spacing is
// part of the emitted text and is kept as-is.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex,
                                     list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x,
                               intype:$gradx, intype:$grady)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\},",
                    "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};"),
                pattern>;
3952
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 1D array gradient fetch.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_GRAD_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, i32:$l, intype:$x,
                      intype:$gradx, intype:$grady))]>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3962
// tex.grad.a1d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_grad_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_grad_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_1d_array_grad_v4u32_f32>;
3975
// Common shape of a unified-mode 2D texture fetch: four `outtype` results
// and two `intype` coordinates, $x and $y. `tex` supplies the $t operand;
// `pattern` defaults to the empty list.
class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex, list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];"),
                pattern>;
3983
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 2D fetch.
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t),
                          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                                (intr i64:$t, intype:$x, intype:$y))]>;
  def _I
    : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3992
// tex.2d: {f32, s32, u32} results, each with s32 and f32 coordinates.
defm TEX_UNIFIED_2D_F32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.f32.s32", Float32Regs, Int32Regs,
    int_nvvm_tex_unified_2d_v4f32_s32>;
defm TEX_UNIFIED_2D_F32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.s32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_2d_v4s32_s32>;
defm TEX_UNIFIED_2D_S32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.u32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_2d_v4u32_s32>;
defm TEX_UNIFIED_2D_U32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_v4u32_f32>;
4011
// Unified-mode 2D fetch with a trailing $lod operand. $x, $y and $lod all
// use `intype`. `tex` supplies $t; `pattern` defaults to the empty list.
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex,
                                list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;"),
                pattern>;
4020
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 2D fetch with an explicit $lod.
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_2D_LEVEL_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$y, intype:$lod))]>;
  def _I
    : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4029
// tex.level.2d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_2D_F32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_level_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_level_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_level_v4u32_f32>;
4039
// Unified-mode 2D gradient fetch. After the coordinates $x, $y come two
// two-element groups: {$gradx0, $gradx1} and {$grady0, $grady1}. Every
// input uses `intype`. `tex` supplies $t; `pattern` defaults to [].
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex,
                               list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y,
                               intype:$gradx0, intype:$gradx1,
                               intype:$grady0, intype:$grady1)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],",
                    " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 2D gradient fetch.
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_2D_GRAD_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$y,
                      intype:$gradx0, intype:$gradx1,
                      intype:$grady0, intype:$grady1))]>;
  def _I
    : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4061
// tex.grad.2d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_2D_F32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_grad_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_grad_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_grad_v4u32_f32>;
4071
// Unified-mode 2D array fetch. Inputs are $l (always Int32Regs), $x and $y.
// The printed coordinate group has four slots and repeats $y in the last
// one (presumably padding for a four-element vector operand -- confirm
// against the PTX ISA). `tex` supplies $t; `pattern` defaults to [].
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex,
                                list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 2D array fetch. $l is matched as i32.
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, i32:$l, intype:$x, intype:$y))]>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4088
// tex.a2d: {f32, s32, u32} results, each with s32 and f32 coordinates.
defm TEX_UNIFIED_2D_ARRAY_F32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.f32.s32", Float32Regs, Int32Regs,
    int_nvvm_tex_unified_2d_array_v4f32_s32>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.s32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_2d_array_v4s32_s32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.u32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_2d_array_v4u32_s32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_v4u32_f32>;
4107
// Unified-mode 2D array fetch with a trailing $lod operand. Inputs are $l
// (always Int32Regs), $x, $y and $lod; the printed coordinate group repeats
// $y in its fourth slot. `tex` supplies $t; `pattern` defaults to [].
// Note: the second string piece starts with two spaces; that spacing is
// part of the emitted text and is kept as-is.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex,
                                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                               intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           "  [$t, \\{$l, $x, $y, $y\\}], $lod;"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 2D array fetch with an explicit $lod.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$lod))]>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4127
// tex.level.a2d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_level_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_level_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_level_v4u32_f32>;
4140
// Unified-mode 2D array gradient fetch. Inputs are $l (always Int32Regs),
// $x, $y and the pairs {$gradx0, $gradx1} / {$grady0, $grady1}. The printed
// coordinate group repeats $y in its fourth slot. `tex` supplies $t;
// `pattern` defaults to the empty list.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex,
                                     list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                               intype:$gradx0, intype:$gradx1,
                               intype:$grady0, intype:$grady1)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],",
                    " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 2D array gradient fetch.
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_GRAD_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, i32:$l, intype:$x, intype:$y,
                      intype:$gradx0, intype:$gradx1,
                      intype:$grady0, intype:$grady1))]>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4163
// tex.grad.a2d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_grad_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_grad_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_2d_array_grad_v4u32_f32>;
4176
// Common shape of a unified-mode 3D texture fetch: coordinates $x, $y, $z
// in `intype`. The printed coordinate group has four slots and repeats $z
// in the last one (presumably padding for a four-element vector operand --
// confirm against the PTX ISA). `tex` supplies $t; `pattern` defaults to [].
class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex, list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 3D fetch.
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_3D_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$y, intype:$z))]>;
  def _I
    : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4192
// tex.3d: {f32, s32, u32} results, each with s32 and f32 coordinates.
defm TEX_UNIFIED_3D_F32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.f32.s32", Float32Regs, Int32Regs,
    int_nvvm_tex_unified_3d_v4f32_s32>;
defm TEX_UNIFIED_3D_F32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.s32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_3d_v4s32_s32>;
defm TEX_UNIFIED_3D_S32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.u32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_unified_3d_v4u32_s32>;
defm TEX_UNIFIED_3D_U32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_v4u32_f32>;
4211
// Unified-mode 3D fetch with a trailing $lod operand. $x, $y, $z and $lod
// all use `intype`; the printed coordinate group repeats $z in its fourth
// slot. `tex` supplies $t; `pattern` defaults to the empty list.
class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex,
                                list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, \\{$x, $y, $z, $z\\}], $lod;"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 3D fetch with an explicit $lod.
multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_3D_LEVEL_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
  def _I
    : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4229
// tex.level.3d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_3D_F32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_level_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_level_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_level_v4u32_f32>;
4239
// Unified-mode 3D gradient fetch. Inputs, all in `intype`: $x, $y, $z,
// $gradx0..2 and $grady0..2. Each of the three printed brace groups has
// four slots and repeats its third operand in the last one ($z, $gradx2,
// $grady2). `tex` supplies $t; `pattern` defaults to the empty list.
class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex,
                               list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z,
                               intype:$gradx0, intype:$gradx1, intype:$gradx2,
                               intype:$grady0, intype:$grady1, intype:$grady2)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],",
                    " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                    " \\{$grady0, $grady1, $grady2, $grady2\\};"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the 3D gradient fetch.
multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_3D_GRAD_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$y, intype:$z,
                      intype:$gradx0, intype:$gradx1, intype:$gradx2,
                      intype:$grady0, intype:$grady1, intype:$grady2))]>;
  def _I
    : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4263
// tex.grad.3d: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_3D_F32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_grad_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_grad_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_3d_grad_v4u32_f32>;
4273
// Common shape of a unified-mode cube texture fetch: coordinates $x, $y, $z
// in `intype`, printed as a four-slot group that repeats $z in the last
// slot. `tex` supplies $t; `pattern` defaults to the empty list.
class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex,
                            list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the cube fetch.
multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_CUBE_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$y, intype:$z))]>;
  def _I
    : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4290
// tex.cube: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_CUBE_F32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_v4u32_f32>;
4300
// Unified-mode cube fetch with a trailing $lod operand. $x, $y, $z and $lod
// all use `intype`; the printed coordinate group repeats $z in its fourth
// slot. `tex` supplies $t; `pattern` defaults to the empty list.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex,
                                  list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, \\{$x, $y, $z, $z\\}], $lod;"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the cube fetch with an explicit $lod.
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_CUBE_LEVEL_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
  def _I
    : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4319
// tex.level.cube: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_level_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_level_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_level_v4u32_f32>;
4332
// Unified-mode cube array fetch. Inputs are $l (always Int32Regs) followed
// by $x, $y, $z in `intype`; here the four printed coordinate slots are
// filled by four distinct operands. `tex` supplies $t; `pattern` defaults
// to the empty list.
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex,
                                  list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
                !strconcat(
                    inst,
                    " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];"),
                pattern>;
// Register-handle (_R, matched against `intr`) and immediate-handle (_I, no
// pattern) forms of the cube array fetch. $l is matched as i32.
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, Intrinsic intr> {
  def _R
    : TEX_UNIFIED_CUBE_ARRAY_base<
          inst, outtype, intype, (ins Int64Regs:$t),
          [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z))]>;
  def _I
    : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4350
// tex.acube: f32 coordinates only, for f32 / s32 / u32 results.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 : TEX_UNIFIED_CUBE_ARRAY<
    "tex.acube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 : TEX_UNIFIED_CUBE_ARRAY<
    "tex.acube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 : TEX_UNIFIED_CUBE_ARRAY<
    "tex.acube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_v4u32_f32>;
4360
// Unified-mode cube array fetch with a trailing $lod operand. Inputs are $l
// (always Int32Regs), then $x, $y, $z and $lod in `intype`. `tex` supplies
// $t; `pattern` defaults to the empty list.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex,
                                        list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                               intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, \\{$l, $x, $y, $z\\}], $lod;"),
                pattern>;
// Cube-array fetch with a $lod operand; each defm expands to two records:
//   _R : texture operand $t in Int64Regs, selected from `intr`.
//   _I : texture operand $t as i64imm, defined without a selection pattern.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, Intrinsic intr> {
  defvar sel = [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                     (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z,
                           intype:$lod))];

  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins Int64Regs:$t), sel>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins i64imm:$t)>;
}
4380
// tex.level.acube.v4 with f32 inputs: f32, s32 and u32 result flavours.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_level_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_level_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_level_v4u32_f32>;
4393
// Instruction shape for a cube texture fetch with gradient operands.
//   outs : four `outtype` values $r, $g, $b, $a.
//   ins  : the operands in `tex`, then $x, $y, $z and the six gradient
//          components $gradx0..2 / $grady0..2 (all `intype`).
// Each brace group in the assembly string lists four entries; the last
// operand of each group ($z, $gradx2, $grady2) is printed twice.
// NOTE(review): presumably padding to a 4-element vector - confirm against
// the PTX ISA.
class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex,
                                 list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y, intype:$z,
                         intype:$gradx0, intype:$gradx1, intype:$gradx2,
                         intype:$grady0, intype:$grady1, intype:$grady2)),
          !strconcat(inst,
                     " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],",
                     " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                     " \\{$grady0, $grady1, $grady2, $grady2\\};"),
          pattern>;
4407
// Cube fetch with gradients; each defm expands to two records:
//   _R : texture operand $t in Int64Regs, selected from `intr`.
//   _I : texture operand $t as i64imm, defined without a selection pattern.
multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, Intrinsic intr> {
  defvar sel = [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                     (intr i64:$t, intype:$x, intype:$y, intype:$z,
                           intype:$gradx0, intype:$gradx1, intype:$gradx2,
                           intype:$grady0, intype:$grady1, intype:$grady2))];

  def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype,
                                      (ins Int64Regs:$t), sel>;
  def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
}
4418
// tex.grad.cube.v4 with f32 inputs: f32, s32 and u32 result flavours.
defm TEX_UNIFIED_CUBE_F32_F32_GRAD : TEX_UNIFIED_CUBE_GRAD<
    "tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32_GRAD : TEX_UNIFIED_CUBE_GRAD<
    "tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32_GRAD : TEX_UNIFIED_CUBE_GRAD<
    "tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_grad_v4u32_f32>;
4428
// Instruction shape for a cube-array texture fetch with gradient operands.
//   outs : four `outtype` values $r, $g, $b, $a.
//   ins  : the operands in `tex`, then $l (Int32Regs), $x, $y, $z and the
//          six gradient components $gradx0..2 / $grady0..2 (all `intype`).
// In the assembly string the two gradient groups print their third
// component twice, giving four entries per brace group.
class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex,
                                       list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                         intype:$gradx0, intype:$gradx1, intype:$gradx2,
                         intype:$grady0, intype:$grady1, intype:$grady2)),
          !strconcat(inst,
                     " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],",
                     " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                     " \\{$grady0, $grady1, $grady2, $grady2\\};"),
          pattern>;
// Cube-array fetch with gradients; each defm expands to two records:
//   _R : texture operand $t in Int64Regs, selected from `intr`.
//   _I : texture operand $t as i64imm, defined without a selection pattern.
multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, Intrinsic intr> {
  defvar sel = [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                     (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z,
                           intype:$gradx0, intype:$gradx1, intype:$gradx2,
                           intype:$grady0, intype:$grady1, intype:$grady2))];

  def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins Int64Regs:$t), sel>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins i64imm:$t)>;
}
4454
// tex.grad.acube.v4 with f32 inputs: f32, s32 and u32 result flavours.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<
    "tex.grad.acube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<
    "tex.grad.acube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<
    "tex.grad.acube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_unified_cube_array_grad_v4u32_f32>;
4467
// Instruction shape for a 2D tld4 operation.
//   outs : four `outtype` values $v0..$v3.
//   ins  : the operands in `tex`, then $x and $y (both `intype`).
// The assembly string references $t, so `tex` is expected to define it.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex,
                           list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
          !con(tex, (ins intype:$x, intype:$y)),
          !strconcat(inst,
                     " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];"),
          pattern>;
// 2D tld4; each defm expands to two records:
//   _R : texture operand $t in Int64Regs, selected from `intr`.
//   _I : texture operand $t as i64imm, defined without a selection pattern.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, Intrinsic intr> {
  defvar sel = [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
                     (intr i64:$t, intype:$x, intype:$y))];

  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype,
                                (ins Int64Regs:$t), sel>;
  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
4484
// tld4.{r,g,b,a}.2d.v4 with f32 inputs and f32 results.
defm TLD4_UNIFIED_R_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_unified_r_2d_v4f32_f32>;
defm TLD4_UNIFIED_G_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_unified_g_2d_v4f32_f32>;
defm TLD4_UNIFIED_B_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_unified_b_2d_v4f32_f32>;
defm TLD4_UNIFIED_A_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_unified_a_2d_v4f32_f32>;

// tld4.{r,g,b,a}.2d.v4 with f32 inputs and s32 results (Int32Regs).
defm TLD4_UNIFIED_R_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_r_2d_v4s32_f32>;
defm TLD4_UNIFIED_G_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_g_2d_v4s32_f32>;
defm TLD4_UNIFIED_B_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_b_2d_v4s32_f32>;
defm TLD4_UNIFIED_A_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_a_2d_v4s32_f32>;

// tld4.{r,g,b,a}.2d.v4 with f32 inputs and u32 results (Int32Regs).
defm TLD4_UNIFIED_R_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_r_2d_v4u32_f32>;
defm TLD4_UNIFIED_G_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_g_2d_v4u32_f32>;
defm TLD4_UNIFIED_B_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_b_2d_v4u32_f32>;
defm TLD4_UNIFIED_A_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_unified_a_2d_v4u32_f32>;
4523
4524}
4525
4526
4527
4528//=== Surface load instructions
4529
4530let IsSuld = true in {
4531
// Instruction shape for a scalar 1D surface load.
//   outs : one `outtype` value $r.
//   ins  : the operands in `surf`, then $x (Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
// `pattern` defaults to the empty list (no selection pattern).
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf,
                   list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$x)),
          !strconcat(inst, " \\{$r\\}, [$s, \\{$x\\}];"),
          pattern>;
// Scalar 1D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is looked up by name: "int_nvvm_" followed by the
// lower-cased NAME of the defm.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, (intrin i64:$s, i32:$x))];

  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}
4545
// suld.b.1d scalar loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp",  Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap",  Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

// .zero variants
defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero",  Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
4560
// Instruction shape for a scalar 1D-array surface load.
//   outs : one `outtype` value $r.
//   ins  : the operands in `surf`, then $l and $x (both Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
                         list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
          !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x\\}];"),
          pattern>;
// Scalar 1D-array surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, (intrin i64:$s, i32:$l, i32:$x))];

  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}
4575
// suld.b.a1d scalar loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_1D_ARRAY_I8_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_1D_ARRAY_I8_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b64.trap", Int64Regs>;

// .zero variants
defm SULD_1D_ARRAY_I8_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b64.zero", Int64Regs>;
4602
// Instruction shape for a scalar 2D surface load.
//   outs : one `outtype` value $r.
//   ins  : the operands in `surf`, then $x and $y (both Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf,
                   list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y\\}];"),
          pattern>;
// Scalar 2D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, (intrin i64:$s, i32:$x, i32:$y))];

  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}
4616
// suld.b.2d scalar loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp",  Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap",  Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

// .zero variants
defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero",  Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
4631
// Instruction shape for a scalar 2D-array surface load.
//   outs : one `outtype` value $r.
//   ins  : the operands in `surf`, then $l, $x and $y (all Int32Regs).
// The assembly string prints $y twice, giving four entries in the brace
// group.
// NOTE(review): presumably padding to a 4-element vector - confirm against
// the PTX ISA.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
                         list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];"),
          pattern>;
// Scalar 2D-array surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, (intrin i64:$s, i32:$l, i32:$x, i32:$y))];

  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}
4646
// suld.b.a2d scalar loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<
    "suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<
    "suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<
    "suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<
    "suld.b.a2d.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<
    "suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<
    "suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<
    "suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<
    "suld.b.a2d.b64.trap", Int64Regs>;

// .zero variants
defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<
    "suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<
    "suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<
    "suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<
    "suld.b.a2d.b64.zero", Int64Regs>;
4661
// Instruction shape for a scalar 3D surface load.
//   outs : one `outtype` value $r.
//   ins  : the operands in `surf`, then $x, $y and $z (all Int32Regs).
// The assembly string prints $z twice, giving four entries in the brace
// group.
// NOTE(review): presumably padding to a 4-element vector - confirm against
// the PTX ISA.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf,
                   list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
          !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];"),
          pattern>;
// Scalar 3D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, (intrin i64:$s, i32:$x, i32:$y, i32:$z))];

  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}
4676
// suld.b.3d scalar loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_3D_I8_CLAMP  : SULD_3D<"suld.b.3d.b8.clamp",  Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap",  Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

// .zero variants
defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero",  Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
4691}
4692
4693let IsSuld = 2 in {
4694
// Instruction shape for a two-result 1D surface load.
//   outs : two `outtype` values $r and $g.
//   ins  : the operands in `surf`, then $x (Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$x)),
          !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x\\}];"),
          pattern>;
// Two-result 1D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g, (intrin i64:$s, i32:$x))];

  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
4709
// suld.b.1d.v2 loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_1D_V2I8_CLAMP  : SULD_1D_V2<"suld.b.1d.v2.b8.clamp",  Int16Regs>;
defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_1D_V2I8_TRAP  : SULD_1D_V2<"suld.b.1d.v2.b8.trap",  Int16Regs>;
defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

// .zero variants
defm SULD_1D_V2I8_ZERO  : SULD_1D_V2<"suld.b.1d.v2.b8.zero",  Int16Regs>;
defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
4724
// Instruction shape for a two-result 1D-array surface load.
//   outs : two `outtype` values $r and $g.
//   ins  : the operands in `surf`, then $l and $x (both Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
          !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x\\}];"),
          pattern>;
// Two-result 1D-array surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g,
                     (intrin i64:$s, i32:$l, i32:$x))];

  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}
4739
// suld.b.a1d.v2 loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_1D_ARRAY_V2I8_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_1D_ARRAY_V2I8_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.trap", Int64Regs>;

// .zero variants
defm SULD_1D_ARRAY_V2I8_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.zero", Int64Regs>;
4766
// Instruction shape for a two-result 2D surface load.
//   outs : two `outtype` values $r and $g.
//   ins  : the operands in `surf`, then $x and $y (both Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y\\}];"),
          pattern>;
// Two-result 2D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g,
                     (intrin i64:$s, i32:$x, i32:$y))];

  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
4781
// suld.b.2d.v2 loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_2D_V2I8_CLAMP  : SULD_2D_V2<"suld.b.2d.v2.b8.clamp",  Int16Regs>;
defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_2D_V2I8_TRAP  : SULD_2D_V2<"suld.b.2d.v2.b8.trap",  Int16Regs>;
defm SULD_2D_V2I16_TRAP : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

// .zero variants
defm SULD_2D_V2I8_ZERO  : SULD_2D_V2<"suld.b.2d.v2.b8.zero",  Int16Regs>;
defm SULD_2D_V2I16_ZERO : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
4808
// Instruction shape for a two-result 2D-array surface load.
//   outs : two `outtype` values $r and $g.
//   ins  : the operands in `surf`, then $l, $x and $y (all Int32Regs).
// The assembly string prints $y twice, giving four entries in the brace
// group.
// NOTE(review): presumably padding to a 4-element vector - confirm against
// the PTX ISA.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];"),
          pattern>;
// Two-result 2D-array surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g,
                     (intrin i64:$s, i32:$l, i32:$x, i32:$y))];

  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}
4823
// suld.b.a2d.v2 loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_2D_ARRAY_V2I8_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_2D_ARRAY_V2I8_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.trap", Int64Regs>;

// .zero variants
defm SULD_2D_ARRAY_V2I8_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.zero", Int64Regs>;
4850
// Instruction shape for a two-result 3D surface load.
//   outs : two `outtype` values $r and $g.
//   ins  : the operands in `surf`, then $x, $y and $z (all Int32Regs).
// The assembly string prints $z twice, giving four entries in the brace
// group.
// NOTE(review): presumably padding to a 4-element vector - confirm against
// the PTX ISA.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
          !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];"),
          pattern>;
// Two-result 3D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g,
                     (intrin i64:$s, i32:$x, i32:$y, i32:$z))];

  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
4865
// suld.b.3d.v2 loads. b8 and b16 results both use Int16Regs.
// .clamp variants
defm SULD_3D_V2I8_CLAMP  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp",  Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

// .trap variants
defm SULD_3D_V2I8_TRAP  : SULD_3D_V2<"suld.b.3d.v2.b8.trap",  Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

// .zero variants
defm SULD_3D_V2I8_ZERO  : SULD_3D_V2<"suld.b.3d.v2.b8.zero",  Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
4880
4881}
4882
4883let IsSuld = 3 in {
4884
// Instruction shape for a four-result 1D surface load.
//   outs : four `outtype` values $r, $g, $b, $a.
//   ins  : the operands in `surf`, then $x (Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$x)),
          !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];"),
          pattern>;
// Four-result 1D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                     (intrin i64:$s, i32:$x))];

  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
4899
// suld.b.1d.v4 loads (b8/b16/b32 only). b8 and b16 both use Int16Regs.
// .clamp variants
defm SULD_1D_V4I8_CLAMP  : SULD_1D_V4<"suld.b.1d.v4.b8.clamp",  Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

// .trap variants
defm SULD_1D_V4I8_TRAP  : SULD_1D_V4<"suld.b.1d.v4.b8.trap",  Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

// .zero variants
defm SULD_1D_V4I8_ZERO  : SULD_1D_V4<"suld.b.1d.v4.b8.zero",  Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
4911
// Instruction shape for a four-result 1D-array surface load.
//   outs : four `outtype` values $r, $g, $b, $a.
//   ins  : the operands in `surf`, then $l and $x (both Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
          !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];"),
          pattern>;
// Four-result 1D-array surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                     (intrin i64:$s, i32:$l, i32:$x))];

  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}
4927
// suld.b.a1d.v4 loads (b8/b16/b32 only). b8 and b16 both use Int16Regs.
// .clamp variants
defm SULD_1D_ARRAY_V4I8_CLAMP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b32.clamp", Int32Regs>;

// .trap variants
defm SULD_1D_ARRAY_V4I8_TRAP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b32.trap", Int32Regs>;

// .zero variants
defm SULD_1D_ARRAY_V4I8_ZERO : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b32.zero", Int32Regs>;
4948
// Instruction shape for a four-result 2D surface load.
//   outs : four `outtype` values $r, $g, $b, $a.
//   ins  : the operands in `surf`, then $x and $y (both Int32Regs).
// The assembly string references $s, so `surf` is expected to define it.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];"),
          pattern>;
// Four-result 2D surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                     (intrin i64:$s, i32:$x, i32:$y))];

  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
4963
// suld.b.2d.v4 loads (b8/b16/b32 only). b8 and b16 both use Int16Regs.
// .clamp variants
defm SULD_2D_V4I8_CLAMP  : SULD_2D_V4<"suld.b.2d.v4.b8.clamp",  Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

// .trap variants
defm SULD_2D_V4I8_TRAP  : SULD_2D_V4<"suld.b.2d.v4.b8.trap",  Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

// .zero variants
defm SULD_2D_V4I8_ZERO  : SULD_2D_V4<"suld.b.2d.v4.b8.zero",  Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
4975
// Instruction shape for a four-result 2D-array surface load.
//   outs : four `outtype` values $r, $g, $b, $a.
//   ins  : the operands in `surf`, then $l, $x and $y (all Int32Regs).
// The assembly string prints $y twice, giving four entries in the brace
// group.
// NOTE(review): presumably padding to a 4-element vector - confirm against
// the PTX ISA.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst,
                     " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];"),
          pattern>;
// Four-result 2D-array surface load; each defm expands to two records:
//   _R : surface operand $s in Int64Regs, with a selection pattern.
//   _I : surface operand $s as i64imm, without a selection pattern.
// The matched intrinsic is "int_nvvm_" followed by the lower-cased NAME.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  defvar intrin = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));
  defvar sel = [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                     (intrin i64:$s, i32:$l, i32:$x, i32:$y))];

  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s), sel>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}
4991
// Four-channel 2D-array surface loads: one _R/_I pair per element width and
// per .clamp/.trap/.zero modifier.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SULD_2D_ARRAY_V4I8_ # MODE
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8." # mode, Int16Regs>;
  defm SULD_2D_ARRAY_V4I16_ # MODE
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16." # mode, Int16Regs>;
  defm SULD_2D_ARRAY_V4I32_ # MODE
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32." # mode, Int32Regs>;
}
5012
// Four-channel load from a 3D surface.  `surf` contributes the surface handle
// operand ($s); it is followed by $x, $y and $z.  The emitted coordinate
// vector has four slots, the last of which repeats $z.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
      !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];"),
      pattern>;

// Instantiates the register-handle (_R) and immediate-handle (_I) variants.
// Only _R carries a selection pattern.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  // The intrinsic is looked up by name: "int_nvvm_" + lower-cased defm name.
  defvar intrinsic = !cast<Intrinsic>(!strconcat("int_nvvm_", !tolower(NAME)));

  def _R : SULD_3D_V4_base<
             inst, outtype, (ins Int64Regs:$s),
             [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                   (intrinsic i64:$s, i32:$x, i32:$y, i32:$z))]>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
5027
// Four-channel 3D surface loads: one _R/_I pair per element width and per
// .clamp/.trap/.zero modifier.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SULD_3D_V4I8_ # MODE
    : SULD_3D_V4<"suld.b.3d.v4.b8." # mode, Int16Regs>;
  defm SULD_3D_V4I16_ # MODE
    : SULD_3D_V4<"suld.b.3d.v4.b16." # mode, Int16Regs>;
  defm SULD_3D_V4I32_ # MODE
    : SULD_3D_V4<"suld.b.3d.v4.b32." # mode, Int32Regs>;
}
5039
5040}
5041
5042//-----------------------------------
5043// Texture Query Intrinsics
5044//-----------------------------------
5045
let IsSurfTexQuery = true in {
  // For every queried texture attribute, define TXQ_<ATTR>_R (handle in a
  // 64-bit register) and TXQ_<ATTR>_I (handle as a 64-bit immediate).  Both
  // share one asm string and neither carries a selection pattern.
  foreach attr = ["channel_order", "channel_data_type", "width", "height",
                  "depth", "array_size", "num_samples",
                  "num_mipmap_levels"] in {
    defvar ATTR = !toupper(attr);
    defvar text = "txq." # attr # ".b32 \t$d, [$a];";

    def TXQ_ # ATTR # "_R"
      : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), text, []>;
    def TXQ_ # ATTR # "_I"
      : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), text, []>;
  }
}
5112
// Select each int_nvvm_txq_<attr> intrinsic to the register-handle form
// TXQ_<ATTR>_R of the corresponding query instruction.
foreach attr = ["channel_order", "channel_data_type", "width", "height",
                "depth", "array_size", "num_samples",
                "num_mipmap_levels"] in {
  defvar intr = !cast<Intrinsic>("int_nvvm_txq_" # attr);
  defvar inst = !cast<NVPTXInst>("TXQ_" # !toupper(attr) # "_R");
  def : Pat<(intr i64:$a), (inst $a)>;
}
5129
5130
5131//-----------------------------------
5132// Surface Query Intrinsics
5133//-----------------------------------
5134
let IsSurfTexQuery = true in {
  // For every queried surface attribute, define SUQ_<ATTR>_R (handle in a
  // 64-bit register) and SUQ_<ATTR>_I (handle as a 64-bit immediate).  Both
  // share one asm string and neither carries a selection pattern.
  foreach attr = ["channel_order", "channel_data_type", "width", "height",
                  "depth", "array_size"] in {
    defvar ATTR = !toupper(attr);
    defvar text = "suq." # attr # ".b32 \t$d, [$a];";

    def SUQ_ # ATTR # "_R"
      : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), text, []>;
    def SUQ_ # ATTR # "_I"
      : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), text, []>;
  }
}
5185
// Select each int_nvvm_suq_<attr> intrinsic to the register-handle form
// SUQ_<ATTR>_R of the corresponding query instruction.
foreach attr = ["channel_order", "channel_data_type", "width", "height",
                "depth", "array_size"] in {
  defvar intr = !cast<Intrinsic>("int_nvvm_suq_" # attr);
  defvar inst = !cast<NVPTXInst>("SUQ_" # !toupper(attr) # "_R");
  def : Pat<(intr i64:$a), (inst $a)>;
}
5198
5199
5200//===- Handle Query -------------------------------------------------------===//
5201
// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
//
// Each entry is [def-name suffix, istypep type keyword].  The intrinsic is
// int_nvvm_istypep_<lower-cased suffix>; the result is an i1 in $d.
foreach entry = [["SAMPLER", "samplerref"],
                 ["SURFACE", "surfref"],
                 ["TEXTURE", "texref"]] in {
  defvar suffix = entry[0];
  defvar reftype = entry[1];
  defvar intr = !cast<Intrinsic>("int_nvvm_istypep_" # !tolower(suffix));

  def ISTYPEP_ # suffix
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep." # reftype # " \t$d, $a;",
                [(set i1:$d, (intr i64:$a))]>;
}
5215
5216//===- Surface Stores -----------------------------------------------------===//
5217
5218let IsSust = true in {
5219
// Single-channel store to a 1D surface.  `surf` contributes the surface
// handle operand ($s); it is followed by the coordinate $x and the data
// operand $r.  No outputs and no selection pattern.
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r\\};"),
      []>;

multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
}
5229
// sust.b 1D single-channel stores: every element width crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_1D_B8_ # MODE  : SUST_1D<"sust.b.1d.b8." # mode, Int16Regs>;
  defm SUST_B_1D_B16_ # MODE : SUST_1D<"sust.b.1d.b16." # mode, Int16Regs>;
  defm SUST_B_1D_B32_ # MODE : SUST_1D<"sust.b.1d.b32." # mode, Int32Regs>;
  defm SUST_B_1D_B64_ # MODE : SUST_1D<"sust.b.1d.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_1D_B8_TRAP  : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
5248
// Two-channel store to a 1D surface.  `surf` contributes the surface handle
// operand ($s); it is followed by the coordinate $x and the data operands
// $r, $g.  No outputs and no selection pattern.
class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
      []>;

multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
}
5258
// sust.b 1D two-channel stores: every element width crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_1D_V2B8_ # MODE
    : SUST_1D_V2<"sust.b.1d.v2.b8." # mode, Int16Regs>;
  defm SUST_B_1D_V2B16_ # MODE
    : SUST_1D_V2<"sust.b.1d.v2.b16." # mode, Int16Regs>;
  defm SUST_B_1D_V2B32_ # MODE
    : SUST_1D_V2<"sust.b.1d.v2.b32." # mode, Int32Regs>;
  defm SUST_B_1D_V2B64_ # MODE
    : SUST_1D_V2<"sust.b.1d.v2.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_1D_V2B8_TRAP  : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
5277
// Four-channel store to a 1D surface.  `surf` contributes the surface handle
// operand ($s); it is followed by the coordinate $x and the data operands
// $r, $g, $b, $a.  No outputs and no selection pattern.
class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
      []>;

multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
}
5288
// sust.b 1D four-channel stores: b8/b16/b32 crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_1D_V4B8_ # MODE
    : SUST_1D_V4<"sust.b.1d.v4.b8." # mode, Int16Regs>;
  defm SUST_B_1D_V4B16_ # MODE
    : SUST_1D_V4<"sust.b.1d.v4.b16." # mode, Int16Regs>;
  defm SUST_B_1D_V4B32_ # MODE
    : SUST_1D_V4<"sust.b.1d.v4.b32." # mode, Int32Regs>;
}

// sust.p: only .trap variants are defined here.
defm SUST_P_1D_V4B8_TRAP  : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
5304
// Single-channel store to a 1D surface array.  `surf` contributes the surface
// handle operand ($s); it is followed by $idx, the coordinate $x and the data
// operand $r.  No outputs and no selection pattern.
class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
      []>;

multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}
5314
// sust.b 1D-array single-channel stores: every element width crossed with
// the .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_1D_ARRAY_B8_ # MODE
    : SUST_1D_ARRAY<"sust.b.a1d.b8." # mode, Int16Regs>;
  defm SUST_B_1D_ARRAY_B16_ # MODE
    : SUST_1D_ARRAY<"sust.b.a1d.b16." # mode, Int16Regs>;
  defm SUST_B_1D_ARRAY_B32_ # MODE
    : SUST_1D_ARRAY<"sust.b.a1d.b32." # mode, Int32Regs>;
  defm SUST_B_1D_ARRAY_B64_ # MODE
    : SUST_1D_ARRAY<"sust.b.a1d.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_1D_ARRAY_B8_TRAP  : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B16_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B32_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
5348
// Two-channel store to a 1D surface array.  `surf` contributes the surface
// handle operand ($s); it is followed by $idx, the coordinate $x and the data
// operands $r, $g.  No outputs and no selection pattern.
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
      []>;

multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}
5359
// sust.b 1D-array two-channel stores: every element width crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_1D_ARRAY_V2B8_ # MODE
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8." # mode, Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B16_ # MODE
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16." # mode, Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B32_ # MODE
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32." # mode, Int32Regs>;
  defm SUST_B_1D_ARRAY_V2B64_ # MODE
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_1D_ARRAY_V2B8_TRAP
  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B16_TRAP
  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B32_TRAP
  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
5393
// Four-channel store to a 1D surface array.  `surf` contributes the surface
// handle operand ($s); it is followed by $idx, the coordinate $x and the data
// operands $r, $g, $b, $a.  No outputs and no selection pattern.
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};"),
      []>;

multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}
5404
// sust.b 1D-array four-channel stores: b8/b16/b32 crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_1D_ARRAY_V4B8_ # MODE
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8." # mode, Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B16_ # MODE
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16." # mode, Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B32_ # MODE
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32." # mode, Int32Regs>;
}

// sust.p: only .trap variants are defined here.
defm SUST_P_1D_ARRAY_V4B8_TRAP
  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B16_TRAP
  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B32_TRAP
  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
5432
// Single-channel store to a 2D surface.  `surf` contributes the surface
// handle operand ($s); it is followed by the coordinates $x, $y and the data
// operand $r.  No outputs and no selection pattern.
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
      []>;

multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
}
5442
// sust.b 2D single-channel stores: every element width crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_2D_B8_ # MODE  : SUST_2D<"sust.b.2d.b8." # mode, Int16Regs>;
  defm SUST_B_2D_B16_ # MODE : SUST_2D<"sust.b.2d.b16." # mode, Int16Regs>;
  defm SUST_B_2D_B32_ # MODE : SUST_2D<"sust.b.2d.b32." # mode, Int32Regs>;
  defm SUST_B_2D_B64_ # MODE : SUST_2D<"sust.b.2d.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_2D_B8_TRAP  : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
5461
// Two-channel store to a 2D surface.  `surf` contributes the surface handle
// operand ($s); it is followed by the coordinates $x, $y and the data
// operands $r, $g.  No outputs and no selection pattern.
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
      []>;

multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
}
5472
// sust.b 2D two-channel stores: every element width crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_2D_V2B8_ # MODE
    : SUST_2D_V2<"sust.b.2d.v2.b8." # mode, Int16Regs>;
  defm SUST_B_2D_V2B16_ # MODE
    : SUST_2D_V2<"sust.b.2d.v2.b16." # mode, Int16Regs>;
  defm SUST_B_2D_V2B32_ # MODE
    : SUST_2D_V2<"sust.b.2d.v2.b32." # mode, Int32Regs>;
  defm SUST_B_2D_V2B64_ # MODE
    : SUST_2D_V2<"sust.b.2d.v2.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_2D_V2B8_TRAP  : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
5491
// Four-channel store to a 2D surface.  `surf` contributes the surface handle
// operand ($s); it is followed by the coordinates $x, $y and the data
// operands $r, $g, $b, $a.  No outputs and no selection pattern.
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};"),
      []>;

multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}
5502
// sust.b 2D four-channel stores: b8/b16/b32 crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_2D_V4B8_ # MODE
    : SUST_2D_V4<"sust.b.2d.v4.b8." # mode, Int16Regs>;
  defm SUST_B_2D_V4B16_ # MODE
    : SUST_2D_V4<"sust.b.2d.v4.b16." # mode, Int16Regs>;
  defm SUST_B_2D_V4B32_ # MODE
    : SUST_2D_V4<"sust.b.2d.v4.b32." # mode, Int32Regs>;
}

// sust.p: only .trap variants are defined here.
defm SUST_P_2D_V4B8_TRAP  : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
5518
// Single-channel store to a 2D surface array.  `surf` contributes the surface
// handle operand ($s); it is followed by $idx, the coordinates $x, $y and the
// data operand $r.  The emitted coordinate vector has four slots, the last of
// which repeats $y.  No outputs and no selection pattern.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
      []>;

multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}
5529
// sust.b 2D-array single-channel stores: every element width crossed with
// the .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_2D_ARRAY_B8_ # MODE
    : SUST_2D_ARRAY<"sust.b.a2d.b8." # mode, Int16Regs>;
  defm SUST_B_2D_ARRAY_B16_ # MODE
    : SUST_2D_ARRAY<"sust.b.a2d.b16." # mode, Int16Regs>;
  defm SUST_B_2D_ARRAY_B32_ # MODE
    : SUST_2D_ARRAY<"sust.b.a2d.b32." # mode, Int32Regs>;
  defm SUST_B_2D_ARRAY_B64_ # MODE
    : SUST_2D_ARRAY<"sust.b.a2d.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_2D_ARRAY_B8_TRAP  : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
5563
// Two-channel store to a 2D surface array.  `surf` contributes the surface
// handle operand ($s); it is followed by $idx, the coordinates $x, $y and the
// data operands $r, $g.  The emitted coordinate vector has four slots, the
// last of which repeats $y.  No outputs and no selection pattern.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
      []>;

multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}
5574
// sust.b 2D-array two-channel stores: every element width crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_2D_ARRAY_V2B8_ # MODE
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8." # mode, Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B16_ # MODE
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16." # mode, Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B32_ # MODE
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32." # mode, Int32Regs>;
  defm SUST_B_2D_ARRAY_V2B64_ # MODE
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_2D_ARRAY_V2B8_TRAP
  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP
  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP
  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
5608
// Four-channel store to a 2D surface array.  `surf` contributes the surface
// handle operand ($s); it is followed by $idx, the coordinates $x, $y and the
// data operands $r, $g, $b, $a.  The emitted coordinate vector has four
// slots, the last of which repeats $y.  No outputs and no selection pattern.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst,
                 " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
      []>;

multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}
5619
// sust.b 2D-array four-channel stores: b8/b16/b32 crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_2D_ARRAY_V4B8_ # MODE
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8." # mode, Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B16_ # MODE
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16." # mode, Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B32_ # MODE
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32." # mode, Int32Regs>;
}

// sust.p: only .trap variants are defined here.
defm SUST_P_2D_ARRAY_V4B8_TRAP
  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP
  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP
  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
5647
// Single-channel store to a 3D surface.  `surf` contributes the surface
// handle operand ($s); it is followed by the coordinates $x, $y, $z and the
// data operand $r.  The emitted coordinate vector has four slots, the last of
// which repeats $z.  No outputs and no selection pattern.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
      []>;

multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  // Surface handle in a 64-bit register.
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  // Surface handle as a 64-bit immediate.
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}
5658
// sust.b 3D single-channel stores: every element width crossed with the
// .clamp/.trap/.zero modifiers.  b8 and b16 data both use Int16Regs.
foreach mode = ["clamp", "trap", "zero"] in {
  defvar MODE = !toupper(mode);
  defm SUST_B_3D_B8_ # MODE  : SUST_3D<"sust.b.3d.b8." # mode, Int16Regs>;
  defm SUST_B_3D_B16_ # MODE : SUST_3D<"sust.b.3d.b16." # mode, Int16Regs>;
  defm SUST_B_3D_B32_ # MODE : SUST_3D<"sust.b.3d.b32." # mode, Int32Regs>;
  defm SUST_B_3D_B64_ # MODE : SUST_3D<"sust.b.3d.b64." # mode, Int64Regs>;
}

// sust.p: only .trap variants are defined here, and none for b64.
defm SUST_P_3D_B8_TRAP  : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
5677
// Base instruction for a 3D surface store of a two-element vector.
//   inst   - mnemonic text emitted in front of the operand list.
//   intype - register class shared by the data operands $r and $g.
//   surf   - leading (ins ...) dag that supplies the surface operand $s.
// The instruction has no outputs and an empty selection-pattern list.  The
// printed coordinate vector has four elements, with $z emitted twice.
// NOTE(review): presumably PTX expects a four-element coordinate vector for
// 3d surfaces and ignores the last element -- confirm against the PTX ISA.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                // Inputs: surface operand, then coordinates, then data.
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                     (ins intype:$r, intype:$g)),
                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
                []>;
// Defines both surface-operand flavours of a two-element 3D surface store:
//   <name>_R takes the surface operand $s in an Int64Regs register;
//   <name>_I takes the surface operand $s as an i64imm operand.
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  // Register form.
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  // Immediate form.
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}
5688
// Two-element 3D surface stores (SUST_3D_V2 instantiations).  The b8 and b16
// forms both use Int16Regs as the data register class.

// sust.b, .clamp
defm SUST_B_3D_V2B8_CLAMP
  : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP
  : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP
  : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP
  : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_3D_V2B8_TRAP
  : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
defm SUST_B_3D_V2B16_TRAP
  : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP
  : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP
  : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_3D_V2B8_ZERO
  : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
defm SUST_B_3D_V2B16_ZERO
  : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO
  : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO
  : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

// sust.p, .trap (b8, b16 and b32 are instantiated here)
defm SUST_P_3D_V2B8_TRAP
  : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
defm SUST_P_3D_V2B16_TRAP
  : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP
  : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
5707
// Base instruction for a 3D surface store of a four-element vector.
//   inst   - mnemonic text emitted in front of the operand list.
//   intype - register class shared by the data operands $r, $g, $b and $a.
//   surf   - leading (ins ...) dag that supplies the surface operand $s.
// The instruction has no outputs and an empty selection-pattern list.  The
// printed coordinate vector has four elements, with $z emitted twice.
// NOTE(review): presumably PTX expects a four-element coordinate vector for
// 3d surfaces and ignores the last element -- confirm against the PTX ISA.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                // Inputs: surface operand, then coordinates, then data.
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                     (ins intype:$r, intype:$g, intype:$b, intype:$a)),
                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
                []>;
// Defines both surface-operand flavours of a four-element 3D surface store:
//   <name>_R takes the surface operand $s in an Int64Regs register;
//   <name>_I takes the surface operand $s as an i64imm operand.
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  // Register form.
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  // Immediate form.
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}
5718
// Four-element 3D surface stores (SUST_3D_V4 instantiations).  Only b8, b16
// and b32 element sizes are instantiated here; b8 and b16 both use Int16Regs
// as the data register class.

// sust.b, .clamp
defm SUST_B_3D_V4B8_CLAMP
  : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP
  : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP
  : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

// sust.b, .trap
defm SUST_B_3D_V4B8_TRAP
  : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
defm SUST_B_3D_V4B16_TRAP
  : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP
  : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

// sust.b, .zero
defm SUST_B_3D_V4B8_ZERO
  : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
defm SUST_B_3D_V4B16_ZERO
  : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO
  : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

// sust.p, .trap
defm SUST_P_3D_V4B8_TRAP
  : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
defm SUST_P_3D_V4B16_TRAP
  : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP
  : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
5734
5735}
5736
// Surface store instruction patterns.
// It is unclear why these cannot simply be included in the instruction
// definitions, but TableGen reports type errors when they are.
5740
5741// .clamp variant
// sust.b 1d, .clamp: each int_nvvm_sust_b_1d_*_clamp intrinsic is selected to
// the matching SUST_B_1D_*_CLAMP_R instruction, with the operands forwarded
// unchanged and in the same order.  In every dag the first operand line holds
// the surface operand and coordinate, the second the data value(s).  The i8
// and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_B_1D_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r),
          (SUST_B_1D_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5795
5796
5797
// sust.b 1d_array, .clamp: each int_nvvm_sust_b_1d_array_*_clamp intrinsic is
// selected to the matching SUST_B_1D_ARRAY_*_CLAMP_R instruction, with the
// operands forwarded unchanged and in the same order.  In every dag the first
// operand line holds the surface operand, $l and the coordinate, the second
// the data value(s).  The i8 and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5855
5856
5857
// sust.b 2d, .clamp: each int_nvvm_sust_b_2d_*_clamp intrinsic is selected to
// the matching SUST_B_2D_*_CLAMP_R instruction, with the operands forwarded
// unchanged and in the same order.  In every dag the first operand line holds
// the surface operand and coordinates, the second the data value(s).  The i8
// and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5915
5916
5917
// sust.b 2d_array, .clamp: each int_nvvm_sust_b_2d_array_*_clamp intrinsic is
// selected to the matching SUST_B_2D_ARRAY_*_CLAMP_R instruction, with the
// operands forwarded unchanged and in the same order.  In every dag the first
// operand line holds the surface operand, $l and the coordinates, the second
// the data value(s).  The i8 and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5988
5989
5990
// sust.b 3d, .clamp: each int_nvvm_sust_b_3d_*_clamp intrinsic is selected to
// the matching SUST_B_3D_*_CLAMP_R instruction, with the operands forwarded
// unchanged and in the same order.  In every dag the first operand line holds
// the surface operand and coordinates, the second the data value(s).  The i8
// and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6067
6068
6069// .trap variant
// sust.b 1d, .trap: each int_nvvm_sust_b_1d_*_trap intrinsic is selected to
// the matching SUST_B_1D_*_TRAP_R instruction, with the operands forwarded
// unchanged and in the same order.  In every dag the first operand line holds
// the surface operand and coordinate, the second the data value(s).  The i8
// and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_1d_i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_B_1D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_trap
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r),
          (SUST_B_1D_B64_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6123
6124
6125
// sust.b 1d_array, .trap: each int_nvvm_sust_b_1d_array_*_trap intrinsic is
// selected to the matching SUST_B_1D_ARRAY_*_TRAP_R instruction, with the
// operands forwarded unchanged and in the same order.  In every dag the first
// operand line holds the surface operand, $l and the coordinate, the second
// the data value(s).  The i8 and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6183
6184
6185
// sust.b 2d, .trap: each int_nvvm_sust_b_2d_*_trap intrinsic is selected to
// the matching SUST_B_2D_*_TRAP_R instruction, with the operands forwarded
// unchanged and in the same order.  In every dag the first operand line holds
// the surface operand and coordinates, the second the data value(s).  The i8
// and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_2d_i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_B64_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6243
6244
6245
// sust.b 2d_array, .trap: each int_nvvm_sust_b_2d_array_*_trap intrinsic is
// selected to the matching SUST_B_2D_ARRAY_*_TRAP_R instruction, with the
// operands forwarded unchanged and in the same order.  In every dag the first
// operand line holds the surface operand, $l and the coordinates, the second
// the data value(s).  The i8 and i16 forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element vector stores.
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6316
6317
6318
// sust.b 3d, .trap, single- and two-element forms: each
// int_nvvm_sust_b_3d_*_trap intrinsic is selected to the matching
// SUST_B_3D_*_TRAP_R instruction, with the operands forwarded unchanged and in
// the same order.  In every dag the first operand line holds the surface
// operand and coordinates, the second the data value(s).  The i8 and i16
// forms both carry their data in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_3d_i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_B_3D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r),
          (SUST_B_3D_B64_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r)>;

// Two-element vector stores.
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g)>;
6374
6375def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6376           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6377           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6378          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
6379           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6380           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6381
6382def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6383           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6384           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6385          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
6386           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6387           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6388
6389def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6390           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6391           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6392          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
6393           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6394           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6395
6396
6397// .zero variant
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_B_1D_*_ZERO_R instruction with its operands forwarded unchanged and in
// the same order: $s (Int64Regs), the Int32Regs coordinate $x, then the data
// values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs
// and i64 forms use Int64Regs.
// NOTE(review): $s is presumably the surface handle -- confirm against the
// SUST_* instruction definitions.
6398def : Pat<(int_nvvm_sust_b_1d_i8_zero
6399           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6400          (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6401
6402def : Pat<(int_nvvm_sust_b_1d_i16_zero
6403           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6404          (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6405
6406def : Pat<(int_nvvm_sust_b_1d_i32_zero
6407           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6408          (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6409
6410def : Pat<(int_nvvm_sust_b_1d_i64_zero
6411           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6412          (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6413
6414def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6415           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6416          (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
6417           Int16Regs:$r, Int16Regs:$g)>;
6418
6419def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6420           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6421          (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
6422           Int16Regs:$r, Int16Regs:$g)>;
6423
6424def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6425           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6426          (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
6427           Int32Regs:$r, Int32Regs:$g)>;
6428
6429def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6430           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6431          (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
6432           Int64Regs:$r, Int64Regs:$g)>;
6433
6434def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6435           Int64Regs:$s, Int32Regs:$x,
6436           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6437          (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
6438           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6439
6440def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6441           Int64Regs:$s, Int32Regs:$x,
6442           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6443          (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
6444           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6445
6446def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6447           Int64Regs:$s, Int32Regs:$x,
6448           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6449          (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
6450           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6451
6452
6453
// Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_B_1D_ARRAY_*_ZERO_R instruction with its operands forwarded unchanged
// and in the same order: $s (Int64Regs), $l and $x (Int32Regs), then the data
// values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs
// and i64 forms use Int64Regs.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
6454def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6455           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6456          (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6457           Int16Regs:$r)>;
6458
6459def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6460           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6461          (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6462           Int16Regs:$r)>;
6463
6464def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6465           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6466          (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6467           Int32Regs:$r)>;
6468
6469def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6470           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6471          (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6472           Int64Regs:$r)>;
6473
6474def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6475          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6476          (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6477           Int16Regs:$r, Int16Regs:$g)>;
6478
6479def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6480          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6481          (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6482           Int16Regs:$r, Int16Regs:$g)>;
6483
6484def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6485          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6486          (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6487           Int32Regs:$r, Int32Regs:$g)>;
6488
6489def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6490          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6491          (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6492           Int64Regs:$r, Int64Regs:$g)>;
6493
6494def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6495           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6496           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6497          (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6498           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6499
6500def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6501           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6502           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6503          (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6504           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6505
6506def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6507           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6508           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6509          (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6510           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6511
6512
6513
// Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_B_2D_*_ZERO_R instruction with its operands forwarded unchanged and in
// the same order: $s (Int64Regs), the Int32Regs coordinates $x, $y, then the
// data values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs
// and i64 forms use Int64Regs.
// NOTE(review): $s is presumably the surface handle -- confirm against the
// SUST_* instruction definitions.
6514def : Pat<(int_nvvm_sust_b_2d_i8_zero
6515           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6516          (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6517           Int16Regs:$r)>;
6518
6519def : Pat<(int_nvvm_sust_b_2d_i16_zero
6520           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6521          (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6522           Int16Regs:$r)>;
6523
6524def : Pat<(int_nvvm_sust_b_2d_i32_zero
6525           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6526          (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6527           Int32Regs:$r)>;
6528
6529def : Pat<(int_nvvm_sust_b_2d_i64_zero
6530           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6531          (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6532           Int64Regs:$r)>;
6533
6534def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6535          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6536          (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6537           Int16Regs:$r, Int16Regs:$g)>;
6538
6539def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6540          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6541          (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6542           Int16Regs:$r, Int16Regs:$g)>;
6543
6544def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6545          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6546          (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6547           Int32Regs:$r, Int32Regs:$g)>;
6548
6549def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6550          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6551          (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6552           Int64Regs:$r, Int64Regs:$g)>;
6553
6554def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6555           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6556           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6557          (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6558           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6559
6560def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6561           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6562           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6563          (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6564           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6565
6566def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6567           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6568           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6569          (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6570           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6571
6572
6573
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_B_2D_ARRAY_*_ZERO_R instruction with its operands forwarded unchanged
// and in the same order: $s (Int64Regs), $l, $x, $y (Int32Regs), then the
// data values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs
// and i64 forms use Int64Regs.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
6574def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6575          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6576          (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
6577           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6578           Int16Regs:$r)>;
6579
6580def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6581          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6582          (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
6583           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6584           Int16Regs:$r)>;
6585
6586def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6587          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6588          (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
6589           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6590           Int32Regs:$r)>;
6591
6592def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6593          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6594          (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
6595           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6596           Int64Regs:$r)>;
6597
6598def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6599           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6600           Int16Regs:$r, Int16Regs:$g),
6601          (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
6602           Int32Regs:$x, Int32Regs:$y,
6603           Int16Regs:$r, Int16Regs:$g)>;
6604
6605def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6606           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6607           Int16Regs:$r, Int16Regs:$g),
6608          (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
6609           Int32Regs:$x, Int32Regs:$y,
6610           Int16Regs:$r, Int16Regs:$g)>;
6611
6612def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6613           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6614           Int32Regs:$g),
6615          (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
6616           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6617
6618def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6619           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6620           Int64Regs:$g),
6621          (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
6622           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6623
6624def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6625           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6626           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6627          (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
6628           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6629           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6630
6631def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6632           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6633           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6634          (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
6635           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6636           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6637
6638def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6639           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6640           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6641          (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
6642           Int32Regs:$x, Int32Regs:$y,
6643           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6644
6645
6646
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_B_3D_*_ZERO_R instruction with its operands forwarded unchanged and in
// the same order: $s (Int64Regs), the Int32Regs coordinates $x, $y, $z, then
// the data values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs
// and i64 forms use Int64Regs.
// NOTE(review): $s is presumably the surface handle -- confirm against the
// SUST_* instruction definitions.
6647def : Pat<(int_nvvm_sust_b_3d_i8_zero
6648           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649           Int16Regs:$r),
6650          (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
6651           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6652           Int16Regs:$r)>;
6653
6654def : Pat<(int_nvvm_sust_b_3d_i16_zero
6655           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6656           Int16Regs:$r),
6657          (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
6658           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6659           Int16Regs:$r)>;
6660
6661def : Pat<(int_nvvm_sust_b_3d_i32_zero
6662           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6663           Int32Regs:$r),
6664          (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
6665           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6666           Int32Regs:$r)>;
6667
6668def : Pat<(int_nvvm_sust_b_3d_i64_zero
6669           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6670           Int64Regs:$r),
6671          (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
6672           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6673           Int64Regs:$r)>;
6674
6675def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6676           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6677           Int16Regs:$r, Int16Regs:$g),
6678          (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
6679           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6680           Int16Regs:$r, Int16Regs:$g)>;
6681
6682def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6683           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6684           Int16Regs:$r, Int16Regs:$g),
6685          (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
6686           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6687           Int16Regs:$r, Int16Regs:$g)>;
6688
6689def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6690           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6691           Int32Regs:$r, Int32Regs:$g),
6692          (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
6693           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6694           Int32Regs:$r, Int32Regs:$g)>;
6695
6696def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6697           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6698           Int64Regs:$r, Int64Regs:$g),
6699          (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
6700           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6701           Int64Regs:$r, Int64Regs:$g)>;
6702
6703def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6704           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6705           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6706          (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
6707           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6708           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6709
6710def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6711           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6712           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6713          (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
6714           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6715           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6716
6717def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6718           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6719           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6720          (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
6721           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6722           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6723
6724
6725
6726
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_P_1D_*_TRAP_R instruction with its operands forwarded unchanged and in
// the same order: $s (Int64Regs), the Int32Regs coordinate $x, then the data
// values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs.
// This group has no i64 / v2i64 patterns.
// NOTE(review): $s is presumably the surface handle -- confirm against the
// SUST_* instruction definitions.
6727def : Pat<(int_nvvm_sust_p_1d_i8_trap
6728           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6729          (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6730
6731def : Pat<(int_nvvm_sust_p_1d_i16_trap
6732           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6733          (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6734
6735def : Pat<(int_nvvm_sust_p_1d_i32_trap
6736           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6737          (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6738
6739def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6740           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6741          (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6742           Int16Regs:$r, Int16Regs:$g)>;
6743
6744def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6745           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6746          (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6747           Int16Regs:$r, Int16Regs:$g)>;
6748
6749def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6750           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6751          (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6752           Int32Regs:$r, Int32Regs:$g)>;
6753
6754def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6755           Int64Regs:$s, Int32Regs:$x,
6756           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6757          (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6758           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6759
6760def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6761           Int64Regs:$s, Int32Regs:$x,
6762           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6763          (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6764           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6765
6766def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6767           Int64Regs:$s, Int32Regs:$x,
6768           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6769          (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6770           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6771
6772
6773
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_P_1D_ARRAY_*_TRAP_R instruction with its operands forwarded unchanged
// and in the same order: $s (Int64Regs), $l and $x (Int32Regs), then the data
// values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs.
// This group has no i64 / v2i64 patterns.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
6774def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6775           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6776          (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6777           Int16Regs:$r)>;
6778
6779def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6780           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6781          (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6782           Int16Regs:$r)>;
6783
6784def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6785           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6786          (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6787           Int32Regs:$r)>;
6788
6789def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6790          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6791          (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6792           Int16Regs:$r, Int16Regs:$g)>;
6793
6794def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6795          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6796          (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6797           Int16Regs:$r, Int16Regs:$g)>;
6798
6799def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6800          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6801          (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6802           Int32Regs:$r, Int32Regs:$g)>;
6803
6804def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6805           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6806           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6807          (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6808           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6809
6810def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6811           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6812           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6813          (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6814           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6815
6816def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6817           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6818           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6819          (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6820           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6821
6822
6823
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_P_2D_*_TRAP_R instruction with its operands forwarded unchanged and in
// the same order: $s (Int64Regs), the Int32Regs coordinates $x, $y, then the
// data values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs.
// This group has no i64 / v2i64 patterns.
// NOTE(review): $s is presumably the surface handle -- confirm against the
// SUST_* instruction definitions.
6824def : Pat<(int_nvvm_sust_p_2d_i8_trap
6825           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6826          (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6827           Int16Regs:$r)>;
6828
6829def : Pat<(int_nvvm_sust_p_2d_i16_trap
6830           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6831          (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6832           Int16Regs:$r)>;
6833
6834def : Pat<(int_nvvm_sust_p_2d_i32_trap
6835           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6836          (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6837           Int32Regs:$r)>;
6838
6839def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6840          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6841          (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6842           Int16Regs:$r, Int16Regs:$g)>;
6843
6844def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6845          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6846          (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6847           Int16Regs:$r, Int16Regs:$g)>;
6848
6849def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6850          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6851          (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6852           Int32Regs:$r, Int32Regs:$g)>;
6853
6854def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6855           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6856           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6857          (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6858           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6859
6860def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6861           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6862           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6863          (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6864           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6865
6866def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6867           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6868           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6869          (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6870           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6871
6872
6873
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_P_2D_ARRAY_*_TRAP_R instruction with its operands forwarded unchanged
// and in the same order: $s (Int64Regs), $l, $x, $y (Int32Regs), then the
// data values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs.
// This group has no i64 / v2i64 patterns.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
6874def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6875          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6876          (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
6877           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6878           Int16Regs:$r)>;
6879
6880def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6881          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6882          (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
6883           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6884           Int16Regs:$r)>;
6885
6886def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6887          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6888          (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
6889           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6890           Int32Regs:$r)>;
6891
6892def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6893           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6894           Int16Regs:$r, Int16Regs:$g),
6895          (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
6896           Int32Regs:$x, Int32Regs:$y,
6897           Int16Regs:$r, Int16Regs:$g)>;
6898
6899def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6900           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6901           Int16Regs:$r, Int16Regs:$g),
6902          (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
6903           Int32Regs:$x, Int32Regs:$y,
6904           Int16Regs:$r, Int16Regs:$g)>;
6905
6906def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6907           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6908           Int32Regs:$g),
6909          (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6910           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6911
6912def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
6913           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6914           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6915          (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
6916           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6917           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6918
6919def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
6920           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6921           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6922          (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
6923           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6924           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6925
6926def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
6927           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6928           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6929          (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6930           Int32Regs:$x, Int32Regs:$y,
6931           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6932
6933
6934
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each intrinsic is rewritten to the correspondingly named
// SUST_P_3D_*_TRAP_R instruction with its operands forwarded unchanged and in
// the same order: $s (Int64Regs), the Int32Regs coordinates $x, $y, $z, then
// the data values ($r[, $g[, $b, $a]]).
// The i8 and i16 forms both carry data in Int16Regs; i32 forms use Int32Regs.
// This group has no i64 / v2i64 patterns.
// NOTE(review): $s is presumably the surface handle -- confirm against the
// SUST_* instruction definitions.
6935def : Pat<(int_nvvm_sust_p_3d_i8_trap
6936           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6937           Int16Regs:$r),
6938          (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
6939           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6940           Int16Regs:$r)>;
6941
6942def : Pat<(int_nvvm_sust_p_3d_i16_trap
6943           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6944           Int16Regs:$r),
6945          (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
6946           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6947           Int16Regs:$r)>;
6948
6949def : Pat<(int_nvvm_sust_p_3d_i32_trap
6950           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6951           Int32Regs:$r),
6952          (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
6953           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6954           Int32Regs:$r)>;
6955
6956def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
6957           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6958           Int16Regs:$r, Int16Regs:$g),
6959          (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
6960           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6961           Int16Regs:$r, Int16Regs:$g)>;
6962
6963def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
6964           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6965           Int16Regs:$r, Int16Regs:$g),
6966          (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
6967           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6968           Int16Regs:$r, Int16Regs:$g)>;
6969
6970def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
6971           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6972           Int32Regs:$r, Int32Regs:$g),
6973          (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
6974           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6975           Int32Regs:$r, Int32Regs:$g)>;
6976
6977def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
6978           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6979           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6980          (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
6981           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6982           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6983
6984def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6985           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6986           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6987          (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
6988           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6989           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6990
6991def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6992           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6993           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6994          (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
6995           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6996           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6997
6998//-----------------------------------
6999// Read Special Registers
7000//-----------------------------------
7001
// Instruction that copies the special register %<regname> into a 64-bit
// destination with mov.u64 and is selected for the operand-less intrinsic
// `intop`. `Preds` is the list of predicates the instruction requires; it is
// empty by default.
class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int64Regs:$d),
              (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set i64:$d, (intop))]>,
    Requires<Preds>;
7007
// Instruction that copies the special register %<regname> into a 32-bit
// destination with mov.u32 and is selected for the operand-less intrinsic
// `intop`. `Preds` is the list of predicates the instruction requires; it is
// empty by default.
class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int32Regs:$d),
              (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set i32:$d, (intop))]>,
    Requires<Preds>;
7013
// Defines four PTX_READ_SREG_R32 instructions, one per component suffix
// (x, y, z, w) of the special register `regname`. For component <c> the
// register read is "<regname>.<c>", the intrinsic is looked up by name as
// int_nvvm_read_ptx_sreg_<regname>_<c>, and the record gets the suffix "_<c>".
multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
  foreach comp = ["x", "y", "z", "w"] in {
    defvar sreg_name = !strconcat(regname, ".", comp);
    defvar sreg_intr = !cast<Intrinsic>(
        !strconcat("int_nvvm_read_ptx_sreg_", regname, "_", comp));
    def "_" # comp : PTX_READ_SREG_R32<sreg_name, sreg_intr, Preds>;
  }
}
7021
// TODO: Add vector-read versions of the special registers.
7023
// Four-component (x/y/z/w) special registers with no extra predicates.
defm INT_PTX_SREG_TID    : PTX_READ_SREG_R32V4<"tid">;
defm INT_PTX_SREG_NTID   : PTX_READ_SREG_R32V4<"ntid">;
defm INT_PTX_SREG_CTAID  : PTX_READ_SREG_R32V4<"ctaid">;
defm INT_PTX_SREG_NCTAID : PTX_READ_SREG_R32V4<"nctaid">;

// Cluster-related four-component registers; all require sm_90 and PTX 7.8.
defm INT_PTX_SREG_CLUSTERID
    : PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_NCLUSTERID
    : PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_CTAID
    : PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_NCTAID
    : PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;

// Scalar cluster registers, under the same sm_90 / PTX 7.8 requirement.
def INT_PTX_SREG_CLUSTER_CTARANK
    : PTX_READ_SREG_R32<"cluster_ctarank",
                        int_nvvm_read_ptx_sreg_cluster_ctarank,
                        [hasSM<90>, hasPTX<78>]>;
def INT_PTX_SREG_CLUSTER_NCTARANK
    : PTX_READ_SREG_R32<"cluster_nctarank",
                        int_nvvm_read_ptx_sreg_cluster_nctarank,
                        [hasSM<90>, hasPTX<78>]>;
7046
7047
// Scalar 32-bit special registers that need no extra predicates.
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// The lanemask_{eq,le,lt,ge,gt} registers.
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
7071
// Clock and timer registers. Their reads are flagged hasSideEffects
// (presumably so that separate reads are not merged or reordered -- the
// rationale is not stated here).
let hasSideEffects = 1 in {
  def INT_PTX_SREG_CLOCK
      : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
  def INT_PTX_SREG_CLOCK64
      : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
  def INT_PTX_SREG_GLOBALTIMER
      : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
}

// The generic 64-bit counter nodes are selected to the registers above:
// readcyclecounter reads %clock64, readsteadycounter reads %globaltimer.
def : Pat<(i64 (readcyclecounter)),  (INT_PTX_SREG_CLOCK64)>;
def : Pat<(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>;
7083
// The pm0..pm3 special registers.
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;

// TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
// emits a '%' in front of the register name, whereas WARP_SZ is a constant
// that is written without one.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set i32:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7094
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Parameters:
//   r  - the WMMA_REGS record describing the fragment; its geom, frag and
//        ptx_elt_type are forwarded to the WMMA_REGS base class.
//   op - name of the operation the fragment is used with. Only "mma" and
//        "ldmatrix" are tested for below; any other value (e.g. "load",
//        "store", "wmma.mma") takes the "not mma" arms.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data. f32 and f64 use the
  // matching floating-point register classes; every other element type is
  // carried in 32-bit integer registers.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Int32Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment: one `regclass` entry
  // per element of `regs` (a field not defined in this class; presumably
  // inherited from WMMA_REGS).
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the first arm whose condition is true, so
  // the order of the arms below is significant. There is no default arm.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],

    // f64 @ m8n8k4. This is the only arm needed for that combination; the
    // non-f64 m8n8k4 case is handled further down.
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],

    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],

    // Non-f64 @ m8n8k4 (the f64 variant was matched above).
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],

    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],

    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
7203
// Turns an (ins ...) operand dag into the dag that matches a call to the
// intrinsic `Intr`: the `ins` operator becomes `Intr`, and the memory operand
// classes imem / MEMri / MEMri64 become the address patterns ADDRvar / ADDRri /
// ADDRri64 respectively. All other elements are kept as they are.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // The rewritten dag; each element of Ins goes through all four
  // substitutions.
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
7213
// PatFrag flavour of BuildPatternI: turns an (ins ...) operand dag into the
// dag that matches the PatFrag `Intr`. The `ins` operator becomes `Intr`, and
// imem / MEMri / MEMri64 become ADDRvar / ADDRri / ADDRri64 respectively.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // The rewritten dag; each element of Ins goes through all four
  // substitutions.
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
7223
// Base class shared by all MMA-family instructions. It starts out as an
// NVPTXInst with empty operand lists, a placeholder asm string and no
// patterns; subclasses override those with `let`.
//   _Intr - name of the intrinsic record the instruction implements.
//   _Args - list of (ins ...) dags that together form the input operands.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // All argument dags joined, in order, into one (ins ...) dag.
  dag Args = !foldl((ins), _Args, acc, piece, !con(acc, piece));
  // Pre-built pattern of the form (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
7233
//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// Parameters:
//   Frag       - fragment being loaded; provides the output registers,
//                predicates and the frag/geom/element-type parts of the asm.
//   Layout     - layout component of the asm string.
//   Space      - address-space suffix: ".shared", ".global" or "" (generic).
//   WithStride - when set, the instruction takes an extra $ldm operand.
//   SrcOp      - operand class used for the $src address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
               [!con((ins SrcOp:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on the pointer's address space, so
  // matching the right one requires an address-space-constrained PatFrag.
  // PFOperands has the same shape as Args, spelled as (ops node:$name, ...).
  dag PFOperands = !if(WithStride,
                       (ops node:$src, node:$ldm),
                       (ops node:$src));
  // The same operands applied to the intrinsic itself.
  dag PFOperandsIntr = !if(WithStride,
                           (Intr node:$src, node:$ldm),
                           (Intr node:$src));
  // PatFrag that matches only the address space selected by Space; anything
  // other than ".shared" or ".global" means generic.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                                        AS_match.generic))>;
  // Replace the unconstrained pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // The trailing $ptx operand feeds the ${ptx:aligned} modifier in the asm.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = !strconcat("wmma.load.", Frag.frag,
                             ".sync${ptx:aligned}",
                             ".", Layout,
                             ".", Frag.geom,
                             Space,
                             ".", Frag.ptx_elt_type, " \t",
                             Frag.regstring,
                             ", [$src]",
                             !if(WithStride, ", $ldm", ""),
                             ";");
}
7273
//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// Parameters:
//   Frag       - fragment being stored; provides the value registers,
//                predicates and the geom/element-type parts of the asm.
//   Layout     - layout component of the asm string.
//   Space      - address-space suffix: ".shared", ".global" or "" (generic).
//   WithStride - when set, the instruction takes an extra $ldm operand.
//   DstOp      - operand class used for the $dst address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on the pointer's address space, so
  // matching the right one requires an address-space-constrained PatFrag.
  // PFOperands has the same shape as Args, spelled as (ops node:$name, ...):
  // the destination, one node per fragment register, then the optional stride.
  dag PFOperands = !con(
      (ops node:$dst),
      !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
      !if(WithStride, (ops node:$ldm), (ops)));
  // PatFrag that matches only the address space selected by Space; anything
  // other than ".shared" or ".global" means generic.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(elt, PFOperands, !subst(ops, Intr, elt)),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                                        AS_match.generic))>;
  // Replace the unconstrained pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // The trailing $ptx operand feeds the ${ptx:aligned} modifier in the asm.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = !strconcat("wmma.store.d.sync${ptx:aligned}",
                             ".", Layout,
                             ".", Frag.geom,
                             Space,
                             ".", Frag.ptx_elt_type,
                             " \t[$dst],",
                             Frag.regstring,
                             !if(WithStride, ", $ldm", ""),
                             ";");
}
7313
// Instantiate every supported WMMA load and store: the cross product of
// layout, stride/no-stride, address space and address operand class, filtered
// per fragment by NVVM_WMMA_LDST_SUPPORTED. The records are collected in
// MMA_LDSTs.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach lay = ["row", "col"] in {
    foreach with_stride = [false, true] in {
      foreach as = [".global", ".shared", ""] in {
        foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach ld_frag = NVVM_MMA_OPS.all_ld_ops in
            if NVVM_WMMA_LDST_SUPPORTED<ld_frag, lay>.ret then
              def : WMMA_LOAD<WMMA_REGINFO<ld_frag, "load">,
                              lay, as, with_stride, addr_op>;
          foreach st_frag = NVVM_MMA_OPS.all_st_ops in
            if NVVM_WMMA_LDST_SUPPORTED<st_frag, lay>.ret then
              def : WMMA_STORE_D<WMMA_REGINFO<st_frag, "store">,
                                 lay, as, with_stride, addr_op>;
        } // addr_op
      } // as
    } // with_stride
  } // lay
} // defset
7331
// Predicate list for an MMA instruction: the predicates of fragment A, plus
// [hasSM<80>, hasPTX<71>] when the b1 operation is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !if(!eq(b1op, ".and.popc"),
                            !listconcat(FragA.Predicates,
                                        [hasSM<80>, hasPTX<71>]),
                            FragA.Predicates);
}
// WMMA.MMA
//
// Parameters:
//   FragA/FragB/FragC - input fragments; their registers form the inputs.
//   FragD             - result fragment; its registers form the outputs.
//   ALayout/BLayout   - layout components of the asm string.
//   Satfinite         - non-zero appends ".satfinite".
//   rnd               - rounding component; "" means none is printed.
//   b1op              - b1 operation suffix printed right after "wmma.mma".
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op,
                         FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // The trailing $ptx operand feeds the ${ptx:aligned} modifier in the asm.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix: only the D and C types when A is f16, otherwise all four
  // (D, A, B, C).
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragC.ptx_elt_type),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragA.ptx_elt_type,
                                   ".", FragB.ptx_elt_type,
                                   ".", FragC.ptx_elt_type));
  let AsmString = !strconcat("wmma.mma", b1op,
                             ".sync${ptx:aligned}",
                             ".", ALayout,
                             ".", BLayout,
                             ".", FragA.geom,
                             !if(!eq(rnd, ""), "", "." # rnd),
                             TypeList,
                             !if(Satfinite, ".satfinite", ""), "\n\t\t",
                             FragD.regstring, ",\n\t\t",
                             FragA.regstring, ",\n\t\t",
                             FragB.regstring, ",\n\t\t",
                             FragC.regstring, ";");
}
7375
// Instantiate every supported wmma.mma variant (layouts x satfinite x rounding
// x operand set x b1 operation, filtered by NVVM_WMMA_SUPPORTED) and collect
// the records in WMMAs. All of them are marked convergent.
let isConvergent = true in {
defset list<WMMA_INSTR> WMMAs = {
  foreach a_lay = ["row", "col"] in {
    foreach b_lay = ["row", "col"] in {
      foreach sat = [0, 1] in {
        foreach round = ["", "rn", "rz", "rm", "rp"] in {
          foreach frags = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1 = NVVM_MMA_B1OPS<frags>.ret in {
              if NVVM_WMMA_SUPPORTED<frags, a_lay, b_lay, sat, round>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<frags[0], "wmma.mma">,
                               WMMA_REGINFO<frags[1], "wmma.mma">,
                               WMMA_REGINFO<frags[2], "wmma.mma">,
                               WMMA_REGINFO<frags[3], "wmma.mma">,
                               a_lay, b_lay, sat, round, b1>;
              }
            } // b1
          } // frags
        } // round
      } // sat
    } // b_lay
  } // a_lay
} // defset
} // isConvergent
7399
// MMA
//
// Parameters:
//   FragA/FragB/FragC - input fragments; their registers form the inputs.
//   FragD             - result fragment; its registers form the outputs.
//   ALayout/BLayout   - layout components of the asm string.
//   Satfinite         - non-zero appends ".satfinite".
//   b1op              - b1 operation suffix printed after the type list.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op,
                        FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // The extra $ptx operand is appended for uniformity with the other MMA
  // classes; this asm string spells out ".aligned" itself.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix in D, A, B, C order.
  string TypeList = !strconcat(".", FragD.ptx_elt_type,
                               ".", FragA.ptx_elt_type,
                               ".", FragB.ptx_elt_type,
                               ".", FragC.ptx_elt_type);
  let AsmString = !strconcat("mma.sync.aligned.",
                             FragA.geom,
                             ".", ALayout,
                             ".", BLayout,
                             !if(Satfinite, ".satfinite", ""),
                             TypeList,
                             b1op, "\n\t\t",
                             FragD.regstring, ",\n\t\t",
                             FragA.regstring, ",\n\t\t",
                             FragB.regstring, ",\n\t\t",
                             FragC.regstring, ";");
}
7427
// Instantiate every supported mma variant (layouts x satfinite x operand set x
// b1 operation, filtered by NVVM_MMA_SUPPORTED) and collect the records in
// MMAs. All of them are marked convergent.
let isConvergent = true in {
defset list<WMMA_INSTR> MMAs = {
  foreach a_lay = ["row", "col"] in {
    foreach b_lay = ["row", "col"] in {
      foreach sat = [0, 1] in {
        foreach frags = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1 = NVVM_MMA_B1OPS<frags>.ret in {
            if NVVM_MMA_SUPPORTED<frags, a_lay, b_lay, sat>.ret then {
              def : MMA<WMMA_REGINFO<frags[0], "mma">,
                        WMMA_REGINFO<frags[1], "mma">,
                        WMMA_REGINFO<frags[2], "mma">,
                        WMMA_REGINFO<frags[3], "mma">,
                        a_lay, b_lay, sat, b1>;
            }
          } // b1
        } // frags
      } // sat
    } // b_lay
  } // a_lay
} // defset
} // isConvergent
7449
//
// ldmatrix.sync.aligned.m8n8.<frag>[|.trans][|.shared].b16
//
// Parameters:
//   Frag       - fragment being loaded; provides the output registers,
//                predicates and the geom/frag/element-type parts of the asm.
//   Transposed - when set, ".trans" is added to the asm string.
//   Space      - address-space suffix: ".shared" or "" (generic).
//   SrcOp      - operand class used for the $src address.
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // PatFrag that matches only the address space selected by Space; anything
  // other than ".shared" means generic.
  PatFrag IntrFrag = PatFrag<(ops node:$src),
                             (Intr node:$src),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                                                        AS_match.generic)>;
  // Replace the unconstrained pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // The extra $ptx operand is appended for uniformity with the other MMA
  // classes; this asm string spells out ".aligned" itself.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = !strconcat("ldmatrix.sync.aligned.",
                             Frag.geom,
                             ".", Frag.frag,
                             !if(Transposed, ".trans", ""),
                             Space,
                             ".", Frag.ptx_elt_type,
                             " ", Frag.regstring, ", [$src];");
}
7474
// Instantiate every supported ldmatrix variant (transposed or not x address
// space x address operand class, filtered by NVVM_LDMATRIX_SUPPORTED) and
// collect the records in LDMATRIXs.
defset list<WMMA_INSTR> LDMATRIXs = {
  foreach trans = [false, true] in {
    foreach as = [".shared", ""] in {
      foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach ld_frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<ld_frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<ld_frag, "ldmatrix">,
                           trans, as, addr_op>;
      } // addr_op
    } // as
  } // trans
} // defset
7488
// Pattern that selects the instruction `wi` for its IntrinsicPattern, carrying
// over the instruction's predicates.
//
// Constructing non-flat DAGs is still a pain: a dag node cannot be !subst'ed
// with a dag, so ptx.version has to be appended *after* !foreach has replaced
// 'ins' with the instruction record.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(elt, wi.Args, !subst(ins, wi, elt)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;
7497
// Emit one intrinsic->instruction pattern for every record collected in the
// MMAs, WMMAs, MMA_LDSTs and LDMATRIXs defsets.
foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in {
  def : MMA_PAT<inst>;
}
7501
// mapa instructions for the intrinsic `Intr`, with `suffix` inserted right
// after the mnemonic. Four variants are defined: 32-bit and 64-bit $a/$d, each
// with $b either in a register or as an immediate. All require sm_90 and
// PTX 7.8.
multiclass MAPA<string suffix, Intrinsic Intr> {
  // 32-bit, register $b.
  def _32
    : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
                !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                [(set i32:$d, (Intr i32:$a, i32:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
  // 32-bit, immediate $b.
  def _32i
    : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
                !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                [(set i32:$d, (Intr i32:$a, imm:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit, register $b.
  def _64
    : NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
                !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                [(set i64:$d, (Intr i64:$a, i32:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit, immediate $b.
  def _64i
    : NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
                !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                [(set i64:$d, (Intr i64:$a, imm:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
}
7520
// mapa without a suffix and with the ".shared::cluster" suffix.
defm mapa
    : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster
    : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
7523
7524
// getctarank instructions for the intrinsic `Intr`, with `suffix` inserted
// right after the mnemonic. The result is always 32-bit; the two variants
// differ in the width of the $a operand. Both require sm_90 and PTX 7.8.
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  // 32-bit $a.
  def _32
    : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                !strconcat("getctarank", suffix, ".u32\t$d, $a;"),
                [(set i32:$d, (Intr i32:$a))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit $a.
  def _64
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                !strconcat("getctarank", suffix, ".u64\t$d, $a;"),
                [(set i32:$d, (Intr i64:$a))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
}
7535
// getctarank without a suffix and with the ".shared::cluster" suffix.
defm getctarank
    : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster
    : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
7538
// Copies the %is_explicit_cluster predicate register into a 1-bit register.
// Requires sm_90 and PTX 7.8.
def is_explicit_cluster
  : NVPTXInst<(outs Int1Regs:$d),
              (ins),
              "mov.pred\t$d, %is_explicit_cluster;",
              [(set i1:$d, (int_nvvm_is_explicit_cluster))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
7543
// setmaxnreg inc/dec intrinsics. Both instructions are convergent and take
// the register count as an immediate; they require sm_90a and PTX 8.0.
let isConvergent = true in {
  // `Action` is the "inc"/"dec" component of the mnemonic; `Intr` is the
  // intrinsic the instruction is selected for.
  multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
    def : NVPTXInst<(outs), (ins i32imm:$reg_count),
                    !strconcat("setmaxnreg.", Action,
                               ".sync.aligned.u32 $reg_count;"),
                    [(Intr timm:$reg_count)]>,
          Requires<[hasSM90a, hasPTX<80>]>;
  }

  defm INT_SET_MAXNREG_INC
      : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
  defm INT_SET_MAXNREG_DEC
      : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
} // isConvergent
7557
//
// WGMMA fence instructions
//
// All three are convergent and require sm_90a and PTX 8.0.
let isConvergent = true in {
  def INT_NVVM_WGMMA_FENCE_SYNC_ALIGNED
    : NVPTXInst<(outs), (ins),
                "wgmma.fence.sync.aligned;",
                [(int_nvvm_wgmma_fence_sync_aligned)]>,
      Requires<[hasSM90a, hasPTX<80>]>;

  def INT_NVVM_WGMMA_COMMIT_GROUP_SYNC_ALIGNED
    : NVPTXInst<(outs), (ins),
                "wgmma.commit_group.sync.aligned;",
                [(int_nvvm_wgmma_commit_group_sync_aligned)]>,
      Requires<[hasSM90a, hasPTX<80>]>;

  // $n is a 64-bit immediate operand.
  def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED
    : NVPTXInst<(outs), (ins i64imm:$n),
                "wgmma.wait_group.sync.aligned \t$n;",
                [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>,
      Requires<[hasSM90a, hasPTX<80>]>;
} // isConvergent = true
7571
// griddepcontrol instructions; both take no operands and require sm_90 and
// PTX 7.8.
def GRIDDEPCONTROL_LAUNCH_DEPENDENTS
  : NVPTXInst<(outs), (ins),
              "griddepcontrol.launch_dependents;",
              [(int_nvvm_griddepcontrol_launch_dependents)]>,
    Requires<[hasSM<90>, hasPTX<78>]>;

def GRIDDEPCONTROL_WAIT
  : NVPTXInst<(outs), (ins),
              "griddepcontrol.wait;",
              [(int_nvvm_griddepcontrol_wait)]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
7583
// Emits "exit;" for the int_nvvm_exit intrinsic; no operands, no predicates.
def INT_EXIT
  : NVPTXInst<(outs), (ins),
              "exit;",
              [(int_nvvm_exit)]>;
7585