xref: /llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td (revision 3b5e9eed2f67c1fb6dcf7033e92509ba2b0381e9)
1//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines all of the NVVM-specific intrinsics for use with NVPTX.
10//
11//===----------------------------------------------------------------------===//
12
13// The following intrinsics were once defined here, but are now auto-upgraded
14// to target-generic LLVM intrinsics.
15//
16//   * llvm.nvvm.brev32  --> llvm.bitreverse.i32
17//   * llvm.nvvm.brev64  --> llvm.bitreverse.i64
18//   * llvm.nvvm.clz.i   --> llvm.ctlz.i32
19//   * llvm.nvvm.clz.ll  --> trunc i64 llvm.ctlz.i64(x) to i32
20//   * llvm.nvvm.popc.i  --> llvm.ctpop.i32
21//   * llvm.nvvm.popc.ll --> trunc i64 llvm.ctpop.i64 to i32
22//   * llvm.nvvm.abs.i   --> select(x >= -x, x, -x)
23//   * llvm.nvvm.abs.ll  --> ibid.
24//   * llvm.nvvm.max.i   --> select(x sge y, x, y)
25//   * llvm.nvvm.max.ll  --> ibid.
26//   * llvm.nvvm.max.ui  --> select(x uge y, x, y)
27//   * llvm.nvvm.max.ull --> ibid.
28//   * llvm.nvvm.min.i   --> select(x sle y, x, y)
29//   * llvm.nvvm.min.ll  --> ibid.
30//   * llvm.nvvm.min.ui  --> select(x ule y, x, y)
31//   * llvm.nvvm.min.ull --> ibid.
32//   * llvm.nvvm.h2f     --> llvm.convert.to.fp16.f32
33//   * llvm.nvvm.bitcast.f2i         --> bitcast
34//   * llvm.nvvm.bitcast.i2f         --> ibid.
35//   * llvm.nvvm.bitcast.d2ll        --> ibid.
36//   * llvm.nvvm.bitcast.ll2d        --> ibid.
37//   * llvm.nvvm.ptr.gen.to.global   --> addrspacecast
38//   * llvm.nvvm.ptr.gen.to.shared   --> ibid.
39//   * llvm.nvvm.ptr.gen.to.constant --> ibid.
40//   * llvm.nvvm.ptr.gen.to.local    --> ibid.
41//   * llvm.nvvm.ptr.global.to.gen   --> ibid.
42//   * llvm.nvvm.ptr.shared.to.gen   --> ibid.
43//   * llvm.nvvm.ptr.constant.to.gen --> ibid.
44//   * llvm.nvvm.ptr.local.to.gen    --> ibid.
45//   * llvm.nvvm.ldg.global.i        --> load addrspace(1) !load.invariant
46//   * llvm.nvvm.ldg.global.f        --> ibid.
47//   * llvm.nvvm.ldg.global.p        --> ibid.
48
// Pointer types qualified with a specific address space.
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr, addrspace(1)
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr, addrspace(3)
51
52//
53// MISC
54//
55
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction and
// maps it to the list of LLVM types used to carry that fragment.
//   Geom:       m<M>n<N>k<K>, e.g. m8n32k16 (m<M>n<N> only for ldmatrix).
//   Frag:       [a|b|c|d] ([x1|x2|x4] for ldmatrix).
//   PtxEltType: PTX type for the element.
//
// Fields:
//   geom, frag, ptx_elt_type - copies of the template arguments.
//   gft  - "<geom>:<frag>:<type>", the key for geometry-specific entries.
//   ft   - "<frag>:<type>", the key for geometry-independent entries.
//   regs - LLVM types making up the fragment, selected by the table below.
//
// NOTE: !cond selects the FIRST matching entry, so the geometry-specific mma
// fp entries must stay ahead of the generic {fragment, type} wmma entries.
// The table has no catch-all entry; a combination that matches nothing is not
// given a value here.
class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
  string geom = Geom;
  string frag = Frag;
  string ptx_elt_type = PtxEltType;
  string gft = Geom#":"#Frag#":"#ptx_elt_type;
  string ft = frag#":"#ptx_elt_type;
  list<LLVMType> regs = !cond(
    // mma fp ops use smaller fragments than wmma fp ops
    !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m16n8k8:a:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m16n8k8:b:f16") : [llvm_v2f16_ty],
    !eq(gft,"m16n8k8:c:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m16n8k8:d:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m16n8k8:c:f32") : !listsplat(llvm_float_ty, 4),
    !eq(gft,"m16n8k8:d:f32") : !listsplat(llvm_float_ty, 4),
    !eq(gft,"m16n8k16:a:f16") : !listsplat(llvm_v2f16_ty, 4),
    !eq(gft,"m16n8k16:b:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m16n8k16:c:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m16n8k16:d:f16") : !listsplat(llvm_v2f16_ty, 2),
    !eq(gft,"m16n8k16:c:f32") : !listsplat(llvm_float_ty, 4),
    !eq(gft,"m16n8k16:d:f32") : !listsplat(llvm_float_ty, 4),
    !eq(gft,"m16n8k4:c:f32") : !listsplat(llvm_float_ty, 4),
    !eq(gft,"m16n8k4:d:f32") : !listsplat(llvm_float_ty, 4),

    // wmma fp16 -> fp16/fp32 @  m16n16k16/m8n32k16/m32n8k16
    // All other supported geometries use the same fragment format for f32 and
    // f16, so we only need to consider {fragment, type}.
    !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
    !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
    !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
    !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
    !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
    !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),

    // wmma tf32 -> f32 @ m16n16k8
    // (the tf32 inputs are carried in i32 registers; c/d use the generic
    // f32 entries above)
    !eq(gft,"m16n16k8:a:tf32") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n16k8:b:tf32") : !listsplat(llvm_i32_ty, 4),

    // mma tf32 -> f32 @ m16n8k4/m16n8k8
    !eq(gft,"m16n8k4:a:tf32") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k4:b:tf32") : [llvm_i32_ty],
    !eq(gft,"m16n8k8:a:tf32") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k8:b:tf32") : !listsplat(llvm_i32_ty, 2),

    // wmma/mma f64 -> f64 @ m8n8k4
    !eq(gft,"m8n8k4:a:f64") : [llvm_double_ty],
    !eq(gft,"m8n8k4:b:f64") : [llvm_double_ty],
    !eq(gft,"m8n8k4:c:f64") : !listsplat(llvm_double_ty, 2),
    !eq(gft,"m8n8k4:d:f64") : !listsplat(llvm_double_ty, 2),

    // wmma bf16 -> f32 @ m16n16k16/m8n32k16/m32n8k16
    // (the bf16 inputs are carried in i32 registers)
    !eq(gft,"m16n16k16:a:bf16") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n16k16:b:bf16") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m8n32k16:a:bf16") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m8n32k16:b:bf16") : !listsplat(llvm_i32_ty, 8),
    !eq(gft,"m32n8k16:a:bf16") : !listsplat(llvm_i32_ty, 8),
    !eq(gft,"m32n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2),

    // mma bf16 -> f32 @ m16n8k16/m16n8k8
    !eq(gft,"m16n8k16:a:bf16") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k8:a:bf16") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k8:b:bf16") : [llvm_i32_ty],

    // wmma u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
    !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),

    !eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
    !eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
    !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
    !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),

    !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
    !eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
    !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
    !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),

    // mma u8/s8 -> s32 @ m8n8k16/m16n8k16/m16n8k32
    !eq(gft,"m8n8k16:a:u8") : [llvm_i32_ty],
    !eq(gft,"m8n8k16:a:s8") : [llvm_i32_ty],
    !eq(gft,"m8n8k16:b:u8") : [llvm_i32_ty],
    !eq(gft,"m8n8k16:b:s8") : [llvm_i32_ty],
    !eq(gft,"m8n8k16:c:s32") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m8n8k16:d:s32") : !listsplat(llvm_i32_ty, 2),

    !eq(gft,"m16n8k16:a:u8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k16:a:s8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k16:b:u8") : [llvm_i32_ty],
    !eq(gft,"m16n8k16:b:s8") : [llvm_i32_ty],
    !eq(gft,"m16n8k16:c:s32") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k16:d:s32") : !listsplat(llvm_i32_ty, 4),

    // The m16n8k32 c/d:s32 entries below are shared by the u8/s8 and the
    // u4/s4 variants of that geometry.
    !eq(gft,"m16n8k32:a:u8") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k32:a:s8") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k32:b:u8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k32:b:s8") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k32:c:s32") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k32:d:s32") : !listsplat(llvm_i32_ty, 4),

    // wmma/mma u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !eq(gft,"m8n8k32:a:u4") : [llvm_i32_ty],
    !eq(gft,"m8n8k32:a:s4") : [llvm_i32_ty],
    !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
    !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
    !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),

    // mma u4/s4 -> s32 @ m16n8k32/m16n8k64
    // (m16n8k32 c/d:s32 are matched by the entries in the u8/s8 group above;
    // the duplicate, unreachable copies were dropped.)
    !eq(gft,"m16n8k32:a:u4") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k32:a:s4") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k32:b:u4") : [llvm_i32_ty],
    !eq(gft,"m16n8k32:b:s4") : [llvm_i32_ty],

    !eq(gft,"m16n8k64:a:u4") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k64:a:s4") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k64:b:u4") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k64:b:s4") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k64:c:s32") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k64:d:s32") : !listsplat(llvm_i32_ty, 4),

    // wmma/mma b1 -> s32 @ m8n8k128(b1)
    !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
    !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
    !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),

    // mma b1 -> s32 @ m16n8k128/m16n8k256
    !eq(gft,"m16n8k128:a:b1") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k128:b:b1") : [llvm_i32_ty],
    !eq(gft,"m16n8k128:c:s32") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k128:d:s32") : !listsplat(llvm_i32_ty, 4),

    !eq(gft,"m16n8k256:a:b1") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k256:b:b1") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m16n8k256:c:s32") : !listsplat(llvm_i32_ty, 4),
    !eq(gft,"m16n8k256:d:s32") : !listsplat(llvm_i32_ty, 4),

    // ldmatrix b16 -> s32 @ m8n8
    !eq(gft,"m8n8:x1:b16") : !listsplat(llvm_i32_ty, 1),
    !eq(gft,"m8n8:x2:b16") : !listsplat(llvm_i32_ty, 2),
    !eq(gft,"m8n8:x4:b16") : !listsplat(llvm_i32_ty, 4),
  );
}
212
// Builds the intrinsic name (`intr`) and the TableGen record name (`record`)
// for a WMMA load/store of fragment Frag.
//   Op:         operation component placed right after the geometry.
//   Layout:     layout component of the name.
//   WithStride: when non-zero, a "stride" component is added to both names.
class WMMA_NAME_LDST<string Op, WMMA_REGS Frag, string Layout, int WithStride> {
  // llvm.nvvm.wmma.<geom>.<op>.<frag>.<layout>[.stride].<type>
  string intr = !strconcat("llvm.nvvm.wmma.", Frag.geom,
                           ".", Op,
                           ".", Frag.frag,
                           ".", Layout,
                           !if(WithStride, ".stride", ""),
                           ".", Frag.ptx_elt_type);

  // TODO(tra): record name should ideally use the same field order as the
  // intrinsic. E.g. string record = !subst("llvm", "int",
  //                                 !subst(".", "_", llvm));
  //
  // int_nvvm_wmma_<geom>_<op>_<frag>_<type>_<layout>[_stride]
  // Note that, unlike `intr`, the element type comes before the layout.
  string record = !strconcat("int_nvvm_wmma_", Frag.geom,
                             "_", Op,
                             "_", Frag.frag,
                             "_", Frag.ptx_elt_type,
                             "_", Layout,
                             !if(WithStride, "_stride", ""));
}
233
// Computes the type suffix (`ret`) that distinguishes one MMA variant from
// another. `ret` is the concatenation of ".<ptx_elt_type>" for every fragment
// in `id_frags`, in order.
class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
  list<WMMA_REGS> id_frags =
    !if(!eq(A.ptx_elt_type, "f16"),
        // FP16 ops are identified by result & accumulator type, in that order.
        [D, C],
        // Other ops are identified by their input types: both A and B when
        // they differ, otherwise A alone.
        !if(!ne(A.ptx_elt_type, B.ptx_elt_type), [A, B], [A]));

  string ret = !foldl("", id_frags, acc, frag, acc # "." # frag.ptx_elt_type);
}
244
// Builds the intrinsic name (`llvm`) and the record name (`record`) of a WMMA
// mma operation:
//   llvm.nvvm.wmma.<geom>.mma<b1op>.<alayout>.<blayout>[.<rnd>]<signature>[.satfinite]
// The geometry is taken from fragment A; <signature> comes from MMA_SIGNATURE.
// `record` is `llvm` with the "llvm." prefix replaced by "int_" and every
// remaining '.' replaced by '_'.
class WMMA_NAME<string ALayout, string BLayout, int Satfinite, string Rnd, string b1op,
                WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
  string signature = MMA_SIGNATURE<A, B, C, D>.ret;
  string llvm = !strconcat("llvm.nvvm.wmma.", A.geom,
                           ".mma", b1op,
                           ".", ALayout,
                           ".", BLayout,
                           // Rounding mode is optional.
                           !if(!eq(Rnd, ""), "", "." # Rnd),
                           signature,
                           !if(Satfinite, ".satfinite", ""));

  string record = !subst(".", "_", !subst("llvm.", "int_", llvm));
}
261
// Builds the intrinsic name (`llvm`) and the record name (`record`) of an MMA
// operation:
//   llvm.nvvm.mma<b1op>.<geom>.<alayout>.<blayout>[.satfinite]<signature>
// The geometry is taken from fragment A; <signature> comes from MMA_SIGNATURE.
// `record` is `llvm` with the "llvm." prefix replaced by "int_" and every
// remaining '.' replaced by '_'.
class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op,
               WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
  string signature = MMA_SIGNATURE<A, B, C, D>.ret;
  string llvm = !strconcat("llvm.nvvm.mma", b1op,
                           ".", A.geom,
                           ".", ALayout,
                           ".", BLayout,
                           !if(Satfinite, ".satfinite", ""),
                           signature);
  string record = !subst(".", "_", !subst("llvm.", "int_", llvm));
}
275
// Builds the intrinsic name (`intr`) and the record name (`record`) of an
// ldmatrix operation:
//   llvm.nvvm.ldmatrix.sync.aligned.<geom>.<frag>[.trans].<type>
// `record` is `intr` with the "llvm." prefix replaced by "int_" and every
// remaining '.' replaced by '_'.
class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
  string intr = !strconcat("llvm.nvvm.ldmatrix.sync.aligned",
                           ".", Frag.geom,
                           ".", Frag.frag,
                           !if(Trans, ".trans", ""),
                           ".", Frag.ptx_elt_type);
  string record = !subst(".", "_", !subst("llvm.", "int_", intr));
}
286
// Generates the list of 4-tuples [A, B, C, D] of WMMA_REGS, one tuple per
// combination of geometry and fragment element types.
//   Geom:  list of supported geometries.
//   TypeN: list of PTX element types allowed for fragment N.
//   TypeB may be empty, in which case B uses the same type as A.
//   TypeD may be empty, in which case D uses the same type as C.
// Tuples are produced with Geom as the outermost loop and TypeD as the
// innermost one.
class MMA_OPS<list<string> Geom, list<string> TypeA, list<string> TypeB,
            list<string> TypeC, list<string> TypeD> {
  list<list<WMMA_REGS>> ret =
    !foldl([]<list<WMMA_REGS>>, Geom, by_geom, geom,
      !listconcat(by_geom,
        !foldl([]<list<WMMA_REGS>>, TypeA, by_a, type_a,
          !listconcat(by_a,
            !foldl([]<list<WMMA_REGS>>, !if(!empty(TypeB), [type_a], TypeB), by_b, type_b,
              !listconcat(by_b,
                !foldl([]<list<WMMA_REGS>>, TypeC, by_c, type_c,
                  !listconcat(by_c,
                    !foldl([]<list<WMMA_REGS>>, !if(!empty(TypeD), [type_c], TypeD), by_d, type_d,
                      !listconcat(by_d,
                        [[WMMA_REGS<geom, "a", type_a>,
                          WMMA_REGS<geom, "b", type_b>,
                          WMMA_REGS<geom, "c", type_c>,
                          WMMA_REGS<geom, "d", type_d>]]))))))))));

  // Debugging aid: the same list with every fragment shown as its
  // "<geom>:<frag>:<type>" string.
  list<list<string>> ops =
    !foreach(op, ret, [op[0].gft, op[1].gft, op[2].gft, op[3].gft]);
}
306
// Generates the list of WMMA_REGS for every combination of geometry, fragment
// and element type. Geom is the outermost loop, Types the innermost.
class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
  list<WMMA_REGS> ret =
    !foldl([]<WMMA_REGS>, Geom, by_geom, geom,
      !listconcat(by_geom,
        !foldl([]<WMMA_REGS>, Frags, by_frag, frag,
          !listconcat(by_frag,
            !foldl([]<WMMA_REGS>, Types, by_type, type,
              !listconcat(by_type,
                [WMMA_REGS<geom, frag, type>]))))));

  // Debugging aid: the same list with every fragment shown as its
  // "<geom>:<frag>:<type>" string.
  list<string> ops = !foreach(op, ret, op.gft);
}
316
// Generates the list of WMMA_REGS for every combination of geometry, fragment
// and element type for ldmatrix ops.
//
// The enumeration is exactly the one MMA_LDST_OPS performs, so it is inherited
// instead of being duplicated. This class therefore exposes the same fields:
//   ret - list<WMMA_REGS>, Geom outermost, Types innermost.
//   ops - debugging aid listing each entry's "<geom>:<frag>:<type>" string.
class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types>
  : MMA_LDST_OPS<Geom, Frags, Types>;
326
// Creates the lists of valid combinations of fragments. These are the main
// lists that drive generation of the corresponding intrinsics and
// instructions.
//
// MMA_OPS arguments are <Geom, TypeA, TypeB, TypeC, TypeD>; an empty TypeB or
// TypeD means "same as TypeA" or "same as TypeC" respectively.
class NVVM_MMA_OPS {
  //
  // WMMA ops.
  //
  list<list<WMMA_REGS>> tf32_wmma_ops =
    MMA_OPS<["m16n16k8"],
            ["tf32"], [], ["f32"], []>.ret;
  list<list<WMMA_REGS>> bf16_wmma_ops =
    MMA_OPS<["m16n16k16", "m32n8k16", "m8n32k16"],
            ["bf16"], [], ["f32"], []>.ret;
  list<list<WMMA_REGS>> f64_wmma_ops =
    MMA_OPS<["m8n8k4"],
            ["f64"], [], ["f64"], []>.ret;
  list<list<WMMA_REGS>> fp_wmma_ops =
    MMA_OPS<["m16n16k16", "m32n8k16", "m8n32k16"],
            ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
  list<list<WMMA_REGS>> int_wmma_ops =
    MMA_OPS<["m16n16k16", "m32n8k16", "m8n32k16"],
            ["s8", "u8"], [], ["s32"], []>.ret;
  list<list<WMMA_REGS>> subint_wmma_ops =
    MMA_OPS<["m8n8k32"],
            ["s4", "u4"], [], ["s32"], []>.ret;
  list<list<WMMA_REGS>> bit_wmma_ops =
    MMA_OPS<["m8n8k128"],
            ["b1"], [], ["s32"], []>.ret;
  list<list<WMMA_REGS>> all_wmma_ops =
    !listconcat(tf32_wmma_ops, bf16_wmma_ops, f64_wmma_ops, fp_wmma_ops,
                int_wmma_ops, subint_wmma_ops, bit_wmma_ops);

  //
  // MMA ops.
  //
  list<list<WMMA_REGS>> tf32_mma_ops =
    MMA_OPS<["m16n8k4", "m16n8k8"],
            ["tf32"], [], ["f32"], []>.ret;
  list<list<WMMA_REGS>> bf16_mma_ops =
    MMA_OPS<["m16n8k16", "m16n8k8"],
            ["bf16"], [], ["f32"], []>.ret;
  list<list<WMMA_REGS>> f64_mma_ops =
    MMA_OPS<["m8n8k4"],
            ["f64"], [], ["f64"], []>.ret;
  list<list<WMMA_REGS>> fp_mma_ops =
    MMA_OPS<["m8n8k4", "m16n8k8", "m16n8k16"],
            ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
  // Unlike their wmma counterparts, the int and sub-int mma ops list TypeB
  // explicitly, so A and B may mix signed and unsigned element types.
  list<list<WMMA_REGS>> int_mma_ops =
    MMA_OPS<["m8n8k16", "m16n8k16", "m16n8k32"],
            ["s8", "u8"], ["s8", "u8"], ["s32"], []>.ret;
  list<list<WMMA_REGS>> subint_mma_ops =
    MMA_OPS<["m8n8k32", "m16n8k32", "m16n8k64"],
            ["s4", "u4"], ["s4", "u4"], ["s32"], []>.ret;
  list<list<WMMA_REGS>> bit_mma_ops =
    MMA_OPS<["m8n8k128", "m16n8k128", "m16n8k256"],
            ["b1"], [], ["s32"], []>.ret;
  list<list<WMMA_REGS>> all_mma_ops =
    !listconcat(tf32_mma_ops, bf16_mma_ops, f64_mma_ops, fp_mma_ops,
                int_mma_ops, subint_mma_ops, bit_mma_ops);

  //
  // WMMA load/store fragments.
  //
  list<WMMA_REGS> ldst_ab_ops =
    MMA_LDST_OPS<["m16n16k16", "m32n8k16", "m8n32k16"],
                 ["a", "b"], ["f16", "u8", "s8", "bf16"]>.ret;
  list<WMMA_REGS> ldst_cd_ops =
    MMA_LDST_OPS<["m16n16k16", "m32n8k16", "m8n32k16"],
                 ["c", "d"], ["f16", "f32", "s32"]>.ret;
  list<WMMA_REGS> ldst_tf32_ab_ops =
    MMA_LDST_OPS<["m16n16k8"],
                 ["a", "b"], ["tf32"]>.ret;
  list<WMMA_REGS> ldst_tf32_cd_ops =
    MMA_LDST_OPS<["m16n16k8"],
                 ["c", "d"], ["f32"]>.ret;
  list<WMMA_REGS> ldst_f64_abcd_ops =
    MMA_LDST_OPS<["m8n8k4"],
                 ["a", "b", "c", "d"], ["f64"]>.ret;
  list<WMMA_REGS> ldst_subint_ab_ops =
    MMA_LDST_OPS<["m8n8k32"],
                 ["a", "b"], ["s4","u4"]>.ret;
  list<WMMA_REGS> ldst_bit_ab_ops =
    MMA_LDST_OPS<["m8n8k128"],
                 ["a", "b"], ["b1"]>.ret;
  // Covers the C/D fragments of both the sub-int (m8n8k32) and the bit
  // (m8n8k128) geometries.
  list<WMMA_REGS> ldst_subint_cd_ops =
    MMA_LDST_OPS<["m8n8k32", "m8n8k128"],
                 ["c", "d"], ["s32"]>.ret;
  list<WMMA_REGS> all_ldst_ops =
    !listconcat(ldst_ab_ops, ldst_cd_ops,
                ldst_tf32_ab_ops, ldst_tf32_cd_ops,
                ldst_f64_abcd_ops,
                ldst_subint_ab_ops, ldst_bit_ab_ops, ldst_subint_cd_ops);

  // Separate A/B/C fragments (loads) from D (stores).
  list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
  list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));

  //
  // ldmatrix fragments.
  //
  list<WMMA_REGS> ldmatrix_b16_ops =
    LDMATRIX_OPS<["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
  list<WMMA_REGS> all_ldmatrix_ops = ldmatrix_b16_ops;
}

// Singleton record through which the lists above are accessed
// (e.g. NVVM_MMA_OPS.all_mma_ops).
def NVVM_MMA_OPS : NVVM_MMA_OPS;
418
// `ret` is true if this combination of fragment and layout is supported for
// WMMA load/store ops, and false otherwise.
// E.g.
// if NVVM_WMMA_LDST_SUPPORTED<...>.ret then
//   def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_WMMA_LDST_SUPPORTED<WMMA_REGS frag, string layout> {
  string f = frag.frag;
  string t = frag.ptx_elt_type;

  bit ret = !cond(
    // Only sub-int (b1/u4/s4) element types restrict the layout.
    !not(!or(!eq(t, "b1"),
             !eq(t, "u4"),
             !eq(t, "s4"))) : true,
    // Sub-int A fragments must use the row layout ...
    !eq(f, "a") : !eq(layout, "row"),
    // ... and sub-int B fragments must use the column layout.
    !eq(f, "b") : !eq(layout, "col"),
    // Other sub-int fragments are not restricted.
    true : true
  );
}
442
// `ret` is true if this combination of layout/satf/rnd is supported for WMMA
// ops, and false otherwise. The element type of frags[0] (the A fragment)
// drives all checks.
// E.g.
// if NVVM_WMMA_SUPPORTED<...>.ret then
//   def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_WMMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, int satf, string rnd> {
  // WMMA ops check both layouts.
  string layout = layout_a # ":" # layout_b;
  string t = frags[0].ptx_elt_type;

  bit ret = !cond(
    // A rounding mode is only accepted for f64; any other type combined with
    // a non-empty rnd is invalid.
    !and(!ne(rnd, ""), !ne(t, "f64")) : false,

    // satf is only accepted for s8/u8/s4/u4/f16.
    !and(!eq(satf, 1),
         !not(!or(!eq(t, "s8"),
                  !eq(t, "u8"),
                  !eq(t, "s4"),
                  !eq(t, "u4"),
                  !eq(t, "f16")))) : false,

    // Sub-int wmma requires the row/column layout.
    !and(!ne(layout, "row:col"),
         !or(!eq(t, "s4"),
             !eq(t, "u4"),
             !eq(t, "b1"))) : false,

    // Everything else is supported.
    true : true
  );
}
475
// `ret` is the list of b1 operation suffixes to generate for the given
// fragments: ".xor.popc" and ".and.popc" when the A fragment (frags[0]) has
// element type b1, and a single empty suffix for every other type.
class NVVM_MMA_B1OPS<list<WMMA_REGS> frags> {
  list<string> ret = !if(!eq(frags[0].ptx_elt_type, "b1"),
                         [".xor.popc", ".and.popc"],
                         [""]);
}
482
// `ret` is true if this combination of layout/satf is supported for MMA ops,
// and false otherwise.
//   frags:    the [A, B, C, D] fragments of the op.
//   layout_a: layout of A.
//   layout_b: layout of B.
//   satf:     1 if the .satfinite variant is requested.
// E.g.
// if NVVM_MMA_SUPPORTED<...>.ret then
//   def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, int satf> {
  // MMA ops check both layouts.
  string layout = layout_a # ":" # layout_b;
  string a_type = frags[0].ptx_elt_type;
  string b_type = frags[1].ptx_elt_type;
  string c_type = frags[2].ptx_elt_type;
  string d_type = frags[3].ptx_elt_type;
  string geom = frags[0].geom;

  // gcd is a shortcut used to identify instructions that depend on
  // geom+frag_c+frag_d.
  string gcd = geom # ":" # c_type # d_type;
  bit ret = !cond(

    // Limit satf to valid types
    !and(!eq(satf, 1),
         !ne(a_type, "s8"),
         !ne(a_type, "u8"),
         !ne(a_type, "s4"),
         !ne(a_type, "u4")): false,

    // m8n8k4 has no C=f32 D=f16 variant.
    !eq(gcd, "m8n8k4:f32f16"): false,

    // only m8n8k4 for f16 does not require row:col layout
    !and(!ne(layout, "row:col"),
         !or(!ne(geom, "m8n8k4"),
             !ne(a_type, "f16"))) : false,

    // m16n8k8 requires A and B to be the same type and C and D to be the same
    // type. (A separate "C and D must match" clause used to follow this one;
    // it could never fire because this clause already rejects that case.)
    !and(!eq(geom, "m16n8k8"),
         !or(!ne(a_type, b_type),
             !ne(c_type, d_type))): false,

    // All other are OK.
    true: true
  );
}
532
// `ret` is true if the fragment is supported for ldmatrix ops, and false
// otherwise.
// E.g.
// if NVVM_LDMATRIX_SUPPORTED<...>.ret then
//   def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag> {
  string g = frag.geom;
  string t = frag.ptx_elt_type;

  // Only the m8n8 geometry with b16 elements is currently supported.
  bit ret = !if(!and(!eq(g, "m8n8"), !eq(t, "b16")), true, false);
}
549
// Collects the names and types of one shfl intrinsic variant.
//   sync:        selects the "sync" flavour, which takes one extra leading
//                i32 argument.
//   mode:        mode component of the name.
//   type:        "i32" or "f32"; selects the operand/result type.
//   return_pred: when set, the intrinsic additionally returns an i1 and the
//                name gets a "p" suffix.
class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
  // [sync_]<mode>_<type>[p]
  string Suffix = !strconcat(!if(sync, "sync_", ""),
                             mode, "_", type,
                             !if(return_pred, "p", ""));

  string Name = !strconcat("int_nvvm_shfl_", Suffix);
  string Builtin = !strconcat("__nvvm_shfl_", Suffix);
  string IntrName = !strconcat("llvm.nvvm.shfl.", !subst("_", ".", Suffix));

  // Only the variants that do not return a predicate get a builtin.
  bit withoutGccBuiltin = return_pred;
  bit withGccBuiltin = !not(return_pred);

  // No entry exists for any type other than "i32" and "f32".
  LLVMType OpType = !cond(
    !eq(type, "i32") : llvm_i32_ty,
    !eq(type, "f32") : llvm_float_ty);

  list<LLVMType> RetTy = !if(return_pred,
                             [OpType, llvm_i1_ty],
                             [OpType]);
  list<LLVMType> ArgsTy = !if(sync,
                              [llvm_i32_ty, OpType, llvm_i32_ty, llvm_i32_ty],
                              [OpType, llvm_i32_ty, llvm_i32_ty]);
}
569
// Name, argument types and properties of the cp.async.bulk.tensor g2s
// intrinsic for a tensor with `dim` dimensions in the given `mode`.
//
// Argument layout (index : meaning):
//   0                      : dst_smem_ptr
//   1                      : mbarrier_smem_ptr
//   2                      : tensormap_ptr
//   3 .. dim+2             : tensor dims (i32 each)
//   next NumIm2ColOffsets  : im2col offsets (i16 each, im2col mode only)
//   then                   : cta_mask, cache_hint
//   FlagsStartIdx, +1      : flag for cta_mask, flag for cache_hint
class CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, string mode> {
  string Name = "int_nvvm_cp_async_bulk_tensor_g2s_" # mode # "_" # dim # "d";

  bit IsIm2Col = !eq(mode, "im2col");
  // im2col mode carries dim-2 offsets; other modes carry none.
  int NumIm2ColOffsets = !if(IsIm2Col, !sub(dim, 2), 0);

  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
  list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
  list<LLVMType> ArgsTy = !listconcat(
      [llvm_shared_ptr_ty,   // dst_smem_ptr
       llvm_shared_ptr_ty,   // mbarrier_smem_ptr
       llvm_ptr_ty],         // tensormap_ptr
      TensorDimsTy,          // actual tensor dims
      Im2ColOffsetsTy,       // im2col offsets
      [llvm_i16_ty,          // cta_mask
       llvm_i64_ty,          // cache_hint
       llvm_i1_ty,           // Flag for cta_mask
       llvm_i1_ty]);         // Flag for cache_hint

  // 3 pointers + dim tensor dims + cta_mask + cache_hint precede the flags
  // when there are no im2col offsets.
  int TempFlagsStartIdx = !add(dim, 5);
  int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);

  // Both trailing flags must be immediates.
  list<IntrinsicProperty> IntrProp = [
      IntrConvergent,
      WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
      NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoCapture<ArgIndex<2>>,
      ImmArg<ArgIndex<FlagsStartIdx>>,
      ImmArg<ArgIndex<!add(FlagsStartIdx, 1)>>];
}
597
// Name, argument types and properties of the cp.async.bulk.tensor s2g
// intrinsic for a tensor with `dim` dimensions in the given `mode`.
//
// Argument layout (index : meaning):
//   0             : src_smem_ptr
//   1             : tensormap_ptr
//   2 .. dim+1    : tensor dims (i32 each)
//   dim+2         : cache_hint
//   FlagsStartIdx : flag for cache_hint
class CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, string mode> {
  string Name = "int_nvvm_cp_async_bulk_tensor_s2g_" # mode # "_" # dim # "d";

  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
  list<LLVMType> ArgsTy = !listconcat(
      [llvm_shared_ptr_ty,   // src_smem_ptr
       llvm_ptr_ty],         // tensormap_ptr
      TensorDimsTy,          // actual tensor dims
      [llvm_i64_ty,          // cache_hint
       llvm_i1_ty]);         // Flag for cache_hint

  // 2 pointers + dim tensor dims + cache_hint precede the flag.
  int FlagsStartIdx = !add(dim, 3);

  // The trailing flag must be an immediate.
  list<IntrinsicProperty> IntrProp = [
      IntrConvergent,
      ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
      NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
      ImmArg<ArgIndex<FlagsStartIdx>>];
}
615
// Name, argument types and properties of the cp.async.bulk.tensor prefetch
// intrinsic for a tensor with `dim` dimensions in the given `mode`.
//
// Argument layout (index : meaning):
//   0                      : tensormap_ptr
//   1 .. dim               : tensor dims (i32 each)
//   next NumIm2ColOffsets  : im2col offsets (i16 each, im2col mode only)
//   then                   : cache_hint
//   FlagsStartIdx          : flag for cache_hint
class CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
  string Name = "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # "d";

  bit IsIm2Col = !eq(mode, "im2col");
  // im2col mode carries dim-2 offsets; other modes carry none.
  int NumIm2ColOffsets = !if(IsIm2Col, !sub(dim, 2), 0);

  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
  list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
  list<LLVMType> ArgsTy = !listconcat(
      [llvm_ptr_ty],      // tensormap_ptr
      TensorDimsTy,       // actual tensor dims
      Im2ColOffsetsTy,    // im2col offsets
      [llvm_i64_ty,       // cache_hint
       llvm_i1_ty]);      // Flag for cache_hint

  // 1 pointer + dim tensor dims + cache_hint precede the flag when there are
  // no im2col offsets.
  int TempFlagsStartIdx = !add(dim, 2);
  int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);

  // The trailing flag must be an immediate.
  list<IntrinsicProperty> IntrProp = [
      IntrConvergent,
      ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
      ImmArg<ArgIndex<FlagsStartIdx>>];
}
637
// Name, argument types and properties of the cp.async.bulk.tensor reduce
// intrinsic performing `op` on a tensor with `dim` dimensions in the given
// `mode`.
//
// Argument layout (index : meaning):
//   0             : src_smem_ptr
//   1             : tensormap_ptr
//   2 .. dim+1    : tensor dims (i32 each)
//   dim+2         : cache_hint
//   FlagsStartIdx : flag for cache_hint
class CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, string mode, string op> {
  // <op>_<mode>_<dim>d
  string Suffix = op # "_" # mode # "_" # dim # "d";
  string Name = "int_nvvm_cp_async_bulk_tensor_reduce_" # Suffix;

  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
  list<LLVMType> ArgsTy = !listconcat(
      [llvm_shared_ptr_ty,   // src_smem_ptr
       llvm_ptr_ty],         // tensormap_ptr
      TensorDimsTy,          // actual tensor dims
      [llvm_i64_ty,          // cache_hint
       llvm_i1_ty]);         // Flag for cache_hint

  // 2 pointers + dim tensor dims + cache_hint precede the flag.
  int FlagsStartIdx = !add(dim, 3);

  // The trailing flag must be an immediate.
  list<IntrinsicProperty> IntrProp = [
      IntrConvergent,
      ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
      NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
      ImmArg<ArgIndex<FlagsStartIdx>>];
}
656
657let TargetPrefix = "nvvm" in {
658  def int_nvvm_prmt : ClangBuiltin<"__nvvm_prmt">,
659      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
660        [IntrNoMem, IntrSpeculatable]>;
661
662  def int_nvvm_nanosleep : ClangBuiltin<"__nvvm_nanosleep">,
663      DefaultAttrsIntrinsic<[], [llvm_i32_ty],
664                            [IntrConvergent, IntrNoMem, IntrHasSideEffects]>;
665
666//
667// Min Max
668//
669
670  foreach operation = ["min", "max"] in {
671    def int_nvvm_f # operation # _d :
672      ClangBuiltin<!strconcat("__nvvm_f", operation, "_d")>,
673      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
674        [IntrNoMem, IntrSpeculatable, Commutative]>;
675
676    foreach variant = ["_f", "_ftz_f", "_nan_f", "_ftz_nan_f",
677      "_xorsign_abs_f", "_ftz_xorsign_abs_f", "_nan_xorsign_abs_f",
678      "_ftz_nan_xorsign_abs_f"] in {
679      def int_nvvm_f # operation # variant :
680        ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
681        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
682          [IntrNoMem, IntrSpeculatable, Commutative]>;
683    }
684
685    foreach variant = ["_f16", "_ftz_f16", "_nan_f16", "_ftz_nan_f16",
686      "_xorsign_abs_f16", "_ftz_xorsign_abs_f16", "_nan_xorsign_abs_f16",
687      "_ftz_nan_xorsign_abs_f16"] in {
688      def int_nvvm_f # operation # variant :
689        DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty],
690          [IntrNoMem, IntrSpeculatable, Commutative]>;
691    }
692
693    foreach variant = ["_f16x2", "_ftz_f16x2", "_nan_f16x2",
694      "_ftz_nan_f16x2", "_xorsign_abs_f16x2", "_ftz_xorsign_abs_f16x2",
695      "_nan_xorsign_abs_f16x2", "_ftz_nan_xorsign_abs_f16x2"] in {
696      def int_nvvm_f # operation # variant :
697        DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty],
698          [IntrNoMem, IntrSpeculatable, Commutative]>;
699    }
700
701    foreach variant = ["_bf16", "_ftz_bf16", "_nan_bf16", "_ftz_nan_bf16",
702      "_xorsign_abs_bf16", "_ftz_xorsign_abs_bf16", "_nan_xorsign_abs_bf16",
703      "_ftz_nan_xorsign_abs_bf16"] in {
704      def int_nvvm_f # operation # variant :
705        ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
706        DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty],
707          [IntrNoMem, IntrSpeculatable, Commutative]>;
708    }
709
710    foreach variant = ["_bf16x2", "_ftz_bf16x2", "_nan_bf16x2",
711      "_ftz_nan_bf16x2", "_xorsign_abs_bf16x2", "_ftz_xorsign_abs_bf16x2",
712      "_nan_xorsign_abs_bf16x2", "_ftz_nan_xorsign_abs_bf16x2"]  in {
713      def int_nvvm_f # operation # variant :
714        ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
715        DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty],
716          [IntrNoMem, IntrSpeculatable, Commutative]>;
717    }
718  }
719
720//
721// Multiplication
722//
723
724  def int_nvvm_mulhi_s : ClangBuiltin<"__nvvm_mulhi_s">,
725      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
726        [IntrNoMem, IntrSpeculatable, Commutative]>;
727  def int_nvvm_mulhi_us : ClangBuiltin<"__nvvm_mulhi_us">,
728      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
729        [IntrNoMem, IntrSpeculatable, Commutative]>;
730
731  def int_nvvm_mulhi_i : ClangBuiltin<"__nvvm_mulhi_i">,
732      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
733        [IntrNoMem, IntrSpeculatable, Commutative]>;
734  def int_nvvm_mulhi_ui : ClangBuiltin<"__nvvm_mulhi_ui">,
735      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
736        [IntrNoMem, IntrSpeculatable, Commutative]>;
737
738  def int_nvvm_mulhi_ll : ClangBuiltin<"__nvvm_mulhi_ll">,
739      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
740        [IntrNoMem, IntrSpeculatable, Commutative]>;
741  def int_nvvm_mulhi_ull : ClangBuiltin<"__nvvm_mulhi_ull">,
742      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
743        [IntrNoMem, IntrSpeculatable, Commutative]>;
744
745  def int_nvvm_mul_rn_ftz_f : ClangBuiltin<"__nvvm_mul_rn_ftz_f">,
746      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
747        [IntrNoMem, IntrSpeculatable, Commutative]>;
748  def int_nvvm_mul_rn_f : ClangBuiltin<"__nvvm_mul_rn_f">,
749      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
750        [IntrNoMem, IntrSpeculatable, Commutative]>;
751  def int_nvvm_mul_rz_ftz_f : ClangBuiltin<"__nvvm_mul_rz_ftz_f">,
752      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
753        [IntrNoMem, IntrSpeculatable, Commutative]>;
754  def int_nvvm_mul_rz_f : ClangBuiltin<"__nvvm_mul_rz_f">,
755      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
756        [IntrNoMem, IntrSpeculatable, Commutative]>;
757  def int_nvvm_mul_rm_ftz_f : ClangBuiltin<"__nvvm_mul_rm_ftz_f">,
758      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
759        [IntrNoMem, IntrSpeculatable, Commutative]>;
760  def int_nvvm_mul_rm_f : ClangBuiltin<"__nvvm_mul_rm_f">,
761      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
762        [IntrNoMem, IntrSpeculatable, Commutative]>;
763  def int_nvvm_mul_rp_ftz_f : ClangBuiltin<"__nvvm_mul_rp_ftz_f">,
764      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
765        [IntrNoMem, IntrSpeculatable, Commutative]>;
766  def int_nvvm_mul_rp_f : ClangBuiltin<"__nvvm_mul_rp_f">,
767      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
768        [IntrNoMem, IntrSpeculatable, Commutative]>;
769
770  def int_nvvm_mul_rn_d : ClangBuiltin<"__nvvm_mul_rn_d">,
771      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
772        [IntrNoMem, IntrSpeculatable, Commutative]>;
773  def int_nvvm_mul_rz_d : ClangBuiltin<"__nvvm_mul_rz_d">,
774      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
775        [IntrNoMem, IntrSpeculatable, Commutative]>;
776  def int_nvvm_mul_rm_d : ClangBuiltin<"__nvvm_mul_rm_d">,
777      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
778        [IntrNoMem, IntrSpeculatable, Commutative]>;
779  def int_nvvm_mul_rp_d : ClangBuiltin<"__nvvm_mul_rp_d">,
780      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
781        [IntrNoMem, IntrSpeculatable, Commutative]>;
782
783  def int_nvvm_mul24_i : ClangBuiltin<"__nvvm_mul24_i">,
784      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
785        [IntrNoMem, IntrSpeculatable, Commutative]>;
786  def int_nvvm_mul24_ui : ClangBuiltin<"__nvvm_mul24_ui">,
787      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
788        [IntrNoMem, IntrSpeculatable, Commutative]>;
789
790//
791// Div
792//
793
794  def int_nvvm_div_approx_ftz_f : ClangBuiltin<"__nvvm_div_approx_ftz_f">,
795      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
796        [IntrNoMem]>;
797  def int_nvvm_div_approx_f : ClangBuiltin<"__nvvm_div_approx_f">,
798      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
799        [IntrNoMem]>;
800
801  def int_nvvm_div_rn_ftz_f : ClangBuiltin<"__nvvm_div_rn_ftz_f">,
802      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
803        [IntrNoMem]>;
804  def int_nvvm_div_rn_f : ClangBuiltin<"__nvvm_div_rn_f">,
805      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
806        [IntrNoMem]>;
807
808  def int_nvvm_div_rz_ftz_f : ClangBuiltin<"__nvvm_div_rz_ftz_f">,
809      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
810        [IntrNoMem]>;
811  def int_nvvm_div_rz_f : ClangBuiltin<"__nvvm_div_rz_f">,
812      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
813        [IntrNoMem]>;
814
815  def int_nvvm_div_rm_ftz_f : ClangBuiltin<"__nvvm_div_rm_ftz_f">,
816      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
817        [IntrNoMem]>;
818  def int_nvvm_div_rm_f : ClangBuiltin<"__nvvm_div_rm_f">,
819      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
820        [IntrNoMem]>;
821
822  def int_nvvm_div_rp_ftz_f : ClangBuiltin<"__nvvm_div_rp_ftz_f">,
823      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
824        [IntrNoMem]>;
825  def int_nvvm_div_rp_f : ClangBuiltin<"__nvvm_div_rp_f">,
826      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
827        [IntrNoMem]>;
828
829  def int_nvvm_div_rn_d : ClangBuiltin<"__nvvm_div_rn_d">,
830      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
831        [IntrNoMem]>;
832  def int_nvvm_div_rz_d : ClangBuiltin<"__nvvm_div_rz_d">,
833      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
834        [IntrNoMem]>;
835  def int_nvvm_div_rm_d : ClangBuiltin<"__nvvm_div_rm_d">,
836      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
837        [IntrNoMem]>;
838  def int_nvvm_div_rp_d : ClangBuiltin<"__nvvm_div_rp_d">,
839      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
840        [IntrNoMem]>;
841
842  def int_nvvm_div_full : ClangBuiltin<"__nvvm_div_full">,
843      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
844        [IntrNoMem]>;
845  def int_nvvm_div_full_ftz : ClangBuiltin<"__nvvm_div_full_ftz">,
846      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
847        [IntrNoMem]>;
848
849//
850// Sad
851//
852
853  def int_nvvm_sad_s : ClangBuiltin<"__nvvm_sad_s">,
854      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
855        [IntrNoMem, Commutative, IntrSpeculatable]>;
856  def int_nvvm_sad_us : ClangBuiltin<"__nvvm_sad_us">,
857      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
858        [IntrNoMem, Commutative, IntrSpeculatable]>;
859
860  def int_nvvm_sad_i : ClangBuiltin<"__nvvm_sad_i">,
861      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
862        [IntrNoMem, Commutative, IntrSpeculatable]>;
863  def int_nvvm_sad_ui : ClangBuiltin<"__nvvm_sad_ui">,
864      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
865        [IntrNoMem, Commutative, IntrSpeculatable]>;
866
867  def int_nvvm_sad_ll : ClangBuiltin<"__nvvm_sad_ll">,
868      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
869        [IntrNoMem, Commutative, IntrSpeculatable]>;
870  def int_nvvm_sad_ull : ClangBuiltin<"__nvvm_sad_ull">,
871      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
872        [IntrNoMem, Commutative, IntrSpeculatable]>;
873
874
875//
876// Floor  Ceil
877//
878
879  def int_nvvm_floor_ftz_f : ClangBuiltin<"__nvvm_floor_ftz_f">,
880      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
881  def int_nvvm_floor_f : ClangBuiltin<"__nvvm_floor_f">,
882      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
883  def int_nvvm_floor_d : ClangBuiltin<"__nvvm_floor_d">,
884      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
885
886  def int_nvvm_ceil_ftz_f : ClangBuiltin<"__nvvm_ceil_ftz_f">,
887      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
888  def int_nvvm_ceil_f : ClangBuiltin<"__nvvm_ceil_f">,
889      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
890  def int_nvvm_ceil_d : ClangBuiltin<"__nvvm_ceil_d">,
891      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
892
893//
894// Abs
895//
896
897  def int_nvvm_fabs_ftz_f : ClangBuiltin<"__nvvm_fabs_ftz_f">,
898      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
899  def int_nvvm_fabs_f : ClangBuiltin<"__nvvm_fabs_f">,
900      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
901  def int_nvvm_fabs_d : ClangBuiltin<"__nvvm_fabs_d">,
902      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
903
904//
905// Abs, Neg bf16, bf16x2
906//
907
908  foreach unary = ["abs", "neg"] in {
909    def int_nvvm_ # unary # _bf16 :
910      ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16")>,
911      DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty], [IntrNoMem]>;
912    def int_nvvm_ # unary # _bf16x2 :
913      ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16x2")>,
914      DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty], [IntrNoMem]>;
915  }
916
917//
918// Round
919//
920
921  def int_nvvm_round_ftz_f : ClangBuiltin<"__nvvm_round_ftz_f">,
922      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
923  def int_nvvm_round_f : ClangBuiltin<"__nvvm_round_f">,
924      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
925
926  def int_nvvm_round_d : ClangBuiltin<"__nvvm_round_d">,
927      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
928
929//
930// Trunc
931//
932
933  def int_nvvm_trunc_ftz_f : ClangBuiltin<"__nvvm_trunc_ftz_f">,
934      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
935  def int_nvvm_trunc_f : ClangBuiltin<"__nvvm_trunc_f">,
936      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
937
938  def int_nvvm_trunc_d : ClangBuiltin<"__nvvm_trunc_d">,
939      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
940
941//
942// Saturate
943//
944
945  def int_nvvm_saturate_ftz_f : ClangBuiltin<"__nvvm_saturate_ftz_f">,
946      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
947  def int_nvvm_saturate_f : ClangBuiltin<"__nvvm_saturate_f">,
948      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
949
950  def int_nvvm_saturate_d : ClangBuiltin<"__nvvm_saturate_d">,
951      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
952
953//
954// Exp2  Log2
955//
956
957  def int_nvvm_ex2_approx_ftz_f : ClangBuiltin<"__nvvm_ex2_approx_ftz_f">,
958      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
959  def int_nvvm_ex2_approx_f : ClangBuiltin<"__nvvm_ex2_approx_f">,
960      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
961  def int_nvvm_ex2_approx_d : ClangBuiltin<"__nvvm_ex2_approx_d">,
962      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
963  def int_nvvm_ex2_approx_f16 :
964      DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
965  def int_nvvm_ex2_approx_f16x2 :
966      DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
967
968  def int_nvvm_lg2_approx_ftz_f : ClangBuiltin<"__nvvm_lg2_approx_ftz_f">,
969      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
970  def int_nvvm_lg2_approx_f : ClangBuiltin<"__nvvm_lg2_approx_f">,
971      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
972  def int_nvvm_lg2_approx_d : ClangBuiltin<"__nvvm_lg2_approx_d">,
973      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
974
975//
976// Sin  Cos
977//
978
979  def int_nvvm_sin_approx_ftz_f : ClangBuiltin<"__nvvm_sin_approx_ftz_f">,
980      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
981  def int_nvvm_sin_approx_f : ClangBuiltin<"__nvvm_sin_approx_f">,
982      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
983
984  def int_nvvm_cos_approx_ftz_f : ClangBuiltin<"__nvvm_cos_approx_ftz_f">,
985      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
986  def int_nvvm_cos_approx_f : ClangBuiltin<"__nvvm_cos_approx_f">,
987      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
988
989//
990// Fma
991//
992
993  foreach variant = ["_rn_f16", "_rn_ftz_f16", "_rn_sat_f16",
994    "_rn_ftz_sat_f16", "_rn_relu_f16", "_rn_ftz_relu_f16"] in {
995    def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_half_ty],
996      [llvm_half_ty, llvm_half_ty, llvm_half_ty],
997      [IntrNoMem, IntrSpeculatable]>;
998  }
999
1000  foreach variant = ["_rn_f16x2", "_rn_ftz_f16x2", "_rn_sat_f16x2",
1001    "_rn_ftz_sat_f16x2", "_rn_relu_f16x2", "_rn_ftz_relu_f16x2"] in {
1002    def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_v2f16_ty],
1003      [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
1004      [IntrNoMem, IntrSpeculatable]>;
1005  }
1006
1007  foreach variant = ["_rn_bf16", "_rn_ftz_bf16", "_rn_sat_bf16",
1008    "_rn_ftz_sat_bf16", "_rn_relu_bf16", "_rn_ftz_relu_bf16"] in {
1009    def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1010      DefaultAttrsIntrinsic<[llvm_bfloat_ty],
1011        [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty],
1012        [IntrNoMem, IntrSpeculatable]>;
1013  }
1014
1015  foreach variant = ["_rn_bf16x2", "_rn_ftz_bf16x2", "_rn_sat_bf16x2",
1016    "_rn_ftz_sat_bf16x2", "_rn_relu_bf16x2", "_rn_ftz_relu_bf16x2"] in {
1017    def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1018      DefaultAttrsIntrinsic<[llvm_v2bf16_ty],
1019        [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty],
1020        [IntrNoMem, IntrSpeculatable]>;
1021  }
1022
1023  foreach variant = ["_rn_ftz_f", "_rn_f", "_rz_ftz_f", "_rz_f", "_rm_ftz_f",
1024    "_rm_f", "_rp_ftz_f", "_rp_f"] in {
1025    def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1026      DefaultAttrsIntrinsic<[llvm_float_ty],
1027        [llvm_float_ty, llvm_float_ty, llvm_float_ty],
1028        [IntrNoMem, IntrSpeculatable]>;
1029  }
1030
1031  foreach variant = ["_rn_d", "_rz_d", "_rm_d", "_rp_d"] in {
1032    def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1033      DefaultAttrsIntrinsic<[llvm_double_ty],
1034        [llvm_double_ty, llvm_double_ty, llvm_double_ty],
1035        [IntrNoMem, IntrSpeculatable]>;
1036  }
1037
1038//
1039// Rcp
1040//
1041
1042  def int_nvvm_rcp_rn_ftz_f : ClangBuiltin<"__nvvm_rcp_rn_ftz_f">,
1043      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1044  def int_nvvm_rcp_rn_f : ClangBuiltin<"__nvvm_rcp_rn_f">,
1045      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1046  def int_nvvm_rcp_rz_ftz_f : ClangBuiltin<"__nvvm_rcp_rz_ftz_f">,
1047      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1048  def int_nvvm_rcp_rz_f : ClangBuiltin<"__nvvm_rcp_rz_f">,
1049      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1050  def int_nvvm_rcp_rm_ftz_f : ClangBuiltin<"__nvvm_rcp_rm_ftz_f">,
1051      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1052  def int_nvvm_rcp_rm_f : ClangBuiltin<"__nvvm_rcp_rm_f">,
1053      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1054  def int_nvvm_rcp_rp_ftz_f : ClangBuiltin<"__nvvm_rcp_rp_ftz_f">,
1055      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1056  def int_nvvm_rcp_rp_f : ClangBuiltin<"__nvvm_rcp_rp_f">,
1057      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1058
1059  def int_nvvm_rcp_rn_d : ClangBuiltin<"__nvvm_rcp_rn_d">,
1060      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1061  def int_nvvm_rcp_rz_d : ClangBuiltin<"__nvvm_rcp_rz_d">,
1062      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1063  def int_nvvm_rcp_rm_d : ClangBuiltin<"__nvvm_rcp_rm_d">,
1064      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1065  def int_nvvm_rcp_rp_d : ClangBuiltin<"__nvvm_rcp_rp_d">,
1066      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1067
1068  def int_nvvm_rcp_approx_ftz_f : ClangBuiltin<"__nvvm_rcp_approx_ftz_f">,
1069      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1070  def int_nvvm_rcp_approx_ftz_d : ClangBuiltin<"__nvvm_rcp_approx_ftz_d">,
1071      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1072
1073//
1074// Sqrt
1075//
1076
1077  def int_nvvm_sqrt_f : ClangBuiltin<"__nvvm_sqrt_f">,
1078      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1079  def int_nvvm_sqrt_rn_ftz_f : ClangBuiltin<"__nvvm_sqrt_rn_ftz_f">,
1080      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1081  def int_nvvm_sqrt_rn_f : ClangBuiltin<"__nvvm_sqrt_rn_f">,
1082      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1083  def int_nvvm_sqrt_rz_ftz_f : ClangBuiltin<"__nvvm_sqrt_rz_ftz_f">,
1084      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1085  def int_nvvm_sqrt_rz_f : ClangBuiltin<"__nvvm_sqrt_rz_f">,
1086      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1087  def int_nvvm_sqrt_rm_ftz_f : ClangBuiltin<"__nvvm_sqrt_rm_ftz_f">,
1088      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1089  def int_nvvm_sqrt_rm_f : ClangBuiltin<"__nvvm_sqrt_rm_f">,
1090      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1091  def int_nvvm_sqrt_rp_ftz_f : ClangBuiltin<"__nvvm_sqrt_rp_ftz_f">,
1092      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1093  def int_nvvm_sqrt_rp_f : ClangBuiltin<"__nvvm_sqrt_rp_f">,
1094      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1095  def int_nvvm_sqrt_approx_ftz_f : ClangBuiltin<"__nvvm_sqrt_approx_ftz_f">,
1096      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1097  def int_nvvm_sqrt_approx_f : ClangBuiltin<"__nvvm_sqrt_approx_f">,
1098      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1099
1100  def int_nvvm_sqrt_rn_d : ClangBuiltin<"__nvvm_sqrt_rn_d">,
1101      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1102  def int_nvvm_sqrt_rz_d : ClangBuiltin<"__nvvm_sqrt_rz_d">,
1103      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1104  def int_nvvm_sqrt_rm_d : ClangBuiltin<"__nvvm_sqrt_rm_d">,
1105      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1106  def int_nvvm_sqrt_rp_d : ClangBuiltin<"__nvvm_sqrt_rp_d">,
1107      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1108
1109//
1110// Rsqrt
1111//
1112
1113  def int_nvvm_rsqrt_approx_ftz_f : ClangBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
1114      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1115  def int_nvvm_rsqrt_approx_ftz_d : ClangBuiltin<"__nvvm_rsqrt_approx_ftz_d">,
1116      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1117  def int_nvvm_rsqrt_approx_f : ClangBuiltin<"__nvvm_rsqrt_approx_f">,
1118      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1119  def int_nvvm_rsqrt_approx_d : ClangBuiltin<"__nvvm_rsqrt_approx_d">,
1120      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1121
1122//
1123// Add
1124//
1125
1126  def int_nvvm_add_rn_ftz_f : ClangBuiltin<"__nvvm_add_rn_ftz_f">,
1127      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1128        [IntrNoMem, IntrSpeculatable, Commutative]>;
1129  def int_nvvm_add_rn_f : ClangBuiltin<"__nvvm_add_rn_f">,
1130      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1131        [IntrNoMem, IntrSpeculatable, Commutative]>;
1132  def int_nvvm_add_rz_ftz_f : ClangBuiltin<"__nvvm_add_rz_ftz_f">,
1133      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1134        [IntrNoMem, IntrSpeculatable, Commutative]>;
1135  def int_nvvm_add_rz_f : ClangBuiltin<"__nvvm_add_rz_f">,
1136      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1137        [IntrNoMem, IntrSpeculatable, Commutative]>;
1138  def int_nvvm_add_rm_ftz_f : ClangBuiltin<"__nvvm_add_rm_ftz_f">,
1139      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1140        [IntrNoMem, IntrSpeculatable, Commutative]>;
1141  def int_nvvm_add_rm_f : ClangBuiltin<"__nvvm_add_rm_f">,
1142      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1143        [IntrNoMem, IntrSpeculatable, Commutative]>;
1144  def int_nvvm_add_rp_ftz_f : ClangBuiltin<"__nvvm_add_rp_ftz_f">,
1145      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1146        [IntrNoMem, IntrSpeculatable, Commutative]>;
1147  def int_nvvm_add_rp_f : ClangBuiltin<"__nvvm_add_rp_f">,
1148      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1149        [IntrNoMem, IntrSpeculatable, Commutative]>;
1150
1151  def int_nvvm_add_rn_d : ClangBuiltin<"__nvvm_add_rn_d">,
1152      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1153        [IntrNoMem, IntrSpeculatable, Commutative]>;
1154  def int_nvvm_add_rz_d : ClangBuiltin<"__nvvm_add_rz_d">,
1155      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1156        [IntrNoMem, IntrSpeculatable, Commutative]>;
1157  def int_nvvm_add_rm_d : ClangBuiltin<"__nvvm_add_rm_d">,
1158      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1159        [IntrNoMem, IntrSpeculatable, Commutative]>;
1160  def int_nvvm_add_rp_d : ClangBuiltin<"__nvvm_add_rp_d">,
1161      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1162        [IntrNoMem, IntrSpeculatable, Commutative]>;
1163
1164//
1165// Dot Product
1166//
1167  foreach a_type = ["s", "u"] in {
1168    foreach b_type = ["s", "u"] in {
1169      def int_nvvm_idp4a_ # a_type # _ # b_type :
1170          DefaultAttrsIntrinsic<[llvm_i32_ty],
1171              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1172              [IntrNoMem, IntrSpeculatable]>;
1173      def int_nvvm_idp2a_ # a_type # _ # b_type :
1174          DefaultAttrsIntrinsic<[llvm_i32_ty],
1175            [llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
1176            [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>;
1177    }
1178  }
1179
1180//
1181// Funnel-shift
1182//
1183  foreach direction = ["l", "r"] in
1184    def int_nvvm_fsh # direction # _clamp :
1185      DefaultAttrsIntrinsic<[llvm_anyint_ty],
1186        [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
1187        [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
1188
1189//
1190// FLO - Find Leading One
1191//
1192  foreach sign = ["s", "u"] in
1193    def int_nvvm_flo_ # sign :
1194      DefaultAttrsIntrinsic<[llvm_i32_ty],
1195        [llvm_anyint_ty, llvm_i1_ty],
1196        [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
1197
1198//
1199// Convert
1200//
1201
1202  def int_nvvm_d2f_rn_ftz : ClangBuiltin<"__nvvm_d2f_rn_ftz">,
1203      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1204  def int_nvvm_d2f_rn : ClangBuiltin<"__nvvm_d2f_rn">,
1205      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1206  def int_nvvm_d2f_rz_ftz : ClangBuiltin<"__nvvm_d2f_rz_ftz">,
1207      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1208  def int_nvvm_d2f_rz : ClangBuiltin<"__nvvm_d2f_rz">,
1209      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1210  def int_nvvm_d2f_rm_ftz : ClangBuiltin<"__nvvm_d2f_rm_ftz">,
1211      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1212  def int_nvvm_d2f_rm : ClangBuiltin<"__nvvm_d2f_rm">,
1213      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1214  def int_nvvm_d2f_rp_ftz : ClangBuiltin<"__nvvm_d2f_rp_ftz">,
1215      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1216  def int_nvvm_d2f_rp : ClangBuiltin<"__nvvm_d2f_rp">,
1217      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1218
1219  def int_nvvm_d2i_rn : ClangBuiltin<"__nvvm_d2i_rn">,
1220      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1221  def int_nvvm_d2i_rz : ClangBuiltin<"__nvvm_d2i_rz">,
1222      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1223  def int_nvvm_d2i_rm : ClangBuiltin<"__nvvm_d2i_rm">,
1224      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1225  def int_nvvm_d2i_rp : ClangBuiltin<"__nvvm_d2i_rp">,
1226      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1227
1228  def int_nvvm_d2ui_rn : ClangBuiltin<"__nvvm_d2ui_rn">,
1229      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1230  def int_nvvm_d2ui_rz : ClangBuiltin<"__nvvm_d2ui_rz">,
1231      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1232  def int_nvvm_d2ui_rm : ClangBuiltin<"__nvvm_d2ui_rm">,
1233      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1234  def int_nvvm_d2ui_rp : ClangBuiltin<"__nvvm_d2ui_rp">,
1235      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1236
1237  def int_nvvm_i2d_rn : ClangBuiltin<"__nvvm_i2d_rn">,
1238      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1239  def int_nvvm_i2d_rz : ClangBuiltin<"__nvvm_i2d_rz">,
1240      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1241  def int_nvvm_i2d_rm : ClangBuiltin<"__nvvm_i2d_rm">,
1242      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1243  def int_nvvm_i2d_rp : ClangBuiltin<"__nvvm_i2d_rp">,
1244      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1245
1246  def int_nvvm_ui2d_rn : ClangBuiltin<"__nvvm_ui2d_rn">,
1247      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1248  def int_nvvm_ui2d_rz : ClangBuiltin<"__nvvm_ui2d_rz">,
1249      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1250  def int_nvvm_ui2d_rm : ClangBuiltin<"__nvvm_ui2d_rm">,
1251      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1252  def int_nvvm_ui2d_rp : ClangBuiltin<"__nvvm_ui2d_rp">,
1253      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1254
1255  def int_nvvm_f2i_rn_ftz : ClangBuiltin<"__nvvm_f2i_rn_ftz">,
1256      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1257  def int_nvvm_f2i_rn : ClangBuiltin<"__nvvm_f2i_rn">,
1258      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1259  def int_nvvm_f2i_rz_ftz : ClangBuiltin<"__nvvm_f2i_rz_ftz">,
1260      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1261  def int_nvvm_f2i_rz : ClangBuiltin<"__nvvm_f2i_rz">,
1262      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1263  def int_nvvm_f2i_rm_ftz : ClangBuiltin<"__nvvm_f2i_rm_ftz">,
1264      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1265  def int_nvvm_f2i_rm : ClangBuiltin<"__nvvm_f2i_rm">,
1266      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1267  def int_nvvm_f2i_rp_ftz : ClangBuiltin<"__nvvm_f2i_rp_ftz">,
1268      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1269  def int_nvvm_f2i_rp : ClangBuiltin<"__nvvm_f2i_rp">,
1270      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1271
1272  def int_nvvm_f2ui_rn_ftz : ClangBuiltin<"__nvvm_f2ui_rn_ftz">,
1273      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1274  def int_nvvm_f2ui_rn : ClangBuiltin<"__nvvm_f2ui_rn">,
1275      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1276  def int_nvvm_f2ui_rz_ftz : ClangBuiltin<"__nvvm_f2ui_rz_ftz">,
1277      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1278  def int_nvvm_f2ui_rz : ClangBuiltin<"__nvvm_f2ui_rz">,
1279      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1280  def int_nvvm_f2ui_rm_ftz : ClangBuiltin<"__nvvm_f2ui_rm_ftz">,
1281      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1282  def int_nvvm_f2ui_rm : ClangBuiltin<"__nvvm_f2ui_rm">,
1283      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1284  def int_nvvm_f2ui_rp_ftz : ClangBuiltin<"__nvvm_f2ui_rp_ftz">,
1285      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1286  def int_nvvm_f2ui_rp : ClangBuiltin<"__nvvm_f2ui_rp">,
1287      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1288
1289  def int_nvvm_i2f_rn : ClangBuiltin<"__nvvm_i2f_rn">,
1290      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1291  def int_nvvm_i2f_rz : ClangBuiltin<"__nvvm_i2f_rz">,
1292      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1293  def int_nvvm_i2f_rm : ClangBuiltin<"__nvvm_i2f_rm">,
1294      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1295  def int_nvvm_i2f_rp : ClangBuiltin<"__nvvm_i2f_rp">,
1296      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1297
// ui2f family: i32 -> float, one record per suffix (_rn, _rz, _rm, _rp).
// The LLVM-level signature is identical to the i2f family; only the record
// and builtin names differ.
// NOTE(review): "ui" presumably means the i32 is interpreted as unsigned --
// confirm against the backend lowering.
1298  def int_nvvm_ui2f_rn : ClangBuiltin<"__nvvm_ui2f_rn">,
1299      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1300  def int_nvvm_ui2f_rz : ClangBuiltin<"__nvvm_ui2f_rz">,
1301      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1302  def int_nvvm_ui2f_rm : ClangBuiltin<"__nvvm_ui2f_rm">,
1303      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1304  def int_nvvm_ui2f_rp : ClangBuiltin<"__nvvm_ui2f_rp">,
1305      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1306
// lohi_i2d: builds a double from two i32 operands (per the name, the low word
// followed by the high word).  The two operands play different roles, so the
// intrinsic must NOT carry the Commutative property: swapping them produces a
// different bit pattern, and a commutative tag would let optimizers reorder
// the operands freely.
1307  def int_nvvm_lohi_i2d : ClangBuiltin<"__nvvm_lohi_i2d">,
1308      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
1309        [IntrNoMem, IntrSpeculatable]>;
1310
// d2i_lo / d2i_hi: each takes a double and returns i32; both are IntrNoMem +
// IntrSpeculatable.
// NOTE(review): by name these extract the low / high 32-bit word of the
// double's bit pattern (not a numeric conversion) -- confirm.
1311  def int_nvvm_d2i_lo : ClangBuiltin<"__nvvm_d2i_lo">,
1312      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1313  def int_nvvm_d2i_hi : ClangBuiltin<"__nvvm_d2i_hi">,
1314      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1315
// f2ll family: float -> i64, eight records (_rn/_rz/_rm/_rp, each with and
// without _ftz).  All are IntrNoMem + IntrSpeculatable and map to the
// __nvvm_f2ll_* builtins.
// NOTE(review): suffix meanings (rounding mode, flush-to-zero) are inferred
// from the names -- confirm against the PTX ISA.
1316  def int_nvvm_f2ll_rn_ftz : ClangBuiltin<"__nvvm_f2ll_rn_ftz">,
1317      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1318  def int_nvvm_f2ll_rn : ClangBuiltin<"__nvvm_f2ll_rn">,
1319      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1320  def int_nvvm_f2ll_rz_ftz : ClangBuiltin<"__nvvm_f2ll_rz_ftz">,
1321      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1322  def int_nvvm_f2ll_rz : ClangBuiltin<"__nvvm_f2ll_rz">,
1323      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1324  def int_nvvm_f2ll_rm_ftz : ClangBuiltin<"__nvvm_f2ll_rm_ftz">,
1325      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1326  def int_nvvm_f2ll_rm : ClangBuiltin<"__nvvm_f2ll_rm">,
1327      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1328  def int_nvvm_f2ll_rp_ftz : ClangBuiltin<"__nvvm_f2ll_rp_ftz">,
1329      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1330  def int_nvvm_f2ll_rp : ClangBuiltin<"__nvvm_f2ll_rp">,
1331      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1332
// f2ull family: float -> i64, eight records (_rn/_rz/_rm/_rp, each with and
// without _ftz).  Same LLVM signature and attributes as the f2ll family; only
// the record and builtin names differ.
// NOTE(review): "ull" presumably denotes an unsigned 64-bit result -- confirm.
1333  def int_nvvm_f2ull_rn_ftz : ClangBuiltin<"__nvvm_f2ull_rn_ftz">,
1334      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1335  def int_nvvm_f2ull_rn : ClangBuiltin<"__nvvm_f2ull_rn">,
1336      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1337  def int_nvvm_f2ull_rz_ftz : ClangBuiltin<"__nvvm_f2ull_rz_ftz">,
1338      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1339  def int_nvvm_f2ull_rz : ClangBuiltin<"__nvvm_f2ull_rz">,
1340      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1341  def int_nvvm_f2ull_rm_ftz : ClangBuiltin<"__nvvm_f2ull_rm_ftz">,
1342      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1343  def int_nvvm_f2ull_rm : ClangBuiltin<"__nvvm_f2ull_rm">,
1344      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1345  def int_nvvm_f2ull_rp_ftz : ClangBuiltin<"__nvvm_f2ull_rp_ftz">,
1346      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1347  def int_nvvm_f2ull_rp : ClangBuiltin<"__nvvm_f2ull_rp">,
1348      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1349
// d2ll family: double -> i64, one record per suffix (_rn, _rz, _rm, _rp).
// No _ftz variants are defined for the double source type.  All are
// IntrNoMem + IntrSpeculatable.
1350  def int_nvvm_d2ll_rn : ClangBuiltin<"__nvvm_d2ll_rn">,
1351      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1352  def int_nvvm_d2ll_rz : ClangBuiltin<"__nvvm_d2ll_rz">,
1353      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1354  def int_nvvm_d2ll_rm : ClangBuiltin<"__nvvm_d2ll_rm">,
1355      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1356  def int_nvvm_d2ll_rp : ClangBuiltin<"__nvvm_d2ll_rp">,
1357      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1358
// d2ull family: double -> i64, one record per suffix (_rn, _rz, _rm, _rp).
// Same LLVM signature and attributes as the d2ll family; only the record and
// builtin names differ.
1359  def int_nvvm_d2ull_rn : ClangBuiltin<"__nvvm_d2ull_rn">,
1360      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1361  def int_nvvm_d2ull_rz : ClangBuiltin<"__nvvm_d2ull_rz">,
1362      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1363  def int_nvvm_d2ull_rm : ClangBuiltin<"__nvvm_d2ull_rm">,
1364      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1365  def int_nvvm_d2ull_rp : ClangBuiltin<"__nvvm_d2ull_rp">,
1366      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1367
// ll2f / ull2f families: i64 -> float, four suffixes (_rn, _rz, _rm, _rp) for
// each of the two name prefixes.  All eight share the same LLVM signature and
// the IntrNoMem + IntrSpeculatable attributes.
1368  def int_nvvm_ll2f_rn : ClangBuiltin<"__nvvm_ll2f_rn">,
1369      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1370  def int_nvvm_ll2f_rz : ClangBuiltin<"__nvvm_ll2f_rz">,
1371      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1372  def int_nvvm_ll2f_rm : ClangBuiltin<"__nvvm_ll2f_rm">,
1373      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1374  def int_nvvm_ll2f_rp : ClangBuiltin<"__nvvm_ll2f_rp">,
1375      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1376  def int_nvvm_ull2f_rn : ClangBuiltin<"__nvvm_ull2f_rn">,
1377      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1378  def int_nvvm_ull2f_rz : ClangBuiltin<"__nvvm_ull2f_rz">,
1379      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1380  def int_nvvm_ull2f_rm : ClangBuiltin<"__nvvm_ull2f_rm">,
1381      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1382  def int_nvvm_ull2f_rp : ClangBuiltin<"__nvvm_ull2f_rp">,
1383      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1384
// ll2d / ull2d families: i64 -> double, four suffixes (_rn, _rz, _rm, _rp)
// for each of the two name prefixes.  All eight share the same LLVM signature
// and the IntrNoMem + IntrSpeculatable attributes.
1385  def int_nvvm_ll2d_rn : ClangBuiltin<"__nvvm_ll2d_rn">,
1386      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1387  def int_nvvm_ll2d_rz : ClangBuiltin<"__nvvm_ll2d_rz">,
1388      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1389  def int_nvvm_ll2d_rm : ClangBuiltin<"__nvvm_ll2d_rm">,
1390      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1391  def int_nvvm_ll2d_rp : ClangBuiltin<"__nvvm_ll2d_rp">,
1392      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1393  def int_nvvm_ull2d_rn : ClangBuiltin<"__nvvm_ull2d_rn">,
1394      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1395  def int_nvvm_ull2d_rz : ClangBuiltin<"__nvvm_ull2d_rz">,
1396      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1397  def int_nvvm_ull2d_rm : ClangBuiltin<"__nvvm_ull2d_rm">,
1398      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1399  def int_nvvm_ull2d_rp : ClangBuiltin<"__nvvm_ull2d_rp">,
1400      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1401
// f2h: float -> i16, with and without _ftz.  The result type is i16, not
// half.
// NOTE(review): presumably the i16 carries the raw fp16 bit pattern --
// confirm against the lowering.
1402  def int_nvvm_f2h_rn_ftz : ClangBuiltin<"__nvvm_f2h_rn_ftz">,
1403      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1404  def int_nvvm_f2h_rn : ClangBuiltin<"__nvvm_f2h_rn">,
1405      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1406
// bf2h: bfloat -> i16, with and without _ftz.  As with f2h, the result is
// declared as i16 rather than half.
1407  def int_nvvm_bf2h_rn_ftz : ClangBuiltin<"__nvvm_bf2h_rn_ftz">,
1408      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>;
1409  def int_nvvm_bf2h_rn : ClangBuiltin<"__nvvm_bf2h_rn">,
1410      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>;
1411
// ff2bf16x2: two floats -> <2 x bfloat>.  These use plain Intrinsic (not
// DefaultAttrsIntrinsic), so every attribute is spelled out: IntrNoMem and
// IntrNoCallback.
// NOTE(review): "_relu" presumably clamps negative results to zero -- confirm
// against the PTX cvt documentation.
1412  def int_nvvm_ff2bf16x2_rn : ClangBuiltin<"__nvvm_ff2bf16x2_rn">,
1413       Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1414  def int_nvvm_ff2bf16x2_rn_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rn_relu">,
1415      Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1416  def int_nvvm_ff2bf16x2_rz : ClangBuiltin<"__nvvm_ff2bf16x2_rz">,
1417      Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
// ff2bf16x2_rz_relu: two floats -> <2 x bfloat>.  Declared with plain
// Intrinsic, so no default attributes are applied; IntrNoCallback is listed
// explicitly to match the other pure conversion intrinsics in this section
// (a value conversion never calls back into the module).
1418  def int_nvvm_ff2bf16x2_rz_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rz_relu">,
1419      Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1420
// ff2f16x2: two floats -> <2 x half>, in _rn / _rz flavours with optional
// _relu.  Plain Intrinsic with explicit IntrNoMem + IntrNoCallback.
1421  def int_nvvm_ff2f16x2_rn : ClangBuiltin<"__nvvm_ff2f16x2_rn">,
1422      Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1423  def int_nvvm_ff2f16x2_rn_relu : ClangBuiltin<"__nvvm_ff2f16x2_rn_relu">,
1424      Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1425  def int_nvvm_ff2f16x2_rz : ClangBuiltin<"__nvvm_ff2f16x2_rz">,
1426      Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1427  def int_nvvm_ff2f16x2_rz_relu : ClangBuiltin<"__nvvm_ff2f16x2_rz_relu">,
1428      Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1429
// f2bf16: one float -> bfloat (scalar counterpart of ff2bf16x2), in _rn / _rz
// flavours with optional _relu.  Plain Intrinsic with explicit IntrNoMem +
// IntrNoCallback.
1430  def int_nvvm_f2bf16_rn : ClangBuiltin<"__nvvm_f2bf16_rn">,
1431      Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1432  def int_nvvm_f2bf16_rn_relu : ClangBuiltin<"__nvvm_f2bf16_rn_relu">,
1433      Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1434  def int_nvvm_f2bf16_rz : ClangBuiltin<"__nvvm_f2bf16_rz">,
1435      Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1436  def int_nvvm_f2bf16_rz_relu : ClangBuiltin<"__nvvm_f2bf16_rz_relu">,
1437       Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1438
// f2tf32: one float -> i32.  Ten records: _rna and _rna_satfinite, then _rn
// and _rz each in plain, _relu, _satfinite and _relu_satfinite forms.  Plain
// Intrinsic with explicit IntrNoMem + IntrNoCallback.
// NOTE(review): the i32 result presumably holds the tf32 encoding, and
// "_satfinite" presumably saturates to the largest finite value -- confirm
// against the PTX cvt documentation.
1439  def int_nvvm_f2tf32_rna : ClangBuiltin<"__nvvm_f2tf32_rna">,
1440      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1441  def int_nvvm_f2tf32_rna_satfinite : ClangBuiltin<"__nvvm_f2tf32_rna_satfinite">,
1442      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1443  def int_nvvm_f2tf32_rn : ClangBuiltin<"__nvvm_f2tf32_rn">,
1444      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1445  def int_nvvm_f2tf32_rn_relu : ClangBuiltin<"__nvvm_f2tf32_rn_relu">,
1446      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1447  def int_nvvm_f2tf32_rn_satfinite : ClangBuiltin<"__nvvm_f2tf32_rn_satfinite">,
1448      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1449  def int_nvvm_f2tf32_rn_relu_satfinite : ClangBuiltin<"__nvvm_f2tf32_rn_relu_satfinite">,
1450      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1451  def int_nvvm_f2tf32_rz : ClangBuiltin<"__nvvm_f2tf32_rz">,
1452      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1453  def int_nvvm_f2tf32_rz_relu : ClangBuiltin<"__nvvm_f2tf32_rz_relu">,
1454      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1455  def int_nvvm_f2tf32_rz_satfinite : ClangBuiltin<"__nvvm_f2tf32_rz_satfinite">,
1456      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1457  def int_nvvm_f2tf32_rz_relu_satfinite : ClangBuiltin<"__nvvm_f2tf32_rz_relu_satfinite">,
1458      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1459
// ff_to_e4m3x2 / ff_to_e5m2x2: two floats -> i16, each in _rn and _rn_relu
// forms.  Plain Intrinsic with explicit IntrNoMem + IntrNoCallback.
// NOTE(review): the i16 presumably packs two 8-bit FP8 values (e4m3 or e5m2)
// -- confirm the lane order against the lowering.
1460  def int_nvvm_ff_to_e4m3x2_rn : ClangBuiltin<"__nvvm_ff_to_e4m3x2_rn">,
1461      Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1462  def int_nvvm_ff_to_e4m3x2_rn_relu : ClangBuiltin<"__nvvm_ff_to_e4m3x2_rn_relu">,
1463      Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1464  def int_nvvm_ff_to_e5m2x2_rn : ClangBuiltin<"__nvvm_ff_to_e5m2x2_rn">,
1465      Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1466  def int_nvvm_ff_to_e5m2x2_rn_relu : ClangBuiltin<"__nvvm_ff_to_e5m2x2_rn_relu">,
1467      Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1468
// f16x2_to_e4m3x2 / f16x2_to_e5m2x2: <2 x half> -> i16, each in _rn and
// _rn_relu forms.  Plain Intrinsic with explicit IntrNoMem + IntrNoCallback.
1469  def int_nvvm_f16x2_to_e4m3x2_rn : ClangBuiltin<"__nvvm_f16x2_to_e4m3x2_rn">,
1470      Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1471  def int_nvvm_f16x2_to_e4m3x2_rn_relu : ClangBuiltin<"__nvvm_f16x2_to_e4m3x2_rn_relu">,
1472      Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1473  def int_nvvm_f16x2_to_e5m2x2_rn : ClangBuiltin<"__nvvm_f16x2_to_e5m2x2_rn">,
1474      Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1475  def int_nvvm_f16x2_to_e5m2x2_rn_relu : ClangBuiltin<"__nvvm_f16x2_to_e5m2x2_rn_relu">,
1476      Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1477
// e4m3x2_to_f16x2 / e5m2x2_to_f16x2: i16 -> <2 x half>, the reverse direction
// of the f16x2_to_* records, each in _rn and _rn_relu forms.  Plain Intrinsic
// with explicit IntrNoMem + IntrNoCallback.
1478  def int_nvvm_e4m3x2_to_f16x2_rn : ClangBuiltin<"__nvvm_e4m3x2_to_f16x2_rn">,
1479      Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1480  def int_nvvm_e4m3x2_to_f16x2_rn_relu : ClangBuiltin<"__nvvm_e4m3x2_to_f16x2_rn_relu">,
1481      Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1482  def int_nvvm_e5m2x2_to_f16x2_rn : ClangBuiltin<"__nvvm_e5m2x2_to_f16x2_rn">,
1483      Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1484  def int_nvvm_e5m2x2_to_f16x2_rn_relu : ClangBuiltin<"__nvvm_e5m2x2_to_f16x2_rn_relu">,
1485      Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1486
1487// FNS
1488
// fns: takes three i32 operands and returns i32; IntrNoMem only (not marked
// speculatable).
// NOTE(review): presumably PTX `fns` (find n-th set bit) with operands
// (mask, base, offset) -- confirm operand order against the PTX ISA.
1489  def int_nvvm_fns : ClangBuiltin<"__nvvm_fns">,
1490      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1491                [IntrNoMem]>;
1492
1493// Atomics not available as llvm intrinsics.
// Both take (pointer in any address space, i32) and return i32.  They access
// only memory reachable through their arguments (IntrArgMemOnly) and do not
// capture the pointer operand.  No Clang builtin is attached.
1494  def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
1495          [llvm_anyptr_ty, llvm_i32_ty],
1496                                      [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1497  def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
1498          [llvm_anyptr_ty, llvm_i32_ty],
1499                                      [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1500
// Signature templates for the scoped atomic intrinsics.
//   SCOPED_ATOMIC2_impl<elty>: elty op(anyptr, elty)        -- one value operand
//   SCOPED_ATOMIC3_impl<elty>: elty op(anyptr, elty, elty)  -- two value operands
// LLVMMatchType<0> ties each value operand to the (overloaded) result type.
// Both are IntrArgMemOnly + IntrNoCallback and do not capture the pointer.
1501  class SCOPED_ATOMIC2_impl<LLVMType elty>
1502        : Intrinsic<[elty],
1503          [llvm_anyptr_ty, LLVMMatchType<0>],
1504          [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1505  class SCOPED_ATOMIC3_impl<LLVMType elty>
1506        : Intrinsic<[elty],
1507          [llvm_anyptr_ty, LLVMMatchType<0>,
1508           LLVMMatchType<0>],
1509          [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1510
// Multiclasses that stamp out the scope-suffixed variants of an atomic.
//   PTXAtomicWithScope2<elty>: emits <name>_cta and <name>_sys using the
//                              2-operand template.
//   PTXAtomicWithScope3<elty>: same, using the 3-operand template.
//   PTXAtomicWithScope2_fi:    instantiates PTXAtomicWithScope2 twice, with
//                              an _f (any float) and an _i (any int) infix.
1511  multiclass PTXAtomicWithScope2<LLVMType elty> {
1512    def _cta : SCOPED_ATOMIC2_impl<elty>;
1513    def _sys : SCOPED_ATOMIC2_impl<elty>;
1514  }
1515  multiclass PTXAtomicWithScope3<LLVMType elty> {
1516    def _cta : SCOPED_ATOMIC3_impl<elty>;
1517    def _sys : SCOPED_ATOMIC3_impl<elty>;
1518  }
1519  multiclass PTXAtomicWithScope2_fi {
1520    defm _f: PTXAtomicWithScope2<llvm_anyfloat_ty>;
1521    defm _i: PTXAtomicWithScope2<llvm_anyint_ty>;
1522  }
// Scoped atomic instantiations.  Each defm yields a _cta and a _sys record.
// `add` is the only operation defined for both float and int element types
// (via the _fi multiclass); the rest are integer-only.  `cas` uses the
// 3-operand template, all others the 2-operand one.
1523  defm int_nvvm_atomic_add_gen   : PTXAtomicWithScope2_fi;
1524  defm int_nvvm_atomic_inc_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1525  defm int_nvvm_atomic_dec_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1526  defm int_nvvm_atomic_exch_gen_i: PTXAtomicWithScope2<llvm_anyint_ty>;
1527  defm int_nvvm_atomic_xor_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1528  defm int_nvvm_atomic_max_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1529  defm int_nvvm_atomic_min_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1530  defm int_nvvm_atomic_or_gen_i  : PTXAtomicWithScope2<llvm_anyint_ty>;
1531  defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1532  defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3<llvm_anyint_ty>;
1533
1534// Bar.Sync
1535
// CTA barrier intrinsics.  All are IntrConvergent + IntrNoCallback and carry
// no memory attribute, so they are treated as touching arbitrary memory.
1536  // The builtin for "bar.sync 0" is called __syncthreads.  Unlike most of the
1537  // intrinsics in this file, this one is a user-facing API.
1538  def int_nvvm_barrier0 : ClangBuiltin<"__syncthreads">,
1539      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1540  // Synchronize all threads in the CTA at barrier 'n'.
1541  def int_nvvm_barrier_n : ClangBuiltin<"__nvvm_bar_n">,
1542      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1543  // Synchronize 'm', a multiple of warp size, (arg 2) threads in
1544  // the CTA at barrier 'n' (arg 1).
1545  def int_nvvm_barrier : ClangBuiltin<"__nvvm_bar">,
1546      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
  // The three barrier0_* variants below take one i32 and return one i32.
  // NOTE(review): presumably a barrier-0 sync combined with a popc/and/or
  // reduction of the operand across threads -- confirm against the PTX ISA.
1547  def int_nvvm_barrier0_popc : ClangBuiltin<"__nvvm_bar0_popc">,
1548      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1549  def int_nvvm_barrier0_and : ClangBuiltin<"__nvvm_bar0_and">,
1550      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1551  def int_nvvm_barrier0_or : ClangBuiltin<"__nvvm_bar0_or">,
1552      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1553
// bar_sync / bar_warp_sync: each takes a single i32 and returns nothing;
// IntrConvergent + IntrNoCallback.
// NOTE(review): the operand is presumably a barrier id for bar_sync and a
// lane mask for bar_warp_sync -- confirm against the PTX ISA.
1554  def int_nvvm_bar_sync :
1555      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1556      ClangBuiltin<"__nvvm_bar_sync">;
1557  def int_nvvm_bar_warp_sync :
1558      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1559      ClangBuiltin<"__nvvm_bar_warp_sync">;
1560
1561  // barrier.sync id[, cnt]
  // The one-operand form takes the id; the _cnt form takes (id, cnt).  Both
  // are IntrConvergent + IntrNoCallback and return nothing.
1562  def int_nvvm_barrier_sync :
1563      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1564      ClangBuiltin<"__nvvm_barrier_sync">;
1565  def int_nvvm_barrier_sync_cnt :
1566      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1567      ClangBuiltin<"__nvvm_barrier_sync_cnt">;
1568
1569  // barrier.cluster.[wait, arrive, arrive.relaxed]
  // None of these take operands or return a value, and none has a Clang
  // builtin attached.  All are IntrConvergent + IntrNoCallback.
1570  def int_nvvm_barrier_cluster_arrive :
1571      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1572  def int_nvvm_barrier_cluster_arrive_relaxed :
1573      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1574  def int_nvvm_barrier_cluster_wait :
1575      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1576
1577  // 'aligned' versions of the above barrier.cluster.* intrinsics
  // Same signature and attributes as the non-aligned records; only the
  // record names differ.
  // NOTE(review): presumably these lower to the PTX `.aligned` modifier --
  // confirm against the backend.
1578  def int_nvvm_barrier_cluster_arrive_aligned :
1579      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1580  def int_nvvm_barrier_cluster_arrive_relaxed_aligned :
1581      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1582  def int_nvvm_barrier_cluster_wait_aligned :
1583      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1584
1585  // Membar
  // Fences at three levels (_cta, _gl, _sys) plus fence_sc_cluster.  They
  // take no operands and return nothing.  The only attribute is
  // IntrNoCallback -- deliberately no memory attribute, so the optimizer must
  // assume they read and write arbitrary memory.  fence_sc_cluster has no
  // Clang builtin attached.
1586  def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
1587      Intrinsic<[], [], [IntrNoCallback]>;
1588  def int_nvvm_membar_gl : ClangBuiltin<"__nvvm_membar_gl">,
1589      Intrinsic<[], [], [IntrNoCallback]>;
1590  def int_nvvm_membar_sys : ClangBuiltin<"__nvvm_membar_sys">,
1591      Intrinsic<[], [], [IntrNoCallback]>;
1592  def int_nvvm_fence_sc_cluster:
1593      Intrinsic<[], [], [IntrNoCallback]>;
1594
1595// Proxy fence (uni-directional)
// For each scope in {cta, cluster, gpu, sys} this loop defines two records:
//   *_release_<scope>: no operands.
//   *_acquire_<scope>: (ptr, i32) where the i32 must be an immediate
//                      (ImmArg<ArgIndex<1>>); IntrArgMemOnly.
// The explicit name strings are required because the IR name contains
// "tensormap_generic" with an underscore, which the default
// record-name -> "llvm.*" mapping would turn into a dot.
1596foreach scope = ["cta", "cluster", "gpu", "sys"] in {
1597
1598  def int_nvvm_fence_proxy_tensormap_generic_release_ # scope:
1599        Intrinsic<[], [], [IntrNoCallback],
1600        "llvm.nvvm.fence.proxy.tensormap_generic.release." # scope>;
1601
1602  def int_nvvm_fence_proxy_tensormap_generic_acquire_ # scope:
1603        Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
1604                  [IntrNoCallback, IntrArgMemOnly, ImmArg<ArgIndex<1>>],
1605                  "llvm.nvvm.fence.proxy.tensormap_generic.acquire." # scope>;
1606
1607}
1608
1609// Async Copy
// cp.async mbarrier-arrive intrinsics: four records formed from
// {arrive, arrive_noinc} x {generic pointer, shared pointer}.  Each takes a
// single pointer and returns nothing; IntrConvergent + IntrNoCallback.
1610def int_nvvm_cp_async_mbarrier_arrive :
1611    ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive">,
1612    Intrinsic<[],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1613def int_nvvm_cp_async_mbarrier_arrive_shared :
1614    ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">,
1615    Intrinsic<[],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1616def int_nvvm_cp_async_mbarrier_arrive_noinc :
1617    ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">,
1618    Intrinsic<[],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1619def int_nvvm_cp_async_mbarrier_arrive_noinc_shared :
1620    ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">,
1621    Intrinsic<[],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1622
// Defines a pair of cp.async shared<-global copy intrinsics.
//   n  : string spliced into the IR name after ".shared.global."
//   cc : string spliced into the IR name after "cp.async."
// Records produced:
//   NAME : (shared dst ptr, global src ptr)
//   _s   : (shared dst ptr, global src ptr, i32), IR name suffixed ".s"
// In both, the destination is write-only, the source is read-only, and the
// two pointers are declared non-aliasing.
// NOTE(review): n is presumably the copy size in bytes, cc the cache
// operator, and the extra i32 of _s a source size -- confirm against the
// PTX ISA.
1623multiclass CP_ASYNC_SHARED_GLOBAL<string n, string cc> {
1624  def NAME: Intrinsic<[],[llvm_shared_ptr_ty, llvm_global_ptr_ty],
1625        [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
1626        WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
1627        "llvm.nvvm.cp.async." # cc # ".shared.global." # n>;
1628  def _s: Intrinsic<[],[llvm_shared_ptr_ty, llvm_global_ptr_ty, llvm_i32_ty],
1629        [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
1630        WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
1631        "llvm.nvvm.cp.async." # cc # ".shared.global." # n # ".s">;
1632}
1633
// Instantiations of CP_ASYNC_SHARED_GLOBAL: "ca" with n = 4, 8 and 16, and
// "cg" with n = 16 only.  Each defm yields the base record plus its _s form.
1634defm int_nvvm_cp_async_ca_shared_global_4 : CP_ASYNC_SHARED_GLOBAL<"4", "ca">;
1635defm int_nvvm_cp_async_ca_shared_global_8 : CP_ASYNC_SHARED_GLOBAL<"8", "ca">;
1636defm int_nvvm_cp_async_ca_shared_global_16 : CP_ASYNC_SHARED_GLOBAL<"16", "ca">;
1637defm int_nvvm_cp_async_cg_shared_global_16 : CP_ASYNC_SHARED_GLOBAL<"16", "cg">;
1638
// cp.async group management.  commit_group and wait_all take no operands;
// wait_group takes one i32 that must be an immediate (ImmArg).  No memory or
// callback attributes are listed, so these are treated conservatively.
1639def int_nvvm_cp_async_commit_group :
1640    ClangBuiltin<"__nvvm_cp_async_commit_group">,
1641    Intrinsic<[],[],[]>;
1642
1643def int_nvvm_cp_async_wait_group :
1644    ClangBuiltin<"__nvvm_cp_async_wait_group">,
1645    Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1646
1647def int_nvvm_cp_async_wait_all :
1648    ClangBuiltin<"__nvvm_cp_async_wait_all">,
1649    Intrinsic<[],[],[]>;
1650
1651// cp.async.bulk variants of the commit/wait group
// commit_group takes no operands; wait_group and wait_group_read each take
// one i32 that must be an immediate (ImmArg).  None has a Clang builtin
// attached.
1652def int_nvvm_cp_async_bulk_commit_group :
1653    Intrinsic<[],[],[]>;
1654
1655def int_nvvm_cp_async_bulk_wait_group :
1656    Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1657
1658def int_nvvm_cp_async_bulk_wait_group_read :
1659    Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1660
1661// mbarrier
// mbarrier_init: (pointer, i32) -> void, in generic-pointer and
// shared-pointer forms.  IntrConvergent + IntrNoCallback.
// NOTE(review): the i32 is presumably the expected arrival count -- confirm
// against the PTX ISA.
1662def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">,
1663    Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1664def int_nvvm_mbarrier_init_shared :
1665    ClangBuiltin<"__nvvm_mbarrier_init_shared">,
1666    Intrinsic<[],[llvm_shared_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1667
// mbarrier_inval: (pointer) -> void, in generic-pointer and shared-pointer
// forms.  Unlike the other mbarrier records these carry precise memory
// attributes: the intrinsic only writes, only through its argument, and does
// not capture the pointer.
1668def int_nvvm_mbarrier_inval : ClangBuiltin<"__nvvm_mbarrier_inval">,
1669    Intrinsic<[],[llvm_ptr_ty],
1670    [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
1671    WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
1672def int_nvvm_mbarrier_inval_shared :
1673    ClangBuiltin<"__nvvm_mbarrier_inval_shared">,
1674    Intrinsic<[],[llvm_shared_ptr_ty],
1675    [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
1676    WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
1677
// mbarrier_arrive: returns an i64.  Four records formed from
// {arrive, arrive_noComplete} x {generic pointer, shared pointer}; the
// noComplete forms take an additional i32 operand.
// NOTE(review): the i64 is presumably the opaque barrier state token and the
// i32 an arrival count -- confirm against the PTX ISA.
1678def int_nvvm_mbarrier_arrive : ClangBuiltin<"__nvvm_mbarrier_arrive">,
1679    Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1680def int_nvvm_mbarrier_arrive_shared :
1681    ClangBuiltin<"__nvvm_mbarrier_arrive_shared">,
1682    Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1683def int_nvvm_mbarrier_arrive_noComplete :
1684    ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
1685    Intrinsic<[llvm_i64_ty],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1686def int_nvvm_mbarrier_arrive_noComplete_shared :
1687    ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
1688    Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty,
1689    llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1690
// mbarrier_arrive_drop: same four-way shape as mbarrier_arrive -- returns
// i64, generic/shared pointer forms, and noComplete forms that take an extra
// i32.  IntrConvergent + IntrNoCallback.
1691def int_nvvm_mbarrier_arrive_drop :
1692    ClangBuiltin<"__nvvm_mbarrier_arrive_drop">,
1693    Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1694def int_nvvm_mbarrier_arrive_drop_shared :
1695    ClangBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
1696    Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1697def int_nvvm_mbarrier_arrive_drop_noComplete :
1698    ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
1699    Intrinsic<[llvm_i64_ty],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1700def int_nvvm_mbarrier_arrive_drop_noComplete_shared :
1701    ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
1702    Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty,
1703    llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1704
// mbarrier_test_wait: (pointer, i64) -> i1, in generic-pointer and
// shared-pointer forms.  IntrConvergent + IntrNoCallback.
// NOTE(review): the i64 is presumably the state token returned by an
// mbarrier_arrive* intrinsic -- confirm against the PTX ISA.
1705def int_nvvm_mbarrier_test_wait :
1706    ClangBuiltin<"__nvvm_mbarrier_test_wait">,
1707    Intrinsic<[llvm_i1_ty],[llvm_ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
1708def int_nvvm_mbarrier_test_wait_shared :
1709    ClangBuiltin<"__nvvm_mbarrier_test_wait_shared">,
1710    Intrinsic<[llvm_i1_ty],[llvm_shared_ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
1711
// mbarrier_pending_count: i64 -> i32.  It takes no pointer and is IntrNoMem,
// i.e. the result is computed from the i64 operand alone.
1712def int_nvvm_mbarrier_pending_count :
1713    ClangBuiltin<"__nvvm_mbarrier_pending_count">,
1714    Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent, IntrNoCallback]>;
1715
1716// Generated within nvvm. Use for ldu on sm_20 or later.  Second arg is the
1717// pointer's alignment.
// Three overloaded records that differ only in result kind: any integer
// (_i), any float (_f), any pointer (_p).  Each reads memory only through
// its pointer argument, always returns (IntrWillReturn), and does not
// capture the pointer.
1718def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
1719  [llvm_anyptr_ty, llvm_i32_ty],
1720  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
1721  "llvm.nvvm.ldu.global.i">;
1722def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
1723  [llvm_anyptr_ty, llvm_i32_ty],
1724  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
1725  "llvm.nvvm.ldu.global.f">;
1726def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
1727  [llvm_anyptr_ty, llvm_i32_ty],
1728  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
1729  "llvm.nvvm.ldu.global.p">;
1730
1731// Used in nvvm internally to help address space opt and ptx code generation
1732// This is for params that are passed to kernel functions by pointer by-val.
// Signature: any pointer -> any pointer (source and result address spaces
// are overloaded independently).  IntrNoMem + IntrSpeculatable.
1733def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
1734                                     [llvm_anyptr_ty],
1735                                   [IntrNoMem, IntrSpeculatable, IntrNoCallback],
1736                                   "llvm.nvvm.ptr.gen.to.param">;
1737
1738// sm70+, PTX7.7+
// Inverse direction of ptr.gen.to.param: any pointer -> any pointer.  Uses
// DefaultAttrsIntrinsic, so it also receives the default intrinsic
// attributes in addition to the ones listed.
1739def int_nvvm_ptr_param_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
1740                                     [llvm_anyptr_ty],
1741                                   [IntrNoMem, IntrSpeculatable, IntrNoCallback],
1742                                   "llvm.nvvm.ptr.param.to.gen">;
1743
1744// Move intrinsics, used in nvvm internally
// One record per value kind (i16, i32, i64, float, double, pointer); each
// returns a value of the same kind as its single operand and is IntrNoMem.
// The pointer form is overloaded on both result and operand and additionally
// declares that it does not capture its operand.
1745
1746def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
1747  "llvm.nvvm.move.i16">;
1748def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
1749  "llvm.nvvm.move.i32">;
1750def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
1751  "llvm.nvvm.move.i64">;
1752def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
1753  [IntrNoMem], "llvm.nvvm.move.float">;
1754def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
1755  [IntrNoMem], "llvm.nvvm.move.double">;
1756def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
1757  [IntrNoMem, NoCapture<ArgIndex<0>>], "llvm.nvvm.move.ptr">;
1758
1759
1760// For getting the handle from a texture or surface variable
// texsurf_handle:          (metadata, any pointer) -> i64
// texsurf_handle_internal: (any pointer)           -> i64
// Both are IntrNoMem.
1761def int_nvvm_texsurf_handle
1762  : Intrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyptr_ty],
1763              [IntrNoMem], "llvm.nvvm.texsurf.handle">;
1764def int_nvvm_texsurf_handle_internal
1765  : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty],
1766              [IntrNoMem], "llvm.nvvm.texsurf.handle.internal">;
1767
1768/// Error / Warn
// Each takes a single pointer (any address space) and returns nothing.  The
// attribute list is empty, so the calls are treated as having arbitrary side
// effects and are not removed as dead code.
// NOTE(review): the pointer presumably addresses the message string --
// confirm against the backend.
1769def int_nvvm_compiler_error :
1770    Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
1771def int_nvvm_compiler_warn :
1772    Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
1773
// nvvm.reflect: ptr -> i32, IntrNoMem, exposed to Clang as __nvvm_reflect.
// NOTE(review): the pointer presumably names a compile-time query string that
// a dedicated pass folds to a constant -- confirm against the NVVMReflect
// pass.
1774def int_nvvm_reflect :
1775  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">,
1776  ClangBuiltin<"__nvvm_reflect">;
1777
1778// isspacep.{const, global, local, shared}
// Address-space predicates: each takes a generic pointer and returns i1.
// All are IntrNoMem + IntrSpeculatable and do not capture the pointer.
// The shared.cluster variant at the end has no Clang builtin attached.
1779def int_nvvm_isspacep_const
1780  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1781              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1782              "llvm.nvvm.isspacep.const">,
1783    ClangBuiltin<"__nvvm_isspacep_const">;
1784def int_nvvm_isspacep_global
1785  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1786              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1787              "llvm.nvvm.isspacep.global">,
1788    ClangBuiltin<"__nvvm_isspacep_global">;
1789def int_nvvm_isspacep_local
1790  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1791              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1792              "llvm.nvvm.isspacep.local">,
1793    ClangBuiltin<"__nvvm_isspacep_local">;
1794def int_nvvm_isspacep_shared
1795  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1796              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1797              "llvm.nvvm.isspacep.shared">,
1798    ClangBuiltin<"__nvvm_isspacep_shared">;
1799def int_nvvm_isspacep_shared_cluster
1800  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1801              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1802              "llvm.nvvm.isspacep.shared.cluster">;
1803
1804// Environment register read
// Environment-register readers, one record per register index (envreg0
// through envreg15 in this run).  Each takes no operands and returns an i32
// that is declared NoUndef; all are IntrNoMem + IntrSpeculatable and map to
// the __nvvm_read_ptx_sreg_envreg<N> builtin with the same index.
1805def int_nvvm_read_ptx_sreg_envreg0
1806  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1807              "llvm.nvvm.read.ptx.sreg.envreg0">,
1808    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
1809def int_nvvm_read_ptx_sreg_envreg1
1810  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1811              "llvm.nvvm.read.ptx.sreg.envreg1">,
1812    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
1813def int_nvvm_read_ptx_sreg_envreg2
1814  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1815              "llvm.nvvm.read.ptx.sreg.envreg2">,
1816    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
1817def int_nvvm_read_ptx_sreg_envreg3
1818  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1819              "llvm.nvvm.read.ptx.sreg.envreg3">,
1820    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg3">;
1821def int_nvvm_read_ptx_sreg_envreg4
1822  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1823              "llvm.nvvm.read.ptx.sreg.envreg4">,
1824    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg4">;
1825def int_nvvm_read_ptx_sreg_envreg5
1826  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1827              "llvm.nvvm.read.ptx.sreg.envreg5">,
1828    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg5">;
1829def int_nvvm_read_ptx_sreg_envreg6
1830  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1831              "llvm.nvvm.read.ptx.sreg.envreg6">,
1832    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg6">;
1833def int_nvvm_read_ptx_sreg_envreg7
1834  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1835              "llvm.nvvm.read.ptx.sreg.envreg7">,
1836    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg7">;
1837def int_nvvm_read_ptx_sreg_envreg8
1838  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1839              "llvm.nvvm.read.ptx.sreg.envreg8">,
1840    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg8">;
1841def int_nvvm_read_ptx_sreg_envreg9
1842  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1843              "llvm.nvvm.read.ptx.sreg.envreg9">,
1844    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg9">;
1845def int_nvvm_read_ptx_sreg_envreg10
1846  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1847              "llvm.nvvm.read.ptx.sreg.envreg10">,
1848    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg10">;
1849def int_nvvm_read_ptx_sreg_envreg11
1850  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1851              "llvm.nvvm.read.ptx.sreg.envreg11">,
1852    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg11">;
1853def int_nvvm_read_ptx_sreg_envreg12
1854  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1855              "llvm.nvvm.read.ptx.sreg.envreg12">,
1856    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg12">;
1857def int_nvvm_read_ptx_sreg_envreg13
1858  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1859              "llvm.nvvm.read.ptx.sreg.envreg13">,
1860    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg13">;
1861def int_nvvm_read_ptx_sreg_envreg14
1862  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1863              "llvm.nvvm.read.ptx.sreg.envreg14">,
1864    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg14">;
1865def int_nvvm_read_ptx_sreg_envreg15
1866  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1867              "llvm.nvvm.read.ptx.sreg.envreg15">,
1868    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg15">;
1869def int_nvvm_read_ptx_sreg_envreg16
1870  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1871              "llvm.nvvm.read.ptx.sreg.envreg16">,
1872    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg16">;
1873def int_nvvm_read_ptx_sreg_envreg17
1874  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1875              "llvm.nvvm.read.ptx.sreg.envreg17">,
1876    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg17">;
1877def int_nvvm_read_ptx_sreg_envreg18
1878  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1879              "llvm.nvvm.read.ptx.sreg.envreg18">,
1880    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg18">;
1881def int_nvvm_read_ptx_sreg_envreg19
1882  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1883              "llvm.nvvm.read.ptx.sreg.envreg19">,
1884    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg19">;
1885def int_nvvm_read_ptx_sreg_envreg20
1886  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1887              "llvm.nvvm.read.ptx.sreg.envreg20">,
1888    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg20">;
1889def int_nvvm_read_ptx_sreg_envreg21
1890  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1891              "llvm.nvvm.read.ptx.sreg.envreg21">,
1892    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg21">;
1893def int_nvvm_read_ptx_sreg_envreg22
1894  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1895              "llvm.nvvm.read.ptx.sreg.envreg22">,
1896    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg22">;
1897def int_nvvm_read_ptx_sreg_envreg23
1898  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1899              "llvm.nvvm.read.ptx.sreg.envreg23">,
1900    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg23">;
1901def int_nvvm_read_ptx_sreg_envreg24
1902  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1903              "llvm.nvvm.read.ptx.sreg.envreg24">,
1904    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg24">;
1905def int_nvvm_read_ptx_sreg_envreg25
1906  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1907              "llvm.nvvm.read.ptx.sreg.envreg25">,
1908    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg25">;
1909def int_nvvm_read_ptx_sreg_envreg26
1910  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1911              "llvm.nvvm.read.ptx.sreg.envreg26">,
1912    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg26">;
1913def int_nvvm_read_ptx_sreg_envreg27
1914  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1915              "llvm.nvvm.read.ptx.sreg.envreg27">,
1916    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg27">;
1917def int_nvvm_read_ptx_sreg_envreg28
1918  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1919              "llvm.nvvm.read.ptx.sreg.envreg28">,
1920    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg28">;
1921def int_nvvm_read_ptx_sreg_envreg29
1922  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1923              "llvm.nvvm.read.ptx.sreg.envreg29">,
1924    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg29">;
1925def int_nvvm_read_ptx_sreg_envreg30
1926  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1927              "llvm.nvvm.read.ptx.sreg.envreg30">,
1928    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg30">;
1929def int_nvvm_read_ptx_sreg_envreg31
1930  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1931              "llvm.nvvm.read.ptx.sreg.envreg31">,
1932    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg31">;
1933
1934
1935// Texture Fetch
1936// texmode_independent
// llvm.nvvm.tex.1d.* family.  For each result flavour (v4f32, v4s32, v4u32)
// four records are generated: the plain .s32 and .f32 forms plus the .level
// and .grad forms (both .f32).  Every intrinsic returns four scalars (float
// for v4f32, i32 for v4s32/v4u32) and takes two leading i64 operands followed
// by the i32/float operands listed on each def.  No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);
  defvar lead = [llvm_i64_ty, llvm_i64_ty];

  // (i64, i64, i32)
  def "int_nvvm_tex_1d_" # res # "_s32"
    : Intrinsic<rets, !listconcat(lead, [llvm_i32_ty]), [],
                "llvm.nvvm.tex.1d." # res # ".s32">;
  // (i64, i64, float)
  def "int_nvvm_tex_1d_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 1)), [],
                "llvm.nvvm.tex.1d." # res # ".f32">;
  // (i64, i64, float x 2)
  def "int_nvvm_tex_1d_level_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 2)), [],
                "llvm.nvvm.tex.1d.level." # res # ".f32">;
  // (i64, i64, float x 3)
  def "int_nvvm_tex_1d_grad_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.1d.grad." # res # ".f32">;
}
1988
// llvm.nvvm.tex.1d.array.* family.  Same layout as the non-array 1D family,
// except that every operand list starts with (i64, i64, i32) before the
// per-variant i32/float operands.  Results are four floats for v4f32 and
// four i32 for v4s32/v4u32.  No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);
  defvar lead = [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty];

  // (i64, i64, i32, i32)
  def "int_nvvm_tex_1d_array_" # res # "_s32"
    : Intrinsic<rets, !listconcat(lead, [llvm_i32_ty]), [],
                "llvm.nvvm.tex.1d.array." # res # ".s32">;
  // (i64, i64, i32, float)
  def "int_nvvm_tex_1d_array_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 1)), [],
                "llvm.nvvm.tex.1d.array." # res # ".f32">;
  // (i64, i64, i32, float x 2)
  def "int_nvvm_tex_1d_array_level_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 2)), [],
                "llvm.nvvm.tex.1d.array.level." # res # ".f32">;
  // (i64, i64, i32, float x 3)
  def "int_nvvm_tex_1d_array_grad_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.1d.array.grad." # res # ".f32">;
}
2043
// llvm.nvvm.tex.2d.* family.  For each result flavour (v4f32, v4s32, v4u32)
// the .s32, .f32, .level.f32 and .grad.f32 variants are generated.  Every
// intrinsic returns four scalars (float for v4f32, i32 otherwise) and takes
// two leading i64 operands followed by the operands noted on each def.
// No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);
  defvar lead = [llvm_i64_ty, llvm_i64_ty];

  // (i64, i64, i32 x 2)
  def "int_nvvm_tex_2d_" # res # "_s32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_i32_ty, 2)), [],
                "llvm.nvvm.tex.2d." # res # ".s32">;
  // (i64, i64, float x 2)
  def "int_nvvm_tex_2d_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 2)), [],
                "llvm.nvvm.tex.2d." # res # ".f32">;
  // (i64, i64, float x 3)
  def "int_nvvm_tex_2d_level_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.2d.level." # res # ".f32">;
  // (i64, i64, float x 6)
  def "int_nvvm_tex_2d_grad_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 6)), [],
                "llvm.nvvm.tex.2d.grad." # res # ".f32">;
}
2098
// llvm.nvvm.tex.2d.array.* family.  Same variants as the non-array 2D family,
// with every operand list starting with (i64, i64, i32) before the
// per-variant i32/float operands.  Results are four floats for v4f32 and
// four i32 for v4s32/v4u32.  No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);
  defvar lead = [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty];

  // (i64, i64, i32, i32 x 2)
  def "int_nvvm_tex_2d_array_" # res # "_s32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_i32_ty, 2)), [],
                "llvm.nvvm.tex.2d.array." # res # ".s32">;
  // (i64, i64, i32, float x 2)
  def "int_nvvm_tex_2d_array_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 2)), [],
                "llvm.nvvm.tex.2d.array." # res # ".f32">;
  // (i64, i64, i32, float x 3)
  def "int_nvvm_tex_2d_array_level_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.2d.array.level." # res # ".f32">;
  // (i64, i64, i32, float x 6)
  def "int_nvvm_tex_2d_array_grad_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 6)), [],
                "llvm.nvvm.tex.2d.array.grad." # res # ".f32">;
}
2162
// llvm.nvvm.tex.3d.* family.  For each result flavour (v4f32, v4s32, v4u32)
// the .s32, .f32, .level.f32 and .grad.f32 variants are generated.  Every
// intrinsic returns four scalars (float for v4f32, i32 otherwise) and takes
// two leading i64 operands followed by the operands noted on each def.
// No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);
  defvar lead = [llvm_i64_ty, llvm_i64_ty];

  // (i64, i64, i32 x 3)
  def "int_nvvm_tex_3d_" # res # "_s32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_i32_ty, 3)), [],
                "llvm.nvvm.tex.3d." # res # ".s32">;
  // (i64, i64, float x 3)
  def "int_nvvm_tex_3d_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.3d." # res # ".f32">;
  // (i64, i64, float x 4)
  def "int_nvvm_tex_3d_level_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 4)), [],
                "llvm.nvvm.tex.3d.level." # res # ".f32">;
  // (i64, i64, float x 9)
  def "int_nvvm_tex_3d_grad_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 9)), [],
                "llvm.nvvm.tex.3d.grad." # res # ".f32">;
}
2223
// llvm.nvvm.tex.cube.* family.  For each result flavour (v4f32, v4s32,
// v4u32) a plain .f32 form and a .level.f32 form are generated.  Every
// intrinsic returns four scalars (float for v4f32, i32 otherwise) and takes
// two i64 operands followed by three (plain) or four (.level) floats.
// No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);
  defvar lead = [llvm_i64_ty, llvm_i64_ty];

  // (i64, i64, float x 3)
  def "int_nvvm_tex_cube_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.cube." # res # ".f32">;
  // (i64, i64, float x 4)
  def "int_nvvm_tex_cube_level_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 4)), [],
                "llvm.nvvm.tex.cube.level." # res # ".f32">;
}
2254
// llvm.nvvm.tex.cube.array.* family.  Same variants as the non-array cube
// family, with every operand list starting with (i64, i64, i32) before
// three (plain) or four (.level) floats.  Results are four floats for v4f32
// and four i32 for v4s32/v4u32.  No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);
  defvar lead = [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty];

  // (i64, i64, i32, float x 3)
  def "int_nvvm_tex_cube_array_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.cube.array." # res # ".f32">;
  // (i64, i64, i32, float x 4)
  def "int_nvvm_tex_cube_array_level_" # res # "_f32"
    : Intrinsic<rets, !listconcat(lead, !listsplat(llvm_float_ty, 4)), [],
                "llvm.nvvm.tex.cube.array.level." # res # ".f32">;
}
2285
// llvm.nvvm.tld4.{r,g,b,a}.2d.* family.  One record is generated for every
// combination of result flavour (v4f32, v4s32, v4u32) and component letter
// (r, g, b, a).  All of them share the operand list (i64, i64, float, float)
// and return four scalars: float for v4f32, i32 for v4s32/v4u32.
// No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);

  foreach comp = ["r", "g", "b", "a"] in
    def "int_nvvm_tld4_" # comp # "_2d_" # res # "_f32"
      : Intrinsic<rets,
                  !listconcat([llvm_i64_ty, llvm_i64_ty],
                              !listsplat(llvm_float_ty, 2)),
                  [],
                  "llvm.nvvm.tld4." # comp # ".2d." # res # ".f32">;
}
2334
2335
2336// texmode_unified
// llvm.nvvm.tex.unified.1d.* family.  For each result flavour (v4f32, v4s32,
// v4u32) the .s32, .f32, .level.f32 and .grad.f32 variants are generated.
// Every intrinsic returns four scalars (float for v4f32, i32 otherwise) and
// takes a single leading i64 operand followed by the operands noted on each
// def.  No properties are attached.
foreach res = ["v4f32", "v4s32", "v4u32"] in {
  defvar elt_ty = !if(!eq(res, "v4f32"), llvm_float_ty, llvm_i32_ty);
  defvar rets = !listsplat(elt_ty, 4);

  // (i64, i32)
  def "int_nvvm_tex_unified_1d_" # res # "_s32"
    : Intrinsic<rets, [llvm_i64_ty, llvm_i32_ty], [],
                "llvm.nvvm.tex.unified.1d." # res # ".s32">;
  // (i64, float)
  def "int_nvvm_tex_unified_1d_" # res # "_f32"
    : Intrinsic<rets, [llvm_i64_ty, llvm_float_ty], [],
                "llvm.nvvm.tex.unified.1d." # res # ".f32">;
  // (i64, float x 2)
  def "int_nvvm_tex_unified_1d_level_" # res # "_f32"
    : Intrinsic<rets,
                !listconcat([llvm_i64_ty], !listsplat(llvm_float_ty, 2)), [],
                "llvm.nvvm.tex.unified.1d.level." # res # ".f32">;
  // (i64, float x 3)
  def "int_nvvm_tex_unified_1d_grad_" # res # "_f32"
    : Intrinsic<rets,
                !listconcat([llvm_i64_ty], !listsplat(llvm_float_ty, 3)), [],
                "llvm.nvvm.tex.unified.1d.grad." # res # ".f32">;
}
2388
// llvm.nvvm.tex.unified.1d.array.* records.
//
// Every record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32) and starts its operand list with an i64 followed by an
// i32. The operands after those two depend on the variant:
//   <ret>.s32       : one i32
//   <ret>.f32       : one float
//   level.<ret>.f32 : two floats
//   grad.<ret>.f32  : three floats
// The property list is empty on every record.
// NOTE(review): the leading i32 is presumably the array layer index and the
// i64 the texture handle -- confirm against the PTX `tex` instruction.
2389def int_nvvm_tex_unified_1d_array_v4f32_s32
2390  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2391              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2392              "llvm.nvvm.tex.unified.1d.array.v4f32.s32">;
2393def int_nvvm_tex_unified_1d_array_v4f32_f32
2394  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2395              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2396              "llvm.nvvm.tex.unified.1d.array.v4f32.f32">;
2397def int_nvvm_tex_unified_1d_array_level_v4f32_f32
2398  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2399              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2400               llvm_float_ty], [],
2401              "llvm.nvvm.tex.unified.1d.array.level.v4f32.f32">;
2402def int_nvvm_tex_unified_1d_array_grad_v4f32_f32
2403  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2404              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2405               llvm_float_ty, llvm_float_ty], [],
2406              "llvm.nvvm.tex.unified.1d.array.grad.v4f32.f32">;
2407def int_nvvm_tex_unified_1d_array_v4s32_s32
2408  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2409              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2410              "llvm.nvvm.tex.unified.1d.array.v4s32.s32">;
2411def int_nvvm_tex_unified_1d_array_v4s32_f32
2412  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2413              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2414              "llvm.nvvm.tex.unified.1d.array.v4s32.f32">;
2415def int_nvvm_tex_unified_1d_array_level_v4s32_f32
2416  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2417              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2418               llvm_float_ty], [],
2419              "llvm.nvvm.tex.unified.1d.array.level.v4s32.f32">;
2420def int_nvvm_tex_unified_1d_array_grad_v4s32_f32
2421  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2422              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2423               llvm_float_ty, llvm_float_ty], [],
2424              "llvm.nvvm.tex.unified.1d.array.grad.v4s32.f32">;
2425def int_nvvm_tex_unified_1d_array_v4u32_s32
2426  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2427              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2428              "llvm.nvvm.tex.unified.1d.array.v4u32.s32">;
2429def int_nvvm_tex_unified_1d_array_v4u32_f32
2430  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2431              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2432              "llvm.nvvm.tex.unified.1d.array.v4u32.f32">;
2433def int_nvvm_tex_unified_1d_array_level_v4u32_f32
2434  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2435              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2436               llvm_float_ty], [],
2437              "llvm.nvvm.tex.unified.1d.array.level.v4u32.f32">;
2438def int_nvvm_tex_unified_1d_array_grad_v4u32_f32
2439  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2440              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2441               llvm_float_ty, llvm_float_ty], [],
2442              "llvm.nvvm.tex.unified.1d.array.grad.v4u32.f32">;
2443
// llvm.nvvm.tex.unified.2d.* records.
//
// Every record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32). The first operand is always an i64; the remaining
// operands depend on the variant:
//   <ret>.s32       : two i32
//   <ret>.f32       : two floats
//   level.<ret>.f32 : three floats
//   grad.<ret>.f32  : six floats
// The property list is empty on every record.
// NOTE(review): the six grad floats are presumably (x, y, dPdx.xy, dPdy.xy)
// -- confirm operand order against the PTX `tex` instruction.
2444def int_nvvm_tex_unified_2d_v4f32_s32
2445  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2446              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2447              "llvm.nvvm.tex.unified.2d.v4f32.s32">;
2448def int_nvvm_tex_unified_2d_v4f32_f32
2449  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2450              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2451              "llvm.nvvm.tex.unified.2d.v4f32.f32">;
2452def int_nvvm_tex_unified_2d_level_v4f32_f32
2453  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2454              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2455               llvm_float_ty], [],
2456              "llvm.nvvm.tex.unified.2d.level.v4f32.f32">;
2457def int_nvvm_tex_unified_2d_grad_v4f32_f32
2458  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2459              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2460               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2461              "llvm.nvvm.tex.unified.2d.grad.v4f32.f32">;
2462def int_nvvm_tex_unified_2d_v4s32_s32
2463  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2464              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2465              "llvm.nvvm.tex.unified.2d.v4s32.s32">;
2466def int_nvvm_tex_unified_2d_v4s32_f32
2467  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2468              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2469              "llvm.nvvm.tex.unified.2d.v4s32.f32">;
2470def int_nvvm_tex_unified_2d_level_v4s32_f32
2471  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2472              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2473               llvm_float_ty], [],
2474              "llvm.nvvm.tex.unified.2d.level.v4s32.f32">;
2475def int_nvvm_tex_unified_2d_grad_v4s32_f32
2476  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2477              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2478               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2479              "llvm.nvvm.tex.unified.2d.grad.v4s32.f32">;
2480def int_nvvm_tex_unified_2d_v4u32_s32
2481  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2482              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2483              "llvm.nvvm.tex.unified.2d.v4u32.s32">;
2484def int_nvvm_tex_unified_2d_v4u32_f32
2485  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2486              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2487              "llvm.nvvm.tex.unified.2d.v4u32.f32">;
2488def int_nvvm_tex_unified_2d_level_v4u32_f32
2489  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2490              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2491               llvm_float_ty], [],
2492              "llvm.nvvm.tex.unified.2d.level.v4u32.f32">;
2493def int_nvvm_tex_unified_2d_grad_v4u32_f32
2494  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2495              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2496               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2497              "llvm.nvvm.tex.unified.2d.grad.v4u32.f32">;
2498
// llvm.nvvm.tex.unified.2d.array.* records.
//
// Every record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32) and starts its operand list with an i64 followed by an
// i32. The operands after those two depend on the variant:
//   <ret>.s32       : two i32
//   <ret>.f32       : two floats
//   level.<ret>.f32 : three floats
//   grad.<ret>.f32  : six floats
// The property list is empty on every record.
// NOTE(review): the leading i32 is presumably the array layer index --
// confirm against the PTX `tex` instruction.
2499def int_nvvm_tex_unified_2d_array_v4f32_s32
2500  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2501              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2502               llvm_i32_ty], [],
2503              "llvm.nvvm.tex.unified.2d.array.v4f32.s32">;
2504def int_nvvm_tex_unified_2d_array_v4f32_f32
2505  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2506              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2507               llvm_float_ty], [],
2508              "llvm.nvvm.tex.unified.2d.array.v4f32.f32">;
2509def int_nvvm_tex_unified_2d_array_level_v4f32_f32
2510  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2511              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2512               llvm_float_ty, llvm_float_ty], [],
2513              "llvm.nvvm.tex.unified.2d.array.level.v4f32.f32">;
2514def int_nvvm_tex_unified_2d_array_grad_v4f32_f32
2515  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2516              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2517               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2518               llvm_float_ty], [],
2519              "llvm.nvvm.tex.unified.2d.array.grad.v4f32.f32">;
2520def int_nvvm_tex_unified_2d_array_v4s32_s32
2521  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2522              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2523               llvm_i32_ty], [],
2524              "llvm.nvvm.tex.unified.2d.array.v4s32.s32">;
2525def int_nvvm_tex_unified_2d_array_v4s32_f32
2526  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2527              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2528               llvm_float_ty], [],
2529              "llvm.nvvm.tex.unified.2d.array.v4s32.f32">;
2530def int_nvvm_tex_unified_2d_array_level_v4s32_f32
2531  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2532              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2533               llvm_float_ty, llvm_float_ty], [],
2534              "llvm.nvvm.tex.unified.2d.array.level.v4s32.f32">;
2535def int_nvvm_tex_unified_2d_array_grad_v4s32_f32
2536  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2537              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2538               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2539               llvm_float_ty], [],
2540              "llvm.nvvm.tex.unified.2d.array.grad.v4s32.f32">;
2541def int_nvvm_tex_unified_2d_array_v4u32_s32
2542  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2543              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2544               llvm_i32_ty], [],
2545              "llvm.nvvm.tex.unified.2d.array.v4u32.s32">;
2546def int_nvvm_tex_unified_2d_array_v4u32_f32
2547  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2548              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2549               llvm_float_ty], [],
2550              "llvm.nvvm.tex.unified.2d.array.v4u32.f32">;
2551def int_nvvm_tex_unified_2d_array_level_v4u32_f32
2552  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2553              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2554               llvm_float_ty, llvm_float_ty], [],
2555              "llvm.nvvm.tex.unified.2d.array.level.v4u32.f32">;
2556def int_nvvm_tex_unified_2d_array_grad_v4u32_f32
2557  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2558              [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2559               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2560               llvm_float_ty], [],
2561              "llvm.nvvm.tex.unified.2d.array.grad.v4u32.f32">;
2562
// llvm.nvvm.tex.unified.3d.* records.
//
// Every record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32). The first operand is always an i64; the remaining
// operands depend on the variant:
//   <ret>.s32       : three i32
//   <ret>.f32       : three floats
//   level.<ret>.f32 : four floats
//   grad.<ret>.f32  : nine floats
// The property list is empty on every record.
// NOTE(review): the nine grad floats are presumably (x, y, z, dPdx.xyz,
// dPdy.xyz) -- confirm operand order against the PTX `tex` instruction.
2563def int_nvvm_tex_unified_3d_v4f32_s32
2564  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2565              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2566              [], "llvm.nvvm.tex.unified.3d.v4f32.s32">;
2567def int_nvvm_tex_unified_3d_v4f32_f32
2568  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2569              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2570               llvm_float_ty], [],
2571              "llvm.nvvm.tex.unified.3d.v4f32.f32">;
2572def int_nvvm_tex_unified_3d_level_v4f32_f32
2573  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2574              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2575               llvm_float_ty, llvm_float_ty], [],
2576              "llvm.nvvm.tex.unified.3d.level.v4f32.f32">;
2577def int_nvvm_tex_unified_3d_grad_v4f32_f32
2578  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2579              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2580               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2581               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2582              "llvm.nvvm.tex.unified.3d.grad.v4f32.f32">;
2583def int_nvvm_tex_unified_3d_v4s32_s32
2584  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2585              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2586              [], "llvm.nvvm.tex.unified.3d.v4s32.s32">;
2587def int_nvvm_tex_unified_3d_v4s32_f32
2588  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2589              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2590               llvm_float_ty], [],
2591              "llvm.nvvm.tex.unified.3d.v4s32.f32">;
2592def int_nvvm_tex_unified_3d_level_v4s32_f32
2593  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2594              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2595               llvm_float_ty, llvm_float_ty], [],
2596              "llvm.nvvm.tex.unified.3d.level.v4s32.f32">;
2597def int_nvvm_tex_unified_3d_grad_v4s32_f32
2598  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2599              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2600               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2601               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2602              "llvm.nvvm.tex.unified.3d.grad.v4s32.f32">;
2603def int_nvvm_tex_unified_3d_v4u32_s32
2604  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2605              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2606              [], "llvm.nvvm.tex.unified.3d.v4u32.s32">;
2607def int_nvvm_tex_unified_3d_v4u32_f32
2608  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2609              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2610               llvm_float_ty], [],
2611              "llvm.nvvm.tex.unified.3d.v4u32.f32">;
2612def int_nvvm_tex_unified_3d_level_v4u32_f32
2613  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2614              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2615               llvm_float_ty, llvm_float_ty], [],
2616              "llvm.nvvm.tex.unified.3d.level.v4u32.f32">;
2617def int_nvvm_tex_unified_3d_grad_v4u32_f32
2618  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2619              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2620               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2621               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2622              "llvm.nvvm.tex.unified.3d.grad.v4u32.f32">;
2623
// llvm.nvvm.tex.unified.cube.* records (plain and `level` forms only; there
// is no .s32 operand variant in this group).
//
// Every record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32). The first operand is always an i64, followed by:
//   <ret>.f32       : three floats
//   level.<ret>.f32 : four floats
// The property list is empty on every record.
// NOTE(review): the three floats are presumably the cube-map direction and
// the fourth the LOD -- confirm against the PTX `tex` instruction.
2624def int_nvvm_tex_unified_cube_v4f32_f32
2625  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2626              [llvm_i64_ty,
2627               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2628              "llvm.nvvm.tex.unified.cube.v4f32.f32">;
2629def int_nvvm_tex_unified_cube_level_v4f32_f32
2630  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2631              [llvm_i64_ty,
2632               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2633              "llvm.nvvm.tex.unified.cube.level.v4f32.f32">;
2634def int_nvvm_tex_unified_cube_v4s32_f32
2635  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2636              [llvm_i64_ty,
2637               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2638              "llvm.nvvm.tex.unified.cube.v4s32.f32">;
2639def int_nvvm_tex_unified_cube_level_v4s32_f32
2640  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2641              [llvm_i64_ty,
2642               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2643              "llvm.nvvm.tex.unified.cube.level.v4s32.f32">;
2644def int_nvvm_tex_unified_cube_v4u32_f32
2645  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2646              [llvm_i64_ty,
2647               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2648              "llvm.nvvm.tex.unified.cube.v4u32.f32">;
2649def int_nvvm_tex_unified_cube_level_v4u32_f32
2650  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2651              [llvm_i64_ty,
2652               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2653              "llvm.nvvm.tex.unified.cube.level.v4u32.f32">;
2654
// llvm.nvvm.tex.unified.cube.array.* records (plain and `level` forms).
//
// Every record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32) and starts its operand list with an i64 followed by an
// i32. After those two come:
//   <ret>.f32       : three floats
//   level.<ret>.f32 : four floats
// The property list is empty on every record.
// NOTE(review): the leading i32 is presumably the array layer index --
// confirm against the PTX `tex` instruction.
2655def int_nvvm_tex_unified_cube_array_v4f32_f32
2656  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2657              [llvm_i64_ty, llvm_i32_ty,
2658               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2659              "llvm.nvvm.tex.unified.cube.array.v4f32.f32">;
2660def int_nvvm_tex_unified_cube_array_level_v4f32_f32
2661  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2662              [llvm_i64_ty, llvm_i32_ty,
2663               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2664              "llvm.nvvm.tex.unified.cube.array.level.v4f32.f32">;
2665def int_nvvm_tex_unified_cube_array_v4s32_f32
2666  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2667              [llvm_i64_ty, llvm_i32_ty,
2668               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2669              "llvm.nvvm.tex.unified.cube.array.v4s32.f32">;
2670def int_nvvm_tex_unified_cube_array_level_v4s32_f32
2671  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2672              [llvm_i64_ty, llvm_i32_ty,
2673               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2674              "llvm.nvvm.tex.unified.cube.array.level.v4s32.f32">;
2675def int_nvvm_tex_unified_cube_array_v4u32_f32
2676  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2677              [llvm_i64_ty, llvm_i32_ty,
2678               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2679              "llvm.nvvm.tex.unified.cube.array.v4u32.f32">;
2680def int_nvvm_tex_unified_cube_array_level_v4u32_f32
2681  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2682              [llvm_i64_ty, llvm_i32_ty,
2683               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2684              "llvm.nvvm.tex.unified.cube.array.level.v4u32.f32">;
2685
// llvm.nvvm.tex.unified.cube.grad.* records.
//
// Each record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32) and takes an i64 followed by nine floats. The property
// list is empty on every record.
// NOTE(review): the nine floats are presumably the 3-component coordinate
// plus two 3-component derivative vectors -- confirm against the PTX `tex`
// instruction.
2686def int_nvvm_tex_unified_cube_grad_v4f32_f32
2687  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2688              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2689               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2690               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2691              "llvm.nvvm.tex.unified.cube.grad.v4f32.f32">;
2692def int_nvvm_tex_unified_cube_grad_v4s32_f32
2693  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2694              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2695               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2696               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2697              "llvm.nvvm.tex.unified.cube.grad.v4s32.f32">;
2698def int_nvvm_tex_unified_cube_grad_v4u32_f32
2699  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2700              [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2701               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2702               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2703              "llvm.nvvm.tex.unified.cube.grad.v4u32.f32">;
2704
// llvm.nvvm.tex.unified.cube.array.grad.* records.
//
// Each record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32) and takes an i64, an i32, and then nine floats (split
// 2 + 4 + 3 across the three continuation lines). The property list is empty
// on every record.
// NOTE(review): the i32 is presumably the array layer index and the nine
// floats the coordinate plus two derivative vectors -- confirm against the
// PTX `tex` instruction.
2705def int_nvvm_tex_unified_cube_array_grad_v4f32_f32
2706  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2707              [llvm_i64_ty, llvm_i32_ty,
2708              llvm_float_ty, llvm_float_ty,
2709               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2710               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2711              "llvm.nvvm.tex.unified.cube.array.grad.v4f32.f32">;
2712def int_nvvm_tex_unified_cube_array_grad_v4s32_f32
2713  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2714              [llvm_i64_ty, llvm_i32_ty,
2715              llvm_float_ty, llvm_float_ty,
2716               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2717               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2718              "llvm.nvvm.tex.unified.cube.array.grad.v4s32.f32">;
2719def int_nvvm_tex_unified_cube_array_grad_v4u32_f32
2720  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2721              [llvm_i64_ty, llvm_i32_ty,
2722              llvm_float_ty, llvm_float_ty,
2723               llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2724               llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2725              "llvm.nvvm.tex.unified.cube.array.grad.v4u32.f32">;
2726
// llvm.nvvm.tld4.unified.{r,g,b,a}.2d.* records.
//
// One record per (component letter r/g/b/a) x (result kind v4f32/v4s32/v4u32).
// Every record returns four scalars (llvm_float_ty for v4f32, llvm_i32_ty for
// v4s32 and v4u32) and takes the same operand list: an i64 followed by two
// floats. The property list is empty on every record.
// NOTE(review): r/g/b/a presumably select which texel component is gathered,
// and the i64 is presumably the texture handle -- confirm against the PTX
// `tld4` instruction.
2727def int_nvvm_tld4_unified_r_2d_v4f32_f32
2728  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2729              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2730              "llvm.nvvm.tld4.unified.r.2d.v4f32.f32">;
2731def int_nvvm_tld4_unified_g_2d_v4f32_f32
2732  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2733              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2734              "llvm.nvvm.tld4.unified.g.2d.v4f32.f32">;
2735def int_nvvm_tld4_unified_b_2d_v4f32_f32
2736  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2737              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2738              "llvm.nvvm.tld4.unified.b.2d.v4f32.f32">;
2739def int_nvvm_tld4_unified_a_2d_v4f32_f32
2740  : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2741              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2742              "llvm.nvvm.tld4.unified.a.2d.v4f32.f32">;
2743def int_nvvm_tld4_unified_r_2d_v4s32_f32
2744  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2745              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2746              "llvm.nvvm.tld4.unified.r.2d.v4s32.f32">;
2747def int_nvvm_tld4_unified_g_2d_v4s32_f32
2748  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2749              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2750              "llvm.nvvm.tld4.unified.g.2d.v4s32.f32">;
2751def int_nvvm_tld4_unified_b_2d_v4s32_f32
2752  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2753              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2754              "llvm.nvvm.tld4.unified.b.2d.v4s32.f32">;
2755def int_nvvm_tld4_unified_a_2d_v4s32_f32
2756  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2757              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2758              "llvm.nvvm.tld4.unified.a.2d.v4s32.f32">;
2759def int_nvvm_tld4_unified_r_2d_v4u32_f32
2760  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2761              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2762              "llvm.nvvm.tld4.unified.r.2d.v4u32.f32">;
2763def int_nvvm_tld4_unified_g_2d_v4u32_f32
2764  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2765              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2766              "llvm.nvvm.tld4.unified.g.2d.v4u32.f32">;
2767def int_nvvm_tld4_unified_b_2d_v4u32_f32
2768  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2769              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2770              "llvm.nvvm.tld4.unified.b.2d.v4u32.f32">;
2771def int_nvvm_tld4_unified_a_2d_v4u32_f32
2772  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2773              [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2774              "llvm.nvvm.tld4.unified.a.2d.v4u32.f32">;
2775
2776
2777//=== Surface Load
2778// .clamp variants
// llvm.nvvm.suld.1d.<ty>.clamp records.
//
// Every record takes an i64 followed by one i32. The result list depends on
// the <ty> suffix:
//   i8, i16     : one i16          (the i8 form is declared with an i16 result)
//   i32         : one i32
//   i64         : one i64
//   v2i8, v2i16 : two i16
//   v2i32       : two i32
//   v2i64       : two i64
//   v4i8, v4i16 : four i16
//   v4i32       : four i32
// There is no v4i64 record in this group. The property list is empty on
// every record.
// NOTE(review): the i64 is presumably the surface handle and the i32 the x
// coordinate -- confirm against the PTX `suld` instruction.
2779def int_nvvm_suld_1d_i8_clamp
2780  : Intrinsic<[llvm_i16_ty],
2781              [llvm_i64_ty, llvm_i32_ty], [],
2782              "llvm.nvvm.suld.1d.i8.clamp">;
2783def int_nvvm_suld_1d_i16_clamp
2784  : Intrinsic<[llvm_i16_ty],
2785              [llvm_i64_ty, llvm_i32_ty], [],
2786              "llvm.nvvm.suld.1d.i16.clamp">;
2787def int_nvvm_suld_1d_i32_clamp
2788  : Intrinsic<[llvm_i32_ty],
2789              [llvm_i64_ty, llvm_i32_ty], [],
2790              "llvm.nvvm.suld.1d.i32.clamp">;
2791def int_nvvm_suld_1d_i64_clamp
2792  : Intrinsic<[llvm_i64_ty],
2793              [llvm_i64_ty, llvm_i32_ty], [],
2794              "llvm.nvvm.suld.1d.i64.clamp">;
2795def int_nvvm_suld_1d_v2i8_clamp
2796  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2797              [llvm_i64_ty, llvm_i32_ty], [],
2798              "llvm.nvvm.suld.1d.v2i8.clamp">;
2799def int_nvvm_suld_1d_v2i16_clamp
2800  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2801              [llvm_i64_ty, llvm_i32_ty], [],
2802              "llvm.nvvm.suld.1d.v2i16.clamp">;
2803def int_nvvm_suld_1d_v2i32_clamp
2804  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2805              [llvm_i64_ty, llvm_i32_ty], [],
2806              "llvm.nvvm.suld.1d.v2i32.clamp">;
2807def int_nvvm_suld_1d_v2i64_clamp
2808  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2809              [llvm_i64_ty, llvm_i32_ty], [],
2810              "llvm.nvvm.suld.1d.v2i64.clamp">;
2811def int_nvvm_suld_1d_v4i8_clamp
2812  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2813              [llvm_i64_ty, llvm_i32_ty], [],
2814              "llvm.nvvm.suld.1d.v4i8.clamp">;
2815def int_nvvm_suld_1d_v4i16_clamp
2816  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2817              [llvm_i64_ty, llvm_i32_ty], [],
2818              "llvm.nvvm.suld.1d.v4i16.clamp">;
2819def int_nvvm_suld_1d_v4i32_clamp
2820  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2821              [llvm_i64_ty, llvm_i32_ty], [],
2822              "llvm.nvvm.suld.1d.v4i32.clamp">;
2823
// llvm.nvvm.suld.1d.array.<ty>.clamp records.
//
// Every record takes an i64 followed by two i32 operands. The result list
// depends on the <ty> suffix:
//   i8, i16     : one i16          (the i8 form is declared with an i16 result)
//   i32         : one i32
//   i64         : one i64
//   v2i8, v2i16 : two i16
//   v2i32       : two i32
//   v2i64       : two i64
//   v4i8, v4i16 : four i16
//   v4i32       : four i32
// There is no v4i64 record in this group. The property list is empty on
// every record.
// NOTE(review): the two i32 operands are presumably (layer, x) -- confirm
// the order against the PTX `suld` instruction.
2824def int_nvvm_suld_1d_array_i8_clamp
2825  : Intrinsic<[llvm_i16_ty],
2826              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2827              "llvm.nvvm.suld.1d.array.i8.clamp">;
2828def int_nvvm_suld_1d_array_i16_clamp
2829  : Intrinsic<[llvm_i16_ty],
2830              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2831              "llvm.nvvm.suld.1d.array.i16.clamp">;
2832def int_nvvm_suld_1d_array_i32_clamp
2833  : Intrinsic<[llvm_i32_ty],
2834              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2835              "llvm.nvvm.suld.1d.array.i32.clamp">;
2836def int_nvvm_suld_1d_array_i64_clamp
2837  : Intrinsic<[llvm_i64_ty],
2838              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2839              "llvm.nvvm.suld.1d.array.i64.clamp">;
2840def int_nvvm_suld_1d_array_v2i8_clamp
2841  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2842              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2843              "llvm.nvvm.suld.1d.array.v2i8.clamp">;
2844def int_nvvm_suld_1d_array_v2i16_clamp
2845  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2846              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2847              "llvm.nvvm.suld.1d.array.v2i16.clamp">;
2848def int_nvvm_suld_1d_array_v2i32_clamp
2849  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2850              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2851              "llvm.nvvm.suld.1d.array.v2i32.clamp">;
2852def int_nvvm_suld_1d_array_v2i64_clamp
2853  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2854              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2855              "llvm.nvvm.suld.1d.array.v2i64.clamp">;
2856def int_nvvm_suld_1d_array_v4i8_clamp
2857  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2858              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2859              "llvm.nvvm.suld.1d.array.v4i8.clamp">;
2860def int_nvvm_suld_1d_array_v4i16_clamp
2861  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2862              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2863              "llvm.nvvm.suld.1d.array.v4i16.clamp">;
2864def int_nvvm_suld_1d_array_v4i32_clamp
2865  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2866              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2867              "llvm.nvvm.suld.1d.array.v4i32.clamp">;
2868
// llvm.nvvm.suld.2d.<ty>.clamp records.
//
// Every record takes an i64 followed by two i32 operands. The result list
// depends on the <ty> suffix:
//   i8, i16     : one i16          (the i8 form is declared with an i16 result)
//   i32         : one i32
//   i64         : one i64
//   v2i8, v2i16 : two i16
//   v2i32       : two i32
//   v2i64       : two i64
//   v4i8, v4i16 : four i16
//   v4i32       : four i32
// There is no v4i64 record in this group. The property list is empty on
// every record.
// NOTE(review): the two i32 operands are presumably the (x, y) coordinates
// -- confirm against the PTX `suld` instruction.
2869def int_nvvm_suld_2d_i8_clamp
2870  : Intrinsic<[llvm_i16_ty],
2871              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2872              "llvm.nvvm.suld.2d.i8.clamp">;
2873def int_nvvm_suld_2d_i16_clamp
2874  : Intrinsic<[llvm_i16_ty],
2875              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2876              "llvm.nvvm.suld.2d.i16.clamp">;
2877def int_nvvm_suld_2d_i32_clamp
2878  : Intrinsic<[llvm_i32_ty],
2879              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2880              "llvm.nvvm.suld.2d.i32.clamp">;
2881def int_nvvm_suld_2d_i64_clamp
2882  : Intrinsic<[llvm_i64_ty],
2883              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2884              "llvm.nvvm.suld.2d.i64.clamp">;
2885def int_nvvm_suld_2d_v2i8_clamp
2886  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2887              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2888              "llvm.nvvm.suld.2d.v2i8.clamp">;
2889def int_nvvm_suld_2d_v2i16_clamp
2890  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2891              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2892              "llvm.nvvm.suld.2d.v2i16.clamp">;
2893def int_nvvm_suld_2d_v2i32_clamp
2894  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2895              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2896              "llvm.nvvm.suld.2d.v2i32.clamp">;
2897def int_nvvm_suld_2d_v2i64_clamp
2898  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2899              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2900              "llvm.nvvm.suld.2d.v2i64.clamp">;
2901def int_nvvm_suld_2d_v4i8_clamp
2902  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2903              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2904              "llvm.nvvm.suld.2d.v4i8.clamp">;
2905def int_nvvm_suld_2d_v4i16_clamp
2906  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2907              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2908              "llvm.nvvm.suld.2d.v4i16.clamp">;
2909def int_nvvm_suld_2d_v4i32_clamp
2910  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2911              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2912              "llvm.nvvm.suld.2d.v4i32.clamp">;
2913
// llvm.nvvm.suld.2d.array.<ty>.clamp records.
//
// Every record takes an i64 followed by three i32 operands. The result list
// depends on the <ty> suffix:
//   i8, i16     : one i16          (the i8 form is declared with an i16 result)
//   i32         : one i32
//   i64         : one i64
//   v2i8, v2i16 : two i16
//   v2i32       : two i32
//   v2i64       : two i64
//   v4i8, v4i16 : four i16
//   v4i32       : four i32
// There is no v4i64 record in this group. The property list is empty on
// every record.
// NOTE(review): the three i32 operands are presumably (layer, x, y) --
// confirm the order against the PTX `suld` instruction.
2914def int_nvvm_suld_2d_array_i8_clamp
2915  : Intrinsic<[llvm_i16_ty],
2916              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2917              "llvm.nvvm.suld.2d.array.i8.clamp">;
2918def int_nvvm_suld_2d_array_i16_clamp
2919  : Intrinsic<[llvm_i16_ty],
2920              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2921              "llvm.nvvm.suld.2d.array.i16.clamp">;
2922def int_nvvm_suld_2d_array_i32_clamp
2923  : Intrinsic<[llvm_i32_ty],
2924              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2925              "llvm.nvvm.suld.2d.array.i32.clamp">;
2926def int_nvvm_suld_2d_array_i64_clamp
2927  : Intrinsic<[llvm_i64_ty],
2928              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2929              "llvm.nvvm.suld.2d.array.i64.clamp">;
2930def int_nvvm_suld_2d_array_v2i8_clamp
2931  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2932              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2933              "llvm.nvvm.suld.2d.array.v2i8.clamp">;
2934def int_nvvm_suld_2d_array_v2i16_clamp
2935  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2936              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2937              "llvm.nvvm.suld.2d.array.v2i16.clamp">;
2938def int_nvvm_suld_2d_array_v2i32_clamp
2939  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2940              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2941              "llvm.nvvm.suld.2d.array.v2i32.clamp">;
2942def int_nvvm_suld_2d_array_v2i64_clamp
2943  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2944              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2945              "llvm.nvvm.suld.2d.array.v2i64.clamp">;
2946def int_nvvm_suld_2d_array_v4i8_clamp
2947  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2948              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2949              "llvm.nvvm.suld.2d.array.v4i8.clamp">;
2950def int_nvvm_suld_2d_array_v4i16_clamp
2951  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2952              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2953              "llvm.nvvm.suld.2d.array.v4i16.clamp">;
2954def int_nvvm_suld_2d_array_v4i32_clamp
2955  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2956              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2957              "llvm.nvvm.suld.2d.array.v4i32.clamp">;
2958
// llvm.nvvm.suld.3d.*.clamp records. Each is a bare Intrinsic with an empty
// property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32, i32). The result list has one entry per element (1 for
// scalar, 2 for v2*, 4 for v4*); the i8 and i16 element variants both use
// llvm_i16_ty results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// x/y/z coordinates -- confirm against the NVPTX lowering.
2959def int_nvvm_suld_3d_i8_clamp
2960  : Intrinsic<[llvm_i16_ty],
2961              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2962              "llvm.nvvm.suld.3d.i8.clamp">;
2963def int_nvvm_suld_3d_i16_clamp
2964  : Intrinsic<[llvm_i16_ty],
2965              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2966              "llvm.nvvm.suld.3d.i16.clamp">;
2967def int_nvvm_suld_3d_i32_clamp
2968  : Intrinsic<[llvm_i32_ty],
2969              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2970              "llvm.nvvm.suld.3d.i32.clamp">;
2971def int_nvvm_suld_3d_i64_clamp
2972  : Intrinsic<[llvm_i64_ty],
2973              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2974              "llvm.nvvm.suld.3d.i64.clamp">;
2975def int_nvvm_suld_3d_v2i8_clamp
2976  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2977              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2978              "llvm.nvvm.suld.3d.v2i8.clamp">;
2979def int_nvvm_suld_3d_v2i16_clamp
2980  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2981              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2982              "llvm.nvvm.suld.3d.v2i16.clamp">;
2983def int_nvvm_suld_3d_v2i32_clamp
2984  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2985              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2986              "llvm.nvvm.suld.3d.v2i32.clamp">;
2987def int_nvvm_suld_3d_v2i64_clamp
2988  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2989              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2990              "llvm.nvvm.suld.3d.v2i64.clamp">;
2991def int_nvvm_suld_3d_v4i8_clamp
2992  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2993              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2994              "llvm.nvvm.suld.3d.v4i8.clamp">;
2995def int_nvvm_suld_3d_v4i16_clamp
2996  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2997              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2998              "llvm.nvvm.suld.3d.v4i16.clamp">;
2999def int_nvvm_suld_3d_v4i32_clamp
3000  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3001              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3002              "llvm.nvvm.suld.3d.v4i32.clamp">;
3003
3004// .trap variants
// llvm.nvvm.suld.1d.*.trap records. Each is a bare Intrinsic with an empty
// property list and no ClangBuiltin attached. The operand list is always
// (i64, i32). The result list has one entry per element (1 for scalar, 2 for
// v2*, 4 for v4*); the i8 and i16 element variants both use llvm_i16_ty
// results.
// NOTE(review): presumably the i64 is the surface handle and the i32 is the x
// coordinate -- confirm against the NVPTX lowering.
3005def int_nvvm_suld_1d_i8_trap
3006  : Intrinsic<[llvm_i16_ty],
3007              [llvm_i64_ty, llvm_i32_ty], [],
3008              "llvm.nvvm.suld.1d.i8.trap">;
3009def int_nvvm_suld_1d_i16_trap
3010  : Intrinsic<[llvm_i16_ty],
3011              [llvm_i64_ty, llvm_i32_ty], [],
3012              "llvm.nvvm.suld.1d.i16.trap">;
3013def int_nvvm_suld_1d_i32_trap
3014  : Intrinsic<[llvm_i32_ty],
3015              [llvm_i64_ty, llvm_i32_ty], [],
3016              "llvm.nvvm.suld.1d.i32.trap">;
3017def int_nvvm_suld_1d_i64_trap
3018  : Intrinsic<[llvm_i64_ty],
3019              [llvm_i64_ty, llvm_i32_ty], [],
3020              "llvm.nvvm.suld.1d.i64.trap">;
3021def int_nvvm_suld_1d_v2i8_trap
3022  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3023              [llvm_i64_ty, llvm_i32_ty], [],
3024              "llvm.nvvm.suld.1d.v2i8.trap">;
3025def int_nvvm_suld_1d_v2i16_trap
3026  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3027              [llvm_i64_ty, llvm_i32_ty], [],
3028              "llvm.nvvm.suld.1d.v2i16.trap">;
3029def int_nvvm_suld_1d_v2i32_trap
3030  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3031              [llvm_i64_ty, llvm_i32_ty], [],
3032              "llvm.nvvm.suld.1d.v2i32.trap">;
3033def int_nvvm_suld_1d_v2i64_trap
3034  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3035              [llvm_i64_ty, llvm_i32_ty], [],
3036              "llvm.nvvm.suld.1d.v2i64.trap">;
3037def int_nvvm_suld_1d_v4i8_trap
3038  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3039              [llvm_i64_ty, llvm_i32_ty], [],
3040              "llvm.nvvm.suld.1d.v4i8.trap">;
3041def int_nvvm_suld_1d_v4i16_trap
3042  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3043              [llvm_i64_ty, llvm_i32_ty], [],
3044              "llvm.nvvm.suld.1d.v4i16.trap">;
3045def int_nvvm_suld_1d_v4i32_trap
3046  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3047              [llvm_i64_ty, llvm_i32_ty], [],
3048              "llvm.nvvm.suld.1d.v4i32.trap">;
3049
// llvm.nvvm.suld.1d.array.*.trap records. Each is a bare Intrinsic with an
// empty property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32). The result list has one entry per element (1 for scalar,
// 2 for v2*, 4 for v4*); the i8 and i16 element variants both use llvm_i16_ty
// results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// layer index plus x coordinate -- confirm against the NVPTX lowering.
3050def int_nvvm_suld_1d_array_i8_trap
3051  : Intrinsic<[llvm_i16_ty],
3052              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3053              "llvm.nvvm.suld.1d.array.i8.trap">;
3054def int_nvvm_suld_1d_array_i16_trap
3055  : Intrinsic<[llvm_i16_ty],
3056              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3057              "llvm.nvvm.suld.1d.array.i16.trap">;
3058def int_nvvm_suld_1d_array_i32_trap
3059  : Intrinsic<[llvm_i32_ty],
3060              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3061              "llvm.nvvm.suld.1d.array.i32.trap">;
3062def int_nvvm_suld_1d_array_i64_trap
3063  : Intrinsic<[llvm_i64_ty],
3064              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3065              "llvm.nvvm.suld.1d.array.i64.trap">;
3066def int_nvvm_suld_1d_array_v2i8_trap
3067  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3068              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3069              "llvm.nvvm.suld.1d.array.v2i8.trap">;
3070def int_nvvm_suld_1d_array_v2i16_trap
3071  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3072              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3073              "llvm.nvvm.suld.1d.array.v2i16.trap">;
3074def int_nvvm_suld_1d_array_v2i32_trap
3075  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3076              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3077              "llvm.nvvm.suld.1d.array.v2i32.trap">;
3078def int_nvvm_suld_1d_array_v2i64_trap
3079  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3080              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3081              "llvm.nvvm.suld.1d.array.v2i64.trap">;
3082def int_nvvm_suld_1d_array_v4i8_trap
3083  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3084              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3085              "llvm.nvvm.suld.1d.array.v4i8.trap">;
3086def int_nvvm_suld_1d_array_v4i16_trap
3087  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3088              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3089              "llvm.nvvm.suld.1d.array.v4i16.trap">;
3090def int_nvvm_suld_1d_array_v4i32_trap
3091  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3092              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3093              "llvm.nvvm.suld.1d.array.v4i32.trap">;
3094
// llvm.nvvm.suld.2d.*.trap records. Each is a bare Intrinsic with an empty
// property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32). The result list has one entry per element (1 for scalar,
// 2 for v2*, 4 for v4*); the i8 and i16 element variants both use llvm_i16_ty
// results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// x/y coordinates -- confirm against the NVPTX lowering.
3095def int_nvvm_suld_2d_i8_trap
3096  : Intrinsic<[llvm_i16_ty],
3097              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3098              "llvm.nvvm.suld.2d.i8.trap">;
3099def int_nvvm_suld_2d_i16_trap
3100  : Intrinsic<[llvm_i16_ty],
3101              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3102              "llvm.nvvm.suld.2d.i16.trap">;
3103def int_nvvm_suld_2d_i32_trap
3104  : Intrinsic<[llvm_i32_ty],
3105              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3106              "llvm.nvvm.suld.2d.i32.trap">;
3107def int_nvvm_suld_2d_i64_trap
3108  : Intrinsic<[llvm_i64_ty],
3109              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3110              "llvm.nvvm.suld.2d.i64.trap">;
3111def int_nvvm_suld_2d_v2i8_trap
3112  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3113              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3114              "llvm.nvvm.suld.2d.v2i8.trap">;
3115def int_nvvm_suld_2d_v2i16_trap
3116  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3117              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3118              "llvm.nvvm.suld.2d.v2i16.trap">;
3119def int_nvvm_suld_2d_v2i32_trap
3120  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3121              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3122              "llvm.nvvm.suld.2d.v2i32.trap">;
3123def int_nvvm_suld_2d_v2i64_trap
3124  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3125              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3126              "llvm.nvvm.suld.2d.v2i64.trap">;
3127def int_nvvm_suld_2d_v4i8_trap
3128  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3129              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3130              "llvm.nvvm.suld.2d.v4i8.trap">;
3131def int_nvvm_suld_2d_v4i16_trap
3132  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3133              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3134              "llvm.nvvm.suld.2d.v4i16.trap">;
3135def int_nvvm_suld_2d_v4i32_trap
3136  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3137              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3138              "llvm.nvvm.suld.2d.v4i32.trap">;
3139
// llvm.nvvm.suld.2d.array.*.trap records. Each is a bare Intrinsic with an
// empty property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32, i32). The result list has one entry per element (1 for
// scalar, 2 for v2*, 4 for v4*); the i8 and i16 element variants both use
// llvm_i16_ty results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// layer index plus x/y coordinates -- confirm against the NVPTX lowering.
3140def int_nvvm_suld_2d_array_i8_trap
3141  : Intrinsic<[llvm_i16_ty],
3142              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3143              "llvm.nvvm.suld.2d.array.i8.trap">;
3144def int_nvvm_suld_2d_array_i16_trap
3145  : Intrinsic<[llvm_i16_ty],
3146              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3147              "llvm.nvvm.suld.2d.array.i16.trap">;
3148def int_nvvm_suld_2d_array_i32_trap
3149  : Intrinsic<[llvm_i32_ty],
3150              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3151              "llvm.nvvm.suld.2d.array.i32.trap">;
3152def int_nvvm_suld_2d_array_i64_trap
3153  : Intrinsic<[llvm_i64_ty],
3154              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3155              "llvm.nvvm.suld.2d.array.i64.trap">;
3156def int_nvvm_suld_2d_array_v2i8_trap
3157  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3158              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3159              "llvm.nvvm.suld.2d.array.v2i8.trap">;
3160def int_nvvm_suld_2d_array_v2i16_trap
3161  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3162              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3163              "llvm.nvvm.suld.2d.array.v2i16.trap">;
3164def int_nvvm_suld_2d_array_v2i32_trap
3165  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3166              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3167              "llvm.nvvm.suld.2d.array.v2i32.trap">;
3168def int_nvvm_suld_2d_array_v2i64_trap
3169  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3170              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3171              "llvm.nvvm.suld.2d.array.v2i64.trap">;
3172def int_nvvm_suld_2d_array_v4i8_trap
3173  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3174              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3175              "llvm.nvvm.suld.2d.array.v4i8.trap">;
3176def int_nvvm_suld_2d_array_v4i16_trap
3177  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3178              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3179              "llvm.nvvm.suld.2d.array.v4i16.trap">;
3180def int_nvvm_suld_2d_array_v4i32_trap
3181  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3182              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3183              "llvm.nvvm.suld.2d.array.v4i32.trap">;
3184
// llvm.nvvm.suld.3d.*.trap records. Each is a bare Intrinsic with an empty
// property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32, i32). The result list has one entry per element (1 for
// scalar, 2 for v2*, 4 for v4*); the i8 and i16 element variants both use
// llvm_i16_ty results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// x/y/z coordinates -- confirm against the NVPTX lowering.
3185def int_nvvm_suld_3d_i8_trap
3186  : Intrinsic<[llvm_i16_ty],
3187              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3188              "llvm.nvvm.suld.3d.i8.trap">;
3189def int_nvvm_suld_3d_i16_trap
3190  : Intrinsic<[llvm_i16_ty],
3191              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3192              "llvm.nvvm.suld.3d.i16.trap">;
3193def int_nvvm_suld_3d_i32_trap
3194  : Intrinsic<[llvm_i32_ty],
3195              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3196              "llvm.nvvm.suld.3d.i32.trap">;
3197def int_nvvm_suld_3d_i64_trap
3198  : Intrinsic<[llvm_i64_ty],
3199              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3200              "llvm.nvvm.suld.3d.i64.trap">;
3201def int_nvvm_suld_3d_v2i8_trap
3202  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3203              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3204              "llvm.nvvm.suld.3d.v2i8.trap">;
3205def int_nvvm_suld_3d_v2i16_trap
3206  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3207              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3208              "llvm.nvvm.suld.3d.v2i16.trap">;
3209def int_nvvm_suld_3d_v2i32_trap
3210  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3211              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3212              "llvm.nvvm.suld.3d.v2i32.trap">;
3213def int_nvvm_suld_3d_v2i64_trap
3214  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3215              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3216              "llvm.nvvm.suld.3d.v2i64.trap">;
3217def int_nvvm_suld_3d_v4i8_trap
3218  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3219              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3220              "llvm.nvvm.suld.3d.v4i8.trap">;
3221def int_nvvm_suld_3d_v4i16_trap
3222  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3223              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3224              "llvm.nvvm.suld.3d.v4i16.trap">;
3225def int_nvvm_suld_3d_v4i32_trap
3226  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3227              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3228              "llvm.nvvm.suld.3d.v4i32.trap">;
3229
3230// .zero variants
// llvm.nvvm.suld.1d.*.zero records. Each is a bare Intrinsic with an empty
// property list and no ClangBuiltin attached. The operand list is always
// (i64, i32). The result list has one entry per element (1 for scalar, 2 for
// v2*, 4 for v4*); the i8 and i16 element variants both use llvm_i16_ty
// results.
// NOTE(review): presumably the i64 is the surface handle and the i32 is the x
// coordinate -- confirm against the NVPTX lowering.
3231def int_nvvm_suld_1d_i8_zero
3232  : Intrinsic<[llvm_i16_ty],
3233              [llvm_i64_ty, llvm_i32_ty], [],
3234              "llvm.nvvm.suld.1d.i8.zero">;
3235def int_nvvm_suld_1d_i16_zero
3236  : Intrinsic<[llvm_i16_ty],
3237              [llvm_i64_ty, llvm_i32_ty], [],
3238              "llvm.nvvm.suld.1d.i16.zero">;
3239def int_nvvm_suld_1d_i32_zero
3240  : Intrinsic<[llvm_i32_ty],
3241              [llvm_i64_ty, llvm_i32_ty], [],
3242              "llvm.nvvm.suld.1d.i32.zero">;
3243def int_nvvm_suld_1d_i64_zero
3244  : Intrinsic<[llvm_i64_ty],
3245              [llvm_i64_ty, llvm_i32_ty], [],
3246              "llvm.nvvm.suld.1d.i64.zero">;
3247def int_nvvm_suld_1d_v2i8_zero
3248  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3249              [llvm_i64_ty, llvm_i32_ty], [],
3250              "llvm.nvvm.suld.1d.v2i8.zero">;
3251def int_nvvm_suld_1d_v2i16_zero
3252  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3253              [llvm_i64_ty, llvm_i32_ty], [],
3254              "llvm.nvvm.suld.1d.v2i16.zero">;
3255def int_nvvm_suld_1d_v2i32_zero
3256  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3257              [llvm_i64_ty, llvm_i32_ty], [],
3258              "llvm.nvvm.suld.1d.v2i32.zero">;
3259def int_nvvm_suld_1d_v2i64_zero
3260  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3261              [llvm_i64_ty, llvm_i32_ty], [],
3262              "llvm.nvvm.suld.1d.v2i64.zero">;
3263def int_nvvm_suld_1d_v4i8_zero
3264  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3265              [llvm_i64_ty, llvm_i32_ty], [],
3266              "llvm.nvvm.suld.1d.v4i8.zero">;
3267def int_nvvm_suld_1d_v4i16_zero
3268  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3269              [llvm_i64_ty, llvm_i32_ty], [],
3270              "llvm.nvvm.suld.1d.v4i16.zero">;
3271def int_nvvm_suld_1d_v4i32_zero
3272  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3273              [llvm_i64_ty, llvm_i32_ty], [],
3274              "llvm.nvvm.suld.1d.v4i32.zero">;
3275
// llvm.nvvm.suld.1d.array.*.zero records. Each is a bare Intrinsic with an
// empty property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32). The result list has one entry per element (1 for scalar,
// 2 for v2*, 4 for v4*); the i8 and i16 element variants both use llvm_i16_ty
// results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// layer index plus x coordinate -- confirm against the NVPTX lowering.
3276def int_nvvm_suld_1d_array_i8_zero
3277  : Intrinsic<[llvm_i16_ty],
3278              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3279              "llvm.nvvm.suld.1d.array.i8.zero">;
3280def int_nvvm_suld_1d_array_i16_zero
3281  : Intrinsic<[llvm_i16_ty],
3282              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3283              "llvm.nvvm.suld.1d.array.i16.zero">;
3284def int_nvvm_suld_1d_array_i32_zero
3285  : Intrinsic<[llvm_i32_ty],
3286              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3287              "llvm.nvvm.suld.1d.array.i32.zero">;
3288def int_nvvm_suld_1d_array_i64_zero
3289  : Intrinsic<[llvm_i64_ty],
3290              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3291              "llvm.nvvm.suld.1d.array.i64.zero">;
3292def int_nvvm_suld_1d_array_v2i8_zero
3293  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3294              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3295              "llvm.nvvm.suld.1d.array.v2i8.zero">;
3296def int_nvvm_suld_1d_array_v2i16_zero
3297  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3298              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3299              "llvm.nvvm.suld.1d.array.v2i16.zero">;
3300def int_nvvm_suld_1d_array_v2i32_zero
3301  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3302              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3303              "llvm.nvvm.suld.1d.array.v2i32.zero">;
3304def int_nvvm_suld_1d_array_v2i64_zero
3305  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3306              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3307              "llvm.nvvm.suld.1d.array.v2i64.zero">;
3308def int_nvvm_suld_1d_array_v4i8_zero
3309  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3310              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3311              "llvm.nvvm.suld.1d.array.v4i8.zero">;
3312def int_nvvm_suld_1d_array_v4i16_zero
3313  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3314              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3315              "llvm.nvvm.suld.1d.array.v4i16.zero">;
3316def int_nvvm_suld_1d_array_v4i32_zero
3317  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3318              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3319              "llvm.nvvm.suld.1d.array.v4i32.zero">;
3320
// llvm.nvvm.suld.2d.*.zero records. Each is a bare Intrinsic with an empty
// property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32). The result list has one entry per element (1 for scalar,
// 2 for v2*, 4 for v4*); the i8 and i16 element variants both use llvm_i16_ty
// results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// x/y coordinates -- confirm against the NVPTX lowering.
3321def int_nvvm_suld_2d_i8_zero
3322  : Intrinsic<[llvm_i16_ty],
3323              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3324              "llvm.nvvm.suld.2d.i8.zero">;
3325def int_nvvm_suld_2d_i16_zero
3326  : Intrinsic<[llvm_i16_ty],
3327              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3328              "llvm.nvvm.suld.2d.i16.zero">;
3329def int_nvvm_suld_2d_i32_zero
3330  : Intrinsic<[llvm_i32_ty],
3331              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3332              "llvm.nvvm.suld.2d.i32.zero">;
3333def int_nvvm_suld_2d_i64_zero
3334  : Intrinsic<[llvm_i64_ty],
3335              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3336              "llvm.nvvm.suld.2d.i64.zero">;
3337def int_nvvm_suld_2d_v2i8_zero
3338  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3339              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3340              "llvm.nvvm.suld.2d.v2i8.zero">;
3341def int_nvvm_suld_2d_v2i16_zero
3342  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3343              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3344              "llvm.nvvm.suld.2d.v2i16.zero">;
3345def int_nvvm_suld_2d_v2i32_zero
3346  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3347              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3348              "llvm.nvvm.suld.2d.v2i32.zero">;
3349def int_nvvm_suld_2d_v2i64_zero
3350  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3351              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3352              "llvm.nvvm.suld.2d.v2i64.zero">;
3353def int_nvvm_suld_2d_v4i8_zero
3354  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3355              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3356              "llvm.nvvm.suld.2d.v4i8.zero">;
3357def int_nvvm_suld_2d_v4i16_zero
3358  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3359              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3360              "llvm.nvvm.suld.2d.v4i16.zero">;
3361def int_nvvm_suld_2d_v4i32_zero
3362  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3363              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3364              "llvm.nvvm.suld.2d.v4i32.zero">;
3365
// llvm.nvvm.suld.2d.array.*.zero records. Each is a bare Intrinsic with an
// empty property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32, i32). The result list has one entry per element (1 for
// scalar, 2 for v2*, 4 for v4*); the i8 and i16 element variants both use
// llvm_i16_ty results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// layer index plus x/y coordinates -- confirm against the NVPTX lowering.
3366def int_nvvm_suld_2d_array_i8_zero
3367  : Intrinsic<[llvm_i16_ty],
3368              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3369              "llvm.nvvm.suld.2d.array.i8.zero">;
3370def int_nvvm_suld_2d_array_i16_zero
3371  : Intrinsic<[llvm_i16_ty],
3372              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3373              "llvm.nvvm.suld.2d.array.i16.zero">;
3374def int_nvvm_suld_2d_array_i32_zero
3375  : Intrinsic<[llvm_i32_ty],
3376              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3377              "llvm.nvvm.suld.2d.array.i32.zero">;
3378def int_nvvm_suld_2d_array_i64_zero
3379  : Intrinsic<[llvm_i64_ty],
3380              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3381              "llvm.nvvm.suld.2d.array.i64.zero">;
3382def int_nvvm_suld_2d_array_v2i8_zero
3383  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3384              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3385              "llvm.nvvm.suld.2d.array.v2i8.zero">;
3386def int_nvvm_suld_2d_array_v2i16_zero
3387  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3388              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3389              "llvm.nvvm.suld.2d.array.v2i16.zero">;
3390def int_nvvm_suld_2d_array_v2i32_zero
3391  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3392              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3393              "llvm.nvvm.suld.2d.array.v2i32.zero">;
3394def int_nvvm_suld_2d_array_v2i64_zero
3395  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3396              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3397              "llvm.nvvm.suld.2d.array.v2i64.zero">;
3398def int_nvvm_suld_2d_array_v4i8_zero
3399  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3400              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3401              "llvm.nvvm.suld.2d.array.v4i8.zero">;
3402def int_nvvm_suld_2d_array_v4i16_zero
3403  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3404              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3405              "llvm.nvvm.suld.2d.array.v4i16.zero">;
3406def int_nvvm_suld_2d_array_v4i32_zero
3407  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3408              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3409              "llvm.nvvm.suld.2d.array.v4i32.zero">;
3410
// llvm.nvvm.suld.3d.*.zero records. Each is a bare Intrinsic with an empty
// property list and no ClangBuiltin attached. The operand list is always
// (i64, i32, i32, i32). The result list has one entry per element (1 for
// scalar, 2 for v2*, 4 for v4*); the i8 and i16 element variants both use
// llvm_i16_ty results.
// NOTE(review): presumably the i64 is the surface handle and the i32s are the
// x/y/z coordinates -- confirm against the NVPTX lowering.
3411def int_nvvm_suld_3d_i8_zero
3412  : Intrinsic<[llvm_i16_ty],
3413              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3414              "llvm.nvvm.suld.3d.i8.zero">;
3415def int_nvvm_suld_3d_i16_zero
3416  : Intrinsic<[llvm_i16_ty],
3417              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3418              "llvm.nvvm.suld.3d.i16.zero">;
3419def int_nvvm_suld_3d_i32_zero
3420  : Intrinsic<[llvm_i32_ty],
3421              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3422              "llvm.nvvm.suld.3d.i32.zero">;
3423def int_nvvm_suld_3d_i64_zero
3424  : Intrinsic<[llvm_i64_ty],
3425              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3426              "llvm.nvvm.suld.3d.i64.zero">;
3427def int_nvvm_suld_3d_v2i8_zero
3428  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3429              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3430              "llvm.nvvm.suld.3d.v2i8.zero">;
3431def int_nvvm_suld_3d_v2i16_zero
3432  : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3433              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3434              "llvm.nvvm.suld.3d.v2i16.zero">;
3435def int_nvvm_suld_3d_v2i32_zero
3436  : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3437              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3438              "llvm.nvvm.suld.3d.v2i32.zero">;
3439def int_nvvm_suld_3d_v2i64_zero
3440  : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3441              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3442              "llvm.nvvm.suld.3d.v2i64.zero">;
3443def int_nvvm_suld_3d_v4i8_zero
3444  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3445              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3446              "llvm.nvvm.suld.3d.v4i8.zero">;
3447def int_nvvm_suld_3d_v4i16_zero
3448  : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3449              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3450              "llvm.nvvm.suld.3d.v4i16.zero">;
3451def int_nvvm_suld_3d_v4i32_zero
3452  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3453              [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3454              "llvm.nvvm.suld.3d.v4i32.zero">;
3455
3456//===- Texture Query ------------------------------------------------------===//
3457
// llvm.nvvm.txq.* records. Every record has the same shape: a single
// llvm_i32_ty result, a single llvm_i64_ty operand, the IntrNoMem property,
// and a ClangBuiltin named __nvvm_txq_<attribute> matching the def name.
// NOTE(review): presumably the i64 operand is the texture handle being
// queried -- confirm against the NVPTX lowering.
3458def int_nvvm_txq_channel_order
3459  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3460              "llvm.nvvm.txq.channel.order">,
3461    ClangBuiltin<"__nvvm_txq_channel_order">;
3462def int_nvvm_txq_channel_data_type
3463  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3464              "llvm.nvvm.txq.channel.data.type">,
3465    ClangBuiltin<"__nvvm_txq_channel_data_type">;
3466def int_nvvm_txq_width
3467  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3468              "llvm.nvvm.txq.width">,
3469    ClangBuiltin<"__nvvm_txq_width">;
3470def int_nvvm_txq_height
3471  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3472              "llvm.nvvm.txq.height">,
3473    ClangBuiltin<"__nvvm_txq_height">;
3474def int_nvvm_txq_depth
3475  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3476              "llvm.nvvm.txq.depth">,
3477    ClangBuiltin<"__nvvm_txq_depth">;
3478def int_nvvm_txq_array_size
3479  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3480              "llvm.nvvm.txq.array.size">,
3481    ClangBuiltin<"__nvvm_txq_array_size">;
3482def int_nvvm_txq_num_samples
3483  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3484              "llvm.nvvm.txq.num.samples">,
3485    ClangBuiltin<"__nvvm_txq_num_samples">;
3486def int_nvvm_txq_num_mipmap_levels
3487  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3488              "llvm.nvvm.txq.num.mipmap.levels">,
3489    ClangBuiltin<"__nvvm_txq_num_mipmap_levels">;
3490
3491//===- Surface Query ------------------------------------------------------===//
3492
// llvm.nvvm.suq.* records. Every record has the same shape: a single
// llvm_i32_ty result, a single llvm_i64_ty operand, the IntrNoMem property,
// and a ClangBuiltin named __nvvm_suq_<attribute> matching the def name.
// NOTE(review): presumably the i64 operand is the surface handle being
// queried -- confirm against the NVPTX lowering.
3493def int_nvvm_suq_channel_order
3494  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3495              "llvm.nvvm.suq.channel.order">,
3496    ClangBuiltin<"__nvvm_suq_channel_order">;
3497def int_nvvm_suq_channel_data_type
3498  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3499              "llvm.nvvm.suq.channel.data.type">,
3500    ClangBuiltin<"__nvvm_suq_channel_data_type">;
3501def int_nvvm_suq_width
3502  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3503              "llvm.nvvm.suq.width">,
3504    ClangBuiltin<"__nvvm_suq_width">;
3505def int_nvvm_suq_height
3506  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3507              "llvm.nvvm.suq.height">,
3508    ClangBuiltin<"__nvvm_suq_height">;
3509def int_nvvm_suq_depth
3510  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3511              "llvm.nvvm.suq.depth">,
3512    ClangBuiltin<"__nvvm_suq_depth">;
3513def int_nvvm_suq_array_size
3514  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3515              "llvm.nvvm.suq.array.size">,
3516    ClangBuiltin<"__nvvm_suq_array_size">;
3517
3518
3519//===- Handle Query -------------------------------------------------------===//
3520
// llvm.nvvm.istypep.{sampler,surface,texture} records. Every record has a
// single llvm_i1_ty result, a single llvm_i64_ty operand, the IntrNoMem
// property, and a ClangBuiltin named __nvvm_istypep_<kind>.
// NOTE(review): presumably the i1 reports whether the i64 handle is of the
// named kind -- confirm against the NVPTX lowering.
3521def int_nvvm_istypep_sampler
3522  : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3523              "llvm.nvvm.istypep.sampler">,
3524    ClangBuiltin<"__nvvm_istypep_sampler">;
3525def int_nvvm_istypep_surface
3526  : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3527              "llvm.nvvm.istypep.surface">,
3528    ClangBuiltin<"__nvvm_istypep_surface">;
3529def int_nvvm_istypep_texture
3530  : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3531              "llvm.nvvm.istypep.texture">,
3532    ClangBuiltin<"__nvvm_istypep_texture">;
3533
3534
3535
3536//===- Surface Stores -----------------------------------------------------===//
3537
3538// Unformatted
3539// .clamp variant
// llvm.nvvm.sust.b.1d.*.clamp records. Each has an empty result list, an empty
// property list, and a ClangBuiltin named __nvvm_sust_b_1d_<type>_clamp. The
// operand list starts with (i64, i32) and is followed by one value operand per
// element (1 for scalar, 2 for v2*, 4 for v4*); the i8 and i16 element
// variants both pass llvm_i16_ty values. This group has no v4i64 record.
// NOTE(review): presumably the i64 is the surface handle and the i32 is the x
// coordinate -- confirm against the NVPTX lowering.
3540def int_nvvm_sust_b_1d_i8_clamp
3541  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3542              "llvm.nvvm.sust.b.1d.i8.clamp">,
3543    ClangBuiltin<"__nvvm_sust_b_1d_i8_clamp">;
3544def int_nvvm_sust_b_1d_i16_clamp
3545  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3546              "llvm.nvvm.sust.b.1d.i16.clamp">,
3547    ClangBuiltin<"__nvvm_sust_b_1d_i16_clamp">;
3548def int_nvvm_sust_b_1d_i32_clamp
3549  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3550              "llvm.nvvm.sust.b.1d.i32.clamp">,
3551    ClangBuiltin<"__nvvm_sust_b_1d_i32_clamp">;
3552def int_nvvm_sust_b_1d_i64_clamp
3553  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
3554              "llvm.nvvm.sust.b.1d.i64.clamp">,
3555    ClangBuiltin<"__nvvm_sust_b_1d_i64_clamp">;
3556def int_nvvm_sust_b_1d_v2i8_clamp
3557  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3558              "llvm.nvvm.sust.b.1d.v2i8.clamp">,
3559    ClangBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">;
3560def int_nvvm_sust_b_1d_v2i16_clamp
3561  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3562              "llvm.nvvm.sust.b.1d.v2i16.clamp">,
3563    ClangBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">;
3564def int_nvvm_sust_b_1d_v2i32_clamp
3565  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3566              "llvm.nvvm.sust.b.1d.v2i32.clamp">,
3567    ClangBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">;
3568def int_nvvm_sust_b_1d_v2i64_clamp
3569  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
3570              "llvm.nvvm.sust.b.1d.v2i64.clamp">,
3571    ClangBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">;
3572def int_nvvm_sust_b_1d_v4i8_clamp
3573  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3574                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3575              "llvm.nvvm.sust.b.1d.v4i8.clamp">,
3576    ClangBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">;
3577def int_nvvm_sust_b_1d_v4i16_clamp
3578  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3579                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3580              "llvm.nvvm.sust.b.1d.v4i16.clamp">,
3581    ClangBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">;
3582def int_nvvm_sust_b_1d_v4i32_clamp
3583  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3584                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3585              "llvm.nvvm.sust.b.1d.v4i32.clamp">,
3586    ClangBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">;
3587
3588
// llvm.nvvm.sust.b.1d.array.*.clamp records. Each of the records below (i8
// through v4i8) has an empty result list, an empty property list, and a
// ClangBuiltin named __nvvm_sust_b_1d_array_<type>_clamp. The operand list
// starts with (i64, i32, i32) and is followed by one value operand per element
// (1 for scalar, 2 for v2*, 4 for v4*); the i8 and i16 element variants both
// pass llvm_i16_ty values.
// NOTE(review): presumably the i64 is the surface handle and the two i32s are
// the layer index and x coordinate -- confirm against the NVPTX lowering.
3589def int_nvvm_sust_b_1d_array_i8_clamp
3590  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3591              "llvm.nvvm.sust.b.1d.array.i8.clamp">,
3592    ClangBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">;
3593def int_nvvm_sust_b_1d_array_i16_clamp
3594  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3595              "llvm.nvvm.sust.b.1d.array.i16.clamp">,
3596    ClangBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">;
3597def int_nvvm_sust_b_1d_array_i32_clamp
3598  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3599              "llvm.nvvm.sust.b.1d.array.i32.clamp">,
3600    ClangBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">;
3601def int_nvvm_sust_b_1d_array_i64_clamp
3602  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3603              "llvm.nvvm.sust.b.1d.array.i64.clamp">,
3604    ClangBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">;
3605def int_nvvm_sust_b_1d_array_v2i8_clamp
3606  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3607                   llvm_i16_ty, llvm_i16_ty], [],
3608              "llvm.nvvm.sust.b.1d.array.v2i8.clamp">,
3609    ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">;
3610def int_nvvm_sust_b_1d_array_v2i16_clamp
3611  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3612                   llvm_i16_ty, llvm_i16_ty], [],
3613              "llvm.nvvm.sust.b.1d.array.v2i16.clamp">,
3614    ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">;
3615def int_nvvm_sust_b_1d_array_v2i32_clamp
3616  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3617                   llvm_i32_ty, llvm_i32_ty], [],
3618              "llvm.nvvm.sust.b.1d.array.v2i32.clamp">,
3619    ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">;
3620def int_nvvm_sust_b_1d_array_v2i64_clamp
3621  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3622                   llvm_i64_ty, llvm_i64_ty], [],
3623              "llvm.nvvm.sust.b.1d.array.v2i64.clamp">,
3624    ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">;
3625def int_nvvm_sust_b_1d_array_v4i8_clamp
3626  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3627                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3628              "llvm.nvvm.sust.b.1d.array.v4i8.clamp">,
3629    ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">;
3630def int_nvvm_sust_b_1d_array_v4i16_clamp
3631  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3632                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3633              "llvm.nvvm.sust.b.1d.array.v4i16.clamp">,
3634    ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">;
3635def int_nvvm_sust_b_1d_array_v4i32_clamp
3636  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3637                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3638              "llvm.nvvm.sust.b.1d.array.v4i32.clamp">,
3639    ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">;
3640
3641
// llvm.nvvm.sust.b.2d.*.clamp
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// two i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// x and y coordinates, and ".clamp" is the PTX `sust` out-of-bounds mode
// -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_2d_i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_i8_clamp">;
def int_nvvm_sust_b_2d_i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_i16_clamp">;
def int_nvvm_sust_b_2d_i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_i32_clamp">;
def int_nvvm_sust_b_2d_i64_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.i64.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_i64_clamp">;
def int_nvvm_sust_b_2d_v2i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v2i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">;
def int_nvvm_sust_b_2d_v2i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v2i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">;
def int_nvvm_sust_b_2d_v2i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.v2i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">;
def int_nvvm_sust_b_2d_v2i64_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.v2i64.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">;
def int_nvvm_sust_b_2d_v4i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v4i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">;
def int_nvvm_sust_b_2d_v4i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v4i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">;
def int_nvvm_sust_b_2d_v4i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.v4i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">;
3693
3694
// llvm.nvvm.sust.b.2d.array.*.clamp
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// three i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// array index plus x and y coordinates, and ".clamp" is the PTX `sust`
// out-of-bounds mode -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_2d_array_i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">;
def int_nvvm_sust_b_2d_array_i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">;
def int_nvvm_sust_b_2d_array_i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.array.i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">;
def int_nvvm_sust_b_2d_array_i64_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.array.i64.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">;
def int_nvvm_sust_b_2d_array_v2i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">;
def int_nvvm_sust_b_2d_array_v2i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">;
def int_nvvm_sust_b_2d_array_v2i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">;
def int_nvvm_sust_b_2d_array_v2i64_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i64.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">;
def int_nvvm_sust_b_2d_array_v4i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v4i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">;
def int_nvvm_sust_b_2d_array_v4i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v4i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">;
def int_nvvm_sust_b_2d_array_v4i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.array.v4i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">;
3750
3751
// llvm.nvvm.sust.b.3d.*.clamp
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// three i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// x, y and z coordinates, and ".clamp" is the PTX `sust` out-of-bounds
// mode -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_3d_i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_i8_clamp">;
def int_nvvm_sust_b_3d_i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_i16_clamp">;
def int_nvvm_sust_b_3d_i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.3d.i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_i32_clamp">;
def int_nvvm_sust_b_3d_i64_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.3d.i64.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_i64_clamp">;
def int_nvvm_sust_b_3d_v2i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v2i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">;
def int_nvvm_sust_b_3d_v2i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v2i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">;
def int_nvvm_sust_b_3d_v2i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.3d.v2i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">;
def int_nvvm_sust_b_3d_v2i64_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.3d.v2i64.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">;
def int_nvvm_sust_b_3d_v4i8_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v4i8.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">;
def int_nvvm_sust_b_3d_v4i16_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v4i16.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">;
def int_nvvm_sust_b_3d_v4i32_clamp :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.3d.v4i32.clamp">,
  ClangBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">;
3807
3808
3809// .trap variant
// llvm.nvvm.sust.b.1d.*.trap
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// single i32 operand, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32 is the x
// coordinate, and ".trap" is the PTX `sust` out-of-bounds mode -- confirm
// against the PTX ISA documentation.
def int_nvvm_sust_b_1d_i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_i8_trap">;
def int_nvvm_sust_b_1d_i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_i16_trap">;
def int_nvvm_sust_b_1d_i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_i32_trap">;
def int_nvvm_sust_b_1d_i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_i64_trap">;
def int_nvvm_sust_b_1d_v2i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v2i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i8_trap">;
def int_nvvm_sust_b_1d_v2i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v2i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i16_trap">;
def int_nvvm_sust_b_1d_v2i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.v2i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i32_trap">;
def int_nvvm_sust_b_1d_v2i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.v2i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i64_trap">;
def int_nvvm_sust_b_1d_v4i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v4i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_v4i8_trap">;
def int_nvvm_sust_b_1d_v4i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v4i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_v4i16_trap">;
def int_nvvm_sust_b_1d_v4i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.v4i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_v4i32_trap">;
3857
3858
// llvm.nvvm.sust.b.1d.array.*.trap
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// two i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// array index and x coordinate, and ".trap" is the PTX `sust`
// out-of-bounds mode -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_1d_array_i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i8_trap">;
def int_nvvm_sust_b_1d_array_i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i16_trap">;
def int_nvvm_sust_b_1d_array_i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.array.i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i32_trap">;
def int_nvvm_sust_b_1d_array_i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.array.i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i64_trap">;
def int_nvvm_sust_b_1d_array_v2i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">;
def int_nvvm_sust_b_1d_array_v2i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">;
def int_nvvm_sust_b_1d_array_v2i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">;
def int_nvvm_sust_b_1d_array_v2i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">;
def int_nvvm_sust_b_1d_array_v4i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v4i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">;
def int_nvvm_sust_b_1d_array_v4i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v4i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">;
def int_nvvm_sust_b_1d_array_v4i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.array.v4i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">;
3910
3911
// llvm.nvvm.sust.b.2d.*.trap
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// two i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// x and y coordinates, and ".trap" is the PTX `sust` out-of-bounds mode
// -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_2d_i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_i8_trap">;
def int_nvvm_sust_b_2d_i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_i16_trap">;
def int_nvvm_sust_b_2d_i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_i32_trap">;
def int_nvvm_sust_b_2d_i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_i64_trap">;
def int_nvvm_sust_b_2d_v2i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v2i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i8_trap">;
def int_nvvm_sust_b_2d_v2i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v2i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i16_trap">;
def int_nvvm_sust_b_2d_v2i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.v2i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i32_trap">;
def int_nvvm_sust_b_2d_v2i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.v2i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i64_trap">;
def int_nvvm_sust_b_2d_v4i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v4i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_v4i8_trap">;
def int_nvvm_sust_b_2d_v4i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v4i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_v4i16_trap">;
def int_nvvm_sust_b_2d_v4i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.v4i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_v4i32_trap">;
3963
3964
// llvm.nvvm.sust.b.2d.array.*.trap
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// three i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// array index plus x and y coordinates, and ".trap" is the PTX `sust`
// out-of-bounds mode -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_2d_array_i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i8_trap">;
def int_nvvm_sust_b_2d_array_i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i16_trap">;
def int_nvvm_sust_b_2d_array_i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.array.i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i32_trap">;
def int_nvvm_sust_b_2d_array_i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.array.i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_i64_trap">;
def int_nvvm_sust_b_2d_array_v2i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">;
def int_nvvm_sust_b_2d_array_v2i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">;
def int_nvvm_sust_b_2d_array_v2i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">;
def int_nvvm_sust_b_2d_array_v2i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.array.v2i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">;
def int_nvvm_sust_b_2d_array_v4i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v4i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">;
def int_nvvm_sust_b_2d_array_v4i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.array.v4i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">;
def int_nvvm_sust_b_2d_array_v4i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.array.v4i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">;
4020
4021
// llvm.nvvm.sust.b.3d.*.trap
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// three i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// x, y and z coordinates, and ".trap" is the PTX `sust` out-of-bounds
// mode -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_3d_i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_i8_trap">;
def int_nvvm_sust_b_3d_i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_i16_trap">;
def int_nvvm_sust_b_3d_i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.3d.i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_i32_trap">;
def int_nvvm_sust_b_3d_i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.3d.i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_i64_trap">;
def int_nvvm_sust_b_3d_v2i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v2i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i8_trap">;
def int_nvvm_sust_b_3d_v2i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v2i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i16_trap">;
def int_nvvm_sust_b_3d_v2i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.3d.v2i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i32_trap">;
def int_nvvm_sust_b_3d_v2i64_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.3d.v2i64.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_v2i64_trap">;
def int_nvvm_sust_b_3d_v4i8_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v4i8.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_v4i8_trap">;
def int_nvvm_sust_b_3d_v4i16_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.3d.v4i16.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_v4i16_trap">;
def int_nvvm_sust_b_3d_v4i32_trap :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.3d.v4i32.trap">,
  ClangBuiltin<"__nvvm_sust_b_3d_v4i32_trap">;
4077
4078
4079// .zero variant
// llvm.nvvm.sust.b.1d.*.zero
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// single i32 operand, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32 is the x
// coordinate, and ".zero" is the PTX `sust` out-of-bounds mode -- confirm
// against the PTX ISA documentation.
def int_nvvm_sust_b_1d_i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_i8_zero">;
def int_nvvm_sust_b_1d_i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_i16_zero">;
def int_nvvm_sust_b_1d_i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_i32_zero">;
def int_nvvm_sust_b_1d_i64_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.i64.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_i64_zero">;
def int_nvvm_sust_b_1d_v2i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v2i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i8_zero">;
def int_nvvm_sust_b_1d_v2i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v2i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i16_zero">;
def int_nvvm_sust_b_1d_v2i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.v2i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i32_zero">;
def int_nvvm_sust_b_1d_v2i64_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.v2i64.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_v2i64_zero">;
def int_nvvm_sust_b_1d_v4i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v4i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_v4i8_zero">;
def int_nvvm_sust_b_1d_v4i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.v4i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_v4i16_zero">;
def int_nvvm_sust_b_1d_v4i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.v4i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_v4i32_zero">;
4127
4128
// llvm.nvvm.sust.b.1d.array.*.zero
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// two i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// array index and x coordinate, and ".zero" is the PTX `sust`
// out-of-bounds mode -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_1d_array_i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i8_zero">;
def int_nvvm_sust_b_1d_array_i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i16_zero">;
def int_nvvm_sust_b_1d_array_i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.array.i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i32_zero">;
def int_nvvm_sust_b_1d_array_i64_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.array.i64.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_i64_zero">;
def int_nvvm_sust_b_1d_array_v2i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">;
def int_nvvm_sust_b_1d_array_v2i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">;
def int_nvvm_sust_b_1d_array_v2i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">;
def int_nvvm_sust_b_1d_array_v2i64_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.1d.array.v2i64.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">;
def int_nvvm_sust_b_1d_array_v4i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v4i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">;
def int_nvvm_sust_b_1d_array_v4i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.1d.array.v4i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">;
def int_nvvm_sust_b_1d_array_v4i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.1d.array.v4i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">;
4180
4181
// llvm.nvvm.sust.b.2d.{i8,i16,i32,i64,v2i8,v2i16,v2i32,v2i64,v4i8}.zero
//
// Every record below has an empty result list and an empty property list.
// The operand list is written on two lines: first the leading i64 and the
// two i32 operands, then the value operand(s). The i8-based flavours
// declare their values as i16, exactly like the i16-based ones.
// NOTE(review): presumably the i64 is the surface handle, the i32s are the
// x and y coordinates, and ".zero" is the PTX `sust` out-of-bounds mode
// -- confirm against the PTX ISA documentation.
def int_nvvm_sust_b_2d_i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_i8_zero">;
def int_nvvm_sust_b_2d_i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_i16_zero">;
def int_nvvm_sust_b_2d_i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_i32_zero">;
def int_nvvm_sust_b_2d_i64_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.i64.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_i64_zero">;
def int_nvvm_sust_b_2d_v2i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v2i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i8_zero">;
def int_nvvm_sust_b_2d_v2i16_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v2i16.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i16_zero">;
def int_nvvm_sust_b_2d_v2i32_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty],
            [], "llvm.nvvm.sust.b.2d.v2i32.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i32_zero">;
def int_nvvm_sust_b_2d_v2i64_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i64_ty, llvm_i64_ty],
            [], "llvm.nvvm.sust.b.2d.v2i64.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_v2i64_zero">;
def int_nvvm_sust_b_2d_v4i8_zero :
  Intrinsic<[],
            [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
            [], "llvm.nvvm.sust.b.2d.v4i8.zero">,
  ClangBuiltin<"__nvvm_sust_b_2d_v4i8_zero">;
4223def int_nvvm_sust_b_2d_v4i16_zero
4224  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4225                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4226              "llvm.nvvm.sust.b.2d.v4i16.zero">,
4227    ClangBuiltin<"__nvvm_sust_b_2d_v4i16_zero">;
4228def int_nvvm_sust_b_2d_v4i32_zero
4229  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4230                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4231              "llvm.nvvm.sust.b.2d.v4i32.zero">,
4232    ClangBuiltin<"__nvvm_sust_b_2d_v4i32_zero">;
4233
4234
// sust.b.2d.array.*.zero: scalar, v2 and v4 variants for i8/i16/i32/i64
// elements (no v4i64 record is defined here).
// Every record has no result, takes an i64 followed by three i32 operands and
// then one operand per stored component (i16 for the i8 and i16 element
// variants), and attaches no intrinsic properties, so LLVM assumes the default,
// most conservative memory/side-effect behaviour.
// NOTE(review): the i64 is presumably the surface handle and the i32s the
// array index plus x/y coordinates -- confirm against the NVPTX lowering.
def int_nvvm_sust_b_2d_array_i8_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.2d.array.i8.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_i8_zero">;
def int_nvvm_sust_b_2d_array_i16_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.2d.array.i16.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_i16_zero">;
def int_nvvm_sust_b_2d_array_i32_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.b.2d.array.i32.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_i32_zero">;
def int_nvvm_sust_b_2d_array_i64_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i64_ty], [],
              "llvm.nvvm.sust.b.2d.array.i64.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_i64_zero">;
def int_nvvm_sust_b_2d_array_v2i8_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.2d.array.v2i8.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">;
def int_nvvm_sust_b_2d_array_v2i16_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.2d.array.v2i16.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">;
def int_nvvm_sust_b_2d_array_v2i32_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.b.2d.array.v2i32.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">;
def int_nvvm_sust_b_2d_array_v2i64_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i64_ty, llvm_i64_ty], [],
              "llvm.nvvm.sust.b.2d.array.v2i64.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">;
def int_nvvm_sust_b_2d_array_v4i8_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.2d.array.v4i8.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">;
def int_nvvm_sust_b_2d_array_v4i16_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.2d.array.v4i16.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">;
def int_nvvm_sust_b_2d_array_v4i32_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.b.2d.array.v4i32.zero">,
    ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">;
4290
4291
// sust.b.3d.*.zero: scalar, v2 and v4 variants for i8/i16/i32/i64 elements
// (no v4i64 record is defined here).
// Every record has no result, takes an i64 followed by three i32 operands and
// then one operand per stored component (i16 for the i8 and i16 element
// variants), and attaches no intrinsic properties, so LLVM assumes the default,
// most conservative memory/side-effect behaviour.
// NOTE(review): the i64 is presumably the surface handle and the i32s the
// x/y/z coordinates -- confirm against the NVPTX lowering.
def int_nvvm_sust_b_3d_i8_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.3d.i8.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_i8_zero">;
def int_nvvm_sust_b_3d_i16_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.3d.i16.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_i16_zero">;
def int_nvvm_sust_b_3d_i32_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.b.3d.i32.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_i32_zero">;
def int_nvvm_sust_b_3d_i64_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i64_ty], [],
              "llvm.nvvm.sust.b.3d.i64.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_i64_zero">;
def int_nvvm_sust_b_3d_v2i8_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.3d.v2i8.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_v2i8_zero">;
def int_nvvm_sust_b_3d_v2i16_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.3d.v2i16.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_v2i16_zero">;
def int_nvvm_sust_b_3d_v2i32_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.b.3d.v2i32.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_v2i32_zero">;
def int_nvvm_sust_b_3d_v2i64_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i64_ty, llvm_i64_ty], [],
              "llvm.nvvm.sust.b.3d.v2i64.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_v2i64_zero">;
def int_nvvm_sust_b_3d_v4i8_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.3d.v4i8.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_v4i8_zero">;
def int_nvvm_sust_b_3d_v4i16_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.b.3d.v4i16.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_v4i16_zero">;
def int_nvvm_sust_b_3d_v4i32_zero
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.b.3d.v4i32.zero">,
    ClangBuiltin<"__nvvm_sust_b_3d_v4i32_zero">;
4347
4348
4349
// Formatted surface stores (sust.p.*)
4351
// sust.p.1d.*.trap: scalar, v2 and v4 variants for i8/i16/i32 elements.
// Every record has no result, takes an i64 followed by one i32 operand and
// then one operand per stored component (i16 for the i8 and i16 element
// variants), and attaches no intrinsic properties, so LLVM assumes the default,
// most conservative memory/side-effect behaviour.
// NOTE(review): the i64 is presumably the surface handle and the i32 the
// x coordinate -- confirm against the NVPTX lowering.
def int_nvvm_sust_p_1d_i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_i8_trap">;
def int_nvvm_sust_p_1d_i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_i16_trap">;
def int_nvvm_sust_p_1d_i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.1d.i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_i32_trap">;
def int_nvvm_sust_p_1d_v2i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.v2i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_v2i8_trap">;
def int_nvvm_sust_p_1d_v2i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.v2i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_v2i16_trap">;
def int_nvvm_sust_p_1d_v2i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.1d.v2i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_v2i32_trap">;
def int_nvvm_sust_p_1d_v4i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.v4i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_v4i8_trap">;
def int_nvvm_sust_p_1d_v4i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.v4i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_v4i16_trap">;
def int_nvvm_sust_p_1d_v4i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.1d.v4i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_v4i32_trap">;
4391
4392
// sust.p.1d.array.*.trap: scalar, v2 and v4 variants for i8/i16/i32 elements.
// Every record has no result, takes an i64 followed by two i32 operands and
// then one operand per stored component (i16 for the i8 and i16 element
// variants), and attaches no intrinsic properties, so LLVM assumes the default,
// most conservative memory/side-effect behaviour.
// NOTE(review): the i64 is presumably the surface handle and the i32s the
// array index and x coordinate -- confirm against the NVPTX lowering.
def int_nvvm_sust_p_1d_array_i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.array.i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_i8_trap">;
def int_nvvm_sust_p_1d_array_i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.array.i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_i16_trap">;
def int_nvvm_sust_p_1d_array_i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.1d.array.i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_i32_trap">;
def int_nvvm_sust_p_1d_array_v2i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.array.v2i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">;
def int_nvvm_sust_p_1d_array_v2i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.array.v2i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">;
def int_nvvm_sust_p_1d_array_v2i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.1d.array.v2i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">;
def int_nvvm_sust_p_1d_array_v4i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.array.v4i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">;
def int_nvvm_sust_p_1d_array_v4i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.1d.array.v4i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">;
def int_nvvm_sust_p_1d_array_v4i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.1d.array.v4i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">;
4435
4436
// sust.p.2d.*.trap: scalar, v2 and v4 variants for i8/i16/i32 elements.
// Every record has no result, takes an i64 followed by two i32 operands and
// then one operand per stored component (i16 for the i8 and i16 element
// variants), and attaches no intrinsic properties, so LLVM assumes the default,
// most conservative memory/side-effect behaviour.
// NOTE(review): the i64 is presumably the surface handle and the i32s the
// x/y coordinates -- confirm against the NVPTX lowering.
def int_nvvm_sust_p_2d_i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_i8_trap">;
def int_nvvm_sust_p_2d_i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_i16_trap">;
def int_nvvm_sust_p_2d_i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.2d.i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_i32_trap">;
def int_nvvm_sust_p_2d_v2i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.v2i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_v2i8_trap">;
def int_nvvm_sust_p_2d_v2i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.v2i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_v2i16_trap">;
def int_nvvm_sust_p_2d_v2i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.2d.v2i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_v2i32_trap">;
def int_nvvm_sust_p_2d_v4i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.v4i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_v4i8_trap">;
def int_nvvm_sust_p_2d_v4i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.v4i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_v4i16_trap">;
def int_nvvm_sust_p_2d_v4i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.2d.v4i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_v4i32_trap">;
4479
4480
// sust.p.2d.array.*.trap: scalar, v2 and v4 variants for i8/i16/i32 elements.
// Every record has no result, takes an i64 followed by three i32 operands and
// then one operand per stored component (i16 for the i8 and i16 element
// variants), and attaches no intrinsic properties, so LLVM assumes the default,
// most conservative memory/side-effect behaviour.
// NOTE(review): the i64 is presumably the surface handle and the i32s the
// array index plus x/y coordinates -- confirm against the NVPTX lowering.
def int_nvvm_sust_p_2d_array_i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.array.i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_i8_trap">;
def int_nvvm_sust_p_2d_array_i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.array.i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_i16_trap">;
def int_nvvm_sust_p_2d_array_i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.2d.array.i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_i32_trap">;
def int_nvvm_sust_p_2d_array_v2i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.array.v2i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">;
def int_nvvm_sust_p_2d_array_v2i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.array.v2i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">;
def int_nvvm_sust_p_2d_array_v2i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.2d.array.v2i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">;
def int_nvvm_sust_p_2d_array_v4i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.array.v4i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">;
def int_nvvm_sust_p_2d_array_v4i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.2d.array.v4i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">;
def int_nvvm_sust_p_2d_array_v4i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.2d.array.v4i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">;
4526
4527
// sust.p.3d.*.trap: scalar, v2 and v4 variants for i8/i16/i32 elements.
// Every record has no result, takes an i64 followed by three i32 operands and
// then one operand per stored component (i16 for the i8 and i16 element
// variants), and attaches no intrinsic properties, so LLVM assumes the default,
// most conservative memory/side-effect behaviour.
// NOTE(review): the i64 is presumably the surface handle and the i32s the
// x/y/z coordinates -- confirm against the NVPTX lowering.
def int_nvvm_sust_p_3d_i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.3d.i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_i8_trap">;
def int_nvvm_sust_p_3d_i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.3d.i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_i16_trap">;
def int_nvvm_sust_p_3d_i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.3d.i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_i32_trap">;
def int_nvvm_sust_p_3d_v2i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.3d.v2i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_v2i8_trap">;
def int_nvvm_sust_p_3d_v2i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.3d.v2i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_v2i16_trap">;
def int_nvvm_sust_p_3d_v2i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.3d.v2i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_v2i32_trap">;
def int_nvvm_sust_p_3d_v4i8_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.3d.v4i8.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_v4i8_trap">;
def int_nvvm_sust_p_3d_v4i16_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
              "llvm.nvvm.sust.p.3d.v4i16.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_v4i16_trap">;
def int_nvvm_sust_p_3d_v4i32_trap
  : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
              "llvm.nvvm.sust.p.3d.v4i32.trap">,
    ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
4573
// i64 -> i64 intrinsic that touches no memory and may be speculated.
// NOTE(review): judging by the name it exchanges the low and high 32-bit
// halves of its operand -- confirm against the NVPTX lowering.
def int_nvvm_swap_lo_hi_b64
  : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
                          [IntrNoMem, IntrSpeculatable],
                          "llvm.nvvm.swap.lo.hi.b64">,
    ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
4578
4579
4580// Accessing special registers.
4581
// Base class for an operand-less intrinsic that yields an i32. The read
// accesses no memory, may be speculated, and its result is never undef.
// This class attaches no Clang builtin (presumably what "NB" stands for).
class PTXReadSRegIntrinsicNB_r32
  : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
                          [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
// PTXReadSRegIntrinsicNB_r32 plus a Clang builtin whose name is
// "__nvvm_read_ptx_sreg_" followed by `name`.
class PTXReadSRegIntrinsic_r32<string name>
  : PTXReadSRegIntrinsicNB_r32, ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4586
// Defines four records, one per suffix "_x", "_y", "_z" and "_w". Each is a
// PTXReadSRegIntrinsic_r32 instantiated with `regname` followed by the suffix,
// so its Clang builtin is __nvvm_read_ptx_sreg_<regname><suffix>.
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Do we need the 128-bit integer type version?
//    def _r64   : Intrinsic<[llvm_i128_ty],   [], [IntrNoMem, IntrSpeculatable]>;

// FIXME: Enable this once v4i32 support is enabled in back-end.
//    def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;

// NOTE(review): the suffixes of the two disabled records above (_r64, _v4i16)
// do not match their result types (i128, v4i32); fix the names before enabling
// either of them.
  foreach suffix = ["_x", "_y", "_z", "_w"] in
    def suffix : PTXReadSRegIntrinsic_r32<regname # suffix>;
}
4596
// Same as PTXReadSRegIntrinsic_v4i32 (one record per "_x", "_y", "_z", "_w"
// suffix), but without automatic clang builtins. It is used for registers that
// require a particular GPU or PTX version.
multiclass PTXReadSRegIntrinsicNB_v4i32 {
  foreach suffix = ["_x", "_y", "_z", "_w"] in
    def suffix : PTXReadSRegIntrinsicNB_r32;
}
4603
// Operand-less intrinsic that yields an i64. The read accesses no memory, may
// be speculated, and its result is never undef. The Clang builtin is named
// "__nvvm_read_ptx_sreg_" followed by `name`.
class PTXReadSRegIntrinsic_r64<string name>
  : DefaultAttrsIntrinsic<[llvm_i64_ty], [],
                          [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
    ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4607
// Intrinsics to read registers with non-constant values, i.e. values that
// change over the kernel lifetime. Such reads must not be CSE'd, so instead of
// IntrNoMem they are modelled as accessing inaccessible memory; they use plain
// Intrinsic rather than DefaultAttrsIntrinsic. The result is never undef, and
// the Clang builtin is named "__nvvm_read_ptx_sreg_" followed by `name`.
class PTXReadNCSRegIntrinsic_r32<string name>
  : Intrinsic<[llvm_i32_ty], [],
              [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
    ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadNCSRegIntrinsic_r64<string name>
  : Intrinsic<[llvm_i64_ty], [],
              [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
    ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4616
// Special-register readers that come with a Clang builtin.
// The `defm` lines expand to four records each (_x, _y, _z, _w).

// Four-component registers.
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;

def int_nvvm_read_ptx_sreg_laneid : PTXReadSRegIntrinsic_r32<"laneid">;
def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32<"warpid">;
def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32<"nwarpid">;

defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<"ctaid">;
defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<"nctaid">;

def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32<"smid">;
def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32<"nsmid">;
def int_nvvm_read_ptx_sreg_gridid : PTXReadSRegIntrinsic_r32<"gridid">;

def int_nvvm_read_ptx_sreg_lanemask_eq :
    PTXReadSRegIntrinsic_r32<"lanemask_eq">;
def int_nvvm_read_ptx_sreg_lanemask_le :
    PTXReadSRegIntrinsic_r32<"lanemask_le">;
def int_nvvm_read_ptx_sreg_lanemask_lt :
    PTXReadSRegIntrinsic_r32<"lanemask_lt">;
def int_nvvm_read_ptx_sreg_lanemask_ge :
    PTXReadSRegIntrinsic_r32<"lanemask_ge">;
def int_nvvm_read_ptx_sreg_lanemask_gt :
    PTXReadSRegIntrinsic_r32<"lanemask_gt">;

// The clock, timer and pm0..pm3 registers use the PTXReadNCSReg* classes,
// i.e. the non-constant flavour whose reads are not treated as IntrNoMem.
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;

def int_nvvm_read_ptx_sreg_globaltimer :
    PTXReadNCSRegIntrinsic_r64<"globaltimer">;

def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;

def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
4653
// sm90+, PTX7.8+
// Cluster-related special registers. They use the "NB" classes, so no Clang
// builtin is attached to any of these records. Each `defm` expands to four
// records (_x, _y, _z, _w).
defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32;
defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32;
defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32;
defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32;

def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
4662
4663//
4664// SHUFFLE
4665//
// Generate intrinsics for all variants of the shfl instruction. Every
// combination of sync, mode, type and return_pred is described by one
// SHFL_INFO record (defined elsewhere in this file), which supplies the def
// name, the LLVM intrinsic name, the result/operand types and the Clang
// builtin name used below.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach type = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        // Single-element foreach: binds the SHFL_INFO record to `i`.
        foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
          // Variant that is also exposed as a Clang builtin.
          if i.withGccBuiltin then {
            def i.Name : ClangBuiltin<i.Builtin>,
                         Intrinsic<i.RetTy, i.ArgsTy,
                                   [IntrInaccessibleMemOnly, IntrConvergent,
                                    IntrNoCallback],
                                   i.IntrName>;
          }
          // Variant without a Clang builtin.
          // NOTE(review): both arms define i.Name, so the two flags are
          // presumably mutually exclusive -- confirm in SHFL_INFO.
          if i.withoutGccBuiltin then {
            def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
                                   [IntrInaccessibleMemOnly, IntrConvergent,
                                    IntrNoCallback],
                                   i.IntrName>;
          }
        }
      }
    }
  }
}
4689
4690//
4691// VOTE
4692//
4693
// Non-sync vote intrinsics. Each takes a single i1 predicate; all/any/uni
// return an i1 and ballot returns an i32. All are convergent and modelled as
// accessing only inaccessible memory.

// vote.all pred
def int_nvvm_vote_all :
  Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.all">,
  ClangBuiltin<"__nvvm_vote_all">;
// vote.any pred
def int_nvvm_vote_any :
  Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.any">,
  ClangBuiltin<"__nvvm_vote_any">;
// vote.uni pred
def int_nvvm_vote_uni :
  Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.uni">,
  ClangBuiltin<"__nvvm_vote_uni">;
// vote.ballot pred
def int_nvvm_vote_ballot :
  Intrinsic<[llvm_i32_ty], [llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.ballot">,
  ClangBuiltin<"__nvvm_vote_ballot">;
4714
4715//
4716// VOTE.SYNC
4717//
4718
// Sync vote intrinsics. Each takes an i32 mask followed by an i1 predicate;
// all/any/uni return an i1 and ballot returns an i32. All are convergent and
// modelled as accessing only inaccessible memory.

// vote.sync.all mask, pred
def int_nvvm_vote_all_sync :
  Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.all.sync">,
  ClangBuiltin<"__nvvm_vote_all_sync">;
// vote.sync.any mask, pred
def int_nvvm_vote_any_sync :
  Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.any.sync">,
  ClangBuiltin<"__nvvm_vote_any_sync">;
// vote.sync.uni mask, pred
def int_nvvm_vote_uni_sync :
  Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.uni.sync">,
  ClangBuiltin<"__nvvm_vote_uni_sync">;
// vote.sync.ballot mask, pred
def int_nvvm_vote_ballot_sync :
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.vote.ballot.sync">,
  ClangBuiltin<"__nvvm_vote_ballot_sync">;
4739
4740//
4741// ACTIVEMASK
4742//
// Operand-less intrinsic returning an i32. Besides being convergent and
// limited to inaccessible memory, it is marked IntrHasSideEffects.
def int_nvvm_activemask :
  Intrinsic<[llvm_i32_ty], [],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback,
             IntrHasSideEffects],
            "llvm.nvvm.activemask">,
  ClangBuiltin<"__nvvm_activemask">;
4747
4748//
4749// MATCH.SYNC
4750//
// Both match.any.sync variants return an i32; they differ only in the width
// of the value operand (i32 vs. i64). The first operand is the i32 mask.

// match.any.sync.b32 mask, value
def int_nvvm_match_any_sync_i32 :
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.match.any.sync.i32">,
  ClangBuiltin<"__nvvm_match_any_sync_i32">;
// match.any.sync.b64 mask, value
def int_nvvm_match_any_sync_i64 :
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.match.any.sync.i64">,
  ClangBuiltin<"__nvvm_match_any_sync_i64">;
4761
// The match.all instruction has two variants -- one returns a single value,
// the other returns a pair {value, predicate}. We currently only implement the
// latter, as that's the variant exposed by the CUDA API.
4765
// Both records return the pair {i32, i1} and take the i32 mask followed by
// the value (i32 or i64). No Clang builtin is attached to either of them.

// match.all.sync.b32p mask, value
def int_nvvm_match_all_sync_i32p :
  Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.match.all.sync.i32p">;
// match.all.sync.b64p mask, value
def int_nvvm_match_all_sync_i64p :
  Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
            [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback],
            "llvm.nvvm.match.all.sync.i64p">;
4774
4775//
4776// ELECT.SYNC
4777//
// elect.sync dst|pred, membermask
// Returns the pair {i32 dst, i1 pred} for the given i32 membermask. No
// explicit LLVM name or Clang builtin is given, so the intrinsic name is
// derived from the def name.
def int_nvvm_elect_sync :
  DefaultAttrsIntrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty],
                        [IntrInaccessibleMemOnly, IntrConvergent]>;
4782
4783//
4784// REDUX.SYNC
4785//
// All redux.sync records share one signature: an i32 result and two i32
// operands (src, membermask). They are convergent and modelled as accessing
// only inaccessible memory. No explicit LLVM name is given, so it is derived
// from the def name.

// redux.sync.min.u32 dst, src, membermask;
def int_nvvm_redux_sync_umin : ClangBuiltin<"__nvvm_redux_sync_umin">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;

// redux.sync.max.u32 dst, src, membermask;
def int_nvvm_redux_sync_umax : ClangBuiltin<"__nvvm_redux_sync_umax">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;

// redux.sync.add.s32 dst, src, membermask;
def int_nvvm_redux_sync_add : ClangBuiltin<"__nvvm_redux_sync_add">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;

// redux.sync.min.s32 dst, src, membermask;
def int_nvvm_redux_sync_min : ClangBuiltin<"__nvvm_redux_sync_min">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;

// redux.sync.max.s32 dst, src, membermask;
def int_nvvm_redux_sync_max : ClangBuiltin<"__nvvm_redux_sync_max">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;

// redux.sync.and.b32 dst, src, membermask;
def int_nvvm_redux_sync_and : ClangBuiltin<"__nvvm_redux_sync_and">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;

// redux.sync.xor.b32 dst, src, membermask;
def int_nvvm_redux_sync_xor : ClangBuiltin<"__nvvm_redux_sync_xor">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;

// redux.sync.or.b32 dst, src, membermask;
def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4825
4826//
4827// WGMMA fence instructions
4828//
4829// wgmma.fence.sync.aligned;
// No results and no operands. The only property is IntrConvergent; no
// memory-effect property is given. No explicit name string is passed here,
// unlike the two sibling wgmma definitions that follow.
4830def int_nvvm_wgmma_fence_sync_aligned
4831  : Intrinsic<[], [], [IntrConvergent]>;
4832
4833// wgmma.commit_group.sync.aligned;
// No results and no operands; the only property is IntrConvergent.
// The LLVM name is passed explicitly as the fourth template argument
// (it contains an underscore in "commit_group").
4834def int_nvvm_wgmma_commit_group_sync_aligned
4835  : Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.commit_group.sync.aligned">;
4836
4837// wgmma.wait_group.sync.aligned N;
// No results; one i64 operand that must be an immediate
// (ImmArg<ArgIndex<0>>), presumably the N of the PTX form above.
// Also marked IntrConvergent. The LLVM name is passed explicitly
// (it contains an underscore in "wait_group").
4838def int_nvvm_wgmma_wait_group_sync_aligned
4839  : Intrinsic<[], [llvm_i64_ty], [IntrConvergent, ImmArg<ArgIndex<0>>], "llvm.nvvm.wgmma.wait_group.sync.aligned">;
4840
4841//
4842// WMMA instructions
4843//
4844// WMMA.LOAD
// Template for a WMMA fragment-load intrinsic.
//   Frag       - fragment descriptor; Frag.regs becomes the result type list.
//   Layout     - layout string; used here only by the name helper.
//   WithStride - when non-zero, an i32 operand is appended after the pointer
//                (presumably the stride -- confirm against the lowering).
// Operand 0 is an overloaded pointer (llvm_anyptr_ty) marked ReadOnly and
// NoCapture. The intrinsic name comes from WMMA_NAME_LDST<"load", ...>.intr,
// a helper defined outside this section.
4845class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
4846  : Intrinsic<Frag.regs,
4847              !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
4848              [IntrWillReturn, IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
4849              WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
4850
4851// WMMA.STORE.D
// Template for a WMMA fragment-store intrinsic. It has no results.
//   Frag       - fragment descriptor; Frag.regs supplies the value operands.
//   Layout     - layout string; used here only by the name helper.
//   WithStride - when non-zero, a trailing i32 operand is appended
//                (presumably the stride -- confirm against the lowering).
// Operand order: overloaded pointer, then Frag.regs, then the optional i32.
// Operand 0 is marked WriteOnly and NoCapture.
// NOTE(review): unlike NVVM_WMMA_LD, the property list has no IntrWillReturn
// -- confirm this difference is intentional.
4852class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
4853  : Intrinsic<[],
4854              !listconcat(
4855                [llvm_anyptr_ty],
4856                Frag.regs,
4857                !if(WithStride, [llvm_i32_ty], [])),
4858              [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
4859              WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
4860
4861// Create all load/store variants
// Iterates over layout ("row", "col") x stride (0, 1). For each pair it
// defines one load record per fragment in NVVM_MMA_OPS.all_ld_ops and one
// store record per fragment in NVVM_MMA_OPS.all_st_ops. A record is emitted
// only when NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret is true; the guard
// does not depend on stride. Record names come from
// WMMA_NAME_LDST<...>.record (helper defined outside this section).
4862foreach layout = ["row", "col"] in {
4863  foreach stride = [0, 1] in {
4864    foreach frag = NVVM_MMA_OPS.all_ld_ops in
4865      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
4866        def WMMA_NAME_LDST<"load", frag, layout, stride>.record
4867             : NVVM_WMMA_LD<frag, layout, stride>;
4868    foreach frag = NVVM_MMA_OPS.all_st_ops in
4869      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
4870        def WMMA_NAME_LDST<"store", frag, layout, stride>.record
4871             : NVVM_WMMA_ST<frag, layout, stride>;
4872  }
4873}
4874
4875// WMMA.MMA
// Template for a WMMA multiply-accumulate intrinsic.
//   A, B, C - input fragments; their .regs lists are concatenated in that
//             order to form the operand list.
//   D       - result fragment; D.regs is the result type list.
//   ALayout, BLayout, Satfinite, rnd, b1op - used here only to build the
//             intrinsic name through WMMA_NAME<...>.llvm (helper defined
//             outside this section).
// Properties: IntrNoMem, IntrNoCallback.
4876class NVVM_WMMA_MMA<string ALayout, string BLayout, int Satfinite, string rnd, string b1op,
4877                    WMMA_REGS A, WMMA_REGS B,
4878                    WMMA_REGS C, WMMA_REGS D>
4879  : Intrinsic<D.regs,
4880              !listconcat(A.regs, B.regs, C.regs),
4881              [IntrNoMem, IntrNoCallback],
4882              WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, A, B, C, D>.llvm>;
4883
// Instantiate NVVM_WMMA_MMA for every supported combination of:
//   layout_a, layout_b - "row" or "col"
//   satf               - 0 or 1
//   rnd                - "" (none), "rn", "rz", "rm", "rp"
//   op                 - entries of NVVM_MMA_OPS.all_wmma_ops; op[0..3] are
//                        passed as the A, B, C and D fragments
//   b1op               - values from NVVM_MMA_B1OPS<op>.ret
// A record is emitted only when NVVM_WMMA_SUPPORTED<...>.ret is true; that
// guard does not take b1op. Record names come from WMMA_NAME<...>.record.
4884foreach layout_a = ["row", "col"] in {
4885  foreach layout_b = ["row", "col"] in {
4886    foreach satf = [0, 1] in {
4887      foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
4888        foreach op = NVVM_MMA_OPS.all_wmma_ops in {
4889          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
4890            if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
4891              def WMMA_NAME<layout_a, layout_b, satf, rnd, b1op,
4892                                op[0], op[1], op[2], op[3]>.record
4893                : NVVM_WMMA_MMA<layout_a, layout_b, satf, rnd, b1op,
4894                                op[0], op[1], op[2], op[3]>;
4895            }
4896          } // b1op
4897        } // op
4898      } // rnd
4899    } // satf
4900  } // layout_b
4901} // layout_a
4902
4903// MMA
// Template for an MMA intrinsic. It has the same shape as NVVM_WMMA_MMA but
// no rounding-mode parameter.
//   A, B, C - input fragments; their .regs lists are concatenated in that
//             order to form the operand list.
//   D       - result fragment; D.regs is the result type list.
//   ALayout, BLayout, Satfinite, b1op - used here only to build the
//             intrinsic name through MMA_NAME<...>.llvm (helper defined
//             outside this section).
// Properties: IntrNoMem, IntrNoCallback.
4904class NVVM_MMA<string ALayout, string BLayout, int Satfinite, string b1op,
4905               WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D>
4906  : Intrinsic<D.regs,
4907              !listconcat(A.regs, B.regs, C.regs),
4908              [IntrNoMem, IntrNoCallback],
4909              MMA_NAME<ALayout, BLayout, Satfinite, b1op, A, B, C, D>.llvm>;
4910
// Instantiate NVVM_MMA for every supported combination of:
//   layout_a, layout_b - "row" or "col"
//   satf               - 0 or 1
//   op                 - entries of NVVM_MMA_OPS.all_mma_ops; op[0..3] are
//                        passed as the A, B, C and D fragments
//   b1op               - values from NVVM_MMA_B1OPS<op>.ret
// A record is emitted only when NVVM_MMA_SUPPORTED<...>.ret is true; that
// guard does not take b1op. Record names come from MMA_NAME<...>.record.
4911foreach layout_a = ["row", "col"] in {
4912  foreach layout_b = ["row", "col"] in {
4913    foreach satf = [0, 1] in {
4914      foreach op = NVVM_MMA_OPS.all_mma_ops in {
4915        foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
4916          if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
4917            def MMA_NAME<layout_a, layout_b, satf, b1op, op[0], op[1], op[2], op[3]>.record
4918              : NVVM_MMA<layout_a, layout_b, satf, b1op, op[0], op[1], op[2], op[3]>;
4919          }
4920        } // b1op
4921      } // op
4922    } // satf
4923  } // layout_b
4924} // layout_a
4925
4926// LDMATRIX
// Template for an ldmatrix intrinsic.
//   Frag       - fragment descriptor; Frag.regs becomes the result type list.
//   Transposed - used here only by the name helper LDMATRIX_NAME<...>.intr
//                (defined outside this section).
// The single operand is an overloaded pointer (llvm_anyptr_ty) marked
// ReadOnly and NoCapture. Other properties: IntrReadMem, IntrArgMemOnly,
// IntrNoCallback.
4927class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed>
4928  : Intrinsic<Frag.regs, [llvm_anyptr_ty],
4929              [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>,
4930               NoCapture<ArgIndex<0>>],
4931              LDMATRIX_NAME<Frag, Transposed>.intr>;
4932
// Instantiate NVVM_LDMATRIX for transposed in {0, 1} and every fragment in
// NVVM_MMA_OPS.all_ldmatrix_ops. A record is emitted only when
// NVVM_LDMATRIX_SUPPORTED<frag>.ret is true; the guard does not depend on
// `transposed`. Record names come from LDMATRIX_NAME<...>.record.
4933foreach transposed = [0, 1] in {
4934  foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in {
4935    if NVVM_LDMATRIX_SUPPORTED<frag>.ret then {
4936      def LDMATRIX_NAME<frag, transposed>.record
4937        : NVVM_LDMATRIX<frag, transposed>;
4938    }
4939  }
4940}
4941
// llvm.nvvm.mapa: returns a pointer (llvm_ptr_ty) and takes a pointer plus
// an i32. NOTE(review): presumably models the PTX `mapa` instruction, with
// the i32 being the target CTA rank -- confirm against the PTX ISA.
// Properties: IntrNoMem, IntrSpeculatable, NoCapture on operand 0, plus the
// defaults that DefaultAttrsIntrinsic adds.
4942def int_nvvm_mapa
4943  : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
4944              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4945              "llvm.nvvm.mapa">;
// llvm.nvvm.mapa.shared.cluster: same shape as int_nvvm_mapa, but both the
// result and operand 0 use llvm_shared_ptr_ty (defined outside this section;
// presumably a shared-address-space pointer -- confirm).
// Properties: IntrNoMem, IntrSpeculatable, NoCapture on operand 0.
4946def int_nvvm_mapa_shared_cluster
4947  : DefaultAttrsIntrinsic<[llvm_shared_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
4948              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4949              "llvm.nvvm.mapa.shared.cluster">;
// llvm.nvvm.getctarank: returns an i32 and takes one pointer (llvm_ptr_ty).
// NOTE(review): presumably models the PTX `getctarank` instruction --
// confirm against the PTX ISA.
// Properties: IntrNoMem, IntrSpeculatable, NoCapture on operand 0.
4950def int_nvvm_getctarank
4951  : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty],
4952              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4953              "llvm.nvvm.getctarank">;
// llvm.nvvm.getctarank.shared.cluster: same shape as int_nvvm_getctarank,
// but the pointer operand uses llvm_shared_ptr_ty (defined outside this
// section).
// Properties: IntrNoMem, IntrSpeculatable, NoCapture on operand 0.
4954def int_nvvm_getctarank_shared_cluster
4955  : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty],
4956              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4957              "llvm.nvvm.getctarank.shared.cluster">;
// llvm.nvvm.is_explicit_cluster: takes no operands and returns an i1.
// The result is marked NoUndef (NoUndef<RetIndex>).
// Other properties: IntrNoMem, IntrSpeculatable.
// The LLVM name is passed explicitly (it contains underscores).
4958def int_nvvm_is_explicit_cluster
4959  : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
4960              [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
4961              "llvm.nvvm.is_explicit_cluster">;
4962
4963// Setmaxnreg inc/dec intrinsics
// "inc" form: no results; one i32 operand that must be an immediate
// (ImmArg<ArgIndex<0>>). IntrNoMem is combined with IntrHasSideEffects, so
// the call is declared side-effecting even though it touches no memory.
// Also marked IntrConvergent.
4964def int_nvvm_setmaxnreg_inc_sync_aligned_u32
4965  : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
4966              [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
4967              "llvm.nvvm.setmaxnreg.inc.sync.aligned.u32">;
// "dec" form of setmaxnreg: identical signature and properties to the "inc"
// form. No results; one immediate i32 operand. Properties: IntrConvergent,
// IntrNoMem, IntrHasSideEffects.
4968def int_nvvm_setmaxnreg_dec_sync_aligned_u32
4969  : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
4970              [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
4971              "llvm.nvvm.setmaxnreg.dec.sync.aligned.u32">;
4972
4973// Exit
// No results and no operands; exposed to Clang as __nvvm_exit.
// Marked IntrNoReturn, together with IntrConvergent and
// IntrInaccessibleMemOnly. Uses the plain Intrinsic class.
4974def int_nvvm_exit : ClangBuiltin<"__nvvm_exit">,
4975    Intrinsic<[], [], [IntrConvergent, IntrInaccessibleMemOnly, IntrNoReturn]>;
4976
4977// Intrinsics for Tensor Copy using TMA
4978// G2S -> From Global to Shared memory variants
4979// S2G -> From Shared to Global memory variants
// For each dim in 1..5 and each mode, three records are defined: G2S, S2G
// and prefetch. The mode is "tile" for every dim, with "im2col" added only
// when dim >= 3.
// The single-element `foreach x = [HELPER<...>] in` loops bind the helper
// instance to a name, so that its Name, ArgsTy and IntrProp fields can be
// used for the def. The helper classes are defined outside this section.
// All three kinds have no results.
4979foreach dim = [1, 2, 3, 4, 5] in {
4980  foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
4981    foreach g2s = [CP_ASYNC_BULK_TENSOR_G2S_INTR<dim, mode>] in
4982      def g2s.Name : DefaultAttrsIntrinsic<[], g2s.ArgsTy, g2s.IntrProp>;
4983    foreach s2g = [CP_ASYNC_BULK_TENSOR_S2G_INTR<dim, mode>] in
4984      def s2g.Name : DefaultAttrsIntrinsic<[], s2g.ArgsTy, s2g.IntrProp>;
4985    foreach prefetch = [CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<dim, mode>] in
4986      def prefetch.Name : DefaultAttrsIntrinsic<[], prefetch.ArgsTy, prefetch.IntrProp>;
4987  }
4988}
4990
4991// Intrinsics for TMA Copy with reduction
// Uses the same dim (1..5) and mode iteration as the tensor-copy loop
// ("im2col" only when dim >= 3), crossed with eight reduction ops: add, min,
// max, inc, dec, and, or, xor. Each record has no results; its name, operand
// types and properties come from CP_ASYNC_BULK_TENSOR_REDUCE_INTR<...>
// (defined outside this section).
4992foreach dim = [1, 2, 3, 4, 5] in {
4993  foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
4994    foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in {
4995      foreach reduce = [CP_ASYNC_BULK_TENSOR_REDUCE_INTR<dim, mode, red_op>] in
4996        def reduce.Name : DefaultAttrsIntrinsic<[], reduce.ArgsTy, reduce.IntrProp>;
4997    }
4998  }
4999}
5000
5001// Intrinsics for Bulk Copy using TMA (non-tensor)
5002// From Global to Shared Cluster
// No results; eight operands, labelled inline below.
// Pointer properties: operand 0 (dst) is WriteOnly, operand 2 (src) is
// ReadOnly, and operands 0-2 are NoCapture. The mbarrier pointer (operand 1)
// has no read/write restriction.
// Operands 6 and 7 are the i1 flags for cta_mask and cache_hint and must be
// immediates (ImmArg). Presumably each flag says whether the matching
// operand is used -- confirm against the lowering.
5003def int_nvvm_cp_async_bulk_global_to_shared_cluster
5004  : DefaultAttrsIntrinsic<[],
5005      [llvm_shared_ptr_ty, // dst_smem_ptr
5006       llvm_shared_ptr_ty, // mbarrier_ptr
5007       llvm_global_ptr_ty, // src_gmem_ptr
5008       llvm_i32_ty,        // copy_size
5009       llvm_i16_ty,        // cta_mask
5010       llvm_i64_ty,        // cache_hint
5011       llvm_i1_ty,         // Flag for cta_mask
5012       llvm_i1_ty],        // Flag for cache_hint
5013      [IntrConvergent, IntrArgMemOnly,
5014       WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
5015       NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
5016       NoCapture<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
5017       ImmArg<ArgIndex<7>>]>;
5018
5019// From Shared CTA to Shared Cluster
// No results; four operands, labelled inline below. All three pointers use
// llvm_shared_ptr_ty.
// Pointer properties: operand 0 (dst) is WriteOnly, operand 2 (src) is
// ReadOnly, and operands 0-2 are NoCapture.
// This variant has no cta_mask or cache_hint operands and no ImmArg flags.
5020def int_nvvm_cp_async_bulk_shared_cta_to_cluster
5021  : DefaultAttrsIntrinsic<[],
5022      [llvm_shared_ptr_ty, // dst_smem_ptr
5023       llvm_shared_ptr_ty, // mbarrier_ptr
5024       llvm_shared_ptr_ty, // src_smem_ptr
5025       llvm_i32_ty],       // copy_size
5026      [IntrConvergent, IntrArgMemOnly,
5027       WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
5028       NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
5029       NoCapture<ArgIndex<2>>]>;
5030
5031// From Shared CTA to Global memory
// No results; five operands, labelled inline below. There is no mbarrier
// operand in this direction.
// Pointer properties: operand 0 (dst) is WriteOnly, operand 1 (src) is
// ReadOnly, and both are NoCapture.
// Operand 4 is the i1 flag for cache_hint and must be an immediate (ImmArg).
5032def int_nvvm_cp_async_bulk_shared_cta_to_global
5033  : DefaultAttrsIntrinsic<[],
5034      [llvm_global_ptr_ty, // dst_gmem_ptr
5035       llvm_shared_ptr_ty, // src_smem_ptr
5036       llvm_i32_ty,        // copy_size
5037       llvm_i64_ty,        // cache_hint
5038       llvm_i1_ty],        // Flag for cache_hint
5039      [IntrConvergent, IntrArgMemOnly,
5040       WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
5041       NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
5042       ImmArg<ArgIndex<4>>]>;
5043
5044// Intrinsics for Bulk Copy Prefetch L2
// No results; four operands, labelled inline below.
// Operand 0 (the global source pointer) is ReadOnly and NoCapture.
// Operand 3 is the i1 flag for cache_hint and must be an immediate (ImmArg).
5045def int_nvvm_cp_async_bulk_prefetch_L2
5046  : DefaultAttrsIntrinsic<[],
5047      [llvm_global_ptr_ty, // src_gmem_ptr
5048       llvm_i32_ty,        // copy_size
5049       llvm_i64_ty,        // cache_hint
5050       llvm_i1_ty],        // Flag for cache_hint
5051      [IntrConvergent, IntrArgMemOnly,
5052       NoCapture<ArgIndex<0>>, ReadOnly<ArgIndex<0>>,
5053       ImmArg<ArgIndex<3>>]>;
5054
// griddepcontrol.launch_dependents: no results and no operands. IntrNoMem is
// combined with IntrHasSideEffects, so the call is declared side-effecting
// even though it touches no memory. Uses the plain Intrinsic class.
5055def int_nvvm_griddepcontrol_launch_dependents: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
// griddepcontrol.wait: no results and no operands. IntrNoMem is combined
// with IntrHasSideEffects, so the call is declared side-effecting even
// though it touches no memory. Uses the plain Intrinsic class.
5056def int_nvvm_griddepcontrol_wait: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
5057
5058} // let TargetPrefix = "nvvm"
5059