// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
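
// Tests IR generation for builtins that require VI (GFX8) or newer targets.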

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef unsigned long ulong;
typedef unsigned int  uint;

// CHECK-LABEL: @test_div_fixup_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
void test_div_fixup_f16(global half* out, half a, half b, half c)
{
  *out = __builtin_amdgcn_div_fixuph(a, b, c);
}

// CHECK-LABEL: @test_rcp_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
void test_rcp_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rcph(a);
}

// CHECK-LABEL: @test_sqrt_f16
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
void test_sqrt_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sqrth(a);
}

// CHECK-LABEL: @test_rsq_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
void test_rsq_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rsqh(a);
}

// CHECK-LABEL: @test_sin_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
void test_sin_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sinh(a);
}

// CHECK-LABEL: @test_cos_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
void test_cos_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_cosh(a);
}

// CHECK-LABEL: @test_ldexp_f16
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
void test_ldexp_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_ldexph(a, b);
}

// CHECK-LABEL: @test_frexp_mant_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
void test_frexp_mant_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_frexp_manth(a);
}

// CHECK-LABEL: @test_frexp_exp_f16
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
void test_frexp_exp_f16(global short* out, half a)
{
  *out = __builtin_amdgcn_frexp_exph(a);
}

// CHECK-LABEL: @test_fract_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
void test_fract_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_fracth(a);
}

// CHECK-LABEL: @test_class_f16
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
void test_class_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_classh(a, b);
}

// CHECK-LABEL: @test_s_memrealtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
void test_s_memrealtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memrealtime();
}

// CHECK-LABEL: @test_s_dcache_wb()
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
void test_s_dcache_wb()
{
  __builtin_amdgcn_s_dcache_wb();
}

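// The mov_dpp tests below: arguments are (src, dpp_ctrl, row_mask, bank_mask,
// bound_ctrl). dpp_ctrl 0x101 encodes row_shl:1, and row/bank masks of 0xf
// enable all rows and banks. mov_dpp lowers to llvm.amdgcn.update.dpp with a
// poison old value; smaller integer types are zero-extended to i32 (and
// truncated back), while float/double are bitcast to same-width integers.
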
// CHECK-LABEL: @test_mov_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
void test_mov_dpp_int(global int* out, int src)
{
  *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}

// CHECK-LABEL: @test_mov_dpp_long
// CHECK:      %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
void test_mov_dpp_long(long x, global long *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_float
// CHECK:      %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_mov_dpp_float(float x, global float *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_double
// CHECK:      %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_mov_dpp_double(double x, global double *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_short
// CHECK:      %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_mov_dpp_short(short x, global short *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_char
// CHECK:      %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_mov_dpp_char(char x, global char *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_half
// CHECK:      %0 = load i16,
// CHECK:      %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_mov_dpp_half(half *x, global half *p) {
  *p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
}

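// The update_dpp tests below: same operation as mov_dpp, except the first
// argument supplies an explicit old value (kept by lanes the DPP operation
// leaves unwritten) in place of mov_dpp's poison.
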
// CHECK-LABEL: @test_update_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int(global int* out, int arg1, int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp_long
// CHECK:      %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
void test_update_dpp_long(long x, global long *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_float
// CHECK:      %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_update_dpp_float(float x, global float *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_double
// CHECK:      %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_update_dpp_double(double x, global double *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_short
// CHECK:      %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_update_dpp_short(short x, global short *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_char
// CHECK:      %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_update_dpp_char(char x, global char *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_half
// CHECK:      %0 = load i16,
// CHECK:      %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_update_dpp_half(half *x, global half *p) {
  *p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_int_uint
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int_uint(global int* out, int arg1, unsigned int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp_lit_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_lit_int(global int* out, int arg1)
{
  *out = __builtin_amdgcn_update_dpp(5, arg1, 0, 0, 0, false);
}

__constant int gi = 5;

// CHECK-LABEL: @test_update_dpp_const_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_const_int(global int* out, int arg1)
{
  *out = __builtin_amdgcn_update_dpp(gi, arg1, 0, 0, 0, false);
}

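// For the ds_f{add,min,max}f builtins below: the volatile flag is honored,
// __ATOMIC_CONSUME lowers as acquire, an out-of-range order lowers as seq_cst,
// and an out-of-range scope falls back to the default (system) scope.
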
// CHECK-LABEL: @test_ds_fadd
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
#if !defined(__SPIRV__)
void test_ds_faddf(local float *out, float src) {
#else
void test_ds_faddf(__attribute__((address_space(3))) float *out, float src) {
#endif

  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST + 1, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// CHECK-LABEL: @test_ds_fmin
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

#if !defined(__SPIRV__)
void test_ds_fminf(local float *out, float src) {
#else
void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
#endif
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST + 1, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// CHECK-LABEL: @test_ds_fmax
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

#if !defined(__SPIRV__)
void test_ds_fmaxf(local float *out, float src) {
#else
void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
#endif
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST + 1, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// CHECK-LABEL: @test_s_memtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memtime();
}

// CHECK-LABEL: @test_perm
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
void test_perm(global uint* out, uint a, uint b, uint s)
{
  *out = __builtin_amdgcn_perm(a, b, s);
}

// CHECK-LABEL: @test_groupstaticsize
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.groupstaticsize()
void test_groupstaticsize(global uint* out)
{
  *out = __builtin_amdgcn_groupstaticsize();
}
