xref: /llvm-project/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl (revision 6e0b0038cd65ce726ce404305a06e1cf33e36cca)
1// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 \
2// RUN:   %s -emit-llvm -o - | FileCheck %s
3
4// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 \
5// RUN:   -S -o - %s | FileCheck -check-prefix=GFX12 %s
6
7// REQUIRES: amdgpu-registered-target
8
// Two-lane vector types used as operands by the tests in this file. The
// short2 arguments carry bf16 data as raw 16-bit lanes: the expected IR
// bitcasts <2 x i16> to <2 x bfloat> before each bf16 atomic and back
// to <2 x i16> afterwards.
9typedef half  __attribute__((ext_vector_type(2))) half2;
10typedef short __attribute__((ext_vector_type(2))) short2;
11
12// CHECK-LABEL: test_local_add_2bf16
13// CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
14// CHECK-NEXT: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
15// CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
16
17// GFX12-LABEL:  test_local_add_2bf16
18// GFX12: ds_pk_add_rtn_bf16
// Two-lane bf16 atomic add on a __local pointer with the old value returned
// to the caller. The patterns above expect an agent-scope monotonic
// atomicrmw fadd in addrspace(3) and the returning ds_pk_add_rtn_bf16.
19short2 test_local_add_2bf16(__local short2 *addr, short2 x) {
20  return __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
21}
22
23// CHECK-LABEL: test_local_add_2bf16_noret
24// CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
25// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
26// CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
27
28// GFX12-LABEL:  test_local_add_2bf16_noret
29// GFX12: ds_pk_add_bf16
// Same builtin as test_local_add_2bf16, but the result is discarded. The
// expected IR is unchanged (the result is still bitcast back to <2 x i16>),
// while the expected instruction is the non-returning ds_pk_add_bf16.
30void test_local_add_2bf16_noret(__local short2 *addr, short2 x) {
31  __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
32}
33
34// CHECK-LABEL: test_local_add_2f16
35// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
36// GFX12-LABEL:  test_local_add_2f16
37// GFX12: ds_pk_add_rtn_f16
// Two-lane f16 atomic add on a __local pointer with the old value returned.
// Unlike the bf16 variants, the expected atomicrmw fadd pattern carries no
// syncscope and no bitcasts; the expected instruction is ds_pk_add_rtn_f16.
38half2 test_local_add_2f16(__local half2 *addr, half2 x) {
39  return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
40}
41
42// CHECK-LABEL: test_local_add_2f16_noret
43// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
44// GFX12-LABEL:  test_local_add_2f16_noret
45// GFX12: ds_pk_add_f16
// Same builtin as test_local_add_2f16 with the result discarded; the
// expected instruction is the non-returning ds_pk_add_f16.
46void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
47  __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
48}
49
50// CHECK-LABEL: test_flat_add_2f16
51// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
52
53// GFX12-LABEL:  test_flat_add_2f16
54// GFX12: flat_atomic_pk_add_f16
// Two-lane f16 atomic add through a __generic pointer, result returned.
// The expected IR is an agent-scope monotonic atomicrmw fadd on a plain
// `ptr` tagged with !amdgpu.no.fine.grained.memory; the expected
// instruction is flat_atomic_pk_add_f16.
55half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
56  return __builtin_amdgcn_flat_atomic_fadd_v2f16(addr, x);
57}
58
59// CHECK-LABEL: test_flat_add_2bf16
60// CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
61// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
62// CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
63
64// GFX12-LABEL:  test_flat_add_2bf16
65// GFX12: flat_atomic_pk_add_bf16
// Two-lane bf16 atomic add through a __generic pointer, result returned.
// The short2 operand is bitcast to <2 x bfloat> for the atomicrmw fadd and
// the result is bitcast back; the expected instruction is
// flat_atomic_pk_add_bf16.
66short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) {
67  return __builtin_amdgcn_flat_atomic_fadd_v2bf16(addr, x);
68}
69
70// CHECK-LABEL: test_global_add_half2
71// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
72
73// GFX12-LABEL:  test_global_add_half2
74// GFX12:  global_atomic_pk_add_f16 v2, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off th:TH_ATOMIC_RETURN
// Two-lane f16 atomic add on a __global pointer whose result is consumed by
// a store, so the returning form (th:TH_ATOMIC_RETURN in the pattern above)
// is expected.
75void test_global_add_half2(__global half2 *addr, half2 x) {
  // NOTE(review): rtn is never initialized, so the store through it would be
  // undefined behaviour if this function were executed. The test only
  // compiles the code (-emit-llvm / -S); the store appears to exist solely
  // to keep the builtin's result in use. The expected ISA line hard-codes
  // v2, so changing this may require updating that pattern.
76  half2 *rtn;
77  *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
78}
79
80// CHECK-LABEL: test_global_add_half2_noret
81// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
82
83// GFX12-LABEL:  test_global_add_half2_noret
84// GFX12:  global_atomic_pk_add_f16 v[0:1], v2, off
// Same builtin as test_global_add_half2 with the result discarded; the
// expected instruction is global_atomic_pk_add_f16 without the
// TH_ATOMIC_RETURN modifier, using the fixed registers v[0:1] and v2.
85void test_global_add_half2_noret(__global half2 *addr, half2 x) {
86  __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
87}
88
89// CHECK-LABEL: test_global_add_2bf16
90// CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
91// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
92// CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
93
94
95// GFX12-LABEL:  test_global_add_2bf16
96// GFX12: global_atomic_pk_add_bf16 v2, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off th:TH_ATOMIC_RETURN
// Two-lane bf16 atomic add on a __global pointer whose result is consumed by
// a store, so the returning form (th:TH_ATOMIC_RETURN in the pattern above)
// is expected. The short2 operand is bitcast to <2 x bfloat> and back.
97void test_global_add_2bf16(__global short2 *addr, short2 x) {
  // NOTE(review): rtn is never initialized, so the store through it would be
  // undefined behaviour if this function were executed. The test only
  // compiles the code (-emit-llvm / -S); the store appears to exist solely
  // to keep the builtin's result in use. The expected ISA line hard-codes
  // v2, so changing this may require updating that pattern.
98  short2 *rtn;
99  *rtn = __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x);
100}
101
102// CHECK-LABEL: test_global_add_2bf16_noret
103// CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
104// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
105// CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
106
107// GFX12-LABEL:  test_global_add_2bf16_noret
108// GFX12: global_atomic_pk_add_bf16 v[0:1], v2, off
// Same builtin as test_global_add_2bf16 with the result discarded; the
// expected instruction is global_atomic_pk_add_bf16 without the
// TH_ATOMIC_RETURN modifier, using the fixed registers v[0:1] and v2.
109void test_global_add_2bf16_noret(__global short2 *addr, short2 x) {
110  __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x);
111}
112