; RUN: llvm-as < %s | llvm-dis | FileCheck %s

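; This file tests the bitcode auto-upgrade of removed AMDGPU atomic
; intrinsics (llvm.amdgcn.atomic.inc/dec and the ds/flat/global atomic float
; intrinsics) into native atomicrmw instructions. Throughout, the legacy
; trailing i1 operand becomes the volatile flag on the new atomicrmw.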
define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}}
  %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr3, i32 46, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i64 48 syncscope("agent") seq_cst, align 8
  %result3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr0, i64 48, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i64 45 syncscope("agent") seq_cst, align 8
  %result4 = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr1, i64 45, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i64 4345 syncscope("agent") seq_cst, align 8
  %result5 = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr3, i64 4345, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(3) %ptr3, i64 4345 syncscope("agent") seq_cst, align 8
  %result6 = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr3, i64 4345, i32 0, i32 0, i1 true)
  ret void
}

define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
  %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}}
  %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr3, i32 46, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i64 48 syncscope("agent") seq_cst, align 8
  %result3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr0, i64 48, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i64 45 syncscope("agent") seq_cst, align 8
  %result4 = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr1, i64 45, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i64 4345 syncscope("agent") seq_cst, align 8
  %result5 = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr3, i64 4345, i32 0, i32 0, i1 false)

  ; CHECK: atomicrmw volatile udec_wrap ptr addrspace(3) %ptr3, i64 4345 syncscope("agent") seq_cst, align 8
  %result6 = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr3, i64 4345, i32 0, i32 0, i1 true)
  ret void
}

; Test handling of invalid ordering and scope operand values
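; The i32 ordering operand follows LLVM's AtomicOrdering encoding
; (2 = monotonic, 4 = acquire, etc.); values with no valid atomicrmw
; mapping are conservatively upgraded to seq_cst, and the scope operand is
; ignored in favor of syncscope("agent").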
define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true)

  ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 true)

  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
  %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 1, i32 0, i1 false)

  ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1
  %result3 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 2, i32 0, i1 true)

  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
  %result4 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 false)

  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 4, i1 true)

  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result6 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 5, i1 false)

  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 6, i1 true)

  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result8 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 7, i1 false)

  ; CHECK: = atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result9 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 8, i1 true)

  ; CHECK: = atomicrmw volatile udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
  %result10 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 true)
  ret void
}

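; Non-immediate values passed to immarg operands are invalid IR, but old
; bitcode may contain them. The upgrade tolerates this: a variable ordering
; or scope falls back to the defaults above, and a variable isVolatile bit
; is conservatively treated as volatile.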
define void @immarg_violations(ptr %ptr0, i32 %val32, i1 %val1) {
  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 %val32, i32 0, i1 false)

  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 %val32, i1 false)

  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
  %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 0, i1 %val1)
  ret void
}

declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32 immarg, i32 immarg, i1 immarg) #0
declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32 immarg, i32 immarg, i1 immarg) #0
declare i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr nocapture, i32, i32 immarg, i32 immarg, i1 immarg) #0
declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0
declare i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0
declare i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0

declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32 immarg, i32 immarg, i1 immarg) #0
declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32 immarg, i32 immarg, i1 immarg) #0
declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32 immarg, i32 immarg, i1 immarg) #0
declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0
declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0
declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0

; Operands: (ptr, rmw_value, ordering, scope, isVolatile)
declare float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32 immarg, i32 immarg, i1 immarg)
declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) nocapture, <2 x half>, i32 immarg, i32 immarg, i1 immarg)
declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) nocapture, <2 x i16>)

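; ds.fadd upgrades to a plain atomicrmw fadd on the LDS pointer; unlike the
; flat/global cases later in this file, no metadata is attached for
; addrspace(3) operations.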
define float @upgrade_amdgcn_ds_fadd_f32(ptr addrspace(3) %ptr, float %val) {
  ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result0 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)

  ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result1 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true)

  ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result2 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 43, i32 3, i1 false)

  ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") acquire, align 4
  %result3 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 4, i32 2, i1 false)

  ret float %result3
}

; Handle missing type suffix
declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)

define float @upgrade_amdgcn_ds_fadd_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
  ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result0 = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %result0
}

define void @immarg_violations_ds_fadd_f32(ptr addrspace(3) %ptr, float %fval, i32 %val32, i1 %val1) {
  ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, float %fval syncscope("agent") seq_cst, align 4
  %result0 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %fval, i32 %val32, i32 %val32, i1 %val1)
  ret void
}

declare float @llvm.amdgcn.ds.fadd.f32broken0(i32, float, i32 immarg, i32 immarg, i1 immarg)

; This will just delete the invalid call, which isn't ideal, but these
; cases were never emitted.
; CHECK-LABEL: define void @ds_fadd_f32_invalid_not_ptr(
; CHECK-NEXT: ret void
define void @ds_fadd_f32_invalid_not_ptr(i32 %ptr, float %fval) {
  %result0 = call float @llvm.amdgcn.ds.fadd.f32broken0(i32 %ptr, float %fval, i32 0, i32 0, i1 false)
  ret void
}

declare float @llvm.amdgcn.ds.fadd.f32broken1(ptr addrspace(3), double, i32 immarg, i32 immarg, i1 immarg)

; CHECK-LABEL: define void @ds_fadd_f32_invalid_mismatch(
; CHECK-NEXT: ret void
define void @ds_fadd_f32_invalid_mismatch(ptr addrspace(3) %ptr, double %fval) {
  %result0 = call float @llvm.amdgcn.ds.fadd.f32broken1(ptr addrspace(3) %ptr, double %fval, i32 0, i32 0, i1 false)
  ret void
}

define double @upgrade_amdgcn_ds_fadd_f64(ptr addrspace(3) %ptr, double %val) {
  ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
  %result0 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 false)

  ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
  %result1 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 true)

  ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
  %result2 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 43, i32 3, i1 false)

  ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, double %val syncscope("agent") acquire, align 8
  %result3 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 4, i32 2, i1 false)

  ret double %result3
}

; CHECK-LABEL: @immarg_violations_ds_fadd_f64(
define void @immarg_violations_ds_fadd_f64(ptr addrspace(3) %ptr, double %fval, i32 %val32, i1 %val1) {
  ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, double %fval syncscope("agent") seq_cst, align 8
  %result0 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %fval, i32 %val32, i32 %val32, i1 %val1)
  ret void
}

define <2 x half> @upgrade_amdgcn_ds_fadd_v2f16(ptr addrspace(3) %ptr, <2 x half> %val) {
  ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") seq_cst, align 4
  %result0 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 0, i32 0, i1 false)

  ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") seq_cst, align 4
  %result1 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 0, i32 0, i1 true)

  ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") seq_cst, align 4
  %result2 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 43, i32 3, i1 false)

  ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") acquire, align 4
  %result3 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 4, i32 2, i1 false)

  ret <2 x half> %result3
}

define void @immarg_violations_ds_fadd_v2f16(ptr addrspace(3) %ptr, <2 x half> %fval, i32 %val32, i1 %val1) {
  ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, <2 x half> %fval syncscope("agent") seq_cst, align 4
  %result0 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %fval, i32 %val32, i32 %val32, i1 %val1)
  ret void
}

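; The v2bf16 variant used <2 x i16> as its IR type, so the upgrade has to
; bitcast the operand to <2 x bfloat> for the atomicrmw and bitcast the
; result back to <2 x i16>.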
define <2 x i16> @upgrade_amdgcn_ds_fadd_v2bf16__as_i16(ptr addrspace(3) %ptr, <2 x i16> %val) {
  ; CHECK: [[BC0:%[0-9]+]] = bitcast <2 x i16> %val to <2 x bfloat>
  ; CHECK-NEXT: [[RMW0:%[0-9]+]] = atomicrmw fadd ptr addrspace(3) %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4
  ; CHECK-NEXT: = bitcast <2 x bfloat> [[RMW0]] to <2 x i16>
  %result0 = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %val, i32 0, i32 0, i1 false)

  ; CHECK: [[BC1:%[0-9]+]] = bitcast <2 x i16> %val to <2 x bfloat>
  ; CHECK-NEXT: [[RMW1:%[0-9]+]] = atomicrmw volatile fadd ptr addrspace(3) %ptr, <2 x bfloat> [[BC1]] syncscope("agent") seq_cst, align 4
  ; CHECK-NEXT: = bitcast <2 x bfloat> [[RMW1]] to <2 x i16>
  %result1 = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %val, i32 0, i32 0, i1 true)

  ; CHECK: [[BC2:%[0-9]+]] = bitcast <2 x i16> %val to <2 x bfloat>
  ; CHECK-NEXT: [[RMW2:%[0-9]+]] = atomicrmw fadd ptr addrspace(3) %ptr, <2 x bfloat> [[BC2]] syncscope("agent") seq_cst, align 4
  ; CHECK-NEXT: = bitcast <2 x bfloat> [[RMW2]] to <2 x i16>
  %result2 = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %val, i32 43, i32 3, i1 false)

  ; CHECK: [[BC3:%[0-9]+]] = bitcast <2 x i16> %val to <2 x bfloat>
  ; CHECK-NEXT: [[RMW3:%[0-9]+]] = atomicrmw fadd ptr addrspace(3) %ptr, <2 x bfloat> [[BC3]] syncscope("agent") acquire, align 4
  ; CHECK-NEXT: = bitcast <2 x bfloat> [[RMW3]] to <2 x i16>
  %result3 = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %val, i32 4, i32 2, i1 false)

  ret <2 x i16> %result3
}

; The bf16 version was historically defined as a separate intrinsic that
; lacks the ordering, scope, and volatile arguments.
define <2 x i16> @upgrade_amdgcn_ds_fadd_v2bf16__missing_args_as_i16(ptr addrspace(3) %ptr, <2 x i16> %val) {
  ; CHECK: [[BC0:%[0-9]+]] = bitcast <2 x i16> %val to <2 x bfloat>
  ; CHECK-NEXT: [[RMW0:%[0-9]+]] = atomicrmw fadd ptr addrspace(3) %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4
  ; CHECK-NEXT: [[BC1:%[0-9]+]] = bitcast <2 x bfloat> [[RMW0]] to <2 x i16>
  ; CHECK-NEXT: ret <2 x i16> [[BC1]]
  %result0 = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %val)
  ret <2 x i16> %result0
}

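; ds.fmin and ds.fmax follow the same upgrade pattern as ds.fadd, mapping
; to atomicrmw fmin and fmax respectively.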
declare float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
declare double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) nocapture, double, i32 immarg, i32 immarg, i1 immarg)

define float @upgrade_amdgcn_ds_fmin_f32(ptr addrspace(3) %ptr, float %val) {
  ; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result0 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)

  ; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result1 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true)

  ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result2 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 43, i32 3, i1 false)

  ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") acquire, align 4
  %result3 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 4, i32 2, i1 false)

  ret float %result3
}

define double @upgrade_amdgcn_ds_fmin_f64(ptr addrspace(3) %ptr, double %val) {
  ; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
  %result0 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 false)

  ; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
  %result1 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 true)

  ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
  %result2 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 43, i32 3, i1 false)

  ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") acquire, align 8
  %result3 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 4, i32 2, i1 false)

  ret double %result3
}

declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)

define float @upgrade_amdgcn_ds_fmin_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
  ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result0 = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %result0
}

declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)

define float @upgrade_amdgcn_ds_fmax_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
  ; CHECK: = atomicrmw fmax ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
  %result0 = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %result0
}

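; The flat/global atomic intrinsics below upgrade to atomicrmw with
; !amdgpu.no.fine.grained.memory attached; flat (addrspace 0) pointers
; additionally receive !noalias.addrspace (!0 below) ruling out the private
; address space.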
declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr, <2 x i16>)

define <2 x i16> @upgrade_amdgcn_flat_atomic_fadd_v2bf16_p0(ptr %ptr, <2 x i16> %data) {
  ; CHECK: [[BC0:%.+]] = bitcast <2 x i16> %data to <2 x bfloat>
  ; CHECK-NEXT: [[ATOMIC:%.+]] = atomicrmw fadd ptr %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  ; CHECK-NEXT: [[BC1:%.+]] = bitcast <2 x bfloat> [[ATOMIC]] to <2 x i16>
  ; CHECK-NEXT: ret <2 x i16> [[BC1]]
  %result = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data)
  ret <2 x i16> %result
}

declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1), <2 x i16>)

define <2 x i16> @upgrade_amdgcn_global_atomic_fadd_v2bf16_p1(ptr addrspace(1) %ptr, <2 x i16> %data) {
  ; CHECK: [[BC0:%.+]] = bitcast <2 x i16> %data to <2 x bfloat>
  ; CHECK-NEXT: [[ATOMIC:%.+]] = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  ; CHECK-NEXT: [[BC1:%.+]] = bitcast <2 x bfloat> [[ATOMIC]] to <2 x i16>
  ; CHECK-NEXT: ret <2 x i16> [[BC1]]
  %result = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data)
  ret <2 x i16> %result
}

declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr nocapture, <2 x half>) #0

define <2 x half> @upgrade_amdgcn_flat_atomic_fadd_v2f16_p0_v2f16(ptr %ptr, <2 x half> %data) {
  ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !{{[0-9]+}}, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data)
  ret <2 x half> %result
}

declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) #0

define <2 x half> @upgrade_amdgcn_global_atomic_fadd_v2f16_p1_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) {
  ; CHECK: %{{.+}} = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
  ret <2 x half> %result
}

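; The f32 fadd variants additionally receive !amdgpu.ignore.denormal.mode
; (the legacy f32 fadd instructions were not required to respect the
; denormal mode).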
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr nocapture, float) #0

define float @upgrade_amdgcn_flat_atomic_fadd_f32_p0_f32(ptr %ptr, float %data) {
  ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !{{[0-9]+}}, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
  %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
  ret float %result
}

declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #0

define float @upgrade_amdgcn_global_atomic_fadd_f32_p1_f32(ptr addrspace(1) %ptr, float %data) {
  ; CHECK: %{{.+}} = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
  %result = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
  ret float %result
}

declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr nocapture, float) #0

define float @upgrade_amdgcn_flat_atomic_fmin_f32_p0_f32(ptr %ptr, float %data) {
  ; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data)
  ret float %result
}

declare float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) nocapture, float) #0

define float @upgrade_amdgcn_global_atomic_fmin_f32_p1_f32(ptr addrspace(1) %ptr, float %data) {
  ; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
  ret float %result
}

declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr nocapture, double) #0

define double @upgrade_amdgcn_flat_atomic_fmin_f64_p0_f64(ptr %ptr, double %data) {
  ; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
  ret double %result
}

declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) nocapture, double) #0

define double @upgrade_amdgcn_global_atomic_fmin_f64_p1_f64(ptr addrspace(1) %ptr, double %data) {
  ; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %result
}

declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr nocapture, float) #0

define float @upgrade_amdgcn_flat_atomic_fmax_f32_p0_f32(ptr %ptr, float %data) {
  ; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data)
  ret float %result
}

declare float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) nocapture, float) #0

define float @upgrade_amdgcn_global_atomic_fmax_f32_p1_f32(ptr addrspace(1) %ptr, float %data) {
  ; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
  ret float %result
}

declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr nocapture, double) #0

define double @upgrade_amdgcn_flat_atomic_fmax_f64_p0_f64(ptr %ptr, double %data) {
  ; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
  ret double %result
}

declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) nocapture, double) #0

define double @upgrade_amdgcn_global_atomic_fmax_f64_p1_f64(ptr addrspace(1) %ptr, double %data) {
  ; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  %result = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %result
}

attributes #0 = { argmemonly nounwind willreturn }

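; !0 is the !noalias.addrspace payload, the half-open range [5, 6), meaning
; the access is known not to target the private (addrspace(5)) memory; !1 is
; an empty metadata node used as a marker.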
; CHECK: !0 = !{i32 5, i32 6}
; CHECK: !1 = !{}