; Source: llvm-project/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
; Lit driver lines. The file is compiled for both the 32-bit (nvptx) and the
; 64-bit (nvptx64) triple at sm_60 and the emitted PTX is matched against the
; check patterns in this file; only the 32-bit run enables the extra 32-bit
; check prefix. When a ptxas binary is available the PTX is additionally fed
; to the assembler for verification (the 32-bit variant is skipped for
; ptxas 12.0).
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}

; Calls each scoped atomic intrinsic (add, exch, max, min, inc, dec, and, or,
; xor, cas) with the cta scope and with the sys scope, always using function
; arguments as operands, and checks the mnemonic of the PTX atom instruction
; emitted for every call. add/exch/max/min/and/or/xor/cas are covered for i32
; and i64 (add also for float and double); inc/dec are covered for i32 only.
; The %uip and %ui parameters are not used by the body.
; NOTE(review): attribute group #0 is referenced here but has no definition
; in the visible part of this file (only #1 is defined) - confirm intended.
; CHECK-LABEL: .func test_atomics_scope(
define void @test_atomics_scope(ptr %fp, float %f,
                                ptr %dfp, double %df,
                                ptr %ip, i32 %i,
                                ptr %uip, i32 %ui,
                                ptr %llp, i64 %ll) #0 {
entry:
; Integer and floating-point add.
; CHECK: atom.cta.add.s32
  %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.add.u64
  %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.add.s32
  %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.add.u64
  %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.add.f32
  %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f)
; CHECK: atom.cta.add.f64
  %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df)
; CHECK: atom.sys.add.f32
  %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr %fp, float %f)
; CHECK: atom.sys.add.f64
  %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr %dfp, double %df)

; Exchange.
; CHECK: atom.cta.exch.b32
  %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.exch.b64
  %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.exch.b32
  %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.exch.b64
  %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr %llp, i64 %ll)

; Maximum (expected as the signed form).
; CHECK: atom.cta.max.s32
  %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.max.s64
  %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.max.s32
  %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.max.s64
  %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr %llp, i64 %ll)

; Minimum (expected as the signed form).
; CHECK: atom.cta.min.s32
  %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.min.s64
  %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.min.s32
  %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.min.s64
  %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr %llp, i64 %ll)

; Increment (32-bit only).
; CHECK: atom.cta.inc.u32
  %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.inc.u32
  %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr %ip, i32 %i)

; Decrement (32-bit only).
; CHECK: atom.cta.dec.u32
  %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.dec.u32
  %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr %ip, i32 %i)

; Bitwise and.
; CHECK: atom.cta.and.b32
  %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.and.b64
  %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.and.b32
  %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.and.b64
  %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr %llp, i64 %ll)

; Bitwise or.
; CHECK: atom.cta.or.b32
  %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.or.b64
  %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.or.b32
  %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.or.b64
  %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr %llp, i64 %ll)

; Bitwise xor.
; CHECK: atom.cta.xor.b32
  %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.xor.b64
  %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.xor.b32
  %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.xor.b64
  %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr %llp, i64 %ll)

; Compare-and-swap; the same argument is passed for both value operands.
; CHECK: atom.cta.cas.b32
  %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i)
; CHECK: atom.cta.cas.b64
  %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr %llp, i64 %ll, i64 %ll)
; CHECK: atom.sys.cas.b32
  %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr %ip, i32 %i, i32 %i)
; CHECK: atom.sys.cas.b64
  %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr %llp, i64 %ll, i64 %ll)

  ; CHECK: ret
  ret void
}

; Make sure we use constants as operands to our scoped atomic calls, where appropriate.
; Each cta-scoped operation below is issued once with a function argument
; (result name ends in "r") and once with a literal constant (result name ends
; in "i"); the patterns pin the final operand(s) of the emitted instruction to
; a virtual register or to the expected immediate, respectively.
; The %uip and %ui parameters are not used by the body.
; CHECK-LABEL: .func test_atomics_scope_imm(
define void @test_atomics_scope_imm(ptr %fp, float %f,
                                    ptr %dfp, double %df,
                                    ptr %ip, i32 %i,
                                    ptr %uip, i32 %ui,
                                    ptr %llp, i64 %ll) #0 {

; Integer add: register vs. immediate addend.
; CHECK: atom.cta.add.s32{{.*}}, %r{{[0-9]+}};
  %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.add.s32{{.*}}, 1;
  %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 1)
; CHECK: atom.cta.add.u64{{.*}}, %rd{{[0-9]+}};
  %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.add.u64{{.*}}, 2;
  %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 2)

; Floating-point add: the constants 3.0 and 4.0 are expected as hexadecimal
; float / double immediates.
; CHECK: atom.cta.add.f32{{.*}}, %f{{[0-9]+}};
  %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f)
; CHECK: atom.cta.add.f32{{.*}}, 0f40400000;
  %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float 3.0)
; CHECK: atom.cta.add.f64{{.*}}, %fd{{[0-9]+}};
  %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df)
; CHECK: atom.cta.add.f64{{.*}}, 0d4010000000000000;
  %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double 4.0)

; CAS is implemented separately and has more arguments
; The two-letter suffix of each result name says whether the first / second
; value operand is a register (r) or an immediate (i). Register numbers are
; matched with [0-9]+ so the patterns keep working if the allocator hands out
; multi-digit virtual registers.
; CHECK: atom.cta.cas.b32{{.*}}], %r{{[0-9]+}}, %r{{[0-9]+}};
  %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i)
; For some reason in 64-bit mode we end up passing 51 via a register.
; CHECK32: atom.cta.cas.b32{{.*}}], %r{{[0-9]+}}, 51;
  %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 51)
; CHECK: atom.cta.cas.b32{{.*}}], 52, %r{{[0-9]+}};
  %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 52, i32 %i)
; CHECK: atom.cta.cas.b32{{.*}}], 53, 54;
  %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 53, i32 54)

  ; CHECK: ret
  ret void
}

; Declarations of the scoped NVVM atomic intrinsics called by the test
; functions in this file. As the symbol names show, each one follows the
; layout llvm.nvvm.atomic.<op>.gen.<i|f>.<cta|sys>.<value type>.p0, takes a
; nocapture pointer as its first parameter, and carries attribute group #1.

; add (integer and floating point)
declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr nocapture, i64) #1
declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr nocapture, i64) #1
declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr nocapture, float) #1
declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr nocapture, double) #1
declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr nocapture, float) #1
declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr nocapture, double) #1
; exch
declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr nocapture, i64) #1
declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr nocapture, i64) #1
; max
declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr nocapture, i64) #1
declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr nocapture, i64) #1
; min
declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr nocapture, i64) #1
declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr nocapture, i64) #1
; inc / dec (i32 only)
declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr nocapture, i32) #1
; and
declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr nocapture, i64) #1
declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr nocapture, i64) #1
; or
declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr nocapture, i64) #1
declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr nocapture, i64) #1
; xor
declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr nocapture, i64) #1
declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr nocapture, i32) #1
declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr nocapture, i64) #1
; cas (pointer plus two value operands)
declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr nocapture, i32, i32) #1
declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr nocapture, i64, i64) #1
declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr nocapture, i32, i32) #1
declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr nocapture, i64, i64) #1

; Attribute group attached to every intrinsic declaration above.
attributes #1 = { argmemonly nounwind }