; xref: /llvm-project/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s
; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}

; TODO: fix "atomic load volatile acquire": generates "ld.acquire.sys;"
;       but should generate "ld.mmio.relaxed.sys; fence.acq_rel.sys;"
; TODO: fix "atomic store volatile release": generates "st.release.sys;"
;       but should generate "fence.acq_rel.sys; st.mmio.relaxed.sys;"

; TODO: fix "atomic load volatile seq_cst": generates "fence.sc.sys; ld.acquire.sys;"
;       but should generate "fence.sc.sys; ld.mmio.relaxed.sys; fence.acq_rel.sys;"
; TODO: fix "atomic store volatile seq_cst": generates "fence.sc.sys; st.release.sys;"
;       but should generate "fence.sc.sys; st.mmio.relaxed.sys;"

; TODO: add i1, <8 x i8>, and <6 x i8> vector tests.

; TODO: add test for vectors that exceed 128-bit length.
; Per https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#vectors
; vectors cannot exceed 128-bit in length, i.e., .v4.u64 is not allowed.

; TODO: generate PTX that preserves Concurrent Forward Progress
;       for atomic operations to the local statespace
;       by generating atomic or volatile operations.

; TODO: design exposure for atomic operations on vector types.

; TODO: implement and test thread scope.

; TODO: add weak, atomic, volatile, and atomic volatile tests
;       for the .const and .param statespaces.

; TODO: optimize .sys.shared into .cta.shared or .cluster.shared.
;; generic statespace

; Unordered atomic load/store at device ("gpu") scope for each scalar type;
; expected to lower to ld/st.relaxed.gpu.
; CHECK-LABEL: generic_unordered_gpu
define void @generic_unordered_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") unordered, align 1

  ; CHECK: ld.relaxed.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") unordered, align 2

  ; CHECK: ld.relaxed.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("device") unordered, align 8

  ; CHECK: ld.relaxed.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; NOTE(review): the double accesses reuse %e (the float pointer) rather than
  ; a dedicated argument; harmless for the CHECK patterns, but confirm intent.
  %f.load = load atomic double, ptr %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("device") unordered, align 8

  ret void
}

; Unordered atomic volatile load/store at device scope; volatile forces the
; plain ld/st.volatile form (no scope qualifier).
; CHECK-LABEL: generic_unordered_volatile_gpu
define void @generic_unordered_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("device") unordered, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("device") unordered, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("device") unordered, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("device") unordered, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("device") unordered, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("device") unordered, align 8

  ret void
}

; Unordered atomic load/store at block ("cta") scope; expected to lower to
; ld/st.relaxed.cta.
; CHECK-LABEL: generic_unordered_cta
define void @generic_unordered_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("block") unordered, align 1

  ; CHECK: ld.relaxed.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("block") unordered, align 2

  ; CHECK: ld.relaxed.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("block") unordered, align 8

  ; CHECK: ld.relaxed.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("block") unordered, align 8

  ret void
}

; Unordered atomic volatile load/store at block scope; volatile forces the
; plain ld/st.volatile form (no scope qualifier).
; CHECK-LABEL: generic_unordered_volatile_cta
define void @generic_unordered_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("block") unordered, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("block") unordered, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("block") unordered, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("block") unordered, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("block") unordered, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("block") unordered, align 8

  ret void
}

; Monotonic atomic load/store at device scope; expected to lower to
; ld/st.relaxed.gpu (same as unordered).
; CHECK-LABEL: generic_monotonic_gpu
define void @generic_monotonic_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") monotonic, align 1

  ; CHECK: ld.relaxed.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") monotonic, align 2

  ; CHECK: ld.relaxed.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("device") monotonic, align 8

  ; CHECK: ld.relaxed.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("device") monotonic, align 8

  ret void
}

; Monotonic atomic volatile load/store at device scope; volatile forces the
; plain ld/st.volatile form (no scope qualifier).
; CHECK-LABEL: generic_monotonic_volatile_gpu
define void @generic_monotonic_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("device") monotonic, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("device") monotonic, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("device") monotonic, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("device") monotonic, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("device") monotonic, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("device") monotonic, align 8

  ret void
}

; Monotonic atomic load/store at block scope; expected to lower to
; ld/st.relaxed.cta.
; CHECK-LABEL: generic_monotonic_cta
define void @generic_monotonic_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("block") monotonic, align 1

  ; CHECK: ld.relaxed.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("block") monotonic, align 2

  ; CHECK: ld.relaxed.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("block") monotonic, align 8

  ; CHECK: ld.relaxed.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("block") monotonic, align 8

  ret void
}

; Monotonic atomic volatile load/store at block scope; volatile forces the
; plain ld/st.volatile form (no scope qualifier).
; CHECK-LABEL: generic_monotonic_volatile_cta
define void @generic_monotonic_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("block") monotonic, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("block") monotonic, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("block") monotonic, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("block") monotonic, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("block") monotonic, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("block") monotonic, align 8

  ret void
}

; Acquire loads / release stores at the default (system) scope; expected to
; lower to ld.acquire.sys / st.release.sys.
; CHECK-LABEL: generic_acq_rel_sys
define void @generic_acq_rel_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e release, align 8

  ret void
}

; Volatile acquire loads / release stores at system scope; currently lowered
; the same as the non-volatile forms (see the MMIO TODOs at the top of file).
; CHECK-LABEL: generic_acq_rel_volatile_sys
define void @generic_acq_rel_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e release, align 8

  ret void
}

; Acquire loads / release stores at device scope; expected to lower to
; ld.acquire.gpu / st.release.gpu.
; CHECK-LABEL: generic_acq_rel_gpu
define void @generic_acq_rel_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.gpu.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("device") release, align 8

  ret void
}

; Volatile acquire/release at device scope; volatile widens the scope to .sys
; in the emitted PTX (CHECKs expect .sys, not .gpu).
; CHECK-LABEL: generic_acq_rel_volatile_gpu
define void @generic_acq_rel_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("device") release, align 8

  ret void
}

; Acquire loads / release stores at block scope; expected to lower to
; ld.acquire.cta / st.release.cta.
; CHECK-LABEL: generic_acq_rel_cta
define void @generic_acq_rel_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("block") release, align 8

  ret void
}

; Volatile acquire/release at block scope; volatile widens the scope to .sys
; in the emitted PTX (CHECKs expect .sys, not .cta).
; CHECK-LABEL: generic_acq_rel_volatile_cta
define void @generic_acq_rel_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("block") release, align 8

  ret void
}

; seq_cst load/store at system scope; each access is preceded by fence.sc.sys
; and then lowered as acquire/release.
; CHECK-LABEL: generic_sc_sys
define void @generic_sc_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e seq_cst, align 8

  ret void
}

; volatile seq_cst atomics at default (system) scope on generic pointers:
; currently lowered identically to the non-volatile case
; (fence.sc.sys + ld.acquire.sys / st.release.sys); see the TODOs at the
; top of the file regarding the intended .mmio lowering for volatile.
; CHECK-LABEL: generic_sc_volatile_sys
define void @generic_sc_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e seq_cst, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see generic_sc_sys).
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e seq_cst, align 8

  ret void
}
714
; seq_cst atomics with syncscope("device") on generic pointers:
; the "device" syncscope maps to the PTX .gpu scope, giving
; fence.sc.gpu + ld.acquire.gpu / st.release.gpu.
; CHECK-LABEL: generic_sc_gpu
define void @generic_sc_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("device") seq_cst, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see generic_sc_sys).
  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("device") seq_cst, align 8

  ret void
}
767
; volatile seq_cst atomics with syncscope("device") on generic pointers:
; volatile causes the device scope to be conservatively widened to the
; system scope in the emitted PTX (.sys instead of .gpu).
; CHECK-LABEL: generic_sc_volatile_gpu
define void @generic_sc_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("device") seq_cst, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see generic_sc_sys).
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("device") seq_cst, align 8

  ret void
}
820
; seq_cst atomics with syncscope("block") on generic pointers:
; the "block" syncscope maps to the PTX .cta scope, giving
; fence.sc.cta + ld.acquire.cta / st.release.cta.
; CHECK-LABEL: generic_sc_cta
define void @generic_sc_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("block") seq_cst, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see generic_sc_sys).
  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("block") seq_cst, align 8

  ret void
}
873
; volatile seq_cst atomics with syncscope("block") on generic pointers:
; volatile causes the block/cta scope to be conservatively widened to the
; system scope in the emitted PTX (.sys instead of .cta).
; CHECK-LABEL: generic_sc_volatile_cta
define void @generic_sc_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("block") seq_cst, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see generic_sc_sys).
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("block") seq_cst, align 8

  ret void
}
926
927;; global statespace
928
; unordered atomics with syncscope("device") on global (addrspace(1))
; pointers: lower to ld.relaxed.gpu.global / st.relaxed.gpu.global
; with no fences.
; CHECK-LABEL: global_unordered_gpu
define void @global_unordered_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") unordered, align 1

  ; CHECK: ld.relaxed.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") unordered, align 2

  ; CHECK: ld.relaxed.gpu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") unordered, align 8

  ; CHECK: ld.relaxed.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("device") unordered, align 4

  ; NOTE(review): double accesses intentionally reuse %e, as in the
  ; generic-statespace tests above; the signature carries five pointers.
  ; CHECK: ld.relaxed.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("device") unordered, align 8

  ret void
}
969
; volatile unordered atomics with syncscope("device") on global pointers:
; volatile selects the .mmio qualifier and widens the scope to system,
; giving ld.mmio.relaxed.sys.global / st.mmio.relaxed.sys.global.
; CHECK-LABEL: global_unordered_volatile_gpu
define void @global_unordered_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") unordered, align 1

  ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") unordered, align 2

  ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") unordered, align 4

  ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") unordered, align 8

  ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") unordered, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see global_unordered_gpu).
  ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") unordered, align 8

  ret void
}
1010
; unordered atomics with syncscope("block") on global pointers:
; "block" maps to the PTX .cta scope, giving ld.relaxed.cta.global /
; st.relaxed.cta.global with no fences.
; CHECK-LABEL: global_unordered_cta
define void @global_unordered_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") unordered, align 1

  ; CHECK: ld.relaxed.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") unordered, align 2

  ; CHECK: ld.relaxed.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") unordered, align 8

  ; CHECK: ld.relaxed.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("block") unordered, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see global_unordered_gpu).
  ; CHECK: ld.relaxed.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("block") unordered, align 8

  ret void
}
1051
; volatile unordered atomics with syncscope("block") on global pointers:
; as with the device-scope variant, volatile selects .mmio and widens the
; scope to system (ld/st.mmio.relaxed.sys.global).
; CHECK-LABEL: global_unordered_volatile_cta
define void @global_unordered_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") unordered, align 1

  ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") unordered, align 2

  ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") unordered, align 4

  ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") unordered, align 8

  ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") unordered, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see global_unordered_gpu).
  ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") unordered, align 8

  ret void
}
1092
; monotonic atomics with syncscope("device") on global pointers:
; monotonic lowers exactly like unordered here — ld.relaxed.gpu.global /
; st.relaxed.gpu.global with no fences.
; CHECK-LABEL: global_monotonic_gpu
define void @global_monotonic_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.relaxed.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.relaxed.gpu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.relaxed.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("device") monotonic, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see global_unordered_gpu).
  ; CHECK: ld.relaxed.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("device") monotonic, align 8

  ret void
}
1133
; volatile monotonic atomics with syncscope("device") on global pointers:
; volatile selects .mmio and widens the scope to system
; (ld/st.mmio.relaxed.sys.global), same as the volatile unordered case.
; CHECK-LABEL: global_monotonic_volatile_gpu
define void @global_monotonic_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") monotonic, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see global_unordered_gpu).
  ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") monotonic, align 8

  ret void
}
1174
; monotonic atomics with syncscope("block") on global pointers:
; "block" maps to .cta, and monotonic lowers like unordered —
; ld.relaxed.cta.global / st.relaxed.cta.global with no fences.
; CHECK-LABEL: global_monotonic_cta
define void @global_monotonic_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") monotonic, align 1

  ; CHECK: ld.relaxed.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") monotonic, align 2

  ; CHECK: ld.relaxed.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.relaxed.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("block") monotonic, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see global_unordered_gpu).
  ; CHECK: ld.relaxed.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("block") monotonic, align 8

  ret void
}
1215
; volatile monotonic atomics with syncscope("block") on global pointers:
; volatile selects .mmio and widens the scope to system
; (ld/st.mmio.relaxed.sys.global), same as the other volatile relaxed cases.
; CHECK-LABEL: global_monotonic_volatile_cta
define void @global_monotonic_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") monotonic, align 1

  ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") monotonic, align 2

  ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") monotonic, align 4

  ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") monotonic, align 4

  ; NOTE(review): double accesses intentionally reuse %e (see global_unordered_gpu).
  ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") monotonic, align 8

  ret void
}
1256
1257; CHECK-LABEL: global_acq_rel_sys
; Non-volatile acquire loads and release stores at default (system) scope on
; the global address space: the patterns below expect ld.acquire.sys.global /
; st.release.sys.global for each scalar type (i8, i16, i32, i64, f32, f64).
define void @global_acq_rel_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e release, align 8

  ret void
}
1297
1298; CHECK-LABEL: global_acq_rel_volatile_sys
; Volatile acquire loads and release stores at system scope on the global
; address space: currently expected to lower the same as the non-volatile
; case (ld.acquire.sys.global / st.release.sys.global).  Per the TODOs at the
; top of this file, volatile acquire/release may eventually lower to
; mmio.relaxed plus fences instead.
define void @global_acq_rel_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e release, align 8

  ret void
}
1338
1339; CHECK-LABEL: global_acq_rel_gpu
; Acquire loads and release stores at syncscope("device") on the global
; address space: "device" maps to the PTX .gpu scope qualifier
; (ld.acquire.gpu.global / st.release.gpu.global) for every scalar type.
define void @global_acq_rel_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.gpu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("device") release, align 8

  ret void
}
1379
1380; CHECK-LABEL: global_acq_rel_volatile_gpu
; Volatile acquire/release at syncscope("device") on the global address
; space: the patterns expect volatility to widen the scope from gpu to sys
; (ld.acquire.sys.global / st.release.sys.global), unlike the non-volatile
; gpu-scope variant above it in this file.
define void @global_acq_rel_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") release, align 8

  ret void
}
1420
1421; CHECK-LABEL: global_acq_rel_cta
; Acquire loads and release stores at syncscope("block") on the global
; address space: "block" maps to the PTX .cta scope qualifier
; (ld.acquire.cta.global / st.release.cta.global) for every scalar type.
define void @global_acq_rel_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.cta.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("block") release, align 8

  ret void
}
1461
1462; CHECK-LABEL: global_acq_rel_volatile_cta
; Volatile acquire/release at syncscope("block") on the global address
; space: the patterns expect volatility to widen the scope from cta to sys
; (ld.acquire.sys.global / st.release.sys.global).
define void @global_acq_rel_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") release, align 8

  ret void
}
1502
1503; CHECK-LABEL: global_seq_cst_sys
; Sequentially-consistent loads/stores at system scope on the global address
; space: each operation is expected to be preceded by fence.sc.sys and then
; lowered as an acquire load / release store (sm_70 seq_cst lowering).
define void @global_seq_cst_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e seq_cst, align 8

  ret void
}
1555
1556; CHECK-LABEL: global_seq_cst_volatile_sys
; Volatile seq_cst loads/stores at system scope on the global address space:
; currently expected to lower identically to the non-volatile case
; (fence.sc.sys + acquire/release ops).  Per the TODOs at the top of this
; file, volatile seq_cst may eventually use relaxed.mmio ops instead.
define void @global_seq_cst_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e seq_cst, align 8

  ret void
}
1608
1609; CHECK-LABEL: global_seq_cst_gpu
; Seq_cst loads/stores at syncscope("device") on the global address space:
; expects fence.sc.gpu before each op, with the op itself lowered at .gpu
; scope (ld.acquire.gpu.global / st.release.gpu.global).
define void @global_seq_cst_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 8

  ret void
}
1661
1662; CHECK-LABEL: global_seq_cst_volatile_gpu
; Volatile seq_cst at syncscope("device") on the global address space:
; volatility is expected to widen both the fence and the operation scope
; from gpu to sys (fence.sc.sys + ld.acquire.sys / st.release.sys).
define void @global_seq_cst_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 8

  ret void
}
1714
1715; CHECK-LABEL: global_seq_cst_cta
; Seq_cst loads/stores at syncscope("block") on the global address space:
; expects fence.sc.cta before each op, with the op itself lowered at .cta
; scope (ld.acquire.cta.global / st.release.cta.global).
define void @global_seq_cst_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 8

  ret void
}
1767
1768; CHECK-LABEL: global_seq_cst_volatile_cta
; Volatile seq_cst at syncscope("block") on the global address space:
; volatility is expected to widen both the fence and the operation scope
; from cta to sys (fence.sc.sys + ld.acquire.sys / st.release.sys).
define void @global_seq_cst_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 8

  ret void
}
1820
1821;; shared statespace
1822
1823; CHECK-LABEL: shared_unordered_gpu
; Unordered atomics at syncscope("device") on the shared address space
; (addrspace(3)): expected to lower as relaxed operations at .gpu scope
; (ld.relaxed.gpu.shared / st.relaxed.gpu.shared) for every scalar type.
define void @shared_unordered_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") unordered, align 1

  ; CHECK: ld.relaxed.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") unordered, align 2

  ; CHECK: ld.relaxed.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") unordered, align 8

  ; CHECK: ld.relaxed.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("device") unordered, align 8

  ret void
}
1863
; CHECK-LABEL: shared_unordered_volatile_gpu
; Volatile unordered atomics at syncscope("device") on the shared statespace:
; the CHECKs expect plain ld.volatile.shared.* / st.volatile.shared.* —
; i.e. no scope qualifier appears once the access is volatile.
define void @shared_unordered_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") unordered, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") unordered, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") unordered, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") unordered, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") unordered, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") unordered, align 8

  ret void
}
1904
; CHECK-LABEL: shared_unordered_cta
; Non-volatile unordered atomics at syncscope("block") on the shared
; statespace: expected to lower to ld.relaxed.cta.shared.* /
; st.relaxed.cta.shared.* (LLVM "block" scope maps to PTX .cta).
define void @shared_unordered_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") unordered, align 1

  ; CHECK: ld.relaxed.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("block") unordered, align 2

  ; CHECK: ld.relaxed.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") unordered, align 8

  ; CHECK: ld.relaxed.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("block") unordered, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.relaxed.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("block") unordered, align 8

  ret void
}
1945
; CHECK-LABEL: shared_unordered_volatile_cta
; Volatile unordered atomics at syncscope("block") on the shared statespace:
; the CHECKs expect plain ld.volatile.shared.* / st.volatile.shared.* —
; the .cta scope qualifier does not appear once the access is volatile.
define void @shared_unordered_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") unordered, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") unordered, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") unordered, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") unordered, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") unordered, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") unordered, align 8

  ret void
}
1986
; CHECK-LABEL: shared_monotonic_gpu
; Non-volatile monotonic atomics at syncscope("device") on the shared
; statespace: same lowering as the unordered case above —
; ld.relaxed.gpu.shared.* / st.relaxed.gpu.shared.* .
define void @shared_monotonic_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.relaxed.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.relaxed.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.relaxed.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("device") monotonic, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.relaxed.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("device") monotonic, align 8

  ret void
}
2027
; CHECK-LABEL: shared_monotonic_volatile_gpu
; Volatile monotonic atomics at syncscope("device") on the shared statespace:
; the CHECKs expect plain ld.volatile.shared.* / st.volatile.shared.* —
; no scope qualifier once the access is volatile.
define void @shared_monotonic_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") monotonic, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") monotonic, align 8

  ret void
}
2068
; CHECK-LABEL: shared_monotonic_cta
; Non-volatile monotonic atomics at syncscope("block") on the shared
; statespace: expected to lower to ld.relaxed.cta.shared.* /
; st.relaxed.cta.shared.* .
define void @shared_monotonic_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") monotonic, align 1

  ; CHECK: ld.relaxed.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("block") monotonic, align 2

  ; CHECK: ld.relaxed.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.relaxed.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("block") monotonic, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.relaxed.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("block") monotonic, align 8

  ret void
}
2109
; CHECK-LABEL: shared_monotonic_volatile_cta
; Volatile monotonic atomics at syncscope("block") on the shared statespace:
; the CHECKs expect plain ld.volatile.shared.* / st.volatile.shared.* —
; no scope qualifier once the access is volatile.
define void @shared_monotonic_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") monotonic, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") monotonic, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") monotonic, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") monotonic, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") monotonic, align 8

  ret void
}
2150
; CHECK-LABEL: shared_acq_rel_sys
; Acquire loads / release stores with the default (system) syncscope on the
; shared statespace: expected to lower to ld.acquire.sys.shared.* /
; st.release.sys.shared.* .
define void @shared_acq_rel_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e release, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e release, align 8

  ret void
}
2191
; CHECK-LABEL: shared_acq_rel_volatile_sys
; Volatile acquire loads / release stores at system scope on the shared
; statespace: the CHECKs expect the same PTX as the non-volatile case above
; (ld.acquire.sys.shared.* / st.release.sys.shared.*).
define void @shared_acq_rel_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e release, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e release, align 8

  ret void
}
2232
; CHECK-LABEL: shared_acq_rel_gpu
; Acquire loads / release stores at syncscope("device") on the shared
; statespace: expected to lower to ld.acquire.gpu.shared.* /
; st.release.gpu.shared.* .
define void @shared_acq_rel_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("device") release, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.acquire.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("device") release, align 8

  ret void
}
2273
; CHECK-LABEL: shared_acq_rel_volatile_gpu
; Volatile acquire loads / release stores at syncscope("device") on the
; shared statespace. NOTE(review): the CHECKs expect .sys scope even though
; the IR uses device scope — volatile + ordering appears to be lowered
; conservatively to system scope.
define void @shared_acq_rel_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") release, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") release, align 8

  ret void
}
2314
; CHECK-LABEL: shared_acq_rel_cta
; Acquire loads / release stores at syncscope("block") on the shared
; statespace: expected to lower to ld.acquire.cta.shared.* /
; st.release.cta.shared.* .
define void @shared_acq_rel_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("block") release, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.acquire.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("block") release, align 8

  ret void
}
2355
; CHECK-LABEL: shared_acq_rel_volatile_cta
; Volatile acquire loads / release stores at syncscope("block") on the
; shared statespace. NOTE(review): the CHECKs expect .sys scope even though
; the IR uses block scope — volatile + ordering appears to be lowered
; conservatively to system scope.
define void @shared_acq_rel_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") release, align 4

  ; NOTE(review): double tests reuse %e (the float pointer) — copy-paste,
  ; harmless for these CHECKs.
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") release, align 8

  ret void
}
; CHECK-LABEL: shared_seq_cst_sys
; seq_cst atomics (default system scope) on shared memory: each operation is
; lowered to fence.sc.sys followed by a .sys-scoped acquire load / release store.
define void @shared_seq_cst_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e seq_cst, align 8

  ret void
}
; CHECK-LABEL: shared_seq_cst_volatile_sys
; Volatile seq_cst atomics (system scope) on shared memory: same lowering as
; the non-volatile case — fence.sc.sys plus .sys-scoped acquire/release access.
define void @shared_seq_cst_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e seq_cst, align 8

  ret void
}
; CHECK-LABEL: shared_seq_cst_gpu
; seq_cst atomics with "device" (GPU) scope on shared memory: lowered to
; fence.sc.gpu followed by a .gpu-scoped acquire load / release store.
define void @shared_seq_cst_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 8

  ret void
}
; CHECK-LABEL: shared_seq_cst_volatile_gpu
; Volatile seq_cst atomics with "device" scope on shared memory. The CHECK
; lines show .sys-scoped lowering — volatile appears to be treated
; conservatively as system scope.
define void @shared_seq_cst_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 8

  ret void
}
; CHECK-LABEL: shared_seq_cst_cta
; seq_cst atomics with "block" (CTA) scope on shared memory: lowered to
; fence.sc.cta followed by a .cta-scoped acquire load / release store.
define void @shared_seq_cst_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 8

  ret void
}
; CHECK-LABEL: shared_seq_cst_volatile_cta
; Volatile seq_cst atomics with "block" scope on shared memory. The CHECK
; lines show .sys-scoped lowering — volatile appears to be treated
; conservatively as system scope.
define void @shared_seq_cst_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 8

  ret void
}

;; local statespace

; CHECK-LABEL: local_unordered_gpu
; Unordered atomics on the private local statespace lower to plain
; ld.local/st.local with no ordering or scope qualifiers (see the
; Concurrent Forward Progress TODO at the top of this file).
define void @local_unordered_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") unordered, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") unordered, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") unordered, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") unordered, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e syncscope("device") unordered, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e syncscope("device") unordered, align 8

  ret void
}
; CHECK-LABEL: local_unordered_volatile_gpu
; Volatile unordered atomics on the local statespace: same plain
; ld.local/st.local lowering as the non-volatile case.
define void @local_unordered_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") unordered, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") unordered, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") unordered, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") unordered, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") unordered, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") unordered, align 8

  ret void
}
; CHECK-LABEL: local_unordered_cta
; Unordered atomics with "block" scope on the local statespace: plain
; ld.local/st.local, identical to the device-scope case.
define void @local_unordered_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") unordered, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") unordered, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") unordered, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("block") unordered, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e syncscope("block") unordered, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e syncscope("block") unordered, align 8

  ret void
}
; CHECK-LABEL: local_unordered_volatile_cta
; Volatile unordered atomics with "block" scope on the local statespace:
; plain ld.local/st.local lowering.
define void @local_unordered_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") unordered, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") unordered, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("block") unordered, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") unordered, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") unordered, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") unordered, align 8

  ret void
}
; CHECK-LABEL: local_monotonic_gpu
; Monotonic atomics with "device" scope on the local statespace lower to
; plain ld.local/st.local, same as the unordered cases above.
define void @local_monotonic_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e syncscope("device") monotonic, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e syncscope("device") monotonic, align 8

  ret void
}
; CHECK-LABEL: local_monotonic_volatile_gpu
; Volatile monotonic atomics with "device" scope on the local statespace:
; plain ld.local/st.local lowering.
define void @local_monotonic_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") monotonic, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") monotonic, align 8

  ret void
}
; Non-volatile monotonic atomic loads/stores of i8/i16/i32/i64/float/double to
; the local statespace (addrspace(5)) at "block" (CTA) scope. The CHECK lines
; expect plain ld.local/st.local with no ordering/scope qualifier; per the
; file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
2963; CHECK-LABEL: local_monotonic_cta
2964define void @local_monotonic_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
2965  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
2966  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") monotonic, align 1
2967  %a.add = add i8 %a.load, 1
2968  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
2969  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") monotonic, align 1
2970
2971  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
2972  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") monotonic, align 2
2973  %b.add = add i16 %b.load, 1
2974  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
2975  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") monotonic, align 2
2976
2977  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
2978  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") monotonic, align 4
2979  %c.add = add i32 %c.load, 1
2980  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
2981  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") monotonic, align 4
2982
2983  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
2984  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") monotonic, align 8
2985  %d.add = add i64 %d.load, 1
2986  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
2987  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("block") monotonic, align 8
2988
2989  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
2990  %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") monotonic, align 4
2991  %e.add = fadd float %e.load, 1.
2992  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
2993  store atomic float %e.add, ptr addrspace(5) %e syncscope("block") monotonic, align 4
2994
2995  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
2996  %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") monotonic, align 8
2997  %f.add = fadd double %f.load, 1.
2998  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
2999  store atomic double %f.add, ptr addrspace(5) %e syncscope("block") monotonic, align 8
3000
3001  ret void
3002}
3003
; Volatile monotonic atomic loads/stores of i8/i16/i32/i64/float/double to the
; local statespace (addrspace(5)) at "block" (CTA) scope. The CHECK lines
; expect plain ld.local/st.local (no .volatile, no ordering qualifier); per
; the file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3004; CHECK-LABEL: local_monotonic_volatile_cta
3005define void @local_monotonic_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3006  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3007  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") monotonic, align 1
3008  %a.add = add i8 %a.load, 1
3009  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3010  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") monotonic, align 1
3011
3012  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3013  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") monotonic, align 2
3014  %b.add = add i16 %b.load, 1
3015  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3016  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") monotonic, align 2
3017
3018  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3019  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") monotonic, align 4
3020  %c.add = add i32 %c.load, 1
3021  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3022  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("block") monotonic, align 4
3023
3024  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3025  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") monotonic, align 8
3026  %d.add = add i64 %d.load, 1
3027  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3028  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") monotonic, align 8
3029
3030  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3031  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") monotonic, align 4
3032  %e.add = fadd float %e.load, 1.
3033  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3034  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") monotonic, align 4
3035
3036  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3037  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") monotonic, align 8
3038  %f.add = fadd double %f.load, 1.
3039  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3040  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") monotonic, align 8
3041
3042  ret void
3043}
3044
; Acquire atomic loads and release atomic stores (i8/i16/i32/i64/float/double)
; to the local statespace (addrspace(5)) at default (system) scope. The CHECK
; lines expect plain ld.local/st.local without acquire/release qualifiers;
; per the file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3045; CHECK-LABEL: local_acq_rel_sys
3046define void @local_acq_rel_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3047  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3048  %a.load = load atomic i8, ptr addrspace(5) %a acquire, align 1
3049  %a.add = add i8 %a.load, 1
3050  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3051  store atomic i8 %a.add, ptr addrspace(5) %a release, align 1
3052
3053  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3054  %b.load = load atomic i16, ptr addrspace(5) %b acquire, align 2
3055  %b.add = add i16 %b.load, 1
3056  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3057  store atomic i16 %b.add, ptr addrspace(5) %b release, align 2
3058
3059  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3060  %c.load = load atomic i32, ptr addrspace(5) %c acquire, align 4
3061  %c.add = add i32 %c.load, 1
3062  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3063  store atomic i32 %c.add, ptr addrspace(5) %c release, align 4
3064
3065  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3066  %d.load = load atomic i64, ptr addrspace(5) %d acquire, align 8
3067  %d.add = add i64 %d.load, 1
3068  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3069  store atomic i64 %d.add, ptr addrspace(5) %d release, align 8
3070
3071  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3072  %e.load = load atomic float, ptr addrspace(5) %e acquire, align 4
3073  %e.add = fadd float %e.load, 1.
3074  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3075  store atomic float %e.add, ptr addrspace(5) %e release, align 4
3076
3077  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3078  %f.load = load atomic double, ptr addrspace(5) %e acquire, align 8
3079  %f.add = fadd double %f.load, 1.
3080  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3081  store atomic double %f.add, ptr addrspace(5) %e release, align 8
3082
3083  ret void
3084}
3085
; Volatile acquire atomic loads and volatile release atomic stores
; (i8/i16/i32/i64/float/double) to the local statespace (addrspace(5)) at
; default (system) scope. The CHECK lines expect plain ld.local/st.local; per
; the file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3086; CHECK-LABEL: local_acq_rel_volatile_sys
3087define void @local_acq_rel_volatile_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3088  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3089  %a.load = load atomic volatile i8, ptr addrspace(5) %a acquire, align 1
3090  %a.add = add i8 %a.load, 1
3091  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3092  store atomic volatile i8 %a.add, ptr addrspace(5) %a release, align 1
3093
3094  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3095  %b.load = load atomic volatile i16, ptr addrspace(5) %b acquire, align 2
3096  %b.add = add i16 %b.load, 1
3097  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3098  store atomic volatile i16 %b.add, ptr addrspace(5) %b release, align 2
3099
3100  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3101  %c.load = load atomic volatile i32, ptr addrspace(5) %c acquire, align 4
3102  %c.add = add i32 %c.load, 1
3103  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3104  store atomic volatile i32 %c.add, ptr addrspace(5) %c release, align 4
3105
3106  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3107  %d.load = load atomic volatile i64, ptr addrspace(5) %d acquire, align 8
3108  %d.add = add i64 %d.load, 1
3109  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3110  store atomic volatile i64 %d.add, ptr addrspace(5) %d release, align 8
3111
3112  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3113  %e.load = load atomic volatile float, ptr addrspace(5) %e acquire, align 4
3114  %e.add = fadd float %e.load, 1.
3115  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3116  store atomic volatile float %e.add, ptr addrspace(5) %e release, align 4
3117
3118  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3119  %f.load = load atomic volatile double, ptr addrspace(5) %e acquire, align 8
3120  %f.add = fadd double %f.load, 1.
3121  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3122  store atomic volatile double %f.add, ptr addrspace(5) %e release, align 8
3123
3124  ret void
3125}
3126
; Acquire atomic loads and release atomic stores (i8/i16/i32/i64/float/double)
; to the local statespace (addrspace(5)) at "device" (GPU) scope. The CHECK
; lines expect plain ld.local/st.local without ordering/scope qualifiers; per
; the file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3127; CHECK-LABEL: local_acq_rel_gpu
3128define void @local_acq_rel_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3129  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3130  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") acquire, align 1
3131  %a.add = add i8 %a.load, 1
3132  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3133  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") release, align 1
3134
3135  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3136  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") acquire, align 2
3137  %b.add = add i16 %b.load, 1
3138  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3139  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") release, align 2
3140
3141  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3142  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") acquire, align 4
3143  %c.add = add i32 %c.load, 1
3144  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3145  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") release, align 4
3146
3147  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3148  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") acquire, align 8
3149  %d.add = add i64 %d.load, 1
3150  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3151  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") release, align 8
3152
3153  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3154  %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") acquire, align 4
3155  %e.add = fadd float %e.load, 1.
3156  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3157  store atomic float %e.add, ptr addrspace(5) %e syncscope("device") release, align 4
3158
3159  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3160  %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") acquire, align 8
3161  %f.add = fadd double %f.load, 1.
3162  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3163  store atomic double %f.add, ptr addrspace(5) %e syncscope("device") release, align 8
3164
3165  ret void
3166}
3167
; Volatile acquire atomic loads and volatile release atomic stores
; (i8/i16/i32/i64/float/double) to the local statespace (addrspace(5)) at
; "device" (GPU) scope. The CHECK lines expect plain ld.local/st.local; per
; the file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3168; CHECK-LABEL: local_acq_rel_volatile_gpu
3169define void @local_acq_rel_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3170  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3171  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") acquire, align 1
3172  %a.add = add i8 %a.load, 1
3173  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3174  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") release, align 1
3175
3176  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3177  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") acquire, align 2
3178  %b.add = add i16 %b.load, 1
3179  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3180  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") release, align 2
3181
3182  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3183  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") acquire, align 4
3184  %c.add = add i32 %c.load, 1
3185  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3186  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") release, align 4
3187
3188  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3189  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") acquire, align 8
3190  %d.add = add i64 %d.load, 1
3191  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3192  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") release, align 8
3193
3194  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3195  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") acquire, align 4
3196  %e.add = fadd float %e.load, 1.
3197  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3198  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") release, align 4
3199
3200  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3201  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") acquire, align 8
3202  %f.add = fadd double %f.load, 1.
3203  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3204  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") release, align 8
3205
3206  ret void
3207}
3208
; Acquire atomic loads and release atomic stores (i8/i16/i32/i64/float/double)
; to the local statespace (addrspace(5)) at "block" (CTA) scope. The CHECK
; lines expect plain ld.local/st.local without ordering/scope qualifiers; per
; the file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3209; CHECK-LABEL: local_acq_rel_cta
3210define void @local_acq_rel_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3211  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3212  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") acquire, align 1
3213  %a.add = add i8 %a.load, 1
3214  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3215  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") release, align 1
3216
3217  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3218  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") acquire, align 2
3219  %b.add = add i16 %b.load, 1
3220  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3221  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") release, align 2
3222
3223  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3224  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") acquire, align 4
3225  %c.add = add i32 %c.load, 1
3226  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3227  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") release, align 4
3228
3229  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3230  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") acquire, align 8
3231  %d.add = add i64 %d.load, 1
3232  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3233  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("block") release, align 8
3234
3235  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3236  %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") acquire, align 4
3237  %e.add = fadd float %e.load, 1.
3238  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3239  store atomic float %e.add, ptr addrspace(5) %e syncscope("block") release, align 4
3240
3241  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3242  %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") acquire, align 8
3243  %f.add = fadd double %f.load, 1.
3244  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3245  store atomic double %f.add, ptr addrspace(5) %e syncscope("block") release, align 8
3246
3247  ret void
3248}
3249
; Volatile acquire atomic loads and volatile release atomic stores
; (i8/i16/i32/i64/float/double) to the local statespace (addrspace(5)) at
; "block" (CTA) scope. The CHECK lines expect plain ld.local/st.local; per
; the file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3250; CHECK-LABEL: local_acq_rel_volatile_cta
3251define void @local_acq_rel_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3252  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3253  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") acquire, align 1
3254  %a.add = add i8 %a.load, 1
3255  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3256  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") release, align 1
3257
3258  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3259  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") acquire, align 2
3260  %b.add = add i16 %b.load, 1
3261  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3262  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") release, align 2
3263
3264  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3265  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") acquire, align 4
3266  %c.add = add i32 %c.load, 1
3267  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3268  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("block") release, align 4
3269
3270  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3271  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") acquire, align 8
3272  %d.add = add i64 %d.load, 1
3273  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3274  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") release, align 8
3275
3276  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3277  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") acquire, align 4
3278  %e.add = fadd float %e.load, 1.
3279  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3280  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") release, align 4
3281
3282  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3283  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") acquire, align 8
3284  %f.add = fadd double %f.load, 1.
3285  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3286  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") release, align 8
3287
3288  ret void
3289}
3290
; seq_cst atomic loads/stores of i8/i16/i32/i64/float/double to the local
; statespace (addrspace(5)) at default (system) scope. The CHECK lines expect
; plain ld.local/st.local with no fence or ordering qualifier; per the
; file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3291; CHECK-LABEL: local_seq_cst_sys
3292define void @local_seq_cst_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3293  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3294  %a.load = load atomic i8, ptr addrspace(5) %a seq_cst, align 1
3295  %a.add = add i8 %a.load, 1
3296  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3297  store atomic i8 %a.add, ptr addrspace(5) %a seq_cst, align 1
3298
3299  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3300  %b.load = load atomic i16, ptr addrspace(5) %b seq_cst, align 2
3301  %b.add = add i16 %b.load, 1
3302  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3303  store atomic i16 %b.add, ptr addrspace(5) %b seq_cst, align 2
3304
3305  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3306  %c.load = load atomic i32, ptr addrspace(5) %c seq_cst, align 4
3307  %c.add = add i32 %c.load, 1
3308  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3309  store atomic i32 %c.add, ptr addrspace(5) %c seq_cst, align 4
3310
3311  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3312  %d.load = load atomic i64, ptr addrspace(5) %d seq_cst, align 8
3313  %d.add = add i64 %d.load, 1
3314  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3315  store atomic i64 %d.add, ptr addrspace(5) %d seq_cst, align 8
3316
3317  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3318  %e.load = load atomic float, ptr addrspace(5) %e seq_cst, align 4
3319  %e.add = fadd float %e.load, 1.
3320  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3321  store atomic float %e.add, ptr addrspace(5) %e seq_cst, align 4
3322
3323  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3324  %f.load = load atomic double, ptr addrspace(5) %e seq_cst, align 8
3325  %f.add = fadd double %f.load, 1.
3326  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3327  store atomic double %f.add, ptr addrspace(5) %e seq_cst, align 8
3328
3329  ret void
3330}
3331
; Volatile seq_cst atomic loads/stores of i8/i16/i32/i64/float/double to the
; local statespace (addrspace(5)) at default (system) scope. The CHECK lines
; expect plain ld.local/st.local with no fence or ordering qualifier; per the
; file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3332; CHECK-LABEL: local_seq_cst_volatile_sys
3333define void @local_seq_cst_volatile_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3334  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3335  %a.load = load atomic volatile i8, ptr addrspace(5) %a seq_cst, align 1
3336  %a.add = add i8 %a.load, 1
3337  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3338  store atomic volatile i8 %a.add, ptr addrspace(5) %a seq_cst, align 1
3339
3340  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3341  %b.load = load atomic volatile i16, ptr addrspace(5) %b seq_cst, align 2
3342  %b.add = add i16 %b.load, 1
3343  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3344  store atomic volatile i16 %b.add, ptr addrspace(5) %b seq_cst, align 2
3345
3346  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3347  %c.load = load atomic volatile i32, ptr addrspace(5) %c seq_cst, align 4
3348  %c.add = add i32 %c.load, 1
3349  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3350  store atomic volatile i32 %c.add, ptr addrspace(5) %c seq_cst, align 4
3351
3352  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3353  %d.load = load atomic volatile i64, ptr addrspace(5) %d seq_cst, align 8
3354  %d.add = add i64 %d.load, 1
3355  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3356  store atomic volatile i64 %d.add, ptr addrspace(5) %d seq_cst, align 8
3357
3358  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3359  %e.load = load atomic volatile float, ptr addrspace(5) %e seq_cst, align 4
3360  %e.add = fadd float %e.load, 1.
3361  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3362  store atomic volatile float %e.add, ptr addrspace(5) %e seq_cst, align 4
3363
3364  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3365  %f.load = load atomic volatile double, ptr addrspace(5) %e seq_cst, align 8
3366  %f.add = fadd double %f.load, 1.
3367  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3368  store atomic volatile double %f.add, ptr addrspace(5) %e seq_cst, align 8
3369
3370  ret void
3371}
3372
; seq_cst atomic loads/stores of i8/i16/i32/i64/float/double to the local
; statespace (addrspace(5)) at "device" (GPU) scope. The CHECK lines expect
; plain ld.local/st.local with no fence or ordering/scope qualifier; per the
; file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3373; CHECK-LABEL: local_seq_cst_gpu
3374define void @local_seq_cst_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3375  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3376  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") seq_cst, align 1
3377  %a.add = add i8 %a.load, 1
3378  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3379  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") seq_cst, align 1
3380
3381  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3382  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") seq_cst, align 2
3383  %b.add = add i16 %b.load, 1
3384  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3385  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") seq_cst, align 2
3386
3387  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3388  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") seq_cst, align 4
3389  %c.add = add i32 %c.load, 1
3390  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3391  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") seq_cst, align 4
3392
3393  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3394  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") seq_cst, align 8
3395  %d.add = add i64 %d.load, 1
3396  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3397  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") seq_cst, align 8
3398
3399  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3400  %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") seq_cst, align 4
3401  %e.add = fadd float %e.load, 1.
3402  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3403  store atomic float %e.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 4
3404
3405  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3406  %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") seq_cst, align 8
3407  %f.add = fadd double %f.load, 1.
3408  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3409  store atomic double %f.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 8
3410
3411  ret void
3412}
3413
; Volatile seq_cst atomic loads/stores of i8/i16/i32/i64/float/double to the
; local statespace (addrspace(5)) at "device" (GPU) scope. The CHECK lines
; expect plain ld.local/st.local with no fence or ordering qualifier; per the
; file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3414; CHECK-LABEL: local_seq_cst_volatile_gpu
3415define void @local_seq_cst_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3416  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3417  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") seq_cst, align 1
3418  %a.add = add i8 %a.load, 1
3419  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3420  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") seq_cst, align 1
3421
3422  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3423  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") seq_cst, align 2
3424  %b.add = add i16 %b.load, 1
3425  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3426  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") seq_cst, align 2
3427
3428  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3429  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") seq_cst, align 4
3430  %c.add = add i32 %c.load, 1
3431  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3432  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") seq_cst, align 4
3433
3434  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3435  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") seq_cst, align 8
3436  %d.add = add i64 %d.load, 1
3437  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3438  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") seq_cst, align 8
3439
3440  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3441  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") seq_cst, align 4
3442  %e.add = fadd float %e.load, 1.
3443  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3444  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 4
3445
3446  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3447  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") seq_cst, align 8
3448  %f.add = fadd double %f.load, 1.
3449  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3450  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 8
3451
3452  ret void
3453}
3454
; seq_cst atomic loads/stores of i8/i16/i32/i64/float/double to the local
; statespace (addrspace(5)) at "block" (CTA) scope. The CHECK lines expect
; plain ld.local/st.local with no fence or ordering/scope qualifier; per the
; file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3455; CHECK-LABEL: local_seq_cst_cta
3456define void @local_seq_cst_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3457  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3458  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") seq_cst, align 1
3459  %a.add = add i8 %a.load, 1
3460  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3461  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") seq_cst, align 1
3462
3463  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3464  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") seq_cst, align 2
3465  %b.add = add i16 %b.load, 1
3466  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3467  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") seq_cst, align 2
3468
3469  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3470  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") seq_cst, align 4
3471  %c.add = add i32 %c.load, 1
3472  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3473  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") seq_cst, align 4
3474
3475  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3476  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") seq_cst, align 8
3477  %d.add = add i64 %d.load, 1
3478  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3479  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("block") seq_cst, align 8
3480
3481  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3482  %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") seq_cst, align 4
3483  %e.add = fadd float %e.load, 1.
3484  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3485  store atomic float %e.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 4
3486
3487  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3488  %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") seq_cst, align 8
3489  %f.add = fadd double %f.load, 1.
3490  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3491  store atomic double %f.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 8
3492
3493  ret void
3494}
3495
; Volatile seq_cst atomic loads/stores of i8/i16/i32/i64/float/double to the
; local statespace (addrspace(5)) at "block" (CTA) scope. The CHECK lines
; expect plain ld.local/st.local with no fence or ordering qualifier; per the
; file-header TODO, atomics to local are currently lowered as ordinary
; accesses. NOTE(review): the double case reuses pointer %e (no %f argument).
3496; CHECK-LABEL: local_seq_cst_volatile_cta
3497define void @local_seq_cst_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
3498  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3499  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") seq_cst, align 1
3500  %a.add = add i8 %a.load, 1
3501  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3502  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") seq_cst, align 1
3503
3504  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
3505  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") seq_cst, align 2
3506  %b.add = add i16 %b.load, 1
3507  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3508  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") seq_cst, align 2
3509
3510  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3511  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") seq_cst, align 4
3512  %c.add = add i32 %c.load, 1
3513  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3514  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("block") seq_cst, align 4
3515
3516  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3517  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") seq_cst, align 8
3518  %d.add = add i64 %d.load, 1
3519  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
3520  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") seq_cst, align 8
3521
3522  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
3523  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") seq_cst, align 4
3524  %e.add = fadd float %e.load, 1.
3525  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
3526  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 4
3527
3528  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
3529  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") seq_cst, align 8
3530  %f.add = fadd double %f.load, 1.
3531  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
3532  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 8
3533
3534  ret void
3535}
3536