; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s
; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}

; TODO: fix "atomic load volatile acquire": generates "ld.acquire.sys;"
;       but should generate "ld.mmio.relaxed.sys; fence.acq_rel.sys;"
; TODO: fix "atomic store volatile release": generates "st.release.sys;"
;       but should generate "fence.acq_rel.sys; st.mmio.relaxed.sys;"

; TODO: fix "atomic load volatile seq_cst": generates "fence.sc.sys; ld.acquire.sys;"
;       but should generate "fence.sc.sys; ld.relaxed.mmio.sys; fence.acq_rel.sys;"
; TODO: fix "atomic store volatile seq_cst": generates "fence.sc.sys; st.release.sys;"
;       but should generate "fence.sc.sys; st.relaxed.mmio.sys;"

; TODO: add i1, <8 x i8>, and <6 x i8> vector tests.

; TODO: add test for vectors that exceed 128-bit length
; Per https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#vectors
; vectors cannot exceed 128-bit in length, i.e., .v4.u64 is not allowed.

; TODO: generate PTX that preserves Concurrent Forward Progress
; for atomic operations to local statespace
; by generating atomic or volatile operations.

; TODO: design exposure for atomic operations on vector types.

; TODO: implement and test thread scope.

; TODO: add weak,atomic,volatile,atomic volatile tests
; for .const and .param statespaces.

; TODO: optimize .sys.shared into .cta.shared or .cluster.shared .
;; generic statespace
;
; Each function below loads and stores i8/i16/i32/i64/float/double through
; generic pointers with one (ordering, scope, volatility) combination and
; checks the PTX ld/st qualifiers emitted for sm_70/ptx82.

; CHECK-LABEL: generic_unordered_gpu
define void @generic_unordered_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") unordered, align 1

  ; CHECK: ld.relaxed.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") unordered, align 2

  ; CHECK: ld.relaxed.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("device") unordered, align 8

  ; CHECK: ld.relaxed.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("device") unordered, align 8

  ret void
}

; CHECK-LABEL: generic_unordered_volatile_gpu
define void @generic_unordered_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("device") unordered, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("device") unordered, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("device") unordered, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("device") unordered, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("device") unordered, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("device") unordered, align 8

  ret void
}

; CHECK-LABEL: generic_unordered_cta
define void @generic_unordered_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("block") unordered, align 1

  ; CHECK: ld.relaxed.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("block") unordered, align 2

  ; CHECK: ld.relaxed.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("block") unordered, align 8

  ; CHECK: ld.relaxed.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("block") unordered, align 8

  ret void
}

; CHECK-LABEL: generic_unordered_volatile_cta
define void @generic_unordered_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("block") unordered, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("block") unordered, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("block") unordered, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("block") unordered, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("block") unordered, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("block") unordered, align 8

  ret void
}

; CHECK-LABEL: generic_monotonic_gpu
define void @generic_monotonic_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") monotonic, align 1

  ; CHECK: ld.relaxed.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") monotonic, align 2

  ; CHECK: ld.relaxed.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("device") monotonic, align 8

  ; CHECK: ld.relaxed.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("device") monotonic, align 8

  ret void
}

; CHECK-LABEL: generic_monotonic_volatile_gpu
define void @generic_monotonic_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("device") monotonic, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("device") monotonic, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("device") monotonic, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("device") monotonic, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("device") monotonic, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("device") monotonic, align 8

  ret void
}

; CHECK-LABEL: generic_monotonic_cta
define void @generic_monotonic_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("block") monotonic, align 1

  ; CHECK: ld.relaxed.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("block") monotonic, align 2

  ; CHECK: ld.relaxed.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("block") monotonic, align 8

  ; CHECK: ld.relaxed.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("block") monotonic, align 8

  ret void
}

; CHECK-LABEL: generic_monotonic_volatile_cta
define void @generic_monotonic_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("block") monotonic, align 1

  ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("block") monotonic, align 2

  ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("block") monotonic, align 4

  ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("block") monotonic, align 8

  ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("block") monotonic, align 4

  ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("block") monotonic, align 8

  ret void
}

; CHECK-LABEL: generic_acq_rel_sys
define void @generic_acq_rel_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e release, align 8

  ret void
}

; CHECK-LABEL: generic_acq_rel_volatile_sys
define void @generic_acq_rel_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e release, align 8

  ret void
}

; CHECK-LABEL: generic_acq_rel_gpu
define void @generic_acq_rel_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.gpu.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("device") release, align 8

  ret void
}

; CHECK-LABEL: generic_acq_rel_volatile_gpu
define void @generic_acq_rel_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("device") release, align 8

  ret void
}

; CHECK-LABEL: generic_acq_rel_cta
define void @generic_acq_rel_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e syncscope("block") release, align 8

  ret void
}

; CHECK-LABEL: generic_acq_rel_volatile_cta
define void @generic_acq_rel_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e syncscope("block") release, align 8

  ret void
}

; CHECK-LABEL: generic_sc_sys
define void @generic_sc_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr %e seq_cst, align 8

  ret void
}

; CHECK-LABEL: generic_sc_volatile_sys
define void @generic_sc_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr %e seq_cst, align 8

  ret void
}

; CHECK-LABEL: generic_sc_gpu
define void @generic_sc_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.u64
[%rd{{[0-9]+}}], %rd{{[0-9]+}} 747 store atomic i64 %d.add, ptr %d syncscope("device") seq_cst, align 8 748 749 ; CHECK: fence.sc.gpu 750 ; CHECK: ld.acquire.gpu.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 751 %e.load = load atomic float, ptr %e syncscope("device") seq_cst, align 4 752 %e.add = fadd float %e.load, 1. 753 ; CHECK: fence.sc.gpu 754 ; CHECK: st.release.gpu.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 755 store atomic float %e.add, ptr %e syncscope("device") seq_cst, align 4 756 757 ; CHECK: fence.sc.gpu 758 ; CHECK: ld.acquire.gpu.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 759 %f.load = load atomic double, ptr %e syncscope("device") seq_cst, align 8 760 %f.add = fadd double %f.load, 1. 761 ; CHECK: fence.sc.gpu 762 ; CHECK: st.release.gpu.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 763 store atomic double %f.add, ptr %e syncscope("device") seq_cst, align 8 764 765 ret void 766} 767 768; CHECK-LABEL: generic_sc_volatile_gpu 769define void @generic_sc_volatile_gpu(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr { 770 ; CHECK: fence.sc.sys 771 ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 772 %a.load = load atomic volatile i8, ptr %a syncscope("device") seq_cst, align 1 773 %a.add = add i8 %a.load, 1 774 ; CHECK: fence.sc.sys 775 ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 776 store atomic volatile i8 %a.add, ptr %a syncscope("device") seq_cst, align 1 777 778 ; CHECK: fence.sc.sys 779 ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 780 %b.load = load atomic volatile i16, ptr %b syncscope("device") seq_cst, align 2 781 %b.add = add i16 %b.load, 1 782 ; CHECK: fence.sc.sys 783 ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 784 store atomic volatile i16 %b.add, ptr %b syncscope("device") seq_cst, align 2 785 786 ; CHECK: fence.sc.sys 787 ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 788 %c.load = load atomic volatile i32, ptr %c syncscope("device") seq_cst, align 4 789 %c.add = add i32 %c.load, 1 790 ; CHECK: fence.sc.sys 
791 ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 792 store atomic volatile i32 %c.add, ptr %c syncscope("device") seq_cst, align 4 793 794 ; CHECK: fence.sc.sys 795 ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 796 %d.load = load atomic volatile i64, ptr %d syncscope("device") seq_cst, align 8 797 %d.add = add i64 %d.load, 1 798 ; CHECK: fence.sc.sys 799 ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 800 store atomic volatile i64 %d.add, ptr %d syncscope("device") seq_cst, align 8 801 802 ; CHECK: fence.sc.sys 803 ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 804 %e.load = load atomic volatile float, ptr %e syncscope("device") seq_cst, align 4 805 %e.add = fadd float %e.load, 1. 806 ; CHECK: fence.sc.sys 807 ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 808 store atomic volatile float %e.add, ptr %e syncscope("device") seq_cst, align 4 809 810 ; CHECK: fence.sc.sys 811 ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 812 %f.load = load atomic volatile double, ptr %e syncscope("device") seq_cst, align 8 813 %f.add = fadd double %f.load, 1. 
814 ; CHECK: fence.sc.sys 815 ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 816 store atomic volatile double %f.add, ptr %e syncscope("device") seq_cst, align 8 817 818 ret void 819} 820 821; CHECK-LABEL: generic_sc_cta 822define void @generic_sc_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr { 823 ; CHECK: fence.sc.cta 824 ; CHECK: ld.acquire.cta.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 825 %a.load = load atomic i8, ptr %a syncscope("block") seq_cst, align 1 826 %a.add = add i8 %a.load, 1 827 ; CHECK: fence.sc.cta 828 ; CHECK: st.release.cta.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 829 store atomic i8 %a.add, ptr %a syncscope("block") seq_cst, align 1 830 831 ; CHECK: fence.sc.cta 832 ; CHECK: ld.acquire.cta.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 833 %b.load = load atomic i16, ptr %b syncscope("block") seq_cst, align 2 834 %b.add = add i16 %b.load, 1 835 ; CHECK: fence.sc.cta 836 ; CHECK: st.release.cta.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 837 store atomic i16 %b.add, ptr %b syncscope("block") seq_cst, align 2 838 839 ; CHECK: fence.sc.cta 840 ; CHECK: ld.acquire.cta.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 841 %c.load = load atomic i32, ptr %c syncscope("block") seq_cst, align 4 842 %c.add = add i32 %c.load, 1 843 ; CHECK: fence.sc.cta 844 ; CHECK: st.release.cta.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 845 store atomic i32 %c.add, ptr %c syncscope("block") seq_cst, align 4 846 847 ; CHECK: fence.sc.cta 848 ; CHECK: ld.acquire.cta.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 849 %d.load = load atomic i64, ptr %d syncscope("block") seq_cst, align 8 850 %d.add = add i64 %d.load, 1 851 ; CHECK: fence.sc.cta 852 ; CHECK: st.release.cta.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 853 store atomic i64 %d.add, ptr %d syncscope("block") seq_cst, align 8 854 855 ; CHECK: fence.sc.cta 856 ; CHECK: ld.acquire.cta.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 857 %e.load = load atomic float, ptr %e syncscope("block") seq_cst, align 4 858 %e.add = fadd float %e.load, 1. 
859 ; CHECK: fence.sc.cta 860 ; CHECK: st.release.cta.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 861 store atomic float %e.add, ptr %e syncscope("block") seq_cst, align 4 862 863 ; CHECK: fence.sc.cta 864 ; CHECK: ld.acquire.cta.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 865 %f.load = load atomic double, ptr %e syncscope("block") seq_cst, align 8 866 %f.add = fadd double %f.load, 1. 867 ; CHECK: fence.sc.cta 868 ; CHECK: st.release.cta.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 869 store atomic double %f.add, ptr %e syncscope("block") seq_cst, align 8 870 871 ret void 872} 873 874; CHECK-LABEL: generic_sc_volatile_cta 875define void @generic_sc_volatile_cta(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr { 876 ; CHECK: fence.sc.sys 877 ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 878 %a.load = load atomic volatile i8, ptr %a syncscope("block") seq_cst, align 1 879 %a.add = add i8 %a.load, 1 880 ; CHECK: fence.sc.sys 881 ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 882 store atomic volatile i8 %a.add, ptr %a syncscope("block") seq_cst, align 1 883 884 ; CHECK: fence.sc.sys 885 ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 886 %b.load = load atomic volatile i16, ptr %b syncscope("block") seq_cst, align 2 887 %b.add = add i16 %b.load, 1 888 ; CHECK: fence.sc.sys 889 ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 890 store atomic volatile i16 %b.add, ptr %b syncscope("block") seq_cst, align 2 891 892 ; CHECK: fence.sc.sys 893 ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 894 %c.load = load atomic volatile i32, ptr %c syncscope("block") seq_cst, align 4 895 %c.add = add i32 %c.load, 1 896 ; CHECK: fence.sc.sys 897 ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 898 store atomic volatile i32 %c.add, ptr %c syncscope("block") seq_cst, align 4 899 900 ; CHECK: fence.sc.sys 901 ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 902 %d.load = load atomic volatile i64, ptr %d syncscope("block") seq_cst, align 
8 903 %d.add = add i64 %d.load, 1 904 ; CHECK: fence.sc.sys 905 ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 906 store atomic volatile i64 %d.add, ptr %d syncscope("block") seq_cst, align 8 907 908 ; CHECK: fence.sc.sys 909 ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 910 %e.load = load atomic volatile float, ptr %e syncscope("block") seq_cst, align 4 911 %e.add = fadd float %e.load, 1. 912 ; CHECK: fence.sc.sys 913 ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 914 store atomic volatile float %e.add, ptr %e syncscope("block") seq_cst, align 4 915 916 ; CHECK: fence.sc.sys 917 ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 918 %f.load = load atomic volatile double, ptr %e syncscope("block") seq_cst, align 8 919 %f.add = fadd double %f.load, 1. 920 ; CHECK: fence.sc.sys 921 ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 922 store atomic volatile double %f.add, ptr %e syncscope("block") seq_cst, align 8 923 924 ret void 925} 926 927;; global statespace 928 929; CHECK-LABEL: global_unordered_gpu 930define void @global_unordered_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr { 931 ; CHECK: ld.relaxed.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 932 %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") unordered, align 1 933 %a.add = add i8 %a.load, 1 934 ; CHECK: st.relaxed.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 935 store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") unordered, align 1 936 937 ; CHECK: ld.relaxed.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 938 %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") unordered, align 2 939 %b.add = add i16 %b.load, 1 940 ; CHECK: st.relaxed.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 941 store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") unordered, align 2 942 943 ; CHECK: ld.relaxed.gpu.global.u32 %r{{[0-9]+}}, 
[%rd{{[0-9]+}}] 944 %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") unordered, align 4 945 %c.add = add i32 %c.load, 1 946 ; CHECK: st.relaxed.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 947 store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") unordered, align 4 948 949 ; CHECK: ld.relaxed.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 950 %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") unordered, align 8 951 %d.add = add i64 %d.load, 1 952 ; CHECK: st.relaxed.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 953 store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") unordered, align 8 954 955 ; CHECK: ld.relaxed.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 956 %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") unordered, align 4 957 %e.add = fadd float %e.load, 1. 958 ; CHECK: st.relaxed.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 959 store atomic float %e.add, ptr addrspace(1) %e syncscope("device") unordered, align 4 960 961 ; CHECK: ld.relaxed.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 962 %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") unordered, align 8 963 %f.add = fadd double %f.load, 1. 
964 ; CHECK: st.relaxed.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 965 store atomic double %f.add, ptr addrspace(1) %e syncscope("device") unordered, align 8 966 967 ret void 968} 969 970; CHECK-LABEL: global_unordered_volatile_gpu 971define void @global_unordered_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr { 972 ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 973 %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") unordered, align 1 974 %a.add = add i8 %a.load, 1 975 ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 976 store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") unordered, align 1 977 978 ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 979 %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") unordered, align 2 980 %b.add = add i16 %b.load, 1 981 ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 982 store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") unordered, align 2 983 984 ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 985 %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") unordered, align 4 986 %c.add = add i32 %c.load, 1 987 ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 988 store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") unordered, align 4 989 990 ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 991 %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") unordered, align 8 992 %d.add = add i64 %d.load, 1 993 ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 994 store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") unordered, align 8 995 996 ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, 
[%rd{{[0-9]+}}] 997 %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") unordered, align 4 998 %e.add = fadd float %e.load, 1. 999 ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 1000 store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") unordered, align 4 1001 1002 ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 1003 %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") unordered, align 8 1004 %f.add = fadd double %f.load, 1. 1005 ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 1006 store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") unordered, align 8 1007 1008 ret void 1009} 1010 1011; CHECK-LABEL: global_unordered_cta 1012define void @global_unordered_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr { 1013 ; CHECK: ld.relaxed.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1014 %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") unordered, align 1 1015 %a.add = add i8 %a.load, 1 1016 ; CHECK: st.relaxed.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1017 store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") unordered, align 1 1018 1019 ; CHECK: ld.relaxed.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1020 %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") unordered, align 2 1021 %b.add = add i16 %b.load, 1 1022 ; CHECK: st.relaxed.cta.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1023 store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") unordered, align 2 1024 1025 ; CHECK: ld.relaxed.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 1026 %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") unordered, align 4 1027 %c.add = add i32 %c.load, 1 1028 ; CHECK: st.relaxed.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 1029 store atomic i32 %c.add, ptr addrspace(1) %c 
syncscope("block") unordered, align 4 1030 1031 ; CHECK: ld.relaxed.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 1032 %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") unordered, align 8 1033 %d.add = add i64 %d.load, 1 1034 ; CHECK: st.relaxed.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 1035 store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") unordered, align 8 1036 1037 ; CHECK: ld.relaxed.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 1038 %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") unordered, align 4 1039 %e.add = fadd float %e.load, 1. 1040 ; CHECK: st.relaxed.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 1041 store atomic float %e.add, ptr addrspace(1) %e syncscope("block") unordered, align 4 1042 1043 ; CHECK: ld.relaxed.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 1044 %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") unordered, align 8 1045 %f.add = fadd double %f.load, 1. 1046 ; CHECK: st.relaxed.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 1047 store atomic double %f.add, ptr addrspace(1) %e syncscope("block") unordered, align 8 1048 1049 ret void 1050} 1051 1052; CHECK-LABEL: global_unordered_volatile_cta 1053define void @global_unordered_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr { 1054 ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1055 %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") unordered, align 1 1056 %a.add = add i8 %a.load, 1 1057 ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1058 store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") unordered, align 1 1059 1060 ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1061 %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") unordered, align 2 1062 %b.add = add i16 %b.load, 1 1063 ; CHECK: 
st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1064 store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") unordered, align 2 1065 1066 ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 1067 %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") unordered, align 4 1068 %c.add = add i32 %c.load, 1 1069 ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 1070 store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") unordered, align 4 1071 1072 ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 1073 %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") unordered, align 8 1074 %d.add = add i64 %d.load, 1 1075 ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 1076 store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") unordered, align 8 1077 1078 ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 1079 %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") unordered, align 4 1080 %e.add = fadd float %e.load, 1. 1081 ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 1082 store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") unordered, align 4 1083 1084 ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 1085 %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") unordered, align 8 1086 %f.add = fadd double %f.load, 1. 
1087 ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 1088 store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") unordered, align 8 1089 1090 ret void 1091} 1092 1093; CHECK-LABEL: global_monotonic_gpu 1094define void @global_monotonic_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr { 1095 ; CHECK: ld.relaxed.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1096 %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") monotonic, align 1 1097 %a.add = add i8 %a.load, 1 1098 ; CHECK: st.relaxed.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1099 store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") monotonic, align 1 1100 1101 ; CHECK: ld.relaxed.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1102 %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") monotonic, align 2 1103 %b.add = add i16 %b.load, 1 1104 ; CHECK: st.relaxed.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1105 store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") monotonic, align 2 1106 1107 ; CHECK: ld.relaxed.gpu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 1108 %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") monotonic, align 4 1109 %c.add = add i32 %c.load, 1 1110 ; CHECK: st.relaxed.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 1111 store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") monotonic, align 4 1112 1113 ; CHECK: ld.relaxed.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 1114 %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") monotonic, align 8 1115 %d.add = add i64 %d.load, 1 1116 ; CHECK: st.relaxed.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 1117 store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") monotonic, align 8 1118 1119 ; CHECK: ld.relaxed.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 1120 %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") monotonic, 
align 4 1121 %e.add = fadd float %e.load, 1. 1122 ; CHECK: st.relaxed.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 1123 store atomic float %e.add, ptr addrspace(1) %e syncscope("device") monotonic, align 4 1124 1125 ; CHECK: ld.relaxed.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 1126 %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") monotonic, align 8 1127 %f.add = fadd double %f.load, 1. 1128 ; CHECK: st.relaxed.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 1129 store atomic double %f.add, ptr addrspace(1) %e syncscope("device") monotonic, align 8 1130 1131 ret void 1132} 1133 1134; CHECK-LABEL: global_monotonic_volatile_gpu 1135define void @global_monotonic_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr { 1136 ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1137 %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") monotonic, align 1 1138 %a.add = add i8 %a.load, 1 1139 ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1140 store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") monotonic, align 1 1141 1142 ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1143 %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") monotonic, align 2 1144 %b.add = add i16 %b.load, 1 1145 ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1146 store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") monotonic, align 2 1147 1148 ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 1149 %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") monotonic, align 4 1150 %c.add = add i32 %c.load, 1 1151 ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 1152 store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") monotonic, align 4 1153 1154 ; CHECK: 
ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 1155 %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") monotonic, align 8 1156 %d.add = add i64 %d.load, 1 1157 ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 1158 store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") monotonic, align 8 1159 1160 ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 1161 %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") monotonic, align 4 1162 %e.add = fadd float %e.load, 1. 1163 ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 1164 store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") monotonic, align 4 1165 1166 ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 1167 %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") monotonic, align 8 1168 %f.add = fadd double %f.load, 1. 1169 ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 1170 store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") monotonic, align 8 1171 1172 ret void 1173} 1174 1175; CHECK-LABEL: global_monotonic_cta 1176define void @global_monotonic_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr { 1177 ; CHECK: ld.relaxed.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1178 %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") monotonic, align 1 1179 %a.add = add i8 %a.load, 1 1180 ; CHECK: st.relaxed.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 1181 store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") monotonic, align 1 1182 1183 ; CHECK: ld.relaxed.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 1184 %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") monotonic, align 2 1185 %b.add = add i16 %b.load, 1 1186 ; CHECK: st.relaxed.cta.global.u16 [%rd{{[0-9]+}}], 
%rs{{[0-9]+}} 1187 store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") monotonic, align 2 1188 1189 ; CHECK: ld.relaxed.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 1190 %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") monotonic, align 4 1191 %c.add = add i32 %c.load, 1 1192 ; CHECK: st.relaxed.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 1193 store atomic i32 %c.add, ptr addrspace(1) %c syncscope("block") monotonic, align 4 1194 1195 ; CHECK: ld.relaxed.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 1196 %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") monotonic, align 8 1197 %d.add = add i64 %d.load, 1 1198 ; CHECK: st.relaxed.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 1199 store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") monotonic, align 8 1200 1201 ; CHECK: ld.relaxed.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 1202 %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") monotonic, align 4 1203 %e.add = fadd float %e.load, 1. 1204 ; CHECK: st.relaxed.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 1205 store atomic float %e.add, ptr addrspace(1) %e syncscope("block") monotonic, align 4 1206 1207 ; CHECK: ld.relaxed.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 1208 %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") monotonic, align 8 1209 %f.add = fadd double %f.load, 1. 
  ; CHECK: st.relaxed.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("block") monotonic, align 8

  ret void
}

; Volatile monotonic (relaxed) atomics on global memory: per the patterns below,
; the "block" syncscope is promoted to .sys and the accesses gain the .mmio
; qualifier (requires ptx82, see the RUN lines at the top of the file).
; CHECK-LABEL: global_monotonic_volatile_cta
define void @global_monotonic_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") monotonic, align 1

  ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") monotonic, align 2

  ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") monotonic, align 4

  ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") monotonic, align 4

  ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") monotonic, align 8

  ret void
}

; Acquire/release atomics at the default (system) scope: expected to lower to
; ld.acquire.sys / st.release.sys for every scalar width.
; CHECK-LABEL: global_acq_rel_sys
define void @global_acq_rel_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e release, align 8

  ret void
}

; Volatile acquire/release at system scope: same lowering as the non-volatile
; case above (the scope is already .sys, so volatile changes nothing here).
; CHECK-LABEL: global_acq_rel_volatile_sys
define void @global_acq_rel_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e release, align 8

  ret void
}

; Acquire/release with syncscope("device"): expected to carry the .gpu scope.
; CHECK-LABEL: global_acq_rel_gpu
define void @global_acq_rel_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.gpu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("device") release, align 8

  ret void
}

; Volatile acquire/release with syncscope("device"): per the patterns below,
; volatile promotes the scope from .gpu to .sys.
; CHECK-LABEL: global_acq_rel_volatile_gpu
define void @global_acq_rel_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") release, align 8

  ret void
}

; Acquire/release with syncscope("block"): expected to carry the .cta scope.
; CHECK-LABEL: global_acq_rel_cta
define void @global_acq_rel_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.cta.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("block") release, align 8

  ret void
}

; Volatile acquire/release with syncscope("block"): per the patterns below,
; volatile promotes the scope from .cta to .sys.
; CHECK-LABEL: global_acq_rel_volatile_cta
define void @global_acq_rel_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") release, align 8

  ret void
}

; seq_cst atomics at system scope: each access is preceded by fence.sc.sys and
; then lowered like the corresponding acquire load / release store.
; CHECK-LABEL: global_seq_cst_sys
define void @global_seq_cst_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e seq_cst, align 8

  ret void
}

; Volatile seq_cst at system scope: same lowering as the non-volatile case.
; CHECK-LABEL: global_seq_cst_volatile_sys
define void @global_seq_cst_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e seq_cst, align 8

  ret void
}

; seq_cst with syncscope("device"): fence.sc.gpu plus .gpu-scoped accesses.
; CHECK-LABEL: global_seq_cst_gpu
define void @global_seq_cst_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.gpu
  ; CHECK: ld.acquire.gpu.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.gpu
  ; CHECK: st.release.gpu.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 8

  ret void
}

; Volatile seq_cst with syncscope("device"): per the patterns below, volatile
; promotes both the fence and the access scope from .gpu to .sys.
; CHECK-LABEL: global_seq_cst_volatile_gpu
define void @global_seq_cst_volatile_gpu(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("device") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("device") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("device") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("device") seq_cst, align 8

  ret void
}

; seq_cst with syncscope("block"): fence.sc.cta plus .cta-scoped accesses.
; CHECK-LABEL: global_seq_cst_cta
define void @global_seq_cst_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(1) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(1) %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(1) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(1) %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(1) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(1) %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(1) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(1) %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(1) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.cta
  ; CHECK: ld.acquire.cta.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(1) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.cta
  ; CHECK: st.release.cta.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 8

  ret void
}

; Volatile seq_cst with syncscope("block"): per the patterns below, volatile
; promotes both the fence and the access scope from .cta to .sys.
; CHECK-LABEL: global_seq_cst_volatile_cta
define void @global_seq_cst_volatile_cta(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(1) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(1) %a syncscope("block") seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(1) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(1) %b syncscope("block") seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(1) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(1) %c syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(1) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(1) %d syncscope("block") seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(1) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(1) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(1) %e syncscope("block") seq_cst, align 8

  ret void
}

;; shared statespace

; Unordered atomics on shared memory with syncscope("device"): expected to
; lower to ld.relaxed.gpu.shared / st.relaxed.gpu.shared.
; CHECK-LABEL: shared_unordered_gpu
define void @shared_unordered_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") unordered, align 1

  ; CHECK: ld.relaxed.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") unordered, align 2

  ; CHECK: ld.relaxed.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") unordered, align 8

  ; CHECK: ld.relaxed.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("device") unordered, align 4

  ; CHECK: ld.relaxed.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("device") unordered, align 8

  ret void
}

; Volatile unordered atomics on shared memory: per the patterns below, these
; lower to plain ld.volatile.shared / st.volatile.shared (no scope qualifier).
; CHECK-LABEL: shared_unordered_volatile_gpu
define void @shared_unordered_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") unordered, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") unordered, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") unordered, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") unordered, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") unordered, align 4

  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") unordered, align 8

  ret void
}

; Unordered atomics on shared memory with syncscope("block") -> .cta scope.
; NOTE(review): this function continues beyond this chunk of the file.
; CHECK-LABEL: shared_unordered_cta
define void @shared_unordered_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") unordered, align 1

  ; CHECK: ld.relaxed.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("block") unordered, align 2

  ; CHECK: ld.relaxed.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") unordered, align 8

  ; CHECK: ld.relaxed.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("block") unordered, align 4

  ; CHECK: ld.relaxed.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("block") unordered, align 8

  ret void
}

; Volatile unordered atomics at syncscope("block") on .shared:
; same scope-less ld/st.volatile.shared lowering as the device-scope
; volatile variant above.
; CHECK-LABEL: shared_unordered_volatile_cta
define void @shared_unordered_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") unordered, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") unordered, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") unordered, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") unordered, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") unordered, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") unordered, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") unordered, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") unordered, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") unordered, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") unordered, align 4

  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") unordered, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") unordered, align 8

  ret void
}

; Non-volatile monotonic atomics at syncscope("device") on .shared:
; same relaxed gpu-scoped lowering as the unordered variant.
; CHECK-LABEL: shared_monotonic_gpu
define void @shared_monotonic_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.relaxed.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.relaxed.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.relaxed.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("device") monotonic, align 4

  ; CHECK: ld.relaxed.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("device") monotonic, align 8

  ret void
}

; Volatile monotonic atomics at syncscope("device") on .shared:
; lowered to scope-less ld/st.volatile.shared.
; CHECK-LABEL: shared_monotonic_volatile_gpu
define void @shared_monotonic_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") monotonic, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") monotonic, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") monotonic, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") monotonic, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") monotonic, align 4

  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") monotonic, align 8

  ret void
}

; Non-volatile monotonic atomics at syncscope("block") on .shared:
; relaxed cta-scoped accesses.
; CHECK-LABEL: shared_monotonic_cta
define void @shared_monotonic_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.relaxed.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.relaxed.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") monotonic, align 1

  ; CHECK: ld.relaxed.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.relaxed.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("block") monotonic, align 2

  ; CHECK: ld.relaxed.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.relaxed.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.relaxed.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.relaxed.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.relaxed.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("block") monotonic, align 4

  ; CHECK: ld.relaxed.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.relaxed.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("block") monotonic, align 8

  ret void
}

; Volatile monotonic atomics at syncscope("block") on .shared:
; scope-less ld/st.volatile.shared, as with the device-scope volatile variant.
; CHECK-LABEL: shared_monotonic_volatile_cta
define void @shared_monotonic_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") monotonic, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") monotonic, align 1

  ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") monotonic, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") monotonic, align 2

  ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") monotonic, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") monotonic, align 4

  ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") monotonic, align 4

  ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") monotonic, align 8

  ret void
}

; Acquire loads and release stores at the default (system) scope on .shared.
; CHECK-LABEL: shared_acq_rel_sys
define void @shared_acq_rel_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e release, align 4

  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e release, align 8

  ret void
}

; Volatile acquire/release at system scope on .shared:
; same sys-scoped acquire/release patterns as the non-volatile variant.
; CHECK-LABEL: shared_acq_rel_volatile_sys
define void @shared_acq_rel_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e release, align 4

  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e release, align 8

  ret void
}

; Acquire/release at syncscope("device") on .shared: gpu-scoped accesses.
; CHECK-LABEL: shared_acq_rel_gpu
define void @shared_acq_rel_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("device") release, align 8

  ret void
}

; Volatile acquire/release at syncscope("device") on .shared:
; the patterns below expect the scope to be promoted to sys for volatile.
; CHECK-LABEL: shared_acq_rel_volatile_gpu
define void @shared_acq_rel_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") release, align 4

  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") release, align 8

  ret void
}

; Acquire/release at syncscope("block") on .shared: cta-scoped accesses.
; CHECK-LABEL: shared_acq_rel_cta
define void @shared_acq_rel_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(3) %e syncscope("block") release, align 8

  ret void
}

; Volatile acquire/release at syncscope("block") on .shared:
; scope promoted to sys, as with the device-scope volatile variant.
; CHECK-LABEL: shared_acq_rel_volatile_cta
define void @shared_acq_rel_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") release, align 1

  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") release, align 2

  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") release, align 8

  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") release, align 4

  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") release, align 8

  ret void
}

; seq_cst at the default (system) scope on .shared:
; the patterns below expect a fence.sc.sys before each acquire load and
; before each release store.
; CHECK-LABEL: shared_seq_cst_sys
define void @shared_seq_cst_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(3) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(3) %a seq_cst, align 1

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(3) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(3) %b seq_cst, align 2

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(3) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(3) %c seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(3) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(3) %d seq_cst, align 8

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(3) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: fence.sc.sys
  ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(3) %e seq_cst, align 4

  ; CHECK: fence.sc.sys
  ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(3) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
2443 ; CHECK: fence.sc.sys 2444 ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2445 store atomic double %f.add, ptr addrspace(3) %e seq_cst, align 8 2446 2447 ret void 2448} 2449 2450; CHECK-LABEL: shared_seq_cst_volatile_sys 2451define void @shared_seq_cst_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr { 2452 ; CHECK: fence.sc.sys 2453 ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2454 %a.load = load atomic volatile i8, ptr addrspace(3) %a seq_cst, align 1 2455 %a.add = add i8 %a.load, 1 2456 ; CHECK: fence.sc.sys 2457 ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2458 store atomic volatile i8 %a.add, ptr addrspace(3) %a seq_cst, align 1 2459 2460 ; CHECK: fence.sc.sys 2461 ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2462 %b.load = load atomic volatile i16, ptr addrspace(3) %b seq_cst, align 2 2463 %b.add = add i16 %b.load, 1 2464 ; CHECK: fence.sc.sys 2465 ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2466 store atomic volatile i16 %b.add, ptr addrspace(3) %b seq_cst, align 2 2467 2468 ; CHECK: fence.sc.sys 2469 ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2470 %c.load = load atomic volatile i32, ptr addrspace(3) %c seq_cst, align 4 2471 %c.add = add i32 %c.load, 1 2472 ; CHECK: fence.sc.sys 2473 ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2474 store atomic volatile i32 %c.add, ptr addrspace(3) %c seq_cst, align 4 2475 2476 ; CHECK: fence.sc.sys 2477 ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2478 %d.load = load atomic volatile i64, ptr addrspace(3) %d seq_cst, align 8 2479 %d.add = add i64 %d.load, 1 2480 ; CHECK: fence.sc.sys 2481 ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2482 store atomic volatile i64 %d.add, ptr addrspace(3) %d seq_cst, align 8 2483 2484 ; CHECK: fence.sc.sys 2485 ; 
CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2486 %e.load = load atomic volatile float, ptr addrspace(3) %e seq_cst, align 4 2487 %e.add = fadd float %e.load, 1. 2488 ; CHECK: fence.sc.sys 2489 ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2490 store atomic volatile float %e.add, ptr addrspace(3) %e seq_cst, align 4 2491 2492 ; CHECK: fence.sc.sys 2493 ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2494 %f.load = load atomic volatile double, ptr addrspace(3) %e seq_cst, align 8 2495 %f.add = fadd double %f.load, 1. 2496 ; CHECK: fence.sc.sys 2497 ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2498 store atomic volatile double %f.add, ptr addrspace(3) %e seq_cst, align 8 2499 2500 ret void 2501} 2502 2503; CHECK-LABEL: shared_seq_cst_gpu 2504define void @shared_seq_cst_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr { 2505 ; CHECK: fence.sc.gpu 2506 ; CHECK: ld.acquire.gpu.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2507 %a.load = load atomic i8, ptr addrspace(3) %a syncscope("device") seq_cst, align 1 2508 %a.add = add i8 %a.load, 1 2509 ; CHECK: fence.sc.gpu 2510 ; CHECK: st.release.gpu.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2511 store atomic i8 %a.add, ptr addrspace(3) %a syncscope("device") seq_cst, align 1 2512 2513 ; CHECK: fence.sc.gpu 2514 ; CHECK: ld.acquire.gpu.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2515 %b.load = load atomic i16, ptr addrspace(3) %b syncscope("device") seq_cst, align 2 2516 %b.add = add i16 %b.load, 1 2517 ; CHECK: fence.sc.gpu 2518 ; CHECK: st.release.gpu.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2519 store atomic i16 %b.add, ptr addrspace(3) %b syncscope("device") seq_cst, align 2 2520 2521 ; CHECK: fence.sc.gpu 2522 ; CHECK: ld.acquire.gpu.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2523 %c.load = load atomic i32, ptr addrspace(3) %c syncscope("device") seq_cst, align 4 2524 %c.add = 
add i32 %c.load, 1 2525 ; CHECK: fence.sc.gpu 2526 ; CHECK: st.release.gpu.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2527 store atomic i32 %c.add, ptr addrspace(3) %c syncscope("device") seq_cst, align 4 2528 2529 ; CHECK: fence.sc.gpu 2530 ; CHECK: ld.acquire.gpu.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2531 %d.load = load atomic i64, ptr addrspace(3) %d syncscope("device") seq_cst, align 8 2532 %d.add = add i64 %d.load, 1 2533 ; CHECK: fence.sc.gpu 2534 ; CHECK: st.release.gpu.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2535 store atomic i64 %d.add, ptr addrspace(3) %d syncscope("device") seq_cst, align 8 2536 2537 ; CHECK: fence.sc.gpu 2538 ; CHECK: ld.acquire.gpu.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2539 %e.load = load atomic float, ptr addrspace(3) %e syncscope("device") seq_cst, align 4 2540 %e.add = fadd float %e.load, 1. 2541 ; CHECK: fence.sc.gpu 2542 ; CHECK: st.release.gpu.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2543 store atomic float %e.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 4 2544 2545 ; CHECK: fence.sc.gpu 2546 ; CHECK: ld.acquire.gpu.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2547 %f.load = load atomic double, ptr addrspace(3) %e syncscope("device") seq_cst, align 8 2548 %f.add = fadd double %f.load, 1. 
2549 ; CHECK: fence.sc.gpu 2550 ; CHECK: st.release.gpu.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2551 store atomic double %f.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 8 2552 2553 ret void 2554} 2555 2556; CHECK-LABEL: shared_seq_cst_volatile_gpu 2557define void @shared_seq_cst_volatile_gpu(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr { 2558 ; CHECK: fence.sc.sys 2559 ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2560 %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("device") seq_cst, align 1 2561 %a.add = add i8 %a.load, 1 2562 ; CHECK: fence.sc.sys 2563 ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2564 store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("device") seq_cst, align 1 2565 2566 ; CHECK: fence.sc.sys 2567 ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2568 %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("device") seq_cst, align 2 2569 %b.add = add i16 %b.load, 1 2570 ; CHECK: fence.sc.sys 2571 ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2572 store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("device") seq_cst, align 2 2573 2574 ; CHECK: fence.sc.sys 2575 ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2576 %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("device") seq_cst, align 4 2577 %c.add = add i32 %c.load, 1 2578 ; CHECK: fence.sc.sys 2579 ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2580 store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("device") seq_cst, align 4 2581 2582 ; CHECK: fence.sc.sys 2583 ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2584 %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("device") seq_cst, align 8 2585 %d.add = add i64 %d.load, 1 2586 ; CHECK: fence.sc.sys 2587 ; CHECK: 
st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2588 store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("device") seq_cst, align 8 2589 2590 ; CHECK: fence.sc.sys 2591 ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2592 %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("device") seq_cst, align 4 2593 %e.add = fadd float %e.load, 1. 2594 ; CHECK: fence.sc.sys 2595 ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2596 store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 4 2597 2598 ; CHECK: fence.sc.sys 2599 ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2600 %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("device") seq_cst, align 8 2601 %f.add = fadd double %f.load, 1. 2602 ; CHECK: fence.sc.sys 2603 ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2604 store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("device") seq_cst, align 8 2605 2606 ret void 2607} 2608 2609; CHECK-LABEL: shared_seq_cst_cta 2610define void @shared_seq_cst_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr { 2611 ; CHECK: fence.sc.cta 2612 ; CHECK: ld.acquire.cta.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2613 %a.load = load atomic i8, ptr addrspace(3) %a syncscope("block") seq_cst, align 1 2614 %a.add = add i8 %a.load, 1 2615 ; CHECK: fence.sc.cta 2616 ; CHECK: st.release.cta.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2617 store atomic i8 %a.add, ptr addrspace(3) %a syncscope("block") seq_cst, align 1 2618 2619 ; CHECK: fence.sc.cta 2620 ; CHECK: ld.acquire.cta.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2621 %b.load = load atomic i16, ptr addrspace(3) %b syncscope("block") seq_cst, align 2 2622 %b.add = add i16 %b.load, 1 2623 ; CHECK: fence.sc.cta 2624 ; CHECK: st.release.cta.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2625 store atomic i16 
%b.add, ptr addrspace(3) %b syncscope("block") seq_cst, align 2 2626 2627 ; CHECK: fence.sc.cta 2628 ; CHECK: ld.acquire.cta.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2629 %c.load = load atomic i32, ptr addrspace(3) %c syncscope("block") seq_cst, align 4 2630 %c.add = add i32 %c.load, 1 2631 ; CHECK: fence.sc.cta 2632 ; CHECK: st.release.cta.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2633 store atomic i32 %c.add, ptr addrspace(3) %c syncscope("block") seq_cst, align 4 2634 2635 ; CHECK: fence.sc.cta 2636 ; CHECK: ld.acquire.cta.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2637 %d.load = load atomic i64, ptr addrspace(3) %d syncscope("block") seq_cst, align 8 2638 %d.add = add i64 %d.load, 1 2639 ; CHECK: fence.sc.cta 2640 ; CHECK: st.release.cta.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2641 store atomic i64 %d.add, ptr addrspace(3) %d syncscope("block") seq_cst, align 8 2642 2643 ; CHECK: fence.sc.cta 2644 ; CHECK: ld.acquire.cta.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2645 %e.load = load atomic float, ptr addrspace(3) %e syncscope("block") seq_cst, align 4 2646 %e.add = fadd float %e.load, 1. 2647 ; CHECK: fence.sc.cta 2648 ; CHECK: st.release.cta.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2649 store atomic float %e.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 4 2650 2651 ; CHECK: fence.sc.cta 2652 ; CHECK: ld.acquire.cta.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2653 %f.load = load atomic double, ptr addrspace(3) %e syncscope("block") seq_cst, align 8 2654 %f.add = fadd double %f.load, 1. 
2655 ; CHECK: fence.sc.cta 2656 ; CHECK: st.release.cta.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2657 store atomic double %f.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 8 2658 2659 ret void 2660} 2661 2662; CHECK-LABEL: shared_seq_cst_volatile_cta 2663define void @shared_seq_cst_volatile_cta(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr { 2664 ; CHECK: fence.sc.sys 2665 ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2666 %a.load = load atomic volatile i8, ptr addrspace(3) %a syncscope("block") seq_cst, align 1 2667 %a.add = add i8 %a.load, 1 2668 ; CHECK: fence.sc.sys 2669 ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2670 store atomic volatile i8 %a.add, ptr addrspace(3) %a syncscope("block") seq_cst, align 1 2671 2672 ; CHECK: fence.sc.sys 2673 ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2674 %b.load = load atomic volatile i16, ptr addrspace(3) %b syncscope("block") seq_cst, align 2 2675 %b.add = add i16 %b.load, 1 2676 ; CHECK: fence.sc.sys 2677 ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2678 store atomic volatile i16 %b.add, ptr addrspace(3) %b syncscope("block") seq_cst, align 2 2679 2680 ; CHECK: fence.sc.sys 2681 ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2682 %c.load = load atomic volatile i32, ptr addrspace(3) %c syncscope("block") seq_cst, align 4 2683 %c.add = add i32 %c.load, 1 2684 ; CHECK: fence.sc.sys 2685 ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2686 store atomic volatile i32 %c.add, ptr addrspace(3) %c syncscope("block") seq_cst, align 4 2687 2688 ; CHECK: fence.sc.sys 2689 ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2690 %d.load = load atomic volatile i64, ptr addrspace(3) %d syncscope("block") seq_cst, align 8 2691 %d.add = add i64 %d.load, 1 2692 ; CHECK: fence.sc.sys 2693 ; CHECK: 
st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2694 store atomic volatile i64 %d.add, ptr addrspace(3) %d syncscope("block") seq_cst, align 8 2695 2696 ; CHECK: fence.sc.sys 2697 ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2698 %e.load = load atomic volatile float, ptr addrspace(3) %e syncscope("block") seq_cst, align 4 2699 %e.add = fadd float %e.load, 1. 2700 ; CHECK: fence.sc.sys 2701 ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2702 store atomic volatile float %e.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 4 2703 2704 ; CHECK: fence.sc.sys 2705 ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2706 %f.load = load atomic volatile double, ptr addrspace(3) %e syncscope("block") seq_cst, align 8 2707 %f.add = fadd double %f.load, 1. 2708 ; CHECK: fence.sc.sys 2709 ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2710 store atomic volatile double %f.add, ptr addrspace(3) %e syncscope("block") seq_cst, align 8 2711 2712 ret void 2713} 2714 2715;; local statespace 2716 2717; CHECK-LABEL: local_unordered_gpu 2718define void @local_unordered_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 2719 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2720 %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") unordered, align 1 2721 %a.add = add i8 %a.load, 1 2722 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2723 store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") unordered, align 1 2724 2725 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2726 %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") unordered, align 2 2727 %b.add = add i16 %b.load, 1 2728 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2729 store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") unordered, align 2 2730 2731 ; CHECK: ld.local.u32 %r{{[0-9]+}}, 
[%rd{{[0-9]+}}] 2732 %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") unordered, align 4 2733 %c.add = add i32 %c.load, 1 2734 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2735 store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") unordered, align 4 2736 2737 ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2738 %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") unordered, align 8 2739 %d.add = add i64 %d.load, 1 2740 ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2741 store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") unordered, align 8 2742 2743 ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2744 %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") unordered, align 4 2745 %e.add = fadd float %e.load, 1. 2746 ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2747 store atomic float %e.add, ptr addrspace(5) %e syncscope("device") unordered, align 4 2748 2749 ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2750 %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") unordered, align 8 2751 %f.add = fadd double %f.load, 1. 
2752 ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2753 store atomic double %f.add, ptr addrspace(5) %e syncscope("device") unordered, align 8 2754 2755 ret void 2756} 2757 2758; CHECK-LABEL: local_unordered_volatile_gpu 2759define void @local_unordered_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 2760 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2761 %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") unordered, align 1 2762 %a.add = add i8 %a.load, 1 2763 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2764 store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") unordered, align 1 2765 2766 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2767 %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") unordered, align 2 2768 %b.add = add i16 %b.load, 1 2769 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2770 store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") unordered, align 2 2771 2772 ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2773 %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") unordered, align 4 2774 %c.add = add i32 %c.load, 1 2775 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2776 store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") unordered, align 4 2777 2778 ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2779 %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") unordered, align 8 2780 %d.add = add i64 %d.load, 1 2781 ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2782 store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") unordered, align 8 2783 2784 ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2785 %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") unordered, align 4 2786 %e.add = fadd float %e.load, 1. 
2787 ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2788 store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") unordered, align 4 2789 2790 ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2791 %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") unordered, align 8 2792 %f.add = fadd double %f.load, 1. 2793 ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2794 store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") unordered, align 8 2795 2796 ret void 2797} 2798 2799; CHECK-LABEL: local_unordered_cta 2800define void @local_unordered_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 2801 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2802 %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") unordered, align 1 2803 %a.add = add i8 %a.load, 1 2804 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2805 store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") unordered, align 1 2806 2807 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2808 %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") unordered, align 2 2809 %b.add = add i16 %b.load, 1 2810 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2811 store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") unordered, align 2 2812 2813 ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2814 %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") unordered, align 4 2815 %c.add = add i32 %c.load, 1 2816 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2817 store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") unordered, align 4 2818 2819 ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2820 %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") unordered, align 8 2821 %d.add = add i64 %d.load, 1 2822 ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2823 store atomic 
i64 %d.add, ptr addrspace(5) %d syncscope("block") unordered, align 8 2824 2825 ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2826 %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") unordered, align 4 2827 %e.add = fadd float %e.load, 1. 2828 ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2829 store atomic float %e.add, ptr addrspace(5) %e syncscope("block") unordered, align 4 2830 2831 ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2832 %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") unordered, align 8 2833 %f.add = fadd double %f.load, 1. 2834 ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2835 store atomic double %f.add, ptr addrspace(5) %e syncscope("block") unordered, align 8 2836 2837 ret void 2838} 2839 2840; CHECK-LABEL: local_unordered_volatile_cta 2841define void @local_unordered_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 2842 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2843 %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") unordered, align 1 2844 %a.add = add i8 %a.load, 1 2845 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2846 store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") unordered, align 1 2847 2848 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2849 %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") unordered, align 2 2850 %b.add = add i16 %b.load, 1 2851 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2852 store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") unordered, align 2 2853 2854 ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2855 %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") unordered, align 4 2856 %c.add = add i32 %c.load, 1 2857 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2858 store atomic volatile i32 %c.add, ptr addrspace(5) 
%c syncscope("block") unordered, align 4 2859 2860 ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2861 %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") unordered, align 8 2862 %d.add = add i64 %d.load, 1 2863 ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2864 store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") unordered, align 8 2865 2866 ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2867 %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") unordered, align 4 2868 %e.add = fadd float %e.load, 1. 2869 ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2870 store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") unordered, align 4 2871 2872 ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2873 %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") unordered, align 8 2874 %f.add = fadd double %f.load, 1. 2875 ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2876 store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") unordered, align 8 2877 2878 ret void 2879} 2880 2881; CHECK-LABEL: local_monotonic_gpu 2882define void @local_monotonic_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 2883 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2884 %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") monotonic, align 1 2885 %a.add = add i8 %a.load, 1 2886 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2887 store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") monotonic, align 1 2888 2889 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2890 %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") monotonic, align 2 2891 %b.add = add i16 %b.load, 1 2892 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2893 store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") monotonic, align 2 
2894 2895 ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2896 %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") monotonic, align 4 2897 %c.add = add i32 %c.load, 1 2898 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2899 store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") monotonic, align 4 2900 2901 ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2902 %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") monotonic, align 8 2903 %d.add = add i64 %d.load, 1 2904 ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2905 store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") monotonic, align 8 2906 2907 ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2908 %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") monotonic, align 4 2909 %e.add = fadd float %e.load, 1. 2910 ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2911 store atomic float %e.add, ptr addrspace(5) %e syncscope("device") monotonic, align 4 2912 2913 ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2914 %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") monotonic, align 8 2915 %f.add = fadd double %f.load, 1. 
2916 ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2917 store atomic double %f.add, ptr addrspace(5) %e syncscope("device") monotonic, align 8 2918 2919 ret void 2920} 2921 2922; CHECK-LABEL: local_monotonic_volatile_gpu 2923define void @local_monotonic_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 2924 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2925 %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") monotonic, align 1 2926 %a.add = add i8 %a.load, 1 2927 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2928 store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") monotonic, align 1 2929 2930 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2931 %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") monotonic, align 2 2932 %b.add = add i16 %b.load, 1 2933 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2934 store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") monotonic, align 2 2935 2936 ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2937 %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") monotonic, align 4 2938 %c.add = add i32 %c.load, 1 2939 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2940 store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") monotonic, align 4 2941 2942 ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2943 %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") monotonic, align 8 2944 %d.add = add i64 %d.load, 1 2945 ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2946 store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") monotonic, align 8 2947 2948 ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2949 %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") monotonic, align 4 2950 %e.add = fadd float %e.load, 1. 
2951 ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2952 store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") monotonic, align 4 2953 2954 ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2955 %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") monotonic, align 8 2956 %f.add = fadd double %f.load, 1. 2957 ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2958 store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") monotonic, align 8 2959 2960 ret void 2961} 2962 2963; CHECK-LABEL: local_monotonic_cta 2964define void @local_monotonic_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 2965 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2966 %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") monotonic, align 1 2967 %a.add = add i8 %a.load, 1 2968 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2969 store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") monotonic, align 1 2970 2971 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 2972 %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") monotonic, align 2 2973 %b.add = add i16 %b.load, 1 2974 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 2975 store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") monotonic, align 2 2976 2977 ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 2978 %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") monotonic, align 4 2979 %c.add = add i32 %c.load, 1 2980 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 2981 store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") monotonic, align 4 2982 2983 ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] 2984 %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") monotonic, align 8 2985 %d.add = add i64 %d.load, 1 2986 ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} 2987 store atomic 
i64 %d.add, ptr addrspace(5) %d syncscope("block") monotonic, align 8 2988 2989 ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}] 2990 %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") monotonic, align 4 2991 %e.add = fadd float %e.load, 1. 2992 ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}} 2993 store atomic float %e.add, ptr addrspace(5) %e syncscope("block") monotonic, align 4 2994 2995 ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}] 2996 %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") monotonic, align 8 2997 %f.add = fadd double %f.load, 1. 2998 ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}} 2999 store atomic double %f.add, ptr addrspace(5) %e syncscope("block") monotonic, align 8 3000 3001 ret void 3002} 3003 3004; CHECK-LABEL: local_monotonic_volatile_cta 3005define void @local_monotonic_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { 3006 ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 3007 %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") monotonic, align 1 3008 %a.add = add i8 %a.load, 1 3009 ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 3010 store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") monotonic, align 1 3011 3012 ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] 3013 %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") monotonic, align 2 3014 %b.add = add i16 %b.load, 1 3015 ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} 3016 store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") monotonic, align 2 3017 3018 ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] 3019 %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") monotonic, align 4 3020 %c.add = add i32 %c.load, 1 3021 ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} 3022 store atomic volatile i32 %c.add, ptr addrspace(5) 
%c syncscope("block") monotonic, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") monotonic, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") monotonic, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") monotonic, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") monotonic, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; NOTE(review): the double tests below reuse %e (not %d) as the pointer —
  ; presumably intentional since only the lowering of the access is checked; confirm.
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") monotonic, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") monotonic, align 8

  ret void
}

;; The functions below test atomic (and atomic volatile) accesses, at every
;; ordering (acquire/release, seq_cst) and scope (sys, "device"/gpu,
;; "block"/cta), on the .local statespace (addrspace(5)).  The checked PTX is
;; a plain ld.local/st.local with no ordering or scope qualifier in every
;; case — see also the TODO at the top of the file about preserving
;; Concurrent Forward Progress for operations lowered this way.

; Atomic acquire loads / release stores, default (system) scope, local statespace.
; CHECK-LABEL: local_acq_rel_sys
define void @local_acq_rel_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a release, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b release, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c release, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d release, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e release, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e release, align 8

  ret void
}

; Atomic volatile acquire loads / release stores, default (system) scope, local statespace.
; CHECK-LABEL: local_acq_rel_volatile_sys
define void @local_acq_rel_volatile_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a release, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b release, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c release, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d release, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e release, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e release, align 8

  ret void
}

; Atomic acquire loads / release stores, syncscope("device") (PTX .gpu), local statespace.
; CHECK-LABEL: local_acq_rel_gpu
define void @local_acq_rel_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") release, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") release, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") release, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") release, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e syncscope("device") release, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e syncscope("device") release, align 8

  ret void
}

; Atomic volatile acquire loads / release stores, syncscope("device"), local statespace.
; CHECK-LABEL: local_acq_rel_volatile_gpu
define void @local_acq_rel_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") release, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") release, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") release, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") release, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") release, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") release, align 8

  ret void
}

; Atomic acquire loads / release stores, syncscope("block") (PTX .cta), local statespace.
; CHECK-LABEL: local_acq_rel_cta
define void @local_acq_rel_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") release, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") release, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") release, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("block") release, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e syncscope("block") release, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e syncscope("block") release, align 8

  ret void
}

; Atomic volatile acquire loads / release stores, syncscope("block"), local statespace.
; CHECK-LABEL: local_acq_rel_volatile_cta
define void @local_acq_rel_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") acquire, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") release, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") acquire, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") release, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") acquire, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("block") release, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") acquire, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") release, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") acquire, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") release, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") acquire, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") release, align 8

  ret void
}

; Atomic seq_cst loads and stores, default (system) scope, local statespace.
; CHECK-LABEL: local_seq_cst_sys
define void @local_seq_cst_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a seq_cst, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b seq_cst, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c seq_cst, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d seq_cst, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e seq_cst, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e seq_cst, align 8

  ret void
}

; Atomic volatile seq_cst loads and stores, default (system) scope, local statespace.
; CHECK-LABEL: local_seq_cst_volatile_sys
define void @local_seq_cst_volatile_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a seq_cst, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b seq_cst, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c seq_cst, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d seq_cst, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e seq_cst, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e seq_cst, align 8

  ret void
}

; Atomic seq_cst loads and stores, syncscope("device"), local statespace.
; CHECK-LABEL: local_seq_cst_gpu
define void @local_seq_cst_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("device") seq_cst, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("device") seq_cst, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("device") seq_cst, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("device") seq_cst, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 8

  ret void
}

; Atomic volatile seq_cst loads and stores, syncscope("device"), local statespace.
; CHECK-LABEL: local_seq_cst_volatile_gpu
define void @local_seq_cst_volatile_gpu(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("device") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("device") seq_cst, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("device") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("device") seq_cst, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("device") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("device") seq_cst, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("device") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("device") seq_cst, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("device") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("device") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("device") seq_cst, align 8

  ret void
}

; Atomic seq_cst loads and stores, syncscope("block"), local statespace.
; CHECK-LABEL: local_seq_cst_cta
define void @local_seq_cst_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic i8, ptr addrspace(5) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i8 %a.add, ptr addrspace(5) %a syncscope("block") seq_cst, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic i16, ptr addrspace(5) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic i16 %b.add, ptr addrspace(5) %b syncscope("block") seq_cst, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic i32, ptr addrspace(5) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic i32 %c.add, ptr addrspace(5) %c syncscope("block") seq_cst, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic i64, ptr addrspace(5) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic i64 %d.add, ptr addrspace(5) %d syncscope("block") seq_cst, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic float, ptr addrspace(5) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic float %e.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic double, ptr addrspace(5) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic double %f.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 8

  ret void
}

; Atomic volatile seq_cst loads and stores, syncscope("block"), local statespace.
; CHECK-LABEL: local_seq_cst_volatile_cta
define void @local_seq_cst_volatile_cta(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
  ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %a.load = load atomic volatile i8, ptr addrspace(5) %a syncscope("block") seq_cst, align 1
  %a.add = add i8 %a.load, 1
  ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i8 %a.add, ptr addrspace(5) %a syncscope("block") seq_cst, align 1

  ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
  %b.load = load atomic volatile i16, ptr addrspace(5) %b syncscope("block") seq_cst, align 2
  %b.add = add i16 %b.load, 1
  ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
  store atomic volatile i16 %b.add, ptr addrspace(5) %b syncscope("block") seq_cst, align 2

  ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  %c.load = load atomic volatile i32, ptr addrspace(5) %c syncscope("block") seq_cst, align 4
  %c.add = add i32 %c.load, 1
  ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
  store atomic volatile i32 %c.add, ptr addrspace(5) %c syncscope("block") seq_cst, align 4

  ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %d.load = load atomic volatile i64, ptr addrspace(5) %d syncscope("block") seq_cst, align 8
  %d.add = add i64 %d.load, 1
  ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
  store atomic volatile i64 %d.add, ptr addrspace(5) %d syncscope("block") seq_cst, align 8

  ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  %e.load = load atomic volatile float, ptr addrspace(5) %e syncscope("block") seq_cst, align 4
  %e.add = fadd float %e.load, 1.
  ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
  store atomic volatile float %e.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 4

  ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  %f.load = load atomic volatile double, ptr addrspace(5) %e syncscope("block") seq_cst, align 8
  %f.add = fadd double %f.load, 1.
  ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
  store atomic volatile double %f.add, ptr addrspace(5) %e syncscope("block") seq_cst, align 8

  ret void
}