// Lit regression test: verifies the LLVM IR (SAFEIR/UNSAFEIR) and ISA
// (SAFE/UNSAFE) that clang emits for HIP floating-point atomic builtins on
// AMDGPU, with and without -munsafe-fp-atomics.
// NOTE(review): this file was reconstructed from a line-mangled extraction
// (joined lines with fused line numbers); every RUN:/CHECK: directive must
// stay on its own physical line for lit and FileCheck to work.

// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN:   -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
// RUN:   -fnative-half-arguments-and-returns | FileCheck -check-prefixes=CHECK,SAFEIR %s

// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN:   -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
// RUN:   -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=CHECK,UNSAFEIR %s

// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
// RUN:   -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \
// RUN:   -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s

// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
// RUN:   -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
// RUN:   -fnative-half-arguments-and-returns -munsafe-fp-atomics \
// RUN:   | FileCheck -check-prefix=UNSAFE %s

// REQUIRES: amdgpu-registered-target

#include "Inputs/cuda.h"
#include <stdatomic.h>

// float atomics: generic __atomic_* builtins (no scope) and scoped
// __hip_atomic_* builtins; the scoped forms carry !noalias.addrspace.
__global__ void ffp1(float *p) {
  // CHECK-LABEL: @_Z4ffp1Pf
  // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}}
  // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]]{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}

  // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]], !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

  // SAFE: _Z4ffp1Pf
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap

  // UNSAFE: _Z4ffp1Pf
  // UNSAFE: global_atomic_add_f32
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap

  __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
  __atomic_fetch_sub(p, 1.0f, memory_order_relaxed);
  __atomic_fetch_max(p, 1.0f, memory_order_relaxed);
  __atomic_fetch_min(p, 1.0f, memory_order_relaxed);

  __hip_atomic_fetch_add(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_sub(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
  __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
}

// double atomics: same builtin matrix at align 8.
__global__ void ffp2(double *p) {
  // CHECK-LABEL: @_Z4ffp2Pd
  // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}

  // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

  // SAFE-LABEL: @_Z4ffp2Pd
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64

  // UNSAFE-LABEL: @_Z4ffp2Pd
  // UNSAFE: global_atomic_add_f64
  // UNSAFE: global_atomic_cmpswap_x2
  // UNSAFE: global_atomic_max_f64
  // UNSAFE: global_atomic_min_f64
  // UNSAFE: global_atomic_max_f64
  // UNSAFE: global_atomic_min_f64
  __atomic_fetch_add(p, 1.0, memory_order_relaxed);
  __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
  __atomic_fetch_max(p, 1.0, memory_order_relaxed);
  __atomic_fetch_min(p, 1.0, memory_order_relaxed);
  __hip_atomic_fetch_add(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_sub(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
  __hip_atomic_fetch_max(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_min(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
}

// long double is the same as double for amdgcn.
__global__ void ffp3(long double *p) {
  // CHECK-LABEL: @_Z4ffp3Pe
  // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}

  // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

  // SAFE-LABEL: @_Z4ffp3Pe
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // SAFE: global_atomic_cmpswap_b64
  // UNSAFE-LABEL: @_Z4ffp3Pe
  // UNSAFE: global_atomic_cmpswap_x2
  // UNSAFE: global_atomic_max_f64
  // UNSAFE: global_atomic_min_f64
  // UNSAFE: global_atomic_max_f64
  // UNSAFE: global_atomic_min_f64
  __atomic_fetch_add(p, 1.0L, memory_order_relaxed);
  __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
  __atomic_fetch_max(p, 1.0L, memory_order_relaxed);
  __atomic_fetch_min(p, 1.0L, memory_order_relaxed);
  __hip_atomic_fetch_add(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_sub(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
  __hip_atomic_fetch_max(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_min(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
}

// float operand against a double* atomic: checks the implicit fpext.
__device__ double ffp4(double *p, float f) {
  // CHECK-LABEL: @_Z4ffp4Pdf
  // CHECK: fpext contract float {{.*}} to double
  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  __atomic_fetch_sub(p, f, memory_order_relaxed);
  return __hip_atomic_fetch_sub(p, f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
}

// int operand against a double* atomic: checks the implicit sitofp.
__device__ double ffp5(double *p, int i) {
  // CHECK-LABEL: @_Z4ffp5Pdi
  // CHECK: sitofp i32 {{.*}} to double
  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  __atomic_fetch_sub(p, i, memory_order_relaxed);

  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  return __hip_atomic_fetch_sub(p, i, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
}

// _Float16 atomics at align 2; expanded via cmpswap even with unsafe atomics.
__global__ void ffp6(_Float16 *p) {
  // CHECK-LABEL: @_Z4ffp6PDF16
  // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}}
  // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}

  // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

  // SAFE: _Z4ffp6PDF16
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap

  // UNSAFE: _Z4ffp6PDF16
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  __atomic_fetch_add(p, 1.0, memory_order_relaxed);
  __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
  __atomic_fetch_max(p, 1.0, memory_order_relaxed);
  __atomic_fetch_min(p, 1.0, memory_order_relaxed);

  __hip_atomic_fetch_add(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_sub(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
  __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
}

// Compare-exchange builtins: scoped HIP forms carry !noalias.addrspace too.
// CHECK-LABEL: @_Z12test_cmpxchgPiii
// CHECK: cmpxchg ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}}
// CHECK: cmpxchg weak ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}}
// CHECK: cmpxchg ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} syncscope("workgroup") monotonic monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
// CHECK: cmpxchg weak ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} syncscope("workgroup") monotonic monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
__device__ int test_cmpxchg(int *ptr, int cmp, int desired) {
  bool flag = __atomic_compare_exchange(ptr, &cmp, &desired, 0, memory_order_acquire, memory_order_acquire);
  flag = __atomic_compare_exchange_n(ptr, &cmp, desired, 1, memory_order_acquire, memory_order_acquire);
  flag = __hip_atomic_compare_exchange_strong(ptr, &cmp, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
  flag = __hip_atomic_compare_exchange_weak(ptr, &cmp, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
  return flag;
}

// The !noalias.addrspace range excludes the private address space (5).
// SAFEIR: ![[$NO_PRIVATE]] = !{i32 5, i32 6}
// UNSAFEIR: ![[$NO_PRIVATE]] = !{i32 5, i32 6}