xref: /llvm-project/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu (revision 76894c5e6e20bfe8a30f7d8bdd39c41a7af54d65)
1 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \
2 // RUN:  -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
3 // RUN:  -o - | FileCheck %s
4 
// Minimal stand-in for the __device__ qualifier: this test includes no CUDA/HIP
// headers, so the attribute is spelled out directly.
5 #define __device__ __attribute__((device))
// LP: pointer to float in address space 3. The builtin call below casts its
// generic pointer argument to this type, and the expected IR shows the
// matching `ptr addrspace(3)` cast.
6 typedef __attribute__((address_space(3))) float *LP;
7 
8 // CHECK-LABEL: test_ds_atomic_add_f32
9 // CHECK: %[[ADDR_ADDR:.*]] = alloca ptr, align 8, addrspace(5)
10 // CHECK: %[[ADDR_ADDR_ASCAST_PTR:.*]] = addrspacecast ptr addrspace(5) %[[ADDR_ADDR]] to ptr
11 // CHECK: store ptr %addr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8
12 // CHECK: %[[ADDR_ADDR_ASCAST:.*]] = load ptr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8
13 // CHECK: %[[AS_CAST:.*]] = addrspacecast ptr %[[ADDR_ADDR_ASCAST]] to ptr addrspace(3)
14 // CHECK: [[TMP2:%.+]] = load float, ptr %val.addr.ascast, align 4
15 // CHECK: [[TMP3:%.+]] = atomicrmw fadd ptr addrspace(3) %[[AS_CAST]], float [[TMP2]] monotonic, align 4
16 // CHECK: %4 = load ptr, ptr %rtn.ascast, align 8
17 // CHECK: store float [[TMP3]], ptr %4, align 4
// Device function whose emitted LLVM IR is inspected by the FileCheck
// directives above; the RUN line only emits IR (-emit-llvm), the code is not
// executed by this test.
//
// It casts `addr` to LP (float pointer in address space 3), calls
// __builtin_amdgcn_ds_faddf with that pointer, `val`, and three literal 0
// arguments, then stores the builtin's result through `rtn`.
//
// NOTE(review): `rtn` is never initialized before being dereferenced. This
// appears intentional, since the directives expect exactly a load of the
// pointer from %rtn.ascast followed by a store through it.
// The directives also refer to the IR values %addr, %val.addr.ascast and
// %rtn.ascast, so `addr`, `val` and `rtn` must keep their names.
18 __device__ void test_ds_atomic_add_f32(float *addr, float val) {
19   float *rtn;
20   *rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0);
21 }
22