xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940
3
; Kernel: seq_cst `atomicrmw fadd` of the constant 4.0 through %ptr (a plain
; `ptr`, no addrspace qualifier), with no syncscope given and
; !amdgpu.no.remote.memory attached.
; %ret is never read; the assertions below expect the flat_atomic_add_f32 form
; without a destination register, preceded by buffer_wbl2 and followed by
; buffer_inv, both with sc0 sc1.
4define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
5; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat:
6; GFX940:       ; %bb.0:
7; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
8; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
9; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
10; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
11; GFX940-NEXT:    buffer_wbl2 sc0 sc1
12; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
13; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
14; GFX940-NEXT:    buffer_inv sc0 sc1
15; GFX940-NEXT:    s_endpgm
16  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
17  ret void
18}
19
; Kernel with the same IR body as flat_atomic_fadd_f32_noret_pat, but carrying
; attribute group #0 ("denormal-fp-math-f32"="ieee,ieee").
; The assertions below expect the same instruction sequence as the variant
; without the attribute: a no-return flat_atomic_add_f32 between
; buffer_wbl2 sc0 sc1 and buffer_inv sc0 sc1.
20define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
21; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
22; GFX940:       ; %bb.0:
23; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
24; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
25; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
26; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
27; GFX940-NEXT:    buffer_wbl2 sc0 sc1
28; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
29; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
30; GFX940-NEXT:    buffer_inv sc0 sc1
31; GFX940-NEXT:    s_endpgm
32  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
33  ret void
34}
35
; Non-kernel function: seq_cst `atomicrmw fadd` of the constant 4.0 through
; %ptr, returning the value produced by the atomicrmw.
; The %data parameter is not referenced in the body; the addend is the
; literal 4.0.
; The assertions below expect the returning flat_atomic_add_f32 form (result
; in v0) with sc0 sc1, between buffer_wbl2 sc0 sc1 and buffer_inv sc0 sc1.
36define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
37; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat:
38; GFX940:       ; %bb.0:
39; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
41; GFX940-NEXT:    buffer_wbl2 sc0 sc1
42; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
43; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
44; GFX940-NEXT:    buffer_inv sc0 sc1
45; GFX940-NEXT:    s_setpc_b64 s[30:31]
46  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
47  ret float %ret
48}
49
; seq_cst `atomicrmw fadd` of a <2 x half> value on an addrspace(3) pointer,
; returning the atomicrmw result.
; The address is element 16383 of a <2 x half> array based at %ptr
; (16383 * 4 bytes = 65532), which the assertions below expect to appear as
; the immediate `offset:65532` on ds_pk_add_rtn_f16.
50define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
51; GFX940-LABEL: local_atomic_fadd_ret_v2f16_offset:
52; GFX940:       ; %bb.0:
53; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX940-NEXT:    ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
55; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
56; GFX940-NEXT:    s_setpc_b64 s[30:31]
57  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
58  %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
59  ret <2 x half> %result
60}
61
; seq_cst `atomicrmw fadd` of a <2 x half> value on an addrspace(3) pointer
; whose result (%unused) is never read.
; The address is element 16383 of a <2 x half> array based at %ptr
; (16383 * 4 bytes = 65532); the assertions below expect the non-returning
; ds_pk_add_f16 with the immediate `offset:65532`.
62define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
63; GFX940-LABEL: local_atomic_fadd_noret_v2f16_offset:
64; GFX940:       ; %bb.0:
65; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX940-NEXT:    ds_pk_add_f16 v0, v1 offset:65532
67; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
68; GFX940-NEXT:    s_setpc_b64 s[30:31]
69  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
70  %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
71  ret void
72}
73
; seq_cst, syncscope("agent") `atomicrmw fadd` of a <2 x half> value on an
; addrspace(1) pointer, returning the atomicrmw result.
; The address is element 256 of a <2 x half> array based at %ptr
; (256 * 4 bytes = 1024); the assertions below expect the returning
; global_atomic_pk_add_f16 (result in v0) with `offset:1024` and sc0,
; between buffer_wbl2 sc1 and buffer_inv sc1.
74define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
75; GFX940-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
76; GFX940:       ; %bb.0:
77; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX940-NEXT:    buffer_wbl2 sc1
79; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0
80; GFX940-NEXT:    s_waitcnt vmcnt(0)
81; GFX940-NEXT:    buffer_inv sc1
82; GFX940-NEXT:    s_setpc_b64 s[30:31]
83  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
84  %result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
85  ret <2 x half> %result
86}
87
; seq_cst, syncscope("agent") `atomicrmw fadd` of a <2 x half> value on an
; addrspace(1) pointer whose result (%unused) is never read.
; The address is element 256 of a <2 x half> array based at %ptr
; (256 * 4 bytes = 1024); the assertions below expect the non-returning
; global_atomic_pk_add_f16 with `offset:1024` and no sc0, between
; buffer_wbl2 sc1 and buffer_inv sc1.
88define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
89; GFX940-LABEL: global_atomic_fadd_noret_v2f16_agent_offset:
90; GFX940:       ; %bb.0:
91; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GFX940-NEXT:    buffer_wbl2 sc1
93; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:1024
94; GFX940-NEXT:    s_waitcnt vmcnt(0)
95; GFX940-NEXT:    buffer_inv sc1
96; GFX940-NEXT:    s_setpc_b64 s[30:31]
97  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
98  %unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
99  ret void
100}
101
; seq_cst, syncscope("agent") `atomicrmw fadd` of a <2 x half> value through
; a plain `ptr` (no addrspace qualifier), returning the atomicrmw result.
; The address is element 256 of a <2 x half> array based at %ptr
; (256 * 4 bytes = 1024); the assertions below expect the returning
; flat_atomic_pk_add_f16 (result in v0) with `offset:1024` and sc0, between
; buffer_wbl2 sc1 and buffer_inv sc1, and a wait on both vmcnt and lgkmcnt
; after the atomic.
102define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
103; GFX940-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
104; GFX940:       ; %bb.0:
105; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX940-NEXT:    buffer_wbl2 sc1
107; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
108; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
109; GFX940-NEXT:    buffer_inv sc1
110; GFX940-NEXT:    s_setpc_b64 s[30:31]
111  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
112  %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
113  ret <2 x half> %result
114}
115
; seq_cst, syncscope("agent") `atomicrmw fadd` of a <2 x half> value through
; a plain `ptr` (no addrspace qualifier) whose result (%unused) is never read.
; The address is element 256 of a <2 x half> array based at %ptr
; (256 * 4 bytes = 1024); the assertions below expect the non-returning
; flat_atomic_pk_add_f16 with `offset:1024` and no sc0, between
; buffer_wbl2 sc1 and buffer_inv sc1.
116define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
117; GFX940-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
118; GFX940:       ; %bb.0:
119; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX940-NEXT:    buffer_wbl2 sc1
121; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
122; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
123; GFX940-NEXT:    buffer_inv sc1
124; GFX940-NEXT:    s_setpc_b64 s[30:31]
125  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
126  %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
127  ret void
128}
129
; Attribute group #0: referenced only by flat_atomic_fadd_f32_noret_pat_ieee
; within this file; sets the "denormal-fp-math-f32" string attribute to
; "ieee,ieee".
130attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
131
; Empty metadata node used as the operand of !amdgpu.no.remote.memory on the
; three f32 flat atomicrmw instructions in this file.
132!0 = !{}
133