xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll (revision 5a81a559d69fb84e1e8ef623ac4b642081c14c51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 | FileCheck %s -check-prefix=GFX12-SDAG
3; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 -global-isel-abort=2 | FileCheck %s -check-prefix=GFX12-GISEL
4
; Declarations of the packed-fadd buffer intrinsics called by the tests below.
; All four follow one convention: parameters are unnamed and the trailing i32
; (always passed as the literal 0 by the callers in this file) is marked immarg.
5declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
6declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
7declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
8declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32 immarg)
9
; Raw-buffer <2 x half> atomic fadd whose result is discarded, with the constant
; 92 passed in the operand position where the variable-offset sibling test
; passes %voffset (%voffset is declared but unused here).
; Both the SelectionDAG and GlobalISel runs expect a single
; buffer_atomic_pk_add_f16 with `off` as its second operand and `offset:92`,
; immediately followed by s_endpgm.
10define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
11; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
12; GFX12-SDAG:       ; %bb.0:
13; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
14; GFX12-SDAG-NEXT:    s_endpgm
15;
16; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
17; GFX12-GISEL:       ; %bb.0:
18; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
19; GFX12-GISEL-NEXT:    s_endpgm
; %ret is never read: the function returns void.
20  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
21  ret void
22}
23
; Raw-buffer <2 x half> atomic fadd whose result is discarded, with %voffset
; passed through as a variable operand.
; Both the SelectionDAG and GlobalISel runs expect a single
; buffer_atomic_pk_add_f16 taking v1 with the `offen` modifier, immediately
; followed by s_endpgm.
24define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
25; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret:
26; GFX12-SDAG:       ; %bb.0:
27; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
28; GFX12-SDAG-NEXT:    s_endpgm
29;
30; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret:
31; GFX12-GISEL:       ; %bb.0:
32; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
33; GFX12-GISEL-NEXT:    s_endpgm
; %ret is never read: the function returns void.
34  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
35  ret void
36}
37
; Raw-buffer <2 x half> atomic fadd that returns the intrinsic's result, with
; the constant 92 passed in the operand position where the variable-offset
; sibling test passes %voffset (%voffset is declared but unused here).
; Both the SelectionDAG and GlobalISel runs expect buffer_atomic_pk_add_f16
; with `off`, `offset:92` and `th:TH_ATOMIC_RETURN`, then `s_wait_loadcnt 0x0`
; before the shader-part epilog.
38define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
39; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
40; GFX12-SDAG:       ; %bb.0:
41; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
42; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
43; GFX12-SDAG-NEXT:    ; return to shader part epilog
44;
45; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
46; GFX12-GISEL:       ; %bb.0:
47; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
48; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
49; GFX12-GISEL-NEXT:    ; return to shader part epilog
50  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
51  ret <2 x half> %ret
52}
53
; Raw-buffer <2 x half> atomic fadd that returns the intrinsic's result, with
; %voffset passed through as a variable operand.
; Both the SelectionDAG and GlobalISel runs expect buffer_atomic_pk_add_f16
; taking v1 with `offen` and `th:TH_ATOMIC_RETURN`, then `s_wait_loadcnt 0x0`
; before the shader-part epilog.
54define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
55; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret:
56; GFX12-SDAG:       ; %bb.0:
57; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
58; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
59; GFX12-SDAG-NEXT:    ; return to shader part epilog
60;
61; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret:
62; GFX12-GISEL:       ; %bb.0:
63; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
64; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
65; GFX12-GISEL-NEXT:    ; return to shader part epilog
66  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
67  ret <2 x half> %ret
68}
69
; Struct-buffer <2 x half> atomic fadd whose result is used: the call receives
; both %vindex and %voffset, and the returned <2 x half> is bitcast to float
; and returned from the shader.
; Both the SelectionDAG and GlobalISel runs expect buffer_atomic_pk_add_f16
; taking the register pair v[1:2] with `idxen offen` and
; `th:TH_ATOMIC_RETURN`, then `s_wait_loadcnt 0x0` before the shader-part
; epilog.
70define amdgpu_ps float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
71; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_ret:
72; GFX12-SDAG:       ; %bb.0:
73; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
74; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
75; GFX12-SDAG-NEXT:    ; return to shader part epilog
76;
77; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_ret:
78; GFX12-GISEL:       ; %bb.0:
79; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
80; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
81; GFX12-GISEL-NEXT:    ; return to shader part epilog
82  %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; Reinterpret the 32-bit vector as float to match the declared return type.
83  %r = bitcast <2 x half> %orig to float
84  ret float %r
85}
86
; Struct-buffer <2 x half> atomic fadd whose result is discarded; the call
; receives both %vindex and %voffset.
; Both the SelectionDAG and GlobalISel runs expect a single
; buffer_atomic_pk_add_f16 taking the register pair v[1:2] with `idxen offen`
; (no TH_ATOMIC_RETURN), immediately followed by s_endpgm.
87define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
88; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_noret:
89; GFX12-SDAG:       ; %bb.0:
90; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
91; GFX12-SDAG-NEXT:    s_endpgm
92;
93; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_noret:
94; GFX12-GISEL:       ; %bb.0:
95; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
96; GFX12-GISEL-NEXT:    s_endpgm
; %orig is never read: the function returns void.
97  %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
98  ret void
99}
100
; Struct-buffer <2 x bfloat> atomic fadd whose result is used: the returned
; vector is stored through a null pointer, and the function itself returns the
; constant 1.0 rather than the atomic's result.
; Both the SelectionDAG and GlobalISel runs expect buffer_atomic_pk_add_bf16
; taking v[1:2] with `idxen offen` and `th:TH_ATOMIC_RETURN`, v1/v2 zeroed for
; the store address, `s_wait_loadcnt 0x0`, a flat_store_b32 of v0, v0 set to
; 1.0, and `s_wait_dscnt 0x0` before the shader-part epilog.
101define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
102; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret:
103; GFX12-SDAG:       ; %bb.0:
104; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
105; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, 0
106; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
107; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
108; GFX12-SDAG-NEXT:    flat_store_b32 v[1:2], v0
109; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1.0
110; GFX12-SDAG-NEXT:    s_wait_dscnt 0x0
111; GFX12-SDAG-NEXT:    ; return to shader part epilog
112;
113; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret:
114; GFX12-GISEL:       ; %bb.0:
115; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
116; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
117; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, 0
118; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
119; GFX12-GISEL-NEXT:    flat_store_b32 v[1:2], v0
120; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 1.0
121; GFX12-GISEL-NEXT:    s_wait_dscnt 0x0
122; GFX12-GISEL-NEXT:    ; return to shader part epilog
123  %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; NOTE(review): the store is presumably what keeps %orig live so the returning
; form of the atomic is selected - confirm.
124  store <2 x bfloat> %orig, ptr null
125  ret float 1.0
126}
127
; Struct-buffer <2 x bfloat> atomic fadd whose result is discarded; the call
; receives both %vindex and %voffset.
; Both the SelectionDAG and GlobalISel runs expect a single
; buffer_atomic_pk_add_bf16 taking the register pair v[1:2] with `idxen offen`
; (no TH_ATOMIC_RETURN), immediately followed by s_endpgm.
128define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
129; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret:
130; GFX12-SDAG:       ; %bb.0:
131; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
132; GFX12-SDAG-NEXT:    s_endpgm
133;
134; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret:
135; GFX12-GISEL:       ; %bb.0:
136; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
137; GFX12-GISEL-NEXT:    s_endpgm
; %orig is never read: the function returns void.
138  %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
139  ret void
140}
141
; Raw-buffer <2 x bfloat> atomic fadd whose result is discarded, with %voffset
; passed through as a variable operand. Unlike the other discarded-result tests
; in this file, the function name carries no `_noret` suffix.
; Both the SelectionDAG and GlobalISel runs expect a single
; buffer_atomic_pk_add_bf16 taking v1 with `offen`, immediately followed by
; s_endpgm.
142define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
143; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16:
144; GFX12-SDAG:       ; %bb.0:
145; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
146; GFX12-SDAG-NEXT:    s_endpgm
147;
148; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16:
149; GFX12-GISEL:       ; %bb.0:
150; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
151; GFX12-GISEL-NEXT:    s_endpgm
; %ret is never read: the function returns void.
152  %ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
153  ret void
154}
155
; Raw-buffer <2 x bfloat> atomic fadd whose result is used: the returned vector
; is stored through a null pointer, and the function itself returns the
; constant 1.0 rather than the atomic's result.
; Both the SelectionDAG and GlobalISel runs expect buffer_atomic_pk_add_bf16
; taking v1 with `offen` and `th:TH_ATOMIC_RETURN`, v1/v2 zeroed for the store
; address, `s_wait_loadcnt 0x0`, a flat_store_b32 of v0, v0 set to 1.0, and
; `s_wait_dscnt 0x0` before the shader-part epilog.
156define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
157; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret:
158; GFX12-SDAG:       ; %bb.0:
159; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
160; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, 0
161; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
162; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
163; GFX12-SDAG-NEXT:    flat_store_b32 v[1:2], v0
164; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1.0
165; GFX12-SDAG-NEXT:    s_wait_dscnt 0x0
166; GFX12-SDAG-NEXT:    ; return to shader part epilog
167;
168; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret:
169; GFX12-GISEL:       ; %bb.0:
170; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
171; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
172; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, 0
173; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
174; GFX12-GISEL-NEXT:    flat_store_b32 v[1:2], v0
175; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 1.0
176; GFX12-GISEL-NEXT:    s_wait_dscnt 0x0
177; GFX12-GISEL-NEXT:    ; return to shader part epilog
178  %orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
; NOTE(review): the store is presumably what keeps %orig live so the returning
; form of the atomic is selected - confirm.
179  store <2 x bfloat> %orig, ptr null
180  ret float 1.0
181}
182