xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_nortn.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
4; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
5; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
6
; f32 buffer atomic fadd whose result is discarded: %ret is never used and the
; function returns void. %rsrc and %soffset are passed `inreg`; %val and
; %voffset are ordinary arguments. The trailing immarg operand of the call is 24.
;
; Every target is expected to emit a single buffer_atomic_add_f32 using the
; `offen` form with v1 as the offset register. The targets differ only in:
;   - registers: s[16:19], s20 on GFX908/GFX90A vs. s[0:3], s16 on GFX940/GFX12
;   - trailing modifier: none on GFX908, `scc` on GFX90A, `sc1` on GFX940,
;     `scope:SCOPE_SYS` on GFX12
; NOTE(review): 24 is presumably a set of cache-policy/aux bits -- confirm the
; bit meanings against the intrinsic's definition.
;
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
8; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
9; GFX908:       ; %bb.0:
10; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX908-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen
12; GFX908-NEXT:    s_waitcnt vmcnt(0)
13; GFX908-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
16; GFX90A:       ; %bb.0:
17; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen scc
19; GFX90A-NEXT:    s_waitcnt vmcnt(0)
20; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
23; GFX940:       ; %bb.0:
24; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc1
26; GFX940-NEXT:    s_waitcnt vmcnt(0)
27; GFX940-NEXT:    s_setpc_b64 s[30:31]
28;
29; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
30; GFX12:       ; %bb.0:
31; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
32; GFX12-NEXT:    s_wait_expcnt 0x0
33; GFX12-NEXT:    s_wait_samplecnt 0x0
34; GFX12-NEXT:    s_wait_bvhcnt 0x0
35; GFX12-NEXT:    s_wait_kmcnt 0x0
36; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen scope:SCOPE_SYS
37; GFX12-NEXT:    s_setpc_b64 s[30:31]
38  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 24)
39  ret void
40}
41
; f32 buffer atomic fadd with a constant-zero voffset and an unused result.
; The function takes no %voffset argument; the call passes literal 0 for that
; operand and 0 for the trailing immarg operand.
;
; Every target is expected to emit buffer_atomic_add_f32 with `off` in place of
; an offset register, and with no `offen`, `offset:` or cache modifiers.
; Only the resource/soffset registers differ: s[16:19], s20 on GFX908/GFX90A
; vs. s[0:3], s16 on GFX940/GFX12.
;
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
42define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) #0 {
43; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
44; GFX908:       ; %bb.0:
45; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX908-NEXT:    buffer_atomic_add_f32 v0, off, s[16:19], s20
47; GFX908-NEXT:    s_waitcnt vmcnt(0)
48; GFX908-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
51; GFX90A:       ; %bb.0:
52; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX90A-NEXT:    buffer_atomic_add_f32 v0, off, s[16:19], s20
54; GFX90A-NEXT:    s_waitcnt vmcnt(0)
55; GFX90A-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
58; GFX940:       ; %bb.0:
59; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX940-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16
61; GFX940-NEXT:    s_waitcnt vmcnt(0)
62; GFX940-NEXT:    s_setpc_b64 s[30:31]
63;
64; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
65; GFX12:       ; %bb.0:
66; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
67; GFX12-NEXT:    s_wait_expcnt 0x0
68; GFX12-NEXT:    s_wait_samplecnt 0x0
69; GFX12-NEXT:    s_wait_bvhcnt 0x0
70; GFX12-NEXT:    s_wait_kmcnt 0x0
71; GFX12-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16
72; GFX12-NEXT:    s_setpc_b64 s[30:31]
73  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
74  ret void
75}
76
; <2 x half> buffer atomic fadd with an unused result (the function returns
; void). %rsrc and %soffset are passed `inreg`; %val and %voffset are ordinary
; arguments. The trailing immarg operand of the call is 0.
;
; Every target is expected to emit buffer_atomic_pk_add_f16 using the `offen`
; form with v1 as the offset register and no cache modifiers. Only the
; resource/soffset registers differ: s[16:19], s20 on GFX908/GFX90A vs.
; s[0:3], s16 on GFX940/GFX12.
;
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
77define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
78; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
79; GFX908:       ; %bb.0:
80; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81; GFX908-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[16:19], s20 offen
82; GFX908-NEXT:    s_waitcnt vmcnt(0)
83; GFX908-NEXT:    s_setpc_b64 s[30:31]
84;
85; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
86; GFX90A:       ; %bb.0:
87; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[16:19], s20 offen
89; GFX90A-NEXT:    s_waitcnt vmcnt(0)
90; GFX90A-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
93; GFX940:       ; %bb.0:
94; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen
96; GFX940-NEXT:    s_waitcnt vmcnt(0)
97; GFX940-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
100; GFX12:       ; %bb.0:
101; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
102; GFX12-NEXT:    s_wait_expcnt 0x0
103; GFX12-NEXT:    s_wait_samplecnt 0x0
104; GFX12-NEXT:    s_wait_bvhcnt 0x0
105; GFX12-NEXT:    s_wait_kmcnt 0x0
106; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen
107; GFX12-NEXT:    s_setpc_b64 s[30:31]
108  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
109  ret void
110}
111
; <2 x half> buffer atomic fadd with a constant voffset and an unused result.
; The call passes literal 92 as the voffset operand and 0 as the trailing
; immarg operand.
;
; Every target is expected to emit buffer_atomic_pk_add_f16 with `off` in place
; of an offset register and the constant carried as `offset:92`. Only the
; resource/soffset registers differ: s[16:19], s20 on GFX908/GFX90A vs.
; s[0:3], s16 on GFX940/GFX12.
;
; NOTE(review): the function name says `0_voffset`, but the voffset passed is
; 92, not 0. The %voffset parameter is also declared but never used in the
; body. The check lines agree with the IR as written, so the test is
; self-consistent; confirm whether the name or the constant is the intended one.
;
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
112define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
113; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
114; GFX908:       ; %bb.0:
115; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX908-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[16:19], s20 offset:92
117; GFX908-NEXT:    s_waitcnt vmcnt(0)
118; GFX908-NEXT:    s_setpc_b64 s[30:31]
119;
120; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
121; GFX90A:       ; %bb.0:
122; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[16:19], s20 offset:92
124; GFX90A-NEXT:    s_waitcnt vmcnt(0)
125; GFX90A-NEXT:    s_setpc_b64 s[30:31]
126;
127; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
128; GFX940:       ; %bb.0:
129; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92
131; GFX940-NEXT:    s_waitcnt vmcnt(0)
132; GFX940-NEXT:    s_setpc_b64 s[30:31]
133;
134; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
135; GFX12:       ; %bb.0:
136; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
137; GFX12-NEXT:    s_wait_expcnt 0x0
138; GFX12-NEXT:    s_wait_samplecnt 0x0
139; GFX12-NEXT:    s_wait_bvhcnt 0x0
140; GFX12-NEXT:    s_wait_kmcnt 0x0
141; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92
142; GFX12-NEXT:    s_setpc_b64 s[30:31]
143  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 92, i32 %soffset, i32 0)
144  ret void
145}
146
; f32 buffer atomic fadd with an unused result, where the call passes 2 as the
; trailing immarg operand. The signature is otherwise the same as the first
; f32 test: %rsrc and %soffset are `inreg`, %val and %voffset are ordinary
; arguments.
;
; Every target is expected to emit buffer_atomic_add_f32 in the `offen` form.
; The trailing modifier produced by the immediate 2 is spelled differently per
; target: `slc` on GFX908 and GFX90A, `nt` on GFX940, `th:TH_ATOMIC_NT` on GFX12.
; The resource/soffset registers are s[16:19], s20 on GFX908/GFX90A and
; s[0:3], s16 on GFX940/GFX12.
;
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
147define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
148; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
149; GFX908:       ; %bb.0:
150; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GFX908-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen slc
152; GFX908-NEXT:    s_waitcnt vmcnt(0)
153; GFX908-NEXT:    s_setpc_b64 s[30:31]
154;
155; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
156; GFX90A:       ; %bb.0:
157; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen slc
159; GFX90A-NEXT:    s_waitcnt vmcnt(0)
160; GFX90A-NEXT:    s_setpc_b64 s[30:31]
161;
162; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
163; GFX940:       ; %bb.0:
164; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen nt
166; GFX940-NEXT:    s_waitcnt vmcnt(0)
167; GFX940-NEXT:    s_setpc_b64 s[30:31]
168;
169; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
170; GFX12:       ; %bb.0:
171; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
172; GFX12-NEXT:    s_wait_expcnt 0x0
173; GFX12-NEXT:    s_wait_samplecnt 0x0
174; GFX12-NEXT:    s_wait_bvhcnt 0x0
175; GFX12-NEXT:    s_wait_kmcnt 0x0
176; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen th:TH_ATOMIC_NT
177; GFX12-NEXT:    s_setpc_b64 s[30:31]
178  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
179  ret void
180}
181
; Declarations of the two intrinsic overloads exercised in this file: one
; returning float, one returning <2 x half>. As used by the calls in this file,
; the operands are, in order: the value to add, the buffer resource
; (ptr addrspace(8)), voffset, soffset, and a final i32 that must be an
; immediate (`immarg`). The tests here pass 0, 2 and 24 for that last operand.
182declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
183declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
184
; Attribute group #0 is attached to every function defined in this file.
185attributes #0 = { nounwind }
186