;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs | FileCheck %s
;
; Code generation checks for the llvm.amdgcn.raw.ptr.buffer.atomic.* intrinsics.
; The same expectations are applied to two targets (verde and tonga), both with
; the atomic optimizer strategy set to None.
;
; Origin: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
;         (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
; test1: a dependent chain of swap atomics in which each result feeds the next
; call's data operand. The calls cover these addressing forms:
;   - constant zero offset                -> "off"
;   - %voffset                            -> "offen"
;   - %voffset + 42                       -> "offen offset:42"
;   - constant offset 4, soffset 8188     -> an SGPR loaded with 0x1ffc, "offset:4"
; Calls whose result is consumed are expected with "glc"; the one call whose
; result is unused (%unused) is expected with nothing after the soffset operand.
; The last call uses the f32 overload of the intrinsic.
;CHECK-LABEL: {{^}}test1:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 offen offset:42 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:4 glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
define amdgpu_ps float @test1(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %o1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %data, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o1, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %off5 = add i32 %voffset, 42
  %o5 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o3, ptr addrspace(8) %rsrc, i32 %off5, i32 0, i32 0)
  %o6 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o5, ptr addrspace(8) %rsrc, i32 4, i32 8188, i32 0)
  %unused = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o6, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o7 = bitcast i32 %o6 to float
  %out = call float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float %o7, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret float %out
}
; test2: one call for each of add, sub, smin, umin, smax, umax, and, or, xor,
; inc and dec, chained so that every result is the data operand of the next
; call. All calls address through %voffset ("offen"). The calls that pass 2 as
; their last operand (sub, umin, umax, or) are the ones expected to carry "slc"
; in addition to "glc".
;CHECK-LABEL: {{^}}test2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_sub v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_smin v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_umin v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_smax v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_umax v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_and v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_or v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_inc v0, v1, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_dec v0, v1, s[0:3], 0 offen glc
define amdgpu_ps float @test2(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %t1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 %t1, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 %t2, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 %t3, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t5 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 %t4, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t6 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 %t5, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t7 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 %t6, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t8 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 %t7, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t9 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 %t8, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t10 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32 %t9, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t11 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32 %t10, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %out = bitcast i32 %t11 to float
  ret float %out
}
; test3: a dependent chain of 32-bit cmpswap atomics using a constant zero
; offset, %voffset, %voffset + 44 (expected as "offset:44"), and a constant
; offset 4 combined with soffset 8188 (expected as an SGPR loaded with 0x1ffc).
; The data operand is only matched as some VGPR range, for the reason given
; below. The %vindex parameter is not referenced in the function body.
;
; Ideally, we would teach tablegen & friends that cmpswap only modifies the
; first vgpr. Since we don't do that yet, the register allocator will have to
; create copies which we don't bother to track here.
;
;CHECK-LABEL: {{^}}test3:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 offen offset:44 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc
define amdgpu_ps float @test3(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
main_body:
  %o1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o1, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %ofs.5 = add i32 %voffset, 44
  %o5 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o3, i32 %cmp, ptr addrspace(8) %rsrc, i32 %ofs.5, i32 0, i32 0)
  %o6 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o5, i32 %cmp, ptr addrspace(8) %rsrc, i32 4, i32 8188, i32 0)

; Detecting the no-return variant doesn't work right now because of how the
; intrinsic is replaced by an instruction that feeds into an EXTRACT_SUBREG.
; Since there probably isn't a reasonable use-case of cmpswap that discards
; the return value, that seems okay.
;
;  %unused = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o6, i32 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %out = bitcast i32 %o6 to float
  ret float %out
}
; test4: an add atomic whose data operand is the constant 1, whose buffer
; resource is undef, and whose offset is the constant 4. The only expectation
; is that a buffer_atomic_add with v0 as its first operand is emitted.
;CHECK-LABEL: {{^}}test4:
;CHECK: buffer_atomic_add v0,
define amdgpu_ps float @test4() {
main_body:
  %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 4, i32 0, i32 0)
  %v.float = bitcast i32 %v to float
  ret float %v.float
}
; test5: a dependent chain of 64-bit cmpswap atomics, expected to select
; buffer_atomic_cmpswap_x2. The calls use a constant zero offset, %voffset,
; %voffset + 44 (expected as "offset:44"), and a constant offset 4 combined
; with soffset 8188 (expected as an SGPR loaded with 0x1ffc). The %vindex
; parameter is not referenced in the function body. The final i64 result is
; converted to float with sitofp for the return value.
;CHECK-LABEL: {{^}}test5:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen offset:44 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc
define amdgpu_ps float @test5(ptr addrspace(8) inreg %rsrc, i64 %data, i64 %cmp, i32 %vindex, i32 %voffset) {
main_body:
  %o1 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %data, i64 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o3 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o1, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %ofs.5 = add i32 %voffset, 44
  %o5 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o3, i64 %cmp, ptr addrspace(8) %rsrc, i32 %ofs.5, i32 0, i32 0)
  %o6 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o5, i64 %cmp, ptr addrspace(8) %rsrc, i32 4, i32 8188, i32 0)
  %out = sitofp i64 %o6 to float
  ret float %out
}
; test_volatile: an add atomic whose last operand is -2147483648, i.e. only
; bit 31 set. NOTE(review): going by the test name this is presumably the
; "volatile" flag of that operand; confirm against the intrinsic definition.
; The result is used, and the expected instruction ends right after "glc".
;CHECK-LABEL: {{^}}test_volatile:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK-DAG: s_waitcnt vmcnt(0)
define amdgpu_ps float @test_volatile(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %t1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 -2147483648)
  %out = bitcast i32 %t1 to float
  ret float %out
}
; test_volatile_noret: the same add atomic with last operand -2147483648 (only
; bit 31 set), but in a void function that never uses the result. The expected
; instruction ends right after "offen", i.e. without "glc".
; NOTE(review): bit 31 is presumably the "volatile" flag, per the test name.
;CHECK-LABEL: {{^}}test_volatile_noret:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 offen{{$}}
define amdgpu_ps void @test_volatile_noret(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %t1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 -2147483648)
  ret void
}
; Declarations of the intrinsic overloads called by the functions in this file.
; Each takes the data operand(s), a ptr addrspace(8) buffer resource and three
; trailing i32 operands; cmpswap takes an additional compare operand.
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #0
declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32) #0

; Attribute group applied to every declaration above.
attributes #0 = { nounwind }