xref: /llvm-project/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3; XUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI %s
4
5; FIXME: broken on VI because flat instructions need to be emitted
6; instead of the addr64 equivalents of the _OFFSET variants.
7
8; Check that moving the pointer out of the resource descriptor to
9; vaddr works for atomics.
10
; Intrinsic called by both kernels in this file to obtain %tid, which is used
; as the per-lane index into %in and as the branch condition input.
11declare i32 @llvm.amdgcn.workitem.id.x() #1
12
; Returning form of the test kernel.
;
; Each work-item loads (volatile) a global pointer from %in[tid]. Unless
; tid == 1, it then performs a seq_cst `atomicrmw max` with %y on the i32
; located 100 elements past that pointer and stores the value returned by the
; atomic to %out. The %x argument is not referenced by the body.
;
; The autogenerated assertions below expect the atomic to be expanded into a
; compare-and-swap loop (buffer_atomic_cmpswap inside %atomicrmw.start) that
; addresses memory through the loaded pointer in v[1:2] using addr64 mode,
; followed by a buffer_store_dword of the result.
13define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
14; GCN-LABEL: atomic_max_i32:
15; GCN:       ; %bb.0:
16; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
17; GCN-NEXT:    s_mov_b32 s11, 0xf000
18; GCN-NEXT:    s_mov_b32 s10, 0
19; GCN-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
20; GCN-NEXT:    v_mov_b32_e32 v2, 0
21; GCN-NEXT:    s_waitcnt lgkmcnt(0)
22; GCN-NEXT:    s_mov_b64 s[8:9], s[2:3]
23; GCN-NEXT:    buffer_load_dwordx2 v[1:2], v[1:2], s[8:11], 0 addr64 glc
24; GCN-NEXT:    s_waitcnt vmcnt(0)
25; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
26; GCN-NEXT:    s_and_saveexec_b64 s[2:3], vcc
27; GCN-NEXT:    s_cbranch_execz .LBB0_4
28; GCN-NEXT:  ; %bb.1: ; %atomic
29; GCN-NEXT:    s_mov_b32 s8, s10
30; GCN-NEXT:    s_mov_b32 s9, s10
31; GCN-NEXT:    buffer_load_dword v4, v[1:2], s[8:11], 0 addr64 offset:400
32; GCN-NEXT:    s_load_dword s4, s[4:5], 0xf
33; GCN-NEXT:    s_mov_b64 s[2:3], 0
34; GCN-NEXT:  .LBB0_2: ; %atomicrmw.start
35; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
36; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
37; GCN-NEXT:    v_max_i32_e32 v3, s4, v4
38; GCN-NEXT:    s_waitcnt expcnt(0)
39; GCN-NEXT:    v_mov_b32_e32 v6, v4
40; GCN-NEXT:    v_mov_b32_e32 v5, v3
41; GCN-NEXT:    buffer_atomic_cmpswap v[5:6], v[1:2], s[8:11], 0 addr64 offset:400 glc
42; GCN-NEXT:    s_waitcnt vmcnt(0)
43; GCN-NEXT:    buffer_wbinvl1
44; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v4
45; GCN-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
46; GCN-NEXT:    v_mov_b32_e32 v4, v5
47; GCN-NEXT:    s_andn2_b64 exec, exec, s[2:3]
48; GCN-NEXT:    s_cbranch_execnz .LBB0_2
49; GCN-NEXT:  ; %bb.3: ; %atomicrmw.end
50; GCN-NEXT:    s_or_b64 exec, exec, s[2:3]
51; GCN-NEXT:    s_mov_b32 s3, 0xf000
52; GCN-NEXT:    s_mov_b32 s2, -1
53; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], 0
54; GCN-NEXT:  .LBB0_4: ; %exit
55; GCN-NEXT:    s_endpgm
56  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; The pointer operated on is itself loaded from memory, indexed per work-item.
57  %tid.gep = getelementptr ptr addrspace(1), ptr addrspace(1) %in, i32 %tid
58  %ptr = load volatile ptr addrspace(1), ptr addrspace(1) %tid.gep
  ; (tid ^ 1) != 0 is true for every work-item except tid == 1; the assertions
  ; above show this branch lowered to exec masking (s_and_saveexec_b64).
59  %xor = xor i32 %tid, 1
60  %cmp = icmp ne i32 %xor, 0
61  br i1 %cmp, label %atomic, label %exit
62
63atomic:
  ; 100 x i32 = 400 bytes, matching offset:400 in the expected buffer instructions.
64  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 100
65  %ret = atomicrmw max ptr addrspace(1) %gep, i32 %y seq_cst
  ; The atomic's result is consumed here, so its return value must be kept.
66  store i32 %ret, ptr addrspace(1) %out
67  br label %exit
68
69exit:
70  ret void
71}
72
; Non-returning form of the test kernel.
;
; Same shape as @atomic_max_i32: each work-item loads (volatile) a global
; pointer from %in[tid] and, unless tid == 1, performs a seq_cst
; `atomicrmw max` with %y on the i32 located 100 elements past that pointer.
; Here the atomic's result has no users and nothing is stored; the %out and %x
; arguments are not referenced by the body.
;
; The autogenerated assertions below still expect a compare-and-swap loop
; (buffer_atomic_cmpswap inside %atomicrmw.start) addressed through v[1:2] in
; addr64 mode, but the loop falls through directly to s_endpgm.
73define amdgpu_kernel void @atomic_max_i32_noret(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
74; GCN-LABEL: atomic_max_i32_noret:
75; GCN:       ; %bb.0:
76; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
77; GCN-NEXT:    s_mov_b32 s3, 0xf000
78; GCN-NEXT:    s_mov_b32 s2, 0
79; GCN-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
80; GCN-NEXT:    v_mov_b32_e32 v2, 0
81; GCN-NEXT:    s_waitcnt lgkmcnt(0)
82; GCN-NEXT:    buffer_load_dwordx2 v[1:2], v[1:2], s[0:3], 0 addr64 glc
83; GCN-NEXT:    s_waitcnt vmcnt(0)
84; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
85; GCN-NEXT:    s_and_saveexec_b64 s[0:1], vcc
86; GCN-NEXT:    s_cbranch_execz .LBB1_3
87; GCN-NEXT:  ; %bb.1: ; %atomic
88; GCN-NEXT:    s_mov_b32 s0, s2
89; GCN-NEXT:    s_mov_b32 s1, s2
90; GCN-NEXT:    buffer_load_dword v4, v[1:2], s[0:3], 0 addr64 offset:400
91; GCN-NEXT:    s_load_dword s6, s[4:5], 0xf
92; GCN-NEXT:    s_mov_b64 s[4:5], 0
93; GCN-NEXT:  .LBB1_2: ; %atomicrmw.start
94; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
95; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
96; GCN-NEXT:    v_max_i32_e32 v3, s6, v4
97; GCN-NEXT:    s_waitcnt expcnt(0)
98; GCN-NEXT:    v_mov_b32_e32 v6, v4
99; GCN-NEXT:    v_mov_b32_e32 v5, v3
100; GCN-NEXT:    buffer_atomic_cmpswap v[5:6], v[1:2], s[0:3], 0 addr64 offset:400 glc
101; GCN-NEXT:    s_waitcnt vmcnt(0)
102; GCN-NEXT:    buffer_wbinvl1
103; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v4
104; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
105; GCN-NEXT:    v_mov_b32_e32 v4, v5
106; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
107; GCN-NEXT:    s_cbranch_execnz .LBB1_2
108; GCN-NEXT:  .LBB1_3: ; %exit
109; GCN-NEXT:    s_endpgm
110  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; The pointer operated on is itself loaded from memory, indexed per work-item.
111  %tid.gep = getelementptr ptr addrspace(1), ptr addrspace(1) %in, i32 %tid
112  %ptr = load volatile ptr addrspace(1), ptr addrspace(1) %tid.gep
  ; (tid ^ 1) != 0 is true for every work-item except tid == 1; the assertions
  ; above show this branch lowered to exec masking (s_and_saveexec_b64).
113  %xor = xor i32 %tid, 1
114  %cmp = icmp ne i32 %xor, 0
115  br i1 %cmp, label %atomic, label %exit
116
117atomic:
  ; 100 x i32 = 400 bytes, matching offset:400 in the expected buffer instructions.
118  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 100
  ; %ret has no users in this kernel; only the memory side effect remains.
119  %ret = atomicrmw max ptr addrspace(1) %gep, i32 %y seq_cst
120  br label %exit
121
122exit:
123  ret void
124}
125
; #0 is attached to both kernel definitions in this file; #1 is attached to the
; workitem-id intrinsic declaration.
126attributes #0 = { nounwind }
127attributes #1 = { nounwind readnone }
128