xref: /llvm-project/llvm/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,SICI,SICIVI,PREGFX11,GCN %s
2; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=SICI,CIVI,SICIVI,PREGFX11,GCN %s
3; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=CIVI,SICIVI,GFX8PLUS,PREGFX11,GCN %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9PLUS,GFX8PLUS,PREGFX11,GCN %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX9PLUS,GFX8PLUS,GCN %s
6
; Returning 32-bit compare-and-swap through an addrspace(3) pointer with a
; constant element offset. The kernel installs %swap when the word 4 elements
; past %ptr equals 7, and stores the previously held value to %out.
;
; What the directives below expect:
;   * the default, bonaire and tonga runs expect an s_mov_b32 that writes m0,
;     while the gfx900 and gfx1100 runs expect no m0 reference;
;   * the pointer and the swap value are fetched with scalar loads and copied
;     into vector registers, and the compare constant 7 is materialised with
;     v_mov_b32_e32;
;   * every run except gfx1100 expects ds_cmpst_rtn_b32 with the compare
;     operand ahead of the swap operand; gfx1100 expects ds_cmpstore_rtn_b32
;     with those two operands in the opposite order;
;   * the 4-element i32 index is expected as the immediate offset:16.
; The unnamed [8 x i32] parameters sit between the named arguments --
; presumably padding so each argument gets its own scalar load; TODO confirm.
7; GCN-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
8; GFX9PLUS-NOT: m0
9; SICIVI-DAG: s_mov_b32 m0
10
11; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
12; SICI-DAG: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c
13; GFX8PLUS-DAG: s_load_{{dword|b32}} [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x4c
14; GFX8PLUS-DAG: s_load_{{dword|b32}} [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x70
15; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
16; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
17; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
18; PREGFX11: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
19; GFX11: ds_cmpstore_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VSWAP]], [[VCMP]] offset:16
20; GCN: s_endpgm
21define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_offset(ptr addrspace(1) %out, [8 x i32], ptr addrspace(3) %ptr, [8 x i32], i32 %swap) nounwind {
  ; Element index 4 of i32, i.e. 16 bytes past %ptr.
22  %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
  ; Compare against the constant 7; %swap is the replacement value.
23  %pair = cmpxchg ptr addrspace(3) %gep, i32 7, i32 %swap seq_cst monotonic
  ; Field 0 of the { i32, i1 } pair is the i32 half of the cmpxchg result.
24  %result = extractvalue { i32, i1 } %pair, 0
  ; Using the result is what makes this the returning variant of the test.
25  store i32 %result, ptr addrspace(1) %out, align 4
26  ret void
27}
28
; Returning 64-bit compare-and-swap through an addrspace(3) pointer with a
; constant element offset. The kernel installs %swap when the i64 that lies 4
; elements past %ptr equals 7, and stores the previously held value to %out.
;
; What the directives below expect:
;   * the default, bonaire and tonga runs expect an s_mov_b32 that writes m0,
;     while the gfx900 and gfx1100 runs expect no m0 reference;
;   * the swap value arrives as a 64-bit scalar register pair and each half
;     is copied into its own vector register;
;   * the 64-bit compare constant is built from two 32-bit moves, 7 for the
;     low half and 0 for the high half;
;   * every run except gfx1100 expects ds_cmpst_rtn_b64 with the compare pair
;     ahead of the swap pair; gfx1100 expects ds_cmpstore_rtn_b64 with the
;     pairs in the opposite order;
;   * the 4-element i64 index is expected as the immediate offset:32;
;   * the captured result register pair must show up again after the ds
;     instruction and before s_endpgm.
29; GCN-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
30; GFX9PLUS-NOT: m0
31; SICIVI-DAG: s_mov_b32 m0
32
33; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
34; SICI-DAG: s_load_dwordx2 s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
35; GFX8PLUS-DAG: s_load_{{dword|b32}} [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
36; GFX8PLUS-DAG: s_load_{{dwordx2|b64}} s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
37; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
38; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
39; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
40; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
41; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
42; PREGFX11: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVCMP]]:[[HIVCMP]]], v[[[LOSWAPV]]:[[HISWAPV]]] offset:32
43; GFX11: ds_cmpstore_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOSWAPV]]:[[HISWAPV]]], v[[[LOVCMP]]:[[HIVCMP]]] offset:32
44; GCN: [[RESULT]]
45; GCN: s_endpgm
46define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr, i64 %swap) nounwind {
  ; Element index 4 of i64, i.e. 32 bytes past %ptr.
47  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  ; Compare against the constant 7; %swap is the replacement value.
48  %pair = cmpxchg ptr addrspace(3) %gep, i64 7, i64 %swap seq_cst monotonic
  ; Field 0 of the { i64, i1 } pair is the i64 half of the cmpxchg result.
49  %result = extractvalue { i64, i1 } %pair, 0
  ; Using the result is what makes this the returning variant of the test.
50  store i64 %result, ptr addrspace(1) %out, align 8
51  ret void
52}
53
; Returning 32-bit compare-and-swap whose address is a variable index plus a
; constant: the element index is (%a - %b) + 4, so the constant part is 16
; bytes on top of a base that is only known at run time.
;
; What the directives below expect:
;   * the gfx900 and gfx1100 runs expect no m0 reference;
;   * the default-CPU run expects ds_cmpst_rtn_b32 with four vector operands
;     and lists no offset operand;
;   * the bonaire and tonga runs expect the same instruction followed by
;     offset:16;
;   * the gfx900 and gfx1100 runs expect ds_cmpst_rtn_b32 or
;     ds_cmpstore_rtn_b32 followed by offset:16.
; NOTE(review): the default-CPU pattern is a plain substring match, so on its
; own it would also accept a line that carries a trailing offset. If the
; intent is to require that no offset is folded there, anchor the pattern at
; end of line after confirming the actual llc output.
54; GCN-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset:
55; GFX9PLUS-NOT: m0
56; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
57; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
58; GFX9PLUS: ds_{{cmpst|cmpstore}}_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
59; GCN: s_endpgm
60define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
  ; Variable part of the index; its sign is not known at compile time.
61  %sub = sub i32 %a, %b
  ; Constant part of the index: 4 i32 elements, i.e. 16 bytes.
62  %add = add i32 %sub, 4
63  %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 %add
  ; Compare against the constant 7; %swap is the replacement value.
64  %pair = cmpxchg ptr addrspace(3) %gep, i32 7, i32 %swap seq_cst monotonic
  ; Field 0 of the { i32, i1 } pair is the i32 half of the cmpxchg result.
65  %result = extractvalue { i32, i1 } %pair, 0
66  store i32 %result, ptr addrspace(1) %out, align 4
67  ret void
68}
69
; Non-returning 32-bit compare-and-swap through an addrspace(3) pointer with
; a constant element offset. The kernel has no output pointer and never uses
; the cmpxchg result, and the directives expect the ds instruction form that
; has no destination register.
;
; What the directives below expect:
;   * the default, bonaire and tonga runs expect an s_mov_b32 that writes m0,
;     while the gfx900 and gfx1100 runs expect no m0 reference;
;   * the pointer and the swap value are fetched with scalar loads and copied
;     into vector registers, and the compare constant 7 is materialised with
;     v_mov_b32_e32;
;   * every run except gfx1100 expects ds_cmpst_b32 with the compare operand
;     ahead of the swap operand; gfx1100 expects ds_cmpstore_b32 with those
;     two operands in the opposite order;
;   * the 4-element i32 index is expected as the immediate offset:16.
; The unnamed [8 x i32] parameter sits between the two named arguments --
; presumably padding so each argument gets its own scalar load; TODO confirm.
70; GCN-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
71; GFX9PLUS-NOT: m0
72; SICIVI-DAG: s_mov_b32 m0
73
74
75; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
76; SICI-DAG: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x12
77; GFX8PLUS-DAG: s_load_{{dword|b32}} [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
78; GFX8PLUS-DAG: s_load_{{dword|b32}} [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x48
79; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
80; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
81; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
82; PREGFX11: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
83; GFX11: ds_cmpstore_b32 [[VPTR]], [[VSWAP]], [[VCMP]] offset:16
84; GCN: s_endpgm
85define amdgpu_kernel void @lds_atomic_cmpxchg_noret_i32_offset(ptr addrspace(3) %ptr, [8 x i32], i32 %swap) nounwind {
  ; Element index 4 of i32, i.e. 16 bytes past %ptr.
86  %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
  ; Compare against the constant 7; %swap is the replacement value.
87  %pair = cmpxchg ptr addrspace(3) %gep, i32 7, i32 %swap seq_cst monotonic
  ; Extracted but never used: nothing in this kernel consumes %result.
88  %result = extractvalue { i32, i1 } %pair, 0
89  ret void
90}
91
; Non-returning 64-bit compare-and-swap through an addrspace(3) pointer with
; a constant element offset. The kernel has no output pointer and never uses
; the cmpxchg result, and the directives expect the ds instruction form that
; has no destination register.
;
; What the directives below expect:
;   * the default, bonaire and tonga runs expect an s_mov_b32 that writes m0,
;     while the gfx900 and gfx1100 runs expect no m0 reference;
;   * the swap value arrives as a 64-bit scalar register pair and each half
;     is copied into its own vector register;
;   * the 64-bit compare constant is built from two 32-bit moves, 7 for the
;     low half and 0 for the high half;
;   * every run except gfx1100 expects ds_cmpst_b64 with the compare pair
;     ahead of the swap pair; gfx1100 expects ds_cmpstore_b64 with the pairs
;     in the opposite order;
;   * the 4-element i64 index is expected as the immediate offset:32.
92; GCN-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
93; GFX9PLUS-NOT: m0
94; SICIVI-DAG: s_mov_b32 m0
95
96; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
97; SICI-DAG: s_load_dwordx2 s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
98; GFX8PLUS-DAG: s_load_{{dword|b32}} [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
99; GFX8PLUS-DAG: s_load_{{dwordx2|b64}} s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
100; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
101; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
102; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
103; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
104; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
105; PREGFX11: ds_cmpst_b64 [[VPTR]], v[[[LOVCMP]]:[[HIVCMP]]], v[[[LOSWAPV]]:[[HISWAPV]]] offset:32
106; GFX11: ds_cmpstore_b64 [[VPTR]], v[[[LOSWAPV]]:[[HISWAPV]]], v[[[LOVCMP]]:[[HIVCMP]]] offset:32
107; GCN: s_endpgm
108define amdgpu_kernel void @lds_atomic_cmpxchg_noret_i64_offset(ptr addrspace(3) %ptr, i64 %swap) nounwind {
  ; Element index 4 of i64, i.e. 32 bytes past %ptr.
109  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  ; Compare against the constant 7; %swap is the replacement value.
110  %pair = cmpxchg ptr addrspace(3) %gep, i64 7, i64 %swap seq_cst monotonic
  ; Extracted but never used: nothing in this kernel consumes %result.
111  %result = extractvalue { i64, i1 } %pair, 0
112  ret void
113}
114