xref: /llvm-project/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll (revision 8ee5e19c879ee2d467aa0f1eb8f1d8ed34321496)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
6
7declare amdgpu_gfx void @use(...)
8
9define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
10; GISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
11; GISEL-GFX11:       ; %bb.0:
12; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GISEL-GFX11-NEXT:    s_endpgm
14;
15; GISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
16; GISEL-GFX10:       ; %bb.0:
17; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GISEL-GFX10-NEXT:    s_endpgm
19;
20; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
21; DAGISEL-GFX11:       ; %bb.0:
22; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; DAGISEL-GFX11-NEXT:    s_endpgm
24;
25; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
26; DAGISEL-GFX10:       ; %bb.0:
27; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; DAGISEL-GFX10-NEXT:    s_endpgm
29  ret void
30}
31
32define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, <4 x i32> %vgpr) {
33; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
34; GISEL-GFX11:       ; %bb.0:
35; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GISEL-GFX11-NEXT:    v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9
37; GISEL-GFX11-NEXT:    v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11
38; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
39; GISEL-GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
40; GISEL-GFX11-NEXT:    s_mov_b32 s4, use@abs32@lo
41; GISEL-GFX11-NEXT:    s_mov_b32 s5, use@abs32@hi
42; GISEL-GFX11-NEXT:    s_mov_b32 s32, 0
43; GISEL-GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
44; GISEL-GFX11-NEXT:    s_endpgm
45;
46; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
47; GISEL-GFX10:       ; %bb.0:
48; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49; GISEL-GFX10-NEXT:    v_mov_b32_e32 v4, v8
50; GISEL-GFX10-NEXT:    v_mov_b32_e32 v5, v9
51; GISEL-GFX10-NEXT:    v_mov_b32_e32 v6, v10
52; GISEL-GFX10-NEXT:    v_mov_b32_e32 v7, v11
53; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
54; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, s1
55; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, s2
56; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, s3
57; GISEL-GFX10-NEXT:    s_mov_b64 s[0:1], s[48:49]
58; GISEL-GFX10-NEXT:    s_mov_b32 s4, use@abs32@lo
59; GISEL-GFX10-NEXT:    s_mov_b32 s5, use@abs32@hi
60; GISEL-GFX10-NEXT:    s_mov_b64 s[2:3], s[50:51]
61; GISEL-GFX10-NEXT:    s_mov_b32 s32, 0
62; GISEL-GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
63; GISEL-GFX10-NEXT:    s_endpgm
64;
65; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
66; DAGISEL-GFX11:       ; %bb.0:
67; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10
69; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8
70; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
71; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
72; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, use@abs32@hi
73; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, use@abs32@lo
74; DAGISEL-GFX11-NEXT:    s_mov_b32 s32, 0
75; DAGISEL-GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
76; DAGISEL-GFX11-NEXT:    s_endpgm
77;
78; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
79; DAGISEL-GFX10:       ; %bb.0:
80; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v7, v11
82; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v6, v10
83; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v5, v9
84; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v4, v8
85; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
86; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, s1
87; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, s2
88; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, s3
89; DAGISEL-GFX10-NEXT:    s_mov_b64 s[0:1], s[48:49]
90; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, use@abs32@hi
91; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, use@abs32@lo
92; DAGISEL-GFX10-NEXT:    s_mov_b64 s[2:3], s[50:51]
93; DAGISEL-GFX10-NEXT:    s_mov_b32 s32, 0
94; DAGISEL-GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
95; DAGISEL-GFX10-NEXT:    s_endpgm
96  call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr)
97  ret void
98}
99
100define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 x i32> %vgprs) {
101; GISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
102; GISEL-GFX11:       ; %bb.0:
103; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GISEL-GFX11-NEXT:    s_mov_b32 s32, 0
105; GISEL-GFX11-NEXT:    v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9
106; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 4
107; GISEL-GFX11-NEXT:    scratch_store_b32 off, v16, s32
108; GISEL-GFX11-NEXT:    scratch_store_b32 off, v17, s24
109; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 8
110; GISEL-GFX11-NEXT:    s_add_u32 s25, s32, 12
111; GISEL-GFX11-NEXT:    scratch_store_b32 off, v18, s24
112; GISEL-GFX11-NEXT:    scratch_store_b32 off, v19, s25
113; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 16
114; GISEL-GFX11-NEXT:    s_add_u32 s25, s32, 20
115; GISEL-GFX11-NEXT:    scratch_store_b32 off, v20, s24
116; GISEL-GFX11-NEXT:    scratch_store_b32 off, v21, s25
117; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 24
118; GISEL-GFX11-NEXT:    s_add_u32 s25, s32, 28
119; GISEL-GFX11-NEXT:    scratch_store_b32 off, v22, s24
120; GISEL-GFX11-NEXT:    scratch_store_b32 off, v23, s25
121; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 32
122; GISEL-GFX11-NEXT:    s_add_u32 s25, s32, 36
123; GISEL-GFX11-NEXT:    scratch_store_b32 off, v24, s24
124; GISEL-GFX11-NEXT:    scratch_store_b32 off, v25, s25
125; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 40
126; GISEL-GFX11-NEXT:    v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11
127; GISEL-GFX11-NEXT:    v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13
128; GISEL-GFX11-NEXT:    v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15
129; GISEL-GFX11-NEXT:    s_add_u32 s25, s32, 44
130; GISEL-GFX11-NEXT:    scratch_store_b32 off, v26, s24
131; GISEL-GFX11-NEXT:    scratch_store_b32 off, v27, s25
132; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 48
133; GISEL-GFX11-NEXT:    s_add_u32 s25, s32, 52
134; GISEL-GFX11-NEXT:    scratch_store_b32 off, v28, s24
135; GISEL-GFX11-NEXT:    scratch_store_b32 off, v29, s25
136; GISEL-GFX11-NEXT:    s_add_u32 s24, s32, 56
137; GISEL-GFX11-NEXT:    s_add_u32 s25, s32, 60
138; GISEL-GFX11-NEXT:    scratch_store_b32 off, v30, s24
139; GISEL-GFX11-NEXT:    scratch_store_b32 off, v31, s25
140; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
141; GISEL-GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
142; GISEL-GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
143; GISEL-GFX11-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
144; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
145; GISEL-GFX11-NEXT:    v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
146; GISEL-GFX11-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
147; GISEL-GFX11-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
148; GISEL-GFX11-NEXT:    v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
149; GISEL-GFX11-NEXT:    v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
150; GISEL-GFX11-NEXT:    v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
151; GISEL-GFX11-NEXT:    v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
152; GISEL-GFX11-NEXT:    v_dual_mov_b32 v24, v32 :: v_dual_mov_b32 v25, v33
153; GISEL-GFX11-NEXT:    v_dual_mov_b32 v26, v34 :: v_dual_mov_b32 v27, v35
154; GISEL-GFX11-NEXT:    v_dual_mov_b32 v28, v36 :: v_dual_mov_b32 v29, v37
155; GISEL-GFX11-NEXT:    v_dual_mov_b32 v30, v38 :: v_dual_mov_b32 v31, v39
156; GISEL-GFX11-NEXT:    s_mov_b32 s24, use@abs32@lo
157; GISEL-GFX11-NEXT:    s_mov_b32 s25, use@abs32@hi
158; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
159; GISEL-GFX11-NEXT:    s_swappc_b64 s[30:31], s[24:25]
160; GISEL-GFX11-NEXT:    s_endpgm
161;
162; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
163; GISEL-GFX10:       ; %bb.0:
164; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GISEL-GFX10-NEXT:    v_mov_b32_e32 v32, v8
166; GISEL-GFX10-NEXT:    v_mov_b32_e32 v33, v9
167; GISEL-GFX10-NEXT:    v_mov_b32_e32 v34, v10
168; GISEL-GFX10-NEXT:    v_mov_b32_e32 v35, v11
169; GISEL-GFX10-NEXT:    v_mov_b32_e32 v36, v12
170; GISEL-GFX10-NEXT:    v_mov_b32_e32 v37, v13
171; GISEL-GFX10-NEXT:    v_mov_b32_e32 v38, v14
172; GISEL-GFX10-NEXT:    v_mov_b32_e32 v39, v15
173; GISEL-GFX10-NEXT:    s_mov_b32 s32, 0
174; GISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], s32
175; GISEL-GFX10-NEXT:    buffer_store_dword v17, off, s[48:51], s32 offset:4
176; GISEL-GFX10-NEXT:    buffer_store_dword v18, off, s[48:51], s32 offset:8
177; GISEL-GFX10-NEXT:    buffer_store_dword v19, off, s[48:51], s32 offset:12
178; GISEL-GFX10-NEXT:    buffer_store_dword v20, off, s[48:51], s32 offset:16
179; GISEL-GFX10-NEXT:    buffer_store_dword v21, off, s[48:51], s32 offset:20
180; GISEL-GFX10-NEXT:    buffer_store_dword v22, off, s[48:51], s32 offset:24
181; GISEL-GFX10-NEXT:    buffer_store_dword v23, off, s[48:51], s32 offset:28
182; GISEL-GFX10-NEXT:    buffer_store_dword v24, off, s[48:51], s32 offset:32
183; GISEL-GFX10-NEXT:    buffer_store_dword v25, off, s[48:51], s32 offset:36
184; GISEL-GFX10-NEXT:    buffer_store_dword v26, off, s[48:51], s32 offset:40
185; GISEL-GFX10-NEXT:    buffer_store_dword v27, off, s[48:51], s32 offset:44
186; GISEL-GFX10-NEXT:    buffer_store_dword v28, off, s[48:51], s32 offset:48
187; GISEL-GFX10-NEXT:    buffer_store_dword v29, off, s[48:51], s32 offset:52
188; GISEL-GFX10-NEXT:    buffer_store_dword v30, off, s[48:51], s32 offset:56
189; GISEL-GFX10-NEXT:    buffer_store_dword v31, off, s[48:51], s32 offset:60
190; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
191; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, s1
192; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, s2
193; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, s3
194; GISEL-GFX10-NEXT:    v_mov_b32_e32 v4, s4
195; GISEL-GFX10-NEXT:    v_mov_b32_e32 v5, s5
196; GISEL-GFX10-NEXT:    v_mov_b32_e32 v6, s6
197; GISEL-GFX10-NEXT:    v_mov_b32_e32 v7, s7
198; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, s8
199; GISEL-GFX10-NEXT:    v_mov_b32_e32 v9, s9
200; GISEL-GFX10-NEXT:    v_mov_b32_e32 v10, s10
201; GISEL-GFX10-NEXT:    v_mov_b32_e32 v11, s11
202; GISEL-GFX10-NEXT:    v_mov_b32_e32 v12, s12
203; GISEL-GFX10-NEXT:    v_mov_b32_e32 v13, s13
204; GISEL-GFX10-NEXT:    v_mov_b32_e32 v14, s14
205; GISEL-GFX10-NEXT:    v_mov_b32_e32 v15, s15
206; GISEL-GFX10-NEXT:    v_mov_b32_e32 v16, s16
207; GISEL-GFX10-NEXT:    v_mov_b32_e32 v17, s17
208; GISEL-GFX10-NEXT:    v_mov_b32_e32 v18, s18
209; GISEL-GFX10-NEXT:    v_mov_b32_e32 v19, s19
210; GISEL-GFX10-NEXT:    v_mov_b32_e32 v20, s20
211; GISEL-GFX10-NEXT:    v_mov_b32_e32 v21, s21
212; GISEL-GFX10-NEXT:    v_mov_b32_e32 v22, s22
213; GISEL-GFX10-NEXT:    v_mov_b32_e32 v23, s23
214; GISEL-GFX10-NEXT:    v_mov_b32_e32 v24, v32
215; GISEL-GFX10-NEXT:    v_mov_b32_e32 v25, v33
216; GISEL-GFX10-NEXT:    v_mov_b32_e32 v26, v34
217; GISEL-GFX10-NEXT:    v_mov_b32_e32 v27, v35
218; GISEL-GFX10-NEXT:    v_mov_b32_e32 v28, v36
219; GISEL-GFX10-NEXT:    v_mov_b32_e32 v29, v37
220; GISEL-GFX10-NEXT:    v_mov_b32_e32 v30, v38
221; GISEL-GFX10-NEXT:    v_mov_b32_e32 v31, v39
222; GISEL-GFX10-NEXT:    s_mov_b64 s[0:1], s[48:49]
223; GISEL-GFX10-NEXT:    s_mov_b32 s24, use@abs32@lo
224; GISEL-GFX10-NEXT:    s_mov_b32 s25, use@abs32@hi
225; GISEL-GFX10-NEXT:    s_mov_b64 s[2:3], s[50:51]
226; GISEL-GFX10-NEXT:    s_swappc_b64 s[30:31], s[24:25]
227; GISEL-GFX10-NEXT:    s_endpgm
228;
229; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
230; DAGISEL-GFX11:       ; %bb.0:
231; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; DAGISEL-GFX11-NEXT:    s_mov_b32 s32, 0
233; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14
234; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 60
235; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v16, s32
236; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v31, s24
237; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 56
238; DAGISEL-GFX11-NEXT:    s_add_i32 s25, s32, 52
239; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v30, s24
240; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v29, s25
241; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 48
242; DAGISEL-GFX11-NEXT:    s_add_i32 s25, s32, 44
243; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v28, s24
244; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v27, s25
245; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 40
246; DAGISEL-GFX11-NEXT:    s_add_i32 s25, s32, 36
247; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v26, s24
248; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v25, s25
249; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 32
250; DAGISEL-GFX11-NEXT:    s_add_i32 s25, s32, 28
251; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v24, s24
252; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v23, s25
253; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 24
254; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12
255; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10
256; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8
257; DAGISEL-GFX11-NEXT:    s_add_i32 s25, s32, 20
258; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v22, s24
259; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v21, s25
260; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 16
261; DAGISEL-GFX11-NEXT:    s_add_i32 s25, s32, 12
262; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v20, s24
263; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v19, s25
264; DAGISEL-GFX11-NEXT:    s_add_i32 s24, s32, 8
265; DAGISEL-GFX11-NEXT:    s_add_i32 s25, s32, 4
266; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v18, s24
267; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v17, s25
268; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
269; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
270; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
271; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
272; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
273; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
274; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
275; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
276; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
277; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
278; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
279; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
280; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v24, v39 :: v_dual_mov_b32 v25, v38
281; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v26, v37 :: v_dual_mov_b32 v27, v36
282; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v28, v35 :: v_dual_mov_b32 v29, v34
283; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v30, v33 :: v_dual_mov_b32 v31, v32
284; DAGISEL-GFX11-NEXT:    s_mov_b32 s25, use@abs32@hi
285; DAGISEL-GFX11-NEXT:    s_mov_b32 s24, use@abs32@lo
286; DAGISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
287; DAGISEL-GFX11-NEXT:    s_swappc_b64 s[30:31], s[24:25]
288; DAGISEL-GFX11-NEXT:    s_endpgm
289;
290; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
291; DAGISEL-GFX10:       ; %bb.0:
292; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v32, v15
294; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v33, v14
295; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v34, v13
296; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v35, v12
297; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v36, v11
298; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v37, v10
299; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v38, v9
300; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v39, v8
301; DAGISEL-GFX10-NEXT:    s_mov_b32 s32, 0
302; DAGISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], s32
303; DAGISEL-GFX10-NEXT:    buffer_store_dword v17, off, s[48:51], s32 offset:4
304; DAGISEL-GFX10-NEXT:    buffer_store_dword v18, off, s[48:51], s32 offset:8
305; DAGISEL-GFX10-NEXT:    buffer_store_dword v19, off, s[48:51], s32 offset:12
306; DAGISEL-GFX10-NEXT:    buffer_store_dword v20, off, s[48:51], s32 offset:16
307; DAGISEL-GFX10-NEXT:    buffer_store_dword v21, off, s[48:51], s32 offset:20
308; DAGISEL-GFX10-NEXT:    buffer_store_dword v22, off, s[48:51], s32 offset:24
309; DAGISEL-GFX10-NEXT:    buffer_store_dword v23, off, s[48:51], s32 offset:28
310; DAGISEL-GFX10-NEXT:    buffer_store_dword v24, off, s[48:51], s32 offset:32
311; DAGISEL-GFX10-NEXT:    buffer_store_dword v25, off, s[48:51], s32 offset:36
312; DAGISEL-GFX10-NEXT:    buffer_store_dword v26, off, s[48:51], s32 offset:40
313; DAGISEL-GFX10-NEXT:    buffer_store_dword v27, off, s[48:51], s32 offset:44
314; DAGISEL-GFX10-NEXT:    buffer_store_dword v28, off, s[48:51], s32 offset:48
315; DAGISEL-GFX10-NEXT:    buffer_store_dword v29, off, s[48:51], s32 offset:52
316; DAGISEL-GFX10-NEXT:    buffer_store_dword v30, off, s[48:51], s32 offset:56
317; DAGISEL-GFX10-NEXT:    buffer_store_dword v31, off, s[48:51], s32 offset:60
318; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
319; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, s1
320; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, s2
321; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, s3
322; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v4, s4
323; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v5, s5
324; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v6, s6
325; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v7, s7
326; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, s8
327; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v9, s9
328; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v10, s10
329; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v11, s11
330; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v12, s12
331; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v13, s13
332; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v14, s14
333; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v15, s15
334; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v16, s16
335; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v17, s17
336; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v18, s18
337; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v19, s19
338; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v20, s20
339; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v21, s21
340; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v22, s22
341; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v23, s23
342; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v24, v39
343; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v25, v38
344; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v26, v37
345; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v27, v36
346; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v28, v35
347; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v29, v34
348; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v30, v33
349; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v31, v32
350; DAGISEL-GFX10-NEXT:    s_mov_b64 s[0:1], s[48:49]
351; DAGISEL-GFX10-NEXT:    s_mov_b32 s25, use@abs32@hi
352; DAGISEL-GFX10-NEXT:    s_mov_b32 s24, use@abs32@lo
353; DAGISEL-GFX10-NEXT:    s_mov_b64 s[2:3], s[50:51]
354; DAGISEL-GFX10-NEXT:    s_swappc_b64 s[30:31], s[24:25]
355; DAGISEL-GFX10-NEXT:    s_endpgm
356  call amdgpu_gfx void @use(<24 x i32> %sgprs, <24 x i32> %vgprs)
357  ret void
358}
359
360define amdgpu_cs_chain void @alloca_and_call() {
361; GISEL-GFX11-LABEL: alloca_and_call:
362; GISEL-GFX11:       ; %bb.0: ; %.entry
363; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 42
365; GISEL-GFX11-NEXT:    s_mov_b32 s0, use@abs32@lo
366; GISEL-GFX11-NEXT:    s_mov_b32 s1, use@abs32@hi
367; GISEL-GFX11-NEXT:    s_mov_b32 s32, 16
368; GISEL-GFX11-NEXT:    scratch_store_b32 off, v0, off
369; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 0
370; GISEL-GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
371; GISEL-GFX11-NEXT:    s_endpgm
372;
373; GISEL-GFX10-LABEL: alloca_and_call:
374; GISEL-GFX10:       ; %bb.0: ; %.entry
375; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
376; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 42
377; GISEL-GFX10-NEXT:    s_mov_b64 s[0:1], s[48:49]
378; GISEL-GFX10-NEXT:    s_mov_b32 s4, use@abs32@lo
379; GISEL-GFX10-NEXT:    s_mov_b32 s5, use@abs32@hi
380; GISEL-GFX10-NEXT:    s_mov_b64 s[2:3], s[50:51]
381; GISEL-GFX10-NEXT:    buffer_store_dword v0, off, s[48:51], 0
382; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 0
383; GISEL-GFX10-NEXT:    s_movk_i32 s32, 0x200
384; GISEL-GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
385; GISEL-GFX10-NEXT:    s_endpgm
386;
387; DAGISEL-GFX11-LABEL: alloca_and_call:
388; DAGISEL-GFX11:       ; %bb.0: ; %.entry
389; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 42
391; DAGISEL-GFX11-NEXT:    s_mov_b32 s1, use@abs32@hi
392; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, use@abs32@lo
393; DAGISEL-GFX11-NEXT:    s_mov_b32 s32, 16
394; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v0, off
395; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 0
396; DAGISEL-GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
397; DAGISEL-GFX11-NEXT:    s_endpgm
398;
399; DAGISEL-GFX10-LABEL: alloca_and_call:
400; DAGISEL-GFX10:       ; %bb.0: ; %.entry
401; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 42
403; DAGISEL-GFX10-NEXT:    s_mov_b64 s[0:1], s[48:49]
404; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, use@abs32@hi
405; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, use@abs32@lo
406; DAGISEL-GFX10-NEXT:    s_mov_b64 s[2:3], s[50:51]
407; DAGISEL-GFX10-NEXT:    buffer_store_dword v0, off, s[48:51], 0
408; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 0
409; DAGISEL-GFX10-NEXT:    s_movk_i32 s32, 0x200
410; DAGISEL-GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
411; DAGISEL-GFX10-NEXT:    s_endpgm
412.entry:
413  %v = alloca [3 x i32], addrspace(5)
414  store i32 42, ptr addrspace(5) %v
415  call amdgpu_gfx void @use(ptr addrspace(5) %v)
416  ret void
417}
418
419define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
420; GISEL-GFX11-LABEL: cs_to_chain:
421; GISEL-GFX11:       ; %bb.0:
422; GISEL-GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
423; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
424; GISEL-GFX11-NEXT:    ;;#ASMSTART
425; GISEL-GFX11-NEXT:    s_nop
426; GISEL-GFX11-NEXT:    ;;#ASMEND
427; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
428; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
429; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
430; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
431; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
432; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
433; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
434;
435; GISEL-GFX10-LABEL: cs_to_chain:
436; GISEL-GFX10:       ; %bb.0:
437; GISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
438; GISEL-GFX10-NEXT:    s_mov_b32 s100, s0
439; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, v0
440; GISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
441; GISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v1
442; GISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v2
443; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
444; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
445; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
446; GISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
447; GISEL-GFX10-NEXT:    s_add_u32 s100, s100, s3
448; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
449; GISEL-GFX10-NEXT:    ;;#ASMSTART
450; GISEL-GFX10-NEXT:    s_nop
451; GISEL-GFX10-NEXT:    ;;#ASMEND
452; GISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
453; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
454; GISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
455; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
456; GISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
457; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
458; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
459;
460; DAGISEL-GFX11-LABEL: cs_to_chain:
461; DAGISEL-GFX11:       ; %bb.0:
462; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
463; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
464; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
465; DAGISEL-GFX11-NEXT:    s_nop
466; DAGISEL-GFX11-NEXT:    ;;#ASMEND
467; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
468; DAGISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
469; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
470; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
471; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
472; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
473; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
474;
475; DAGISEL-GFX10-LABEL: cs_to_chain:
476; DAGISEL-GFX10:       ; %bb.0:
477; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
478; DAGISEL-GFX10-NEXT:    s_mov_b32 s100, s0
479; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, v0
480; DAGISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
481; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v1
482; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v2
483; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
484; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
485; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
486; DAGISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
487; DAGISEL-GFX10-NEXT:    s_add_u32 s100, s100, s3
488; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
489; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
490; DAGISEL-GFX10-NEXT:    s_nop
491; DAGISEL-GFX10-NEXT:    ;;#ASMEND
492; DAGISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
493; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
494; DAGISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
495; DAGISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
496; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
497; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
498; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
499  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
500  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
501  unreachable
502}
503
504; Chain call with SGPR arguments that we cannot prove are uniform.
505define amdgpu_cs void @cs_to_chain_nonuniform(<3 x i32> %a, <3 x i32> %b) {
506; GISEL-GFX11-LABEL: cs_to_chain_nonuniform:
507; GISEL-GFX11:       ; %bb.0:
508; GISEL-GFX11-NEXT:    v_readfirstlane_b32 s0, v0
509; GISEL-GFX11-NEXT:    v_readfirstlane_b32 s1, v1
510; GISEL-GFX11-NEXT:    v_readfirstlane_b32 s2, v2
511; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
512; GISEL-GFX11-NEXT:    v_mov_b32_e32 v10, v5
513; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
514; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
515; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
516; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
517;
518; GISEL-GFX10-LABEL: cs_to_chain_nonuniform:
519; GISEL-GFX10:       ; %bb.0:
520; GISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
521; GISEL-GFX10-NEXT:    s_mov_b32 s100, s0
522; GISEL-GFX10-NEXT:    v_readfirstlane_b32 s1, v1
523; GISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
524; GISEL-GFX10-NEXT:    v_readfirstlane_b32 s2, v2
525; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
526; GISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v4
527; GISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v5
528; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
529; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
530; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
531; GISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
532; GISEL-GFX10-NEXT:    s_add_u32 s100, s100, s0
533; GISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
534; GISEL-GFX10-NEXT:    v_readfirstlane_b32 s0, v0
535; GISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
536; GISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
537; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
538; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
539;
540; DAGISEL-GFX11-LABEL: cs_to_chain_nonuniform:
541; DAGISEL-GFX11:       ; %bb.0:
542; DAGISEL-GFX11-NEXT:    v_readfirstlane_b32 s0, v0
543; DAGISEL-GFX11-NEXT:    v_readfirstlane_b32 s1, v1
544; DAGISEL-GFX11-NEXT:    v_readfirstlane_b32 s2, v2
545; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
546; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v10, v5
547; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
548; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
549; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
550; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
551;
552; DAGISEL-GFX10-LABEL: cs_to_chain_nonuniform:
553; DAGISEL-GFX10:       ; %bb.0:
554; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
555; DAGISEL-GFX10-NEXT:    s_mov_b32 s100, s0
556; DAGISEL-GFX10-NEXT:    v_readfirstlane_b32 s1, v1
557; DAGISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
558; DAGISEL-GFX10-NEXT:    v_readfirstlane_b32 s2, v2
559; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
560; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v4
561; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v5
562; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
563; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
564; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
565; DAGISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
566; DAGISEL-GFX10-NEXT:    s_add_u32 s100, s100, s0
567; DAGISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
568; DAGISEL-GFX10-NEXT:    v_readfirstlane_b32 s0, v0
569; DAGISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
570; DAGISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
571; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
572; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
573  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
574  unreachable
575}
576
577define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
578; GISEL-GFX11-LABEL: chain_to_chain:
579; GISEL-GFX11:       ; %bb.0:
580; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
582; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
583; GISEL-GFX11-NEXT:    ;;#ASMSTART
584; GISEL-GFX11-NEXT:    s_nop
585; GISEL-GFX11-NEXT:    ;;#ASMEND
586; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
587; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
588; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
589; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
590; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
591; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
592;
593; GISEL-GFX10-LABEL: chain_to_chain:
594; GISEL-GFX10:       ; %bb.0:
595; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
597; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
598; GISEL-GFX10-NEXT:    ;;#ASMSTART
599; GISEL-GFX10-NEXT:    s_nop
600; GISEL-GFX10-NEXT:    ;;#ASMEND
601; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
602; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
603; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
604; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
605; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
606; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
607;
608; DAGISEL-GFX11-LABEL: chain_to_chain:
609; DAGISEL-GFX11:       ; %bb.0:
610; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
612; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
613; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
614; DAGISEL-GFX11-NEXT:    s_nop
615; DAGISEL-GFX11-NEXT:    ;;#ASMEND
616; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
617; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
618; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
619; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
620; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
621; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
622;
623; DAGISEL-GFX10-LABEL: chain_to_chain:
624; DAGISEL-GFX10:       ; %bb.0:
625; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
627; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
628; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
629; DAGISEL-GFX10-NEXT:    s_nop
630; DAGISEL-GFX10-NEXT:    ;;#ASMEND
631; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
632; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
633; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
634; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
635; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
636; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
637  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
638  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
639  unreachable
640}
641
642define amdgpu_cs_chain void @chain_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) {
643; GISEL-GFX11-LABEL: chain_to_chain_wwm:
644; GISEL-GFX11:       ; %bb.0:
645; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
647; GISEL-GFX11-NEXT:    s_or_saveexec_b32 s0, -1
648; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
649; GISEL-GFX11-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s0
650; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, s0
651; GISEL-GFX11-NEXT:    v_mov_b32_e32 v2, v1
652; GISEL-GFX11-NEXT:    ;;#ASMSTART
653; GISEL-GFX11-NEXT:    s_nop
654; GISEL-GFX11-NEXT:    ;;#ASMEND
655; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
656; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
657; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
658; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v2
659; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
660; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
661;
662; GISEL-GFX10-LABEL: chain_to_chain_wwm:
663; GISEL-GFX10:       ; %bb.0:
664; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
666; GISEL-GFX10-NEXT:    s_or_saveexec_b32 s0, -1
667; GISEL-GFX10-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s0
668; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, s0
669; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, v1
670; GISEL-GFX10-NEXT:    ;;#ASMSTART
671; GISEL-GFX10-NEXT:    s_nop
672; GISEL-GFX10-NEXT:    ;;#ASMEND
673; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
674; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
675; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
676; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v2
677; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
678; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
679;
680; DAGISEL-GFX11-LABEL: chain_to_chain_wwm:
681; DAGISEL-GFX11:       ; %bb.0:
682; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683; DAGISEL-GFX11-NEXT:    s_or_saveexec_b32 s4, -1
684; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
685; DAGISEL-GFX11-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s4
686; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, s4
687; DAGISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
688; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v2, v1
689; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
690; DAGISEL-GFX11-NEXT:    s_nop
691; DAGISEL-GFX11-NEXT:    ;;#ASMEND
692; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
693; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
694; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
695; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v2
696; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
697; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
698;
699; DAGISEL-GFX10-LABEL: chain_to_chain_wwm:
700; DAGISEL-GFX10:       ; %bb.0:
701; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
702; DAGISEL-GFX10-NEXT:    s_or_saveexec_b32 s4, -1
703; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
704; DAGISEL-GFX10-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s4
705; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, s4
706; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, v1
707; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
708; DAGISEL-GFX10-NEXT:    s_nop
709; DAGISEL-GFX10-NEXT:    ;;#ASMEND
710; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
711; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
712; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
713; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v2
714; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
715; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
716  %i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4)
717  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
718  %w = call i32 @llvm.amdgcn.wwm(i32 %i)
719  %c = insertelement <3 x i32> %b, i32 %w, i32 0
720  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0)
721  unreachable
722}
723
724define amdgpu_cs_chain void @chain_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) {
725; GISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
726; GISEL-GFX11:       ; %bb.0:
727; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728; GISEL-GFX11-NEXT:    v_mov_b32_e32 v11, v8
729; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
730; GISEL-GFX11-NEXT:    ;;#ASMSTART
731; GISEL-GFX11-NEXT:    s_nop
732; GISEL-GFX11-NEXT:    ;;#ASMEND
733; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
734; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
735; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v11
736; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
737; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
738; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
739;
740; GISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
741; GISEL-GFX10:       ; %bb.0:
742; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743; GISEL-GFX10-NEXT:    v_mov_b32_e32 v11, v8
744; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
745; GISEL-GFX10-NEXT:    ;;#ASMSTART
746; GISEL-GFX10-NEXT:    s_nop
747; GISEL-GFX10-NEXT:    ;;#ASMEND
748; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
749; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
750; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v11
751; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
752; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
753; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
754;
755; DAGISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
756; DAGISEL-GFX11:       ; %bb.0:
757; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v11, v8
759; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
760; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
761; DAGISEL-GFX11-NEXT:    s_nop
762; DAGISEL-GFX11-NEXT:    ;;#ASMEND
763; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
764; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
765; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v11
766; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
767; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
768; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
769;
770; DAGISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
771; DAGISEL-GFX10:       ; %bb.0:
772; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v11, v8
774; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
775; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
776; DAGISEL-GFX10-NEXT:    s_nop
777; DAGISEL-GFX10-NEXT:    ;;#ASMEND
778; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
779; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
780; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v11
781; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
782; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
783; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
784  call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"()
785  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
786  unreachable
787}
788
789define amdgpu_cs_chain void @chain_to_chain_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) {
790; GISEL-GFX11-LABEL: chain_to_chain_fewer_args:
791; GISEL-GFX11:       ; %bb.0:
792; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
794; GISEL-GFX11-NEXT:    s_mov_b32 s2, s0
795; GISEL-GFX11-NEXT:    ;;#ASMSTART
796; GISEL-GFX11-NEXT:    s_nop
797; GISEL-GFX11-NEXT:    ;;#ASMEND
798; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
799; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
800; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
801; GISEL-GFX11-NEXT:    s_mov_b32 s0, s2
802; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
803; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
804;
805; GISEL-GFX10-LABEL: chain_to_chain_fewer_args:
806; GISEL-GFX10:       ; %bb.0:
807; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
809; GISEL-GFX10-NEXT:    s_mov_b32 s2, s0
810; GISEL-GFX10-NEXT:    ;;#ASMSTART
811; GISEL-GFX10-NEXT:    s_nop
812; GISEL-GFX10-NEXT:    ;;#ASMEND
813; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
814; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
815; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
816; GISEL-GFX10-NEXT:    s_mov_b32 s0, s2
817; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
818; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
819;
820; DAGISEL-GFX11-LABEL: chain_to_chain_fewer_args:
821; DAGISEL-GFX11:       ; %bb.0:
822; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
824; DAGISEL-GFX11-NEXT:    s_mov_b32 s2, s0
825; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
826; DAGISEL-GFX11-NEXT:    s_nop
827; DAGISEL-GFX11-NEXT:    ;;#ASMEND
828; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
829; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
830; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
831; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s2
832; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
833; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
834;
835; DAGISEL-GFX10-LABEL: chain_to_chain_fewer_args:
836; DAGISEL-GFX10:       ; %bb.0:
837; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
838; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
839; DAGISEL-GFX10-NEXT:    s_mov_b32 s2, s0
840; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
841; DAGISEL-GFX10-NEXT:    s_nop
842; DAGISEL-GFX10-NEXT:    ;;#ASMEND
843; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
844; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
845; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
846; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s2
847; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
848; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
849  %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
850  %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
851  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
852  call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0)
853  unreachable
854}
855
856define amdgpu_cs_chain void @chain_to_chain_more_args(<3 x i32> inreg %a, <3 x i32> %b) {
857; GISEL-GFX11-LABEL: chain_to_chain_more_args:
858; GISEL-GFX11:       ; %bb.0:
859; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
861; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
862; GISEL-GFX11-NEXT:    ;;#ASMSTART
863; GISEL-GFX11-NEXT:    s_nop
864; GISEL-GFX11-NEXT:    ;;#ASMEND
865; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
866; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
867; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
868; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
869; GISEL-GFX11-NEXT:    s_mov_b32 s3, 0
870; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
871; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
872;
873; GISEL-GFX10-LABEL: chain_to_chain_more_args:
874; GISEL-GFX10:       ; %bb.0:
875; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
876; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
877; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
878; GISEL-GFX10-NEXT:    ;;#ASMSTART
879; GISEL-GFX10-NEXT:    s_nop
880; GISEL-GFX10-NEXT:    ;;#ASMEND
881; GISEL-GFX10-NEXT:    v_mov_b32_e32 v11, 0
882; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
883; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
884; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
885; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
886; GISEL-GFX10-NEXT:    s_mov_b32 s3, 0
887; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
888; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
889;
890; DAGISEL-GFX11-LABEL: chain_to_chain_more_args:
891; DAGISEL-GFX11:       ; %bb.0:
892; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
893; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
894; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
895; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
896; DAGISEL-GFX11-NEXT:    s_nop
897; DAGISEL-GFX11-NEXT:    ;;#ASMEND
898; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
899; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
900; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
901; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
902; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, 0
903; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
904; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
905;
906; DAGISEL-GFX10-LABEL: chain_to_chain_more_args:
907; DAGISEL-GFX10:       ; %bb.0:
908; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
910; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
911; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
912; DAGISEL-GFX10-NEXT:    s_nop
913; DAGISEL-GFX10-NEXT:    ;;#ASMEND
914; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v11, 0
915; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
916; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
917; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
918; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
919; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, 0
920; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
921; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
922  %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
923  %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
924  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
925  call void(ptr, i32, <4 x i32>, <4 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v4i32(ptr @chain_callee_2, i32 -1, <4 x i32> inreg %s, <4 x i32> %v, i32 0)
926  unreachable
927}
928
929define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
930; GISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
931; GISEL-GFX11:       ; %bb.0:
932; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
933; GISEL-GFX11-NEXT:    s_mov_b32 s3, 4
934; GISEL-GFX11-NEXT:    s_mov_b32 s2, 3
935; GISEL-GFX11-NEXT:    s_mov_b32 s1, 2
936; GISEL-GFX11-NEXT:    s_mov_b32 s0, 1
937; GISEL-GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v8
938; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
939; GISEL-GFX11-NEXT:    v_mov_b32_e32 v4, v0
940; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
941; GISEL-GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
942; GISEL-GFX11-NEXT:    scratch_store_b128 v4, v[0:3], off dlc
943; GISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
944; GISEL-GFX11-NEXT:    s_endpgm
945;
946; GISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
947; GISEL-GFX10:       ; %bb.0:
948; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949; GISEL-GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v8
950; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 1
951; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, 2
952; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, 3
953; GISEL-GFX10-NEXT:    v_mov_b32_e32 v4, 4
954; GISEL-GFX10-NEXT:    buffer_store_dword v1, v0, s[48:51], 0 offen
955; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
956; GISEL-GFX10-NEXT:    buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
957; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
958; GISEL-GFX10-NEXT:    buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
959; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
960; GISEL-GFX10-NEXT:    buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
961; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
962; GISEL-GFX10-NEXT:    s_endpgm
963;
964; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
965; DAGISEL-GFX11:       ; %bb.0:
966; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
967; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
968; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
969; DAGISEL-GFX11-NEXT:    v_lshl_add_u32 v4, v8, 4, 0
970; DAGISEL-GFX11-NEXT:    scratch_store_b128 v4, v[0:3], off dlc
971; DAGISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
972; DAGISEL-GFX11-NEXT:    s_endpgm
973;
974; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
975; DAGISEL-GFX10:       ; %bb.0:
976; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 4
978; DAGISEL-GFX10-NEXT:    v_lshl_add_u32 v1, v8, 4, 0
979; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, 3
980; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, 2
981; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v4, 1
982; DAGISEL-GFX10-NEXT:    buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
983; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
984; DAGISEL-GFX10-NEXT:    buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
985; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
986; DAGISEL-GFX10-NEXT:    buffer_store_dword v3, v1, s[48:51], 0 offen offset:4
987; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
988; DAGISEL-GFX10-NEXT:    buffer_store_dword v4, v1, s[48:51], 0 offen
989; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
990; DAGISEL-GFX10-NEXT:    s_endpgm
991  %alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5)
992  %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx
993  store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
994  ret void
995}
996
997declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
998declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
999declare void @llvm.amdgcn.cs.chain.v4i32(ptr, i32, <4 x i32>, <4 x i32>, i32, ...)
1000declare amdgpu_cs_chain void @chain_callee_2(<2 x i32> inreg, <2 x i32>)
1001declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
1002declare amdgpu_cs_chain void @chain_callee_4(<4 x i32> inreg, <4 x i32>)
1003declare i32 @llvm.amdgcn.set.inactive(i32, i32)
1004declare i32 @llvm.amdgcn.wwm(i32)
1005