xref: /llvm-project/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
4; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
5
; External callee: declared here without a body. Several tests below call it
; so that a value has to be kept live across a real amdgpu_gfx call.
6declare hidden amdgpu_gfx void @external_void_func_void() #0
7
; Two back-to-back calls to @external_void_func_void separated by an empty,
; side-effecting inline asm. The checks pin that s30/s31 are written to v40
; lanes 2/3 before the first call and read back after the second, and that the
; callee address materialized once in s[4:5] is reused by the second
; s_swappc_b64 (the incoming s4/s5 are kept in v40 lanes 0/1 and restored in
; the epilogue).
; NOTE(review): despite "clobber_s30_s31" in the name, the inline asm in the
; body declares no clobbers; s[30:31] only appears as the operand pair of the
; call instructions. Confirm the name is historical.
8define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
9; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
10; GFX9:       ; %bb.0:
11; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX9-NEXT:    s_mov_b32 s34, s33
13; GFX9-NEXT:    s_mov_b32 s33, s32
14; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
15; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
17; GFX9-NEXT:    v_writelane_b32 v40, s34, 4
18; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
19; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
20; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
21; GFX9-NEXT:    s_mov_b32 s5, external_void_func_void@abs32@hi
22; GFX9-NEXT:    s_mov_b32 s4, external_void_func_void@abs32@lo
23; GFX9-NEXT:    s_addk_i32 s32, 0x400
24; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
25; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
26; GFX9-NEXT:    ;;#ASMSTART
27; GFX9-NEXT:    ;;#ASMEND
28; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
29; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
30; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
31; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
32; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
33; GFX9-NEXT:    s_mov_b32 s32, s33
34; GFX9-NEXT:    v_readlane_b32 s34, v40, 4
35; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
36; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
37; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
38; GFX9-NEXT:    s_mov_b32 s33, s34
39; GFX9-NEXT:    s_waitcnt vmcnt(0)
40; GFX9-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
43; GFX10:       ; %bb.0:
44; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX10-NEXT:    s_mov_b32 s34, s33
46; GFX10-NEXT:    s_mov_b32 s33, s32
47; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
48; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
49; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
50; GFX10-NEXT:    s_mov_b32 exec_lo, s35
51; GFX10-NEXT:    v_writelane_b32 v40, s34, 4
52; GFX10-NEXT:    s_addk_i32 s32, 0x200
53; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
54; GFX10-NEXT:    s_mov_b32 s4, external_void_func_void@abs32@lo
55; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
56; GFX10-NEXT:    s_mov_b32 s5, external_void_func_void@abs32@hi
57; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
58; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
59; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
60; GFX10-NEXT:    ;;#ASMSTART
61; GFX10-NEXT:    ;;#ASMEND
62; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
63; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
64; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
65; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
66; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
67; GFX10-NEXT:    s_mov_b32 s32, s33
68; GFX10-NEXT:    v_readlane_b32 s34, v40, 4
69; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
70; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
71; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
72; GFX10-NEXT:    s_mov_b32 exec_lo, s35
73; GFX10-NEXT:    s_mov_b32 s33, s34
74; GFX10-NEXT:    s_waitcnt vmcnt(0)
75; GFX10-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX11-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
78; GFX11:       ; %bb.0:
79; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX11-NEXT:    s_mov_b32 s0, s33
81; GFX11-NEXT:    s_mov_b32 s33, s32
82; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
83; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
84; GFX11-NEXT:    s_mov_b32 exec_lo, s1
85; GFX11-NEXT:    v_writelane_b32 v40, s0, 4
86; GFX11-NEXT:    s_add_i32 s32, s32, 16
87; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
88; GFX11-NEXT:    s_mov_b32 s4, external_void_func_void@abs32@lo
89; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
90; GFX11-NEXT:    s_mov_b32 s5, external_void_func_void@abs32@hi
91; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
92; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
93; GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
94; GFX11-NEXT:    ;;#ASMSTART
95; GFX11-NEXT:    ;;#ASMEND
96; GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
97; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
98; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
99; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
100; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
101; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
102; GFX11-NEXT:    s_mov_b32 s32, s33
103; GFX11-NEXT:    v_readlane_b32 s0, v40, 4
104; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
105; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
106; GFX11-NEXT:    s_mov_b32 exec_lo, s1
107; GFX11-NEXT:    s_mov_b32 s33, s0
108; GFX11-NEXT:    s_waitcnt vmcnt(0)
109; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The empty asm (no constraints, no clobbers) only separates the two calls.
110  call amdgpu_gfx void @external_void_func_void()
111  call void asm sideeffect "", ""() #0
112  call amdgpu_gfx void @external_void_func_void()
113  ret void
114}
115
; Call-free function whose two inline asm statements list s[30:31] and
; s[28:29] as clobbers. The checks pin that s28-s31 are written to lanes 0-3
; of v0 before the asm and read back afterwards, and that v0 itself is stored
; to and reloaded from the stack at s32 under s_xor_saveexec around that use.
; NOTE(review): the name mentions only s28/s29 although the first asm also
; clobbers s[30:31]. Attribute group #1 is defined outside this excerpt.
116define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
117; GFX9-LABEL: void_func_void_clobber_s28_s29:
118; GFX9:       ; %bb.0:
119; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
121; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
122; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
123; GFX9-NEXT:    v_writelane_b32 v0, s28, 0
124; GFX9-NEXT:    v_writelane_b32 v0, s29, 1
125; GFX9-NEXT:    v_writelane_b32 v0, s30, 2
126; GFX9-NEXT:    v_writelane_b32 v0, s31, 3
127; GFX9-NEXT:    ;;#ASMSTART
128; GFX9-NEXT:    ; clobber
129; GFX9-NEXT:    ;;#ASMEND
130; GFX9-NEXT:    ;;#ASMSTART
131; GFX9-NEXT:    ; clobber
132; GFX9-NEXT:    ;;#ASMEND
133; GFX9-NEXT:    v_readlane_b32 s31, v0, 3
134; GFX9-NEXT:    v_readlane_b32 s30, v0, 2
135; GFX9-NEXT:    v_readlane_b32 s29, v0, 1
136; GFX9-NEXT:    v_readlane_b32 s28, v0, 0
137; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
138; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
139; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
140; GFX9-NEXT:    s_waitcnt vmcnt(0)
141; GFX9-NEXT:    s_setpc_b64 s[30:31]
142;
143; GFX10-LABEL: void_func_void_clobber_s28_s29:
144; GFX10:       ; %bb.0:
145; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
147; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
148; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
149; GFX10-NEXT:    s_mov_b32 exec_lo, s34
150; GFX10-NEXT:    v_writelane_b32 v0, s28, 0
151; GFX10-NEXT:    v_writelane_b32 v0, s29, 1
152; GFX10-NEXT:    v_writelane_b32 v0, s30, 2
153; GFX10-NEXT:    v_writelane_b32 v0, s31, 3
154; GFX10-NEXT:    ;;#ASMSTART
155; GFX10-NEXT:    ; clobber
156; GFX10-NEXT:    ;;#ASMEND
157; GFX10-NEXT:    ;;#ASMSTART
158; GFX10-NEXT:    ; clobber
159; GFX10-NEXT:    ;;#ASMEND
160; GFX10-NEXT:    v_readlane_b32 s31, v0, 3
161; GFX10-NEXT:    v_readlane_b32 s30, v0, 2
162; GFX10-NEXT:    v_readlane_b32 s29, v0, 1
163; GFX10-NEXT:    v_readlane_b32 s28, v0, 0
164; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
165; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
166; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
167; GFX10-NEXT:    s_mov_b32 exec_lo, s34
168; GFX10-NEXT:    s_waitcnt vmcnt(0)
169; GFX10-NEXT:    s_setpc_b64 s[30:31]
170;
171; GFX11-LABEL: void_func_void_clobber_s28_s29:
172; GFX11:       ; %bb.0:
173; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
175; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
176; GFX11-NEXT:    s_mov_b32 exec_lo, s0
177; GFX11-NEXT:    v_writelane_b32 v0, s28, 0
178; GFX11-NEXT:    v_writelane_b32 v0, s29, 1
179; GFX11-NEXT:    v_writelane_b32 v0, s30, 2
180; GFX11-NEXT:    v_writelane_b32 v0, s31, 3
181; GFX11-NEXT:    ;;#ASMSTART
182; GFX11-NEXT:    ; clobber
183; GFX11-NEXT:    ;;#ASMEND
184; GFX11-NEXT:    ;;#ASMSTART
185; GFX11-NEXT:    ; clobber
186; GFX11-NEXT:    ;;#ASMEND
187; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
188; GFX11-NEXT:    v_readlane_b32 s31, v0, 3
189; GFX11-NEXT:    v_readlane_b32 s30, v0, 2
190; GFX11-NEXT:    v_readlane_b32 s29, v0, 1
191; GFX11-NEXT:    v_readlane_b32 s28, v0, 0
192; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
193; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
194; GFX11-NEXT:    s_mov_b32 exec_lo, s0
195; GFX11-NEXT:    s_waitcnt vmcnt(0)
196; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Both asm statements emit only the text "; clobber"; the register pairs are
; named purely through the clobber constraints.
197  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
198  call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
199  ret void
200}
201
; An i32 is defined in s31 by inline asm, kept live across a call to
; @external_void_func_void, then consumed from s31 by a second inline asm.
; The checks pin that the value is copied from s31 to s4 before the call and
; back to s31 afterwards, with the incoming s4, s30 and s31 held in v40 lanes
; 0-2 and restored in the epilogue. The %out argument is not used by the body.
202define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
203; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
204; GFX9:       ; %bb.0:
205; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX9-NEXT:    s_mov_b32 s34, s33
207; GFX9-NEXT:    s_mov_b32 s33, s32
208; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
209; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
210; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
211; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
212; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
213; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
214; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
215; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
216; GFX9-NEXT:    s_addk_i32 s32, 0x400
217; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
218; GFX9-NEXT:    ;;#ASMSTART
219; GFX9-NEXT:    ; def s31
220; GFX9-NEXT:    ;;#ASMEND
221; GFX9-NEXT:    s_mov_b32 s4, s31
222; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
223; GFX9-NEXT:    s_mov_b32 s31, s4
224; GFX9-NEXT:    ;;#ASMSTART
225; GFX9-NEXT:    ; use s31
226; GFX9-NEXT:    ;;#ASMEND
227; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
228; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
229; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
230; GFX9-NEXT:    s_mov_b32 s32, s33
231; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
232; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
233; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
234; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
235; GFX9-NEXT:    s_mov_b32 s33, s34
236; GFX9-NEXT:    s_waitcnt vmcnt(0)
237; GFX9-NEXT:    s_setpc_b64 s[30:31]
238;
239; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
240; GFX10:       ; %bb.0:
241; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242; GFX10-NEXT:    s_mov_b32 s34, s33
243; GFX10-NEXT:    s_mov_b32 s33, s32
244; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
245; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
246; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
247; GFX10-NEXT:    s_mov_b32 exec_lo, s35
248; GFX10-NEXT:    v_writelane_b32 v40, s34, 3
249; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
250; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
251; GFX10-NEXT:    s_addk_i32 s32, 0x200
252; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
253; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
254; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
255; GFX10-NEXT:    ;;#ASMSTART
256; GFX10-NEXT:    ; def s31
257; GFX10-NEXT:    ;;#ASMEND
258; GFX10-NEXT:    s_mov_b32 s4, s31
259; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
260; GFX10-NEXT:    s_mov_b32 s31, s4
261; GFX10-NEXT:    ;;#ASMSTART
262; GFX10-NEXT:    ; use s31
263; GFX10-NEXT:    ;;#ASMEND
264; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
265; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
266; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
267; GFX10-NEXT:    s_mov_b32 s32, s33
268; GFX10-NEXT:    v_readlane_b32 s34, v40, 3
269; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
270; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
271; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
272; GFX10-NEXT:    s_mov_b32 exec_lo, s35
273; GFX10-NEXT:    s_mov_b32 s33, s34
274; GFX10-NEXT:    s_waitcnt vmcnt(0)
275; GFX10-NEXT:    s_setpc_b64 s[30:31]
276;
277; GFX11-LABEL: test_call_void_func_void_mayclobber_s31:
278; GFX11:       ; %bb.0:
279; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280; GFX11-NEXT:    s_mov_b32 s0, s33
281; GFX11-NEXT:    s_mov_b32 s33, s32
282; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
283; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
284; GFX11-NEXT:    s_mov_b32 exec_lo, s1
285; GFX11-NEXT:    v_writelane_b32 v40, s0, 3
286; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
287; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
288; GFX11-NEXT:    s_add_i32 s32, s32, 16
289; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
290; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
291; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
292; GFX11-NEXT:    ;;#ASMSTART
293; GFX11-NEXT:    ; def s31
294; GFX11-NEXT:    ;;#ASMEND
295; GFX11-NEXT:    s_mov_b32 s4, s31
296; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
297; GFX11-NEXT:    s_mov_b32 s31, s4
298; GFX11-NEXT:    ;;#ASMSTART
299; GFX11-NEXT:    ; use s31
300; GFX11-NEXT:    ;;#ASMEND
301; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
302; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
303; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
304; GFX11-NEXT:    s_mov_b32 s32, s33
305; GFX11-NEXT:    v_readlane_b32 s0, v40, 3
306; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
307; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
308; GFX11-NEXT:    s_mov_b32 exec_lo, s1
309; GFX11-NEXT:    s_mov_b32 s33, s0
310; GFX11-NEXT:    s_waitcnt vmcnt(0)
311; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The "={s31}" / "{s31}" constraints tie the asm output and input to s31, so
; %s31 is live across the call in between.
312  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
313  call amdgpu_gfx void @external_void_func_void()
314  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
315  ret void
316}
317
; VGPR variant: an i32 is defined in v31 by inline asm, kept live across a
; call to @external_void_func_void, then consumed from v31. The checks pin
; that the value is copied from v31 to v40 before the call and back afterwards.
; v40 is stored to the stack at s33 before that use and reloaded after it,
; while v41 (stored at s33 offset 4) carries the lane copies of s30, s31 and
; the incoming s33. The %out argument is not used by the body.
318define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
319; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
320; GFX9:       ; %bb.0:
321; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX9-NEXT:    s_mov_b32 s34, s33
323; GFX9-NEXT:    s_mov_b32 s33, s32
324; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
325; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
326; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
327; GFX9-NEXT:    v_writelane_b32 v41, s34, 2
328; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
329; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
330; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
331; GFX9-NEXT:    s_addk_i32 s32, 0x400
332; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
333; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
334; GFX9-NEXT:    ;;#ASMSTART
335; GFX9-NEXT:    ; def v31
336; GFX9-NEXT:    ;;#ASMEND
337; GFX9-NEXT:    v_mov_b32_e32 v40, v31
338; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
339; GFX9-NEXT:    v_mov_b32_e32 v31, v40
340; GFX9-NEXT:    ;;#ASMSTART
341; GFX9-NEXT:    ; use v31
342; GFX9-NEXT:    ;;#ASMEND
343; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
344; GFX9-NEXT:    v_readlane_b32 s31, v41, 1
345; GFX9-NEXT:    v_readlane_b32 s30, v41, 0
346; GFX9-NEXT:    s_mov_b32 s32, s33
347; GFX9-NEXT:    v_readlane_b32 s34, v41, 2
348; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
349; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
350; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
351; GFX9-NEXT:    s_mov_b32 s33, s34
352; GFX9-NEXT:    s_waitcnt vmcnt(0)
353; GFX9-NEXT:    s_setpc_b64 s[30:31]
354;
355; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
356; GFX10:       ; %bb.0:
357; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358; GFX10-NEXT:    s_mov_b32 s34, s33
359; GFX10-NEXT:    s_mov_b32 s33, s32
360; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
361; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
362; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
363; GFX10-NEXT:    s_mov_b32 exec_lo, s35
364; GFX10-NEXT:    v_writelane_b32 v41, s34, 2
365; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
366; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
367; GFX10-NEXT:    s_addk_i32 s32, 0x200
368; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
369; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
370; GFX10-NEXT:    ;;#ASMSTART
371; GFX10-NEXT:    ; def v31
372; GFX10-NEXT:    ;;#ASMEND
373; GFX10-NEXT:    v_mov_b32_e32 v40, v31
374; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
375; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
376; GFX10-NEXT:    v_mov_b32_e32 v31, v40
377; GFX10-NEXT:    ;;#ASMSTART
378; GFX10-NEXT:    ; use v31
379; GFX10-NEXT:    ;;#ASMEND
380; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
381; GFX10-NEXT:    v_readlane_b32 s31, v41, 1
382; GFX10-NEXT:    v_readlane_b32 s30, v41, 0
383; GFX10-NEXT:    s_mov_b32 s32, s33
384; GFX10-NEXT:    v_readlane_b32 s34, v41, 2
385; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
386; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
387; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
388; GFX10-NEXT:    s_mov_b32 exec_lo, s35
389; GFX10-NEXT:    s_mov_b32 s33, s34
390; GFX10-NEXT:    s_waitcnt vmcnt(0)
391; GFX10-NEXT:    s_setpc_b64 s[30:31]
392;
393; GFX11-LABEL: test_call_void_func_void_mayclobber_v31:
394; GFX11:       ; %bb.0:
395; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GFX11-NEXT:    s_mov_b32 s0, s33
397; GFX11-NEXT:    s_mov_b32 s33, s32
398; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
399; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
400; GFX11-NEXT:    s_mov_b32 exec_lo, s1
401; GFX11-NEXT:    v_writelane_b32 v41, s0, 2
402; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
403; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
404; GFX11-NEXT:    s_add_i32 s32, s32, 16
405; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
406; GFX11-NEXT:    v_writelane_b32 v41, s30, 0
407; GFX11-NEXT:    ;;#ASMSTART
408; GFX11-NEXT:    ; def v31
409; GFX11-NEXT:    ;;#ASMEND
410; GFX11-NEXT:    v_mov_b32_e32 v40, v31
411; GFX11-NEXT:    v_writelane_b32 v41, s31, 1
412; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
413; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
414; GFX11-NEXT:    v_mov_b32_e32 v31, v40
415; GFX11-NEXT:    ;;#ASMSTART
416; GFX11-NEXT:    ; use v31
417; GFX11-NEXT:    ;;#ASMEND
418; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
419; GFX11-NEXT:    v_readlane_b32 s31, v41, 1
420; GFX11-NEXT:    v_readlane_b32 s30, v41, 0
421; GFX11-NEXT:    s_mov_b32 s32, s33
422; GFX11-NEXT:    v_readlane_b32 s0, v41, 2
423; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
424; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
425; GFX11-NEXT:    s_mov_b32 exec_lo, s1
426; GFX11-NEXT:    s_mov_b32 s33, s0
427; GFX11-NEXT:    s_waitcnt vmcnt(0)
428; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The "={v31}" / "{v31}" constraints tie the asm output and input to v31, so
; %v31 is live across the call in between.
429  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
430  call amdgpu_gfx void @external_void_func_void()
431  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
432  ret void
433}
434
435
; An i32 is defined in s33 by inline asm, kept live across a call to
; @external_void_func_void, then consumed from s33. The checks pin that the
; value is copied from s33 to s4 before the call and back to s33 afterwards,
; with the incoming s4, s30 and s31 held in v40 lanes 0-2.
; NOTE(review): the prologue also sets s33 from s32 and the epilogue reads s33
; again (s_mov_b32 s32, s33 and the v40 reload address) after the asm has
; redefined it. Presumably acceptable because this test only checks emitted
; code; confirm. The %out argument is not used by the body.
436define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
437; GFX9-LABEL: test_call_void_func_void_preserves_s33:
438; GFX9:       ; %bb.0:
439; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440; GFX9-NEXT:    s_mov_b32 s34, s33
441; GFX9-NEXT:    s_mov_b32 s33, s32
442; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
443; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
444; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
445; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
446; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
447; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
448; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
449; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
450; GFX9-NEXT:    s_addk_i32 s32, 0x400
451; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
452; GFX9-NEXT:    ;;#ASMSTART
453; GFX9-NEXT:    ; def s33
454; GFX9-NEXT:    ;;#ASMEND
455; GFX9-NEXT:    s_mov_b32 s4, s33
456; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
457; GFX9-NEXT:    s_mov_b32 s33, s4
458; GFX9-NEXT:    ;;#ASMSTART
459; GFX9-NEXT:    ; use s33
460; GFX9-NEXT:    ;;#ASMEND
461; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
462; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
463; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
464; GFX9-NEXT:    s_mov_b32 s32, s33
465; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
466; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
467; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
468; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
469; GFX9-NEXT:    s_mov_b32 s33, s34
470; GFX9-NEXT:    s_waitcnt vmcnt(0)
471; GFX9-NEXT:    s_setpc_b64 s[30:31]
472;
473; GFX10-LABEL: test_call_void_func_void_preserves_s33:
474; GFX10:       ; %bb.0:
475; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476; GFX10-NEXT:    s_mov_b32 s34, s33
477; GFX10-NEXT:    s_mov_b32 s33, s32
478; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
479; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
480; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
481; GFX10-NEXT:    s_mov_b32 exec_lo, s35
482; GFX10-NEXT:    v_writelane_b32 v40, s34, 3
483; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
484; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
485; GFX10-NEXT:    s_addk_i32 s32, 0x200
486; GFX10-NEXT:    ;;#ASMSTART
487; GFX10-NEXT:    ; def s33
488; GFX10-NEXT:    ;;#ASMEND
489; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
490; GFX10-NEXT:    s_mov_b32 s4, s33
491; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
492; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
493; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
494; GFX10-NEXT:    s_mov_b32 s33, s4
495; GFX10-NEXT:    ;;#ASMSTART
496; GFX10-NEXT:    ; use s33
497; GFX10-NEXT:    ;;#ASMEND
498; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
499; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
500; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
501; GFX10-NEXT:    s_mov_b32 s32, s33
502; GFX10-NEXT:    v_readlane_b32 s34, v40, 3
503; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
504; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
505; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
506; GFX10-NEXT:    s_mov_b32 exec_lo, s35
507; GFX10-NEXT:    s_mov_b32 s33, s34
508; GFX10-NEXT:    s_waitcnt vmcnt(0)
509; GFX10-NEXT:    s_setpc_b64 s[30:31]
510;
511; GFX11-LABEL: test_call_void_func_void_preserves_s33:
512; GFX11:       ; %bb.0:
513; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
514; GFX11-NEXT:    s_mov_b32 s0, s33
515; GFX11-NEXT:    s_mov_b32 s33, s32
516; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
517; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
518; GFX11-NEXT:    s_mov_b32 exec_lo, s1
519; GFX11-NEXT:    v_writelane_b32 v40, s0, 3
520; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
521; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
522; GFX11-NEXT:    s_add_i32 s32, s32, 16
523; GFX11-NEXT:    ;;#ASMSTART
524; GFX11-NEXT:    ; def s33
525; GFX11-NEXT:    ;;#ASMEND
526; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
527; GFX11-NEXT:    s_mov_b32 s4, s33
528; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
529; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
530; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
531; GFX11-NEXT:    s_mov_b32 s33, s4
532; GFX11-NEXT:    ;;#ASMSTART
533; GFX11-NEXT:    ; use s33
534; GFX11-NEXT:    ;;#ASMEND
535; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
536; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
537; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
538; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
539; GFX11-NEXT:    s_mov_b32 s32, s33
540; GFX11-NEXT:    v_readlane_b32 s0, v40, 3
541; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
542; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
543; GFX11-NEXT:    s_mov_b32 exec_lo, s1
544; GFX11-NEXT:    s_mov_b32 s33, s0
545; GFX11-NEXT:    s_waitcnt vmcnt(0)
546; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The "={s33}" / "{s33}" constraints tie the asm output and input to s33, so
; %s33 is live across the call in between.
547  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
548  call amdgpu_gfx void @external_void_func_void()
549  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
550  ret void
551}
552
; An i32 is defined in s34 by inline asm, kept live across a call to
; @external_void_func_void, then consumed from s34. The checks pin that the
; value is copied from s34 to s4 before the call and back to s34 afterwards,
; with the incoming s4, s30 and s31 held in v40 lanes 0-2.
; In the GFX9 and GFX10 output s34 is also the temporary for the incoming s33
; (written to v40 lane 3 before the asm) and the low half of the callee address
; pair s[34:35], so the copy to s4 is emitted before s34 is loaded with the
; address. The GFX11 output uses s0 and s[0:1] for those roles instead.
; The %out argument is not used by the body.
553define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
554; GFX9-LABEL: test_call_void_func_void_preserves_s34:
555; GFX9:       ; %bb.0:
556; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
557; GFX9-NEXT:    s_mov_b32 s34, s33
558; GFX9-NEXT:    s_mov_b32 s33, s32
559; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
560; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
561; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
562; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
563; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
564; GFX9-NEXT:    ;;#ASMSTART
565; GFX9-NEXT:    ; def s34
566; GFX9-NEXT:    ;;#ASMEND
567; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
568; GFX9-NEXT:    s_mov_b32 s4, s34
569; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
570; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
571; GFX9-NEXT:    s_addk_i32 s32, 0x400
572; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
573; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
574; GFX9-NEXT:    s_mov_b32 s34, s4
575; GFX9-NEXT:    ;;#ASMSTART
576; GFX9-NEXT:    ; use s34
577; GFX9-NEXT:    ;;#ASMEND
578; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
579; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
580; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
581; GFX9-NEXT:    s_mov_b32 s32, s33
582; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
583; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
584; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
585; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
586; GFX9-NEXT:    s_mov_b32 s33, s34
587; GFX9-NEXT:    s_waitcnt vmcnt(0)
588; GFX9-NEXT:    s_setpc_b64 s[30:31]
589;
590; GFX10-LABEL: test_call_void_func_void_preserves_s34:
591; GFX10:       ; %bb.0:
592; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593; GFX10-NEXT:    s_mov_b32 s34, s33
594; GFX10-NEXT:    s_mov_b32 s33, s32
595; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
596; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
597; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
598; GFX10-NEXT:    s_mov_b32 exec_lo, s35
599; GFX10-NEXT:    v_writelane_b32 v40, s34, 3
600; GFX10-NEXT:    ;;#ASMSTART
601; GFX10-NEXT:    ; def s34
602; GFX10-NEXT:    ;;#ASMEND
603; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
604; GFX10-NEXT:    s_addk_i32 s32, 0x200
605; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
606; GFX10-NEXT:    s_mov_b32 s4, s34
607; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
608; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
609; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
610; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
611; GFX10-NEXT:    s_mov_b32 s34, s4
612; GFX10-NEXT:    ;;#ASMSTART
613; GFX10-NEXT:    ; use s34
614; GFX10-NEXT:    ;;#ASMEND
615; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
616; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
617; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
618; GFX10-NEXT:    s_mov_b32 s32, s33
619; GFX10-NEXT:    v_readlane_b32 s34, v40, 3
620; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
621; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
622; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
623; GFX10-NEXT:    s_mov_b32 exec_lo, s35
624; GFX10-NEXT:    s_mov_b32 s33, s34
625; GFX10-NEXT:    s_waitcnt vmcnt(0)
626; GFX10-NEXT:    s_setpc_b64 s[30:31]
627;
628; GFX11-LABEL: test_call_void_func_void_preserves_s34:
629; GFX11:       ; %bb.0:
630; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; GFX11-NEXT:    s_mov_b32 s0, s33
632; GFX11-NEXT:    s_mov_b32 s33, s32
633; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
634; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
635; GFX11-NEXT:    s_mov_b32 exec_lo, s1
636; GFX11-NEXT:    v_writelane_b32 v40, s0, 3
637; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
638; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
639; GFX11-NEXT:    s_add_i32 s32, s32, 16
640; GFX11-NEXT:    ;;#ASMSTART
641; GFX11-NEXT:    ; def s34
642; GFX11-NEXT:    ;;#ASMEND
643; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
644; GFX11-NEXT:    s_mov_b32 s4, s34
645; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
646; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
647; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
648; GFX11-NEXT:    s_mov_b32 s34, s4
649; GFX11-NEXT:    ;;#ASMSTART
650; GFX11-NEXT:    ; use s34
651; GFX11-NEXT:    ;;#ASMEND
652; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
653; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
654; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
655; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
656; GFX11-NEXT:    s_mov_b32 s32, s33
657; GFX11-NEXT:    v_readlane_b32 s0, v40, 3
658; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
659; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
660; GFX11-NEXT:    s_mov_b32 exec_lo, s1
661; GFX11-NEXT:    s_mov_b32 s33, s0
662; GFX11-NEXT:    s_waitcnt vmcnt(0)
663; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The "={s34}" / "{s34}" constraints tie the asm output and input to s34, so
; %s34 is live across the call in between.
664  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
665  call amdgpu_gfx void @external_void_func_void()
666  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
667  ret void
668}
669
; Checks that a value pinned to v40 survives a call to
; @external_void_func_void: one inline asm defines v40 before the call and a
; second inline asm consumes it afterwards. In the expected output v40 is
; stored to scratch before the def and reloaded after the use, with no extra
; save/restore of v40 around s_swappc_b64. The return address (s30/s31) and
; the caller's previous s33 are kept in lanes 0-2 of v41, which is itself
; spilled at offset 4.
; The %out argument is not referenced by the body.
670define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
671; GFX9-LABEL: test_call_void_func_void_preserves_v40:
672; GFX9:       ; %bb.0:
673; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674; GFX9-NEXT:    s_mov_b32 s34, s33
675; GFX9-NEXT:    s_mov_b32 s33, s32
676; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
677; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
678; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
679; GFX9-NEXT:    v_writelane_b32 v41, s34, 2
680; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
681; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
682; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
683; GFX9-NEXT:    s_addk_i32 s32, 0x400
684; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
685; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
686; GFX9-NEXT:    ;;#ASMSTART
687; GFX9-NEXT:    ; def v40
688; GFX9-NEXT:    ;;#ASMEND
689; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
690; GFX9-NEXT:    ;;#ASMSTART
691; GFX9-NEXT:    ; use v40
692; GFX9-NEXT:    ;;#ASMEND
693; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
694; GFX9-NEXT:    v_readlane_b32 s31, v41, 1
695; GFX9-NEXT:    v_readlane_b32 s30, v41, 0
696; GFX9-NEXT:    s_mov_b32 s32, s33
697; GFX9-NEXT:    v_readlane_b32 s34, v41, 2
698; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
699; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
700; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
701; GFX9-NEXT:    s_mov_b32 s33, s34
702; GFX9-NEXT:    s_waitcnt vmcnt(0)
703; GFX9-NEXT:    s_setpc_b64 s[30:31]
704;
705; GFX10-LABEL: test_call_void_func_void_preserves_v40:
706; GFX10:       ; %bb.0:
707; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708; GFX10-NEXT:    s_mov_b32 s34, s33
709; GFX10-NEXT:    s_mov_b32 s33, s32
710; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
711; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
712; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
713; GFX10-NEXT:    s_mov_b32 exec_lo, s35
714; GFX10-NEXT:    v_writelane_b32 v41, s34, 2
715; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
716; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
717; GFX10-NEXT:    s_addk_i32 s32, 0x200
718; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
719; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
720; GFX10-NEXT:    ;;#ASMSTART
721; GFX10-NEXT:    ; def v40
722; GFX10-NEXT:    ;;#ASMEND
723; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
724; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
725; GFX10-NEXT:    ;;#ASMSTART
726; GFX10-NEXT:    ; use v40
727; GFX10-NEXT:    ;;#ASMEND
728; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
729; GFX10-NEXT:    v_readlane_b32 s31, v41, 1
730; GFX10-NEXT:    v_readlane_b32 s30, v41, 0
731; GFX10-NEXT:    s_mov_b32 s32, s33
732; GFX10-NEXT:    v_readlane_b32 s34, v41, 2
733; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
734; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
735; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
736; GFX10-NEXT:    s_mov_b32 exec_lo, s35
737; GFX10-NEXT:    s_mov_b32 s33, s34
738; GFX10-NEXT:    s_waitcnt vmcnt(0)
739; GFX10-NEXT:    s_setpc_b64 s[30:31]
740;
741; GFX11-LABEL: test_call_void_func_void_preserves_v40:
742; GFX11:       ; %bb.0:
743; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744; GFX11-NEXT:    s_mov_b32 s0, s33
745; GFX11-NEXT:    s_mov_b32 s33, s32
746; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
747; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
748; GFX11-NEXT:    s_mov_b32 exec_lo, s1
749; GFX11-NEXT:    v_writelane_b32 v41, s0, 2
750; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
751; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
752; GFX11-NEXT:    s_add_i32 s32, s32, 16
753; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
754; GFX11-NEXT:    v_writelane_b32 v41, s30, 0
755; GFX11-NEXT:    ;;#ASMSTART
756; GFX11-NEXT:    ; def v40
757; GFX11-NEXT:    ;;#ASMEND
758; GFX11-NEXT:    v_writelane_b32 v41, s31, 1
759; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
760; GFX11-NEXT:    ;;#ASMSTART
761; GFX11-NEXT:    ; use v40
762; GFX11-NEXT:    ;;#ASMEND
763; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
764; GFX11-NEXT:    v_readlane_b32 s31, v41, 1
765; GFX11-NEXT:    v_readlane_b32 s30, v41, 0
766; GFX11-NEXT:    s_mov_b32 s32, s33
767; GFX11-NEXT:    v_readlane_b32 s0, v41, 2
768; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
769; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
770; GFX11-NEXT:    s_mov_b32 exec_lo, s1
771; GFX11-NEXT:    s_mov_b32 s33, s0
772; GFX11-NEXT:    s_waitcnt vmcnt(0)
773; GFX11-NEXT:    s_setpc_b64 s[30:31]
774  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
775  call amdgpu_gfx void @external_void_func_void()
776  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
777  ret void
778}
779
; Helper whose only instruction besides the return is an inline asm that lists
; s33 as clobbered. The expected output writes s33 into lane 0 of v0 before
; the asm and reads it back afterwards; v0 itself is spilled to and reloaded
; from scratch at s32 while exec is temporarily changed by s_xor_saveexec.
; NOTE(review): this function is defined without the amdgpu_gfx keyword, yet
; @test_call_void_func_void_clobber_s33 invokes it with `call amdgpu_gfx` -
; confirm the calling-convention difference is intentional.
780define hidden void @void_func_void_clobber_s33() #1 {
781; GFX9-LABEL: void_func_void_clobber_s33:
782; GFX9:       ; %bb.0:
783; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
785; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
786; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
787; GFX9-NEXT:    v_writelane_b32 v0, s33, 0
788; GFX9-NEXT:    ;;#ASMSTART
789; GFX9-NEXT:    ; clobber
790; GFX9-NEXT:    ;;#ASMEND
791; GFX9-NEXT:    v_readlane_b32 s33, v0, 0
792; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
793; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
794; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
795; GFX9-NEXT:    s_waitcnt vmcnt(0)
796; GFX9-NEXT:    s_setpc_b64 s[30:31]
797;
798; GFX10-LABEL: void_func_void_clobber_s33:
799; GFX10:       ; %bb.0:
800; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
802; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
803; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
804; GFX10-NEXT:    s_mov_b32 exec_lo, s4
805; GFX10-NEXT:    v_writelane_b32 v0, s33, 0
806; GFX10-NEXT:    ;;#ASMSTART
807; GFX10-NEXT:    ; clobber
808; GFX10-NEXT:    ;;#ASMEND
809; GFX10-NEXT:    v_readlane_b32 s33, v0, 0
810; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
811; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
812; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
813; GFX10-NEXT:    s_mov_b32 exec_lo, s4
814; GFX10-NEXT:    s_waitcnt vmcnt(0)
815; GFX10-NEXT:    s_setpc_b64 s[30:31]
816;
817; GFX11-LABEL: void_func_void_clobber_s33:
818; GFX11:       ; %bb.0:
819; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
821; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
822; GFX11-NEXT:    s_mov_b32 exec_lo, s0
823; GFX11-NEXT:    v_writelane_b32 v0, s33, 0
824; GFX11-NEXT:    ;;#ASMSTART
825; GFX11-NEXT:    ; clobber
826; GFX11-NEXT:    ;;#ASMEND
827; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
828; GFX11-NEXT:    v_readlane_b32 s33, v0, 0
829; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
830; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
831; GFX11-NEXT:    s_mov_b32 exec_lo, s0
832; GFX11-NEXT:    s_waitcnt vmcnt(0)
833; GFX11-NEXT:    s_setpc_b64 s[30:31]
834  call void asm sideeffect "; clobber", "~{s33}"() #0
835  ret void
836}
837
; Helper whose only instruction besides the return is an inline asm that lists
; s34 as clobbered. The expected output writes s34 into lane 0 of v0 before
; the asm and reads it back afterwards; v0 itself is spilled to and reloaded
; from scratch at s32 while exec is temporarily changed by s_xor_saveexec.
; NOTE(review): this function is defined without the amdgpu_gfx keyword, yet
; @test_call_void_func_void_clobber_s34 invokes it with `call amdgpu_gfx` -
; confirm the calling-convention difference is intentional.
838define hidden void @void_func_void_clobber_s34() #1 {
839; GFX9-LABEL: void_func_void_clobber_s34:
840; GFX9:       ; %bb.0:
841; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
843; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
844; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
845; GFX9-NEXT:    v_writelane_b32 v0, s34, 0
846; GFX9-NEXT:    ;;#ASMSTART
847; GFX9-NEXT:    ; clobber
848; GFX9-NEXT:    ;;#ASMEND
849; GFX9-NEXT:    v_readlane_b32 s34, v0, 0
850; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
851; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
852; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
853; GFX9-NEXT:    s_waitcnt vmcnt(0)
854; GFX9-NEXT:    s_setpc_b64 s[30:31]
855;
856; GFX10-LABEL: void_func_void_clobber_s34:
857; GFX10:       ; %bb.0:
858; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
860; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
861; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
862; GFX10-NEXT:    s_mov_b32 exec_lo, s4
863; GFX10-NEXT:    v_writelane_b32 v0, s34, 0
864; GFX10-NEXT:    ;;#ASMSTART
865; GFX10-NEXT:    ; clobber
866; GFX10-NEXT:    ;;#ASMEND
867; GFX10-NEXT:    v_readlane_b32 s34, v0, 0
868; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
869; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
870; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
871; GFX10-NEXT:    s_mov_b32 exec_lo, s4
872; GFX10-NEXT:    s_waitcnt vmcnt(0)
873; GFX10-NEXT:    s_setpc_b64 s[30:31]
874;
875; GFX11-LABEL: void_func_void_clobber_s34:
876; GFX11:       ; %bb.0:
877; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
878; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
879; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
880; GFX11-NEXT:    s_mov_b32 exec_lo, s0
881; GFX11-NEXT:    v_writelane_b32 v0, s34, 0
882; GFX11-NEXT:    ;;#ASMSTART
883; GFX11-NEXT:    ; clobber
884; GFX11-NEXT:    ;;#ASMEND
885; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
886; GFX11-NEXT:    v_readlane_b32 s34, v0, 0
887; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
888; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
889; GFX11-NEXT:    s_mov_b32 exec_lo, s0
890; GFX11-NEXT:    s_waitcnt vmcnt(0)
891; GFX11-NEXT:    s_setpc_b64 s[30:31]
892  call void asm sideeffect "; clobber", "~{s34}"() #0
893  ret void
894}
895
; Calls @void_func_void_clobber_s33 and returns. The expected output keeps the
; return address (s30/s31) and the caller's previous s33 in lanes 0-2 of v40
; across the call and restores them afterwards; nothing else is saved or
; restored immediately around s_swappc_b64.
896define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
897; GFX9-LABEL: test_call_void_func_void_clobber_s33:
898; GFX9:       ; %bb.0:
899; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900; GFX9-NEXT:    s_mov_b32 s34, s33
901; GFX9-NEXT:    s_mov_b32 s33, s32
902; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
903; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
904; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
905; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
906; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
907; GFX9-NEXT:    s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi
908; GFX9-NEXT:    s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo
909; GFX9-NEXT:    s_addk_i32 s32, 0x400
910; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
911; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
912; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
913; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
914; GFX9-NEXT:    s_mov_b32 s32, s33
915; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
916; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
917; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
918; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
919; GFX9-NEXT:    s_mov_b32 s33, s34
920; GFX9-NEXT:    s_waitcnt vmcnt(0)
921; GFX9-NEXT:    s_setpc_b64 s[30:31]
922;
923; GFX10-LABEL: test_call_void_func_void_clobber_s33:
924; GFX10:       ; %bb.0:
925; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926; GFX10-NEXT:    s_mov_b32 s34, s33
927; GFX10-NEXT:    s_mov_b32 s33, s32
928; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
929; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
930; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
931; GFX10-NEXT:    s_mov_b32 exec_lo, s35
932; GFX10-NEXT:    v_writelane_b32 v40, s34, 2
933; GFX10-NEXT:    s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi
934; GFX10-NEXT:    s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo
935; GFX10-NEXT:    s_addk_i32 s32, 0x200
936; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
937; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
938; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
939; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
940; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
941; GFX10-NEXT:    s_mov_b32 s32, s33
942; GFX10-NEXT:    v_readlane_b32 s34, v40, 2
943; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
944; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
945; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
946; GFX10-NEXT:    s_mov_b32 exec_lo, s35
947; GFX10-NEXT:    s_mov_b32 s33, s34
948; GFX10-NEXT:    s_waitcnt vmcnt(0)
949; GFX10-NEXT:    s_setpc_b64 s[30:31]
950;
951; GFX11-LABEL: test_call_void_func_void_clobber_s33:
952; GFX11:       ; %bb.0:
953; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954; GFX11-NEXT:    s_mov_b32 s0, s33
955; GFX11-NEXT:    s_mov_b32 s33, s32
956; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
957; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
958; GFX11-NEXT:    s_mov_b32 exec_lo, s1
959; GFX11-NEXT:    v_writelane_b32 v40, s0, 2
960; GFX11-NEXT:    s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi
961; GFX11-NEXT:    s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo
962; GFX11-NEXT:    s_add_i32 s32, s32, 16
963; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
964; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
965; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
966; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
967; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
968; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
969; GFX11-NEXT:    s_mov_b32 s32, s33
970; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
971; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
972; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
973; GFX11-NEXT:    s_mov_b32 exec_lo, s1
974; GFX11-NEXT:    s_mov_b32 s33, s0
975; GFX11-NEXT:    s_waitcnt vmcnt(0)
976; GFX11-NEXT:    s_setpc_b64 s[30:31]
977  call amdgpu_gfx void @void_func_void_clobber_s33()
978  ret void
979}
980
; Calls @void_func_void_clobber_s34 and returns. The expected output keeps the
; return address (s30/s31) and the caller's previous s33 in lanes 0-2 of v40
; across the call and restores them afterwards. The call target is
; materialized in s[34:35] for the gfx900 and gfx1010 runs and in s[0:1] for
; the gfx1100 run.
981define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
982; GFX9-LABEL: test_call_void_func_void_clobber_s34:
983; GFX9:       ; %bb.0:
984; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985; GFX9-NEXT:    s_mov_b32 s34, s33
986; GFX9-NEXT:    s_mov_b32 s33, s32
987; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
988; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
989; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
990; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
991; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
992; GFX9-NEXT:    s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi
993; GFX9-NEXT:    s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo
994; GFX9-NEXT:    s_addk_i32 s32, 0x400
995; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
996; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
997; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
998; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
999; GFX9-NEXT:    s_mov_b32 s32, s33
1000; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
1001; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
1002; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1003; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
1004; GFX9-NEXT:    s_mov_b32 s33, s34
1005; GFX9-NEXT:    s_waitcnt vmcnt(0)
1006; GFX9-NEXT:    s_setpc_b64 s[30:31]
1007;
1008; GFX10-LABEL: test_call_void_func_void_clobber_s34:
1009; GFX10:       ; %bb.0:
1010; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011; GFX10-NEXT:    s_mov_b32 s34, s33
1012; GFX10-NEXT:    s_mov_b32 s33, s32
1013; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
1014; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1015; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1016; GFX10-NEXT:    s_mov_b32 exec_lo, s35
1017; GFX10-NEXT:    v_writelane_b32 v40, s34, 2
1018; GFX10-NEXT:    s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi
1019; GFX10-NEXT:    s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo
1020; GFX10-NEXT:    s_addk_i32 s32, 0x200
1021; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1022; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1023; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1024; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1025; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1026; GFX10-NEXT:    s_mov_b32 s32, s33
1027; GFX10-NEXT:    v_readlane_b32 s34, v40, 2
1028; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
1029; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1030; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1031; GFX10-NEXT:    s_mov_b32 exec_lo, s35
1032; GFX10-NEXT:    s_mov_b32 s33, s34
1033; GFX10-NEXT:    s_waitcnt vmcnt(0)
1034; GFX10-NEXT:    s_setpc_b64 s[30:31]
1035;
1036; GFX11-LABEL: test_call_void_func_void_clobber_s34:
1037; GFX11:       ; %bb.0:
1038; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039; GFX11-NEXT:    s_mov_b32 s0, s33
1040; GFX11-NEXT:    s_mov_b32 s33, s32
1041; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
1042; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1043; GFX11-NEXT:    s_mov_b32 exec_lo, s1
1044; GFX11-NEXT:    v_writelane_b32 v40, s0, 2
1045; GFX11-NEXT:    s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi
1046; GFX11-NEXT:    s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo
1047; GFX11-NEXT:    s_add_i32 s32, s32, 16
1048; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1049; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1050; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1051; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1052; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1053; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1054; GFX11-NEXT:    s_mov_b32 s32, s33
1055; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
1056; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
1057; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1058; GFX11-NEXT:    s_mov_b32 exec_lo, s1
1059; GFX11-NEXT:    s_mov_b32 s33, s0
1060; GFX11-NEXT:    s_waitcnt vmcnt(0)
1061; GFX11-NEXT:    s_setpc_b64 s[30:31]
1062  call amdgpu_gfx void @void_func_void_clobber_s34()
1063  ret void
1064}
1065
; Defines a value in s40 through inline asm, calls @external_void_func_void,
; then hands the value to a second inline asm through a generic "s"
; constraint. The expected output copies s40 into s4 before the call and the
; second asm reads s4; the incoming s4, the return address (s30/s31) and the
; caller's previous s33 are held in lanes 0-3 of v40 and restored before the
; return.
; Despite the "_kernel" suffix, this is declared as an amdgpu_gfx function.
1066define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
1067; GFX9-LABEL: callee_saved_sgpr_kernel:
1068; GFX9:       ; %bb.0:
1069; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1070; GFX9-NEXT:    s_mov_b32 s34, s33
1071; GFX9-NEXT:    s_mov_b32 s33, s32
1072; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
1073; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1074; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
1075; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
1076; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
1077; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
1078; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
1079; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
1080; GFX9-NEXT:    s_addk_i32 s32, 0x400
1081; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
1082; GFX9-NEXT:    ;;#ASMSTART
1083; GFX9-NEXT:    ; def s40
1084; GFX9-NEXT:    ;;#ASMEND
1085; GFX9-NEXT:    s_mov_b32 s4, s40
1086; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1087; GFX9-NEXT:    ;;#ASMSTART
1088; GFX9-NEXT:    ; use s4
1089; GFX9-NEXT:    ;;#ASMEND
1090; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
1091; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
1092; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
1093; GFX9-NEXT:    s_mov_b32 s32, s33
1094; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
1095; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
1096; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1097; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
1098; GFX9-NEXT:    s_mov_b32 s33, s34
1099; GFX9-NEXT:    s_waitcnt vmcnt(0)
1100; GFX9-NEXT:    s_setpc_b64 s[30:31]
1101;
1102; GFX10-LABEL: callee_saved_sgpr_kernel:
1103; GFX10:       ; %bb.0:
1104; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105; GFX10-NEXT:    s_mov_b32 s34, s33
1106; GFX10-NEXT:    s_mov_b32 s33, s32
1107; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
1108; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1109; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1110; GFX10-NEXT:    s_mov_b32 exec_lo, s35
1111; GFX10-NEXT:    v_writelane_b32 v40, s34, 3
1112; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
1113; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
1114; GFX10-NEXT:    s_addk_i32 s32, 0x200
1115; GFX10-NEXT:    ;;#ASMSTART
1116; GFX10-NEXT:    ; def s40
1117; GFX10-NEXT:    ;;#ASMEND
1118; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
1119; GFX10-NEXT:    s_mov_b32 s4, s40
1120; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
1121; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
1122; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1123; GFX10-NEXT:    ;;#ASMSTART
1124; GFX10-NEXT:    ; use s4
1125; GFX10-NEXT:    ;;#ASMEND
1126; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
1127; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
1128; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
1129; GFX10-NEXT:    s_mov_b32 s32, s33
1130; GFX10-NEXT:    v_readlane_b32 s34, v40, 3
1131; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
1132; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1133; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1134; GFX10-NEXT:    s_mov_b32 exec_lo, s35
1135; GFX10-NEXT:    s_mov_b32 s33, s34
1136; GFX10-NEXT:    s_waitcnt vmcnt(0)
1137; GFX10-NEXT:    s_setpc_b64 s[30:31]
1138;
1139; GFX11-LABEL: callee_saved_sgpr_kernel:
1140; GFX11:       ; %bb.0:
1141; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142; GFX11-NEXT:    s_mov_b32 s0, s33
1143; GFX11-NEXT:    s_mov_b32 s33, s32
1144; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
1145; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1146; GFX11-NEXT:    s_mov_b32 exec_lo, s1
1147; GFX11-NEXT:    v_writelane_b32 v40, s0, 3
1148; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
1149; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
1150; GFX11-NEXT:    s_add_i32 s32, s32, 16
1151; GFX11-NEXT:    ;;#ASMSTART
1152; GFX11-NEXT:    ; def s40
1153; GFX11-NEXT:    ;;#ASMEND
1154; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
1155; GFX11-NEXT:    s_mov_b32 s4, s40
1156; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
1157; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
1158; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1159; GFX11-NEXT:    ;;#ASMSTART
1160; GFX11-NEXT:    ; use s4
1161; GFX11-NEXT:    ;;#ASMEND
1162; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1163; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
1164; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
1165; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
1166; GFX11-NEXT:    s_mov_b32 s32, s33
1167; GFX11-NEXT:    v_readlane_b32 s0, v40, 3
1168; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
1169; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1170; GFX11-NEXT:    s_mov_b32 exec_lo, s1
1171; GFX11-NEXT:    s_mov_b32 s33, s0
1172; GFX11-NEXT:    s_waitcnt vmcnt(0)
1173; GFX11-NEXT:    s_setpc_b64 s[30:31]
1174  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
1175  call amdgpu_gfx void @external_void_func_void()
1176  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
1177  ret void
1178}
1179
; Defines one value in s40 and one in v32 through inline asm, calls
; @external_void_func_void, then hands both values to inline asm through
; generic "s" and "v" constraints. The expected output copies s40 into s4 and
; v32 into v40 before the call, and the uses read s4 and v40. v40 is spilled
; at offset 0 and reloaded after its use; the incoming s4, the return address
; (s30/s31) and the caller's previous s33 are held in lanes 0-3 of v41, which
; is itself spilled at offset 4.
; Despite the "_kernel" suffix, this is declared as an amdgpu_gfx function.
1180define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
1181; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel:
1182; GFX9:       ; %bb.0:
1183; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184; GFX9-NEXT:    s_mov_b32 s34, s33
1185; GFX9-NEXT:    s_mov_b32 s33, s32
1186; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
1187; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1188; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
1189; GFX9-NEXT:    v_writelane_b32 v41, s34, 3
1190; GFX9-NEXT:    v_writelane_b32 v41, s4, 0
1191; GFX9-NEXT:    v_writelane_b32 v41, s30, 1
1192; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
1193; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
1194; GFX9-NEXT:    s_addk_i32 s32, 0x400
1195; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1196; GFX9-NEXT:    v_writelane_b32 v41, s31, 2
1197; GFX9-NEXT:    ;;#ASMSTART
1198; GFX9-NEXT:    ; def s40
1199; GFX9-NEXT:    ;;#ASMEND
1200; GFX9-NEXT:    s_mov_b32 s4, s40
1201; GFX9-NEXT:    ;;#ASMSTART
1202; GFX9-NEXT:    ; def v32
1203; GFX9-NEXT:    ;;#ASMEND
1204; GFX9-NEXT:    v_mov_b32_e32 v40, v32
1205; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1206; GFX9-NEXT:    ;;#ASMSTART
1207; GFX9-NEXT:    ; use s4
1208; GFX9-NEXT:    ;;#ASMEND
1209; GFX9-NEXT:    ;;#ASMSTART
1210; GFX9-NEXT:    ; use v40
1211; GFX9-NEXT:    ;;#ASMEND
1212; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1213; GFX9-NEXT:    v_readlane_b32 s31, v41, 2
1214; GFX9-NEXT:    v_readlane_b32 s30, v41, 1
1215; GFX9-NEXT:    v_readlane_b32 s4, v41, 0
1216; GFX9-NEXT:    s_mov_b32 s32, s33
1217; GFX9-NEXT:    v_readlane_b32 s34, v41, 3
1218; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
1219; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1220; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
1221; GFX9-NEXT:    s_mov_b32 s33, s34
1222; GFX9-NEXT:    s_waitcnt vmcnt(0)
1223; GFX9-NEXT:    s_setpc_b64 s[30:31]
1224;
1225; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel:
1226; GFX10:       ; %bb.0:
1227; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228; GFX10-NEXT:    s_mov_b32 s34, s33
1229; GFX10-NEXT:    s_mov_b32 s33, s32
1230; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
1231; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1232; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1233; GFX10-NEXT:    s_mov_b32 exec_lo, s35
1234; GFX10-NEXT:    v_writelane_b32 v41, s34, 3
1235; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
1236; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
1237; GFX10-NEXT:    s_addk_i32 s32, 0x200
1238; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1239; GFX10-NEXT:    v_writelane_b32 v41, s4, 0
1240; GFX10-NEXT:    ;;#ASMSTART
1241; GFX10-NEXT:    ; def s40
1242; GFX10-NEXT:    ;;#ASMEND
1243; GFX10-NEXT:    s_mov_b32 s4, s40
1244; GFX10-NEXT:    ;;#ASMSTART
1245; GFX10-NEXT:    ; def v32
1246; GFX10-NEXT:    ;;#ASMEND
1247; GFX10-NEXT:    v_mov_b32_e32 v40, v32
1248; GFX10-NEXT:    v_writelane_b32 v41, s30, 1
1249; GFX10-NEXT:    v_writelane_b32 v41, s31, 2
1250; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1251; GFX10-NEXT:    ;;#ASMSTART
1252; GFX10-NEXT:    ; use s4
1253; GFX10-NEXT:    ;;#ASMEND
1254; GFX10-NEXT:    ;;#ASMSTART
1255; GFX10-NEXT:    ; use v40
1256; GFX10-NEXT:    ;;#ASMEND
1257; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1258; GFX10-NEXT:    v_readlane_b32 s31, v41, 2
1259; GFX10-NEXT:    v_readlane_b32 s30, v41, 1
1260; GFX10-NEXT:    v_readlane_b32 s4, v41, 0
1261; GFX10-NEXT:    s_mov_b32 s32, s33
1262; GFX10-NEXT:    v_readlane_b32 s34, v41, 3
1263; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
1264; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1265; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1266; GFX10-NEXT:    s_mov_b32 exec_lo, s35
1267; GFX10-NEXT:    s_mov_b32 s33, s34
1268; GFX10-NEXT:    s_waitcnt vmcnt(0)
1269; GFX10-NEXT:    s_setpc_b64 s[30:31]
1270;
1271; GFX11-LABEL: callee_saved_sgpr_vgpr_kernel:
1272; GFX11:       ; %bb.0:
1273; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1274; GFX11-NEXT:    s_mov_b32 s0, s33
1275; GFX11-NEXT:    s_mov_b32 s33, s32
1276; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
1277; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
1278; GFX11-NEXT:    s_mov_b32 exec_lo, s1
1279; GFX11-NEXT:    v_writelane_b32 v41, s0, 3
1280; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
1281; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
1282; GFX11-NEXT:    s_add_i32 s32, s32, 16
1283; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1284; GFX11-NEXT:    v_writelane_b32 v41, s4, 0
1285; GFX11-NEXT:    ;;#ASMSTART
1286; GFX11-NEXT:    ; def s40
1287; GFX11-NEXT:    ;;#ASMEND
1288; GFX11-NEXT:    s_mov_b32 s4, s40
1289; GFX11-NEXT:    ;;#ASMSTART
1290; GFX11-NEXT:    ; def v32
1291; GFX11-NEXT:    ;;#ASMEND
1292; GFX11-NEXT:    v_mov_b32_e32 v40, v32
1293; GFX11-NEXT:    v_writelane_b32 v41, s30, 1
1294; GFX11-NEXT:    v_writelane_b32 v41, s31, 2
1295; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1296; GFX11-NEXT:    ;;#ASMSTART
1297; GFX11-NEXT:    ; use s4
1298; GFX11-NEXT:    ;;#ASMEND
1299; GFX11-NEXT:    ;;#ASMSTART
1300; GFX11-NEXT:    ; use v40
1301; GFX11-NEXT:    ;;#ASMEND
1302; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1303; GFX11-NEXT:    v_readlane_b32 s31, v41, 2
1304; GFX11-NEXT:    v_readlane_b32 s30, v41, 1
1305; GFX11-NEXT:    v_readlane_b32 s4, v41, 0
1306; GFX11-NEXT:    s_mov_b32 s32, s33
1307; GFX11-NEXT:    v_readlane_b32 s0, v41, 3
1308; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
1309; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
1310; GFX11-NEXT:    s_mov_b32 exec_lo, s1
1311; GFX11-NEXT:    s_mov_b32 s33, s0
1312; GFX11-NEXT:    s_waitcnt vmcnt(0)
1313; GFX11-NEXT:    s_setpc_b64 s[30:31]
1314  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
1315  %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
1316  call amdgpu_gfx void @external_void_func_void()
1317  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
1318  call void asm sideeffect "; use $0", "v"(i32 %v32) #0
1319  ret void
1320}
1321
; Attribute groups referenced above: #0 is nounwind only; #1 is nounwind plus
; noinline.
1322attributes #0 = { nounwind }
1323attributes #1 = { nounwind noinline }
1324