xref: /llvm-project/llvm/test/CodeGen/AMDGPU/indirect-call.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
4
; Externally defined globals in addrspace(4), each holding one function
; pointer. The test_indirect_call_sgpr_ptr* kernels in this file load their
; callee from these (@gv.fptr0 and @gv.fptr1 respectively).
5@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
6@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
7
; Kernel that loads a function pointer from the addrspace(4) global @gv.fptr0
; and calls it with no arguments.
; In both expected sequences (GCN and GISEL prefixes) the pointer is fetched
; with s_load_dwordx2 into s[18:19], and the call itself is the s_swappc_b64
; on that register pair, preceded by s_waitcnt lgkmcnt(0).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
8define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
9; GCN-LABEL: test_indirect_call_sgpr_ptr:
10; GCN:       ; %bb.0:
11; GCN-NEXT:    s_mov_b32 s32, 0
12; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
13; GCN-NEXT:    s_add_i32 s12, s12, s17
14; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
15; GCN-NEXT:    s_add_u32 s0, s0, s17
16; GCN-NEXT:    s_addc_u32 s1, s1, 0
17; GCN-NEXT:    s_mov_b32 s13, s15
18; GCN-NEXT:    s_mov_b32 s12, s14
19; GCN-NEXT:    s_getpc_b64 s[14:15]
20; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
21; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
22; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
23; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
24; GCN-NEXT:    s_add_u32 s8, s8, 8
25; GCN-NEXT:    s_addc_u32 s9, s9, 0
26; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
27; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
28; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
29; GCN-NEXT:    s_mov_b32 s14, s16
30; GCN-NEXT:    s_waitcnt lgkmcnt(0)
31; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
32; GCN-NEXT:    s_endpgm
33;
34; GISEL-LABEL: test_indirect_call_sgpr_ptr:
35; GISEL:       ; %bb.0:
36; GISEL-NEXT:    s_mov_b32 s32, 0
37; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
38; GISEL-NEXT:    s_add_i32 s12, s12, s17
39; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
40; GISEL-NEXT:    s_add_u32 s0, s0, s17
41; GISEL-NEXT:    s_addc_u32 s1, s1, 0
42; GISEL-NEXT:    s_mov_b32 s13, s15
43; GISEL-NEXT:    s_mov_b32 s12, s14
44; GISEL-NEXT:    s_getpc_b64 s[14:15]
45; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
46; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
47; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
48; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
49; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
50; GISEL-NEXT:    s_add_u32 s8, s8, 8
51; GISEL-NEXT:    s_addc_u32 s9, s9, 0
52; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
53; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
54; GISEL-NEXT:    s_mov_b32 s14, s16
55; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
56; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
57; GISEL-NEXT:    s_endpgm
58  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
59  call void %fptr()
60  ret void
61}
62
; Kernel that loads a function pointer from the addrspace(4) global @gv.fptr1
; and calls it with the single argument i32 123.
; Both expected sequences place the constant in v0 (v_mov_b32_e32 v0, 0x7b;
; 0x7b == 123) after v31 has been assembled from the incoming v0/v1/v2, then
; call through s[18:19] with s_swappc_b64.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
63define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
64; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
65; GCN:       ; %bb.0:
66; GCN-NEXT:    s_mov_b32 s32, 0
67; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
68; GCN-NEXT:    s_add_i32 s12, s12, s17
69; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
70; GCN-NEXT:    s_add_u32 s0, s0, s17
71; GCN-NEXT:    s_addc_u32 s1, s1, 0
72; GCN-NEXT:    s_mov_b32 s13, s15
73; GCN-NEXT:    s_mov_b32 s12, s14
74; GCN-NEXT:    s_getpc_b64 s[14:15]
75; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
76; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
77; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
78; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
79; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
80; GCN-NEXT:    s_add_u32 s8, s8, 8
81; GCN-NEXT:    s_addc_u32 s9, s9, 0
82; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
83; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
84; GCN-NEXT:    v_mov_b32_e32 v0, 0x7b
85; GCN-NEXT:    s_mov_b32 s14, s16
86; GCN-NEXT:    s_waitcnt lgkmcnt(0)
87; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
88; GCN-NEXT:    s_endpgm
89;
90; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
91; GISEL:       ; %bb.0:
92; GISEL-NEXT:    s_mov_b32 s32, 0
93; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
94; GISEL-NEXT:    s_add_i32 s12, s12, s17
95; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
96; GISEL-NEXT:    s_add_u32 s0, s0, s17
97; GISEL-NEXT:    s_addc_u32 s1, s1, 0
98; GISEL-NEXT:    s_mov_b32 s13, s15
99; GISEL-NEXT:    s_mov_b32 s12, s14
100; GISEL-NEXT:    s_getpc_b64 s[14:15]
101; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
102; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
103; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
104; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
105; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
106; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
107; GISEL-NEXT:    s_add_u32 s8, s8, 8
108; GISEL-NEXT:    s_addc_u32 s9, s9, 0
109; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
110; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
111; GISEL-NEXT:    s_mov_b32 s14, s16
112; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
113; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
114; GISEL-NEXT:    s_endpgm
115  %fptr = load ptr, ptr addrspace(4) @gv.fptr1
116  call void %fptr(i32 123)
117  ret void
118}
119
; Non-kernel function that calls the pointer it receives as %fptr, with no
; arguments and no return value.
; Unlike the sgpr_ptr kernels, the expected output wraps the call in a loop
; (.LBB2_1): v_readfirstlane_b32 copies the pointer from v[0:1] into s[16:17],
; v_cmp_eq_u64 + s_and_saveexec_b64 mask exec against that value, the call is
; made with s_swappc_b64 on s[16:17], and s_xor_b64 / s_cbranch_execnz repeat
; the loop while exec is non-zero. The original exec is saved in s[46:47]
; before the loop and restored after it.
; The prologue/epilogue spill s30-s49 and the old s33 into lanes of v40.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
120define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
121; GCN-LABEL: test_indirect_call_vgpr_ptr:
122; GCN:       ; %bb.0:
123; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; GCN-NEXT:    s_mov_b32 s16, s33
125; GCN-NEXT:    s_mov_b32 s33, s32
126; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
127; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
128; GCN-NEXT:    s_mov_b64 exec, s[18:19]
129; GCN-NEXT:    v_writelane_b32 v40, s16, 18
130; GCN-NEXT:    s_addk_i32 s32, 0x400
131; GCN-NEXT:    v_writelane_b32 v40, s30, 0
132; GCN-NEXT:    v_writelane_b32 v40, s31, 1
133; GCN-NEXT:    v_writelane_b32 v40, s34, 2
134; GCN-NEXT:    v_writelane_b32 v40, s35, 3
135; GCN-NEXT:    v_writelane_b32 v40, s36, 4
136; GCN-NEXT:    v_writelane_b32 v40, s37, 5
137; GCN-NEXT:    v_writelane_b32 v40, s38, 6
138; GCN-NEXT:    v_writelane_b32 v40, s39, 7
139; GCN-NEXT:    v_writelane_b32 v40, s40, 8
140; GCN-NEXT:    v_writelane_b32 v40, s41, 9
141; GCN-NEXT:    v_writelane_b32 v40, s42, 10
142; GCN-NEXT:    v_writelane_b32 v40, s43, 11
143; GCN-NEXT:    v_writelane_b32 v40, s44, 12
144; GCN-NEXT:    v_writelane_b32 v40, s45, 13
145; GCN-NEXT:    v_writelane_b32 v40, s46, 14
146; GCN-NEXT:    v_writelane_b32 v40, s47, 15
147; GCN-NEXT:    v_writelane_b32 v40, s48, 16
148; GCN-NEXT:    v_writelane_b32 v40, s49, 17
149; GCN-NEXT:    s_mov_b32 s42, s15
150; GCN-NEXT:    s_mov_b32 s43, s14
151; GCN-NEXT:    s_mov_b32 s44, s13
152; GCN-NEXT:    s_mov_b32 s45, s12
153; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
154; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
155; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
156; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
157; GCN-NEXT:    s_mov_b64 s[46:47], exec
158; GCN-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
159; GCN-NEXT:    v_readfirstlane_b32 s16, v0
160; GCN-NEXT:    v_readfirstlane_b32 s17, v1
161; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
162; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
163; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
164; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
165; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
166; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
167; GCN-NEXT:    s_mov_b32 s12, s45
168; GCN-NEXT:    s_mov_b32 s13, s44
169; GCN-NEXT:    s_mov_b32 s14, s43
170; GCN-NEXT:    s_mov_b32 s15, s42
171; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
172; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
173; GCN-NEXT:    ; implicit-def: $vgpr31
174; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
175; GCN-NEXT:    s_cbranch_execnz .LBB2_1
176; GCN-NEXT:  ; %bb.2:
177; GCN-NEXT:    s_mov_b64 exec, s[46:47]
178; GCN-NEXT:    v_readlane_b32 s49, v40, 17
179; GCN-NEXT:    v_readlane_b32 s48, v40, 16
180; GCN-NEXT:    v_readlane_b32 s47, v40, 15
181; GCN-NEXT:    v_readlane_b32 s46, v40, 14
182; GCN-NEXT:    v_readlane_b32 s45, v40, 13
183; GCN-NEXT:    v_readlane_b32 s44, v40, 12
184; GCN-NEXT:    v_readlane_b32 s43, v40, 11
185; GCN-NEXT:    v_readlane_b32 s42, v40, 10
186; GCN-NEXT:    v_readlane_b32 s41, v40, 9
187; GCN-NEXT:    v_readlane_b32 s40, v40, 8
188; GCN-NEXT:    v_readlane_b32 s39, v40, 7
189; GCN-NEXT:    v_readlane_b32 s38, v40, 6
190; GCN-NEXT:    v_readlane_b32 s37, v40, 5
191; GCN-NEXT:    v_readlane_b32 s36, v40, 4
192; GCN-NEXT:    v_readlane_b32 s35, v40, 3
193; GCN-NEXT:    v_readlane_b32 s34, v40, 2
194; GCN-NEXT:    v_readlane_b32 s31, v40, 1
195; GCN-NEXT:    v_readlane_b32 s30, v40, 0
196; GCN-NEXT:    s_mov_b32 s32, s33
197; GCN-NEXT:    v_readlane_b32 s4, v40, 18
198; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
199; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
200; GCN-NEXT:    s_mov_b64 exec, s[6:7]
201; GCN-NEXT:    s_mov_b32 s33, s4
202; GCN-NEXT:    s_waitcnt vmcnt(0)
203; GCN-NEXT:    s_setpc_b64 s[30:31]
204;
205; GISEL-LABEL: test_indirect_call_vgpr_ptr:
206; GISEL:       ; %bb.0:
207; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208; GISEL-NEXT:    s_mov_b32 s16, s33
209; GISEL-NEXT:    s_mov_b32 s33, s32
210; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
211; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
212; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
213; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
214; GISEL-NEXT:    s_addk_i32 s32, 0x400
215; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
216; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
217; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
218; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
219; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
220; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
221; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
222; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
223; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
224; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
225; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
226; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
227; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
228; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
229; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
230; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
231; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
232; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
233; GISEL-NEXT:    s_mov_b32 s42, s15
234; GISEL-NEXT:    s_mov_b32 s43, s14
235; GISEL-NEXT:    s_mov_b32 s44, s13
236; GISEL-NEXT:    s_mov_b32 s45, s12
237; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
238; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
239; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
240; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
241; GISEL-NEXT:    s_mov_b64 s[46:47], exec
242; GISEL-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
243; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
244; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
245; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
246; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
247; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
248; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
249; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
250; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
251; GISEL-NEXT:    s_mov_b32 s12, s45
252; GISEL-NEXT:    s_mov_b32 s13, s44
253; GISEL-NEXT:    s_mov_b32 s14, s43
254; GISEL-NEXT:    s_mov_b32 s15, s42
255; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
256; GISEL-NEXT:    ; implicit-def: $vgpr0
257; GISEL-NEXT:    ; implicit-def: $vgpr31
258; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
259; GISEL-NEXT:    s_cbranch_execnz .LBB2_1
260; GISEL-NEXT:  ; %bb.2:
261; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
262; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
263; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
264; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
265; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
266; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
267; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
268; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
269; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
270; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
271; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
272; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
273; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
274; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
275; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
276; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
277; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
278; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
279; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
280; GISEL-NEXT:    s_mov_b32 s32, s33
281; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
282; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
283; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
284; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
285; GISEL-NEXT:    s_mov_b32 s33, s4
286; GISEL-NEXT:    s_waitcnt vmcnt(0)
287; GISEL-NEXT:    s_setpc_b64 s[30:31]
288  call void %fptr()
289  ret void
290}
291
; Non-kernel function that calls the pointer it receives as %fptr, passing
; the single argument i32 123.
; The expected output has the same loop around the call as
; test_indirect_call_vgpr_ptr (here labelled .LBB3_1). The two check prefixes
; differ in where the constant 0x7b (== 123) is produced:
;   GCN:   v_mov_b32_e32 v2, 0x7b before the loop, then v_mov_b32_e32 v0, v2
;          inside the loop just before s_swappc_b64.
;   GISEL: v_mov_b32_e32 v0, 0x7b inside the loop, right after
;          s_and_saveexec_b64.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
292define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
293; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
294; GCN:       ; %bb.0:
295; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GCN-NEXT:    s_mov_b32 s16, s33
297; GCN-NEXT:    s_mov_b32 s33, s32
298; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
299; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
300; GCN-NEXT:    s_mov_b64 exec, s[18:19]
301; GCN-NEXT:    v_writelane_b32 v40, s16, 18
302; GCN-NEXT:    s_addk_i32 s32, 0x400
303; GCN-NEXT:    v_writelane_b32 v40, s30, 0
304; GCN-NEXT:    v_writelane_b32 v40, s31, 1
305; GCN-NEXT:    v_writelane_b32 v40, s34, 2
306; GCN-NEXT:    v_writelane_b32 v40, s35, 3
307; GCN-NEXT:    v_writelane_b32 v40, s36, 4
308; GCN-NEXT:    v_writelane_b32 v40, s37, 5
309; GCN-NEXT:    v_writelane_b32 v40, s38, 6
310; GCN-NEXT:    v_writelane_b32 v40, s39, 7
311; GCN-NEXT:    v_writelane_b32 v40, s40, 8
312; GCN-NEXT:    v_writelane_b32 v40, s41, 9
313; GCN-NEXT:    v_writelane_b32 v40, s42, 10
314; GCN-NEXT:    v_writelane_b32 v40, s43, 11
315; GCN-NEXT:    v_writelane_b32 v40, s44, 12
316; GCN-NEXT:    v_writelane_b32 v40, s45, 13
317; GCN-NEXT:    v_writelane_b32 v40, s46, 14
318; GCN-NEXT:    v_writelane_b32 v40, s47, 15
319; GCN-NEXT:    v_writelane_b32 v40, s48, 16
320; GCN-NEXT:    v_writelane_b32 v40, s49, 17
321; GCN-NEXT:    s_mov_b32 s42, s15
322; GCN-NEXT:    s_mov_b32 s43, s14
323; GCN-NEXT:    s_mov_b32 s44, s13
324; GCN-NEXT:    s_mov_b32 s45, s12
325; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
326; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
327; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
328; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
329; GCN-NEXT:    s_mov_b64 s[46:47], exec
330; GCN-NEXT:    v_mov_b32_e32 v2, 0x7b
331; GCN-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
332; GCN-NEXT:    v_readfirstlane_b32 s16, v0
333; GCN-NEXT:    v_readfirstlane_b32 s17, v1
334; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
335; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
336; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
337; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
338; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
339; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
340; GCN-NEXT:    s_mov_b32 s12, s45
341; GCN-NEXT:    s_mov_b32 s13, s44
342; GCN-NEXT:    s_mov_b32 s14, s43
343; GCN-NEXT:    s_mov_b32 s15, s42
344; GCN-NEXT:    v_mov_b32_e32 v0, v2
345; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
346; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
347; GCN-NEXT:    ; implicit-def: $vgpr31
348; GCN-NEXT:    ; implicit-def: $vgpr2
349; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
350; GCN-NEXT:    s_cbranch_execnz .LBB3_1
351; GCN-NEXT:  ; %bb.2:
352; GCN-NEXT:    s_mov_b64 exec, s[46:47]
353; GCN-NEXT:    v_readlane_b32 s49, v40, 17
354; GCN-NEXT:    v_readlane_b32 s48, v40, 16
355; GCN-NEXT:    v_readlane_b32 s47, v40, 15
356; GCN-NEXT:    v_readlane_b32 s46, v40, 14
357; GCN-NEXT:    v_readlane_b32 s45, v40, 13
358; GCN-NEXT:    v_readlane_b32 s44, v40, 12
359; GCN-NEXT:    v_readlane_b32 s43, v40, 11
360; GCN-NEXT:    v_readlane_b32 s42, v40, 10
361; GCN-NEXT:    v_readlane_b32 s41, v40, 9
362; GCN-NEXT:    v_readlane_b32 s40, v40, 8
363; GCN-NEXT:    v_readlane_b32 s39, v40, 7
364; GCN-NEXT:    v_readlane_b32 s38, v40, 6
365; GCN-NEXT:    v_readlane_b32 s37, v40, 5
366; GCN-NEXT:    v_readlane_b32 s36, v40, 4
367; GCN-NEXT:    v_readlane_b32 s35, v40, 3
368; GCN-NEXT:    v_readlane_b32 s34, v40, 2
369; GCN-NEXT:    v_readlane_b32 s31, v40, 1
370; GCN-NEXT:    v_readlane_b32 s30, v40, 0
371; GCN-NEXT:    s_mov_b32 s32, s33
372; GCN-NEXT:    v_readlane_b32 s4, v40, 18
373; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
374; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
375; GCN-NEXT:    s_mov_b64 exec, s[6:7]
376; GCN-NEXT:    s_mov_b32 s33, s4
377; GCN-NEXT:    s_waitcnt vmcnt(0)
378; GCN-NEXT:    s_setpc_b64 s[30:31]
379;
380; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
381; GISEL:       ; %bb.0:
382; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383; GISEL-NEXT:    s_mov_b32 s16, s33
384; GISEL-NEXT:    s_mov_b32 s33, s32
385; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
386; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
387; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
388; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
389; GISEL-NEXT:    s_addk_i32 s32, 0x400
390; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
391; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
392; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
393; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
394; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
395; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
396; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
397; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
398; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
399; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
400; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
401; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
402; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
403; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
404; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
405; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
406; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
407; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
408; GISEL-NEXT:    s_mov_b32 s42, s15
409; GISEL-NEXT:    s_mov_b32 s43, s14
410; GISEL-NEXT:    s_mov_b32 s44, s13
411; GISEL-NEXT:    s_mov_b32 s45, s12
412; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
413; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
414; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
415; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
416; GISEL-NEXT:    s_mov_b64 s[46:47], exec
417; GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
418; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
419; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
420; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
421; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
422; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
423; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
424; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
425; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
426; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
427; GISEL-NEXT:    s_mov_b32 s12, s45
428; GISEL-NEXT:    s_mov_b32 s13, s44
429; GISEL-NEXT:    s_mov_b32 s14, s43
430; GISEL-NEXT:    s_mov_b32 s15, s42
431; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
432; GISEL-NEXT:    ; implicit-def: $vgpr0
433; GISEL-NEXT:    ; implicit-def: $vgpr31
434; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
435; GISEL-NEXT:    s_cbranch_execnz .LBB3_1
436; GISEL-NEXT:  ; %bb.2:
437; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
438; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
439; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
440; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
441; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
442; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
443; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
444; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
445; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
446; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
447; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
448; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
449; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
450; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
451; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
452; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
453; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
454; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
455; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
456; GISEL-NEXT:    s_mov_b32 s32, s33
457; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
458; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
459; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
460; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
461; GISEL-NEXT:    s_mov_b32 s33, s4
462; GISEL-NEXT:    s_waitcnt vmcnt(0)
463; GISEL-NEXT:    s_setpc_b64 s[30:31]
464  call void %fptr(i32 123)
465  ret void
466}
467
; Non-kernel function that calls the pointer it receives as %fptr with no
; arguments, adds 1 to the i32 result, and returns the sum.
; The expected output has the same loop around the call as
; test_indirect_call_vgpr_ptr (here labelled .LBB4_1). Inside the loop the
; value in v0 after s_swappc_b64 is copied to another VGPR (v2 for GCN, v1 for
; GISEL); after the loop, once exec is restored from s[46:47], a single
; v_add_i32_e32 adds 1 to that copy and writes v0.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
468define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
469; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
470; GCN:       ; %bb.0:
471; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472; GCN-NEXT:    s_mov_b32 s16, s33
473; GCN-NEXT:    s_mov_b32 s33, s32
474; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
475; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
476; GCN-NEXT:    s_mov_b64 exec, s[18:19]
477; GCN-NEXT:    v_writelane_b32 v40, s16, 18
478; GCN-NEXT:    s_addk_i32 s32, 0x400
479; GCN-NEXT:    v_writelane_b32 v40, s30, 0
480; GCN-NEXT:    v_writelane_b32 v40, s31, 1
481; GCN-NEXT:    v_writelane_b32 v40, s34, 2
482; GCN-NEXT:    v_writelane_b32 v40, s35, 3
483; GCN-NEXT:    v_writelane_b32 v40, s36, 4
484; GCN-NEXT:    v_writelane_b32 v40, s37, 5
485; GCN-NEXT:    v_writelane_b32 v40, s38, 6
486; GCN-NEXT:    v_writelane_b32 v40, s39, 7
487; GCN-NEXT:    v_writelane_b32 v40, s40, 8
488; GCN-NEXT:    v_writelane_b32 v40, s41, 9
489; GCN-NEXT:    v_writelane_b32 v40, s42, 10
490; GCN-NEXT:    v_writelane_b32 v40, s43, 11
491; GCN-NEXT:    v_writelane_b32 v40, s44, 12
492; GCN-NEXT:    v_writelane_b32 v40, s45, 13
493; GCN-NEXT:    v_writelane_b32 v40, s46, 14
494; GCN-NEXT:    v_writelane_b32 v40, s47, 15
495; GCN-NEXT:    v_writelane_b32 v40, s48, 16
496; GCN-NEXT:    v_writelane_b32 v40, s49, 17
497; GCN-NEXT:    s_mov_b32 s42, s15
498; GCN-NEXT:    s_mov_b32 s43, s14
499; GCN-NEXT:    s_mov_b32 s44, s13
500; GCN-NEXT:    s_mov_b32 s45, s12
501; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
502; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
503; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
504; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
505; GCN-NEXT:    s_mov_b64 s[46:47], exec
506; GCN-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
507; GCN-NEXT:    v_readfirstlane_b32 s16, v0
508; GCN-NEXT:    v_readfirstlane_b32 s17, v1
509; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
510; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
511; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
512; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
513; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
514; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
515; GCN-NEXT:    s_mov_b32 s12, s45
516; GCN-NEXT:    s_mov_b32 s13, s44
517; GCN-NEXT:    s_mov_b32 s14, s43
518; GCN-NEXT:    s_mov_b32 s15, s42
519; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
520; GCN-NEXT:    v_mov_b32_e32 v2, v0
521; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
522; GCN-NEXT:    ; implicit-def: $vgpr31
523; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
524; GCN-NEXT:    s_cbranch_execnz .LBB4_1
525; GCN-NEXT:  ; %bb.2:
526; GCN-NEXT:    s_mov_b64 exec, s[46:47]
527; GCN-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
528; GCN-NEXT:    v_readlane_b32 s49, v40, 17
529; GCN-NEXT:    v_readlane_b32 s48, v40, 16
530; GCN-NEXT:    v_readlane_b32 s47, v40, 15
531; GCN-NEXT:    v_readlane_b32 s46, v40, 14
532; GCN-NEXT:    v_readlane_b32 s45, v40, 13
533; GCN-NEXT:    v_readlane_b32 s44, v40, 12
534; GCN-NEXT:    v_readlane_b32 s43, v40, 11
535; GCN-NEXT:    v_readlane_b32 s42, v40, 10
536; GCN-NEXT:    v_readlane_b32 s41, v40, 9
537; GCN-NEXT:    v_readlane_b32 s40, v40, 8
538; GCN-NEXT:    v_readlane_b32 s39, v40, 7
539; GCN-NEXT:    v_readlane_b32 s38, v40, 6
540; GCN-NEXT:    v_readlane_b32 s37, v40, 5
541; GCN-NEXT:    v_readlane_b32 s36, v40, 4
542; GCN-NEXT:    v_readlane_b32 s35, v40, 3
543; GCN-NEXT:    v_readlane_b32 s34, v40, 2
544; GCN-NEXT:    v_readlane_b32 s31, v40, 1
545; GCN-NEXT:    v_readlane_b32 s30, v40, 0
546; GCN-NEXT:    s_mov_b32 s32, s33
547; GCN-NEXT:    v_readlane_b32 s4, v40, 18
548; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
549; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
550; GCN-NEXT:    s_mov_b64 exec, s[6:7]
551; GCN-NEXT:    s_mov_b32 s33, s4
552; GCN-NEXT:    s_waitcnt vmcnt(0)
553; GCN-NEXT:    s_setpc_b64 s[30:31]
554;
555; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
556; GISEL:       ; %bb.0:
557; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558; GISEL-NEXT:    s_mov_b32 s16, s33
559; GISEL-NEXT:    s_mov_b32 s33, s32
560; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
561; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
562; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
563; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
564; GISEL-NEXT:    s_addk_i32 s32, 0x400
565; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
566; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
567; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
568; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
569; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
570; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
571; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
572; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
573; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
574; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
575; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
576; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
577; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
578; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
579; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
580; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
581; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
582; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
583; GISEL-NEXT:    s_mov_b32 s42, s15
584; GISEL-NEXT:    s_mov_b32 s43, s14
585; GISEL-NEXT:    s_mov_b32 s44, s13
586; GISEL-NEXT:    s_mov_b32 s45, s12
587; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
588; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
589; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
590; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
591; GISEL-NEXT:    s_mov_b64 s[46:47], exec
592; GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
593; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
594; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
595; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
596; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
597; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
598; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
599; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
600; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
601; GISEL-NEXT:    s_mov_b32 s12, s45
602; GISEL-NEXT:    s_mov_b32 s13, s44
603; GISEL-NEXT:    s_mov_b32 s14, s43
604; GISEL-NEXT:    s_mov_b32 s15, s42
605; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
606; GISEL-NEXT:    v_mov_b32_e32 v1, v0
607; GISEL-NEXT:    ; implicit-def: $vgpr0
608; GISEL-NEXT:    ; implicit-def: $vgpr31
609; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
610; GISEL-NEXT:    s_cbranch_execnz .LBB4_1
611; GISEL-NEXT:  ; %bb.2:
612; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
613; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
614; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
615; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
616; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
617; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
618; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
619; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
620; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
621; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
622; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
623; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
624; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
625; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
626; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
627; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
628; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
629; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
630; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
631; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
632; GISEL-NEXT:    s_mov_b32 s32, s33
633; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
634; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
635; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
636; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
637; GISEL-NEXT:    s_mov_b32 s33, s4
638; GISEL-NEXT:    s_waitcnt vmcnt(0)
639; GISEL-NEXT:    s_setpc_b64 s[30:31]
640  %a = call i32 %fptr()
641  %b = add i32 %a, 1
642  ret i32 %b
643}
644
645define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
646; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
647; GCN:       ; %bb.0: ; %bb0
648; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649; GCN-NEXT:    s_mov_b32 s16, s33
650; GCN-NEXT:    s_mov_b32 s33, s32
651; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
652; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
653; GCN-NEXT:    s_mov_b64 exec, s[18:19]
654; GCN-NEXT:    v_writelane_b32 v40, s16, 20
655; GCN-NEXT:    s_addk_i32 s32, 0x400
656; GCN-NEXT:    v_writelane_b32 v40, s30, 0
657; GCN-NEXT:    v_writelane_b32 v40, s31, 1
658; GCN-NEXT:    v_writelane_b32 v40, s34, 2
659; GCN-NEXT:    v_writelane_b32 v40, s35, 3
660; GCN-NEXT:    v_writelane_b32 v40, s36, 4
661; GCN-NEXT:    v_writelane_b32 v40, s37, 5
662; GCN-NEXT:    v_writelane_b32 v40, s38, 6
663; GCN-NEXT:    v_writelane_b32 v40, s39, 7
664; GCN-NEXT:    v_writelane_b32 v40, s40, 8
665; GCN-NEXT:    v_writelane_b32 v40, s41, 9
666; GCN-NEXT:    v_writelane_b32 v40, s42, 10
667; GCN-NEXT:    v_writelane_b32 v40, s43, 11
668; GCN-NEXT:    v_writelane_b32 v40, s44, 12
669; GCN-NEXT:    v_writelane_b32 v40, s45, 13
670; GCN-NEXT:    v_writelane_b32 v40, s46, 14
671; GCN-NEXT:    v_writelane_b32 v40, s47, 15
672; GCN-NEXT:    v_writelane_b32 v40, s48, 16
673; GCN-NEXT:    v_writelane_b32 v40, s49, 17
674; GCN-NEXT:    v_writelane_b32 v40, s50, 18
675; GCN-NEXT:    v_writelane_b32 v40, s51, 19
676; GCN-NEXT:    s_mov_b32 s42, s15
677; GCN-NEXT:    s_mov_b32 s43, s14
678; GCN-NEXT:    s_mov_b32 s44, s13
679; GCN-NEXT:    s_mov_b32 s45, s12
680; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
681; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
682; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
683; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
684; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
685; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
686; GCN-NEXT:    s_and_saveexec_b64 s[46:47], vcc
687; GCN-NEXT:    s_cbranch_execz .LBB5_4
688; GCN-NEXT:  ; %bb.1: ; %bb1
689; GCN-NEXT:    s_mov_b64 s[48:49], exec
690; GCN-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
691; GCN-NEXT:    v_readfirstlane_b32 s16, v0
692; GCN-NEXT:    v_readfirstlane_b32 s17, v1
693; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
694; GCN-NEXT:    s_and_saveexec_b64 s[50:51], vcc
695; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
696; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
697; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
698; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
699; GCN-NEXT:    s_mov_b32 s12, s45
700; GCN-NEXT:    s_mov_b32 s13, s44
701; GCN-NEXT:    s_mov_b32 s14, s43
702; GCN-NEXT:    s_mov_b32 s15, s42
703; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
704; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
705; GCN-NEXT:    ; implicit-def: $vgpr31
706; GCN-NEXT:    s_xor_b64 exec, exec, s[50:51]
707; GCN-NEXT:    s_cbranch_execnz .LBB5_2
708; GCN-NEXT:  ; %bb.3:
709; GCN-NEXT:    s_mov_b64 exec, s[48:49]
710; GCN-NEXT:  .LBB5_4: ; %bb2
711; GCN-NEXT:    s_or_b64 exec, exec, s[46:47]
712; GCN-NEXT:    v_readlane_b32 s51, v40, 19
713; GCN-NEXT:    v_readlane_b32 s50, v40, 18
714; GCN-NEXT:    v_readlane_b32 s49, v40, 17
715; GCN-NEXT:    v_readlane_b32 s48, v40, 16
716; GCN-NEXT:    v_readlane_b32 s47, v40, 15
717; GCN-NEXT:    v_readlane_b32 s46, v40, 14
718; GCN-NEXT:    v_readlane_b32 s45, v40, 13
719; GCN-NEXT:    v_readlane_b32 s44, v40, 12
720; GCN-NEXT:    v_readlane_b32 s43, v40, 11
721; GCN-NEXT:    v_readlane_b32 s42, v40, 10
722; GCN-NEXT:    v_readlane_b32 s41, v40, 9
723; GCN-NEXT:    v_readlane_b32 s40, v40, 8
724; GCN-NEXT:    v_readlane_b32 s39, v40, 7
725; GCN-NEXT:    v_readlane_b32 s38, v40, 6
726; GCN-NEXT:    v_readlane_b32 s37, v40, 5
727; GCN-NEXT:    v_readlane_b32 s36, v40, 4
728; GCN-NEXT:    v_readlane_b32 s35, v40, 3
729; GCN-NEXT:    v_readlane_b32 s34, v40, 2
730; GCN-NEXT:    v_readlane_b32 s31, v40, 1
731; GCN-NEXT:    v_readlane_b32 s30, v40, 0
732; GCN-NEXT:    s_mov_b32 s32, s33
733; GCN-NEXT:    v_readlane_b32 s4, v40, 20
734; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
735; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
736; GCN-NEXT:    s_mov_b64 exec, s[6:7]
737; GCN-NEXT:    s_mov_b32 s33, s4
738; GCN-NEXT:    s_waitcnt vmcnt(0)
739; GCN-NEXT:    s_setpc_b64 s[30:31]
740;
741; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
742; GISEL:       ; %bb.0: ; %bb0
743; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744; GISEL-NEXT:    s_mov_b32 s16, s33
745; GISEL-NEXT:    s_mov_b32 s33, s32
746; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
747; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
748; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
749; GISEL-NEXT:    v_writelane_b32 v40, s16, 20
750; GISEL-NEXT:    s_addk_i32 s32, 0x400
751; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
752; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
753; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
754; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
755; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
756; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
757; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
758; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
759; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
760; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
761; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
762; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
763; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
764; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
765; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
766; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
767; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
768; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
769; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
770; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
771; GISEL-NEXT:    s_mov_b32 s42, s15
772; GISEL-NEXT:    s_mov_b32 s43, s14
773; GISEL-NEXT:    s_mov_b32 s44, s13
774; GISEL-NEXT:    s_mov_b32 s45, s12
775; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
776; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
777; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
778; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
779; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
780; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
781; GISEL-NEXT:    s_and_saveexec_b64 s[46:47], vcc
782; GISEL-NEXT:    s_cbranch_execz .LBB5_4
783; GISEL-NEXT:  ; %bb.1: ; %bb1
784; GISEL-NEXT:    s_mov_b64 s[48:49], exec
785; GISEL-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
786; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
787; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
788; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
789; GISEL-NEXT:    s_and_saveexec_b64 s[50:51], vcc
790; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
791; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
792; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
793; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
794; GISEL-NEXT:    s_mov_b32 s12, s45
795; GISEL-NEXT:    s_mov_b32 s13, s44
796; GISEL-NEXT:    s_mov_b32 s14, s43
797; GISEL-NEXT:    s_mov_b32 s15, s42
798; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
799; GISEL-NEXT:    ; implicit-def: $vgpr0
800; GISEL-NEXT:    ; implicit-def: $vgpr31
801; GISEL-NEXT:    s_xor_b64 exec, exec, s[50:51]
802; GISEL-NEXT:    s_cbranch_execnz .LBB5_2
803; GISEL-NEXT:  ; %bb.3:
804; GISEL-NEXT:    s_mov_b64 exec, s[48:49]
805; GISEL-NEXT:  .LBB5_4: ; %bb2
806; GISEL-NEXT:    s_or_b64 exec, exec, s[46:47]
807; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
808; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
809; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
810; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
811; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
812; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
813; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
814; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
815; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
816; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
817; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
818; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
819; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
820; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
821; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
822; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
823; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
824; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
825; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
826; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
827; GISEL-NEXT:    s_mov_b32 s32, s33
828; GISEL-NEXT:    v_readlane_b32 s4, v40, 20
829; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
830; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
831; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
832; GISEL-NEXT:    s_mov_b32 s33, s4
833; GISEL-NEXT:    s_waitcnt vmcnt(0)
834; GISEL-NEXT:    s_setpc_b64 s[30:31]
835bb0:
836  br i1 %cond, label %bb1, label %bb2
837
838bb1:
839  call void %fptr()
840  br label %bb2
841
842bb2:
843  ret void
844}
845
; Indirect amdgpu_gfx call through %fptr, an ordinary (non-inreg) pointer
; argument, passing the constant 123 as an inreg i32.
; The expected code for both llc RUN lines loops at .LBB6_1, where
; v_readfirstlane_b32 copies the pointer from v0/v1 into s[8:9] ahead of
; s_swappc_b64, and 123 (0x7b) is placed in s4 inside that same loop.
; The assertions below are autogenerated by utils/update_llc_test_checks.py,
; so regenerate them instead of editing them by hand.
846define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
847; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
848; GCN:       ; %bb.0:
849; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850; GCN-NEXT:    s_mov_b32 s5, s33
851; GCN-NEXT:    s_mov_b32 s33, s32
852; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
853; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
854; GCN-NEXT:    s_mov_b64 exec, s[6:7]
855; GCN-NEXT:    s_addk_i32 s32, 0x400
856; GCN-NEXT:    v_writelane_b32 v40, s30, 0
857; GCN-NEXT:    v_writelane_b32 v40, s31, 1
858; GCN-NEXT:    v_writelane_b32 v40, s34, 2
859; GCN-NEXT:    v_writelane_b32 v40, s35, 3
860; GCN-NEXT:    v_writelane_b32 v40, s36, 4
861; GCN-NEXT:    v_writelane_b32 v40, s37, 5
862; GCN-NEXT:    v_writelane_b32 v40, s38, 6
863; GCN-NEXT:    v_writelane_b32 v40, s39, 7
864; GCN-NEXT:    v_writelane_b32 v40, s40, 8
865; GCN-NEXT:    v_writelane_b32 v40, s41, 9
866; GCN-NEXT:    v_writelane_b32 v40, s42, 10
867; GCN-NEXT:    v_writelane_b32 v40, s43, 11
868; GCN-NEXT:    v_writelane_b32 v40, s44, 12
869; GCN-NEXT:    v_writelane_b32 v40, s45, 13
870; GCN-NEXT:    v_writelane_b32 v40, s46, 14
871; GCN-NEXT:    v_writelane_b32 v40, s47, 15
872; GCN-NEXT:    v_writelane_b32 v40, s48, 16
873; GCN-NEXT:    v_writelane_b32 v40, s49, 17
874; GCN-NEXT:    v_writelane_b32 v40, s50, 18
875; GCN-NEXT:    v_writelane_b32 v40, s51, 19
876; GCN-NEXT:    v_writelane_b32 v40, s52, 20
877; GCN-NEXT:    v_writelane_b32 v40, s53, 21
878; GCN-NEXT:    v_writelane_b32 v40, s54, 22
879; GCN-NEXT:    v_writelane_b32 v40, s55, 23
880; GCN-NEXT:    v_writelane_b32 v40, s56, 24
881; GCN-NEXT:    v_writelane_b32 v40, s57, 25
882; GCN-NEXT:    v_writelane_b32 v40, s58, 26
883; GCN-NEXT:    v_writelane_b32 v40, s59, 27
884; GCN-NEXT:    v_writelane_b32 v40, s60, 28
885; GCN-NEXT:    v_writelane_b32 v40, s61, 29
886; GCN-NEXT:    v_writelane_b32 v40, s62, 30
887; GCN-NEXT:    v_writelane_b32 v40, s63, 31
888; GCN-NEXT:    s_mov_b64 s[6:7], exec
889; GCN-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
890; GCN-NEXT:    v_readfirstlane_b32 s8, v0
891; GCN-NEXT:    v_readfirstlane_b32 s9, v1
892; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
893; GCN-NEXT:    s_and_saveexec_b64 s[10:11], vcc
894; GCN-NEXT:    s_movk_i32 s4, 0x7b
895; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
896; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
897; GCN-NEXT:    s_xor_b64 exec, exec, s[10:11]
898; GCN-NEXT:    s_cbranch_execnz .LBB6_1
899; GCN-NEXT:  ; %bb.2:
900; GCN-NEXT:    s_mov_b64 exec, s[6:7]
901; GCN-NEXT:    v_readlane_b32 s63, v40, 31
902; GCN-NEXT:    v_readlane_b32 s62, v40, 30
903; GCN-NEXT:    v_readlane_b32 s61, v40, 29
904; GCN-NEXT:    v_readlane_b32 s60, v40, 28
905; GCN-NEXT:    v_readlane_b32 s59, v40, 27
906; GCN-NEXT:    v_readlane_b32 s58, v40, 26
907; GCN-NEXT:    v_readlane_b32 s57, v40, 25
908; GCN-NEXT:    v_readlane_b32 s56, v40, 24
909; GCN-NEXT:    v_readlane_b32 s55, v40, 23
910; GCN-NEXT:    v_readlane_b32 s54, v40, 22
911; GCN-NEXT:    v_readlane_b32 s53, v40, 21
912; GCN-NEXT:    v_readlane_b32 s52, v40, 20
913; GCN-NEXT:    v_readlane_b32 s51, v40, 19
914; GCN-NEXT:    v_readlane_b32 s50, v40, 18
915; GCN-NEXT:    v_readlane_b32 s49, v40, 17
916; GCN-NEXT:    v_readlane_b32 s48, v40, 16
917; GCN-NEXT:    v_readlane_b32 s47, v40, 15
918; GCN-NEXT:    v_readlane_b32 s46, v40, 14
919; GCN-NEXT:    v_readlane_b32 s45, v40, 13
920; GCN-NEXT:    v_readlane_b32 s44, v40, 12
921; GCN-NEXT:    v_readlane_b32 s43, v40, 11
922; GCN-NEXT:    v_readlane_b32 s42, v40, 10
923; GCN-NEXT:    v_readlane_b32 s41, v40, 9
924; GCN-NEXT:    v_readlane_b32 s40, v40, 8
925; GCN-NEXT:    v_readlane_b32 s39, v40, 7
926; GCN-NEXT:    v_readlane_b32 s38, v40, 6
927; GCN-NEXT:    v_readlane_b32 s37, v40, 5
928; GCN-NEXT:    v_readlane_b32 s36, v40, 4
929; GCN-NEXT:    v_readlane_b32 s35, v40, 3
930; GCN-NEXT:    v_readlane_b32 s34, v40, 2
931; GCN-NEXT:    v_readlane_b32 s31, v40, 1
932; GCN-NEXT:    v_readlane_b32 s30, v40, 0
933; GCN-NEXT:    s_mov_b32 s32, s33
934; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
935; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
936; GCN-NEXT:    s_mov_b64 exec, s[6:7]
937; GCN-NEXT:    s_mov_b32 s33, s5
938; GCN-NEXT:    s_waitcnt vmcnt(0)
939; GCN-NEXT:    s_setpc_b64 s[30:31]
940;
941; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
942; GISEL:       ; %bb.0:
943; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944; GISEL-NEXT:    s_mov_b32 s5, s33
945; GISEL-NEXT:    s_mov_b32 s33, s32
946; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
947; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
948; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
949; GISEL-NEXT:    s_addk_i32 s32, 0x400
950; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
951; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
952; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
953; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
954; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
955; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
956; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
957; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
958; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
959; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
960; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
961; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
962; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
963; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
964; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
965; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
966; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
967; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
968; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
969; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
970; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
971; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
972; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
973; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
974; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
975; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
976; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
977; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
978; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
979; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
980; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
981; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
982; GISEL-NEXT:    s_mov_b64 s[6:7], exec
983; GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
984; GISEL-NEXT:    v_readfirstlane_b32 s8, v0
985; GISEL-NEXT:    v_readfirstlane_b32 s9, v1
986; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
987; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
988; GISEL-NEXT:    s_movk_i32 s4, 0x7b
989; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
990; GISEL-NEXT:    ; implicit-def: $vgpr0
991; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
992; GISEL-NEXT:    s_cbranch_execnz .LBB6_1
993; GISEL-NEXT:  ; %bb.2:
994; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
995; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
996; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
997; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
998; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
999; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
1000; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
1001; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
1002; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
1003; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
1004; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
1005; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
1006; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
1007; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1008; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1009; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1010; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1011; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1012; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1013; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1014; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1015; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1016; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1017; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1018; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1019; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1020; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1021; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1022; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1023; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1024; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1025; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1026; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1027; GISEL-NEXT:    s_mov_b32 s32, s33
1028; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
1029; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1030; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1031; GISEL-NEXT:    s_mov_b32 s33, s5
1032; GISEL-NEXT:    s_waitcnt vmcnt(0)
1033; GISEL-NEXT:    s_setpc_b64 s[30:31]
1034  call amdgpu_gfx void %fptr(i32 inreg 123)
1035  ret void
1036}
1037
; Pass %i to an indirect amdgpu_gfx call through %fptr and then return the
; same %i, so the value has to stay live across the call.
; In the expected code for both llc RUN lines, %i is copied from v0 into v40
; before the .LBB7_1 loop, copied back into v0 ahead of each s_swappc_b64, and
; copied into v0 once more after the loop for the return value. v40 itself is
; stored to and reloaded from the stack around the function body.
; The assertions below are autogenerated by utils/update_llc_test_checks.py,
; so regenerate them instead of editing them by hand.
1038define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
1039; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1040; GCN:       ; %bb.0:
1041; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1042; GCN-NEXT:    s_mov_b32 s10, s33
1043; GCN-NEXT:    s_mov_b32 s33, s32
1044; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1045; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1046; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1047; GCN-NEXT:    s_addk_i32 s32, 0x400
1048; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1049; GCN-NEXT:    v_writelane_b32 v41, s30, 0
1050; GCN-NEXT:    v_writelane_b32 v41, s31, 1
1051; GCN-NEXT:    v_writelane_b32 v41, s34, 2
1052; GCN-NEXT:    v_writelane_b32 v41, s35, 3
1053; GCN-NEXT:    v_writelane_b32 v41, s36, 4
1054; GCN-NEXT:    v_writelane_b32 v41, s37, 5
1055; GCN-NEXT:    v_writelane_b32 v41, s38, 6
1056; GCN-NEXT:    v_writelane_b32 v41, s39, 7
1057; GCN-NEXT:    v_writelane_b32 v41, s40, 8
1058; GCN-NEXT:    v_writelane_b32 v41, s41, 9
1059; GCN-NEXT:    v_writelane_b32 v41, s42, 10
1060; GCN-NEXT:    v_writelane_b32 v41, s43, 11
1061; GCN-NEXT:    v_writelane_b32 v41, s44, 12
1062; GCN-NEXT:    v_writelane_b32 v41, s45, 13
1063; GCN-NEXT:    v_writelane_b32 v41, s46, 14
1064; GCN-NEXT:    v_writelane_b32 v41, s47, 15
1065; GCN-NEXT:    v_writelane_b32 v41, s48, 16
1066; GCN-NEXT:    v_writelane_b32 v41, s49, 17
1067; GCN-NEXT:    v_writelane_b32 v41, s50, 18
1068; GCN-NEXT:    v_writelane_b32 v41, s51, 19
1069; GCN-NEXT:    v_writelane_b32 v41, s52, 20
1070; GCN-NEXT:    v_writelane_b32 v41, s53, 21
1071; GCN-NEXT:    v_writelane_b32 v41, s54, 22
1072; GCN-NEXT:    v_writelane_b32 v41, s55, 23
1073; GCN-NEXT:    v_writelane_b32 v41, s56, 24
1074; GCN-NEXT:    v_writelane_b32 v41, s57, 25
1075; GCN-NEXT:    v_writelane_b32 v41, s58, 26
1076; GCN-NEXT:    v_writelane_b32 v41, s59, 27
1077; GCN-NEXT:    v_writelane_b32 v41, s60, 28
1078; GCN-NEXT:    v_writelane_b32 v41, s61, 29
1079; GCN-NEXT:    v_writelane_b32 v41, s62, 30
1080; GCN-NEXT:    v_writelane_b32 v41, s63, 31
1081; GCN-NEXT:    v_mov_b32_e32 v40, v0
1082; GCN-NEXT:    s_mov_b64 s[4:5], exec
1083; GCN-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
1084; GCN-NEXT:    v_readfirstlane_b32 s6, v1
1085; GCN-NEXT:    v_readfirstlane_b32 s7, v2
1086; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1087; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1088; GCN-NEXT:    v_mov_b32_e32 v0, v40
1089; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1090; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
1091; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1092; GCN-NEXT:    s_cbranch_execnz .LBB7_1
1093; GCN-NEXT:  ; %bb.2:
1094; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1095; GCN-NEXT:    v_mov_b32_e32 v0, v40
1096; GCN-NEXT:    v_readlane_b32 s63, v41, 31
1097; GCN-NEXT:    v_readlane_b32 s62, v41, 30
1098; GCN-NEXT:    v_readlane_b32 s61, v41, 29
1099; GCN-NEXT:    v_readlane_b32 s60, v41, 28
1100; GCN-NEXT:    v_readlane_b32 s59, v41, 27
1101; GCN-NEXT:    v_readlane_b32 s58, v41, 26
1102; GCN-NEXT:    v_readlane_b32 s57, v41, 25
1103; GCN-NEXT:    v_readlane_b32 s56, v41, 24
1104; GCN-NEXT:    v_readlane_b32 s55, v41, 23
1105; GCN-NEXT:    v_readlane_b32 s54, v41, 22
1106; GCN-NEXT:    v_readlane_b32 s53, v41, 21
1107; GCN-NEXT:    v_readlane_b32 s52, v41, 20
1108; GCN-NEXT:    v_readlane_b32 s51, v41, 19
1109; GCN-NEXT:    v_readlane_b32 s50, v41, 18
1110; GCN-NEXT:    v_readlane_b32 s49, v41, 17
1111; GCN-NEXT:    v_readlane_b32 s48, v41, 16
1112; GCN-NEXT:    v_readlane_b32 s47, v41, 15
1113; GCN-NEXT:    v_readlane_b32 s46, v41, 14
1114; GCN-NEXT:    v_readlane_b32 s45, v41, 13
1115; GCN-NEXT:    v_readlane_b32 s44, v41, 12
1116; GCN-NEXT:    v_readlane_b32 s43, v41, 11
1117; GCN-NEXT:    v_readlane_b32 s42, v41, 10
1118; GCN-NEXT:    v_readlane_b32 s41, v41, 9
1119; GCN-NEXT:    v_readlane_b32 s40, v41, 8
1120; GCN-NEXT:    v_readlane_b32 s39, v41, 7
1121; GCN-NEXT:    v_readlane_b32 s38, v41, 6
1122; GCN-NEXT:    v_readlane_b32 s37, v41, 5
1123; GCN-NEXT:    v_readlane_b32 s36, v41, 4
1124; GCN-NEXT:    v_readlane_b32 s35, v41, 3
1125; GCN-NEXT:    v_readlane_b32 s34, v41, 2
1126; GCN-NEXT:    v_readlane_b32 s31, v41, 1
1127; GCN-NEXT:    v_readlane_b32 s30, v41, 0
1128; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1129; GCN-NEXT:    s_mov_b32 s32, s33
1130; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1131; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1132; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1133; GCN-NEXT:    s_mov_b32 s33, s10
1134; GCN-NEXT:    s_waitcnt vmcnt(0)
1135; GCN-NEXT:    s_setpc_b64 s[30:31]
1136;
1137; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1138; GISEL:       ; %bb.0:
1139; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140; GISEL-NEXT:    s_mov_b32 s10, s33
1141; GISEL-NEXT:    s_mov_b32 s33, s32
1142; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1143; GISEL-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1144; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1145; GISEL-NEXT:    s_addk_i32 s32, 0x400
1146; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1147; GISEL-NEXT:    v_writelane_b32 v41, s30, 0
1148; GISEL-NEXT:    v_writelane_b32 v41, s31, 1
1149; GISEL-NEXT:    v_writelane_b32 v41, s34, 2
1150; GISEL-NEXT:    v_writelane_b32 v41, s35, 3
1151; GISEL-NEXT:    v_writelane_b32 v41, s36, 4
1152; GISEL-NEXT:    v_writelane_b32 v41, s37, 5
1153; GISEL-NEXT:    v_writelane_b32 v41, s38, 6
1154; GISEL-NEXT:    v_writelane_b32 v41, s39, 7
1155; GISEL-NEXT:    v_writelane_b32 v41, s40, 8
1156; GISEL-NEXT:    v_writelane_b32 v41, s41, 9
1157; GISEL-NEXT:    v_writelane_b32 v41, s42, 10
1158; GISEL-NEXT:    v_writelane_b32 v41, s43, 11
1159; GISEL-NEXT:    v_writelane_b32 v41, s44, 12
1160; GISEL-NEXT:    v_writelane_b32 v41, s45, 13
1161; GISEL-NEXT:    v_writelane_b32 v41, s46, 14
1162; GISEL-NEXT:    v_writelane_b32 v41, s47, 15
1163; GISEL-NEXT:    v_writelane_b32 v41, s48, 16
1164; GISEL-NEXT:    v_writelane_b32 v41, s49, 17
1165; GISEL-NEXT:    v_writelane_b32 v41, s50, 18
1166; GISEL-NEXT:    v_writelane_b32 v41, s51, 19
1167; GISEL-NEXT:    v_writelane_b32 v41, s52, 20
1168; GISEL-NEXT:    v_writelane_b32 v41, s53, 21
1169; GISEL-NEXT:    v_writelane_b32 v41, s54, 22
1170; GISEL-NEXT:    v_writelane_b32 v41, s55, 23
1171; GISEL-NEXT:    v_writelane_b32 v41, s56, 24
1172; GISEL-NEXT:    v_writelane_b32 v41, s57, 25
1173; GISEL-NEXT:    v_writelane_b32 v41, s58, 26
1174; GISEL-NEXT:    v_writelane_b32 v41, s59, 27
1175; GISEL-NEXT:    v_writelane_b32 v41, s60, 28
1176; GISEL-NEXT:    v_writelane_b32 v41, s61, 29
1177; GISEL-NEXT:    v_writelane_b32 v41, s62, 30
1178; GISEL-NEXT:    v_writelane_b32 v41, s63, 31
1179; GISEL-NEXT:    v_mov_b32_e32 v40, v0
1180; GISEL-NEXT:    s_mov_b64 s[4:5], exec
1181; GISEL-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
1182; GISEL-NEXT:    v_readfirstlane_b32 s6, v1
1183; GISEL-NEXT:    v_readfirstlane_b32 s7, v2
1184; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1185; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1186; GISEL-NEXT:    v_mov_b32_e32 v0, v40
1187; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1188; GISEL-NEXT:    ; implicit-def: $vgpr1
1189; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
1190; GISEL-NEXT:    s_cbranch_execnz .LBB7_1
1191; GISEL-NEXT:  ; %bb.2:
1192; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1193; GISEL-NEXT:    v_mov_b32_e32 v0, v40
1194; GISEL-NEXT:    v_readlane_b32 s63, v41, 31
1195; GISEL-NEXT:    v_readlane_b32 s62, v41, 30
1196; GISEL-NEXT:    v_readlane_b32 s61, v41, 29
1197; GISEL-NEXT:    v_readlane_b32 s60, v41, 28
1198; GISEL-NEXT:    v_readlane_b32 s59, v41, 27
1199; GISEL-NEXT:    v_readlane_b32 s58, v41, 26
1200; GISEL-NEXT:    v_readlane_b32 s57, v41, 25
1201; GISEL-NEXT:    v_readlane_b32 s56, v41, 24
1202; GISEL-NEXT:    v_readlane_b32 s55, v41, 23
1203; GISEL-NEXT:    v_readlane_b32 s54, v41, 22
1204; GISEL-NEXT:    v_readlane_b32 s53, v41, 21
1205; GISEL-NEXT:    v_readlane_b32 s52, v41, 20
1206; GISEL-NEXT:    v_readlane_b32 s51, v41, 19
1207; GISEL-NEXT:    v_readlane_b32 s50, v41, 18
1208; GISEL-NEXT:    v_readlane_b32 s49, v41, 17
1209; GISEL-NEXT:    v_readlane_b32 s48, v41, 16
1210; GISEL-NEXT:    v_readlane_b32 s47, v41, 15
1211; GISEL-NEXT:    v_readlane_b32 s46, v41, 14
1212; GISEL-NEXT:    v_readlane_b32 s45, v41, 13
1213; GISEL-NEXT:    v_readlane_b32 s44, v41, 12
1214; GISEL-NEXT:    v_readlane_b32 s43, v41, 11
1215; GISEL-NEXT:    v_readlane_b32 s42, v41, 10
1216; GISEL-NEXT:    v_readlane_b32 s41, v41, 9
1217; GISEL-NEXT:    v_readlane_b32 s40, v41, 8
1218; GISEL-NEXT:    v_readlane_b32 s39, v41, 7
1219; GISEL-NEXT:    v_readlane_b32 s38, v41, 6
1220; GISEL-NEXT:    v_readlane_b32 s37, v41, 5
1221; GISEL-NEXT:    v_readlane_b32 s36, v41, 4
1222; GISEL-NEXT:    v_readlane_b32 s35, v41, 3
1223; GISEL-NEXT:    v_readlane_b32 s34, v41, 2
1224; GISEL-NEXT:    v_readlane_b32 s31, v41, 1
1225; GISEL-NEXT:    v_readlane_b32 s30, v41, 0
1226; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1227; GISEL-NEXT:    s_mov_b32 s32, s33
1228; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1229; GISEL-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1230; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1231; GISEL-NEXT:    s_mov_b32 s33, s10
1232; GISEL-NEXT:    s_waitcnt vmcnt(0)
1233; GISEL-NEXT:    s_setpc_b64 s[30:31]
1234  call amdgpu_gfx void %fptr(i32 %i)
1235  ret i32 %i
1236}
1237
1238; Use a variable inside a waterfall loop and use the return variable after the loop.
1239; TODO: The argument and return variable could be in the same physical register, but the register
1240; allocator is not able to do that because the return value clashes with the live range of an
1241; IMPLICIT_DEF of the argument.
; The cost of that limitation is visible in the expected code for both llc RUN
; lines, where the call result in v0 is copied to another VGPR inside the
; .LBB8_1 loop and copied back into v0 after the loop.
; The assertions below are autogenerated by utils/update_llc_test_checks.py,
; so regenerate them instead of editing them by hand.
1242define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
1243; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1244; GCN:       ; %bb.0:
1245; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246; GCN-NEXT:    s_mov_b32 s10, s33
1247; GCN-NEXT:    s_mov_b32 s33, s32
1248; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1249; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1250; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1251; GCN-NEXT:    s_addk_i32 s32, 0x400
1252; GCN-NEXT:    v_writelane_b32 v40, s30, 0
1253; GCN-NEXT:    v_writelane_b32 v40, s31, 1
1254; GCN-NEXT:    v_writelane_b32 v40, s34, 2
1255; GCN-NEXT:    v_writelane_b32 v40, s35, 3
1256; GCN-NEXT:    v_writelane_b32 v40, s36, 4
1257; GCN-NEXT:    v_writelane_b32 v40, s37, 5
1258; GCN-NEXT:    v_writelane_b32 v40, s38, 6
1259; GCN-NEXT:    v_writelane_b32 v40, s39, 7
1260; GCN-NEXT:    v_writelane_b32 v40, s40, 8
1261; GCN-NEXT:    v_writelane_b32 v40, s41, 9
1262; GCN-NEXT:    v_writelane_b32 v40, s42, 10
1263; GCN-NEXT:    v_writelane_b32 v40, s43, 11
1264; GCN-NEXT:    v_writelane_b32 v40, s44, 12
1265; GCN-NEXT:    v_writelane_b32 v40, s45, 13
1266; GCN-NEXT:    v_writelane_b32 v40, s46, 14
1267; GCN-NEXT:    v_writelane_b32 v40, s47, 15
1268; GCN-NEXT:    v_writelane_b32 v40, s48, 16
1269; GCN-NEXT:    v_writelane_b32 v40, s49, 17
1270; GCN-NEXT:    v_writelane_b32 v40, s50, 18
1271; GCN-NEXT:    v_writelane_b32 v40, s51, 19
1272; GCN-NEXT:    v_writelane_b32 v40, s52, 20
1273; GCN-NEXT:    v_writelane_b32 v40, s53, 21
1274; GCN-NEXT:    v_writelane_b32 v40, s54, 22
1275; GCN-NEXT:    v_writelane_b32 v40, s55, 23
1276; GCN-NEXT:    v_writelane_b32 v40, s56, 24
1277; GCN-NEXT:    v_writelane_b32 v40, s57, 25
1278; GCN-NEXT:    v_writelane_b32 v40, s58, 26
1279; GCN-NEXT:    v_writelane_b32 v40, s59, 27
1280; GCN-NEXT:    v_writelane_b32 v40, s60, 28
1281; GCN-NEXT:    v_writelane_b32 v40, s61, 29
1282; GCN-NEXT:    v_writelane_b32 v40, s62, 30
1283; GCN-NEXT:    v_writelane_b32 v40, s63, 31
1284; GCN-NEXT:    s_mov_b64 s[4:5], exec
1285; GCN-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1286; GCN-NEXT:    v_readfirstlane_b32 s8, v1
1287; GCN-NEXT:    v_readfirstlane_b32 s9, v2
1288; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1289; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
1290; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1291; GCN-NEXT:    v_mov_b32_e32 v3, v0
1292; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
1293; GCN-NEXT:    ; implicit-def: $vgpr0
1294; GCN-NEXT:    s_xor_b64 exec, exec, s[6:7]
1295; GCN-NEXT:    s_cbranch_execnz .LBB8_1
1296; GCN-NEXT:  ; %bb.2:
1297; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1298; GCN-NEXT:    v_mov_b32_e32 v0, v3
1299; GCN-NEXT:    v_readlane_b32 s63, v40, 31
1300; GCN-NEXT:    v_readlane_b32 s62, v40, 30
1301; GCN-NEXT:    v_readlane_b32 s61, v40, 29
1302; GCN-NEXT:    v_readlane_b32 s60, v40, 28
1303; GCN-NEXT:    v_readlane_b32 s59, v40, 27
1304; GCN-NEXT:    v_readlane_b32 s58, v40, 26
1305; GCN-NEXT:    v_readlane_b32 s57, v40, 25
1306; GCN-NEXT:    v_readlane_b32 s56, v40, 24
1307; GCN-NEXT:    v_readlane_b32 s55, v40, 23
1308; GCN-NEXT:    v_readlane_b32 s54, v40, 22
1309; GCN-NEXT:    v_readlane_b32 s53, v40, 21
1310; GCN-NEXT:    v_readlane_b32 s52, v40, 20
1311; GCN-NEXT:    v_readlane_b32 s51, v40, 19
1312; GCN-NEXT:    v_readlane_b32 s50, v40, 18
1313; GCN-NEXT:    v_readlane_b32 s49, v40, 17
1314; GCN-NEXT:    v_readlane_b32 s48, v40, 16
1315; GCN-NEXT:    v_readlane_b32 s47, v40, 15
1316; GCN-NEXT:    v_readlane_b32 s46, v40, 14
1317; GCN-NEXT:    v_readlane_b32 s45, v40, 13
1318; GCN-NEXT:    v_readlane_b32 s44, v40, 12
1319; GCN-NEXT:    v_readlane_b32 s43, v40, 11
1320; GCN-NEXT:    v_readlane_b32 s42, v40, 10
1321; GCN-NEXT:    v_readlane_b32 s41, v40, 9
1322; GCN-NEXT:    v_readlane_b32 s40, v40, 8
1323; GCN-NEXT:    v_readlane_b32 s39, v40, 7
1324; GCN-NEXT:    v_readlane_b32 s38, v40, 6
1325; GCN-NEXT:    v_readlane_b32 s37, v40, 5
1326; GCN-NEXT:    v_readlane_b32 s36, v40, 4
1327; GCN-NEXT:    v_readlane_b32 s35, v40, 3
1328; GCN-NEXT:    v_readlane_b32 s34, v40, 2
1329; GCN-NEXT:    v_readlane_b32 s31, v40, 1
1330; GCN-NEXT:    v_readlane_b32 s30, v40, 0
1331; GCN-NEXT:    s_mov_b32 s32, s33
1332; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1333; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1334; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1335; GCN-NEXT:    s_mov_b32 s33, s10
1336; GCN-NEXT:    s_waitcnt vmcnt(0)
1337; GCN-NEXT:    s_setpc_b64 s[30:31]
1338;
1339; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1340; GISEL:       ; %bb.0:
1341; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342; GISEL-NEXT:    s_mov_b32 s10, s33
1343; GISEL-NEXT:    s_mov_b32 s33, s32
1344; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1345; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1346; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1347; GISEL-NEXT:    s_addk_i32 s32, 0x400
1348; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
1349; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
1350; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
1351; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
1352; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
1353; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
1354; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
1355; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
1356; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
1357; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
1358; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
1359; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
1360; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
1361; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
1362; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
1363; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
1364; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
1365; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
1366; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
1367; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
1368; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
1369; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
1370; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
1371; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
1372; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
1373; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
1374; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
1375; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
1376; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
1377; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
1378; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
1379; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
1380; GISEL-NEXT:    s_mov_b64 s[4:5], exec
1381; GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1382; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
1383; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
1384; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1385; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
1386; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1387; GISEL-NEXT:    v_mov_b32_e32 v2, v0
1388; GISEL-NEXT:    ; implicit-def: $vgpr1
1389; GISEL-NEXT:    ; implicit-def: $vgpr0
1390; GISEL-NEXT:    s_xor_b64 exec, exec, s[6:7]
1391; GISEL-NEXT:    s_cbranch_execnz .LBB8_1
1392; GISEL-NEXT:  ; %bb.2:
1393; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1394; GISEL-NEXT:    v_mov_b32_e32 v0, v2
1395; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
1396; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
1397; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
1398; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
1399; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
1400; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
1401; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
1402; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
1403; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
1404; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
1405; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
1406; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
1407; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1408; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1409; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1410; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1411; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1412; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1413; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1414; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1415; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1416; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1417; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1418; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1419; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1420; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1421; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1422; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1423; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1424; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1425; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1426; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1427; GISEL-NEXT:    s_mov_b32 s32, s33
1428; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1429; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1430; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1431; GISEL-NEXT:    s_mov_b32 s33, s10
1432; GISEL-NEXT:    s_waitcnt vmcnt(0)
1433; GISEL-NEXT:    s_setpc_b64 s[30:31]
1434  %ret = call amdgpu_gfx i32 %fptr(i32 %i)
1435  ret i32 %ret
1436}
1437
1438; Calling a vgpr can never be a tail call. Even though the IR below marks the
; call as `tail`, the expected output for both run lines reads the pointer in
; v[0:1] with v_readfirstlane_b32, calls it with s_swappc_b64 inside the
; .LBB9_1 loop (which repeats while s_cbranch_execnz still sees lanes in exec),
; and then returns with s_setpc_b64 s[30:31] instead of jumping to the callee.
1439define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
1440; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
1441; GCN:       ; %bb.0:
1442; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443; GCN-NEXT:    s_mov_b32 s10, s33
1444; GCN-NEXT:    s_mov_b32 s33, s32
1445; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1446; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1447; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1448; GCN-NEXT:    s_addk_i32 s32, 0x400
1449; GCN-NEXT:    v_writelane_b32 v40, s30, 0
1450; GCN-NEXT:    v_writelane_b32 v40, s31, 1
1451; GCN-NEXT:    v_writelane_b32 v40, s34, 2
1452; GCN-NEXT:    v_writelane_b32 v40, s35, 3
1453; GCN-NEXT:    v_writelane_b32 v40, s36, 4
1454; GCN-NEXT:    v_writelane_b32 v40, s37, 5
1455; GCN-NEXT:    v_writelane_b32 v40, s38, 6
1456; GCN-NEXT:    v_writelane_b32 v40, s39, 7
1457; GCN-NEXT:    v_writelane_b32 v40, s40, 8
1458; GCN-NEXT:    v_writelane_b32 v40, s41, 9
1459; GCN-NEXT:    v_writelane_b32 v40, s42, 10
1460; GCN-NEXT:    v_writelane_b32 v40, s43, 11
1461; GCN-NEXT:    v_writelane_b32 v40, s44, 12
1462; GCN-NEXT:    v_writelane_b32 v40, s45, 13
1463; GCN-NEXT:    v_writelane_b32 v40, s46, 14
1464; GCN-NEXT:    v_writelane_b32 v40, s47, 15
1465; GCN-NEXT:    v_writelane_b32 v40, s48, 16
1466; GCN-NEXT:    v_writelane_b32 v40, s49, 17
1467; GCN-NEXT:    v_writelane_b32 v40, s50, 18
1468; GCN-NEXT:    v_writelane_b32 v40, s51, 19
1469; GCN-NEXT:    v_writelane_b32 v40, s52, 20
1470; GCN-NEXT:    v_writelane_b32 v40, s53, 21
1471; GCN-NEXT:    v_writelane_b32 v40, s54, 22
1472; GCN-NEXT:    v_writelane_b32 v40, s55, 23
1473; GCN-NEXT:    v_writelane_b32 v40, s56, 24
1474; GCN-NEXT:    v_writelane_b32 v40, s57, 25
1475; GCN-NEXT:    v_writelane_b32 v40, s58, 26
1476; GCN-NEXT:    v_writelane_b32 v40, s59, 27
1477; GCN-NEXT:    v_writelane_b32 v40, s60, 28
1478; GCN-NEXT:    v_writelane_b32 v40, s61, 29
1479; GCN-NEXT:    v_writelane_b32 v40, s62, 30
1480; GCN-NEXT:    v_writelane_b32 v40, s63, 31
1481; GCN-NEXT:    s_mov_b64 s[4:5], exec
1482; GCN-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1483; GCN-NEXT:    v_readfirstlane_b32 s6, v0
1484; GCN-NEXT:    v_readfirstlane_b32 s7, v1
1485; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1486; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1487; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1488; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
1489; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1490; GCN-NEXT:    s_cbranch_execnz .LBB9_1
1491; GCN-NEXT:  ; %bb.2:
1492; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1493; GCN-NEXT:    v_readlane_b32 s63, v40, 31
1494; GCN-NEXT:    v_readlane_b32 s62, v40, 30
1495; GCN-NEXT:    v_readlane_b32 s61, v40, 29
1496; GCN-NEXT:    v_readlane_b32 s60, v40, 28
1497; GCN-NEXT:    v_readlane_b32 s59, v40, 27
1498; GCN-NEXT:    v_readlane_b32 s58, v40, 26
1499; GCN-NEXT:    v_readlane_b32 s57, v40, 25
1500; GCN-NEXT:    v_readlane_b32 s56, v40, 24
1501; GCN-NEXT:    v_readlane_b32 s55, v40, 23
1502; GCN-NEXT:    v_readlane_b32 s54, v40, 22
1503; GCN-NEXT:    v_readlane_b32 s53, v40, 21
1504; GCN-NEXT:    v_readlane_b32 s52, v40, 20
1505; GCN-NEXT:    v_readlane_b32 s51, v40, 19
1506; GCN-NEXT:    v_readlane_b32 s50, v40, 18
1507; GCN-NEXT:    v_readlane_b32 s49, v40, 17
1508; GCN-NEXT:    v_readlane_b32 s48, v40, 16
1509; GCN-NEXT:    v_readlane_b32 s47, v40, 15
1510; GCN-NEXT:    v_readlane_b32 s46, v40, 14
1511; GCN-NEXT:    v_readlane_b32 s45, v40, 13
1512; GCN-NEXT:    v_readlane_b32 s44, v40, 12
1513; GCN-NEXT:    v_readlane_b32 s43, v40, 11
1514; GCN-NEXT:    v_readlane_b32 s42, v40, 10
1515; GCN-NEXT:    v_readlane_b32 s41, v40, 9
1516; GCN-NEXT:    v_readlane_b32 s40, v40, 8
1517; GCN-NEXT:    v_readlane_b32 s39, v40, 7
1518; GCN-NEXT:    v_readlane_b32 s38, v40, 6
1519; GCN-NEXT:    v_readlane_b32 s37, v40, 5
1520; GCN-NEXT:    v_readlane_b32 s36, v40, 4
1521; GCN-NEXT:    v_readlane_b32 s35, v40, 3
1522; GCN-NEXT:    v_readlane_b32 s34, v40, 2
1523; GCN-NEXT:    v_readlane_b32 s31, v40, 1
1524; GCN-NEXT:    v_readlane_b32 s30, v40, 0
1525; GCN-NEXT:    s_mov_b32 s32, s33
1526; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1527; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1528; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1529; GCN-NEXT:    s_mov_b32 s33, s10
1530; GCN-NEXT:    s_waitcnt vmcnt(0)
1531; GCN-NEXT:    s_setpc_b64 s[30:31]
1532;
1533; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
1534; GISEL:       ; %bb.0:
1535; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1536; GISEL-NEXT:    s_mov_b32 s10, s33
1537; GISEL-NEXT:    s_mov_b32 s33, s32
1538; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1539; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1540; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1541; GISEL-NEXT:    s_addk_i32 s32, 0x400
1542; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
1543; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
1544; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
1545; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
1546; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
1547; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
1548; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
1549; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
1550; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
1551; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
1552; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
1553; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
1554; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
1555; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
1556; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
1557; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
1558; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
1559; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
1560; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
1561; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
1562; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
1563; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
1564; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
1565; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
1566; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
1567; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
1568; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
1569; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
1570; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
1571; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
1572; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
1573; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
1574; GISEL-NEXT:    s_mov_b64 s[4:5], exec
1575; GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1576; GISEL-NEXT:    v_readfirstlane_b32 s6, v0
1577; GISEL-NEXT:    v_readfirstlane_b32 s7, v1
1578; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1579; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1580; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1581; GISEL-NEXT:    ; implicit-def: $vgpr0
1582; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
1583; GISEL-NEXT:    s_cbranch_execnz .LBB9_1
1584; GISEL-NEXT:  ; %bb.2:
1585; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1586; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
1587; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
1588; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
1589; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
1590; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
1591; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
1592; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
1593; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
1594; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
1595; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
1596; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
1597; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
1598; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1599; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1600; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1601; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1602; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1603; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1604; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1605; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1606; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1607; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1608; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1609; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1610; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1611; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1612; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1613; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1614; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1615; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1616; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1617; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1618; GISEL-NEXT:    s_mov_b32 s32, s33
1619; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1620; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1621; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1622; GISEL-NEXT:    s_mov_b32 s33, s10
1623; GISEL-NEXT:    s_waitcnt vmcnt(0)
1624; GISEL-NEXT:    s_setpc_b64 s[30:31]
1625  tail call amdgpu_gfx void %fptr()
1626  ret void
1627}
1628
; Module flags for this test: !0 sets "amdhsa_code_object_version" to 400,
; using module-flag merge behavior 1.
1629!llvm.module.flags = !{!0}
1630!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
1631