xref: /llvm-project/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O0 -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
4; FIXME: we should disable the SDWA peephole because dead-code elimination, which
5; runs after the peephole, ruins this test (different register numbers).
6
7; Spill all SGPRs so multiple VGPRs are required for spilling all of them.
8
9; Ideally we only need 2 VGPRs for all spilling. The VGPRs are
10; allocated per frame index, so it's possible to end up with more.
11define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) #0 {
12; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
13; GCN:       ; %bb.0:
14; GCN-NEXT:    s_mov_b32 s92, SCRATCH_RSRC_DWORD0
15; GCN-NEXT:    s_mov_b32 s93, SCRATCH_RSRC_DWORD1
16; GCN-NEXT:    s_mov_b32 s94, -1
17; GCN-NEXT:    s_mov_b32 s95, 0xe8f000
18; GCN-NEXT:    s_add_u32 s92, s92, s11
19; GCN-NEXT:    s_addc_u32 s93, s93, 0
20; GCN-NEXT:    s_load_dword s0, s[4:5], 0xb
21; GCN-NEXT:    ;;#ASMSTART
22; GCN-NEXT:    ; def s[4:11]
23; GCN-NEXT:    ;;#ASMEND
24; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
25; GCN-NEXT:    v_writelane_b32 v2, s4, 0
26; GCN-NEXT:    v_writelane_b32 v2, s5, 1
27; GCN-NEXT:    v_writelane_b32 v2, s6, 2
28; GCN-NEXT:    v_writelane_b32 v2, s7, 3
29; GCN-NEXT:    v_writelane_b32 v2, s8, 4
30; GCN-NEXT:    v_writelane_b32 v2, s9, 5
31; GCN-NEXT:    v_writelane_b32 v2, s10, 6
32; GCN-NEXT:    v_writelane_b32 v2, s11, 7
33; GCN-NEXT:    ;;#ASMSTART
34; GCN-NEXT:    ; def s[4:11]
35; GCN-NEXT:    ;;#ASMEND
36; GCN-NEXT:    v_writelane_b32 v2, s4, 8
37; GCN-NEXT:    v_writelane_b32 v2, s5, 9
38; GCN-NEXT:    v_writelane_b32 v2, s6, 10
39; GCN-NEXT:    v_writelane_b32 v2, s7, 11
40; GCN-NEXT:    v_writelane_b32 v2, s8, 12
41; GCN-NEXT:    v_writelane_b32 v2, s9, 13
42; GCN-NEXT:    v_writelane_b32 v2, s10, 14
43; GCN-NEXT:    v_writelane_b32 v2, s11, 15
44; GCN-NEXT:    ;;#ASMSTART
45; GCN-NEXT:    ; def s[4:11]
46; GCN-NEXT:    ;;#ASMEND
47; GCN-NEXT:    v_writelane_b32 v2, s4, 16
48; GCN-NEXT:    v_writelane_b32 v2, s5, 17
49; GCN-NEXT:    v_writelane_b32 v2, s6, 18
50; GCN-NEXT:    v_writelane_b32 v2, s7, 19
51; GCN-NEXT:    v_writelane_b32 v2, s8, 20
52; GCN-NEXT:    v_writelane_b32 v2, s9, 21
53; GCN-NEXT:    v_writelane_b32 v2, s10, 22
54; GCN-NEXT:    v_writelane_b32 v2, s11, 23
55; GCN-NEXT:    ;;#ASMSTART
56; GCN-NEXT:    ; def s[4:11]
57; GCN-NEXT:    ;;#ASMEND
58; GCN-NEXT:    v_writelane_b32 v2, s4, 24
59; GCN-NEXT:    v_writelane_b32 v2, s5, 25
60; GCN-NEXT:    v_writelane_b32 v2, s6, 26
61; GCN-NEXT:    v_writelane_b32 v2, s7, 27
62; GCN-NEXT:    v_writelane_b32 v2, s8, 28
63; GCN-NEXT:    v_writelane_b32 v2, s9, 29
64; GCN-NEXT:    v_writelane_b32 v2, s10, 30
65; GCN-NEXT:    v_writelane_b32 v2, s11, 31
66; GCN-NEXT:    ;;#ASMSTART
67; GCN-NEXT:    ; def s[4:11]
68; GCN-NEXT:    ;;#ASMEND
69; GCN-NEXT:    v_writelane_b32 v2, s4, 32
70; GCN-NEXT:    v_writelane_b32 v2, s5, 33
71; GCN-NEXT:    v_writelane_b32 v2, s6, 34
72; GCN-NEXT:    v_writelane_b32 v2, s7, 35
73; GCN-NEXT:    v_writelane_b32 v2, s8, 36
74; GCN-NEXT:    v_writelane_b32 v2, s9, 37
75; GCN-NEXT:    v_writelane_b32 v2, s10, 38
76; GCN-NEXT:    v_writelane_b32 v2, s11, 39
77; GCN-NEXT:    ;;#ASMSTART
78; GCN-NEXT:    ; def s[4:11]
79; GCN-NEXT:    ;;#ASMEND
80; GCN-NEXT:    v_writelane_b32 v2, s4, 40
81; GCN-NEXT:    v_writelane_b32 v2, s5, 41
82; GCN-NEXT:    v_writelane_b32 v2, s6, 42
83; GCN-NEXT:    v_writelane_b32 v2, s7, 43
84; GCN-NEXT:    v_writelane_b32 v2, s8, 44
85; GCN-NEXT:    v_writelane_b32 v2, s9, 45
86; GCN-NEXT:    v_writelane_b32 v2, s10, 46
87; GCN-NEXT:    v_writelane_b32 v2, s11, 47
88; GCN-NEXT:    ;;#ASMSTART
89; GCN-NEXT:    ; def s[4:11]
90; GCN-NEXT:    ;;#ASMEND
91; GCN-NEXT:    v_writelane_b32 v2, s4, 48
92; GCN-NEXT:    v_writelane_b32 v2, s5, 49
93; GCN-NEXT:    v_writelane_b32 v2, s6, 50
94; GCN-NEXT:    v_writelane_b32 v2, s7, 51
95; GCN-NEXT:    v_writelane_b32 v2, s8, 52
96; GCN-NEXT:    v_writelane_b32 v2, s9, 53
97; GCN-NEXT:    v_writelane_b32 v2, s10, 54
98; GCN-NEXT:    v_writelane_b32 v2, s11, 55
99; GCN-NEXT:    ;;#ASMSTART
100; GCN-NEXT:    ; def s[4:11]
101; GCN-NEXT:    ;;#ASMEND
102; GCN-NEXT:    v_writelane_b32 v2, s4, 56
103; GCN-NEXT:    v_writelane_b32 v2, s5, 57
104; GCN-NEXT:    v_writelane_b32 v2, s6, 58
105; GCN-NEXT:    v_writelane_b32 v2, s7, 59
106; GCN-NEXT:    v_writelane_b32 v2, s8, 60
107; GCN-NEXT:    v_writelane_b32 v2, s9, 61
108; GCN-NEXT:    v_writelane_b32 v2, s10, 62
109; GCN-NEXT:    v_writelane_b32 v2, s11, 63
110; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
111; GCN-NEXT:    buffer_store_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Spill
112; GCN-NEXT:    s_mov_b64 exec, s[34:35]
113; GCN-NEXT:    ;;#ASMSTART
114; GCN-NEXT:    ; def s[4:11]
115; GCN-NEXT:    ;;#ASMEND
116; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
117; GCN-NEXT:    v_writelane_b32 v2, s4, 0
118; GCN-NEXT:    v_writelane_b32 v2, s5, 1
119; GCN-NEXT:    v_writelane_b32 v2, s6, 2
120; GCN-NEXT:    v_writelane_b32 v2, s7, 3
121; GCN-NEXT:    v_writelane_b32 v2, s8, 4
122; GCN-NEXT:    v_writelane_b32 v2, s9, 5
123; GCN-NEXT:    v_writelane_b32 v2, s10, 6
124; GCN-NEXT:    v_writelane_b32 v2, s11, 7
125; GCN-NEXT:    ;;#ASMSTART
126; GCN-NEXT:    ; def s[4:11]
127; GCN-NEXT:    ;;#ASMEND
128; GCN-NEXT:    v_writelane_b32 v2, s4, 8
129; GCN-NEXT:    v_writelane_b32 v2, s5, 9
130; GCN-NEXT:    v_writelane_b32 v2, s6, 10
131; GCN-NEXT:    v_writelane_b32 v2, s7, 11
132; GCN-NEXT:    v_writelane_b32 v2, s8, 12
133; GCN-NEXT:    v_writelane_b32 v2, s9, 13
134; GCN-NEXT:    v_writelane_b32 v2, s10, 14
135; GCN-NEXT:    v_writelane_b32 v2, s11, 15
136; GCN-NEXT:    ;;#ASMSTART
137; GCN-NEXT:    ; def s[4:11]
138; GCN-NEXT:    ;;#ASMEND
139; GCN-NEXT:    v_writelane_b32 v2, s4, 16
140; GCN-NEXT:    v_writelane_b32 v2, s5, 17
141; GCN-NEXT:    v_writelane_b32 v2, s6, 18
142; GCN-NEXT:    v_writelane_b32 v2, s7, 19
143; GCN-NEXT:    v_writelane_b32 v2, s8, 20
144; GCN-NEXT:    v_writelane_b32 v2, s9, 21
145; GCN-NEXT:    v_writelane_b32 v2, s10, 22
146; GCN-NEXT:    v_writelane_b32 v2, s11, 23
147; GCN-NEXT:    ;;#ASMSTART
148; GCN-NEXT:    ; def s[4:11]
149; GCN-NEXT:    ;;#ASMEND
150; GCN-NEXT:    v_writelane_b32 v2, s4, 24
151; GCN-NEXT:    v_writelane_b32 v2, s5, 25
152; GCN-NEXT:    v_writelane_b32 v2, s6, 26
153; GCN-NEXT:    v_writelane_b32 v2, s7, 27
154; GCN-NEXT:    v_writelane_b32 v2, s8, 28
155; GCN-NEXT:    v_writelane_b32 v2, s9, 29
156; GCN-NEXT:    v_writelane_b32 v2, s10, 30
157; GCN-NEXT:    v_writelane_b32 v2, s11, 31
158; GCN-NEXT:    ;;#ASMSTART
159; GCN-NEXT:    ; def s[4:11]
160; GCN-NEXT:    ;;#ASMEND
161; GCN-NEXT:    v_writelane_b32 v2, s4, 32
162; GCN-NEXT:    v_writelane_b32 v2, s5, 33
163; GCN-NEXT:    v_writelane_b32 v2, s6, 34
164; GCN-NEXT:    v_writelane_b32 v2, s7, 35
165; GCN-NEXT:    v_writelane_b32 v2, s8, 36
166; GCN-NEXT:    v_writelane_b32 v2, s9, 37
167; GCN-NEXT:    v_writelane_b32 v2, s10, 38
168; GCN-NEXT:    v_writelane_b32 v2, s11, 39
169; GCN-NEXT:    ;;#ASMSTART
170; GCN-NEXT:    ; def s[4:11]
171; GCN-NEXT:    ;;#ASMEND
172; GCN-NEXT:    v_writelane_b32 v2, s4, 40
173; GCN-NEXT:    v_writelane_b32 v2, s5, 41
174; GCN-NEXT:    v_writelane_b32 v2, s6, 42
175; GCN-NEXT:    v_writelane_b32 v2, s7, 43
176; GCN-NEXT:    v_writelane_b32 v2, s8, 44
177; GCN-NEXT:    v_writelane_b32 v2, s9, 45
178; GCN-NEXT:    v_writelane_b32 v2, s10, 46
179; GCN-NEXT:    v_writelane_b32 v2, s11, 47
180; GCN-NEXT:    ;;#ASMSTART
181; GCN-NEXT:    ; def s[4:11]
182; GCN-NEXT:    ;;#ASMEND
183; GCN-NEXT:    v_writelane_b32 v2, s4, 48
184; GCN-NEXT:    v_writelane_b32 v2, s5, 49
185; GCN-NEXT:    v_writelane_b32 v2, s6, 50
186; GCN-NEXT:    v_writelane_b32 v2, s7, 51
187; GCN-NEXT:    v_writelane_b32 v2, s8, 52
188; GCN-NEXT:    v_writelane_b32 v2, s9, 53
189; GCN-NEXT:    v_writelane_b32 v2, s10, 54
190; GCN-NEXT:    v_writelane_b32 v2, s11, 55
191; GCN-NEXT:    ;;#ASMSTART
192; GCN-NEXT:    ; def s[4:11]
193; GCN-NEXT:    ;;#ASMEND
194; GCN-NEXT:    v_writelane_b32 v2, s4, 56
195; GCN-NEXT:    v_writelane_b32 v2, s5, 57
196; GCN-NEXT:    v_writelane_b32 v2, s6, 58
197; GCN-NEXT:    v_writelane_b32 v2, s7, 59
198; GCN-NEXT:    v_writelane_b32 v2, s8, 60
199; GCN-NEXT:    v_writelane_b32 v2, s9, 61
200; GCN-NEXT:    v_writelane_b32 v2, s10, 62
201; GCN-NEXT:    v_writelane_b32 v2, s11, 63
202; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
203; GCN-NEXT:    buffer_store_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill
204; GCN-NEXT:    s_mov_b64 exec, s[34:35]
205; GCN-NEXT:    ;;#ASMSTART
206; GCN-NEXT:    ; def s[4:11]
207; GCN-NEXT:    ;;#ASMEND
208; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
209; GCN-NEXT:    v_writelane_b32 v2, s4, 0
210; GCN-NEXT:    v_writelane_b32 v2, s5, 1
211; GCN-NEXT:    v_writelane_b32 v2, s6, 2
212; GCN-NEXT:    v_writelane_b32 v2, s7, 3
213; GCN-NEXT:    v_writelane_b32 v2, s8, 4
214; GCN-NEXT:    v_writelane_b32 v2, s9, 5
215; GCN-NEXT:    v_writelane_b32 v2, s10, 6
216; GCN-NEXT:    v_writelane_b32 v2, s11, 7
217; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
218; GCN-NEXT:    buffer_store_dword v2, off, s[92:95], 0 ; 4-byte Folded Spill
219; GCN-NEXT:    s_mov_b64 exec, s[34:35]
220; GCN-NEXT:    s_mov_b32 s1, 0
221; GCN-NEXT:    s_waitcnt lgkmcnt(0)
222; GCN-NEXT:    s_cmp_lg_u32 s0, s1
223; GCN-NEXT:    s_cbranch_scc1 .LBB0_2
224; GCN-NEXT:  ; %bb.1: ; %bb0
225; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
226; GCN-NEXT:    buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
227; GCN-NEXT:    s_mov_b64 exec, s[34:35]
228; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
229; GCN-NEXT:    buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
230; GCN-NEXT:    s_mov_b64 exec, s[34:35]
231; GCN-NEXT:    s_waitcnt vmcnt(1)
232; GCN-NEXT:    v_readlane_b32 s8, v0, 56
233; GCN-NEXT:    v_readlane_b32 s9, v0, 57
234; GCN-NEXT:    v_readlane_b32 s10, v0, 58
235; GCN-NEXT:    v_readlane_b32 s11, v0, 59
236; GCN-NEXT:    v_readlane_b32 s12, v0, 60
237; GCN-NEXT:    v_readlane_b32 s13, v0, 61
238; GCN-NEXT:    v_readlane_b32 s14, v0, 62
239; GCN-NEXT:    v_readlane_b32 s15, v0, 63
240; GCN-NEXT:    v_readlane_b32 s16, v0, 48
241; GCN-NEXT:    v_readlane_b32 s17, v0, 49
242; GCN-NEXT:    v_readlane_b32 s18, v0, 50
243; GCN-NEXT:    v_readlane_b32 s19, v0, 51
244; GCN-NEXT:    v_readlane_b32 s20, v0, 52
245; GCN-NEXT:    v_readlane_b32 s21, v0, 53
246; GCN-NEXT:    v_readlane_b32 s22, v0, 54
247; GCN-NEXT:    v_readlane_b32 s23, v0, 55
248; GCN-NEXT:    v_readlane_b32 s24, v0, 40
249; GCN-NEXT:    v_readlane_b32 s25, v0, 41
250; GCN-NEXT:    v_readlane_b32 s26, v0, 42
251; GCN-NEXT:    v_readlane_b32 s27, v0, 43
252; GCN-NEXT:    v_readlane_b32 s28, v0, 44
253; GCN-NEXT:    v_readlane_b32 s29, v0, 45
254; GCN-NEXT:    v_readlane_b32 s30, v0, 46
255; GCN-NEXT:    v_readlane_b32 s31, v0, 47
256; GCN-NEXT:    v_readlane_b32 s36, v0, 32
257; GCN-NEXT:    v_readlane_b32 s37, v0, 33
258; GCN-NEXT:    v_readlane_b32 s38, v0, 34
259; GCN-NEXT:    v_readlane_b32 s39, v0, 35
260; GCN-NEXT:    v_readlane_b32 s40, v0, 36
261; GCN-NEXT:    v_readlane_b32 s41, v0, 37
262; GCN-NEXT:    v_readlane_b32 s42, v0, 38
263; GCN-NEXT:    v_readlane_b32 s43, v0, 39
264; GCN-NEXT:    v_readlane_b32 s44, v0, 24
265; GCN-NEXT:    v_readlane_b32 s45, v0, 25
266; GCN-NEXT:    v_readlane_b32 s46, v0, 26
267; GCN-NEXT:    v_readlane_b32 s47, v0, 27
268; GCN-NEXT:    v_readlane_b32 s48, v0, 28
269; GCN-NEXT:    v_readlane_b32 s49, v0, 29
270; GCN-NEXT:    v_readlane_b32 s50, v0, 30
271; GCN-NEXT:    v_readlane_b32 s51, v0, 31
272; GCN-NEXT:    v_readlane_b32 s52, v0, 16
273; GCN-NEXT:    v_readlane_b32 s53, v0, 17
274; GCN-NEXT:    v_readlane_b32 s54, v0, 18
275; GCN-NEXT:    v_readlane_b32 s55, v0, 19
276; GCN-NEXT:    v_readlane_b32 s56, v0, 20
277; GCN-NEXT:    v_readlane_b32 s57, v0, 21
278; GCN-NEXT:    v_readlane_b32 s58, v0, 22
279; GCN-NEXT:    v_readlane_b32 s59, v0, 23
280; GCN-NEXT:    v_readlane_b32 s60, v0, 8
281; GCN-NEXT:    v_readlane_b32 s61, v0, 9
282; GCN-NEXT:    v_readlane_b32 s62, v0, 10
283; GCN-NEXT:    v_readlane_b32 s63, v0, 11
284; GCN-NEXT:    v_readlane_b32 s64, v0, 12
285; GCN-NEXT:    v_readlane_b32 s65, v0, 13
286; GCN-NEXT:    v_readlane_b32 s66, v0, 14
287; GCN-NEXT:    v_readlane_b32 s67, v0, 15
288; GCN-NEXT:    v_readlane_b32 s68, v0, 0
289; GCN-NEXT:    v_readlane_b32 s69, v0, 1
290; GCN-NEXT:    v_readlane_b32 s70, v0, 2
291; GCN-NEXT:    v_readlane_b32 s71, v0, 3
292; GCN-NEXT:    v_readlane_b32 s72, v0, 4
293; GCN-NEXT:    v_readlane_b32 s73, v0, 5
294; GCN-NEXT:    v_readlane_b32 s74, v0, 6
295; GCN-NEXT:    v_readlane_b32 s75, v0, 7
296; GCN-NEXT:    s_waitcnt vmcnt(0)
297; GCN-NEXT:    v_readlane_b32 s76, v1, 56
298; GCN-NEXT:    v_readlane_b32 s77, v1, 57
299; GCN-NEXT:    v_readlane_b32 s78, v1, 58
300; GCN-NEXT:    v_readlane_b32 s79, v1, 59
301; GCN-NEXT:    v_readlane_b32 s80, v1, 60
302; GCN-NEXT:    v_readlane_b32 s81, v1, 61
303; GCN-NEXT:    v_readlane_b32 s82, v1, 62
304; GCN-NEXT:    v_readlane_b32 s83, v1, 63
305; GCN-NEXT:    v_readlane_b32 s84, v1, 48
306; GCN-NEXT:    v_readlane_b32 s85, v1, 49
307; GCN-NEXT:    v_readlane_b32 s86, v1, 50
308; GCN-NEXT:    v_readlane_b32 s87, v1, 51
309; GCN-NEXT:    v_readlane_b32 s88, v1, 52
310; GCN-NEXT:    v_readlane_b32 s89, v1, 53
311; GCN-NEXT:    v_readlane_b32 s90, v1, 54
312; GCN-NEXT:    v_readlane_b32 s91, v1, 55
313; GCN-NEXT:    v_readlane_b32 s0, v1, 0
314; GCN-NEXT:    v_readlane_b32 s1, v1, 1
315; GCN-NEXT:    v_readlane_b32 s2, v1, 2
316; GCN-NEXT:    v_readlane_b32 s3, v1, 3
317; GCN-NEXT:    v_readlane_b32 s4, v1, 4
318; GCN-NEXT:    v_readlane_b32 s5, v1, 5
319; GCN-NEXT:    v_readlane_b32 s6, v1, 6
320; GCN-NEXT:    v_readlane_b32 s7, v1, 7
321; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
322; GCN-NEXT:    buffer_load_dword v2, off, s[92:95], 0 ; 4-byte Folded Reload
323; GCN-NEXT:    s_mov_b64 exec, s[34:35]
324; GCN-NEXT:    ;;#ASMSTART
325; GCN-NEXT:    ; use s[0:7]
326; GCN-NEXT:    ;;#ASMEND
327; GCN-NEXT:    v_readlane_b32 s0, v1, 8
328; GCN-NEXT:    v_readlane_b32 s1, v1, 9
329; GCN-NEXT:    v_readlane_b32 s2, v1, 10
330; GCN-NEXT:    v_readlane_b32 s3, v1, 11
331; GCN-NEXT:    v_readlane_b32 s4, v1, 12
332; GCN-NEXT:    v_readlane_b32 s5, v1, 13
333; GCN-NEXT:    v_readlane_b32 s6, v1, 14
334; GCN-NEXT:    v_readlane_b32 s7, v1, 15
335; GCN-NEXT:    ;;#ASMSTART
336; GCN-NEXT:    ; use s[0:7]
337; GCN-NEXT:    ;;#ASMEND
338; GCN-NEXT:    v_readlane_b32 s0, v1, 16
339; GCN-NEXT:    v_readlane_b32 s1, v1, 17
340; GCN-NEXT:    v_readlane_b32 s2, v1, 18
341; GCN-NEXT:    v_readlane_b32 s3, v1, 19
342; GCN-NEXT:    v_readlane_b32 s4, v1, 20
343; GCN-NEXT:    v_readlane_b32 s5, v1, 21
344; GCN-NEXT:    v_readlane_b32 s6, v1, 22
345; GCN-NEXT:    v_readlane_b32 s7, v1, 23
346; GCN-NEXT:    ;;#ASMSTART
347; GCN-NEXT:    ; use s[0:7]
348; GCN-NEXT:    ;;#ASMEND
349; GCN-NEXT:    v_readlane_b32 s0, v1, 24
350; GCN-NEXT:    v_readlane_b32 s1, v1, 25
351; GCN-NEXT:    v_readlane_b32 s2, v1, 26
352; GCN-NEXT:    v_readlane_b32 s3, v1, 27
353; GCN-NEXT:    v_readlane_b32 s4, v1, 28
354; GCN-NEXT:    v_readlane_b32 s5, v1, 29
355; GCN-NEXT:    v_readlane_b32 s6, v1, 30
356; GCN-NEXT:    v_readlane_b32 s7, v1, 31
357; GCN-NEXT:    ;;#ASMSTART
358; GCN-NEXT:    ; use s[0:7]
359; GCN-NEXT:    ;;#ASMEND
360; GCN-NEXT:    v_readlane_b32 s0, v1, 32
361; GCN-NEXT:    v_readlane_b32 s1, v1, 33
362; GCN-NEXT:    v_readlane_b32 s2, v1, 34
363; GCN-NEXT:    v_readlane_b32 s3, v1, 35
364; GCN-NEXT:    v_readlane_b32 s4, v1, 36
365; GCN-NEXT:    v_readlane_b32 s5, v1, 37
366; GCN-NEXT:    v_readlane_b32 s6, v1, 38
367; GCN-NEXT:    v_readlane_b32 s7, v1, 39
368; GCN-NEXT:    ;;#ASMSTART
369; GCN-NEXT:    ; use s[0:7]
370; GCN-NEXT:    ;;#ASMEND
371; GCN-NEXT:    v_readlane_b32 s0, v1, 40
372; GCN-NEXT:    v_readlane_b32 s1, v1, 41
373; GCN-NEXT:    v_readlane_b32 s2, v1, 42
374; GCN-NEXT:    v_readlane_b32 s3, v1, 43
375; GCN-NEXT:    v_readlane_b32 s4, v1, 44
376; GCN-NEXT:    v_readlane_b32 s5, v1, 45
377; GCN-NEXT:    v_readlane_b32 s6, v1, 46
378; GCN-NEXT:    v_readlane_b32 s7, v1, 47
379; GCN-NEXT:    ;;#ASMSTART
380; GCN-NEXT:    ; use s[0:7]
381; GCN-NEXT:    ;;#ASMEND
382; GCN-NEXT:    s_waitcnt vmcnt(0)
383; GCN-NEXT:    v_readlane_b32 s0, v2, 0
384; GCN-NEXT:    v_readlane_b32 s1, v2, 1
385; GCN-NEXT:    v_readlane_b32 s2, v2, 2
386; GCN-NEXT:    v_readlane_b32 s3, v2, 3
387; GCN-NEXT:    v_readlane_b32 s4, v2, 4
388; GCN-NEXT:    v_readlane_b32 s5, v2, 5
389; GCN-NEXT:    v_readlane_b32 s6, v2, 6
390; GCN-NEXT:    v_readlane_b32 s7, v2, 7
391; GCN-NEXT:    ;;#ASMSTART
392; GCN-NEXT:    ; use s[84:91]
393; GCN-NEXT:    ;;#ASMEND
394; GCN-NEXT:    ;;#ASMSTART
395; GCN-NEXT:    ; use s[76:83]
396; GCN-NEXT:    ;;#ASMEND
397; GCN-NEXT:    ;;#ASMSTART
398; GCN-NEXT:    ; use s[68:75]
399; GCN-NEXT:    ;;#ASMEND
400; GCN-NEXT:    ;;#ASMSTART
401; GCN-NEXT:    ; use s[60:67]
402; GCN-NEXT:    ;;#ASMEND
403; GCN-NEXT:    ;;#ASMSTART
404; GCN-NEXT:    ; use s[52:59]
405; GCN-NEXT:    ;;#ASMEND
406; GCN-NEXT:    ;;#ASMSTART
407; GCN-NEXT:    ; use s[44:51]
408; GCN-NEXT:    ;;#ASMEND
409; GCN-NEXT:    ;;#ASMSTART
410; GCN-NEXT:    ; use s[36:43]
411; GCN-NEXT:    ;;#ASMEND
412; GCN-NEXT:    ;;#ASMSTART
413; GCN-NEXT:    ; use s[24:31]
414; GCN-NEXT:    ;;#ASMEND
415; GCN-NEXT:    ;;#ASMSTART
416; GCN-NEXT:    ; use s[16:23]
417; GCN-NEXT:    ;;#ASMEND
418; GCN-NEXT:    ;;#ASMSTART
419; GCN-NEXT:    ; use s[8:15]
420; GCN-NEXT:    ;;#ASMEND
421; GCN-NEXT:    ;;#ASMSTART
422; GCN-NEXT:    ; use s[0:7]
423; GCN-NEXT:    ;;#ASMEND
424; GCN-NEXT:  .LBB0_2: ; %ret
425; GCN-NEXT:    s_endpgm
  ; Define 17 <8 x i32> values through inline asm with an SGPR output
  ; constraint ("=s"). That is 136 dwords, all defined before the branch and
  ; only used in %bb0, which matches the 64 + 64 + 8 lanes written with
  ; v_writelane_b32 in the expected output above.
426  %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
427  %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
428  %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
429  %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
430  %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
431  %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
432  %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
433  %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
434  %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
435  %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
436  %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
437  %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
438  %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
439  %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
440  %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
441  %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
442  %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  ; Branch on %in so that the uses sit in a different basic block from the
  ; definitions; %bb0 runs only when %in == 0.
443  %cmp = icmp eq i32 %in, 0
444  br i1 %cmp, label %bb0, label %ret
445
446bb0:
  ; Consume every value defined in the entry block, in definition order,
  ; through an SGPR input constraint ("s").
447  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
448  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) #0
449  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0
450  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
451  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0
452  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0
453  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0
454  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0
455  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0
456  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0
457  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0
458  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0
459  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0
460  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0
461  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0
462  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0
463  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0
464  br label %ret
465
466ret:
467  ret void
468}
469
470; Some of the lanes of an SGPR spill are in one VGPR and some are forced
471; into the next available VGPR.
472define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) #1 {
473; GCN-LABEL: split_sgpr_spill_2_vgprs:
474; GCN:       ; %bb.0:
475; GCN-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
476; GCN-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
477; GCN-NEXT:    s_mov_b32 s54, -1
478; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
479; GCN-NEXT:    s_add_u32 s52, s52, s11
480; GCN-NEXT:    s_addc_u32 s53, s53, 0
481; GCN-NEXT:    s_load_dword s0, s[4:5], 0xb
482; GCN-NEXT:    ;;#ASMSTART
483; GCN-NEXT:    ; def s[4:19]
484; GCN-NEXT:    ;;#ASMEND
485; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
486; GCN-NEXT:    v_writelane_b32 v1, s4, 0
487; GCN-NEXT:    v_writelane_b32 v1, s5, 1
488; GCN-NEXT:    v_writelane_b32 v1, s6, 2
489; GCN-NEXT:    v_writelane_b32 v1, s7, 3
490; GCN-NEXT:    v_writelane_b32 v1, s8, 4
491; GCN-NEXT:    v_writelane_b32 v1, s9, 5
492; GCN-NEXT:    v_writelane_b32 v1, s10, 6
493; GCN-NEXT:    v_writelane_b32 v1, s11, 7
494; GCN-NEXT:    v_writelane_b32 v1, s12, 8
495; GCN-NEXT:    v_writelane_b32 v1, s13, 9
496; GCN-NEXT:    v_writelane_b32 v1, s14, 10
497; GCN-NEXT:    v_writelane_b32 v1, s15, 11
498; GCN-NEXT:    v_writelane_b32 v1, s16, 12
499; GCN-NEXT:    v_writelane_b32 v1, s17, 13
500; GCN-NEXT:    v_writelane_b32 v1, s18, 14
501; GCN-NEXT:    v_writelane_b32 v1, s19, 15
502; GCN-NEXT:    ;;#ASMSTART
503; GCN-NEXT:    ; def s[4:19]
504; GCN-NEXT:    ;;#ASMEND
505; GCN-NEXT:    v_writelane_b32 v1, s4, 16
506; GCN-NEXT:    v_writelane_b32 v1, s5, 17
507; GCN-NEXT:    v_writelane_b32 v1, s6, 18
508; GCN-NEXT:    v_writelane_b32 v1, s7, 19
509; GCN-NEXT:    v_writelane_b32 v1, s8, 20
510; GCN-NEXT:    v_writelane_b32 v1, s9, 21
511; GCN-NEXT:    v_writelane_b32 v1, s10, 22
512; GCN-NEXT:    v_writelane_b32 v1, s11, 23
513; GCN-NEXT:    v_writelane_b32 v1, s12, 24
514; GCN-NEXT:    v_writelane_b32 v1, s13, 25
515; GCN-NEXT:    v_writelane_b32 v1, s14, 26
516; GCN-NEXT:    v_writelane_b32 v1, s15, 27
517; GCN-NEXT:    v_writelane_b32 v1, s16, 28
518; GCN-NEXT:    v_writelane_b32 v1, s17, 29
519; GCN-NEXT:    v_writelane_b32 v1, s18, 30
520; GCN-NEXT:    v_writelane_b32 v1, s19, 31
521; GCN-NEXT:    ;;#ASMSTART
522; GCN-NEXT:    ; def s[4:19]
523; GCN-NEXT:    ;;#ASMEND
524; GCN-NEXT:    v_writelane_b32 v1, s4, 32
525; GCN-NEXT:    v_writelane_b32 v1, s5, 33
526; GCN-NEXT:    v_writelane_b32 v1, s6, 34
527; GCN-NEXT:    v_writelane_b32 v1, s7, 35
528; GCN-NEXT:    v_writelane_b32 v1, s8, 36
529; GCN-NEXT:    v_writelane_b32 v1, s9, 37
530; GCN-NEXT:    v_writelane_b32 v1, s10, 38
531; GCN-NEXT:    v_writelane_b32 v1, s11, 39
532; GCN-NEXT:    v_writelane_b32 v1, s12, 40
533; GCN-NEXT:    v_writelane_b32 v1, s13, 41
534; GCN-NEXT:    v_writelane_b32 v1, s14, 42
535; GCN-NEXT:    v_writelane_b32 v1, s15, 43
536; GCN-NEXT:    v_writelane_b32 v1, s16, 44
537; GCN-NEXT:    v_writelane_b32 v1, s17, 45
538; GCN-NEXT:    v_writelane_b32 v1, s18, 46
539; GCN-NEXT:    v_writelane_b32 v1, s19, 47
540; GCN-NEXT:    ;;#ASMSTART
541; GCN-NEXT:    ; def s[4:19]
542; GCN-NEXT:    ;;#ASMEND
543; GCN-NEXT:    v_writelane_b32 v1, s4, 48
544; GCN-NEXT:    v_writelane_b32 v1, s5, 49
545; GCN-NEXT:    v_writelane_b32 v1, s6, 50
546; GCN-NEXT:    v_writelane_b32 v1, s7, 51
547; GCN-NEXT:    v_writelane_b32 v1, s8, 52
548; GCN-NEXT:    v_writelane_b32 v1, s9, 53
549; GCN-NEXT:    v_writelane_b32 v1, s10, 54
550; GCN-NEXT:    v_writelane_b32 v1, s11, 55
551; GCN-NEXT:    v_writelane_b32 v1, s12, 56
552; GCN-NEXT:    v_writelane_b32 v1, s13, 57
553; GCN-NEXT:    v_writelane_b32 v1, s14, 58
554; GCN-NEXT:    v_writelane_b32 v1, s15, 59
555; GCN-NEXT:    v_writelane_b32 v1, s16, 60
556; GCN-NEXT:    v_writelane_b32 v1, s17, 61
557; GCN-NEXT:    v_writelane_b32 v1, s18, 62
558; GCN-NEXT:    v_writelane_b32 v1, s19, 63
559; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
560; GCN-NEXT:    buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
561; GCN-NEXT:    s_mov_b64 exec, s[28:29]
562; GCN-NEXT:    ;;#ASMSTART
563; GCN-NEXT:    ; def s[4:11]
564; GCN-NEXT:    ;;#ASMEND
565; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
566; GCN-NEXT:    v_writelane_b32 v1, s4, 0
567; GCN-NEXT:    v_writelane_b32 v1, s5, 1
568; GCN-NEXT:    v_writelane_b32 v1, s6, 2
569; GCN-NEXT:    v_writelane_b32 v1, s7, 3
570; GCN-NEXT:    v_writelane_b32 v1, s8, 4
571; GCN-NEXT:    v_writelane_b32 v1, s9, 5
572; GCN-NEXT:    v_writelane_b32 v1, s10, 6
573; GCN-NEXT:    v_writelane_b32 v1, s11, 7
574; GCN-NEXT:    ;;#ASMSTART
575; GCN-NEXT:    ; def s[2:3]
576; GCN-NEXT:    ;;#ASMEND
577; GCN-NEXT:    v_writelane_b32 v1, s2, 8
578; GCN-NEXT:    v_writelane_b32 v1, s3, 9
579; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
580; GCN-NEXT:    buffer_store_dword v1, off, s[52:55], 0 ; 4-byte Folded Spill
581; GCN-NEXT:    s_mov_b64 exec, s[28:29]
582; GCN-NEXT:    s_mov_b32 s1, 0
583; GCN-NEXT:    s_waitcnt lgkmcnt(0)
584; GCN-NEXT:    s_cmp_lg_u32 s0, s1
585; GCN-NEXT:    s_cbranch_scc1 .LBB1_2
586; GCN-NEXT:  ; %bb.1: ; %bb0
587; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
588; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
589; GCN-NEXT:    s_mov_b64 exec, s[28:29]
590; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
591; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
592; GCN-NEXT:    s_mov_b64 exec, s[28:29]
593; GCN-NEXT:    s_waitcnt vmcnt(1)
594; GCN-NEXT:    v_readlane_b32 s16, v0, 8
595; GCN-NEXT:    v_readlane_b32 s17, v0, 9
596; GCN-NEXT:    v_readlane_b32 s20, v0, 0
597; GCN-NEXT:    v_readlane_b32 s21, v0, 1
598; GCN-NEXT:    v_readlane_b32 s22, v0, 2
599; GCN-NEXT:    v_readlane_b32 s23, v0, 3
600; GCN-NEXT:    v_readlane_b32 s24, v0, 4
601; GCN-NEXT:    v_readlane_b32 s25, v0, 5
602; GCN-NEXT:    v_readlane_b32 s26, v0, 6
603; GCN-NEXT:    v_readlane_b32 s27, v0, 7
604; GCN-NEXT:    s_waitcnt vmcnt(0)
605; GCN-NEXT:    v_readlane_b32 s36, v1, 32
606; GCN-NEXT:    v_readlane_b32 s37, v1, 33
607; GCN-NEXT:    v_readlane_b32 s38, v1, 34
608; GCN-NEXT:    v_readlane_b32 s39, v1, 35
609; GCN-NEXT:    v_readlane_b32 s40, v1, 36
610; GCN-NEXT:    v_readlane_b32 s41, v1, 37
611; GCN-NEXT:    v_readlane_b32 s42, v1, 38
612; GCN-NEXT:    v_readlane_b32 s43, v1, 39
613; GCN-NEXT:    v_readlane_b32 s44, v1, 40
614; GCN-NEXT:    v_readlane_b32 s45, v1, 41
615; GCN-NEXT:    v_readlane_b32 s46, v1, 42
616; GCN-NEXT:    v_readlane_b32 s47, v1, 43
617; GCN-NEXT:    v_readlane_b32 s48, v1, 44
618; GCN-NEXT:    v_readlane_b32 s49, v1, 45
619; GCN-NEXT:    v_readlane_b32 s50, v1, 46
620; GCN-NEXT:    v_readlane_b32 s51, v1, 47
621; GCN-NEXT:    v_readlane_b32 s0, v1, 0
622; GCN-NEXT:    v_readlane_b32 s1, v1, 1
623; GCN-NEXT:    v_readlane_b32 s2, v1, 2
624; GCN-NEXT:    v_readlane_b32 s3, v1, 3
625; GCN-NEXT:    v_readlane_b32 s4, v1, 4
626; GCN-NEXT:    v_readlane_b32 s5, v1, 5
627; GCN-NEXT:    v_readlane_b32 s6, v1, 6
628; GCN-NEXT:    v_readlane_b32 s7, v1, 7
629; GCN-NEXT:    v_readlane_b32 s8, v1, 8
630; GCN-NEXT:    v_readlane_b32 s9, v1, 9
631; GCN-NEXT:    v_readlane_b32 s10, v1, 10
632; GCN-NEXT:    v_readlane_b32 s11, v1, 11
633; GCN-NEXT:    v_readlane_b32 s12, v1, 12
634; GCN-NEXT:    v_readlane_b32 s13, v1, 13
635; GCN-NEXT:    v_readlane_b32 s14, v1, 14
636; GCN-NEXT:    v_readlane_b32 s15, v1, 15
637; GCN-NEXT:    ;;#ASMSTART
638; GCN-NEXT:    ; use s[0:15]
639; GCN-NEXT:    ;;#ASMEND
640; GCN-NEXT:    v_readlane_b32 s0, v1, 16
641; GCN-NEXT:    v_readlane_b32 s1, v1, 17
642; GCN-NEXT:    v_readlane_b32 s2, v1, 18
643; GCN-NEXT:    v_readlane_b32 s3, v1, 19
644; GCN-NEXT:    v_readlane_b32 s4, v1, 20
645; GCN-NEXT:    v_readlane_b32 s5, v1, 21
646; GCN-NEXT:    v_readlane_b32 s6, v1, 22
647; GCN-NEXT:    v_readlane_b32 s7, v1, 23
648; GCN-NEXT:    v_readlane_b32 s8, v1, 24
649; GCN-NEXT:    v_readlane_b32 s9, v1, 25
650; GCN-NEXT:    v_readlane_b32 s10, v1, 26
651; GCN-NEXT:    v_readlane_b32 s11, v1, 27
652; GCN-NEXT:    v_readlane_b32 s12, v1, 28
653; GCN-NEXT:    v_readlane_b32 s13, v1, 29
654; GCN-NEXT:    v_readlane_b32 s14, v1, 30
655; GCN-NEXT:    v_readlane_b32 s15, v1, 31
656; GCN-NEXT:    ;;#ASMSTART
657; GCN-NEXT:    ; use s[0:15]
658; GCN-NEXT:    ;;#ASMEND
659; GCN-NEXT:    v_readlane_b32 s0, v1, 48
660; GCN-NEXT:    v_readlane_b32 s1, v1, 49
661; GCN-NEXT:    v_readlane_b32 s2, v1, 50
662; GCN-NEXT:    v_readlane_b32 s3, v1, 51
663; GCN-NEXT:    v_readlane_b32 s4, v1, 52
664; GCN-NEXT:    v_readlane_b32 s5, v1, 53
665; GCN-NEXT:    v_readlane_b32 s6, v1, 54
666; GCN-NEXT:    v_readlane_b32 s7, v1, 55
667; GCN-NEXT:    v_readlane_b32 s8, v1, 56
668; GCN-NEXT:    v_readlane_b32 s9, v1, 57
669; GCN-NEXT:    v_readlane_b32 s10, v1, 58
670; GCN-NEXT:    v_readlane_b32 s11, v1, 59
671; GCN-NEXT:    v_readlane_b32 s12, v1, 60
672; GCN-NEXT:    v_readlane_b32 s13, v1, 61
673; GCN-NEXT:    v_readlane_b32 s14, v1, 62
674; GCN-NEXT:    v_readlane_b32 s15, v1, 63
675; GCN-NEXT:    ;;#ASMSTART
676; GCN-NEXT:    ; use s[36:51]
677; GCN-NEXT:    ;;#ASMEND
678; GCN-NEXT:    ;;#ASMSTART
679; GCN-NEXT:    ; use s[20:27]
680; GCN-NEXT:    ;;#ASMEND
681; GCN-NEXT:    ;;#ASMSTART
682; GCN-NEXT:    ; use s[16:17]
683; GCN-NEXT:    ;;#ASMEND
684; GCN-NEXT:    ;;#ASMSTART
685; GCN-NEXT:    ; use s[0:15]
686; GCN-NEXT:    ;;#ASMEND
687; GCN-NEXT:  .LBB1_2: ; %ret
688; GCN-NEXT:    s_endpgm
  ; Four <16 x i32> defs (64 dwords), then an <8 x i32> and a <2 x i32> def
  ; (10 more dwords), all through an SGPR output constraint ("=s"). In the
  ; expected output above the first four fill lanes 0-63 of one spill VGPR and
  ; the last two land in lanes 0-9 of a second one.
  ; Note that %wide.sgpr5 is defined fourth, ahead of %wide.sgpr3 and
  ; %wide.sgpr4, but is consumed last in %bb0.
689  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
690  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
691  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
692  %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
693  %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
694  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
695
  ; Branch on %in so that the uses sit in a different basic block from the
  ; definitions; %bb0 runs only when %in == 0.
696  %cmp = icmp eq i32 %in, 0
697  br i1 %cmp, label %bb0, label %ret
698
699bb0:
  ; Consume each value through an SGPR input constraint ("s").
700  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
701  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
702  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
703  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
704  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
705  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
706  br label %ret
707
708ret:
709  ret void
710}
711
712; The first 64 SGPR spills can go to a VGPR, but there isn't a second one,
713; so some spills must go to memory. The last 16-element spill runs out
714; of lanes at the 15th element.
715define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
716; GCN-LABEL: no_vgprs_last_sgpr_spill:
717; GCN:       ; %bb.0:
718; GCN-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
719; GCN-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
720; GCN-NEXT:    s_mov_b32 s54, -1
721; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
722; GCN-NEXT:    s_add_u32 s52, s52, s11
723; GCN-NEXT:    s_addc_u32 s53, s53, 0
724; GCN-NEXT:    s_load_dword s0, s[4:5], 0xb
725; GCN-NEXT:    ;;#ASMSTART
726; GCN-NEXT:    ;;#ASMEND
727; GCN-NEXT:    ;;#ASMSTART
728; GCN-NEXT:    ;;#ASMEND
729; GCN-NEXT:    ;;#ASMSTART
730; GCN-NEXT:    ;;#ASMEND
731; GCN-NEXT:    ;;#ASMSTART
732; GCN-NEXT:    ;;#ASMEND
733; GCN-NEXT:    ;;#ASMSTART
734; GCN-NEXT:    ;;#ASMEND
735; GCN-NEXT:    ;;#ASMSTART
736; GCN-NEXT:    ;;#ASMEND
737; GCN-NEXT:    ;;#ASMSTART
738; GCN-NEXT:    ; def s[4:19]
739; GCN-NEXT:    ;;#ASMEND
740; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
741; GCN-NEXT:    v_writelane_b32 v32, s4, 0
742; GCN-NEXT:    v_writelane_b32 v32, s5, 1
743; GCN-NEXT:    v_writelane_b32 v32, s6, 2
744; GCN-NEXT:    v_writelane_b32 v32, s7, 3
745; GCN-NEXT:    v_writelane_b32 v32, s8, 4
746; GCN-NEXT:    v_writelane_b32 v32, s9, 5
747; GCN-NEXT:    v_writelane_b32 v32, s10, 6
748; GCN-NEXT:    v_writelane_b32 v32, s11, 7
749; GCN-NEXT:    v_writelane_b32 v32, s12, 8
750; GCN-NEXT:    v_writelane_b32 v32, s13, 9
751; GCN-NEXT:    v_writelane_b32 v32, s14, 10
752; GCN-NEXT:    v_writelane_b32 v32, s15, 11
753; GCN-NEXT:    v_writelane_b32 v32, s16, 12
754; GCN-NEXT:    v_writelane_b32 v32, s17, 13
755; GCN-NEXT:    v_writelane_b32 v32, s18, 14
756; GCN-NEXT:    v_writelane_b32 v32, s19, 15
757; GCN-NEXT:    ;;#ASMSTART
758; GCN-NEXT:    ; def s[4:19]
759; GCN-NEXT:    ;;#ASMEND
760; GCN-NEXT:    v_writelane_b32 v32, s4, 16
761; GCN-NEXT:    v_writelane_b32 v32, s5, 17
762; GCN-NEXT:    v_writelane_b32 v32, s6, 18
763; GCN-NEXT:    v_writelane_b32 v32, s7, 19
764; GCN-NEXT:    v_writelane_b32 v32, s8, 20
765; GCN-NEXT:    v_writelane_b32 v32, s9, 21
766; GCN-NEXT:    v_writelane_b32 v32, s10, 22
767; GCN-NEXT:    v_writelane_b32 v32, s11, 23
768; GCN-NEXT:    v_writelane_b32 v32, s12, 24
769; GCN-NEXT:    v_writelane_b32 v32, s13, 25
770; GCN-NEXT:    v_writelane_b32 v32, s14, 26
771; GCN-NEXT:    v_writelane_b32 v32, s15, 27
772; GCN-NEXT:    v_writelane_b32 v32, s16, 28
773; GCN-NEXT:    v_writelane_b32 v32, s17, 29
774; GCN-NEXT:    v_writelane_b32 v32, s18, 30
775; GCN-NEXT:    v_writelane_b32 v32, s19, 31
776; GCN-NEXT:    ;;#ASMSTART
777; GCN-NEXT:    ; def s[4:19]
778; GCN-NEXT:    ;;#ASMEND
779; GCN-NEXT:    v_writelane_b32 v32, s4, 32
780; GCN-NEXT:    v_writelane_b32 v32, s5, 33
781; GCN-NEXT:    v_writelane_b32 v32, s6, 34
782; GCN-NEXT:    v_writelane_b32 v32, s7, 35
783; GCN-NEXT:    v_writelane_b32 v32, s8, 36
784; GCN-NEXT:    v_writelane_b32 v32, s9, 37
785; GCN-NEXT:    v_writelane_b32 v32, s10, 38
786; GCN-NEXT:    v_writelane_b32 v32, s11, 39
787; GCN-NEXT:    v_writelane_b32 v32, s12, 40
788; GCN-NEXT:    v_writelane_b32 v32, s13, 41
789; GCN-NEXT:    v_writelane_b32 v32, s14, 42
790; GCN-NEXT:    v_writelane_b32 v32, s15, 43
791; GCN-NEXT:    v_writelane_b32 v32, s16, 44
792; GCN-NEXT:    v_writelane_b32 v32, s17, 45
793; GCN-NEXT:    v_writelane_b32 v32, s18, 46
794; GCN-NEXT:    v_writelane_b32 v32, s19, 47
795; GCN-NEXT:    ;;#ASMSTART
796; GCN-NEXT:    ; def s[4:19]
797; GCN-NEXT:    ;;#ASMEND
798; GCN-NEXT:    v_writelane_b32 v32, s4, 48
799; GCN-NEXT:    v_writelane_b32 v32, s5, 49
800; GCN-NEXT:    v_writelane_b32 v32, s6, 50
801; GCN-NEXT:    v_writelane_b32 v32, s7, 51
802; GCN-NEXT:    v_writelane_b32 v32, s8, 52
803; GCN-NEXT:    v_writelane_b32 v32, s9, 53
804; GCN-NEXT:    v_writelane_b32 v32, s10, 54
805; GCN-NEXT:    v_writelane_b32 v32, s11, 55
806; GCN-NEXT:    v_writelane_b32 v32, s12, 56
807; GCN-NEXT:    v_writelane_b32 v32, s13, 57
808; GCN-NEXT:    v_writelane_b32 v32, s14, 58
809; GCN-NEXT:    v_writelane_b32 v32, s15, 59
810; GCN-NEXT:    v_writelane_b32 v32, s16, 60
811; GCN-NEXT:    v_writelane_b32 v32, s17, 61
812; GCN-NEXT:    v_writelane_b32 v32, s18, 62
813; GCN-NEXT:    v_writelane_b32 v32, s19, 63
814; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
815; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
816; GCN-NEXT:    s_mov_b64 exec, s[34:35]
817; GCN-NEXT:    ;;#ASMSTART
818; GCN-NEXT:    ; def s[2:3]
819; GCN-NEXT:    ;;#ASMEND
820; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
821; GCN-NEXT:    v_writelane_b32 v32, s2, 0
822; GCN-NEXT:    v_writelane_b32 v32, s3, 1
823; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
824; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill
825; GCN-NEXT:    s_mov_b64 exec, s[34:35]
826; GCN-NEXT:    s_mov_b32 s1, 0
827; GCN-NEXT:    s_waitcnt lgkmcnt(0)
828; GCN-NEXT:    s_cmp_lg_u32 s0, s1
829; GCN-NEXT:    s_cbranch_scc1 .LBB2_2
830; GCN-NEXT:  ; %bb.1: ; %bb0
831; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
832; GCN-NEXT:    buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
833; GCN-NEXT:    s_mov_b64 exec, s[34:35]
834; GCN-NEXT:    s_waitcnt vmcnt(0)
835; GCN-NEXT:    v_readlane_b32 s36, v31, 32
836; GCN-NEXT:    v_readlane_b32 s37, v31, 33
837; GCN-NEXT:    v_readlane_b32 s38, v31, 34
838; GCN-NEXT:    v_readlane_b32 s39, v31, 35
839; GCN-NEXT:    v_readlane_b32 s40, v31, 36
840; GCN-NEXT:    v_readlane_b32 s41, v31, 37
841; GCN-NEXT:    v_readlane_b32 s42, v31, 38
842; GCN-NEXT:    v_readlane_b32 s43, v31, 39
843; GCN-NEXT:    v_readlane_b32 s44, v31, 40
844; GCN-NEXT:    v_readlane_b32 s45, v31, 41
845; GCN-NEXT:    v_readlane_b32 s46, v31, 42
846; GCN-NEXT:    v_readlane_b32 s47, v31, 43
847; GCN-NEXT:    v_readlane_b32 s48, v31, 44
848; GCN-NEXT:    v_readlane_b32 s49, v31, 45
849; GCN-NEXT:    v_readlane_b32 s50, v31, 46
850; GCN-NEXT:    v_readlane_b32 s51, v31, 47
851; GCN-NEXT:    v_readlane_b32 s0, v31, 16
852; GCN-NEXT:    v_readlane_b32 s1, v31, 17
853; GCN-NEXT:    v_readlane_b32 s2, v31, 18
854; GCN-NEXT:    v_readlane_b32 s3, v31, 19
855; GCN-NEXT:    v_readlane_b32 s4, v31, 20
856; GCN-NEXT:    v_readlane_b32 s5, v31, 21
857; GCN-NEXT:    v_readlane_b32 s6, v31, 22
858; GCN-NEXT:    v_readlane_b32 s7, v31, 23
859; GCN-NEXT:    v_readlane_b32 s8, v31, 24
860; GCN-NEXT:    v_readlane_b32 s9, v31, 25
861; GCN-NEXT:    v_readlane_b32 s10, v31, 26
862; GCN-NEXT:    v_readlane_b32 s11, v31, 27
863; GCN-NEXT:    v_readlane_b32 s12, v31, 28
864; GCN-NEXT:    v_readlane_b32 s13, v31, 29
865; GCN-NEXT:    v_readlane_b32 s14, v31, 30
866; GCN-NEXT:    v_readlane_b32 s15, v31, 31
867; GCN-NEXT:    v_readlane_b32 s16, v31, 0
868; GCN-NEXT:    v_readlane_b32 s17, v31, 1
869; GCN-NEXT:    v_readlane_b32 s18, v31, 2
870; GCN-NEXT:    v_readlane_b32 s19, v31, 3
871; GCN-NEXT:    v_readlane_b32 s20, v31, 4
872; GCN-NEXT:    v_readlane_b32 s21, v31, 5
873; GCN-NEXT:    v_readlane_b32 s22, v31, 6
874; GCN-NEXT:    v_readlane_b32 s23, v31, 7
875; GCN-NEXT:    v_readlane_b32 s24, v31, 8
876; GCN-NEXT:    v_readlane_b32 s25, v31, 9
877; GCN-NEXT:    v_readlane_b32 s26, v31, 10
878; GCN-NEXT:    v_readlane_b32 s27, v31, 11
879; GCN-NEXT:    v_readlane_b32 s28, v31, 12
880; GCN-NEXT:    v_readlane_b32 s29, v31, 13
881; GCN-NEXT:    v_readlane_b32 s30, v31, 14
882; GCN-NEXT:    v_readlane_b32 s31, v31, 15
883; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
884; GCN-NEXT:    buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload
885; GCN-NEXT:    s_mov_b64 exec, s[34:35]
886; GCN-NEXT:    ;;#ASMSTART
887; GCN-NEXT:    ; use s[16:31]
888; GCN-NEXT:    ;;#ASMEND
889; GCN-NEXT:    ;;#ASMSTART
890; GCN-NEXT:    ; use s[0:15]
891; GCN-NEXT:    ;;#ASMEND
892; GCN-NEXT:    v_readlane_b32 s4, v31, 48
893; GCN-NEXT:    v_readlane_b32 s5, v31, 49
894; GCN-NEXT:    v_readlane_b32 s6, v31, 50
895; GCN-NEXT:    v_readlane_b32 s7, v31, 51
896; GCN-NEXT:    v_readlane_b32 s8, v31, 52
897; GCN-NEXT:    v_readlane_b32 s9, v31, 53
898; GCN-NEXT:    v_readlane_b32 s10, v31, 54
899; GCN-NEXT:    v_readlane_b32 s11, v31, 55
900; GCN-NEXT:    v_readlane_b32 s12, v31, 56
901; GCN-NEXT:    v_readlane_b32 s13, v31, 57
902; GCN-NEXT:    v_readlane_b32 s14, v31, 58
903; GCN-NEXT:    v_readlane_b32 s15, v31, 59
904; GCN-NEXT:    v_readlane_b32 s16, v31, 60
905; GCN-NEXT:    v_readlane_b32 s17, v31, 61
906; GCN-NEXT:    v_readlane_b32 s18, v31, 62
907; GCN-NEXT:    v_readlane_b32 s19, v31, 63
908; GCN-NEXT:    s_waitcnt vmcnt(0)
909; GCN-NEXT:    v_readlane_b32 s0, v32, 0
910; GCN-NEXT:    v_readlane_b32 s1, v32, 1
911; GCN-NEXT:    ;;#ASMSTART
912; GCN-NEXT:    ; use s[36:51]
913; GCN-NEXT:    ;;#ASMEND
914; GCN-NEXT:    ;;#ASMSTART
915; GCN-NEXT:    ; use s[4:19]
916; GCN-NEXT:    ;;#ASMEND
917; GCN-NEXT:    ;;#ASMSTART
918; GCN-NEXT:    ; use s[0:1]
919; GCN-NEXT:    ;;#ASMEND
920; GCN-NEXT:  .LBB2_2: ; %ret
921; GCN-NEXT:    s_endpgm
  ; Empty inline asm statements whose only effect is to list v0 through v30 as
  ; clobbered. They produce the six empty ASMSTART/ASMEND pairs checked above.
922  call void asm sideeffect "", "~{v[0:7]}" () #0
923  call void asm sideeffect "", "~{v[8:15]}" () #0
924  call void asm sideeffect "", "~{v[16:23]}" () #0
925  call void asm sideeffect "", "~{v[24:27]}"() #0
926  call void asm sideeffect "", "~{v[28:29]}"() #0
927  call void asm sideeffect "", "~{v30}"() #0
928
  ; Define four 16-element SGPR tuples plus one 2-element tuple (66 SGPR values)
  ; in the entry block. Each is only consumed in %bb0, so all of them are live
  ; across the conditional branch.
929  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
930  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
931  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
932  %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
933  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  ; Enter %bb0 only when %in == 0; otherwise go straight to %ret.
934  %cmp = icmp eq i32 %in, 0
935  br i1 %cmp, label %bb0, label %ret
936
937bb0:
  ; Consume every tuple defined in the entry block, in definition order.
938  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
939  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
940  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
941  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
942  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
943  br label %ret
944
945ret:
946  ret void
947}
948
949; Same as @no_vgprs_last_sgpr_spill: some SGPR spills must go to memory.
950; Additionally, a VGPR value (v0) is live across the SGPR tuple uses in %bb0.
951define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
952; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0:
953; GCN:       ; %bb.0:
954; GCN-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
955; GCN-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
956; GCN-NEXT:    s_mov_b32 s54, -1
957; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
958; GCN-NEXT:    s_add_u32 s52, s52, s11
959; GCN-NEXT:    s_addc_u32 s53, s53, 0
960; GCN-NEXT:    s_load_dword s0, s[4:5], 0x9
961; GCN-NEXT:    ;;#ASMSTART
962; GCN-NEXT:    ;;#ASMEND
963; GCN-NEXT:    ;;#ASMSTART
964; GCN-NEXT:    ;;#ASMEND
965; GCN-NEXT:    ;;#ASMSTART
966; GCN-NEXT:    ;;#ASMEND
967; GCN-NEXT:    ;;#ASMSTART
968; GCN-NEXT:    ;;#ASMEND
969; GCN-NEXT:    ;;#ASMSTART
970; GCN-NEXT:    ;;#ASMEND
971; GCN-NEXT:    ;;#ASMSTART
972; GCN-NEXT:    ;;#ASMEND
973; GCN-NEXT:    ;;#ASMSTART
974; GCN-NEXT:    ; def s[4:19]
975; GCN-NEXT:    ;;#ASMEND
976; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
977; GCN-NEXT:    v_writelane_b32 v32, s4, 0
978; GCN-NEXT:    v_writelane_b32 v32, s5, 1
979; GCN-NEXT:    v_writelane_b32 v32, s6, 2
980; GCN-NEXT:    v_writelane_b32 v32, s7, 3
981; GCN-NEXT:    v_writelane_b32 v32, s8, 4
982; GCN-NEXT:    v_writelane_b32 v32, s9, 5
983; GCN-NEXT:    v_writelane_b32 v32, s10, 6
984; GCN-NEXT:    v_writelane_b32 v32, s11, 7
985; GCN-NEXT:    v_writelane_b32 v32, s12, 8
986; GCN-NEXT:    v_writelane_b32 v32, s13, 9
987; GCN-NEXT:    v_writelane_b32 v32, s14, 10
988; GCN-NEXT:    v_writelane_b32 v32, s15, 11
989; GCN-NEXT:    v_writelane_b32 v32, s16, 12
990; GCN-NEXT:    v_writelane_b32 v32, s17, 13
991; GCN-NEXT:    v_writelane_b32 v32, s18, 14
992; GCN-NEXT:    v_writelane_b32 v32, s19, 15
993; GCN-NEXT:    ;;#ASMSTART
994; GCN-NEXT:    ; def s[4:19]
995; GCN-NEXT:    ;;#ASMEND
996; GCN-NEXT:    v_writelane_b32 v32, s4, 16
997; GCN-NEXT:    v_writelane_b32 v32, s5, 17
998; GCN-NEXT:    v_writelane_b32 v32, s6, 18
999; GCN-NEXT:    v_writelane_b32 v32, s7, 19
1000; GCN-NEXT:    v_writelane_b32 v32, s8, 20
1001; GCN-NEXT:    v_writelane_b32 v32, s9, 21
1002; GCN-NEXT:    v_writelane_b32 v32, s10, 22
1003; GCN-NEXT:    v_writelane_b32 v32, s11, 23
1004; GCN-NEXT:    v_writelane_b32 v32, s12, 24
1005; GCN-NEXT:    v_writelane_b32 v32, s13, 25
1006; GCN-NEXT:    v_writelane_b32 v32, s14, 26
1007; GCN-NEXT:    v_writelane_b32 v32, s15, 27
1008; GCN-NEXT:    v_writelane_b32 v32, s16, 28
1009; GCN-NEXT:    v_writelane_b32 v32, s17, 29
1010; GCN-NEXT:    v_writelane_b32 v32, s18, 30
1011; GCN-NEXT:    v_writelane_b32 v32, s19, 31
1012; GCN-NEXT:    ;;#ASMSTART
1013; GCN-NEXT:    ; def s[4:19]
1014; GCN-NEXT:    ;;#ASMEND
1015; GCN-NEXT:    v_writelane_b32 v32, s4, 32
1016; GCN-NEXT:    v_writelane_b32 v32, s5, 33
1017; GCN-NEXT:    v_writelane_b32 v32, s6, 34
1018; GCN-NEXT:    v_writelane_b32 v32, s7, 35
1019; GCN-NEXT:    v_writelane_b32 v32, s8, 36
1020; GCN-NEXT:    v_writelane_b32 v32, s9, 37
1021; GCN-NEXT:    v_writelane_b32 v32, s10, 38
1022; GCN-NEXT:    v_writelane_b32 v32, s11, 39
1023; GCN-NEXT:    v_writelane_b32 v32, s12, 40
1024; GCN-NEXT:    v_writelane_b32 v32, s13, 41
1025; GCN-NEXT:    v_writelane_b32 v32, s14, 42
1026; GCN-NEXT:    v_writelane_b32 v32, s15, 43
1027; GCN-NEXT:    v_writelane_b32 v32, s16, 44
1028; GCN-NEXT:    v_writelane_b32 v32, s17, 45
1029; GCN-NEXT:    v_writelane_b32 v32, s18, 46
1030; GCN-NEXT:    v_writelane_b32 v32, s19, 47
1031; GCN-NEXT:    ;;#ASMSTART
1032; GCN-NEXT:    ; def s[4:19]
1033; GCN-NEXT:    ;;#ASMEND
1034; GCN-NEXT:    v_writelane_b32 v32, s4, 48
1035; GCN-NEXT:    v_writelane_b32 v32, s5, 49
1036; GCN-NEXT:    v_writelane_b32 v32, s6, 50
1037; GCN-NEXT:    v_writelane_b32 v32, s7, 51
1038; GCN-NEXT:    v_writelane_b32 v32, s8, 52
1039; GCN-NEXT:    v_writelane_b32 v32, s9, 53
1040; GCN-NEXT:    v_writelane_b32 v32, s10, 54
1041; GCN-NEXT:    v_writelane_b32 v32, s11, 55
1042; GCN-NEXT:    v_writelane_b32 v32, s12, 56
1043; GCN-NEXT:    v_writelane_b32 v32, s13, 57
1044; GCN-NEXT:    v_writelane_b32 v32, s14, 58
1045; GCN-NEXT:    v_writelane_b32 v32, s15, 59
1046; GCN-NEXT:    v_writelane_b32 v32, s16, 60
1047; GCN-NEXT:    v_writelane_b32 v32, s17, 61
1048; GCN-NEXT:    v_writelane_b32 v32, s18, 62
1049; GCN-NEXT:    v_writelane_b32 v32, s19, 63
1050; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
1051; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
1052; GCN-NEXT:    s_mov_b64 exec, s[34:35]
1053; GCN-NEXT:    ;;#ASMSTART
1054; GCN-NEXT:    ; def s[2:3]
1055; GCN-NEXT:    ;;#ASMEND
1056; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
1057; GCN-NEXT:    v_writelane_b32 v32, s2, 0
1058; GCN-NEXT:    v_writelane_b32 v32, s3, 1
1059; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
1060; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill
1061; GCN-NEXT:    s_mov_b64 exec, s[34:35]
1062; GCN-NEXT:    s_mov_b32 s1, 0
1063; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1064; GCN-NEXT:    s_cmp_lg_u32 s0, s1
1065; GCN-NEXT:    s_cbranch_scc1 .LBB3_2
1066; GCN-NEXT:  ; %bb.1: ; %bb0
1067; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
1068; GCN-NEXT:    buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1069; GCN-NEXT:    s_mov_b64 exec, s[34:35]
1070; GCN-NEXT:    s_waitcnt vmcnt(0)
1071; GCN-NEXT:    v_readlane_b32 s36, v31, 32
1072; GCN-NEXT:    v_readlane_b32 s37, v31, 33
1073; GCN-NEXT:    v_readlane_b32 s38, v31, 34
1074; GCN-NEXT:    v_readlane_b32 s39, v31, 35
1075; GCN-NEXT:    v_readlane_b32 s40, v31, 36
1076; GCN-NEXT:    v_readlane_b32 s41, v31, 37
1077; GCN-NEXT:    v_readlane_b32 s42, v31, 38
1078; GCN-NEXT:    v_readlane_b32 s43, v31, 39
1079; GCN-NEXT:    v_readlane_b32 s44, v31, 40
1080; GCN-NEXT:    v_readlane_b32 s45, v31, 41
1081; GCN-NEXT:    v_readlane_b32 s46, v31, 42
1082; GCN-NEXT:    v_readlane_b32 s47, v31, 43
1083; GCN-NEXT:    v_readlane_b32 s48, v31, 44
1084; GCN-NEXT:    v_readlane_b32 s49, v31, 45
1085; GCN-NEXT:    v_readlane_b32 s50, v31, 46
1086; GCN-NEXT:    v_readlane_b32 s51, v31, 47
1087; GCN-NEXT:    v_readlane_b32 s0, v31, 16
1088; GCN-NEXT:    v_readlane_b32 s1, v31, 17
1089; GCN-NEXT:    v_readlane_b32 s2, v31, 18
1090; GCN-NEXT:    v_readlane_b32 s3, v31, 19
1091; GCN-NEXT:    v_readlane_b32 s4, v31, 20
1092; GCN-NEXT:    v_readlane_b32 s5, v31, 21
1093; GCN-NEXT:    v_readlane_b32 s6, v31, 22
1094; GCN-NEXT:    v_readlane_b32 s7, v31, 23
1095; GCN-NEXT:    v_readlane_b32 s8, v31, 24
1096; GCN-NEXT:    v_readlane_b32 s9, v31, 25
1097; GCN-NEXT:    v_readlane_b32 s10, v31, 26
1098; GCN-NEXT:    v_readlane_b32 s11, v31, 27
1099; GCN-NEXT:    v_readlane_b32 s12, v31, 28
1100; GCN-NEXT:    v_readlane_b32 s13, v31, 29
1101; GCN-NEXT:    v_readlane_b32 s14, v31, 30
1102; GCN-NEXT:    v_readlane_b32 s15, v31, 31
1103; GCN-NEXT:    v_readlane_b32 s16, v31, 0
1104; GCN-NEXT:    v_readlane_b32 s17, v31, 1
1105; GCN-NEXT:    v_readlane_b32 s18, v31, 2
1106; GCN-NEXT:    v_readlane_b32 s19, v31, 3
1107; GCN-NEXT:    v_readlane_b32 s20, v31, 4
1108; GCN-NEXT:    v_readlane_b32 s21, v31, 5
1109; GCN-NEXT:    v_readlane_b32 s22, v31, 6
1110; GCN-NEXT:    v_readlane_b32 s23, v31, 7
1111; GCN-NEXT:    v_readlane_b32 s24, v31, 8
1112; GCN-NEXT:    v_readlane_b32 s25, v31, 9
1113; GCN-NEXT:    v_readlane_b32 s26, v31, 10
1114; GCN-NEXT:    v_readlane_b32 s27, v31, 11
1115; GCN-NEXT:    v_readlane_b32 s28, v31, 12
1116; GCN-NEXT:    v_readlane_b32 s29, v31, 13
1117; GCN-NEXT:    v_readlane_b32 s30, v31, 14
1118; GCN-NEXT:    v_readlane_b32 s31, v31, 15
1119; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
1120; GCN-NEXT:    buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload
1121; GCN-NEXT:    s_mov_b64 exec, s[34:35]
1122; GCN-NEXT:    ;;#ASMSTART
1123; GCN-NEXT:    ; def v0
1124; GCN-NEXT:    ;;#ASMEND
1125; GCN-NEXT:    ;;#ASMSTART
1126; GCN-NEXT:    ; use s[16:31]
1127; GCN-NEXT:    ;;#ASMEND
1128; GCN-NEXT:    ;;#ASMSTART
1129; GCN-NEXT:    ; use s[0:15]
1130; GCN-NEXT:    ;;#ASMEND
1131; GCN-NEXT:    v_readlane_b32 s4, v31, 48
1132; GCN-NEXT:    v_readlane_b32 s5, v31, 49
1133; GCN-NEXT:    v_readlane_b32 s6, v31, 50
1134; GCN-NEXT:    v_readlane_b32 s7, v31, 51
1135; GCN-NEXT:    v_readlane_b32 s8, v31, 52
1136; GCN-NEXT:    v_readlane_b32 s9, v31, 53
1137; GCN-NEXT:    v_readlane_b32 s10, v31, 54
1138; GCN-NEXT:    v_readlane_b32 s11, v31, 55
1139; GCN-NEXT:    v_readlane_b32 s12, v31, 56
1140; GCN-NEXT:    v_readlane_b32 s13, v31, 57
1141; GCN-NEXT:    v_readlane_b32 s14, v31, 58
1142; GCN-NEXT:    v_readlane_b32 s15, v31, 59
1143; GCN-NEXT:    v_readlane_b32 s16, v31, 60
1144; GCN-NEXT:    v_readlane_b32 s17, v31, 61
1145; GCN-NEXT:    v_readlane_b32 s18, v31, 62
1146; GCN-NEXT:    v_readlane_b32 s19, v31, 63
1147; GCN-NEXT:    s_waitcnt vmcnt(0)
1148; GCN-NEXT:    v_readlane_b32 s0, v32, 0
1149; GCN-NEXT:    v_readlane_b32 s1, v32, 1
1150; GCN-NEXT:    ;;#ASMSTART
1151; GCN-NEXT:    ; use s[36:51]
1152; GCN-NEXT:    ;;#ASMEND
1153; GCN-NEXT:    ;;#ASMSTART
1154; GCN-NEXT:    ; use s[4:19]
1155; GCN-NEXT:    ;;#ASMEND
1156; GCN-NEXT:    ;;#ASMSTART
1157; GCN-NEXT:    ; use s[0:1]
1158; GCN-NEXT:    ;;#ASMEND
1159; GCN-NEXT:    ;;#ASMSTART
1160; GCN-NEXT:    ; use v0
1161; GCN-NEXT:    ;;#ASMEND
1162; GCN-NEXT:  .LBB3_2: ; %ret
1163; GCN-NEXT:    s_endpgm
  ; Empty inline asm statements whose only effect is to list v0 through v30 as
  ; clobbered. They produce the six empty ASMSTART/ASMEND pairs checked above.
1164  call void asm sideeffect "", "~{v[0:7]}" () #0
1165  call void asm sideeffect "", "~{v[8:15]}" () #0
1166  call void asm sideeffect "", "~{v[16:23]}" () #0
1167  call void asm sideeffect "", "~{v[24:27]}"() #0
1168  call void asm sideeffect "", "~{v[28:29]}"() #0
1169  call void asm sideeffect "", "~{v30}"() #0
1170
  ; Define four 16-element SGPR tuples plus one 2-element tuple (66 SGPR values)
  ; in the entry block. Each is only consumed in %bb0, so all of them are live
  ; across the conditional branch.
1171  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1172  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1173  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1174  %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1175  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  ; Enter %bb0 only when %in == 0; otherwise go straight to %ret.
1176  %cmp = icmp eq i32 %in, 0
1177  br i1 %cmp, label %bb0, label %ret
1178
1179bb0:
  ; %vgpr0 is defined before the first SGPR tuple use and consumed after the
  ; last one, so a VGPR value is live while the spilled SGPRs are needed.
1180  %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0
1181  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
1182  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
1183  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
1184  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
1185  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
1186  call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0
1187  br label %ret
1188
1189ret:
1190  ret void
1191}
1192
; Attribute group #0 (nounwind only) is attached to the inline asm call sites
; in this file and to @spill_sgprs_to_multiple_vgprs.
1193attributes #0 = { nounwind }
; Attribute group #1 is used by the two no_vgprs_* kernels. It adds
; "amdgpu-waves-per-eu"="7,7"; presumably this fixed occupancy is what limits
; the VGPRs available for SGPR spilling there - confirm against the AMDGPU docs.
1194attributes #1 = { nounwind "amdgpu-waves-per-eu"="7,7" }
1195
; Module flag that sets amdhsa_code_object_version to 500.
1196!llvm.module.flags = !{!0}
1197!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
1198