xref: /llvm-project/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
4; The four 16-element SGPR spills fill all 64 lanes of a single VGPR (v22),
5; which is then stored to memory before v22 is reused for lanes 0 and 1 of the final 2-element spill.
6
; Codegen test kernel: inline asm clobbers v0-v21, then four <16 x i32> values
; and one <2 x i32> value are defined in SGPRs and kept live across a
; conditional branch to %bb0, where each one is consumed by inline asm.
; The GCN check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
8; GCN-LABEL: partial_no_vgprs_last_sgpr_spill:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    s_add_u32 s0, s0, s15
11; GCN-NEXT:    s_addc_u32 s1, s1, 0
12; GCN-NEXT:    s_load_dword s4, s[8:9], 0x2
13; GCN-NEXT:    ;;#ASMSTART
14; GCN-NEXT:    ;;#ASMEND
15; GCN-NEXT:    ;;#ASMSTART
16; GCN-NEXT:    ;;#ASMEND
17; GCN-NEXT:    ;;#ASMSTART
18; GCN-NEXT:    ;;#ASMEND
19; GCN-NEXT:    ;;#ASMSTART
20; GCN-NEXT:    ;;#ASMEND
21; GCN-NEXT:    ;;#ASMSTART
22; GCN-NEXT:    ; def s[8:23]
23; GCN-NEXT:    ;;#ASMEND
24; GCN-NEXT:    ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
25; GCN-NEXT:    v_writelane_b32 v22, s8, 0
26; GCN-NEXT:    v_writelane_b32 v22, s9, 1
27; GCN-NEXT:    v_writelane_b32 v22, s10, 2
28; GCN-NEXT:    v_writelane_b32 v22, s11, 3
29; GCN-NEXT:    v_writelane_b32 v22, s12, 4
30; GCN-NEXT:    v_writelane_b32 v22, s13, 5
31; GCN-NEXT:    v_writelane_b32 v22, s14, 6
32; GCN-NEXT:    v_writelane_b32 v22, s15, 7
33; GCN-NEXT:    v_writelane_b32 v22, s16, 8
34; GCN-NEXT:    v_writelane_b32 v22, s17, 9
35; GCN-NEXT:    v_writelane_b32 v22, s18, 10
36; GCN-NEXT:    v_writelane_b32 v22, s19, 11
37; GCN-NEXT:    v_writelane_b32 v22, s20, 12
38; GCN-NEXT:    v_writelane_b32 v22, s21, 13
39; GCN-NEXT:    v_writelane_b32 v22, s22, 14
40; GCN-NEXT:    v_writelane_b32 v22, s23, 15
41; GCN-NEXT:    ;;#ASMSTART
42; GCN-NEXT:    ; def s[8:23]
43; GCN-NEXT:    ;;#ASMEND
44; GCN-NEXT:    v_writelane_b32 v22, s8, 16
45; GCN-NEXT:    v_writelane_b32 v22, s9, 17
46; GCN-NEXT:    v_writelane_b32 v22, s10, 18
47; GCN-NEXT:    v_writelane_b32 v22, s11, 19
48; GCN-NEXT:    v_writelane_b32 v22, s12, 20
49; GCN-NEXT:    v_writelane_b32 v22, s13, 21
50; GCN-NEXT:    v_writelane_b32 v22, s14, 22
51; GCN-NEXT:    v_writelane_b32 v22, s15, 23
52; GCN-NEXT:    v_writelane_b32 v22, s16, 24
53; GCN-NEXT:    v_writelane_b32 v22, s17, 25
54; GCN-NEXT:    v_writelane_b32 v22, s18, 26
55; GCN-NEXT:    v_writelane_b32 v22, s19, 27
56; GCN-NEXT:    v_writelane_b32 v22, s20, 28
57; GCN-NEXT:    v_writelane_b32 v22, s21, 29
58; GCN-NEXT:    v_writelane_b32 v22, s22, 30
59; GCN-NEXT:    v_writelane_b32 v22, s23, 31
60; GCN-NEXT:    ;;#ASMSTART
61; GCN-NEXT:    ; def s[8:23]
62; GCN-NEXT:    ;;#ASMEND
63; GCN-NEXT:    v_writelane_b32 v22, s8, 32
64; GCN-NEXT:    v_writelane_b32 v22, s9, 33
65; GCN-NEXT:    v_writelane_b32 v22, s10, 34
66; GCN-NEXT:    v_writelane_b32 v22, s11, 35
67; GCN-NEXT:    v_writelane_b32 v22, s12, 36
68; GCN-NEXT:    v_writelane_b32 v22, s13, 37
69; GCN-NEXT:    v_writelane_b32 v22, s14, 38
70; GCN-NEXT:    v_writelane_b32 v22, s15, 39
71; GCN-NEXT:    v_writelane_b32 v22, s16, 40
72; GCN-NEXT:    v_writelane_b32 v22, s17, 41
73; GCN-NEXT:    v_writelane_b32 v22, s18, 42
74; GCN-NEXT:    v_writelane_b32 v22, s19, 43
75; GCN-NEXT:    v_writelane_b32 v22, s20, 44
76; GCN-NEXT:    v_writelane_b32 v22, s21, 45
77; GCN-NEXT:    v_writelane_b32 v22, s22, 46
78; GCN-NEXT:    v_writelane_b32 v22, s23, 47
79; GCN-NEXT:    ;;#ASMSTART
80; GCN-NEXT:    ; def s[8:23]
81; GCN-NEXT:    ;;#ASMEND
82; GCN-NEXT:    v_writelane_b32 v22, s8, 48
83; GCN-NEXT:    v_writelane_b32 v22, s9, 49
84; GCN-NEXT:    v_writelane_b32 v22, s10, 50
85; GCN-NEXT:    v_writelane_b32 v22, s11, 51
86; GCN-NEXT:    v_writelane_b32 v22, s12, 52
87; GCN-NEXT:    v_writelane_b32 v22, s13, 53
88; GCN-NEXT:    v_writelane_b32 v22, s14, 54
89; GCN-NEXT:    v_writelane_b32 v22, s15, 55
90; GCN-NEXT:    v_writelane_b32 v22, s16, 56
91; GCN-NEXT:    v_writelane_b32 v22, s17, 57
92; GCN-NEXT:    v_writelane_b32 v22, s18, 58
93; GCN-NEXT:    v_writelane_b32 v22, s19, 59
94; GCN-NEXT:    v_writelane_b32 v22, s20, 60
95; GCN-NEXT:    v_writelane_b32 v22, s21, 61
96; GCN-NEXT:    v_writelane_b32 v22, s22, 62
97; GCN-NEXT:    v_writelane_b32 v22, s23, 63
98; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
99; GCN-NEXT:    buffer_store_dword v22, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
100; GCN-NEXT:    s_mov_b64 exec, s[24:25]
101; GCN-NEXT:    ;;#ASMSTART
102; GCN-NEXT:    ; def s[6:7]
103; GCN-NEXT:    ;;#ASMEND
104; GCN-NEXT:    ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
105; GCN-NEXT:    v_writelane_b32 v22, s6, 0
106; GCN-NEXT:    v_writelane_b32 v22, s7, 1
107; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
108; GCN-NEXT:    buffer_store_dword v22, off, s[0:3], 0 ; 4-byte Folded Spill
109; GCN-NEXT:    s_mov_b64 exec, s[24:25]
110; GCN-NEXT:    s_mov_b32 s5, 0
111; GCN-NEXT:    s_waitcnt lgkmcnt(0)
112; GCN-NEXT:    s_cmp_lg_u32 s4, s5
113; GCN-NEXT:    s_cbranch_scc1 .LBB0_2
114; GCN-NEXT:  ; %bb.1: ; %bb0
115; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
116; GCN-NEXT:    buffer_load_dword v23, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
117; GCN-NEXT:    s_mov_b64 exec, s[24:25]
118; GCN-NEXT:    s_waitcnt vmcnt(0)
119; GCN-NEXT:    v_readlane_b32 s4, v23, 0
120; GCN-NEXT:    v_readlane_b32 s5, v23, 1
121; GCN-NEXT:    v_readlane_b32 s6, v23, 2
122; GCN-NEXT:    v_readlane_b32 s7, v23, 3
123; GCN-NEXT:    v_readlane_b32 s8, v23, 4
124; GCN-NEXT:    v_readlane_b32 s9, v23, 5
125; GCN-NEXT:    v_readlane_b32 s10, v23, 6
126; GCN-NEXT:    v_readlane_b32 s11, v23, 7
127; GCN-NEXT:    v_readlane_b32 s12, v23, 8
128; GCN-NEXT:    v_readlane_b32 s13, v23, 9
129; GCN-NEXT:    v_readlane_b32 s14, v23, 10
130; GCN-NEXT:    v_readlane_b32 s15, v23, 11
131; GCN-NEXT:    v_readlane_b32 s16, v23, 12
132; GCN-NEXT:    v_readlane_b32 s17, v23, 13
133; GCN-NEXT:    v_readlane_b32 s18, v23, 14
134; GCN-NEXT:    v_readlane_b32 s19, v23, 15
135; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
136; GCN-NEXT:    buffer_load_dword v22, off, s[0:3], 0 ; 4-byte Folded Reload
137; GCN-NEXT:    s_mov_b64 exec, s[24:25]
138; GCN-NEXT:    ;;#ASMSTART
139; GCN-NEXT:    ; use s[4:19]
140; GCN-NEXT:    ;;#ASMEND
141; GCN-NEXT:    v_readlane_b32 s4, v23, 16
142; GCN-NEXT:    v_readlane_b32 s5, v23, 17
143; GCN-NEXT:    v_readlane_b32 s6, v23, 18
144; GCN-NEXT:    v_readlane_b32 s7, v23, 19
145; GCN-NEXT:    v_readlane_b32 s8, v23, 20
146; GCN-NEXT:    v_readlane_b32 s9, v23, 21
147; GCN-NEXT:    v_readlane_b32 s10, v23, 22
148; GCN-NEXT:    v_readlane_b32 s11, v23, 23
149; GCN-NEXT:    v_readlane_b32 s12, v23, 24
150; GCN-NEXT:    v_readlane_b32 s13, v23, 25
151; GCN-NEXT:    v_readlane_b32 s14, v23, 26
152; GCN-NEXT:    v_readlane_b32 s15, v23, 27
153; GCN-NEXT:    v_readlane_b32 s16, v23, 28
154; GCN-NEXT:    v_readlane_b32 s17, v23, 29
155; GCN-NEXT:    v_readlane_b32 s18, v23, 30
156; GCN-NEXT:    v_readlane_b32 s19, v23, 31
157; GCN-NEXT:    ;;#ASMSTART
158; GCN-NEXT:    ; use s[4:19]
159; GCN-NEXT:    ;;#ASMEND
160; GCN-NEXT:    v_readlane_b32 s4, v23, 32
161; GCN-NEXT:    v_readlane_b32 s5, v23, 33
162; GCN-NEXT:    v_readlane_b32 s6, v23, 34
163; GCN-NEXT:    v_readlane_b32 s7, v23, 35
164; GCN-NEXT:    v_readlane_b32 s8, v23, 36
165; GCN-NEXT:    v_readlane_b32 s9, v23, 37
166; GCN-NEXT:    v_readlane_b32 s10, v23, 38
167; GCN-NEXT:    v_readlane_b32 s11, v23, 39
168; GCN-NEXT:    v_readlane_b32 s12, v23, 40
169; GCN-NEXT:    v_readlane_b32 s13, v23, 41
170; GCN-NEXT:    v_readlane_b32 s14, v23, 42
171; GCN-NEXT:    v_readlane_b32 s15, v23, 43
172; GCN-NEXT:    v_readlane_b32 s16, v23, 44
173; GCN-NEXT:    v_readlane_b32 s17, v23, 45
174; GCN-NEXT:    v_readlane_b32 s18, v23, 46
175; GCN-NEXT:    v_readlane_b32 s19, v23, 47
176; GCN-NEXT:    ;;#ASMSTART
177; GCN-NEXT:    ; use s[4:19]
178; GCN-NEXT:    ;;#ASMEND
179; GCN-NEXT:    v_readlane_b32 s8, v23, 48
180; GCN-NEXT:    v_readlane_b32 s9, v23, 49
181; GCN-NEXT:    v_readlane_b32 s10, v23, 50
182; GCN-NEXT:    v_readlane_b32 s11, v23, 51
183; GCN-NEXT:    v_readlane_b32 s12, v23, 52
184; GCN-NEXT:    v_readlane_b32 s13, v23, 53
185; GCN-NEXT:    v_readlane_b32 s14, v23, 54
186; GCN-NEXT:    v_readlane_b32 s15, v23, 55
187; GCN-NEXT:    v_readlane_b32 s16, v23, 56
188; GCN-NEXT:    v_readlane_b32 s17, v23, 57
189; GCN-NEXT:    v_readlane_b32 s18, v23, 58
190; GCN-NEXT:    v_readlane_b32 s19, v23, 59
191; GCN-NEXT:    v_readlane_b32 s20, v23, 60
192; GCN-NEXT:    v_readlane_b32 s21, v23, 61
193; GCN-NEXT:    v_readlane_b32 s22, v23, 62
194; GCN-NEXT:    v_readlane_b32 s23, v23, 63
195; GCN-NEXT:    s_waitcnt vmcnt(0)
196; GCN-NEXT:    v_readlane_b32 s4, v22, 0
197; GCN-NEXT:    v_readlane_b32 s5, v22, 1
198; GCN-NEXT:    ;;#ASMSTART
199; GCN-NEXT:    ; use s[8:23]
200; GCN-NEXT:    ;;#ASMEND
201; GCN-NEXT:    ;;#ASMSTART
202; GCN-NEXT:    ; use s[4:5]
203; GCN-NEXT:    ;;#ASMEND
204; GCN-NEXT:  .LBB0_2: ; %ret
205; GCN-NEXT:    s_endpgm
  ; Empty inline asm whose only effect is to list v0-v21 as clobbered. The
  ; checks above reference no VGPRs other than v22 and v23.
206  call void asm sideeffect "", "~{v[0:7]}" () #0
207  call void asm sideeffect "", "~{v[8:15]}" () #0
208  call void asm sideeffect "", "~{v[16:19]}"() #0
209  call void asm sideeffect "", "~{v[20:21]}"() #0
210
  ; Four 16-dword values plus one 2-dword value, each produced by an inline asm
  ; with an "=s" output (printed as "; def s[...]" in the checks).
211  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
212  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
213  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
214  %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
215  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  ; Only the %in == 0 path (%bb0) uses the values defined above.
216  %cmp = icmp eq i32 %in, 0
217  br i1 %cmp, label %bb0, label %ret
218
219bb0:
  ; Consume the values in definition order; the checks show them being read
  ; back with v_readlane_b32 first.
220  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
221  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
222  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
223  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
224  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
225  br label %ret
226
227ret:
228  ret void
229}
230
; #0 is attached to every inline asm call site in the kernel above.
231attributes #0 = { nounwind }
; #1 is attached to the kernel definition itself.
; NOTE(review): "amdgpu-waves-per-eu"="10,10" presumably is what restricts the
; kernel to the few VGPRs (v22, v23) seen in the checks - confirm against the
; AMDGPU backend documentation.
232attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" }
233
; Module flag: sets "amdhsa_code_object_version" to 500.
234!llvm.module.flags = !{!0}
235!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
236