xref: /llvm-project/llvm/test/CodeGen/AMDGPU/multi-dword-vgpr-spill.ll (revision bdf2fbba9cee60b4b260ff17e4f44c475c11e715)
1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,MUBUF
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck %s -check-prefixes=GCN,FLATSCR
3
; GCN-LABEL: spill_v2i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR:   scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR:   scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Keeps a <2 x i32> value live across an inline-asm statement that clobbers
; v0-v7. The directives above expect an 8-byte spill before the asm and the
; matching reload after it.
define void @spill_v2i32() {
bb:
  ; Private (addrspace 5) buffer with room for two vectors.
  %slots = alloca <2 x i32>, i32 2, align 4, addrspace(5)

  ; Volatile read of the second vector in the buffer.
  %src = getelementptr <2 x i32>, ptr addrspace(5) %slots, i32 1
  %vec = load volatile <2 x i32>, ptr addrspace(5) %src

  ; Empty asm whose clobber list covers v0-v7, which makes %vec spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile write of the reloaded value back to the same element.
  %dst = getelementptr <2 x i32>, ptr addrspace(5) %slots, i32 1
  store volatile <2 x i32> %vec, ptr addrspace(5) %dst

  ret void
}
31
; GCN-LABEL: spill_v2f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR:   scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR:   scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Floating-point counterpart of the <2 x i32> spill test: a <2 x float> value
; is kept live across an inline-asm statement that clobbers v0-v7, so it must
; be spilled before the asm and reloaded after it.
; The element type is float so that this function really covers the case its
; name advertises; it previously used <2 x i32> and duplicated the integer test.
define void @spill_v2f32() {
entry:
  ; Private (addrspace 5) buffer holding two vectors.
  %alloca = alloca <2 x float>, i32 2, align 4, addrspace(5)

  ; Volatile load of the second vector.
  %aptr = getelementptr <2 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <2 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile store of the reloaded value to the same element.
  %outptr = getelementptr <2 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <2 x float> %a, ptr addrspace(5) %outptr

  ret void
}
59
; GCN-LABEL: spill_v3i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR:   scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR:   scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Keeps a <3 x i32> value live across an inline-asm statement that clobbers
; v0-v7. The directives above expect a 12-byte spill before the asm and the
; matching reload after it.
define void @spill_v3i32() {
bb:
  ; Private (addrspace 5) buffer with room for two vectors.
  %slots = alloca <3 x i32>, i32 2, align 4, addrspace(5)

  ; Volatile read of the second vector in the buffer.
  %src = getelementptr <3 x i32>, ptr addrspace(5) %slots, i32 1
  %vec = load volatile <3 x i32>, ptr addrspace(5) %src

  ; Empty asm whose clobber list covers v0-v7, which makes %vec spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile write of the reloaded value back to the same element.
  %dst = getelementptr <3 x i32>, ptr addrspace(5) %slots, i32 1
  store volatile <3 x i32> %vec, ptr addrspace(5) %dst

  ret void
}
89
; GCN-LABEL: spill_v3f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR:   scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR:   scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Floating-point counterpart of the <3 x i32> spill test: a <3 x float> value
; is kept live across an inline-asm statement that clobbers v0-v7, so it must
; be spilled before the asm and reloaded after it.
; The element type is float so that this function really covers the case its
; name advertises; it previously used <3 x i32> and duplicated the integer test.
define void @spill_v3f32() {
entry:
  ; Private (addrspace 5) buffer holding two vectors.
  %alloca = alloca <3 x float>, i32 2, align 4, addrspace(5)

  ; Volatile load of the second vector.
  %aptr = getelementptr <3 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <3 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile store of the reloaded value to the same element.
  %outptr = getelementptr <3 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <3 x float> %a, ptr addrspace(5) %outptr

  ret void
}
119
; GCN-LABEL: spill_v4i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR:   scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR:   scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Keeps a <4 x i32> value live across an inline-asm statement that clobbers
; v0-v7. The directives above expect a 16-byte spill before the asm and the
; matching reload after it.
define void @spill_v4i32() {
bb:
  ; Private (addrspace 5) buffer with room for two vectors.
  %slots = alloca <4 x i32>, i32 2, align 4, addrspace(5)

  ; Volatile read of the second vector in the buffer.
  %src = getelementptr <4 x i32>, ptr addrspace(5) %slots, i32 1
  %vec = load volatile <4 x i32>, ptr addrspace(5) %src

  ; Empty asm whose clobber list covers v0-v7, which makes %vec spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile write of the reloaded value back to the same element.
  %dst = getelementptr <4 x i32>, ptr addrspace(5) %slots, i32 1
  store volatile <4 x i32> %vec, ptr addrspace(5) %dst

  ret void
}
151
; GCN-LABEL: spill_v4f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR:   scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR:   scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Floating-point counterpart of the <4 x i32> spill test: a <4 x float> value
; is kept live across an inline-asm statement that clobbers v0-v7, so it must
; be spilled before the asm and reloaded after it.
; The element type is float so that this function really covers the case its
; name advertises; it previously used <4 x i32> and duplicated the integer test.
define void @spill_v4f32() {
entry:
  ; Private (addrspace 5) buffer holding two vectors.
  %alloca = alloca <4 x float>, i32 2, align 4, addrspace(5)

  ; Volatile load of the second vector.
  %aptr = getelementptr <4 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <4 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile store of the reloaded value to the same element.
  %outptr = getelementptr <4 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <4 x float> %a, ptr addrspace(5) %outptr

  ret void
}
183
; GCN-LABEL: spill_v5i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:64 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Keeps a <5 x i32> value live across an inline-asm statement that clobbers
; v0-v7, so all five dwords must be spilled before the asm and reloaded after
; it. The buffer-instruction checks now cover the fifth dword (offset 80) as
; well, mirroring the offset already verified on the flat-scratch path;
; previously only four of the five dwords were checked there.
define void @spill_v5i32() {
entry:
  ; Private (addrspace 5) buffer holding two vectors.
  %alloca = alloca <5 x i32>, i32 2, align 4, addrspace(5)

  ; Volatile load of the second vector.
  %aptr = getelementptr <5 x i32>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <5 x i32>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile store of the reloaded value to the same element.
  %outptr = getelementptr <5 x i32>, ptr addrspace(5) %alloca, i32 1
  store volatile <5 x i32> %a, ptr addrspace(5) %outptr

  ret void
}
216
; GCN-LABEL: spill_v5f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:64 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

; Floating-point counterpart of the <5 x i32> spill test: a <5 x float> value
; is kept live across an inline-asm statement that clobbers v0-v7, so all five
; dwords must be spilled before the asm and reloaded after it.
; Two fixes relative to the earlier version of this test: the element type is
; float (it used <5 x i32> and duplicated the integer test), and the
; buffer-instruction checks cover the fifth dword at offset 80, mirroring the
; offset already verified on the flat-scratch path.
define void @spill_v5f32() {
entry:
  ; Private (addrspace 5) buffer holding two vectors.
  %alloca = alloca <5 x float>, i32 2, align 4, addrspace(5)

  ; Volatile load of the second vector.
  %aptr = getelementptr <5 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <5 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Volatile store of the reloaded value to the same element.
  %outptr = getelementptr <5 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <5 x float> %a, ptr addrspace(5) %outptr

  ret void
}
249