; Test the generated function prologs/epilogs under XPLINK64 on z/OS
;
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck --check-prefixes=CHECK64,CHECK %s

; Test prolog/epilog for non-XPLEAF.

; Small stack frame.
; CHECK-LABEL: func0
; CHECK64: stmg  6,7,1872(4)
; The stmg instruction's displacement field must be 2064 - dsa_size, as required by the XPLINK ABI.
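; With a DSA size of 192 here, that gives 2064 - 192 = 1872, matching the stmg above and the aghi adjustment below.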
; CHECK64: aghi  4,-192

; CHECK64: lg  7,2072(4)
; CHECK64: aghi  4,192
; CHECK64: b 2(7)

; CHECK64: L#PPA1_func0_0:
; CHECK64: .short	0  * Length/4 of Parms
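; func0 takes no parameters, so the PPA1 parameter length field is 0.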
define void @func0() {
  call i64 (i64) @fun(i64 10)
  ret void
}

; Spill all GPR CSRs
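; All callee-saved GPRs (r6-r15) are saved with a single stmg; the DSA size is 160, so the displacement is 2064 - 160 = 1904.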
; CHECK-LABEL: func1
; CHECK64: stmg 6,15,1904(4)
; CHECK64: aghi  4,-160

; CHECK64: lmg 7,15,2072(4)
; CHECK64: aghi  4,160
; CHECK64: b 2(7)

; CHECK64: L#PPA1_func1_0:
; CHECK64: .short	2  * Length/4 of Parms
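; func1 has a single 8-byte pointer parameter, so the PPA1 records 8 / 4 = 2.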
define void @func1(ptr %ptr) {
  %l01 = load volatile i64, ptr %ptr
  %l02 = load volatile i64, ptr %ptr
  %l03 = load volatile i64, ptr %ptr
  %l04 = load volatile i64, ptr %ptr
  %l05 = load volatile i64, ptr %ptr
  %l06 = load volatile i64, ptr %ptr
  %l07 = load volatile i64, ptr %ptr
  %l08 = load volatile i64, ptr %ptr
  %l09 = load volatile i64, ptr %ptr
  %l10 = load volatile i64, ptr %ptr
  %l11 = load volatile i64, ptr %ptr
  %l12 = load volatile i64, ptr %ptr
  %l13 = load volatile i64, ptr %ptr
  %l14 = load volatile i64, ptr %ptr
  %l15 = load volatile i64, ptr %ptr
  %add01 = add i64 %l01, %l01
  %add02 = add i64 %l02, %add01
  %add03 = add i64 %l03, %add02
  %add04 = add i64 %l04, %add03
  %add05 = add i64 %l05, %add04
  %add06 = add i64 %l06, %add05
  %add07 = add i64 %l07, %add06
  %add08 = add i64 %l08, %add07
  %add09 = add i64 %l09, %add08
  %add10 = add i64 %l10, %add09
  %add11 = add i64 %l11, %add10
  %add12 = add i64 %l12, %add11
  %add13 = add i64 %l13, %add12
  %add14 = add i64 %l14, %add13
  %add15 = add i64 %l15, %add14
  store volatile i64 %add01, ptr %ptr
  store volatile i64 %add02, ptr %ptr
  store volatile i64 %add03, ptr %ptr
  store volatile i64 %add04, ptr %ptr
  store volatile i64 %add05, ptr %ptr
  store volatile i64 %add06, ptr %ptr
  store volatile i64 %add07, ptr %ptr
  store volatile i64 %add08, ptr %ptr
  store volatile i64 %add09, ptr %ptr
  store volatile i64 %add10, ptr %ptr
  store volatile i64 %add11, ptr %ptr
  store volatile i64 %add12, ptr %ptr
  store volatile i64 %add13, ptr %ptr
  store volatile i64 %add14, ptr %ptr
  store volatile i64 %add15, ptr %ptr
  ret void
}


; Spill all FPRs and VRs
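; The callee-saved FPRs (f8-f15) and VRs (v16-v23) are spilled individually; the DSA size is 320, so the stmg displacement is 2064 - 320 = 1744.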
; CHECK-LABEL: func2
; CHECK64: stmg	6,7,1744(4)
; CHECK64: aghi  4,-320
; CHECK64: std	15,{{[0-9]+}}(4)                      * 8-byte Folded Spill
; CHECK64: std	14,{{[0-9]+}}(4)                      * 8-byte Folded Spill
; CHECK64: std	13,{{[0-9]+}}(4)                      * 8-byte Folded Spill
; CHECK64: std	12,{{[0-9]+}}(4)                      * 8-byte Folded Spill
; CHECK64: std	11,{{[0-9]+}}(4)                      * 8-byte Folded Spill
; CHECK64: std	10,{{[0-9]+}}(4)                      * 8-byte Folded Spill
; CHECK64: std	9,{{[0-9]+}}(4)                       * 8-byte Folded Spill
; CHECK64: std	8,{{[0-9]+}}(4)                       * 8-byte Folded Spill
; CHECK64: vst	23,{{[0-9]+}}(4),4                   * 16-byte Folded Spill
; CHECK64: vst	22,{{[0-9]+}}(4),4                   * 16-byte Folded Spill
; CHECK64: vst	21,{{[0-9]+}}(4),4                   * 16-byte Folded Spill
; CHECK64: vst	20,{{[0-9]+}}(4),4                   * 16-byte Folded Spill
; CHECK64: vst	19,{{[0-9]+}}(4),4                   * 16-byte Folded Spill
; CHECK64: vst	18,{{[0-9]+}}(4),4                   * 16-byte Folded Spill
; CHECK64: vst	17,{{[0-9]+}}(4),4                   * 16-byte Folded Spill
; CHECK64: vst	16,{{[0-9]+}}(4),4                   * 16-byte Folded Spill

; CHECK64: ld	15,{{[0-9]+}}(4)                      * 8-byte Folded Reload
; CHECK64: ld	14,{{[0-9]+}}(4)                      * 8-byte Folded Reload
; CHECK64: ld	13,{{[0-9]+}}(4)                      * 8-byte Folded Reload
; CHECK64: ld	12,{{[0-9]+}}(4)                      * 8-byte Folded Reload
; CHECK64: ld	11,{{[0-9]+}}(4)                      * 8-byte Folded Reload
; CHECK64: ld	10,{{[0-9]+}}(4)                      * 8-byte Folded Reload
; CHECK64: ld	9,{{[0-9]+}}(4)                       * 8-byte Folded Reload
; CHECK64: ld	8,{{[0-9]+}}(4)                       * 8-byte Folded Reload
; CHECK64: vl	23,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: vl	22,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: vl	21,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: vl	20,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: vl	19,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: vl	18,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: vl	17,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: vl	16,{{[0-9]+}}(4),4                   * 16-byte Folded Reload
; CHECK64: lg  7,2072(4)
; CHECK64: aghi  4,320
; CHECK64: b 2(7)

define void @func2(ptr %ptr, ptr %vec_ptr) {
  %l00 = load volatile double, ptr %ptr
  %l01 = load volatile double, ptr %ptr
  %l02 = load volatile double, ptr %ptr
  %l03 = load volatile double, ptr %ptr
  %l04 = load volatile double, ptr %ptr
  %l05 = load volatile double, ptr %ptr
  %l06 = load volatile double, ptr %ptr
  %l07 = load volatile double, ptr %ptr
  %l08 = load volatile double, ptr %ptr
  %l09 = load volatile double, ptr %ptr
  %l10 = load volatile double, ptr %ptr
  %l11 = load volatile double, ptr %ptr
  %l12 = load volatile double, ptr %ptr
  %l13 = load volatile double, ptr %ptr
  %l14 = load volatile double, ptr %ptr
  %l15 = load volatile double, ptr %ptr
  %add00 = fadd double %l01, %l00
  %add01 = fadd double %l01, %add00
  %add02 = fadd double %l02, %add01
  %add03 = fadd double %l03, %add02
  %add04 = fadd double %l04, %add03
  %add05 = fadd double %l05, %add04
  %add06 = fadd double %l06, %add05
  %add07 = fadd double %l07, %add06
  %add08 = fadd double %l08, %add07
  %add09 = fadd double %l09, %add08
  %add10 = fadd double %l10, %add09
  %add11 = fadd double %l11, %add10
  %add12 = fadd double %l12, %add11
  %add13 = fadd double %l13, %add12
  %add14 = fadd double %l14, %add13
  %add15 = fadd double %l15, %add14
  store volatile double %add00, ptr %ptr
  store volatile double %add01, ptr %ptr
  store volatile double %add02, ptr %ptr
  store volatile double %add03, ptr %ptr
  store volatile double %add04, ptr %ptr
  store volatile double %add05, ptr %ptr
  store volatile double %add06, ptr %ptr
  store volatile double %add07, ptr %ptr
  store volatile double %add08, ptr %ptr
  store volatile double %add09, ptr %ptr
  store volatile double %add10, ptr %ptr
  store volatile double %add11, ptr %ptr
  store volatile double %add12, ptr %ptr
  store volatile double %add13, ptr %ptr
  store volatile double %add14, ptr %ptr
  store volatile double %add15, ptr %ptr

  %v00 = load volatile <2 x i64>, ptr %vec_ptr
  %v01 = load volatile <2 x i64>, ptr %vec_ptr
  %v02 = load volatile <2 x i64>, ptr %vec_ptr
  %v03 = load volatile <2 x i64>, ptr %vec_ptr
  %v04 = load volatile <2 x i64>, ptr %vec_ptr
  %v05 = load volatile <2 x i64>, ptr %vec_ptr
  %v06 = load volatile <2 x i64>, ptr %vec_ptr
  %v07 = load volatile <2 x i64>, ptr %vec_ptr
  %v08 = load volatile <2 x i64>, ptr %vec_ptr
  %v09 = load volatile <2 x i64>, ptr %vec_ptr
  %v10 = load volatile <2 x i64>, ptr %vec_ptr
  %v11 = load volatile <2 x i64>, ptr %vec_ptr
  %v12 = load volatile <2 x i64>, ptr %vec_ptr
  %v13 = load volatile <2 x i64>, ptr %vec_ptr
  %v14 = load volatile <2 x i64>, ptr %vec_ptr
  %v15 = load volatile <2 x i64>, ptr %vec_ptr
  %v16 = load volatile <2 x i64>, ptr %vec_ptr
  %v17 = load volatile <2 x i64>, ptr %vec_ptr
  %v18 = load volatile <2 x i64>, ptr %vec_ptr
  %v19 = load volatile <2 x i64>, ptr %vec_ptr
  %v20 = load volatile <2 x i64>, ptr %vec_ptr
  %v21 = load volatile <2 x i64>, ptr %vec_ptr
  %v22 = load volatile <2 x i64>, ptr %vec_ptr
  %v23 = load volatile <2 x i64>, ptr %vec_ptr
  %v24 = load volatile <2 x i64>, ptr %vec_ptr
  %v25 = load volatile <2 x i64>, ptr %vec_ptr
  %v26 = load volatile <2 x i64>, ptr %vec_ptr
  %v27 = load volatile <2 x i64>, ptr %vec_ptr
  %v28 = load volatile <2 x i64>, ptr %vec_ptr
  %v29 = load volatile <2 x i64>, ptr %vec_ptr
  %v30 = load volatile <2 x i64>, ptr %vec_ptr
  %v31 = load volatile <2 x i64>, ptr %vec_ptr
  %vadd00 = add <2 x i64> %v00, %v00
  %vadd01 = add <2 x i64> %v01, %vadd00
  %vadd02 = add <2 x i64> %v02, %vadd01
  %vadd03 = add <2 x i64> %v03, %vadd02
  %vadd04 = add <2 x i64> %v04, %vadd03
  %vadd05 = add <2 x i64> %v05, %vadd04
  %vadd06 = add <2 x i64> %v06, %vadd05
  %vadd07 = add <2 x i64> %v07, %vadd06
  %vadd08 = add <2 x i64> %v08, %vadd07
  %vadd09 = add <2 x i64> %v09, %vadd08
  %vadd10 = add <2 x i64> %v10, %vadd09
  %vadd11 = add <2 x i64> %v11, %vadd10
  %vadd12 = add <2 x i64> %v12, %vadd11
  %vadd13 = add <2 x i64> %v13, %vadd12
  %vadd14 = add <2 x i64> %v14, %vadd13
  %vadd15 = add <2 x i64> %v15, %vadd14
  %vadd16 = add <2 x i64> %v16, %vadd15
  %vadd17 = add <2 x i64> %v17, %vadd16
  %vadd18 = add <2 x i64> %v18, %vadd17
  %vadd19 = add <2 x i64> %v19, %vadd18
  %vadd20 = add <2 x i64> %v20, %vadd19
  %vadd21 = add <2 x i64> %v21, %vadd20
  %vadd22 = add <2 x i64> %v22, %vadd21
  %vadd23 = add <2 x i64> %v23, %vadd22
  %vadd24 = add <2 x i64> %v24, %vadd23
  %vadd25 = add <2 x i64> %v25, %vadd24
  %vadd26 = add <2 x i64> %v26, %vadd25
  %vadd27 = add <2 x i64> %v27, %vadd26
  %vadd28 = add <2 x i64> %v28, %vadd27
  %vadd29 = add <2 x i64> %v29, %vadd28
  %vadd30 = add <2 x i64> %v30, %vadd29
  %vadd31 = add <2 x i64> %v31, %vadd30
  store volatile <2 x i64> %vadd00, ptr %vec_ptr
  store volatile <2 x i64> %vadd01, ptr %vec_ptr
  store volatile <2 x i64> %vadd02, ptr %vec_ptr
  store volatile <2 x i64> %vadd03, ptr %vec_ptr
  store volatile <2 x i64> %vadd04, ptr %vec_ptr
  store volatile <2 x i64> %vadd05, ptr %vec_ptr
  store volatile <2 x i64> %vadd06, ptr %vec_ptr
  store volatile <2 x i64> %vadd07, ptr %vec_ptr
  store volatile <2 x i64> %vadd08, ptr %vec_ptr
  store volatile <2 x i64> %vadd09, ptr %vec_ptr
  store volatile <2 x i64> %vadd10, ptr %vec_ptr
  store volatile <2 x i64> %vadd11, ptr %vec_ptr
  store volatile <2 x i64> %vadd12, ptr %vec_ptr
  store volatile <2 x i64> %vadd13, ptr %vec_ptr
  store volatile <2 x i64> %vadd14, ptr %vec_ptr
  store volatile <2 x i64> %vadd15, ptr %vec_ptr
  store volatile <2 x i64> %vadd16, ptr %vec_ptr
  store volatile <2 x i64> %vadd17, ptr %vec_ptr
  store volatile <2 x i64> %vadd18, ptr %vec_ptr
  store volatile <2 x i64> %vadd19, ptr %vec_ptr
  store volatile <2 x i64> %vadd20, ptr %vec_ptr
  store volatile <2 x i64> %vadd21, ptr %vec_ptr
  store volatile <2 x i64> %vadd22, ptr %vec_ptr
  store volatile <2 x i64> %vadd23, ptr %vec_ptr
  store volatile <2 x i64> %vadd24, ptr %vec_ptr
  store volatile <2 x i64> %vadd25, ptr %vec_ptr
  store volatile <2 x i64> %vadd26, ptr %vec_ptr
  store volatile <2 x i64> %vadd27, ptr %vec_ptr
  store volatile <2 x i64> %vadd28, ptr %vec_ptr
  store volatile <2 x i64> %vadd29, ptr %vec_ptr
  store volatile <2 x i64> %vadd30, ptr %vec_ptr
  store volatile <2 x i64> %vadd31, ptr %vec_ptr
  ret void
}

; Big stack frame: forces the use of agfi before stmg,
; despite not requiring the stack extension routine.
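; The [130070 x i64] array alone needs 130070 * 8 = 1040560 bytes, and the resulting
; adjustment of 1040768 does not fit in aghi's signed 16-bit immediate, so agfi
; (32-bit immediate) is used instead.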
; CHECK64: agfi  4,-1040768
; CHECK64: stmg  6,7,2064(4)
; CHECK64: agfi  4,1040768
define void @func3() {
  %arr = alloca [130070 x i64], align 8
  call i64 (ptr) @fun1(ptr %arr)
  ret void
}

; Requires saving r4 due to a variable-sized object (e.g. a VLA) in the
; stack frame. Sets up the frame pointer in r8.
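; The incoming stack pointer is saved by the stmg, and r8 is established as the frame
; pointer (lgr 8,4) so the fixed part of the frame stays addressable after the dynamic
; allocation adjusts r4.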
; CHECK64: stmg  4,10,1856(4)
; CHECK64: aghi  4,-192
; CHECK64: lg  6,40(5)
; CHECK64: lg  5,32(5)
; CHECK64: lgr     8,4
; CHECK64: basr   7,6
; CHECK64-NEXT: bcr     0,0
; CHECK64: lmg  4,10,2048(4)
define i64 @func4(i64 %n) {
  %vla = alloca i64, i64 %n, align 8
  %call = call i64 @fun2(i64 %n, ptr nonnull %vla, ptr nonnull %vla)
  ret i64 %call
}

; Requires saving r4 and, in addition, a displacement large enough
; to force the use of agfi before stmg.
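; The [130000 x i64] array needs 130000 * 8 = 1040000 bytes; together with the fixed
; part of the frame the adjustment is 1040224, again too large for aghi's signed
; 16-bit immediate.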
; CHECK64: lgr	0,4
; CHECK64: agfi	4,-1040224
; CHECK64: stmg  4,10,2048(4)
; CHECK64: lgr     8,4
; CHECK64: basr   7,6
; CHECK64-NEXT: bcr     0,0
; CHECK64: lmg 4,10,2048(4)
define i64 @func5(i64 %n) {
  %vla = alloca i64, i64 %n, align 8
  %arr = alloca [130000 x i64], align 8
  %call = call i64 @fun2(i64 %n, ptr nonnull %vla, ptr %arr)
  ret i64 %call
}

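; Frames this large get a stack overflow check before the prologue stores: the sequence
; below presumably loads a control block pointer from low storage (llgt 3,1208), compares
; the new stack pointer with the stack floor held at offset 64, and calls the stack
; extension routine from offset 72 when the limit is crossed.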
; CHECK-LABEL: large_stack0
; CHECK64: agfi  4,-1048800
; CHECK64-NEXT: llgt  3,1208
; CHECK64-NEXT: cg  4,64(3)
; CHECK64-NEXT: jhe
; CHECK64: * %bb.1:
; CHECK64: lg  3,72(3)
; CHECK64: basr  3,3
; CHECK64: stmg  6,7,2064(4)
define void @large_stack0() {
  %arr = alloca [131072 x i64], align 8
  call i64 (ptr) @fun1(ptr %arr)
  ret void
}

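; Like large_stack0, but r3 holds an incoming argument and is clobbered by the extension
; call (basr 3,3), so it is preserved in r0 beforehand (lgr 0,3) and restored afterwards
; (lgr 3,0).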
; CHECK-LABEL: large_stack1
; CHECK64: agfi  4,-1048800
; CHECK64: lgr 0,3
; CHECK64: llgt  3,1208
; CHECK64: cg  4,64(3)
; CHECK64: jhe L#BB7_2
; CHECK64: %bb.1:
; CHECK64: lg  3,72(3)
; CHECK64: basr  3,3
; CHECK64: bcr 0,7
; CHECK64: L#BB7_2:
; CHECK64: stmg  6,7,2064(4)
; CHECK64: lgr 3,0

; CHECK64: L#PPA1_large_stack1_0:
; CHECK64: .short	6  * Length/4 of Parms
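; large_stack1 takes three i64 parameters, 24 bytes in total, so the PPA1 records 24 / 4 = 6.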
define void @large_stack1(i64 %n1, i64 %n2, i64 %n3) {
  %arr = alloca [131072 x i64], align 8
  call i64 (ptr, i64, i64, i64) @fun3(ptr %arr,
            i64 %n1, i64 %n2, i64 %n3)
  ret void
}


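; Combines the dynamic allocation with the huge frame: r0 preserves the incoming stack
; pointer (lgr 0,4) across the extension check, while r3 is spilled to the caller's frame
; (stg 3,2192(4)) and reloaded through the saved stack pointer afterwards.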
; CHECK-LABEL: large_stack2
; CHECK64: lgr 0,4
; CHECK64: stg 3,2192(4)
; CHECK64: agfi  4,-1048800
; CHECK64: llgt  3,1208
; CHECK64: cg  4,64(3)
; CHECK64: jhe L#BB8_2
; CHECK64: %bb.1:
; CHECK64: lg  3,72(3)
; CHECK64: basr  3,3
; CHECK64: bcr 0,7
; CHECK64: L#BB8_2:
; CHECK64: lgr 3,0
; CHECK64: lg  3,2192(3)
; CHECK64: stmg  4,12,2048(4)
; CHECK64: lgr 8,4
define void @large_stack2(i64 %n1, i64 %n2, i64 %n3) {
  %arr0 = alloca [131072 x i64], align 8
  %arr1 = alloca i64, i64 %n1, align 8
  call i64 (ptr, ptr, i64, i64, i64) @fun4(ptr %arr0,
            ptr %arr1, i64 %n1, i64 %n2, i64 %n3)
  ret void
}

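; A leaf function that needs no stack space gets no prologue or epilogue at all: no stack
; adjustment of r4, no register saves, and a DSA size of 0 recorded in the PPA.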
; CHECK-LABEL: leaf_func0
; CHECK: .long	8 * DSA Size 0x0
; CHECK-NEXT:     * Entry Flags
; CHECK-NEXT:     *   Bit 1: 1 = Leaf function
; CHECK-NEXT:     *   Bit 2: 0 = Does not use alloca
; CHECK-NOT: aghi  4,
; CHECK-NOT: stmg
; CHECK: agr	1,2
; CHECK: msgr	1,3
; CHECK: aghik	3,1,-4
; CHECK-NOT: aghi  4,
; CHECK-NOT: lmg
define i64 @leaf_func0(i64 %a, i64 %b, i64 %c) {
  %n = add i64 %a, %b
  %m = mul i64 %n, %c
  %o = sub i64 %m, 4
  ret i64 %o
}


; =============================
;     Tests for PPA1 Fields
; =============================
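; PPA1 flags byte 4 uses bit 7 to record whether the name length and name are emitted:
; named_func below gets 129 with the bit set, while the unnamed function gets 128 with it clear.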
; CHECK-LABEL: named_func
; CHECK: .byte	129  * PPA1 Flags 4
; CHECK-NEXT: *   Bit 7: 1 = Name Length and Name
define i64 @named_func(i64 %arg) {
  %sum = add i64 1, %arg
  ret i64 %sum
}

; CHECK-LABEL: __unnamed_1
; CHECK: .byte	128  * PPA1 Flags 4
; CHECK-NOT: *   Bit 7: 1 = Name Length and Name
define void @""(ptr %p) {
  call i64 (ptr) @fun1(ptr %p)
  ret void
}


declare i64 @fun(i64 %arg0)
declare i64 @fun1(ptr %ptr)
declare i64 @fun2(i64 %n, ptr %arr0, ptr %arr1)
declare i64 @fun3(ptr %ptr, i64 %n1, i64 %n2, i64 %n3)
declare i64 @fun4(ptr %ptr0, ptr %ptr1, i64 %n1, i64 %n2, i64 %n3)
