xref: /llvm-project/llvm/test/CodeGen/VE/Scalar/load_stk.ll (revision da5a6b2bf5746de19cb632973de42a5fb3ad26fe)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=ve | FileCheck %s
3
4;;; Test load instructions
5;;;
6;;; Note:
7;;;   We test load instructions using general stack, stack with dynamic
8;;;   allocation, stack with dynamic allocation and alignment, and stack
9;;;   with dynamic allocation, alignment, and spill.
10;;;
11;;; First test using a stack for leaf function.
12;;;
13;;;   |                                              | Higher address
14;;;   |----------------------------------------------| <- old sp
15;;;   | Local variables of fixed size                |
16;;;   |----------------------------------------------| <- sp
17;;;   |                                              | Lower address
18;;;
19;;; Access local variable using sp (%s11).  In addition, please remember
20;;; that stack is aligned by 16 bytes.
21;;;
22;;; Second test using a general stack.
23;;;
24;;;   |                                              | Higher address
25;;;   |----------------------------------------------|
26;;;   | Parameter area for this function             |
27;;;   |----------------------------------------------|
28;;;   | Register save area (RSA) for this function   |
29;;;   |----------------------------------------------|
30;;;   | Return address for this function             |
31;;;   |----------------------------------------------|
32;;;   | Frame pointer for this function              |
33;;;   |----------------------------------------------| <- fp(=old sp)
34;;;   | Local variables of fixed size                |
35;;;   |----------------------------------------------|
36;;;   |.variable-sized.local.variables.(VLAs)........|
37;;;   |..............................................|
38;;;   |..............................................|
39;;;   |----------------------------------------------| <- returned by alloca
40;;;   | Parameter area for callee                    |
41;;;   |----------------------------------------------|
42;;;   | Register save area (RSA) for callee          |
43;;;   |----------------------------------------------|
44;;;   | Return address for callee                    |
45;;;   |----------------------------------------------|
46;;;   | Frame pointer for callee                     |
47;;;   |----------------------------------------------| <- sp
48;;;   |                                              | Lower address
49;;;
50;;; Access local variable using fp (%s9) since the size of VLA is not
51;;; known.  At the beginning of the functions, allocates 240 + data
52;;; bytes.  240 means RSA+RA+FP (=176) + Parameter (=64).
53;;;
54;;; Third test using a general stack.
55;;;
56;;;   |                                              | Higher address
57;;;   |----------------------------------------------|
58;;;   | Parameter area for this function             |
59;;;   |----------------------------------------------|
60;;;   | Register save area (RSA) for this function   |
61;;;   |----------------------------------------------|
62;;;   | Return address for this function             |
63;;;   |----------------------------------------------|
64;;;   | Frame pointer for this function              |
65;;;   |----------------------------------------------| <- fp(=old sp)
66;;;   |.empty.space.to.make.part.below.aligned.in....|
67;;;   |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
68;;;   |.alignment....................................|  unknown at compile time)
69;;;   |----------------------------------------------|
70;;;   | Local variables of fixed size including spill|
71;;;   | slots                                        |
72;;;   |----------------------------------------------| <- bp(not defined by ABI,
73;;;   |.variable-sized.local.variables.(VLAs)........|       LLVM chooses SX17)
74;;;   |..............................................| (size of this area is
75;;;   |..............................................|  unknown at compile time)
76;;;   |----------------------------------------------| <- stack top (returned by
77;;;   | Parameter area for callee                    |               alloca)
78;;;   |----------------------------------------------|
79;;;   | Register save area (RSA) for callee          |
80;;;   |----------------------------------------------|
81;;;   | Return address for callee                    |
82;;;   |----------------------------------------------|
83;;;   | Frame pointer for callee                     |
84;;;   |----------------------------------------------| <- sp
85;;;   |                                              | Lower address
86;;;
87;;; Access local variable using bp (%s17) since the size of alignment
88;;; and VLA are not known.  At the beginning of the functions, allocates
89;;; pad(240 + data + align) bytes.  Then, access data through bp + pad(240)
90;;; since this address doesn't change even if VLA is dynamically allocated.
91;;;
92;;; Fourth test using a general stack with some spills.
93;;;
94
; loadi64_stk: function with a single fixed-size i64 stack slot and no
; calls other than lifetime markers.  The slot is read with a volatile
; load and the loaded value is returned.  The expected assembly lowers the
; stack pointer (%s11) by 16 bytes and reads the slot at 8(, %s11).
95; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
96define x86_fastcallcc i64 @loadi64_stk() {
97; CHECK-LABEL: loadi64_stk:
98; CHECK:       # %bb.0:
99; CHECK-NEXT:    adds.l %s11, -16, %s11
100; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB0_2
101; CHECK-NEXT:  # %bb.1:
102; CHECK-NEXT:    ld %s61, 24(, %s14)
103; CHECK-NEXT:    or %s62, 0, %s0
104; CHECK-NEXT:    lea %s63, 315
105; CHECK-NEXT:    shm.l %s63, (%s61)
106; CHECK-NEXT:    shm.l %s8, 8(%s61)
107; CHECK-NEXT:    shm.l %s11, 16(%s61)
108; CHECK-NEXT:    monc
109; CHECK-NEXT:    or %s0, 0, %s62
110; CHECK-NEXT:  .LBB0_2:
111; CHECK-NEXT:    ld %s0, 8(, %s11)
112; CHECK-NEXT:    adds.l %s11, 16, %s11
113; CHECK-NEXT:    b.l.t (, %s10)
114  %1 = alloca i64, align 8  ; the only stack object in this function
115  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
116  %2 = load volatile i64, ptr %1, align 8, !tbaa !3  ; volatile so the load is kept
117  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
118  ret i64 %2
119}
120
; Declarations of the lifetime marker intrinsics.  The test functions in
; this file call them around the accesses to their fixed-size stack slots,
; passing the slot size in bytes and the slot pointer.
121; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
122declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
123
124; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
125declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
126
; loadi64_stk_big: an i64 slot plus a [268435455 x i64] array
; (2147483640 bytes) on the stack.  The i64 slot is volatile-loaded once,
; then a loop volatile-loads every array element; the i64 value is
; returned.  The expected assembly adjusts %s11 with a single lea of
; -2147483648 and reads the i64 slot at 2147483640(, %s11).
127; Function Attrs: argmemonly nofree nounwind
128define x86_fastcallcc i64 @loadi64_stk_big() {
129; CHECK-LABEL: loadi64_stk_big:
130; CHECK:       # %bb.0:
131; CHECK-NEXT:    lea %s11, -2147483648(, %s11)
132; CHECK-NEXT:    brge.l %s11, %s8, .LBB1_4
133; CHECK-NEXT:  # %bb.3:
134; CHECK-NEXT:    ld %s61, 24(, %s14)
135; CHECK-NEXT:    or %s62, 0, %s0
136; CHECK-NEXT:    lea %s63, 315
137; CHECK-NEXT:    shm.l %s63, (%s61)
138; CHECK-NEXT:    shm.l %s8, 8(%s61)
139; CHECK-NEXT:    shm.l %s11, 16(%s61)
140; CHECK-NEXT:    monc
141; CHECK-NEXT:    or %s0, 0, %s62
142; CHECK-NEXT:  .LBB1_4:
143; CHECK-NEXT:    ld %s0, 2147483640(, %s11)
144; CHECK-NEXT:    or %s1, 0, (0)1
145; CHECK-NEXT:    lea %s2, 2147483640
146; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
147; CHECK-NEXT:    ld %s3, (%s1, %s11)
148; CHECK-NEXT:    lea %s1, 8(, %s1)
149; CHECK-NEXT:    brne.l %s1, %s2, .LBB1_1
150; CHECK-NEXT:  # %bb.2:
151; CHECK-NEXT:    lea %s13, -2147483648
152; CHECK-NEXT:    and %s13, %s13, (32)0
153; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
154; CHECK-NEXT:    b.l.t (, %s10)
155  %1 = alloca i64, align 8  ; scalar slot whose value is returned
156  %2 = alloca [268435455 x i64], align 8  ; large array, 2147483640 bytes
157  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
158  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %2)
159  %3 = load volatile i64, ptr %1, align 8, !tbaa !3
160  br label %5
161
1624:                                                ; preds = %5
163  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %2)
164  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
165  ret i64 %3
166
1675:                                                ; preds = %0, %5
168  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]  ; array index, starts at 0
169  %7 = getelementptr inbounds [268435455 x i64], ptr %2, i64 0, i64 %6
170  %8 = load volatile i64, ptr %7, align 8, !tbaa !3  ; result unused; load kept because volatile
171  %9 = add nuw nsw i64 %6, 1
172  %10 = icmp eq i64 %9, 268435455  ; exit after the last element
173  br i1 %10, label %4, label %5, !llvm.loop !7
174}
175
; loadi64_stk_big2: same shape as loadi64_stk_big but with a
; [268435456 x i64] array (2147483648 bytes).  In the expected assembly
; the frame adjustment and the address of the i64 slot are built in %s13
; with lea / and / lea.sl sequences instead of a single displacement
; (presumably because the values no longer fit a 32-bit signed
; displacement -- TODO confirm against the VE ISA).
176; Function Attrs: argmemonly nofree nounwind
177define x86_fastcallcc i64 @loadi64_stk_big2() {
178; CHECK-LABEL: loadi64_stk_big2:
179; CHECK:       # %bb.0:
180; CHECK-NEXT:    lea %s13, 2147483632
181; CHECK-NEXT:    and %s13, %s13, (32)0
182; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
183; CHECK-NEXT:    brge.l %s11, %s8, .LBB2_4
184; CHECK-NEXT:  # %bb.3:
185; CHECK-NEXT:    ld %s61, 24(, %s14)
186; CHECK-NEXT:    or %s62, 0, %s0
187; CHECK-NEXT:    lea %s63, 315
188; CHECK-NEXT:    shm.l %s63, (%s61)
189; CHECK-NEXT:    shm.l %s8, 8(%s61)
190; CHECK-NEXT:    shm.l %s11, 16(%s61)
191; CHECK-NEXT:    monc
192; CHECK-NEXT:    or %s0, 0, %s62
193; CHECK-NEXT:  .LBB2_4:
194; CHECK-NEXT:    lea %s13, -2147483640
195; CHECK-NEXT:    and %s13, %s13, (32)0
196; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
197; CHECK-NEXT:    ld %s0, (, %s13)
198; CHECK-NEXT:    or %s1, 0, (0)1
199; CHECK-NEXT:    lea %s2, -2147483648
200; CHECK-NEXT:    and %s2, %s2, (32)0
201; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
202; CHECK-NEXT:    ld %s3, 8(%s1, %s11)
203; CHECK-NEXT:    lea %s1, 8(, %s1)
204; CHECK-NEXT:    brne.l %s1, %s2, .LBB2_1
205; CHECK-NEXT:  # %bb.2:
206; CHECK-NEXT:    lea %s13, -2147483632
207; CHECK-NEXT:    and %s13, %s13, (32)0
208; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
209; CHECK-NEXT:    b.l.t (, %s10)
210  %1 = alloca i64, align 8  ; scalar slot whose value is returned
211  %2 = alloca [268435456 x i64], align 8  ; large array, 2147483648 bytes
212  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
213  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
214  %3 = load volatile i64, ptr %1, align 8, !tbaa !3
215  br label %5
216
2174:                                                ; preds = %5
218  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
219  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
220  ret i64 %3
221
2225:                                                ; preds = %0, %5
223  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]  ; array index, starts at 0
224  %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
225  %8 = load volatile i64, ptr %7, align 8, !tbaa !3  ; result unused; load kept because volatile
226  %9 = add nuw nsw i64 %6, 1
227  %10 = icmp eq i64 %9, 268435456  ; exit after the last element
228  br i1 %10, label %4, label %5, !llvm.loop !9
229}
230
; loadi64_stk_dyn: a fixed i64 slot plus a dynamically sized alloca of %0
; bytes.  Both are volatile-loaded (dynamic area first); the value from
; the fixed slot is returned.  The expected assembly saves %s9/%s10, copies
; %s11 into %s9, calls __ve_grow_stack, reads the dynamic area at
; 240(, %s11) and the fixed slot at -8(, %s9).
231; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
232define x86_fastcallcc i64 @loadi64_stk_dyn(i64 noundef %0) {
233; CHECK-LABEL: loadi64_stk_dyn:
234; CHECK:       # %bb.0:
235; CHECK-NEXT:    st %s9, (, %s11)
236; CHECK-NEXT:    st %s10, 8(, %s11)
237; CHECK-NEXT:    or %s9, 0, %s11
238; CHECK-NEXT:    lea %s11, -256(, %s11)
239; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB3_2
240; CHECK-NEXT:  # %bb.1:
241; CHECK-NEXT:    ld %s61, 24(, %s14)
242; CHECK-NEXT:    or %s62, 0, %s0
243; CHECK-NEXT:    lea %s63, 315
244; CHECK-NEXT:    shm.l %s63, (%s61)
245; CHECK-NEXT:    shm.l %s8, 8(%s61)
246; CHECK-NEXT:    shm.l %s11, 16(%s61)
247; CHECK-NEXT:    monc
248; CHECK-NEXT:    or %s0, 0, %s62
249; CHECK-NEXT:  .LBB3_2:
250; CHECK-NEXT:    lea %s0, 15(, %s0)
251; CHECK-NEXT:    and %s0, -16, %s0
252; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
253; CHECK-NEXT:    and %s1, %s1, (32)0
254; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
255; CHECK-NEXT:    bsic %s10, (, %s12)
256; CHECK-NEXT:    lea %s0, 240(, %s11)
257; CHECK-NEXT:    ld %s0, (, %s0)
258; CHECK-NEXT:    ld %s0, -8(, %s9)
259; CHECK-NEXT:    or %s11, 0, %s9
260; CHECK-NEXT:    ld %s10, 8(, %s11)
261; CHECK-NEXT:    ld %s9, (, %s11)
262; CHECK-NEXT:    b.l.t (, %s10)
263  %2 = alloca i64, align 8  ; fixed-size slot
264  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
265  %3 = alloca i8, i64 %0, align 8  ; dynamically sized area of %0 bytes
266  %4 = load volatile i64, ptr %3, align 8, !tbaa !3  ; read from the dynamic area; result unused
267  %5 = load volatile i64, ptr %2, align 8, !tbaa !3  ; read from the fixed slot; returned
268  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
269  ret i64 %5
270}
271
; loadi64_stk_dyn_align: like loadi64_stk_dyn, but the fixed i64 slot is
; declared with align 32.  The expected assembly additionally saves and
; restores %s17, masks %s11 with (59)1 and copies it into %s17, and reads
; the fixed slot at 256(, %s17); the dynamic area is still read at
; 240(, %s11).
272; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
273define x86_fastcallcc i64 @loadi64_stk_dyn_align(i64 noundef %0) {
274; CHECK-LABEL: loadi64_stk_dyn_align:
275; CHECK:       # %bb.0:
276; CHECK-NEXT:    st %s9, (, %s11)
277; CHECK-NEXT:    st %s10, 8(, %s11)
278; CHECK-NEXT:    st %s17, 40(, %s11)
279; CHECK-NEXT:    or %s9, 0, %s11
280; CHECK-NEXT:    lea %s11, -288(, %s11)
281; CHECK-NEXT:    and %s11, %s11, (59)1
282; CHECK-NEXT:    or %s17, 0, %s11
283; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB4_2
284; CHECK-NEXT:  # %bb.1:
285; CHECK-NEXT:    ld %s61, 24(, %s14)
286; CHECK-NEXT:    or %s62, 0, %s0
287; CHECK-NEXT:    lea %s63, 315
288; CHECK-NEXT:    shm.l %s63, (%s61)
289; CHECK-NEXT:    shm.l %s8, 8(%s61)
290; CHECK-NEXT:    shm.l %s11, 16(%s61)
291; CHECK-NEXT:    monc
292; CHECK-NEXT:    or %s0, 0, %s62
293; CHECK-NEXT:  .LBB4_2:
294; CHECK-NEXT:    lea %s0, 15(, %s0)
295; CHECK-NEXT:    and %s0, -16, %s0
296; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
297; CHECK-NEXT:    and %s1, %s1, (32)0
298; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
299; CHECK-NEXT:    bsic %s10, (, %s12)
300; CHECK-NEXT:    lea %s0, 240(, %s11)
301; CHECK-NEXT:    ld %s0, (, %s0)
302; CHECK-NEXT:    ld %s0, 256(, %s17)
303; CHECK-NEXT:    or %s11, 0, %s9
304; CHECK-NEXT:    ld %s17, 40(, %s11)
305; CHECK-NEXT:    ld %s10, 8(, %s11)
306; CHECK-NEXT:    ld %s9, (, %s11)
307; CHECK-NEXT:    b.l.t (, %s10)
308  %2 = alloca i64, align 32  ; fixed-size slot with 32-byte alignment request
309  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
310  %3 = alloca i8, i64 %0, align 8  ; dynamically sized area of %0 bytes
311  %4 = load volatile i64, ptr %3, align 8, !tbaa !3  ; read from the dynamic area; result unused
312  %5 = load volatile i64, ptr %2, align 32, !tbaa !10  ; read from the aligned slot; returned
313  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
314  ret i64 %5
315}
316
; loadi64_stk_dyn_align2: two fixed i64 slots (align 32 and align 64) plus
; a dynamically sized alloca of %0 bytes.  All three are volatile-loaded;
; the value from the align-32 slot is returned.  The expected assembly
; masks %s11 with (58)1, reads the align-32 slot at 288(, %s17) and the
; align-64 slot at 256(, %s17).
317; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
318define x86_fastcallcc i64 @loadi64_stk_dyn_align2(i64 noundef %0) {
319; CHECK-LABEL: loadi64_stk_dyn_align2:
320; CHECK:       # %bb.0:
321; CHECK-NEXT:    st %s9, (, %s11)
322; CHECK-NEXT:    st %s10, 8(, %s11)
323; CHECK-NEXT:    st %s17, 40(, %s11)
324; CHECK-NEXT:    or %s9, 0, %s11
325; CHECK-NEXT:    lea %s11, -320(, %s11)
326; CHECK-NEXT:    and %s11, %s11, (58)1
327; CHECK-NEXT:    or %s17, 0, %s11
328; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB5_2
329; CHECK-NEXT:  # %bb.1:
330; CHECK-NEXT:    ld %s61, 24(, %s14)
331; CHECK-NEXT:    or %s62, 0, %s0
332; CHECK-NEXT:    lea %s63, 315
333; CHECK-NEXT:    shm.l %s63, (%s61)
334; CHECK-NEXT:    shm.l %s8, 8(%s61)
335; CHECK-NEXT:    shm.l %s11, 16(%s61)
336; CHECK-NEXT:    monc
337; CHECK-NEXT:    or %s0, 0, %s62
338; CHECK-NEXT:  .LBB5_2:
339; CHECK-NEXT:    lea %s0, 15(, %s0)
340; CHECK-NEXT:    and %s0, -16, %s0
341; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
342; CHECK-NEXT:    and %s1, %s1, (32)0
343; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
344; CHECK-NEXT:    bsic %s10, (, %s12)
345; CHECK-NEXT:    lea %s0, 240(, %s11)
346; CHECK-NEXT:    ld %s0, (, %s0)
347; CHECK-NEXT:    ld %s0, 288(, %s17)
348; CHECK-NEXT:    ld %s1, 256(, %s17)
349; CHECK-NEXT:    or %s11, 0, %s9
350; CHECK-NEXT:    ld %s17, 40(, %s11)
351; CHECK-NEXT:    ld %s10, 8(, %s11)
352; CHECK-NEXT:    ld %s9, (, %s11)
353; CHECK-NEXT:    b.l.t (, %s10)
354  %2 = alloca i64, align 32  ; first fixed slot; its value is returned
355  %3 = alloca i64, align 64  ; second fixed slot with a larger alignment request
356  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
357  %4 = alloca i8, i64 %0, align 8  ; dynamically sized area of %0 bytes
358  %5 = load volatile i64, ptr %4, align 8, !tbaa !3  ; read from the dynamic area; result unused
359  %6 = load volatile i64, ptr %2, align 32, !tbaa !10
360  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
361  %7 = load volatile i64, ptr %3, align 64, !tbaa !10  ; result unused
362  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
363  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
364  ret i64 %6
365}
366
; loadi64_stk_dyn_align_spill: like loadi64_stk_dyn_align, but the loaded
; value and the argument %0 must stay live across calls to @dummy and
; @pass.  The expected assembly keeps %0 in %s18 and the loaded value in
; %s19, spilling the previous contents of those registers at 48(, %s9) and
; 56(, %s9) and reloading them before returning.
367; Function Attrs: nounwind
368define x86_fastcallcc i64 @loadi64_stk_dyn_align_spill(i64 noundef %0) {
369; CHECK-LABEL: loadi64_stk_dyn_align_spill:
370; CHECK:       # %bb.0:
371; CHECK-NEXT:    st %s9, (, %s11)
372; CHECK-NEXT:    st %s10, 8(, %s11)
373; CHECK-NEXT:    st %s17, 40(, %s11)
374; CHECK-NEXT:    or %s9, 0, %s11
375; CHECK-NEXT:    lea %s11, -288(, %s11)
376; CHECK-NEXT:    and %s11, %s11, (59)1
377; CHECK-NEXT:    or %s17, 0, %s11
378; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB6_2
379; CHECK-NEXT:  # %bb.1:
380; CHECK-NEXT:    ld %s61, 24(, %s14)
381; CHECK-NEXT:    or %s62, 0, %s0
382; CHECK-NEXT:    lea %s63, 315
383; CHECK-NEXT:    shm.l %s63, (%s61)
384; CHECK-NEXT:    shm.l %s8, 8(%s61)
385; CHECK-NEXT:    shm.l %s11, 16(%s61)
386; CHECK-NEXT:    monc
387; CHECK-NEXT:    or %s0, 0, %s62
388; CHECK-NEXT:  .LBB6_2:
389; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
390; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
391; CHECK-NEXT:    or %s18, 0, %s0
392; CHECK-NEXT:    lea %s0, 15(, %s0)
393; CHECK-NEXT:    and %s0, -16, %s0
394; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
395; CHECK-NEXT:    and %s1, %s1, (32)0
396; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
397; CHECK-NEXT:    bsic %s10, (, %s12)
398; CHECK-NEXT:    lea %s0, 240(, %s11)
399; CHECK-NEXT:    ld %s0, (, %s0)
400; CHECK-NEXT:    ld %s19, 256(, %s17)
401; CHECK-NEXT:    lea %s0, dummy@lo
402; CHECK-NEXT:    and %s0, %s0, (32)0
403; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
404; CHECK-NEXT:    bsic %s10, (, %s12)
405; CHECK-NEXT:    lea %s0, pass@lo
406; CHECK-NEXT:    and %s0, %s0, (32)0
407; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
408; CHECK-NEXT:    or %s0, 0, %s18
409; CHECK-NEXT:    bsic %s10, (, %s12)
410; CHECK-NEXT:    or %s0, 0, %s19
411; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
412; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
413; CHECK-NEXT:    or %s11, 0, %s9
414; CHECK-NEXT:    ld %s17, 40(, %s11)
415; CHECK-NEXT:    ld %s10, 8(, %s11)
416; CHECK-NEXT:    ld %s9, (, %s11)
417; CHECK-NEXT:    b.l.t (, %s10)
418  %2 = alloca i64, align 32  ; fixed-size slot with 32-byte alignment request
419  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
420  %3 = alloca i8, i64 %0, align 8  ; dynamically sized area of %0 bytes
421  %4 = load volatile i64, ptr %3, align 8, !tbaa !3  ; read from the dynamic area; result unused
422  %5 = load volatile i64, ptr %2, align 32, !tbaa !10  ; must survive both calls below
423  tail call void (...) @dummy()
424  tail call void @pass(i64 noundef %0)  ; %0 is still needed after the call to @dummy
425  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
426  ret i64 %5
427}
428
; External functions called by the *_spill tests so that values have to
; stay live across a call.  @dummy is variadic and is called with no
; arguments; @pass receives the test function's i64 argument.
429declare void @dummy(...)
430
431declare void @pass(i64 noundef)
432
; loadquad_stk: fp128 counterpart of loadi64_stk.  A single fp128 stack
; slot is volatile-loaded and returned.  The expected assembly lowers %s11
; by 16 bytes and reads the value with two ld instructions: %s1 from
; (, %s11) and %s0 from 8(, %s11).
433; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
434define x86_fastcallcc fp128 @loadquad_stk() {
435; CHECK-LABEL: loadquad_stk:
436; CHECK:       # %bb.0:
437; CHECK-NEXT:    adds.l %s11, -16, %s11
438; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB7_2
439; CHECK-NEXT:  # %bb.1:
440; CHECK-NEXT:    ld %s61, 24(, %s14)
441; CHECK-NEXT:    or %s62, 0, %s0
442; CHECK-NEXT:    lea %s63, 315
443; CHECK-NEXT:    shm.l %s63, (%s61)
444; CHECK-NEXT:    shm.l %s8, 8(%s61)
445; CHECK-NEXT:    shm.l %s11, 16(%s61)
446; CHECK-NEXT:    monc
447; CHECK-NEXT:    or %s0, 0, %s62
448; CHECK-NEXT:  .LBB7_2:
449; CHECK-NEXT:    ld %s1, (, %s11)
450; CHECK-NEXT:    ld %s0, 8(, %s11)
451; CHECK-NEXT:    adds.l %s11, 16, %s11
452; CHECK-NEXT:    b.l.t (, %s10)
453  %1 = alloca fp128, align 16  ; the only stack object in this function
454  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
455  %2 = load volatile fp128, ptr %1, align 16, !tbaa !12  ; volatile so the load is kept
456  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
457  ret fp128 %2
458}
459
; loadquad_stk_big: an fp128 slot plus a [268435455 x i64] array
; (2147483640 bytes) on the stack.  The fp128 slot is volatile-loaded
; once, then a loop volatile-loads every array element; the fp128 value is
; returned.  In the expected assembly the address of the fp128 slot is
; formed in %s13 and the array elements are read at 8(%s2, %s11).
460; Function Attrs: argmemonly nofree nounwind
461define x86_fastcallcc fp128 @loadquad_stk_big() {
462; CHECK-LABEL: loadquad_stk_big:
463; CHECK:       # %bb.0:
464; CHECK-NEXT:    lea %s13, 2147483632
465; CHECK-NEXT:    and %s13, %s13, (32)0
466; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
467; CHECK-NEXT:    brge.l %s11, %s8, .LBB8_4
468; CHECK-NEXT:  # %bb.3:
469; CHECK-NEXT:    ld %s61, 24(, %s14)
470; CHECK-NEXT:    or %s62, 0, %s0
471; CHECK-NEXT:    lea %s63, 315
472; CHECK-NEXT:    shm.l %s63, (%s61)
473; CHECK-NEXT:    shm.l %s8, 8(%s61)
474; CHECK-NEXT:    shm.l %s11, 16(%s61)
475; CHECK-NEXT:    monc
476; CHECK-NEXT:    or %s0, 0, %s62
477; CHECK-NEXT:  .LBB8_4:
478; CHECK-NEXT:    lea %s13, -2147483648
479; CHECK-NEXT:    and %s13, %s13, (32)0
480; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
481; CHECK-NEXT:    ld %s1, (, %s13)
482; CHECK-NEXT:    ld %s0, 8(, %s13)
483; CHECK-NEXT:    or %s2, 0, (0)1
484; CHECK-NEXT:    lea %s3, 2147483640
485; CHECK-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
486; CHECK-NEXT:    ld %s4, 8(%s2, %s11)
487; CHECK-NEXT:    lea %s2, 8(, %s2)
488; CHECK-NEXT:    brne.l %s2, %s3, .LBB8_1
489; CHECK-NEXT:  # %bb.2:
490; CHECK-NEXT:    lea %s13, -2147483632
491; CHECK-NEXT:    and %s13, %s13, (32)0
492; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
493; CHECK-NEXT:    b.l.t (, %s10)
494  %1 = alloca fp128, align 16  ; fp128 slot whose value is returned
495  %2 = alloca [268435455 x i64], align 8  ; large array, 2147483640 bytes
496  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
497  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %2)
498  %3 = load volatile fp128, ptr %1, align 16, !tbaa !12
499  br label %5
500
5014:                                                ; preds = %5
502  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %2)
503  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
504  ret fp128 %3
505
5065:                                                ; preds = %0, %5
507  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]  ; array index, starts at 0
508  %7 = getelementptr inbounds [268435455 x i64], ptr %2, i64 0, i64 %6
509  %8 = load volatile i64, ptr %7, align 8, !tbaa !3  ; result unused; load kept because volatile
510  %9 = add nuw nsw i64 %6, 1
511  %10 = icmp eq i64 %9, 268435455  ; exit after the last element
512  br i1 %10, label %4, label %5, !llvm.loop !14
513}
514
; loadquad_stk_big2: same shape as loadquad_stk_big but with a
; [268435456 x i64] array (2147483648 bytes).  In the expected assembly
; the loop bound is also built with a lea / and pair and the array
; elements are read at (%s2, %s11).
515; Function Attrs: argmemonly nofree nounwind
516define x86_fastcallcc fp128 @loadquad_stk_big2() {
517; CHECK-LABEL: loadquad_stk_big2:
518; CHECK:       # %bb.0:
519; CHECK-NEXT:    lea %s13, 2147483632
520; CHECK-NEXT:    and %s13, %s13, (32)0
521; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
522; CHECK-NEXT:    brge.l %s11, %s8, .LBB9_4
523; CHECK-NEXT:  # %bb.3:
524; CHECK-NEXT:    ld %s61, 24(, %s14)
525; CHECK-NEXT:    or %s62, 0, %s0
526; CHECK-NEXT:    lea %s63, 315
527; CHECK-NEXT:    shm.l %s63, (%s61)
528; CHECK-NEXT:    shm.l %s8, 8(%s61)
529; CHECK-NEXT:    shm.l %s11, 16(%s61)
530; CHECK-NEXT:    monc
531; CHECK-NEXT:    or %s0, 0, %s62
532; CHECK-NEXT:  .LBB9_4:
533; CHECK-NEXT:    lea %s13, -2147483648
534; CHECK-NEXT:    and %s13, %s13, (32)0
535; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
536; CHECK-NEXT:    ld %s1, (, %s13)
537; CHECK-NEXT:    ld %s0, 8(, %s13)
538; CHECK-NEXT:    or %s2, 0, (0)1
539; CHECK-NEXT:    lea %s3, -2147483648
540; CHECK-NEXT:    and %s3, %s3, (32)0
541; CHECK-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
542; CHECK-NEXT:    ld %s4, (%s2, %s11)
543; CHECK-NEXT:    lea %s2, 8(, %s2)
544; CHECK-NEXT:    brne.l %s2, %s3, .LBB9_1
545; CHECK-NEXT:  # %bb.2:
546; CHECK-NEXT:    lea %s13, -2147483632
547; CHECK-NEXT:    and %s13, %s13, (32)0
548; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
549; CHECK-NEXT:    b.l.t (, %s10)
550  %1 = alloca fp128, align 16  ; fp128 slot whose value is returned
551  %2 = alloca [268435456 x i64], align 8  ; large array, 2147483648 bytes
552  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
553  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
554  %3 = load volatile fp128, ptr %1, align 16, !tbaa !12
555  br label %5
556
5574:                                                ; preds = %5
558  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
559  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
560  ret fp128 %3
561
5625:                                                ; preds = %0, %5
563  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]  ; array index, starts at 0
564  %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
565  %8 = load volatile i64, ptr %7, align 8, !tbaa !3  ; result unused; load kept because volatile
566  %9 = add nuw nsw i64 %6, 1
567  %10 = icmp eq i64 %9, 268435456  ; exit after the last element
568  br i1 %10, label %4, label %5, !llvm.loop !15
569}
570
; loadquad_stk_dyn: fp128 counterpart of loadi64_stk_dyn.  A fixed fp128
; slot and a dynamically sized alloca of %0 bytes are both volatile-loaded
; as fp128; the value from the fixed slot is returned.  The expected
; assembly reads the dynamic area through 240(, %s11) and the fixed slot
; at -16(, %s9) and -8(, %s9).
571; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
572define x86_fastcallcc fp128 @loadquad_stk_dyn(i64 noundef %0) {
573; CHECK-LABEL: loadquad_stk_dyn:
574; CHECK:       # %bb.0:
575; CHECK-NEXT:    st %s9, (, %s11)
576; CHECK-NEXT:    st %s10, 8(, %s11)
577; CHECK-NEXT:    or %s9, 0, %s11
578; CHECK-NEXT:    lea %s11, -256(, %s11)
579; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB10_2
580; CHECK-NEXT:  # %bb.1:
581; CHECK-NEXT:    ld %s61, 24(, %s14)
582; CHECK-NEXT:    or %s62, 0, %s0
583; CHECK-NEXT:    lea %s63, 315
584; CHECK-NEXT:    shm.l %s63, (%s61)
585; CHECK-NEXT:    shm.l %s8, 8(%s61)
586; CHECK-NEXT:    shm.l %s11, 16(%s61)
587; CHECK-NEXT:    monc
588; CHECK-NEXT:    or %s0, 0, %s62
589; CHECK-NEXT:  .LBB10_2:
590; CHECK-NEXT:    lea %s0, 15(, %s0)
591; CHECK-NEXT:    and %s0, -16, %s0
592; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
593; CHECK-NEXT:    and %s1, %s1, (32)0
594; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
595; CHECK-NEXT:    bsic %s10, (, %s12)
596; CHECK-NEXT:    lea %s0, 240(, %s11)
597; CHECK-NEXT:    ld %s1, 8(, %s0)
598; CHECK-NEXT:    ld %s0, (, %s0)
599; CHECK-NEXT:    ld %s1, -16(, %s9)
600; CHECK-NEXT:    ld %s0, -8(, %s9)
601; CHECK-NEXT:    or %s11, 0, %s9
602; CHECK-NEXT:    ld %s10, 8(, %s11)
603; CHECK-NEXT:    ld %s9, (, %s11)
604; CHECK-NEXT:    b.l.t (, %s10)
605  %2 = alloca fp128, align 16  ; fixed-size slot
606  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
607  %3 = alloca i8, i64 %0, align 16  ; dynamically sized area of %0 bytes
608  %4 = load volatile fp128, ptr %3, align 16, !tbaa !12  ; read from the dynamic area; result unused
609  %5 = load volatile fp128, ptr %2, align 16, !tbaa !12  ; read from the fixed slot; returned
610  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
611  ret fp128 %5
612}
613
; loadquad_stk_dyn_align: like loadquad_stk_dyn, but the fixed fp128 slot
; is declared with align 32.  The expected assembly additionally saves and
; restores %s17, masks %s11 with (59)1 and copies it into %s17, and reads
; the fixed slot at 256(, %s17) and 264(, %s17).
614; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
615define x86_fastcallcc fp128 @loadquad_stk_dyn_align(i64 noundef %0) {
616; CHECK-LABEL: loadquad_stk_dyn_align:
617; CHECK:       # %bb.0:
618; CHECK-NEXT:    st %s9, (, %s11)
619; CHECK-NEXT:    st %s10, 8(, %s11)
620; CHECK-NEXT:    st %s17, 40(, %s11)
621; CHECK-NEXT:    or %s9, 0, %s11
622; CHECK-NEXT:    lea %s11, -288(, %s11)
623; CHECK-NEXT:    and %s11, %s11, (59)1
624; CHECK-NEXT:    or %s17, 0, %s11
625; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB11_2
626; CHECK-NEXT:  # %bb.1:
627; CHECK-NEXT:    ld %s61, 24(, %s14)
628; CHECK-NEXT:    or %s62, 0, %s0
629; CHECK-NEXT:    lea %s63, 315
630; CHECK-NEXT:    shm.l %s63, (%s61)
631; CHECK-NEXT:    shm.l %s8, 8(%s61)
632; CHECK-NEXT:    shm.l %s11, 16(%s61)
633; CHECK-NEXT:    monc
634; CHECK-NEXT:    or %s0, 0, %s62
635; CHECK-NEXT:  .LBB11_2:
636; CHECK-NEXT:    lea %s0, 15(, %s0)
637; CHECK-NEXT:    and %s0, -16, %s0
638; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
639; CHECK-NEXT:    and %s1, %s1, (32)0
640; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
641; CHECK-NEXT:    bsic %s10, (, %s12)
642; CHECK-NEXT:    lea %s0, 240(, %s11)
643; CHECK-NEXT:    ld %s1, 8(, %s0)
644; CHECK-NEXT:    ld %s0, (, %s0)
645; CHECK-NEXT:    ld %s1, 256(, %s17)
646; CHECK-NEXT:    ld %s0, 264(, %s17)
647; CHECK-NEXT:    or %s11, 0, %s9
648; CHECK-NEXT:    ld %s17, 40(, %s11)
649; CHECK-NEXT:    ld %s10, 8(, %s11)
650; CHECK-NEXT:    ld %s9, (, %s11)
651; CHECK-NEXT:    b.l.t (, %s10)
652  %2 = alloca fp128, align 32  ; fixed-size slot with 32-byte alignment request
653  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
654  %3 = alloca i8, i64 %0, align 16  ; dynamically sized area of %0 bytes
655  %4 = load volatile fp128, ptr %3, align 16, !tbaa !12  ; read from the dynamic area; result unused
656  %5 = load volatile fp128, ptr %2, align 32, !tbaa !16  ; read from the aligned slot; returned
657  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
658  ret fp128 %5
659}
660
; loadquad_stk_dyn_align2: two fixed fp128 slots (align 32 and align 64)
; plus a dynamically sized alloca of %0 bytes.  All three are
; volatile-loaded; the value from the align-32 slot is returned.  The
; expected assembly masks %s11 with (58)1, reads the align-32 slot at
; 288/296(, %s17) and the align-64 slot at 256/264(, %s17).
661; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
662define x86_fastcallcc fp128 @loadquad_stk_dyn_align2(i64 noundef %0) {
663; CHECK-LABEL: loadquad_stk_dyn_align2:
664; CHECK:       # %bb.0:
665; CHECK-NEXT:    st %s9, (, %s11)
666; CHECK-NEXT:    st %s10, 8(, %s11)
667; CHECK-NEXT:    st %s17, 40(, %s11)
668; CHECK-NEXT:    or %s9, 0, %s11
669; CHECK-NEXT:    lea %s11, -320(, %s11)
670; CHECK-NEXT:    and %s11, %s11, (58)1
671; CHECK-NEXT:    or %s17, 0, %s11
672; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB12_2
673; CHECK-NEXT:  # %bb.1:
674; CHECK-NEXT:    ld %s61, 24(, %s14)
675; CHECK-NEXT:    or %s62, 0, %s0
676; CHECK-NEXT:    lea %s63, 315
677; CHECK-NEXT:    shm.l %s63, (%s61)
678; CHECK-NEXT:    shm.l %s8, 8(%s61)
679; CHECK-NEXT:    shm.l %s11, 16(%s61)
680; CHECK-NEXT:    monc
681; CHECK-NEXT:    or %s0, 0, %s62
682; CHECK-NEXT:  .LBB12_2:
683; CHECK-NEXT:    lea %s0, 15(, %s0)
684; CHECK-NEXT:    and %s0, -16, %s0
685; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
686; CHECK-NEXT:    and %s1, %s1, (32)0
687; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
688; CHECK-NEXT:    bsic %s10, (, %s12)
689; CHECK-NEXT:    lea %s0, 240(, %s11)
690; CHECK-NEXT:    ld %s1, 8(, %s0)
691; CHECK-NEXT:    ld %s0, (, %s0)
692; CHECK-NEXT:    ld %s1, 288(, %s17)
693; CHECK-NEXT:    ld %s0, 296(, %s17)
694; CHECK-NEXT:    ld %s3, 256(, %s17)
695; CHECK-NEXT:    ld %s2, 264(, %s17)
696; CHECK-NEXT:    or %s11, 0, %s9
697; CHECK-NEXT:    ld %s17, 40(, %s11)
698; CHECK-NEXT:    ld %s10, 8(, %s11)
699; CHECK-NEXT:    ld %s9, (, %s11)
700; CHECK-NEXT:    b.l.t (, %s10)
701  %2 = alloca fp128, align 32  ; first fixed slot; its value is returned
702  %3 = alloca fp128, align 64  ; second fixed slot with a larger alignment request
703  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
704  %4 = alloca i8, i64 %0, align 16  ; dynamically sized area of %0 bytes
705  %5 = load volatile fp128, ptr %4, align 16, !tbaa !12  ; read from the dynamic area; result unused
706  %6 = load volatile fp128, ptr %2, align 32, !tbaa !16
707  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
708  %7 = load volatile fp128, ptr %3, align 64, !tbaa !16  ; result unused
709  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
710  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
711  ret fp128 %6
712}
713
; loadquad_stk_dyn_align_spill: like loadquad_stk_dyn_align, but the
; loaded fp128 value and the argument %0 must stay live across calls to
; @dummy and @pass.  The expected assembly keeps %0 in %s18 and the loaded
; value in %s21/%s20, spilling the previous contents of %s18, %s20 and
; %s21 at 48, 64 and 72(, %s9) and reloading them before returning.
714; Function Attrs: nounwind
715define x86_fastcallcc fp128 @loadquad_stk_dyn_align_spill(i64 noundef %0) {
716; CHECK-LABEL: loadquad_stk_dyn_align_spill:
717; CHECK:       # %bb.0:
718; CHECK-NEXT:    st %s9, (, %s11)
719; CHECK-NEXT:    st %s10, 8(, %s11)
720; CHECK-NEXT:    st %s17, 40(, %s11)
721; CHECK-NEXT:    or %s9, 0, %s11
722; CHECK-NEXT:    lea %s11, -288(, %s11)
723; CHECK-NEXT:    and %s11, %s11, (59)1
724; CHECK-NEXT:    or %s17, 0, %s11
725; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB13_2
726; CHECK-NEXT:  # %bb.1:
727; CHECK-NEXT:    ld %s61, 24(, %s14)
728; CHECK-NEXT:    or %s62, 0, %s0
729; CHECK-NEXT:    lea %s63, 315
730; CHECK-NEXT:    shm.l %s63, (%s61)
731; CHECK-NEXT:    shm.l %s8, 8(%s61)
732; CHECK-NEXT:    shm.l %s11, 16(%s61)
733; CHECK-NEXT:    monc
734; CHECK-NEXT:    or %s0, 0, %s62
735; CHECK-NEXT:  .LBB13_2:
736; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
737; CHECK-NEXT:    st %s20, 64(, %s9) # 8-byte Folded Spill
738; CHECK-NEXT:    st %s21, 72(, %s9) # 8-byte Folded Spill
739; CHECK-NEXT:    or %s18, 0, %s0
740; CHECK-NEXT:    lea %s0, 15(, %s0)
741; CHECK-NEXT:    and %s0, -16, %s0
742; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
743; CHECK-NEXT:    and %s1, %s1, (32)0
744; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
745; CHECK-NEXT:    bsic %s10, (, %s12)
746; CHECK-NEXT:    lea %s0, 240(, %s11)
747; CHECK-NEXT:    ld %s1, 8(, %s0)
748; CHECK-NEXT:    ld %s0, (, %s0)
749; CHECK-NEXT:    ld %s21, 256(, %s17)
750; CHECK-NEXT:    ld %s20, 264(, %s17)
751; CHECK-NEXT:    lea %s0, dummy@lo
752; CHECK-NEXT:    and %s0, %s0, (32)0
753; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
754; CHECK-NEXT:    bsic %s10, (, %s12)
755; CHECK-NEXT:    lea %s0, pass@lo
756; CHECK-NEXT:    and %s0, %s0, (32)0
757; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
758; CHECK-NEXT:    or %s0, 0, %s18
759; CHECK-NEXT:    bsic %s10, (, %s12)
760; CHECK-NEXT:    or %s0, 0, %s20
761; CHECK-NEXT:    or %s1, 0, %s21
762; CHECK-NEXT:    ld %s21, 72(, %s9) # 8-byte Folded Reload
763; CHECK-NEXT:    ld %s20, 64(, %s9) # 8-byte Folded Reload
764; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
765; CHECK-NEXT:    or %s11, 0, %s9
766; CHECK-NEXT:    ld %s17, 40(, %s11)
767; CHECK-NEXT:    ld %s10, 8(, %s11)
768; CHECK-NEXT:    ld %s9, (, %s11)
769; CHECK-NEXT:    b.l.t (, %s10)
770  %2 = alloca fp128, align 32  ; fixed-size slot with 32-byte alignment request
771  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
772  %3 = alloca i8, i64 %0, align 16  ; dynamically sized area of %0 bytes
773  %4 = load volatile fp128, ptr %3, align 16, !tbaa !12  ; read from the dynamic area; result unused
774  %5 = load volatile fp128, ptr %2, align 32, !tbaa !16  ; must survive both calls below
775  tail call void (...) @dummy()
776  tail call void @pass(i64 noundef %0)  ; %0 is still needed after the call to @dummy
777  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
778  ret fp128 %5
779}
780
; Metadata referenced by the functions in this file: TBAA access tags
; attached to the volatile loads and loop metadata attached to the
; back-edge branches of the *_big / *_big2 loops.
781!3 = !{!4, !4, i64 0}  ; tag on the align-8 i64 loads
782!4 = !{!"long", !5, i64 0}
783!5 = !{!"omnipotent char", !6, i64 0}
784!6 = !{!"Simple C/C++ TBAA"}
785!7 = distinct !{!7, !8}  ; loop metadata for loadi64_stk_big
786!8 = !{!"llvm.loop.mustprogress"}
787!9 = distinct !{!9, !8}  ; loop metadata for loadi64_stk_big2
788!10 = !{!11, !4, i64 0}  ; tag on the i64 loads from align-32 / align-64 slots
789!11 = !{!"", !4, i64 0}
790!12 = !{!13, !13, i64 0}  ; tag on the align-16 fp128 loads
791!13 = !{!"long double", !5, i64 0}
792!14 = distinct !{!14, !8}  ; loop metadata for loadquad_stk_big
793!15 = distinct !{!15, !8}  ; loop metadata for loadquad_stk_big2
794!16 = !{!17, !13, i64 0}  ; tag on the fp128 loads from align-32 / align-64 slots
795!17 = !{!"", !13, i64 0}
796