xref: /llvm-project/llvm/test/CodeGen/VE/Vector/store_stk_stvm.ll (revision 469044cfd355d34573643a57b5d2a78a9c341327)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
3
4;;; Test store instructions
5;;;
6;;; Note:
7;;;   We test store instructions using general stack, stack with dynamic
8;;;   allocation, stack with dynamic allocation and alignment, and stack
9;;;   with dynamic allocation, alignment, and spill.
10;;;
11;;; First test using a stack for leaf function.
12;;;
13;;;   |                                              | Higher address
14;;;   |----------------------------------------------| <- old sp
15;;;   | Local variables of fixed size                |
16;;;   |----------------------------------------------| <- sp
17;;;   |                                              | Lower address
18;;;
19;;; Access local variables using sp (%s11).  In addition, please remember
20;;; that the stack is aligned to 16 bytes.
21;;;
22;;; Second test using a general stack.
23;;;
24;;;   |                                              | Higher address
25;;;   |----------------------------------------------|
26;;;   | Parameter area for this function             |
27;;;   |----------------------------------------------|
28;;;   | Register save area (RSA) for this function   |
29;;;   |----------------------------------------------|
30;;;   | Return address for this function             |
31;;;   |----------------------------------------------|
32;;;   | Frame pointer for this function              |
33;;;   |----------------------------------------------| <- fp(=old sp)
34;;;   | Local variables of fixed size                |
35;;;   |----------------------------------------------|
36;;;   |.variable-sized.local.variables.(VLAs)........|
37;;;   |..............................................|
38;;;   |..............................................|
39;;;   |----------------------------------------------| <- returned by alloca
40;;;   | Parameter area for callee                    |
41;;;   |----------------------------------------------|
42;;;   | Register save area (RSA) for callee          |
43;;;   |----------------------------------------------|
44;;;   | Return address for callee                    |
45;;;   |----------------------------------------------|
46;;;   | Frame pointer for callee                     |
47;;;   |----------------------------------------------| <- sp
48;;;   |                                              | Lower address
49;;;
50;;; Access local variables using fp (%s9) since the size of the VLA is not
51;;; known.  At the beginning of the function, 240 + data bytes are
52;;; allocated.  240 means RSA+RA+FP (=176) + Parameter (=64).
53;;;
54;;; Third test using a general stack.
55;;;
56;;;   |                                              | Higher address
57;;;   |----------------------------------------------|
58;;;   | Parameter area for this function             |
59;;;   |----------------------------------------------|
60;;;   | Register save area (RSA) for this function   |
61;;;   |----------------------------------------------|
62;;;   | Return address for this function             |
63;;;   |----------------------------------------------|
64;;;   | Frame pointer for this function              |
65;;;   |----------------------------------------------| <- fp(=old sp)
66;;;   |.empty.space.to.make.part.below.aligned.in....|
67;;;   |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
68;;;   |.alignment....................................|  unknown at compile time)
69;;;   |----------------------------------------------|
70;;;   | Local variables of fixed size including spill|
71;;;   | slots                                        |
72;;;   |----------------------------------------------| <- bp(not defined by ABI,
73;;;   |.variable-sized.local.variables.(VLAs)........|       LLVM chooses SX17)
74;;;   |..............................................| (size of this area is
75;;;   |..............................................|  unknown at compile time)
76;;;   |----------------------------------------------| <- stack top (returned by
77;;;   | Parameter area for callee                    |               alloca)
78;;;   |----------------------------------------------|
79;;;   | Register save area (RSA) for callee          |
80;;;   |----------------------------------------------|
81;;;   | Return address for callee                    |
82;;;   |----------------------------------------------|
83;;;   | Frame pointer for callee                     |
84;;;   |----------------------------------------------| <- sp
85;;;   |                                              | Lower address
86;;;
87;;; Access local variables using bp (%s17) since the sizes of the alignment
88;;; padding and the VLA are not known.  At the beginning of the function,
89;;; pad(240 + data + align) bytes are allocated.  Then, data is accessed through bp + pad(240)
90;;; since this address doesn't change even if VLA is dynamically allocated.
91;;;
92;;; Fourth test using a general stack with some spills.
93;;;
94
;;; store__vm256_stk: volatile-store a <256 x i1> mask argument into a single
;;; fixed-size stack slot declared with 32-byte alignment.  The expected code
;;; aligns %s11 with "and ..., (59)1" and writes the mask as four 64-bit
;;; pieces (svm + st) at displacements 192..216 from %s11.
95; Function Attrs: argmemonly nofree nounwind
96define fastcc void @store__vm256_stk(<256 x i1> noundef %0) {
97; CHECK-LABEL: store__vm256_stk:
98; CHECK:       # %bb.0:
99; CHECK-NEXT:    st %s9, (, %s11)
100; CHECK-NEXT:    st %s10, 8(, %s11)
101; CHECK-NEXT:    or %s9, 0, %s11
102; CHECK-NEXT:    lea %s11, -224(, %s11)
103; CHECK-NEXT:    and %s11, %s11, (59)1
104; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB0_2
105; CHECK-NEXT:  # %bb.1:
106; CHECK-NEXT:    ld %s61, 24(, %s14)
107; CHECK-NEXT:    or %s62, 0, %s0
108; CHECK-NEXT:    lea %s63, 315
109; CHECK-NEXT:    shm.l %s63, (%s61)
110; CHECK-NEXT:    shm.l %s8, 8(%s61)
111; CHECK-NEXT:    shm.l %s11, 16(%s61)
112; CHECK-NEXT:    monc
113; CHECK-NEXT:    or %s0, 0, %s62
114; CHECK-NEXT:  .LBB0_2:
115; CHECK-NEXT:    svm %s16, %vm1, 0
116; CHECK-NEXT:    st %s16, 192(, %s11)
117; CHECK-NEXT:    svm %s16, %vm1, 1
118; CHECK-NEXT:    st %s16, 200(, %s11)
119; CHECK-NEXT:    svm %s16, %vm1, 2
120; CHECK-NEXT:    st %s16, 208(, %s11)
121; CHECK-NEXT:    svm %s16, %vm1, 3
122; CHECK-NEXT:    st %s16, 216(, %s11)
123; CHECK-NEXT:    or %s11, 0, %s9
124; CHECK-NEXT:    ld %s10, 8(, %s11)
125; CHECK-NEXT:    ld %s9, (, %s11)
126; CHECK-NEXT:    b.l.t (, %s10)
  ; 32-byte slot for the mask; the store is volatile so it is not removed.
127  %2 = alloca <256 x i1>, align 32
128  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
129  store volatile <256 x i1> %0, ptr %2, align 32, !tbaa !3
130  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
131  ret void
132}
133
;;; Declarations of the lifetime.start / lifetime.end intrinsics that the test
;;; functions in this file call around their fixed-size allocas.
134; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
135declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
136
137; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
138declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
139
;;; store__vm256_stk_big_fit: volatile-store a <256 x i1> mask into a 32-byte
;;; aligned stack slot that sits next to a [268435428 x i64] array
;;; (2147483424 bytes), then fill the array with the i64 argument in a loop.
;;; The expected code still reaches the mask slot with plain displacements
;;; (2147483616..2147483640) from %s11 and allocates the frame with a single
;;; "lea %s11, -2147483648(, %s11)".
;;; NOTE(review): "fit" presumably means the offsets still fit in the
;;; displacement field -- confirm against the VE backend.
140; Function Attrs: argmemonly nofree nounwind
141define fastcc void @store__vm256_stk_big_fit(<256 x i1> noundef %0, i64 noundef %1) {
142; CHECK-LABEL: store__vm256_stk_big_fit:
143; CHECK:       # %bb.0:
144; CHECK-NEXT:    st %s9, (, %s11)
145; CHECK-NEXT:    st %s10, 8(, %s11)
146; CHECK-NEXT:    or %s9, 0, %s11
147; CHECK-NEXT:    lea %s11, -2147483648(, %s11)
148; CHECK-NEXT:    and %s11, %s11, (59)1
149; CHECK-NEXT:    brge.l %s11, %s8, .LBB1_4
150; CHECK-NEXT:  # %bb.3:
151; CHECK-NEXT:    ld %s61, 24(, %s14)
152; CHECK-NEXT:    or %s62, 0, %s0
153; CHECK-NEXT:    lea %s63, 315
154; CHECK-NEXT:    shm.l %s63, (%s61)
155; CHECK-NEXT:    shm.l %s8, 8(%s61)
156; CHECK-NEXT:    shm.l %s11, 16(%s61)
157; CHECK-NEXT:    monc
158; CHECK-NEXT:    or %s0, 0, %s62
159; CHECK-NEXT:  .LBB1_4:
160; CHECK-NEXT:    svm %s16, %vm1, 0
161; CHECK-NEXT:    st %s16, 2147483616(, %s11)
162; CHECK-NEXT:    svm %s16, %vm1, 1
163; CHECK-NEXT:    st %s16, 2147483624(, %s11)
164; CHECK-NEXT:    svm %s16, %vm1, 2
165; CHECK-NEXT:    st %s16, 2147483632(, %s11)
166; CHECK-NEXT:    svm %s16, %vm1, 3
167; CHECK-NEXT:    st %s16, 2147483640(, %s11)
168; CHECK-NEXT:    or %s1, 0, (0)1
169; CHECK-NEXT:    lea %s2, 2147483424
170; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
171; CHECK-NEXT:    st %s0, 192(%s1, %s11)
172; CHECK-NEXT:    lea %s1, 8(, %s1)
173; CHECK-NEXT:    brne.l %s1, %s2, .LBB1_1
174; CHECK-NEXT:  # %bb.2:
175; CHECK-NEXT:    or %s11, 0, %s9
176; CHECK-NEXT:    ld %s10, 8(, %s11)
177; CHECK-NEXT:    ld %s9, (, %s11)
178; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the mask slot; %4 is the large array (268435428 * 8 = 2147483424 bytes).
179  %3 = alloca <256 x i1>, align 32
180  %4 = alloca [268435428 x i64], align 8
181  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
182  call void @llvm.lifetime.start.p0(i64 2147483424, ptr nonnull %4)
183  store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3
184  br label %6
185
1865:                                                ; preds = %6
187  call void @llvm.lifetime.end.p0(i64 2147483424, ptr nonnull %4)
188  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
189  ret void
190
  ; Loop: volatile-store %1 into every element of %4.
1916:                                                ; preds = %2, %6
192  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
193  %8 = getelementptr inbounds [268435428 x i64], ptr %4, i64 0, i64 %7
194  store volatile i64 %1, ptr %8, align 8, !tbaa !6
195  %9 = add nuw nsw i64 %7, 1
196  %10 = icmp eq i64 %9, 268435428
197  br i1 %10, label %5, label %6, !llvm.loop !8
198}
199
;;; store__vm256_stk_big: same shape as @store__vm256_stk_big_fit, but the
;;; array has one more element ([268435429 x i64], 2147483432 bytes).  The
;;; expected code no longer uses plain displacements for the mask slot: it
;;; builds the frame adjustment and the slot address in %s13 with
;;; lea / and (32)0 / lea.sl, then stores the four mask pieces at 0..24(, %s13).
200; Function Attrs: argmemonly nofree nounwind
201define fastcc void @store__vm256_stk_big(<256 x i1> noundef %0, i64 noundef %1) {
202; CHECK-LABEL: store__vm256_stk_big:
203; CHECK:       # %bb.0:
204; CHECK-NEXT:    st %s9, (, %s11)
205; CHECK-NEXT:    st %s10, 8(, %s11)
206; CHECK-NEXT:    or %s9, 0, %s11
207; CHECK-NEXT:    lea %s13, 2147483616
208; CHECK-NEXT:    and %s13, %s13, (32)0
209; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
210; CHECK-NEXT:    and %s11, %s11, (59)1
211; CHECK-NEXT:    brge.l %s11, %s8, .LBB2_4
212; CHECK-NEXT:  # %bb.3:
213; CHECK-NEXT:    ld %s61, 24(, %s14)
214; CHECK-NEXT:    or %s62, 0, %s0
215; CHECK-NEXT:    lea %s63, 315
216; CHECK-NEXT:    shm.l %s63, (%s61)
217; CHECK-NEXT:    shm.l %s8, 8(%s61)
218; CHECK-NEXT:    shm.l %s11, 16(%s61)
219; CHECK-NEXT:    monc
220; CHECK-NEXT:    or %s0, 0, %s62
221; CHECK-NEXT:  .LBB2_4:
222; CHECK-NEXT:    lea %s13, -2147483648
223; CHECK-NEXT:    and %s13, %s13, (32)0
224; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
225; CHECK-NEXT:    svm %s16, %vm1, 0
226; CHECK-NEXT:    st %s16, (, %s13)
227; CHECK-NEXT:    svm %s16, %vm1, 1
228; CHECK-NEXT:    st %s16, 8(, %s13)
229; CHECK-NEXT:    svm %s16, %vm1, 2
230; CHECK-NEXT:    st %s16, 16(, %s13)
231; CHECK-NEXT:    svm %s16, %vm1, 3
232; CHECK-NEXT:    st %s16, 24(, %s13)
233; CHECK-NEXT:    or %s1, 0, (0)1
234; CHECK-NEXT:    lea %s2, 2147483432
235; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
236; CHECK-NEXT:    st %s0, 216(%s1, %s11)
237; CHECK-NEXT:    lea %s1, 8(, %s1)
238; CHECK-NEXT:    brne.l %s1, %s2, .LBB2_1
239; CHECK-NEXT:  # %bb.2:
240; CHECK-NEXT:    or %s11, 0, %s9
241; CHECK-NEXT:    ld %s10, 8(, %s11)
242; CHECK-NEXT:    ld %s9, (, %s11)
243; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the mask slot; %4 is the large array (268435429 * 8 = 2147483432 bytes).
244  %3 = alloca <256 x i1>, align 32
245  %4 = alloca [268435429 x i64], align 8
246  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
247  call void @llvm.lifetime.start.p0(i64 2147483432, ptr nonnull %4)
248  store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3
249  br label %6
250
2515:                                                ; preds = %6
252  call void @llvm.lifetime.end.p0(i64 2147483432, ptr nonnull %4)
253  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
254  ret void
255
  ; Loop: volatile-store %1 into every element of %4.
2566:                                                ; preds = %2, %6
257  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
258  %8 = getelementptr inbounds [268435429 x i64], ptr %4, i64 0, i64 %7
259  store volatile i64 %1, ptr %8, align 8, !tbaa !6
260  %9 = add nuw nsw i64 %7, 1
261  %10 = icmp eq i64 %9, 268435429
262  br i1 %10, label %5, label %6, !llvm.loop !8
263}
264
;;; store__vm256_stk_big2: like @store__vm256_stk_big, with an array of
;;; [268435456 x i64] (2147483648 bytes).  In the expected code the mask slot
;;; address is again formed in %s13, and the loop bound in %s2 is now
;;; materialized with "lea %s2, -2147483648" followed by "and ..., (32)0"
;;; instead of a single lea.
265; Function Attrs: argmemonly nofree nounwind
266define fastcc void @store__vm256_stk_big2(<256 x i1> noundef %0, i64 noundef %1) {
267; CHECK-LABEL: store__vm256_stk_big2:
268; CHECK:       # %bb.0:
269; CHECK-NEXT:    st %s9, (, %s11)
270; CHECK-NEXT:    st %s10, 8(, %s11)
271; CHECK-NEXT:    or %s9, 0, %s11
272; CHECK-NEXT:    lea %s13, 2147483424
273; CHECK-NEXT:    and %s13, %s13, (32)0
274; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
275; CHECK-NEXT:    and %s11, %s11, (59)1
276; CHECK-NEXT:    brge.l %s11, %s8, .LBB3_4
277; CHECK-NEXT:  # %bb.3:
278; CHECK-NEXT:    ld %s61, 24(, %s14)
279; CHECK-NEXT:    or %s62, 0, %s0
280; CHECK-NEXT:    lea %s63, 315
281; CHECK-NEXT:    shm.l %s63, (%s61)
282; CHECK-NEXT:    shm.l %s8, 8(%s61)
283; CHECK-NEXT:    shm.l %s11, 16(%s61)
284; CHECK-NEXT:    monc
285; CHECK-NEXT:    or %s0, 0, %s62
286; CHECK-NEXT:  .LBB3_4:
287; CHECK-NEXT:    lea %s13, -2147483456
288; CHECK-NEXT:    and %s13, %s13, (32)0
289; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
290; CHECK-NEXT:    svm %s16, %vm1, 0
291; CHECK-NEXT:    st %s16, (, %s13)
292; CHECK-NEXT:    svm %s16, %vm1, 1
293; CHECK-NEXT:    st %s16, 8(, %s13)
294; CHECK-NEXT:    svm %s16, %vm1, 2
295; CHECK-NEXT:    st %s16, 16(, %s13)
296; CHECK-NEXT:    svm %s16, %vm1, 3
297; CHECK-NEXT:    st %s16, 24(, %s13)
298; CHECK-NEXT:    or %s1, 0, (0)1
299; CHECK-NEXT:    lea %s2, -2147483648
300; CHECK-NEXT:    and %s2, %s2, (32)0
301; CHECK-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
302; CHECK-NEXT:    st %s0, 192(%s1, %s11)
303; CHECK-NEXT:    lea %s1, 8(, %s1)
304; CHECK-NEXT:    brne.l %s1, %s2, .LBB3_1
305; CHECK-NEXT:  # %bb.2:
306; CHECK-NEXT:    or %s11, 0, %s9
307; CHECK-NEXT:    ld %s10, 8(, %s11)
308; CHECK-NEXT:    ld %s9, (, %s11)
309; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the mask slot; %4 is the large array (268435456 * 8 = 2147483648 bytes).
310  %3 = alloca <256 x i1>, align 32
311  %4 = alloca [268435456 x i64], align 8
312  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
313  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4)
314  store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3
315  br label %6
316
3175:                                                ; preds = %6
318  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4)
319  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
320  ret void
321
  ; Loop: volatile-store %1 into every element of %4.
3226:                                                ; preds = %2, %6
323  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
324  %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7
325  store volatile i64 %1, ptr %8, align 8, !tbaa !6
326  %9 = add nuw nsw i64 %7, 1
327  %10 = icmp eq i64 %9, 268435456
328  br i1 %10, label %5, label %6, !llvm.loop !10
329}
330
;;; store__vm256_stk_dyn: volatile-store a <256 x i1> mask into both a
;;; dynamically sized alloca (%1 elements) and a fixed-size stack slot.
;;; The expected code scales %s0 by 32 ("sll ..., 5"), calls __ve_grow_stack,
;;; stores the mask to the dynamic area at 240(, %s11) and to the fixed slot
;;; through %s9 at -32..-8.
;;; NOTE(review): both allocas are declared "align 8" while the stores say
;;; "align 32" -- confirm this mismatch is intentional for this test.
331; Function Attrs: argmemonly nofree nounwind
332define fastcc void @store__vm256_stk_dyn(<256 x i1> noundef %0, i64 noundef %1) {
333; CHECK-LABEL: store__vm256_stk_dyn:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    st %s9, (, %s11)
336; CHECK-NEXT:    st %s10, 8(, %s11)
337; CHECK-NEXT:    or %s9, 0, %s11
338; CHECK-NEXT:    lea %s11, -272(, %s11)
339; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB4_2
340; CHECK-NEXT:  # %bb.1:
341; CHECK-NEXT:    ld %s61, 24(, %s14)
342; CHECK-NEXT:    or %s62, 0, %s0
343; CHECK-NEXT:    lea %s63, 315
344; CHECK-NEXT:    shm.l %s63, (%s61)
345; CHECK-NEXT:    shm.l %s8, 8(%s61)
346; CHECK-NEXT:    shm.l %s11, 16(%s61)
347; CHECK-NEXT:    monc
348; CHECK-NEXT:    or %s0, 0, %s62
349; CHECK-NEXT:  .LBB4_2:
350; CHECK-NEXT:    sll %s0, %s0, 5
351; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
352; CHECK-NEXT:    and %s1, %s1, (32)0
353; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
354; CHECK-NEXT:    bsic %s10, (, %s12)
355; CHECK-NEXT:    lea %s0, 240(, %s11)
356; CHECK-NEXT:    svm %s1, %vm1, 3
357; CHECK-NEXT:    st %s1, 24(, %s0)
358; CHECK-NEXT:    svm %s1, %vm1, 2
359; CHECK-NEXT:    st %s1, 16(, %s0)
360; CHECK-NEXT:    svm %s1, %vm1, 1
361; CHECK-NEXT:    st %s1, 8(, %s0)
362; CHECK-NEXT:    svm %s1, %vm1, 0
363; CHECK-NEXT:    st %s1, (, %s0)
364; CHECK-NEXT:    svm %s16, %vm1, 0
365; CHECK-NEXT:    st %s16, -32(, %s9)
366; CHECK-NEXT:    svm %s16, %vm1, 1
367; CHECK-NEXT:    st %s16, -24(, %s9)
368; CHECK-NEXT:    svm %s16, %vm1, 2
369; CHECK-NEXT:    st %s16, -16(, %s9)
370; CHECK-NEXT:    svm %s16, %vm1, 3
371; CHECK-NEXT:    st %s16, -8(, %s9)
372; CHECK-NEXT:    or %s11, 0, %s9
373; CHECK-NEXT:    ld %s10, 8(, %s11)
374; CHECK-NEXT:    ld %s9, (, %s11)
375; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed-size slot; %4 is the dynamically sized allocation.
376  %3 = alloca <256 x i1>, align 8
377  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
378  %4 = alloca <256 x i1>, i64 %1, align 8
379  store volatile <256 x i1> %0, ptr %4, align 32, !tbaa !3
380  store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3
381  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
382  ret void
383}
384
;;; store__vm256_stk_dyn_align: like @store__vm256_stk_dyn, but the fixed-size
;;; slot is declared with 32-byte alignment.  The expected code saves %s17 at
;;; 40(, %s11), aligns %s11 with "and ..., (59)1", copies it into %s17, and
;;; addresses the fixed slot at 256..280(, %s17); the dynamic area is still
;;; reached via 240(, %s11) after the __ve_grow_stack call.  %s17 is restored
;;; in the epilogue.
385; Function Attrs: argmemonly nofree nounwind
386define fastcc void @store__vm256_stk_dyn_align(<256 x i1> noundef %0, i64 noundef %1) {
387; CHECK-LABEL: store__vm256_stk_dyn_align:
388; CHECK:       # %bb.0:
389; CHECK-NEXT:    st %s9, (, %s11)
390; CHECK-NEXT:    st %s10, 8(, %s11)
391; CHECK-NEXT:    st %s17, 40(, %s11)
392; CHECK-NEXT:    or %s9, 0, %s11
393; CHECK-NEXT:    lea %s11, -288(, %s11)
394; CHECK-NEXT:    and %s11, %s11, (59)1
395; CHECK-NEXT:    or %s17, 0, %s11
396; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB5_2
397; CHECK-NEXT:  # %bb.1:
398; CHECK-NEXT:    ld %s61, 24(, %s14)
399; CHECK-NEXT:    or %s62, 0, %s0
400; CHECK-NEXT:    lea %s63, 315
401; CHECK-NEXT:    shm.l %s63, (%s61)
402; CHECK-NEXT:    shm.l %s8, 8(%s61)
403; CHECK-NEXT:    shm.l %s11, 16(%s61)
404; CHECK-NEXT:    monc
405; CHECK-NEXT:    or %s0, 0, %s62
406; CHECK-NEXT:  .LBB5_2:
407; CHECK-NEXT:    sll %s0, %s0, 5
408; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
409; CHECK-NEXT:    and %s1, %s1, (32)0
410; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
411; CHECK-NEXT:    bsic %s10, (, %s12)
412; CHECK-NEXT:    lea %s0, 240(, %s11)
413; CHECK-NEXT:    svm %s1, %vm1, 3
414; CHECK-NEXT:    st %s1, 24(, %s0)
415; CHECK-NEXT:    svm %s1, %vm1, 2
416; CHECK-NEXT:    st %s1, 16(, %s0)
417; CHECK-NEXT:    svm %s1, %vm1, 1
418; CHECK-NEXT:    st %s1, 8(, %s0)
419; CHECK-NEXT:    svm %s1, %vm1, 0
420; CHECK-NEXT:    st %s1, (, %s0)
421; CHECK-NEXT:    svm %s16, %vm1, 0
422; CHECK-NEXT:    st %s16, 256(, %s17)
423; CHECK-NEXT:    svm %s16, %vm1, 1
424; CHECK-NEXT:    st %s16, 264(, %s17)
425; CHECK-NEXT:    svm %s16, %vm1, 2
426; CHECK-NEXT:    st %s16, 272(, %s17)
427; CHECK-NEXT:    svm %s16, %vm1, 3
428; CHECK-NEXT:    st %s16, 280(, %s17)
429; CHECK-NEXT:    or %s11, 0, %s9
430; CHECK-NEXT:    ld %s17, 40(, %s11)
431; CHECK-NEXT:    ld %s10, 8(, %s11)
432; CHECK-NEXT:    ld %s9, (, %s11)
433; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the 32-byte aligned fixed slot; %4 is the dynamically sized allocation.
434  %3 = alloca <256 x i1>, align 32
435  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
436  %4 = alloca <256 x i1>, i64 %1, align 8
437  store volatile <256 x i1> %0, ptr %4, align 32, !tbaa !3
438  store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3
439  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
440  ret void
441}
442
;;; store__vm256_stk_dyn_align2: like @store__vm256_stk_dyn_align, with a
;;; second fixed-size slot declared with 64-byte alignment.  The expected code
;;; aligns %s11 with "and ..., (58)1" and emits three groups of mask stores:
;;; the dynamic area via 240(, %s11), then 288..312(, %s17), then
;;; 256..280(, %s17).  Following the order of the volatile stores in the IR,
;;; the last two groups presumably correspond to %3 and %4 respectively.
443; Function Attrs: argmemonly nofree nounwind
444define fastcc void @store__vm256_stk_dyn_align2(<256 x i1> noundef %0, i64 noundef %1) {
445; CHECK-LABEL: store__vm256_stk_dyn_align2:
446; CHECK:       # %bb.0:
447; CHECK-NEXT:    st %s9, (, %s11)
448; CHECK-NEXT:    st %s10, 8(, %s11)
449; CHECK-NEXT:    st %s17, 40(, %s11)
450; CHECK-NEXT:    or %s9, 0, %s11
451; CHECK-NEXT:    lea %s11, -320(, %s11)
452; CHECK-NEXT:    and %s11, %s11, (58)1
453; CHECK-NEXT:    or %s17, 0, %s11
454; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB6_2
455; CHECK-NEXT:  # %bb.1:
456; CHECK-NEXT:    ld %s61, 24(, %s14)
457; CHECK-NEXT:    or %s62, 0, %s0
458; CHECK-NEXT:    lea %s63, 315
459; CHECK-NEXT:    shm.l %s63, (%s61)
460; CHECK-NEXT:    shm.l %s8, 8(%s61)
461; CHECK-NEXT:    shm.l %s11, 16(%s61)
462; CHECK-NEXT:    monc
463; CHECK-NEXT:    or %s0, 0, %s62
464; CHECK-NEXT:  .LBB6_2:
465; CHECK-NEXT:    sll %s0, %s0, 5
466; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
467; CHECK-NEXT:    and %s1, %s1, (32)0
468; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
469; CHECK-NEXT:    bsic %s10, (, %s12)
470; CHECK-NEXT:    lea %s0, 240(, %s11)
471; CHECK-NEXT:    svm %s1, %vm1, 3
472; CHECK-NEXT:    st %s1, 24(, %s0)
473; CHECK-NEXT:    svm %s1, %vm1, 2
474; CHECK-NEXT:    st %s1, 16(, %s0)
475; CHECK-NEXT:    svm %s1, %vm1, 1
476; CHECK-NEXT:    st %s1, 8(, %s0)
477; CHECK-NEXT:    svm %s1, %vm1, 0
478; CHECK-NEXT:    st %s1, (, %s0)
479; CHECK-NEXT:    svm %s16, %vm1, 0
480; CHECK-NEXT:    st %s16, 288(, %s17)
481; CHECK-NEXT:    svm %s16, %vm1, 1
482; CHECK-NEXT:    st %s16, 296(, %s17)
483; CHECK-NEXT:    svm %s16, %vm1, 2
484; CHECK-NEXT:    st %s16, 304(, %s17)
485; CHECK-NEXT:    svm %s16, %vm1, 3
486; CHECK-NEXT:    st %s16, 312(, %s17)
487; CHECK-NEXT:    svm %s16, %vm1, 0
488; CHECK-NEXT:    st %s16, 256(, %s17)
489; CHECK-NEXT:    svm %s16, %vm1, 1
490; CHECK-NEXT:    st %s16, 264(, %s17)
491; CHECK-NEXT:    svm %s16, %vm1, 2
492; CHECK-NEXT:    st %s16, 272(, %s17)
493; CHECK-NEXT:    svm %s16, %vm1, 3
494; CHECK-NEXT:    st %s16, 280(, %s17)
495; CHECK-NEXT:    or %s11, 0, %s9
496; CHECK-NEXT:    ld %s17, 40(, %s11)
497; CHECK-NEXT:    ld %s10, 8(, %s11)
498; CHECK-NEXT:    ld %s9, (, %s11)
499; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 (align 32) and %4 (align 64) are fixed slots; %5 is the dynamically
  ; sized allocation.
500  %3 = alloca <256 x i1>, align 32
501  %4 = alloca <256 x i1>, align 64
502  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
503  %5 = alloca <256 x i1>, i64 %1, align 8
504  store volatile <256 x i1> %0, ptr %5, align 32, !tbaa !3
505  store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3
506  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %4)
507  store volatile <256 x i1> %0, ptr %4, align 64, !tbaa !3
508  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %4)
509  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
510  ret void
511}
512
;;; store__vm256_stk_dyn_align_spill: like @store__vm256_stk_dyn_align, but
;;; @dummy and @pass(%1) are called between the dynamic alloca and the stores.
;;; The expected code therefore spills %vm1 to 256..280(, %s17) before the
;;; calls ("32-byte Folded Spill"), reloads it with ld + lvm afterwards, and
;;; only then stores the mask to the dynamic area (via %s19) and to the fixed
;;; slot at 288..312(, %s17).  %s18 and %s19 are saved at 48/56(, %s9) and
;;; restored before returning.
513; Function Attrs: nounwind
514define fastcc void @store__vm256_stk_dyn_align_spill(<256 x i1> noundef %0, i64 noundef %1) {
515; CHECK-LABEL: store__vm256_stk_dyn_align_spill:
516; CHECK:       # %bb.0:
517; CHECK-NEXT:    st %s9, (, %s11)
518; CHECK-NEXT:    st %s10, 8(, %s11)
519; CHECK-NEXT:    st %s17, 40(, %s11)
520; CHECK-NEXT:    or %s9, 0, %s11
521; CHECK-NEXT:    lea %s11, -320(, %s11)
522; CHECK-NEXT:    and %s11, %s11, (59)1
523; CHECK-NEXT:    or %s17, 0, %s11
524; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB7_2
525; CHECK-NEXT:  # %bb.1:
526; CHECK-NEXT:    ld %s61, 24(, %s14)
527; CHECK-NEXT:    or %s62, 0, %s0
528; CHECK-NEXT:    lea %s63, 315
529; CHECK-NEXT:    shm.l %s63, (%s61)
530; CHECK-NEXT:    shm.l %s8, 8(%s61)
531; CHECK-NEXT:    shm.l %s11, 16(%s61)
532; CHECK-NEXT:    monc
533; CHECK-NEXT:    or %s0, 0, %s62
534; CHECK-NEXT:  .LBB7_2:
535; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
536; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
537; CHECK-NEXT:    or %s18, 0, %s0
538; CHECK-NEXT:    svm %s16, %vm1, 0
539; CHECK-NEXT:    st %s16, 256(, %s17)
540; CHECK-NEXT:    svm %s16, %vm1, 1
541; CHECK-NEXT:    st %s16, 264(, %s17)
542; CHECK-NEXT:    svm %s16, %vm1, 2
543; CHECK-NEXT:    st %s16, 272(, %s17)
544; CHECK-NEXT:    svm %s16, %vm1, 3
545; CHECK-NEXT:    st %s16, 280(, %s17) # 32-byte Folded Spill
546; CHECK-NEXT:    sll %s0, %s0, 5
547; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
548; CHECK-NEXT:    and %s1, %s1, (32)0
549; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
550; CHECK-NEXT:    bsic %s10, (, %s12)
551; CHECK-NEXT:    lea %s19, 240(, %s11)
552; CHECK-NEXT:    lea %s0, dummy@lo
553; CHECK-NEXT:    and %s0, %s0, (32)0
554; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
555; CHECK-NEXT:    bsic %s10, (, %s12)
556; CHECK-NEXT:    lea %s0, pass@lo
557; CHECK-NEXT:    and %s0, %s0, (32)0
558; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
559; CHECK-NEXT:    or %s0, 0, %s18
560; CHECK-NEXT:    bsic %s10, (, %s12)
561; CHECK-NEXT:    ld %s16, 256(, %s17)
562; CHECK-NEXT:    lvm %vm1, 0, %s16
563; CHECK-NEXT:    ld %s16, 264(, %s17)
564; CHECK-NEXT:    lvm %vm1, 1, %s16
565; CHECK-NEXT:    ld %s16, 272(, %s17)
566; CHECK-NEXT:    lvm %vm1, 2, %s16
567; CHECK-NEXT:    ld %s16, 280(, %s17) # 32-byte Folded Reload
568; CHECK-NEXT:    lvm %vm1, 3, %s16
569; CHECK-NEXT:    svm %s0, %vm1, 3
570; CHECK-NEXT:    st %s0, 24(, %s19)
571; CHECK-NEXT:    svm %s0, %vm1, 2
572; CHECK-NEXT:    st %s0, 16(, %s19)
573; CHECK-NEXT:    svm %s0, %vm1, 1
574; CHECK-NEXT:    st %s0, 8(, %s19)
575; CHECK-NEXT:    svm %s0, %vm1, 0
576; CHECK-NEXT:    st %s0, (, %s19)
577; CHECK-NEXT:    svm %s16, %vm1, 0
578; CHECK-NEXT:    st %s16, 288(, %s17)
579; CHECK-NEXT:    svm %s16, %vm1, 1
580; CHECK-NEXT:    st %s16, 296(, %s17)
581; CHECK-NEXT:    svm %s16, %vm1, 2
582; CHECK-NEXT:    st %s16, 304(, %s17)
583; CHECK-NEXT:    svm %s16, %vm1, 3
584; CHECK-NEXT:    st %s16, 312(, %s17)
585; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
586; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
587; CHECK-NEXT:    or %s11, 0, %s9
588; CHECK-NEXT:    ld %s17, 40(, %s11)
589; CHECK-NEXT:    ld %s10, 8(, %s11)
590; CHECK-NEXT:    ld %s9, (, %s11)
591; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the 32-byte aligned fixed slot; %4 is the dynamically sized allocation.
592  %3 = alloca <256 x i1>, align 32
593  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
594  %4 = alloca <256 x i1>, i64 %1, align 8
  ; The two calls sit before the stores, so %0 must stay live across them.
595  tail call fastcc void @dummy()
596  tail call fastcc void @pass(i64 noundef %1)
597  store volatile <256 x i1> %0, ptr %4, align 32, !tbaa !3
598  store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3
599  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
600  ret void
601}
602
;;; External fastcc callees used by @store__vm256_stk_dyn_align_spill; only
;;; their declarations are needed here.
603declare fastcc void @dummy()
604
605declare fastcc void @pass(i64 noundef)
606
;;; store__vm512_stk: volatile-store a <512 x i1> mask argument into a single
;;; fixed-size stack slot declared with 64-byte alignment.  The expected code
;;; aligns %s11 with "and ..., (58)1" and writes eight 64-bit pieces: four
;;; from %vm3 at 192..216(, %s11) followed by four from %vm2 at
;;; 224..248(, %s11).
607; Function Attrs: argmemonly nofree nounwind
608define fastcc void @store__vm512_stk(<512 x i1> noundef %0) {
609; CHECK-LABEL: store__vm512_stk:
610; CHECK:       # %bb.0:
611; CHECK-NEXT:    st %s9, (, %s11)
612; CHECK-NEXT:    st %s10, 8(, %s11)
613; CHECK-NEXT:    or %s9, 0, %s11
614; CHECK-NEXT:    lea %s11, -256(, %s11)
615; CHECK-NEXT:    and %s11, %s11, (58)1
616; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB8_2
617; CHECK-NEXT:  # %bb.1:
618; CHECK-NEXT:    ld %s61, 24(, %s14)
619; CHECK-NEXT:    or %s62, 0, %s0
620; CHECK-NEXT:    lea %s63, 315
621; CHECK-NEXT:    shm.l %s63, (%s61)
622; CHECK-NEXT:    shm.l %s8, 8(%s61)
623; CHECK-NEXT:    shm.l %s11, 16(%s61)
624; CHECK-NEXT:    monc
625; CHECK-NEXT:    or %s0, 0, %s62
626; CHECK-NEXT:  .LBB8_2:
627; CHECK-NEXT:    svm %s16, %vm3, 0
628; CHECK-NEXT:    st %s16, 192(, %s11)
629; CHECK-NEXT:    svm %s16, %vm3, 1
630; CHECK-NEXT:    st %s16, 200(, %s11)
631; CHECK-NEXT:    svm %s16, %vm3, 2
632; CHECK-NEXT:    st %s16, 208(, %s11)
633; CHECK-NEXT:    svm %s16, %vm3, 3
634; CHECK-NEXT:    st %s16, 216(, %s11)
635; CHECK-NEXT:    svm %s16, %vm2, 0
636; CHECK-NEXT:    st %s16, 224(, %s11)
637; CHECK-NEXT:    svm %s16, %vm2, 1
638; CHECK-NEXT:    st %s16, 232(, %s11)
639; CHECK-NEXT:    svm %s16, %vm2, 2
640; CHECK-NEXT:    st %s16, 240(, %s11)
641; CHECK-NEXT:    svm %s16, %vm2, 3
642; CHECK-NEXT:    st %s16, 248(, %s11)
643; CHECK-NEXT:    or %s11, 0, %s9
644; CHECK-NEXT:    ld %s10, 8(, %s11)
645; CHECK-NEXT:    ld %s9, (, %s11)
646; CHECK-NEXT:    b.l.t (, %s10)
  ; 64-byte slot for the mask; the store is volatile so it is not removed.
647  %2 = alloca <512 x i1>, align 64
648  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
649  store volatile <512 x i1> %0, ptr %2, align 64, !tbaa !3
650  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
651  ret void
652}
653
;;; store__vm512_stk_bc: bitcast a <512 x i1> mask argument to i512 and
;;; volatile-store the integer into a 64-byte aligned stack slot.  The
;;; expected code first writes the mask pieces from %vm3/%vm2 to
;;; 192..248(, %s11), reloads them as eight 64-bit words into %s0..%s7, and
;;; then stores those words to 256..312(, %s11).
654; Function Attrs: argmemonly nofree nounwind
655define fastcc void @store__vm512_stk_bc(<512 x i1> noundef %0) {
656; CHECK-LABEL: store__vm512_stk_bc:
657; CHECK:       # %bb.0:
658; CHECK-NEXT:    st %s9, (, %s11)
659; CHECK-NEXT:    st %s10, 8(, %s11)
660; CHECK-NEXT:    or %s9, 0, %s11
661; CHECK-NEXT:    lea %s11, -320(, %s11)
662; CHECK-NEXT:    and %s11, %s11, (58)1
663; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB9_2
664; CHECK-NEXT:  # %bb.1:
665; CHECK-NEXT:    ld %s61, 24(, %s14)
666; CHECK-NEXT:    or %s62, 0, %s0
667; CHECK-NEXT:    lea %s63, 315
668; CHECK-NEXT:    shm.l %s63, (%s61)
669; CHECK-NEXT:    shm.l %s8, 8(%s61)
670; CHECK-NEXT:    shm.l %s11, 16(%s61)
671; CHECK-NEXT:    monc
672; CHECK-NEXT:    or %s0, 0, %s62
673; CHECK-NEXT:  .LBB9_2:
674; CHECK-NEXT:    svm %s16, %vm3, 0
675; CHECK-NEXT:    st %s16, 192(, %s11)
676; CHECK-NEXT:    svm %s16, %vm3, 1
677; CHECK-NEXT:    st %s16, 200(, %s11)
678; CHECK-NEXT:    svm %s16, %vm3, 2
679; CHECK-NEXT:    st %s16, 208(, %s11)
680; CHECK-NEXT:    svm %s16, %vm3, 3
681; CHECK-NEXT:    st %s16, 216(, %s11)
682; CHECK-NEXT:    svm %s16, %vm2, 0
683; CHECK-NEXT:    st %s16, 224(, %s11)
684; CHECK-NEXT:    svm %s16, %vm2, 1
685; CHECK-NEXT:    st %s16, 232(, %s11)
686; CHECK-NEXT:    svm %s16, %vm2, 2
687; CHECK-NEXT:    st %s16, 240(, %s11)
688; CHECK-NEXT:    svm %s16, %vm2, 3
689; CHECK-NEXT:    st %s16, 248(, %s11)
690; CHECK-NEXT:    ld %s0, 192(, %s11)
691; CHECK-NEXT:    ld %s1, 200(, %s11)
692; CHECK-NEXT:    ld %s2, 208(, %s11)
693; CHECK-NEXT:    ld %s3, 216(, %s11)
694; CHECK-NEXT:    ld %s4, 248(, %s11)
695; CHECK-NEXT:    ld %s5, 240(, %s11)
696; CHECK-NEXT:    ld %s6, 232(, %s11)
697; CHECK-NEXT:    ld %s7, 224(, %s11)
698; CHECK-NEXT:    st %s4, 312(, %s11)
699; CHECK-NEXT:    st %s5, 304(, %s11)
700; CHECK-NEXT:    st %s6, 296(, %s11)
701; CHECK-NEXT:    st %s7, 288(, %s11)
702; CHECK-NEXT:    st %s3, 280(, %s11)
703; CHECK-NEXT:    st %s2, 272(, %s11)
704; CHECK-NEXT:    st %s1, 264(, %s11)
705; CHECK-NEXT:    st %s0, 256(, %s11)
706; CHECK-NEXT:    or %s11, 0, %s9
707; CHECK-NEXT:    ld %s10, 8(, %s11)
708; CHECK-NEXT:    ld %s9, (, %s11)
709; CHECK-NEXT:    b.l.t (, %s10)
  ; The slot has integer type i512; the mask is reinterpreted with a bitcast
  ; before being stored.
710  %2 = alloca i512, align 64
711  %3 = bitcast <512 x i1> %0 to i512
712  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
713  store volatile i512 %3, ptr %2, align 64, !tbaa !3
714  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
715  ret void
716}
717
;;; store__vm512_stk_big: volatile-store a <512 x i1> mask into a 64-byte
;;; aligned stack slot that sits next to a [268435455 x i64] array
;;; (2147483640 bytes), then fill the array with the i64 argument in a loop.
;;; The expected code forms the frame adjustment and the mask slot address in
;;; %s13 with lea / and (32)0 / lea.sl and stores the eight mask pieces
;;; (%vm3 then %vm2) at 0..56(, %s13).
718; Function Attrs: argmemonly nofree nounwind
719define fastcc void @store__vm512_stk_big(<512 x i1> noundef %0, i64 noundef %1) {
720; CHECK-LABEL: store__vm512_stk_big:
721; CHECK:       # %bb.0:
722; CHECK-NEXT:    st %s9, (, %s11)
723; CHECK-NEXT:    st %s10, 8(, %s11)
724; CHECK-NEXT:    or %s9, 0, %s11
725; CHECK-NEXT:    lea %s13, 2147483392
726; CHECK-NEXT:    and %s13, %s13, (32)0
727; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
728; CHECK-NEXT:    and %s11, %s11, (58)1
729; CHECK-NEXT:    brge.l %s11, %s8, .LBB10_4
730; CHECK-NEXT:  # %bb.3:
731; CHECK-NEXT:    ld %s61, 24(, %s14)
732; CHECK-NEXT:    or %s62, 0, %s0
733; CHECK-NEXT:    lea %s63, 315
734; CHECK-NEXT:    shm.l %s63, (%s61)
735; CHECK-NEXT:    shm.l %s8, 8(%s61)
736; CHECK-NEXT:    shm.l %s11, 16(%s61)
737; CHECK-NEXT:    monc
738; CHECK-NEXT:    or %s0, 0, %s62
739; CHECK-NEXT:  .LBB10_4:
740; CHECK-NEXT:    lea %s13, -2147483456
741; CHECK-NEXT:    and %s13, %s13, (32)0
742; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
743; CHECK-NEXT:    svm %s16, %vm3, 0
744; CHECK-NEXT:    st %s16, (, %s13)
745; CHECK-NEXT:    svm %s16, %vm3, 1
746; CHECK-NEXT:    st %s16, 8(, %s13)
747; CHECK-NEXT:    svm %s16, %vm3, 2
748; CHECK-NEXT:    st %s16, 16(, %s13)
749; CHECK-NEXT:    svm %s16, %vm3, 3
750; CHECK-NEXT:    st %s16, 24(, %s13)
751; CHECK-NEXT:    svm %s16, %vm2, 0
752; CHECK-NEXT:    st %s16, 32(, %s13)
753; CHECK-NEXT:    svm %s16, %vm2, 1
754; CHECK-NEXT:    st %s16, 40(, %s13)
755; CHECK-NEXT:    svm %s16, %vm2, 2
756; CHECK-NEXT:    st %s16, 48(, %s13)
757; CHECK-NEXT:    svm %s16, %vm2, 3
758; CHECK-NEXT:    st %s16, 56(, %s13)
759; CHECK-NEXT:    or %s1, 0, (0)1
760; CHECK-NEXT:    lea %s2, 2147483640
761; CHECK-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
762; CHECK-NEXT:    st %s0, 200(%s1, %s11)
763; CHECK-NEXT:    lea %s1, 8(, %s1)
764; CHECK-NEXT:    brne.l %s1, %s2, .LBB10_1
765; CHECK-NEXT:  # %bb.2:
766; CHECK-NEXT:    or %s11, 0, %s9
767; CHECK-NEXT:    ld %s10, 8(, %s11)
768; CHECK-NEXT:    ld %s9, (, %s11)
769; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the mask slot; %4 is the large array (268435455 * 8 = 2147483640 bytes).
770  %3 = alloca <512 x i1>, align 64
771  %4 = alloca [268435455 x i64], align 8
772  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
773  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4)
774  store volatile <512 x i1> %0, ptr %3, align 64, !tbaa !3
775  br label %6
776
7775:                                                ; preds = %6
778  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4)
779  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
780  ret void
781
  ; Loop: volatile-store %1 into every element of %4.
7826:                                                ; preds = %2, %6
783  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
784  %8 = getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7
785  store volatile i64 %1, ptr %8, align 8, !tbaa !6
786  %9 = add nuw nsw i64 %7, 1
787  %10 = icmp eq i64 %9, 268435455
788  br i1 %10, label %5, label %6, !llvm.loop !11
789}
790
;;; store__vm512_stk_big2: same shape as store__vm512_stk_big, but the array
;;; has one more element: [268435456 x i64], i.e. 2147483648 bytes according
;;; to its lifetime markers.
;;;
;;; Differences visible in the expected code: the loop bound is materialized
;;; with "lea %s2, -2147483648" followed by "and %s2, %s2, (32)0" instead of
;;; a single lea, and the loop stores %s0 at 192(%s1, %s11).
791; Function Attrs: argmemonly nofree nounwind
792define fastcc void @store__vm512_stk_big2(<512 x i1> noundef %0, i64 noundef %1) {
793; CHECK-LABEL: store__vm512_stk_big2:
794; CHECK:       # %bb.0:
795; CHECK-NEXT:    st %s9, (, %s11)
796; CHECK-NEXT:    st %s10, 8(, %s11)
797; CHECK-NEXT:    or %s9, 0, %s11
798; CHECK-NEXT:    lea %s13, 2147483392
799; CHECK-NEXT:    and %s13, %s13, (32)0
800; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
801; CHECK-NEXT:    and %s11, %s11, (58)1
802; CHECK-NEXT:    brge.l %s11, %s8, .LBB11_4
803; CHECK-NEXT:  # %bb.3:
804; CHECK-NEXT:    ld %s61, 24(, %s14)
805; CHECK-NEXT:    or %s62, 0, %s0
806; CHECK-NEXT:    lea %s63, 315
807; CHECK-NEXT:    shm.l %s63, (%s61)
808; CHECK-NEXT:    shm.l %s8, 8(%s61)
809; CHECK-NEXT:    shm.l %s11, 16(%s61)
810; CHECK-NEXT:    monc
811; CHECK-NEXT:    or %s0, 0, %s62
812; CHECK-NEXT:  .LBB11_4:
813; CHECK-NEXT:    lea %s13, -2147483456
814; CHECK-NEXT:    and %s13, %s13, (32)0
815; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
816; CHECK-NEXT:    svm %s16, %vm3, 0
817; CHECK-NEXT:    st %s16, (, %s13)
818; CHECK-NEXT:    svm %s16, %vm3, 1
819; CHECK-NEXT:    st %s16, 8(, %s13)
820; CHECK-NEXT:    svm %s16, %vm3, 2
821; CHECK-NEXT:    st %s16, 16(, %s13)
822; CHECK-NEXT:    svm %s16, %vm3, 3
823; CHECK-NEXT:    st %s16, 24(, %s13)
824; CHECK-NEXT:    svm %s16, %vm2, 0
825; CHECK-NEXT:    st %s16, 32(, %s13)
826; CHECK-NEXT:    svm %s16, %vm2, 1
827; CHECK-NEXT:    st %s16, 40(, %s13)
828; CHECK-NEXT:    svm %s16, %vm2, 2
829; CHECK-NEXT:    st %s16, 48(, %s13)
830; CHECK-NEXT:    svm %s16, %vm2, 3
831; CHECK-NEXT:    st %s16, 56(, %s13)
832; CHECK-NEXT:    or %s1, 0, (0)1
833; CHECK-NEXT:    lea %s2, -2147483648
834; CHECK-NEXT:    and %s2, %s2, (32)0
835; CHECK-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
836; CHECK-NEXT:    st %s0, 192(%s1, %s11)
837; CHECK-NEXT:    lea %s1, 8(, %s1)
838; CHECK-NEXT:    brne.l %s1, %s2, .LBB11_1
839; CHECK-NEXT:  # %bb.2:
840; CHECK-NEXT:    or %s11, 0, %s9
841; CHECK-NEXT:    ld %s10, 8(, %s11)
842; CHECK-NEXT:    ld %s9, (, %s11)
843; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the 64-byte mask slot; %4 is the 2147483648-byte array.
844  %3 = alloca <512 x i1>, align 64
845  %4 = alloca [268435456 x i64], align 8
846  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
847  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4)
  ; The store under test: volatile so that it cannot be dropped.
848  store volatile <512 x i1> %0, ptr %3, align 64, !tbaa !3
849  br label %6
850
; Exit block: end both lifetimes and return.
8515:                                                ; preds = %6
852  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4)
853  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
854  ret void
855
; Loop block: volatile-store %1 into element %7 of the array, for indices
; 0 through 268435455.
8566:                                                ; preds = %2, %6
857  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
858  %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7
859  store volatile i64 %1, ptr %8, align 8, !tbaa !6
860  %9 = add nuw nsw i64 %7, 1
861  %10 = icmp eq i64 %9, 268435456
862  br i1 %10, label %5, label %6, !llvm.loop !12
863}
864
;;; store__vm512_stk_dyn: store a <512 x i1> mask to both a dynamically
;;; sized stack allocation and a fixed 64-byte aligned stack slot.
;;;
;;; The IR allocates one fixed <512 x i1> slot (align 64) and then %1
;;; elements of <512 x i1> dynamically; the mask is volatile-stored to the
;;; dynamic area first and to the fixed slot second.
;;;
;;; Expected code: %s17 is saved and set to a copy of the aligned %s11, %s0
;;; is shifted left by 6 before the call to __ve_grow_stack, the dynamic
;;; area is addressed as 240(, %s11) after that call, and the fixed slot is
;;; written at offsets 256..312 from %s17.
865; Function Attrs: argmemonly nofree nounwind
866define fastcc void @store__vm512_stk_dyn(<512 x i1> noundef %0, i64 noundef %1) {
867; CHECK-LABEL: store__vm512_stk_dyn:
868; CHECK:       # %bb.0:
869; CHECK-NEXT:    st %s9, (, %s11)
870; CHECK-NEXT:    st %s10, 8(, %s11)
871; CHECK-NEXT:    st %s17, 40(, %s11)
872; CHECK-NEXT:    or %s9, 0, %s11
873; CHECK-NEXT:    lea %s11, -320(, %s11)
874; CHECK-NEXT:    and %s11, %s11, (58)1
875; CHECK-NEXT:    or %s17, 0, %s11
876; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB12_2
877; CHECK-NEXT:  # %bb.1:
878; CHECK-NEXT:    ld %s61, 24(, %s14)
879; CHECK-NEXT:    or %s62, 0, %s0
880; CHECK-NEXT:    lea %s63, 315
881; CHECK-NEXT:    shm.l %s63, (%s61)
882; CHECK-NEXT:    shm.l %s8, 8(%s61)
883; CHECK-NEXT:    shm.l %s11, 16(%s61)
884; CHECK-NEXT:    monc
885; CHECK-NEXT:    or %s0, 0, %s62
886; CHECK-NEXT:  .LBB12_2:
887; CHECK-NEXT:    sll %s0, %s0, 6
888; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
889; CHECK-NEXT:    and %s1, %s1, (32)0
890; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
891; CHECK-NEXT:    bsic %s10, (, %s12)
892; CHECK-NEXT:    lea %s0, 240(, %s11)
893; CHECK-NEXT:    svm %s1, %vm2, 3
894; CHECK-NEXT:    st %s1, 56(, %s0)
895; CHECK-NEXT:    svm %s1, %vm2, 2
896; CHECK-NEXT:    st %s1, 48(, %s0)
897; CHECK-NEXT:    svm %s1, %vm2, 1
898; CHECK-NEXT:    st %s1, 40(, %s0)
899; CHECK-NEXT:    svm %s1, %vm2, 0
900; CHECK-NEXT:    st %s1, 32(, %s0)
901; CHECK-NEXT:    svm %s1, %vm3, 3
902; CHECK-NEXT:    st %s1, 24(, %s0)
903; CHECK-NEXT:    svm %s1, %vm3, 2
904; CHECK-NEXT:    st %s1, 16(, %s0)
905; CHECK-NEXT:    svm %s1, %vm3, 1
906; CHECK-NEXT:    st %s1, 8(, %s0)
907; CHECK-NEXT:    svm %s1, %vm3, 0
908; CHECK-NEXT:    st %s1, (, %s0)
909; CHECK-NEXT:    svm %s16, %vm3, 0
910; CHECK-NEXT:    st %s16, 256(, %s17)
911; CHECK-NEXT:    svm %s16, %vm3, 1
912; CHECK-NEXT:    st %s16, 264(, %s17)
913; CHECK-NEXT:    svm %s16, %vm3, 2
914; CHECK-NEXT:    st %s16, 272(, %s17)
915; CHECK-NEXT:    svm %s16, %vm3, 3
916; CHECK-NEXT:    st %s16, 280(, %s17)
917; CHECK-NEXT:    svm %s16, %vm2, 0
918; CHECK-NEXT:    st %s16, 288(, %s17)
919; CHECK-NEXT:    svm %s16, %vm2, 1
920; CHECK-NEXT:    st %s16, 296(, %s17)
921; CHECK-NEXT:    svm %s16, %vm2, 2
922; CHECK-NEXT:    st %s16, 304(, %s17)
923; CHECK-NEXT:    svm %s16, %vm2, 3
924; CHECK-NEXT:    st %s16, 312(, %s17)
925; CHECK-NEXT:    or %s11, 0, %s9
926; CHECK-NEXT:    ld %s17, 40(, %s11)
927; CHECK-NEXT:    ld %s10, 8(, %s11)
928; CHECK-NEXT:    ld %s9, (, %s11)
929; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed 64-byte aligned slot.
930  %3 = alloca <512 x i1>, align 64
931  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; %4 is the dynamic allocation: %1 elements of <512 x i1>.
  ; NOTE(review): the store to %4 below asserts align 64 while this alloca
  ; only requests align 8 -- confirm this mismatch is intentional.
932  %4 = alloca <512 x i1>, i64 %1, align 8
933  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
934  store volatile <512 x i1> %0, ptr %3, align 64, !tbaa !3
935  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
936  ret void
937}
938
;;; store__vm512_stk_dyn_align: variant of store__vm512_stk_dyn in which the
;;; fixed <512 x i1> slot is declared with align 32 instead of align 64.
;;;
;;; The IR volatile-stores the mask to the dynamic allocation first and to
;;; the fixed slot second.
;;;
;;; Difference visible in the expected code: %s11 is masked with (59)1
;;; rather than (58)1 after the frame is reserved.  The remaining sequence
;;; (sll by 6, call to __ve_grow_stack, dynamic area at 240(, %s11), fixed
;;; slot at 256..312 from %s17) matches store__vm512_stk_dyn.
939; Function Attrs: argmemonly nofree nounwind
940define fastcc void @store__vm512_stk_dyn_align(<512 x i1> noundef %0, i64 noundef %1) {
941; CHECK-LABEL: store__vm512_stk_dyn_align:
942; CHECK:       # %bb.0:
943; CHECK-NEXT:    st %s9, (, %s11)
944; CHECK-NEXT:    st %s10, 8(, %s11)
945; CHECK-NEXT:    st %s17, 40(, %s11)
946; CHECK-NEXT:    or %s9, 0, %s11
947; CHECK-NEXT:    lea %s11, -320(, %s11)
948; CHECK-NEXT:    and %s11, %s11, (59)1
949; CHECK-NEXT:    or %s17, 0, %s11
950; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB13_2
951; CHECK-NEXT:  # %bb.1:
952; CHECK-NEXT:    ld %s61, 24(, %s14)
953; CHECK-NEXT:    or %s62, 0, %s0
954; CHECK-NEXT:    lea %s63, 315
955; CHECK-NEXT:    shm.l %s63, (%s61)
956; CHECK-NEXT:    shm.l %s8, 8(%s61)
957; CHECK-NEXT:    shm.l %s11, 16(%s61)
958; CHECK-NEXT:    monc
959; CHECK-NEXT:    or %s0, 0, %s62
960; CHECK-NEXT:  .LBB13_2:
961; CHECK-NEXT:    sll %s0, %s0, 6
962; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
963; CHECK-NEXT:    and %s1, %s1, (32)0
964; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
965; CHECK-NEXT:    bsic %s10, (, %s12)
966; CHECK-NEXT:    lea %s0, 240(, %s11)
967; CHECK-NEXT:    svm %s1, %vm2, 3
968; CHECK-NEXT:    st %s1, 56(, %s0)
969; CHECK-NEXT:    svm %s1, %vm2, 2
970; CHECK-NEXT:    st %s1, 48(, %s0)
971; CHECK-NEXT:    svm %s1, %vm2, 1
972; CHECK-NEXT:    st %s1, 40(, %s0)
973; CHECK-NEXT:    svm %s1, %vm2, 0
974; CHECK-NEXT:    st %s1, 32(, %s0)
975; CHECK-NEXT:    svm %s1, %vm3, 3
976; CHECK-NEXT:    st %s1, 24(, %s0)
977; CHECK-NEXT:    svm %s1, %vm3, 2
978; CHECK-NEXT:    st %s1, 16(, %s0)
979; CHECK-NEXT:    svm %s1, %vm3, 1
980; CHECK-NEXT:    st %s1, 8(, %s0)
981; CHECK-NEXT:    svm %s1, %vm3, 0
982; CHECK-NEXT:    st %s1, (, %s0)
983; CHECK-NEXT:    svm %s16, %vm3, 0
984; CHECK-NEXT:    st %s16, 256(, %s17)
985; CHECK-NEXT:    svm %s16, %vm3, 1
986; CHECK-NEXT:    st %s16, 264(, %s17)
987; CHECK-NEXT:    svm %s16, %vm3, 2
988; CHECK-NEXT:    st %s16, 272(, %s17)
989; CHECK-NEXT:    svm %s16, %vm3, 3
990; CHECK-NEXT:    st %s16, 280(, %s17)
991; CHECK-NEXT:    svm %s16, %vm2, 0
992; CHECK-NEXT:    st %s16, 288(, %s17)
993; CHECK-NEXT:    svm %s16, %vm2, 1
994; CHECK-NEXT:    st %s16, 296(, %s17)
995; CHECK-NEXT:    svm %s16, %vm2, 2
996; CHECK-NEXT:    st %s16, 304(, %s17)
997; CHECK-NEXT:    svm %s16, %vm2, 3
998; CHECK-NEXT:    st %s16, 312(, %s17)
999; CHECK-NEXT:    or %s11, 0, %s9
1000; CHECK-NEXT:    ld %s17, 40(, %s11)
1001; CHECK-NEXT:    ld %s10, 8(, %s11)
1002; CHECK-NEXT:    ld %s9, (, %s11)
1003; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed slot, declared with 32-byte alignment in this variant.
1004  %3 = alloca <512 x i1>, align 32
1005  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; %4 is the dynamic allocation: %1 elements of <512 x i1>.
  ; NOTE(review): the store to %4 below asserts align 64 while this alloca
  ; only requests align 8 -- confirm this mismatch is intentional.
1006  %4 = alloca <512 x i1>, i64 %1, align 8
1007  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
1008  store volatile <512 x i1> %0, ptr %3, align 32, !tbaa !3
1009  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
1010  ret void
1011}
1012
;;; store__vm512_stk_dyn_align2: two fixed <512 x i1> slots with different
;;; alignments (align 32 and align 64) combined with a dynamic allocation of
;;; %1 bytes (alloca i8).
;;;
;;; The IR volatile-stores the mask three times, in this order: to the
;;; dynamic area (%5), to the align-32 slot (%3), to the align-64 slot (%4).
;;;
;;; Expected code: the frame reserves 384 bytes and %s11 is masked with
;;; (58)1; the dynamic size is rounded with "lea %s0, 15(, %s0)" and
;;; "and %s0, -16, %s0" before the call to __ve_grow_stack; the three stores
;;; then appear as eight svm + st pairs each, addressed through %s0
;;; (240(, %s11)), then 320..376 from %s17, then 256..312 from %s17.
1013; Function Attrs: argmemonly nofree nounwind
1014define fastcc void @store__vm512_stk_dyn_align2(<512 x i1> noundef %0, i64 noundef %1) {
1015; CHECK-LABEL: store__vm512_stk_dyn_align2:
1016; CHECK:       # %bb.0:
1017; CHECK-NEXT:    st %s9, (, %s11)
1018; CHECK-NEXT:    st %s10, 8(, %s11)
1019; CHECK-NEXT:    st %s17, 40(, %s11)
1020; CHECK-NEXT:    or %s9, 0, %s11
1021; CHECK-NEXT:    lea %s11, -384(, %s11)
1022; CHECK-NEXT:    and %s11, %s11, (58)1
1023; CHECK-NEXT:    or %s17, 0, %s11
1024; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB14_2
1025; CHECK-NEXT:  # %bb.1:
1026; CHECK-NEXT:    ld %s61, 24(, %s14)
1027; CHECK-NEXT:    or %s62, 0, %s0
1028; CHECK-NEXT:    lea %s63, 315
1029; CHECK-NEXT:    shm.l %s63, (%s61)
1030; CHECK-NEXT:    shm.l %s8, 8(%s61)
1031; CHECK-NEXT:    shm.l %s11, 16(%s61)
1032; CHECK-NEXT:    monc
1033; CHECK-NEXT:    or %s0, 0, %s62
1034; CHECK-NEXT:  .LBB14_2:
1035; CHECK-NEXT:    lea %s0, 15(, %s0)
1036; CHECK-NEXT:    and %s0, -16, %s0
1037; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
1038; CHECK-NEXT:    and %s1, %s1, (32)0
1039; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
1040; CHECK-NEXT:    bsic %s10, (, %s12)
1041; CHECK-NEXT:    lea %s0, 240(, %s11)
1042; CHECK-NEXT:    svm %s1, %vm2, 3
1043; CHECK-NEXT:    st %s1, 56(, %s0)
1044; CHECK-NEXT:    svm %s1, %vm2, 2
1045; CHECK-NEXT:    st %s1, 48(, %s0)
1046; CHECK-NEXT:    svm %s1, %vm2, 1
1047; CHECK-NEXT:    st %s1, 40(, %s0)
1048; CHECK-NEXT:    svm %s1, %vm2, 0
1049; CHECK-NEXT:    st %s1, 32(, %s0)
1050; CHECK-NEXT:    svm %s1, %vm3, 3
1051; CHECK-NEXT:    st %s1, 24(, %s0)
1052; CHECK-NEXT:    svm %s1, %vm3, 2
1053; CHECK-NEXT:    st %s1, 16(, %s0)
1054; CHECK-NEXT:    svm %s1, %vm3, 1
1055; CHECK-NEXT:    st %s1, 8(, %s0)
1056; CHECK-NEXT:    svm %s1, %vm3, 0
1057; CHECK-NEXT:    st %s1, (, %s0)
1058; CHECK-NEXT:    svm %s16, %vm3, 0
1059; CHECK-NEXT:    st %s16, 320(, %s17)
1060; CHECK-NEXT:    svm %s16, %vm3, 1
1061; CHECK-NEXT:    st %s16, 328(, %s17)
1062; CHECK-NEXT:    svm %s16, %vm3, 2
1063; CHECK-NEXT:    st %s16, 336(, %s17)
1064; CHECK-NEXT:    svm %s16, %vm3, 3
1065; CHECK-NEXT:    st %s16, 344(, %s17)
1066; CHECK-NEXT:    svm %s16, %vm2, 0
1067; CHECK-NEXT:    st %s16, 352(, %s17)
1068; CHECK-NEXT:    svm %s16, %vm2, 1
1069; CHECK-NEXT:    st %s16, 360(, %s17)
1070; CHECK-NEXT:    svm %s16, %vm2, 2
1071; CHECK-NEXT:    st %s16, 368(, %s17)
1072; CHECK-NEXT:    svm %s16, %vm2, 3
1073; CHECK-NEXT:    st %s16, 376(, %s17)
1074; CHECK-NEXT:    svm %s16, %vm3, 0
1075; CHECK-NEXT:    st %s16, 256(, %s17)
1076; CHECK-NEXT:    svm %s16, %vm3, 1
1077; CHECK-NEXT:    st %s16, 264(, %s17)
1078; CHECK-NEXT:    svm %s16, %vm3, 2
1079; CHECK-NEXT:    st %s16, 272(, %s17)
1080; CHECK-NEXT:    svm %s16, %vm3, 3
1081; CHECK-NEXT:    st %s16, 280(, %s17)
1082; CHECK-NEXT:    svm %s16, %vm2, 0
1083; CHECK-NEXT:    st %s16, 288(, %s17)
1084; CHECK-NEXT:    svm %s16, %vm2, 1
1085; CHECK-NEXT:    st %s16, 296(, %s17)
1086; CHECK-NEXT:    svm %s16, %vm2, 2
1087; CHECK-NEXT:    st %s16, 304(, %s17)
1088; CHECK-NEXT:    svm %s16, %vm2, 3
1089; CHECK-NEXT:    st %s16, 312(, %s17)
1090; CHECK-NEXT:    or %s11, 0, %s9
1091; CHECK-NEXT:    ld %s17, 40(, %s11)
1092; CHECK-NEXT:    ld %s10, 8(, %s11)
1093; CHECK-NEXT:    ld %s9, (, %s11)
1094; CHECK-NEXT:    b.l.t (, %s10)
  ; Two fixed slots: %3 with 32-byte alignment, %4 with 64-byte alignment.
1095  %3 = alloca <512 x i1>, align 32
1096  %4 = alloca <512 x i1>, align 64
1097  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; %5 is the dynamic allocation: %1 elements of i8.
  ; NOTE(review): the store to %5 below asserts align 64 while this alloca
  ; only requests align 8 -- confirm this mismatch is intentional.
1098  %5 = alloca i8, i64 %1, align 8
1099  store volatile <512 x i1> %0, ptr %5, align 64, !tbaa !3
1100  store volatile <512 x i1> %0, ptr %3, align 32, !tbaa !3
  ; The lifetime of %4 starts only here, after the first two stores.
1101  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %4)
1102  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
1103  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %4)
1104  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
1105  ret void
1106}
1107
;;; store__vm512_stk_dyn_align_spill: variant of store__vm512_stk_dyn_align
;;; that calls @dummy and @pass between the dynamic allocation and the two
;;; mask stores, so the mask argument has to stay live across the calls.
;;;
;;; Expected code:
;;;   - %s18 and %s19 are saved relative to %s9 ("8-byte Folded Spill") and
;;;     restored before returning; %s18 receives a copy of %s0 and is copied
;;;     back into %s0 before the call to pass.
;;;   - Before the calls, the mask is written as eight svm + st pairs at
;;;     256..312 from %s17 ("64-byte Folded Spill").
;;;   - After the calls, it is read back with eight ld + lvm pairs
;;;     ("64-byte Folded Reload").
;;;   - The address 240(, %s11) computed after __ve_grow_stack is kept in
;;;     %s19 and used for the store to the dynamic area; the fixed slot is
;;;     written at 320..376 from %s17.
1108; Function Attrs: nounwind
1109define fastcc void @store__vm512_stk_dyn_align_spill(<512 x i1> noundef %0, i64 noundef %1) {
1110; CHECK-LABEL: store__vm512_stk_dyn_align_spill:
1111; CHECK:       # %bb.0:
1112; CHECK-NEXT:    st %s9, (, %s11)
1113; CHECK-NEXT:    st %s10, 8(, %s11)
1114; CHECK-NEXT:    st %s17, 40(, %s11)
1115; CHECK-NEXT:    or %s9, 0, %s11
1116; CHECK-NEXT:    lea %s11, -384(, %s11)
1117; CHECK-NEXT:    and %s11, %s11, (59)1
1118; CHECK-NEXT:    or %s17, 0, %s11
1119; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB15_2
1120; CHECK-NEXT:  # %bb.1:
1121; CHECK-NEXT:    ld %s61, 24(, %s14)
1122; CHECK-NEXT:    or %s62, 0, %s0
1123; CHECK-NEXT:    lea %s63, 315
1124; CHECK-NEXT:    shm.l %s63, (%s61)
1125; CHECK-NEXT:    shm.l %s8, 8(%s61)
1126; CHECK-NEXT:    shm.l %s11, 16(%s61)
1127; CHECK-NEXT:    monc
1128; CHECK-NEXT:    or %s0, 0, %s62
1129; CHECK-NEXT:  .LBB15_2:
1130; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
1131; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
1132; CHECK-NEXT:    or %s18, 0, %s0
1133; CHECK-NEXT:    svm %s16, %vm3, 0
1134; CHECK-NEXT:    st %s16, 256(, %s17)
1135; CHECK-NEXT:    svm %s16, %vm3, 1
1136; CHECK-NEXT:    st %s16, 264(, %s17)
1137; CHECK-NEXT:    svm %s16, %vm3, 2
1138; CHECK-NEXT:    st %s16, 272(, %s17)
1139; CHECK-NEXT:    svm %s16, %vm3, 3
1140; CHECK-NEXT:    st %s16, 280(, %s17)
1141; CHECK-NEXT:    svm %s16, %vm2, 0
1142; CHECK-NEXT:    st %s16, 288(, %s17)
1143; CHECK-NEXT:    svm %s16, %vm2, 1
1144; CHECK-NEXT:    st %s16, 296(, %s17)
1145; CHECK-NEXT:    svm %s16, %vm2, 2
1146; CHECK-NEXT:    st %s16, 304(, %s17)
1147; CHECK-NEXT:    svm %s16, %vm2, 3
1148; CHECK-NEXT:    st %s16, 312(, %s17) # 64-byte Folded Spill
1149; CHECK-NEXT:    sll %s0, %s0, 6
1150; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
1151; CHECK-NEXT:    and %s1, %s1, (32)0
1152; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
1153; CHECK-NEXT:    bsic %s10, (, %s12)
1154; CHECK-NEXT:    lea %s19, 240(, %s11)
1155; CHECK-NEXT:    lea %s0, dummy@lo
1156; CHECK-NEXT:    and %s0, %s0, (32)0
1157; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
1158; CHECK-NEXT:    bsic %s10, (, %s12)
1159; CHECK-NEXT:    lea %s0, pass@lo
1160; CHECK-NEXT:    and %s0, %s0, (32)0
1161; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
1162; CHECK-NEXT:    or %s0, 0, %s18
1163; CHECK-NEXT:    bsic %s10, (, %s12)
1164; CHECK-NEXT:    # implicit-def: $vmp1
1165; CHECK-NEXT:    ld %s16, 256(, %s17)
1166; CHECK-NEXT:    lvm %vm3, 0, %s16
1167; CHECK-NEXT:    ld %s16, 264(, %s17)
1168; CHECK-NEXT:    lvm %vm3, 1, %s16
1169; CHECK-NEXT:    ld %s16, 272(, %s17)
1170; CHECK-NEXT:    lvm %vm3, 2, %s16
1171; CHECK-NEXT:    ld %s16, 280(, %s17)
1172; CHECK-NEXT:    lvm %vm3, 3, %s16
1173; CHECK-NEXT:    ld %s16, 288(, %s17)
1174; CHECK-NEXT:    lvm %vm2, 0, %s16
1175; CHECK-NEXT:    ld %s16, 296(, %s17)
1176; CHECK-NEXT:    lvm %vm2, 1, %s16
1177; CHECK-NEXT:    ld %s16, 304(, %s17)
1178; CHECK-NEXT:    lvm %vm2, 2, %s16
1179; CHECK-NEXT:    ld %s16, 312(, %s17) # 64-byte Folded Reload
1180; CHECK-NEXT:    lvm %vm2, 3, %s16
1181; CHECK-NEXT:    svm %s0, %vm2, 3
1182; CHECK-NEXT:    st %s0, 56(, %s19)
1183; CHECK-NEXT:    svm %s0, %vm2, 2
1184; CHECK-NEXT:    st %s0, 48(, %s19)
1185; CHECK-NEXT:    svm %s0, %vm2, 1
1186; CHECK-NEXT:    st %s0, 40(, %s19)
1187; CHECK-NEXT:    svm %s0, %vm2, 0
1188; CHECK-NEXT:    st %s0, 32(, %s19)
1189; CHECK-NEXT:    svm %s0, %vm3, 3
1190; CHECK-NEXT:    st %s0, 24(, %s19)
1191; CHECK-NEXT:    svm %s0, %vm3, 2
1192; CHECK-NEXT:    st %s0, 16(, %s19)
1193; CHECK-NEXT:    svm %s0, %vm3, 1
1194; CHECK-NEXT:    st %s0, 8(, %s19)
1195; CHECK-NEXT:    svm %s0, %vm3, 0
1196; CHECK-NEXT:    st %s0, (, %s19)
1197; CHECK-NEXT:    svm %s16, %vm3, 0
1198; CHECK-NEXT:    st %s16, 320(, %s17)
1199; CHECK-NEXT:    svm %s16, %vm3, 1
1200; CHECK-NEXT:    st %s16, 328(, %s17)
1201; CHECK-NEXT:    svm %s16, %vm3, 2
1202; CHECK-NEXT:    st %s16, 336(, %s17)
1203; CHECK-NEXT:    svm %s16, %vm3, 3
1204; CHECK-NEXT:    st %s16, 344(, %s17)
1205; CHECK-NEXT:    svm %s16, %vm2, 0
1206; CHECK-NEXT:    st %s16, 352(, %s17)
1207; CHECK-NEXT:    svm %s16, %vm2, 1
1208; CHECK-NEXT:    st %s16, 360(, %s17)
1209; CHECK-NEXT:    svm %s16, %vm2, 2
1210; CHECK-NEXT:    st %s16, 368(, %s17)
1211; CHECK-NEXT:    svm %s16, %vm2, 3
1212; CHECK-NEXT:    st %s16, 376(, %s17)
1213; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
1214; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
1215; CHECK-NEXT:    or %s11, 0, %s9
1216; CHECK-NEXT:    ld %s17, 40(, %s11)
1217; CHECK-NEXT:    ld %s10, 8(, %s11)
1218; CHECK-NEXT:    ld %s9, (, %s11)
1219; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed slot, declared with 32-byte alignment.
1220  %3 = alloca <512 x i1>, align 32
1221  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; %4 is the dynamic allocation: %1 elements of <512 x i1>.
  ; NOTE(review): the store to %4 below asserts align 64 while this alloca
  ; only requests align 8 -- confirm this mismatch is intentional.
1222  %4 = alloca <512 x i1>, i64 %1, align 8
  ; Two calls placed before the stores; %0 and %1 are both used after them.
1223  tail call fastcc void @dummy()
1224  tail call fastcc void @pass(i64 noundef %1)
1225  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
1226  store volatile <512 x i1> %0, ptr %3, align 32, !tbaa !3
1227  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
1228  ret void
1229}
1230
;;; Metadata nodes referenced by the functions in this file.
;;;   !2        string naming the clang build; no user of it is visible in
;;;             this part of the file.
;;;   !3        TBAA tag attached to the <512 x i1> stores; it is built from
;;;             !4 ("omnipotent char"), which is rooted at !5.
;;;   !6        TBAA tag attached to the i64 stores; it is built from !7
;;;             ("long"), whose parent is !4.
;;;   !8, !10   distinct loop nodes; presumably used by functions earlier in
;;;             the file -- TODO confirm.
;;;   !11, !12  distinct loop nodes attached to the loop back-edges of
;;;             store__vm512_stk_big and store__vm512_stk_big2.
;;;   !9        the "llvm.loop.mustprogress" property shared by every loop
;;;             node above.
1231!2 = !{!"clang version 15.0.0 (git@kaz7.github.com:sx-aurora-dev/llvm-project.git 6c510cbf7e17baa380bf8a181c3b43145fd50980)"}
1232!3 = !{!4, !4, i64 0}
1233!4 = !{!"omnipotent char", !5, i64 0}
1234!5 = !{!"Simple C/C++ TBAA"}
1235!6 = !{!7, !7, i64 0}
1236!7 = !{!"long", !4, i64 0}
1237!8 = distinct !{!8, !9}
1238!9 = !{!"llvm.loop.mustprogress"}
1239!10 = distinct !{!10, !9}
1240!11 = distinct !{!11, !9}
1241!12 = distinct !{!12, !9}
1242