xref: /llvm-project/llvm/test/CodeGen/VE/Vector/load_stk_ldvm.ll (revision 469044cfd355d34573643a57b5d2a78a9c341327)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
3
4;;; Test load instructions
5;;;
6;;; Note:
7;;;   We test load instructions using general stack, stack with dynamic
8;;;   allocation, stack with dynamic allocation and alignment, and stack
9;;;   with dynamic allocation, alignment, and spill.
10;;;
11;;; First test using a stack for leaf function.
12;;;
13;;;   |                                              | Higher address
14;;;   |----------------------------------------------| <- old sp
15;;;   | Local variables of fixed size                |
16;;;   |----------------------------------------------| <- sp
17;;;   |                                              | Lower address
18;;;
19;;; Access local variable using sp (%s11).  In addition, please remember
20;;; that stack is aligned by 16 bytes.
21;;;
22;;; Second test using a general stack.
23;;;
24;;;   |                                              | Higher address
25;;;   |----------------------------------------------|
26;;;   | Parameter area for this function             |
27;;;   |----------------------------------------------|
28;;;   | Register save area (RSA) for this function   |
29;;;   |----------------------------------------------|
30;;;   | Return address for this function             |
31;;;   |----------------------------------------------|
32;;;   | Frame pointer for this function              |
33;;;   |----------------------------------------------| <- fp(=old sp)
34;;;   | Local variables of fixed size                |
35;;;   |----------------------------------------------|
36;;;   |.variable-sized.local.variables.(VLAs)........|
37;;;   |..............................................|
38;;;   |..............................................|
39;;;   |----------------------------------------------| <- returned by alloca
40;;;   | Parameter area for callee                    |
41;;;   |----------------------------------------------|
42;;;   | Register save area (RSA) for callee          |
43;;;   |----------------------------------------------|
44;;;   | Return address for callee                    |
45;;;   |----------------------------------------------|
46;;;   | Frame pointer for callee                     |
47;;;   |----------------------------------------------| <- sp
48;;;   |                                              | Lower address
49;;;
50;;; Access local variable using fp (%s9) since the size of VLA is not
51;;; known.  At the beginning of the functions, allocates 240 + data
52;;; bytes.  240 means RSA+RA+FP (=176) + Parameter (=64).
53;;;
54;;; Third test using a general stack.
55;;;
56;;;   |                                              | Higher address
57;;;   |----------------------------------------------|
58;;;   | Parameter area for this function             |
59;;;   |----------------------------------------------|
60;;;   | Register save area (RSA) for this function   |
61;;;   |----------------------------------------------|
62;;;   | Return address for this function             |
63;;;   |----------------------------------------------|
64;;;   | Frame pointer for this function              |
65;;;   |----------------------------------------------| <- fp(=old sp)
66;;;   |.empty.space.to.make.part.below.aligned.in....|
67;;;   |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
68;;;   |.alignment....................................|  unknown at compile time)
69;;;   |----------------------------------------------|
70;;;   | Local variables of fixed size including spill|
71;;;   | slots                                        |
72;;;   |----------------------------------------------| <- bp(not defined by ABI,
73;;;   |.variable-sized.local.variables.(VLAs)........|       LLVM chooses SX17)
74;;;   |..............................................| (size of this area is
75;;;   |..............................................|  unknown at compile time)
76;;;   |----------------------------------------------| <- stack top (returned by
77;;;   | Parameter area for callee                    |               alloca)
78;;;   |----------------------------------------------|
79;;;   | Register save area (RSA) for callee          |
80;;;   |----------------------------------------------|
81;;;   | Return address for callee                    |
82;;;   |----------------------------------------------|
83;;;   | Frame pointer for callee                     |
84;;;   |----------------------------------------------| <- sp
85;;;   |                                              | Lower address
86;;;
87;;; Access local variable using bp (%s17) since the size of alignment
88;;; and VLA are not known.  At the beginning of the functions, allocates
89;;; pad(240 + data + align) bytes.  Then, access data through bp + pad(240)
90;;; since this address doesn't change even if VLA is dynamically allocated.
91;;;
92;;; Fourth test using a general stack with some spills.
93;;;
94
; Leaf case: volatile-load one <256 x i1> stack object (align 32) and return
; it.  The expected assembly aligns %s11 with `and ..., (59)1`, addresses the
; object through %s11 at offsets 192..216, and fills %vm1 with four ld/lvm
; pairs (one 64-bit chunk each).
95; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
96define fastcc <256 x i1> @load__vm256_stk() {
97; CHECK-LABEL: load__vm256_stk:
98; CHECK:       # %bb.0:
99; CHECK-NEXT:    st %s9, (, %s11)
100; CHECK-NEXT:    st %s10, 8(, %s11)
101; CHECK-NEXT:    or %s9, 0, %s11
102; CHECK-NEXT:    lea %s11, -224(, %s11)
103; CHECK-NEXT:    and %s11, %s11, (59)1
104; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB0_2
105; CHECK-NEXT:  # %bb.1:
106; CHECK-NEXT:    ld %s61, 24(, %s14)
107; CHECK-NEXT:    or %s62, 0, %s0
108; CHECK-NEXT:    lea %s63, 315
109; CHECK-NEXT:    shm.l %s63, (%s61)
110; CHECK-NEXT:    shm.l %s8, 8(%s61)
111; CHECK-NEXT:    shm.l %s11, 16(%s61)
112; CHECK-NEXT:    monc
113; CHECK-NEXT:    or %s0, 0, %s62
114; CHECK-NEXT:  .LBB0_2:
115; CHECK-NEXT:    ld %s16, 192(, %s11)
116; CHECK-NEXT:    lvm %vm1, 0, %s16
117; CHECK-NEXT:    ld %s16, 200(, %s11)
118; CHECK-NEXT:    lvm %vm1, 1, %s16
119; CHECK-NEXT:    ld %s16, 208(, %s11)
120; CHECK-NEXT:    lvm %vm1, 2, %s16
121; CHECK-NEXT:    ld %s16, 216(, %s11)
122; CHECK-NEXT:    lvm %vm1, 3, %s16
123; CHECK-NEXT:    or %s11, 0, %s9
124; CHECK-NEXT:    ld %s10, 8(, %s11)
125; CHECK-NEXT:    ld %s9, (, %s11)
126; CHECK-NEXT:    b.l.t (, %s10)
127  %1 = alloca <256 x i1>, align 32
128  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
129  %2 = load volatile <256 x i1>, ptr %1, align 32
130  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
131  ret <256 x i1> %2
132}
133
; Lifetime-marker intrinsics.  The test functions in this file call them
; around each fixed-size stack object, passing the object's size in bytes
; and its pointer.
134; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
135declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
136
137; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
138declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
139
; Large-frame case: a <256 x i1> object (align 32) plus a
; [268435428 x i64] array (2147483424 bytes).  The expected assembly still
; adjusts %s11 with a single `lea` of -2147483648 and reaches the mask object
; with immediate displacements 2147483616..2147483640 off %s11.
; NOTE(review): "fit" presumably means these offsets still fit the signed
; 32-bit displacement field -- confirm against the VE frame lowering.
140; Function Attrs: argmemonly nofree nounwind
141define fastcc <256 x i1> @load__vm256_stk_big_fit() {
142; CHECK-LABEL: load__vm256_stk_big_fit:
143; CHECK:       # %bb.0:
144; CHECK-NEXT:    st %s9, (, %s11)
145; CHECK-NEXT:    st %s10, 8(, %s11)
146; CHECK-NEXT:    or %s9, 0, %s11
147; CHECK-NEXT:    lea %s11, -2147483648(, %s11)
148; CHECK-NEXT:    and %s11, %s11, (59)1
149; CHECK-NEXT:    brge.l %s11, %s8, .LBB1_4
150; CHECK-NEXT:  # %bb.3:
151; CHECK-NEXT:    ld %s61, 24(, %s14)
152; CHECK-NEXT:    or %s62, 0, %s0
153; CHECK-NEXT:    lea %s63, 315
154; CHECK-NEXT:    shm.l %s63, (%s61)
155; CHECK-NEXT:    shm.l %s8, 8(%s61)
156; CHECK-NEXT:    shm.l %s11, 16(%s61)
157; CHECK-NEXT:    monc
158; CHECK-NEXT:    or %s0, 0, %s62
159; CHECK-NEXT:  .LBB1_4:
160; CHECK-NEXT:    ld %s16, 2147483616(, %s11)
161; CHECK-NEXT:    lvm %vm1, 0, %s16
162; CHECK-NEXT:    ld %s16, 2147483624(, %s11)
163; CHECK-NEXT:    lvm %vm1, 1, %s16
164; CHECK-NEXT:    ld %s16, 2147483632(, %s11)
165; CHECK-NEXT:    lvm %vm1, 2, %s16
166; CHECK-NEXT:    ld %s16, 2147483640(, %s11)
167; CHECK-NEXT:    lvm %vm1, 3, %s16
168; CHECK-NEXT:    or %s0, 0, (0)1
169; CHECK-NEXT:    lea %s1, 2147483424
170; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
171; CHECK-NEXT:    ld %s2, 192(%s0, %s11)
172; CHECK-NEXT:    lea %s0, 8(, %s0)
173; CHECK-NEXT:    brne.l %s0, %s1, .LBB1_1
174; CHECK-NEXT:  # %bb.2:
175; CHECK-NEXT:    or %s11, 0, %s9
176; CHECK-NEXT:    ld %s10, 8(, %s11)
177; CHECK-NEXT:    ld %s9, (, %s11)
178; CHECK-NEXT:    b.l.t (, %s10)
179  %1 = alloca <256 x i1>, align 32
180  %2 = alloca [268435428 x i64], align 8
181  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
182  call void @llvm.lifetime.start.p0(i64 2147483424, ptr nonnull %2)
183  %3 = load volatile <256 x i1>, ptr %1, align 32
184  br label %5
185
; Exit block: end both lifetimes and return the mask loaded before the loop.
1864:                                                ; preds = %5
187  call void @llvm.lifetime.end.p0(i64 2147483424, ptr nonnull %2)
188  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
189  ret <256 x i1> %3
190
; Loop block: volatile-read every i64 element of the array %2, so the
; array accesses appear in the generated code.
1915:                                                ; preds = %0, %5
192  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
193  %7 = getelementptr inbounds [268435428 x i64], ptr %2, i64 0, i64 %6
194  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
195  %9 = add nuw nsw i64 %6, 1
196  %10 = icmp eq i64 %9, 268435428
197  br i1 %10, label %4, label %5, !llvm.loop !7
198}
199
; Large-frame case with an array one element larger than in
; load__vm256_stk_big_fit: [268435429 x i64] (2147483432 bytes).  Here the
; expected assembly builds the frame adjustment in %s13 (lea / and / lea.sl)
; instead of using a single `lea`, and it computes the address of the mask
; object into %s13 before loading it with small offsets 0..24.
200; Function Attrs: argmemonly nofree nounwind
201define fastcc <256 x i1> @load__vm256_stk_big() {
202; CHECK-LABEL: load__vm256_stk_big:
203; CHECK:       # %bb.0:
204; CHECK-NEXT:    st %s9, (, %s11)
205; CHECK-NEXT:    st %s10, 8(, %s11)
206; CHECK-NEXT:    or %s9, 0, %s11
207; CHECK-NEXT:    lea %s13, 2147483616
208; CHECK-NEXT:    and %s13, %s13, (32)0
209; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
210; CHECK-NEXT:    and %s11, %s11, (59)1
211; CHECK-NEXT:    brge.l %s11, %s8, .LBB2_4
212; CHECK-NEXT:  # %bb.3:
213; CHECK-NEXT:    ld %s61, 24(, %s14)
214; CHECK-NEXT:    or %s62, 0, %s0
215; CHECK-NEXT:    lea %s63, 315
216; CHECK-NEXT:    shm.l %s63, (%s61)
217; CHECK-NEXT:    shm.l %s8, 8(%s61)
218; CHECK-NEXT:    shm.l %s11, 16(%s61)
219; CHECK-NEXT:    monc
220; CHECK-NEXT:    or %s0, 0, %s62
221; CHECK-NEXT:  .LBB2_4:
222; CHECK-NEXT:    lea %s13, -2147483648
223; CHECK-NEXT:    and %s13, %s13, (32)0
224; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
225; CHECK-NEXT:    ld %s16, (, %s13)
226; CHECK-NEXT:    lvm %vm1, 0, %s16
227; CHECK-NEXT:    ld %s16, 8(, %s13)
228; CHECK-NEXT:    lvm %vm1, 1, %s16
229; CHECK-NEXT:    ld %s16, 16(, %s13)
230; CHECK-NEXT:    lvm %vm1, 2, %s16
231; CHECK-NEXT:    ld %s16, 24(, %s13)
232; CHECK-NEXT:    lvm %vm1, 3, %s16
233; CHECK-NEXT:    or %s0, 0, (0)1
234; CHECK-NEXT:    lea %s1, 2147483432
235; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
236; CHECK-NEXT:    ld %s2, 216(%s0, %s11)
237; CHECK-NEXT:    lea %s0, 8(, %s0)
238; CHECK-NEXT:    brne.l %s0, %s1, .LBB2_1
239; CHECK-NEXT:  # %bb.2:
240; CHECK-NEXT:    or %s11, 0, %s9
241; CHECK-NEXT:    ld %s10, 8(, %s11)
242; CHECK-NEXT:    ld %s9, (, %s11)
243; CHECK-NEXT:    b.l.t (, %s10)
244  %1 = alloca <256 x i1>, align 32
245  %2 = alloca [268435429 x i64], align 8
246  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
247  call void @llvm.lifetime.start.p0(i64 2147483432, ptr nonnull %2)
248  %3 = load volatile <256 x i1>, ptr %1, align 32
249  br label %5
250
; Exit block: end both lifetimes and return the mask loaded before the loop.
2514:                                                ; preds = %5
252  call void @llvm.lifetime.end.p0(i64 2147483432, ptr nonnull %2)
253  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
254  ret <256 x i1> %3
255
; Loop block: volatile-read every i64 element of the array %2.
2565:                                                ; preds = %0, %5
257  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
258  %7 = getelementptr inbounds [268435429 x i64], ptr %2, i64 0, i64 %6
259  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
260  %9 = add nuw nsw i64 %6, 1
261  %10 = icmp eq i64 %9, 268435429
262  br i1 %10, label %4, label %5, !llvm.loop !9
263}
264
; Large-frame case with a [268435456 x i64] array (2147483648 bytes).  As in
; load__vm256_stk_big, the expected assembly builds the frame adjustment and
; the mask object's address in %s13.  In addition, the loop bound in %s1 is
; built with `lea -2147483648` followed by `and ..., (32)0` rather than a
; single `lea`.
265; Function Attrs: argmemonly nofree nounwind
266define fastcc <256 x i1> @load__vm256_stk_big2() {
267; CHECK-LABEL: load__vm256_stk_big2:
268; CHECK:       # %bb.0:
269; CHECK-NEXT:    st %s9, (, %s11)
270; CHECK-NEXT:    st %s10, 8(, %s11)
271; CHECK-NEXT:    or %s9, 0, %s11
272; CHECK-NEXT:    lea %s13, 2147483424
273; CHECK-NEXT:    and %s13, %s13, (32)0
274; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
275; CHECK-NEXT:    and %s11, %s11, (59)1
276; CHECK-NEXT:    brge.l %s11, %s8, .LBB3_4
277; CHECK-NEXT:  # %bb.3:
278; CHECK-NEXT:    ld %s61, 24(, %s14)
279; CHECK-NEXT:    or %s62, 0, %s0
280; CHECK-NEXT:    lea %s63, 315
281; CHECK-NEXT:    shm.l %s63, (%s61)
282; CHECK-NEXT:    shm.l %s8, 8(%s61)
283; CHECK-NEXT:    shm.l %s11, 16(%s61)
284; CHECK-NEXT:    monc
285; CHECK-NEXT:    or %s0, 0, %s62
286; CHECK-NEXT:  .LBB3_4:
287; CHECK-NEXT:    lea %s13, -2147483456
288; CHECK-NEXT:    and %s13, %s13, (32)0
289; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
290; CHECK-NEXT:    ld %s16, (, %s13)
291; CHECK-NEXT:    lvm %vm1, 0, %s16
292; CHECK-NEXT:    ld %s16, 8(, %s13)
293; CHECK-NEXT:    lvm %vm1, 1, %s16
294; CHECK-NEXT:    ld %s16, 16(, %s13)
295; CHECK-NEXT:    lvm %vm1, 2, %s16
296; CHECK-NEXT:    ld %s16, 24(, %s13)
297; CHECK-NEXT:    lvm %vm1, 3, %s16
298; CHECK-NEXT:    or %s0, 0, (0)1
299; CHECK-NEXT:    lea %s1, -2147483648
300; CHECK-NEXT:    and %s1, %s1, (32)0
301; CHECK-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
302; CHECK-NEXT:    ld %s2, 192(%s0, %s11)
303; CHECK-NEXT:    lea %s0, 8(, %s0)
304; CHECK-NEXT:    brne.l %s0, %s1, .LBB3_1
305; CHECK-NEXT:  # %bb.2:
306; CHECK-NEXT:    or %s11, 0, %s9
307; CHECK-NEXT:    ld %s10, 8(, %s11)
308; CHECK-NEXT:    ld %s9, (, %s11)
309; CHECK-NEXT:    b.l.t (, %s10)
310  %1 = alloca <256 x i1>, align 32
311  %2 = alloca [268435456 x i64], align 8
312  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
313  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
314  %3 = load volatile <256 x i1>, ptr %1, align 32
315  br label %5
316
; Exit block: end both lifetimes and return the mask loaded before the loop.
3174:                                                ; preds = %5
318  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
319  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
320  ret <256 x i1> %3
321
; Loop block: volatile-read every i64 element of the array %2.
3225:                                                ; preds = %0, %5
323  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
324  %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
325  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
326  %9 = add nuw nsw i64 %6, 1
327  %10 = icmp eq i64 %9, 268435456
328  br i1 %10, label %4, label %5, !llvm.loop !10
329}
330
; Dynamic-allocation case: one fixed <256 x i1> object plus a dynamic alloca
; of %0 elements.  The expected assembly calls __ve_grow_stack, reads the
; dynamic area through `lea %s0, 240(, %s11)` with plain scalar loads, and
; loads the fixed object through %s9 at offsets -32..-8 into %vm1.
; NOTE(review): both loads claim `align 32` although both allocas are
; `align 8` -- confirm this mismatch is intentional for the test.
331; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
332define fastcc <256 x i1> @load__vm256_stk_dyn(i64 noundef %0) {
333; CHECK-LABEL: load__vm256_stk_dyn:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    st %s9, (, %s11)
336; CHECK-NEXT:    st %s10, 8(, %s11)
337; CHECK-NEXT:    or %s9, 0, %s11
338; CHECK-NEXT:    lea %s11, -272(, %s11)
339; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB4_2
340; CHECK-NEXT:  # %bb.1:
341; CHECK-NEXT:    ld %s61, 24(, %s14)
342; CHECK-NEXT:    or %s62, 0, %s0
343; CHECK-NEXT:    lea %s63, 315
344; CHECK-NEXT:    shm.l %s63, (%s61)
345; CHECK-NEXT:    shm.l %s8, 8(%s61)
346; CHECK-NEXT:    shm.l %s11, 16(%s61)
347; CHECK-NEXT:    monc
348; CHECK-NEXT:    or %s0, 0, %s62
349; CHECK-NEXT:  .LBB4_2:
350; CHECK-NEXT:    sll %s0, %s0, 5
351; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
352; CHECK-NEXT:    and %s1, %s1, (32)0
353; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
354; CHECK-NEXT:    bsic %s10, (, %s12)
355; CHECK-NEXT:    lea %s0, 240(, %s11)
356; CHECK-NEXT:    ld %s1, 24(, %s0)
357; CHECK-NEXT:    ld %s1, 16(, %s0)
358; CHECK-NEXT:    ld %s1, 8(, %s0)
359; CHECK-NEXT:    ld %s0, (, %s0)
360; CHECK-NEXT:    ld %s16, -32(, %s9)
361; CHECK-NEXT:    lvm %vm1, 0, %s16
362; CHECK-NEXT:    ld %s16, -24(, %s9)
363; CHECK-NEXT:    lvm %vm1, 1, %s16
364; CHECK-NEXT:    ld %s16, -16(, %s9)
365; CHECK-NEXT:    lvm %vm1, 2, %s16
366; CHECK-NEXT:    ld %s16, -8(, %s9)
367; CHECK-NEXT:    lvm %vm1, 3, %s16
368; CHECK-NEXT:    or %s11, 0, %s9
369; CHECK-NEXT:    ld %s10, 8(, %s11)
370; CHECK-NEXT:    ld %s9, (, %s11)
371; CHECK-NEXT:    b.l.t (, %s10)
372  %2 = alloca <256 x i1>, align 8
373  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
374  %3 = alloca <256 x i1>, i64 %0, align 8
375  %4 = load volatile <256 x i1>, ptr %3, align 32
376  %5 = load volatile <256 x i1>, ptr %2, align 32
377  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
378  ret <256 x i1> %5
379}
380
; Dynamic allocation plus over-alignment: the fixed <256 x i1> object is
; `align 32` and a dynamic alloca of %0 elements follows.  The expected
; assembly saves %s17 at 40(, %s11), copies the aligned %s11 into %s17, and
; loads the fixed object through %s17 at offsets 256..280 into %vm1.
; NOTE(review): the load from the dynamic area claims `align 32` while that
; alloca is `align 8` -- confirm intended.
381; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
382define fastcc <256 x i1> @load__vm256_stk_dyn_align(i64 noundef %0) {
383; CHECK-LABEL: load__vm256_stk_dyn_align:
384; CHECK:       # %bb.0:
385; CHECK-NEXT:    st %s9, (, %s11)
386; CHECK-NEXT:    st %s10, 8(, %s11)
387; CHECK-NEXT:    st %s17, 40(, %s11)
388; CHECK-NEXT:    or %s9, 0, %s11
389; CHECK-NEXT:    lea %s11, -288(, %s11)
390; CHECK-NEXT:    and %s11, %s11, (59)1
391; CHECK-NEXT:    or %s17, 0, %s11
392; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB5_2
393; CHECK-NEXT:  # %bb.1:
394; CHECK-NEXT:    ld %s61, 24(, %s14)
395; CHECK-NEXT:    or %s62, 0, %s0
396; CHECK-NEXT:    lea %s63, 315
397; CHECK-NEXT:    shm.l %s63, (%s61)
398; CHECK-NEXT:    shm.l %s8, 8(%s61)
399; CHECK-NEXT:    shm.l %s11, 16(%s61)
400; CHECK-NEXT:    monc
401; CHECK-NEXT:    or %s0, 0, %s62
402; CHECK-NEXT:  .LBB5_2:
403; CHECK-NEXT:    sll %s0, %s0, 5
404; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
405; CHECK-NEXT:    and %s1, %s1, (32)0
406; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
407; CHECK-NEXT:    bsic %s10, (, %s12)
408; CHECK-NEXT:    lea %s0, 240(, %s11)
409; CHECK-NEXT:    ld %s1, 24(, %s0)
410; CHECK-NEXT:    ld %s1, 16(, %s0)
411; CHECK-NEXT:    ld %s1, 8(, %s0)
412; CHECK-NEXT:    ld %s0, (, %s0)
413; CHECK-NEXT:    ld %s16, 256(, %s17)
414; CHECK-NEXT:    lvm %vm1, 0, %s16
415; CHECK-NEXT:    ld %s16, 264(, %s17)
416; CHECK-NEXT:    lvm %vm1, 1, %s16
417; CHECK-NEXT:    ld %s16, 272(, %s17)
418; CHECK-NEXT:    lvm %vm1, 2, %s16
419; CHECK-NEXT:    ld %s16, 280(, %s17)
420; CHECK-NEXT:    lvm %vm1, 3, %s16
421; CHECK-NEXT:    or %s11, 0, %s9
422; CHECK-NEXT:    ld %s17, 40(, %s11)
423; CHECK-NEXT:    ld %s10, 8(, %s11)
424; CHECK-NEXT:    ld %s9, (, %s11)
425; CHECK-NEXT:    b.l.t (, %s10)
426  %2 = alloca <256 x i1>, align 32
427  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
428  %3 = alloca <256 x i1>, i64 %0, align 8
429  %4 = load volatile <256 x i1>, ptr %3, align 32
430  %5 = load volatile <256 x i1>, ptr %2, align 32
431  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
432  ret <256 x i1> %5
433}
434
; Dynamic allocation with two over-aligned fixed objects: %2 (align 32) and
; %3 (align 64), plus a dynamic alloca of %0 elements.  The expected assembly
; aligns %s11 with `and ..., (58)1`, uses %s17 as the base for both fixed
; objects (offsets 288..312 into %vm1, offsets 256..280 into %vm2), and the
; function returns the value loaded from %2.
435; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
436define fastcc <256 x i1> @load__vm256_stk_dyn_align2(i64 noundef %0) {
437; CHECK-LABEL: load__vm256_stk_dyn_align2:
438; CHECK:       # %bb.0:
439; CHECK-NEXT:    st %s9, (, %s11)
440; CHECK-NEXT:    st %s10, 8(, %s11)
441; CHECK-NEXT:    st %s17, 40(, %s11)
442; CHECK-NEXT:    or %s9, 0, %s11
443; CHECK-NEXT:    lea %s11, -320(, %s11)
444; CHECK-NEXT:    and %s11, %s11, (58)1
445; CHECK-NEXT:    or %s17, 0, %s11
446; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB6_2
447; CHECK-NEXT:  # %bb.1:
448; CHECK-NEXT:    ld %s61, 24(, %s14)
449; CHECK-NEXT:    or %s62, 0, %s0
450; CHECK-NEXT:    lea %s63, 315
451; CHECK-NEXT:    shm.l %s63, (%s61)
452; CHECK-NEXT:    shm.l %s8, 8(%s61)
453; CHECK-NEXT:    shm.l %s11, 16(%s61)
454; CHECK-NEXT:    monc
455; CHECK-NEXT:    or %s0, 0, %s62
456; CHECK-NEXT:  .LBB6_2:
457; CHECK-NEXT:    sll %s0, %s0, 5
458; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
459; CHECK-NEXT:    and %s1, %s1, (32)0
460; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
461; CHECK-NEXT:    bsic %s10, (, %s12)
462; CHECK-NEXT:    lea %s0, 240(, %s11)
463; CHECK-NEXT:    ld %s1, 24(, %s0)
464; CHECK-NEXT:    ld %s1, 16(, %s0)
465; CHECK-NEXT:    ld %s1, 8(, %s0)
466; CHECK-NEXT:    ld %s0, (, %s0)
467; CHECK-NEXT:    ld %s16, 288(, %s17)
468; CHECK-NEXT:    lvm %vm1, 0, %s16
469; CHECK-NEXT:    ld %s16, 296(, %s17)
470; CHECK-NEXT:    lvm %vm1, 1, %s16
471; CHECK-NEXT:    ld %s16, 304(, %s17)
472; CHECK-NEXT:    lvm %vm1, 2, %s16
473; CHECK-NEXT:    ld %s16, 312(, %s17)
474; CHECK-NEXT:    lvm %vm1, 3, %s16
475; CHECK-NEXT:    ld %s16, 256(, %s17)
476; CHECK-NEXT:    lvm %vm2, 0, %s16
477; CHECK-NEXT:    ld %s16, 264(, %s17)
478; CHECK-NEXT:    lvm %vm2, 1, %s16
479; CHECK-NEXT:    ld %s16, 272(, %s17)
480; CHECK-NEXT:    lvm %vm2, 2, %s16
481; CHECK-NEXT:    ld %s16, 280(, %s17)
482; CHECK-NEXT:    lvm %vm2, 3, %s16
483; CHECK-NEXT:    or %s11, 0, %s9
484; CHECK-NEXT:    ld %s17, 40(, %s11)
485; CHECK-NEXT:    ld %s10, 8(, %s11)
486; CHECK-NEXT:    ld %s9, (, %s11)
487; CHECK-NEXT:    b.l.t (, %s10)
488  %2 = alloca <256 x i1>, align 32
489  %3 = alloca <256 x i1>, align 64
490  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
491  %4 = alloca <256 x i1>, i64 %0, align 8
492  %5 = load volatile <256 x i1>, ptr %4, align 32
493  %6 = load volatile <256 x i1>, ptr %2, align 32
494  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
495  %7 = load volatile <256 x i1>, ptr %3, align 64
496  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
497  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
498  ret <256 x i1> %6
499}
500
; Dynamic allocation, over-alignment and a spill: the mask loaded from %2
; must stay live across the calls to @dummy and @pass.  The expected assembly
; stores %vm1 to offsets 256..280 off %s17 with svm/st pairs before the calls
; and reloads it with ld/lvm pairs afterwards ("32-byte Folded Spill/Reload").
; It also keeps the argument in %s18, which is itself saved and restored at
; 48(, %s9), so it can be passed to @pass after @dummy returns.
501; Function Attrs: nounwind
502define fastcc <256 x i1> @load__vm256_stk_dyn_align_spill(i64 noundef %0) {
503; CHECK-LABEL: load__vm256_stk_dyn_align_spill:
504; CHECK:       # %bb.0:
505; CHECK-NEXT:    st %s9, (, %s11)
506; CHECK-NEXT:    st %s10, 8(, %s11)
507; CHECK-NEXT:    st %s17, 40(, %s11)
508; CHECK-NEXT:    or %s9, 0, %s11
509; CHECK-NEXT:    lea %s11, -320(, %s11)
510; CHECK-NEXT:    and %s11, %s11, (59)1
511; CHECK-NEXT:    or %s17, 0, %s11
512; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB7_2
513; CHECK-NEXT:  # %bb.1:
514; CHECK-NEXT:    ld %s61, 24(, %s14)
515; CHECK-NEXT:    or %s62, 0, %s0
516; CHECK-NEXT:    lea %s63, 315
517; CHECK-NEXT:    shm.l %s63, (%s61)
518; CHECK-NEXT:    shm.l %s8, 8(%s61)
519; CHECK-NEXT:    shm.l %s11, 16(%s61)
520; CHECK-NEXT:    monc
521; CHECK-NEXT:    or %s0, 0, %s62
522; CHECK-NEXT:  .LBB7_2:
523; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
524; CHECK-NEXT:    or %s18, 0, %s0
525; CHECK-NEXT:    lea %s0, 15(, %s0)
526; CHECK-NEXT:    and %s0, -16, %s0
527; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
528; CHECK-NEXT:    and %s1, %s1, (32)0
529; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
530; CHECK-NEXT:    bsic %s10, (, %s12)
531; CHECK-NEXT:    lea %s0, 240(, %s11)
532; CHECK-NEXT:    ld %s1, 24(, %s0)
533; CHECK-NEXT:    ld %s1, 16(, %s0)
534; CHECK-NEXT:    ld %s1, 8(, %s0)
535; CHECK-NEXT:    ld %s0, (, %s0)
536; CHECK-NEXT:    ld %s16, 288(, %s17)
537; CHECK-NEXT:    lvm %vm1, 0, %s16
538; CHECK-NEXT:    ld %s16, 296(, %s17)
539; CHECK-NEXT:    lvm %vm1, 1, %s16
540; CHECK-NEXT:    ld %s16, 304(, %s17)
541; CHECK-NEXT:    lvm %vm1, 2, %s16
542; CHECK-NEXT:    ld %s16, 312(, %s17)
543; CHECK-NEXT:    lvm %vm1, 3, %s16
544; CHECK-NEXT:    svm %s16, %vm1, 0
545; CHECK-NEXT:    st %s16, 256(, %s17)
546; CHECK-NEXT:    svm %s16, %vm1, 1
547; CHECK-NEXT:    st %s16, 264(, %s17)
548; CHECK-NEXT:    svm %s16, %vm1, 2
549; CHECK-NEXT:    st %s16, 272(, %s17)
550; CHECK-NEXT:    svm %s16, %vm1, 3
551; CHECK-NEXT:    st %s16, 280(, %s17) # 32-byte Folded Spill
552; CHECK-NEXT:    lea %s0, dummy@lo
553; CHECK-NEXT:    and %s0, %s0, (32)0
554; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
555; CHECK-NEXT:    bsic %s10, (, %s12)
556; CHECK-NEXT:    lea %s0, pass@lo
557; CHECK-NEXT:    and %s0, %s0, (32)0
558; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
559; CHECK-NEXT:    or %s0, 0, %s18
560; CHECK-NEXT:    bsic %s10, (, %s12)
561; CHECK-NEXT:    ld %s16, 256(, %s17)
562; CHECK-NEXT:    lvm %vm1, 0, %s16
563; CHECK-NEXT:    ld %s16, 264(, %s17)
564; CHECK-NEXT:    lvm %vm1, 1, %s16
565; CHECK-NEXT:    ld %s16, 272(, %s17)
566; CHECK-NEXT:    lvm %vm1, 2, %s16
567; CHECK-NEXT:    ld %s16, 280(, %s17) # 32-byte Folded Reload
568; CHECK-NEXT:    lvm %vm1, 3, %s16
569; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
570; CHECK-NEXT:    or %s11, 0, %s9
571; CHECK-NEXT:    ld %s17, 40(, %s11)
572; CHECK-NEXT:    ld %s10, 8(, %s11)
573; CHECK-NEXT:    ld %s9, (, %s11)
574; CHECK-NEXT:    b.l.t (, %s10)
575  %2 = alloca <256 x i1>, align 32
576  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
577  %3 = alloca i8, i64 %0, align 8
578  %4 = load volatile <256 x i1>, ptr %3, align 32
579  %5 = load volatile <256 x i1>, ptr %2, align 32
580  tail call fastcc void @dummy()
581  tail call fastcc void @pass(i64 noundef %0)
582  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
583  ret <256 x i1> %5
584}
585
; External fastcc functions called by load__vm256_stk_dyn_align_spill; the
; calls sit between the mask load and the return of the loaded value.
586declare fastcc void @dummy()
587
588declare fastcc void @pass(i64 noundef)
589
; Leaf case for the 512-bit mask type: volatile-load one <512 x i1> stack
; object (align 64) and return it.  The expected assembly aligns %s11 with
; `and ..., (58)1` and fills two mask registers: %vm3 from offsets 192..216
; and %vm2 from offsets 224..248 off %s11.
590; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
591define fastcc <512 x i1> @load__vm512_stk() {
592; CHECK-LABEL: load__vm512_stk:
593; CHECK:       # %bb.0:
594; CHECK-NEXT:    st %s9, (, %s11)
595; CHECK-NEXT:    st %s10, 8(, %s11)
596; CHECK-NEXT:    or %s9, 0, %s11
597; CHECK-NEXT:    lea %s11, -256(, %s11)
598; CHECK-NEXT:    and %s11, %s11, (58)1
599; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB8_2
600; CHECK-NEXT:  # %bb.1:
601; CHECK-NEXT:    ld %s61, 24(, %s14)
602; CHECK-NEXT:    or %s62, 0, %s0
603; CHECK-NEXT:    lea %s63, 315
604; CHECK-NEXT:    shm.l %s63, (%s61)
605; CHECK-NEXT:    shm.l %s8, 8(%s61)
606; CHECK-NEXT:    shm.l %s11, 16(%s61)
607; CHECK-NEXT:    monc
608; CHECK-NEXT:    or %s0, 0, %s62
609; CHECK-NEXT:  .LBB8_2:
610; CHECK-NEXT:    # implicit-def: $vmp1
611; CHECK-NEXT:    ld %s16, 192(, %s11)
612; CHECK-NEXT:    lvm %vm3, 0, %s16
613; CHECK-NEXT:    ld %s16, 200(, %s11)
614; CHECK-NEXT:    lvm %vm3, 1, %s16
615; CHECK-NEXT:    ld %s16, 208(, %s11)
616; CHECK-NEXT:    lvm %vm3, 2, %s16
617; CHECK-NEXT:    ld %s16, 216(, %s11)
618; CHECK-NEXT:    lvm %vm3, 3, %s16
619; CHECK-NEXT:    ld %s16, 224(, %s11)
620; CHECK-NEXT:    lvm %vm2, 0, %s16
621; CHECK-NEXT:    ld %s16, 232(, %s11)
622; CHECK-NEXT:    lvm %vm2, 1, %s16
623; CHECK-NEXT:    ld %s16, 240(, %s11)
624; CHECK-NEXT:    lvm %vm2, 2, %s16
625; CHECK-NEXT:    ld %s16, 248(, %s11)
626; CHECK-NEXT:    lvm %vm2, 3, %s16
627; CHECK-NEXT:    or %s11, 0, %s9
628; CHECK-NEXT:    ld %s10, 8(, %s11)
629; CHECK-NEXT:    ld %s9, (, %s11)
630; CHECK-NEXT:    b.l.t (, %s10)
631  %1 = alloca <512 x i1>, align 64
632  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
633  %2 = load volatile <512 x i1>, ptr %1, align 64
634  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
635  ret <512 x i1> %2
636}
637
; Large-frame case for <512 x i1>: the mask object (align 64) plus a
; [268435424 x i64] array (2147483392 bytes).  The expected assembly adjusts
; %s11 with a single `lea` of -2147483648 and reaches the mask object with
; immediate displacements 2147483584..2147483640 off %s11.
; NOTE(review): "fit" presumably means these offsets still fit the signed
; 32-bit displacement field -- confirm against the VE frame lowering.
638; Function Attrs: argmemonly nofree nounwind
639define fastcc <512 x i1> @load__vm512_stk_big_fit() {
640; CHECK-LABEL: load__vm512_stk_big_fit:
641; CHECK:       # %bb.0:
642; CHECK-NEXT:    st %s9, (, %s11)
643; CHECK-NEXT:    st %s10, 8(, %s11)
644; CHECK-NEXT:    or %s9, 0, %s11
645; CHECK-NEXT:    lea %s11, -2147483648(, %s11)
646; CHECK-NEXT:    and %s11, %s11, (58)1
647; CHECK-NEXT:    brge.l %s11, %s8, .LBB9_4
648; CHECK-NEXT:  # %bb.3:
649; CHECK-NEXT:    ld %s61, 24(, %s14)
650; CHECK-NEXT:    or %s62, 0, %s0
651; CHECK-NEXT:    lea %s63, 315
652; CHECK-NEXT:    shm.l %s63, (%s61)
653; CHECK-NEXT:    shm.l %s8, 8(%s61)
654; CHECK-NEXT:    shm.l %s11, 16(%s61)
655; CHECK-NEXT:    monc
656; CHECK-NEXT:    or %s0, 0, %s62
657; CHECK-NEXT:  .LBB9_4:
658; CHECK-NEXT:    # implicit-def: $vmp1
659; CHECK-NEXT:    ld %s16, 2147483584(, %s11)
660; CHECK-NEXT:    lvm %vm3, 0, %s16
661; CHECK-NEXT:    ld %s16, 2147483592(, %s11)
662; CHECK-NEXT:    lvm %vm3, 1, %s16
663; CHECK-NEXT:    ld %s16, 2147483600(, %s11)
664; CHECK-NEXT:    lvm %vm3, 2, %s16
665; CHECK-NEXT:    ld %s16, 2147483608(, %s11)
666; CHECK-NEXT:    lvm %vm3, 3, %s16
667; CHECK-NEXT:    ld %s16, 2147483616(, %s11)
668; CHECK-NEXT:    lvm %vm2, 0, %s16
669; CHECK-NEXT:    ld %s16, 2147483624(, %s11)
670; CHECK-NEXT:    lvm %vm2, 1, %s16
671; CHECK-NEXT:    ld %s16, 2147483632(, %s11)
672; CHECK-NEXT:    lvm %vm2, 2, %s16
673; CHECK-NEXT:    ld %s16, 2147483640(, %s11)
674; CHECK-NEXT:    lvm %vm2, 3, %s16
675; CHECK-NEXT:    or %s0, 0, (0)1
676; CHECK-NEXT:    lea %s1, 2147483392
677; CHECK-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
678; CHECK-NEXT:    ld %s2, 192(%s0, %s11)
679; CHECK-NEXT:    lea %s0, 8(, %s0)
680; CHECK-NEXT:    brne.l %s0, %s1, .LBB9_1
681; CHECK-NEXT:  # %bb.2:
682; CHECK-NEXT:    or %s11, 0, %s9
683; CHECK-NEXT:    ld %s10, 8(, %s11)
684; CHECK-NEXT:    ld %s9, (, %s11)
685; CHECK-NEXT:    b.l.t (, %s10)
686  %1 = alloca <512 x i1>, align 64
687  %2 = alloca [268435424 x i64], align 8
688  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
689  call void @llvm.lifetime.start.p0(i64 2147483392, ptr nonnull %2)
690  %3 = load volatile <512 x i1>, ptr %1, align 64
691  br label %5
692
; Exit block: end both lifetimes and return the mask loaded before the loop.
6934:                                                ; preds = %5
694  call void @llvm.lifetime.end.p0(i64 2147483392, ptr nonnull %2)
695  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
696  ret <512 x i1> %3
697
; Loop block: volatile-read every i64 element of the array %2.
6985:                                                ; preds = %0, %5
699  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
700  %7 = getelementptr inbounds [268435424 x i64], ptr %2, i64 0, i64 %6
701  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
702  %9 = add nuw nsw i64 %6, 1
703  %10 = icmp eq i64 %9, 268435424
704  br i1 %10, label %4, label %5, !llvm.loop !11
705}
706
; Large-frame case for <512 x i1> with an array one element larger than in
; load__vm512_stk_big_fit: [268435425 x i64] (2147483400 bytes).  The
; expected assembly builds the frame adjustment in %s13 (lea / and / lea.sl)
; and computes the mask object's address into %s13, then loads %vm3 from
; offsets 0..24 and %vm2 from offsets 32..56.
707; Function Attrs: argmemonly nofree nounwind
708define fastcc <512 x i1> @load__vm512_stk_big() {
709; CHECK-LABEL: load__vm512_stk_big:
710; CHECK:       # %bb.0:
711; CHECK-NEXT:    st %s9, (, %s11)
712; CHECK-NEXT:    st %s10, 8(, %s11)
713; CHECK-NEXT:    or %s9, 0, %s11
714; CHECK-NEXT:    lea %s13, 2147483584
715; CHECK-NEXT:    and %s13, %s13, (32)0
716; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
717; CHECK-NEXT:    and %s11, %s11, (58)1
718; CHECK-NEXT:    brge.l %s11, %s8, .LBB10_4
719; CHECK-NEXT:  # %bb.3:
720; CHECK-NEXT:    ld %s61, 24(, %s14)
721; CHECK-NEXT:    or %s62, 0, %s0
722; CHECK-NEXT:    lea %s63, 315
723; CHECK-NEXT:    shm.l %s63, (%s61)
724; CHECK-NEXT:    shm.l %s8, 8(%s61)
725; CHECK-NEXT:    shm.l %s11, 16(%s61)
726; CHECK-NEXT:    monc
727; CHECK-NEXT:    or %s0, 0, %s62
728; CHECK-NEXT:  .LBB10_4:
729; CHECK-NEXT:    lea %s13, -2147483648
730; CHECK-NEXT:    and %s13, %s13, (32)0
731; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
732; CHECK-NEXT:    # implicit-def: $vmp1
733; CHECK-NEXT:    ld %s16, (, %s13)
734; CHECK-NEXT:    lvm %vm3, 0, %s16
735; CHECK-NEXT:    ld %s16, 8(, %s13)
736; CHECK-NEXT:    lvm %vm3, 1, %s16
737; CHECK-NEXT:    ld %s16, 16(, %s13)
738; CHECK-NEXT:    lvm %vm3, 2, %s16
739; CHECK-NEXT:    ld %s16, 24(, %s13)
740; CHECK-NEXT:    lvm %vm3, 3, %s16
741; CHECK-NEXT:    ld %s16, 32(, %s13)
742; CHECK-NEXT:    lvm %vm2, 0, %s16
743; CHECK-NEXT:    ld %s16, 40(, %s13)
744; CHECK-NEXT:    lvm %vm2, 1, %s16
745; CHECK-NEXT:    ld %s16, 48(, %s13)
746; CHECK-NEXT:    lvm %vm2, 2, %s16
747; CHECK-NEXT:    ld %s16, 56(, %s13)
748; CHECK-NEXT:    lvm %vm2, 3, %s16
749; CHECK-NEXT:    or %s0, 0, (0)1
750; CHECK-NEXT:    lea %s1, 2147483400
751; CHECK-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
752; CHECK-NEXT:    ld %s2, 248(%s0, %s11)
753; CHECK-NEXT:    lea %s0, 8(, %s0)
754; CHECK-NEXT:    brne.l %s0, %s1, .LBB10_1
755; CHECK-NEXT:  # %bb.2:
756; CHECK-NEXT:    or %s11, 0, %s9
757; CHECK-NEXT:    ld %s10, 8(, %s11)
758; CHECK-NEXT:    ld %s9, (, %s11)
759; CHECK-NEXT:    b.l.t (, %s10)
760  %1 = alloca <512 x i1>, align 64
761  %2 = alloca [268435425 x i64], align 8
762  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
763  call void @llvm.lifetime.start.p0(i64 2147483400, ptr nonnull %2)
764  %3 = load volatile <512 x i1>, ptr %1, align 64
765  br label %5
766
; Exit block: end both lifetimes and return the mask loaded before the loop.
7674:                                                ; preds = %5
768  call void @llvm.lifetime.end.p0(i64 2147483400, ptr nonnull %2)
769  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
770  ret <512 x i1> %3
771
; Loop block: volatile-read every i64 element of the array %2.
7725:                                                ; preds = %0, %5
773  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
774  %7 = getelementptr inbounds [268435425 x i64], ptr %2, i64 0, i64 %6
775  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
776  %9 = add nuw nsw i64 %6, 1
777  %10 = icmp eq i64 %9, 268435425
778  br i1 %10, label %4, label %5, !llvm.loop !12
779}
780
781; Function Attrs: argmemonly nofree nounwind
;;; load__vm512_stk_big2: volatile-loads a <512 x i1> mask from a
;;; 64-byte-aligned stack slot (%1) in a frame that also holds a
;;; [268435456 x i64] array (%2; 2147483648 bytes per its lifetime markers).
;;; After the mask load, a loop volatile-loads every i64 element of the
;;; array, and the function returns the mask value.
;;;
;;; In the expected output the eight 64-bit words of the mask slot are read
;;; through %s13 and written with lvm into %vm3 (words 0-3) and %vm2
;;; (words 4-7).
;;;
;;; The assertion lines are autogenerated (see the NOTE at the top of this
;;; file); regenerate them with utils/update_llc_test_checks.py rather than
;;; editing them by hand.
782define fastcc <512 x i1> @load__vm512_stk_big2() {
783; CHECK-LABEL: load__vm512_stk_big2:
784; CHECK:       # %bb.0:
785; CHECK-NEXT:    st %s9, (, %s11)
786; CHECK-NEXT:    st %s10, 8(, %s11)
787; CHECK-NEXT:    or %s9, 0, %s11
788; CHECK-NEXT:    lea %s13, 2147483392
789; CHECK-NEXT:    and %s13, %s13, (32)0
790; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
791; CHECK-NEXT:    and %s11, %s11, (58)1
792; CHECK-NEXT:    brge.l %s11, %s8, .LBB11_4
793; CHECK-NEXT:  # %bb.3:
794; CHECK-NEXT:    ld %s61, 24(, %s14)
795; CHECK-NEXT:    or %s62, 0, %s0
796; CHECK-NEXT:    lea %s63, 315
797; CHECK-NEXT:    shm.l %s63, (%s61)
798; CHECK-NEXT:    shm.l %s8, 8(%s61)
799; CHECK-NEXT:    shm.l %s11, 16(%s61)
800; CHECK-NEXT:    monc
801; CHECK-NEXT:    or %s0, 0, %s62
802; CHECK-NEXT:  .LBB11_4:
803; CHECK-NEXT:    lea %s13, -2147483456
804; CHECK-NEXT:    and %s13, %s13, (32)0
805; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
806; CHECK-NEXT:    # implicit-def: $vmp1
807; CHECK-NEXT:    ld %s16, (, %s13)
808; CHECK-NEXT:    lvm %vm3, 0, %s16
809; CHECK-NEXT:    ld %s16, 8(, %s13)
810; CHECK-NEXT:    lvm %vm3, 1, %s16
811; CHECK-NEXT:    ld %s16, 16(, %s13)
812; CHECK-NEXT:    lvm %vm3, 2, %s16
813; CHECK-NEXT:    ld %s16, 24(, %s13)
814; CHECK-NEXT:    lvm %vm3, 3, %s16
815; CHECK-NEXT:    ld %s16, 32(, %s13)
816; CHECK-NEXT:    lvm %vm2, 0, %s16
817; CHECK-NEXT:    ld %s16, 40(, %s13)
818; CHECK-NEXT:    lvm %vm2, 1, %s16
819; CHECK-NEXT:    ld %s16, 48(, %s13)
820; CHECK-NEXT:    lvm %vm2, 2, %s16
821; CHECK-NEXT:    ld %s16, 56(, %s13)
822; CHECK-NEXT:    lvm %vm2, 3, %s16
823; CHECK-NEXT:    or %s0, 0, (0)1
824; CHECK-NEXT:    lea %s1, -2147483648
825; CHECK-NEXT:    and %s1, %s1, (32)0
826; CHECK-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
827; CHECK-NEXT:    ld %s2, 192(%s0, %s11)
828; CHECK-NEXT:    lea %s0, 8(, %s0)
829; CHECK-NEXT:    brne.l %s0, %s1, .LBB11_1
830; CHECK-NEXT:  # %bb.2:
831; CHECK-NEXT:    or %s11, 0, %s9
832; CHECK-NEXT:    ld %s10, 8(, %s11)
833; CHECK-NEXT:    ld %s9, (, %s11)
834; CHECK-NEXT:    b.l.t (, %s10)
;;; IR under test: %1 is the mask slot, %2 is the oversized array.
835  %1 = alloca <512 x i1>, align 64
836  %2 = alloca [268435456 x i64], align 8
837  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
838  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
;;; The load under test; %3 is the value returned from block 4.
839  %3 = load volatile <512 x i1>, ptr %1, align 64
840  br label %5
841
;;; Exit block: end both lifetimes and return the mask loaded above.
8424:                                                ; preds = %5
843  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
844  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
845  ret <512 x i1> %3
846
;;; Loop block: one volatile i64 load per array element; the index %6 starts
;;; at 0 and the loop exits once the incremented index equals 268435456.
8475:                                                ; preds = %0, %5
848  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
849  %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
850  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
851  %9 = add nuw nsw i64 %6, 1
852  %10 = icmp eq i64 %9, 268435456
853  br i1 %10, label %4, label %5, !llvm.loop !13
854}
855
856; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
;;; load__vm512_stk_dyn: combines a fixed 64-byte-aligned <512 x i1> slot (%2)
;;; with a dynamically sized alloca of %0 x <512 x i1> (%3). Both are read
;;; with volatile loads; only the value read from the fixed slot (%5) is
;;; returned.
;;;
;;; Expected-output highlights:
;;;   - the incoming count in %s0 is shifted left by 6 before the call to
;;;     __ve_grow_stack;
;;;   - the first volatile load appears as eight scalar ld instructions based
;;;     at 240(%s11);
;;;   - the fixed slot is read through %s17 (copied from %s11 in the prologue)
;;;     at offsets 256..312 and written with lvm into %vm3 and %vm2.
;;;
;;; The assertion lines are autogenerated (see the NOTE at the top of this
;;; file); regenerate them with utils/update_llc_test_checks.py rather than
;;; editing them by hand.
857define fastcc <512 x i1> @load__vm512_stk_dyn(i64 noundef %0) {
858; CHECK-LABEL: load__vm512_stk_dyn:
859; CHECK:       # %bb.0:
860; CHECK-NEXT:    st %s9, (, %s11)
861; CHECK-NEXT:    st %s10, 8(, %s11)
862; CHECK-NEXT:    st %s17, 40(, %s11)
863; CHECK-NEXT:    or %s9, 0, %s11
864; CHECK-NEXT:    lea %s11, -320(, %s11)
865; CHECK-NEXT:    and %s11, %s11, (58)1
866; CHECK-NEXT:    or %s17, 0, %s11
867; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB12_2
868; CHECK-NEXT:  # %bb.1:
869; CHECK-NEXT:    ld %s61, 24(, %s14)
870; CHECK-NEXT:    or %s62, 0, %s0
871; CHECK-NEXT:    lea %s63, 315
872; CHECK-NEXT:    shm.l %s63, (%s61)
873; CHECK-NEXT:    shm.l %s8, 8(%s61)
874; CHECK-NEXT:    shm.l %s11, 16(%s61)
875; CHECK-NEXT:    monc
876; CHECK-NEXT:    or %s0, 0, %s62
877; CHECK-NEXT:  .LBB12_2:
878; CHECK-NEXT:    sll %s0, %s0, 6
879; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
880; CHECK-NEXT:    and %s1, %s1, (32)0
881; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
882; CHECK-NEXT:    bsic %s10, (, %s12)
883; CHECK-NEXT:    lea %s0, 240(, %s11)
884; CHECK-NEXT:    ld %s1, 56(, %s0)
885; CHECK-NEXT:    ld %s1, 48(, %s0)
886; CHECK-NEXT:    ld %s1, 40(, %s0)
887; CHECK-NEXT:    ld %s1, 32(, %s0)
888; CHECK-NEXT:    ld %s1, 24(, %s0)
889; CHECK-NEXT:    ld %s1, 16(, %s0)
890; CHECK-NEXT:    ld %s1, 8(, %s0)
891; CHECK-NEXT:    ld %s0, (, %s0)
892; CHECK-NEXT:    # implicit-def: $vmp1
893; CHECK-NEXT:    ld %s16, 256(, %s17)
894; CHECK-NEXT:    lvm %vm3, 0, %s16
895; CHECK-NEXT:    ld %s16, 264(, %s17)
896; CHECK-NEXT:    lvm %vm3, 1, %s16
897; CHECK-NEXT:    ld %s16, 272(, %s17)
898; CHECK-NEXT:    lvm %vm3, 2, %s16
899; CHECK-NEXT:    ld %s16, 280(, %s17)
900; CHECK-NEXT:    lvm %vm3, 3, %s16
901; CHECK-NEXT:    ld %s16, 288(, %s17)
902; CHECK-NEXT:    lvm %vm2, 0, %s16
903; CHECK-NEXT:    ld %s16, 296(, %s17)
904; CHECK-NEXT:    lvm %vm2, 1, %s16
905; CHECK-NEXT:    ld %s16, 304(, %s17)
906; CHECK-NEXT:    lvm %vm2, 2, %s16
907; CHECK-NEXT:    ld %s16, 312(, %s17)
908; CHECK-NEXT:    lvm %vm2, 3, %s16
909; CHECK-NEXT:    or %s11, 0, %s9
910; CHECK-NEXT:    ld %s17, 40(, %s11)
911; CHECK-NEXT:    ld %s10, 8(, %s11)
912; CHECK-NEXT:    ld %s9, (, %s11)
913; CHECK-NEXT:    b.l.t (, %s10)
;;; Fixed-size mask slot, 64-byte aligned.
914  %2 = alloca <512 x i1>, align 64
915  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
;;; Dynamically sized area: %0 elements of <512 x i1>, 8-byte aligned.
916  %3 = alloca <512 x i1>, i64 %0, align 8
;;; NOTE(review): this load states align 64 although %3 is allocated with
;;; align 8 - confirm this is intentional. Its result %4 is not used.
917  %4 = load volatile <512 x i1>, ptr %3, align 64
;;; The load whose value is returned.
918  %5 = load volatile <512 x i1>, ptr %2, align 64
919  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
920  ret <512 x i1> %5
921}
922
923; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
;;; load__vm512_stk_dyn_align: combines a fixed <512 x i1> slot (%2) that is
;;; only 32-byte aligned with a dynamically sized alloca of %0 x <512 x i1>
;;; (%3). Both are read with volatile loads; only the value read from the
;;; fixed slot (%5) is returned.
;;;
;;; Expected-output highlights:
;;;   - the prologue masks %s11 with (59)1 after lowering it by 320;
;;;   - the incoming count in %s0 is shifted left by 6 before the call to
;;;     __ve_grow_stack;
;;;   - the first volatile load appears as eight scalar ld instructions based
;;;     at 240(%s11);
;;;   - the fixed slot is read through %s17 (copied from %s11 in the prologue)
;;;     at offsets 256..312 and written with lvm into %vm3 and %vm2.
;;;
;;; The assertion lines are autogenerated (see the NOTE at the top of this
;;; file); regenerate them with utils/update_llc_test_checks.py rather than
;;; editing them by hand.
924define fastcc <512 x i1> @load__vm512_stk_dyn_align(i64 noundef %0) {
925; CHECK-LABEL: load__vm512_stk_dyn_align:
926; CHECK:       # %bb.0:
927; CHECK-NEXT:    st %s9, (, %s11)
928; CHECK-NEXT:    st %s10, 8(, %s11)
929; CHECK-NEXT:    st %s17, 40(, %s11)
930; CHECK-NEXT:    or %s9, 0, %s11
931; CHECK-NEXT:    lea %s11, -320(, %s11)
932; CHECK-NEXT:    and %s11, %s11, (59)1
933; CHECK-NEXT:    or %s17, 0, %s11
934; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB13_2
935; CHECK-NEXT:  # %bb.1:
936; CHECK-NEXT:    ld %s61, 24(, %s14)
937; CHECK-NEXT:    or %s62, 0, %s0
938; CHECK-NEXT:    lea %s63, 315
939; CHECK-NEXT:    shm.l %s63, (%s61)
940; CHECK-NEXT:    shm.l %s8, 8(%s61)
941; CHECK-NEXT:    shm.l %s11, 16(%s61)
942; CHECK-NEXT:    monc
943; CHECK-NEXT:    or %s0, 0, %s62
944; CHECK-NEXT:  .LBB13_2:
945; CHECK-NEXT:    sll %s0, %s0, 6
946; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
947; CHECK-NEXT:    and %s1, %s1, (32)0
948; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
949; CHECK-NEXT:    bsic %s10, (, %s12)
950; CHECK-NEXT:    lea %s0, 240(, %s11)
951; CHECK-NEXT:    ld %s1, 56(, %s0)
952; CHECK-NEXT:    ld %s1, 48(, %s0)
953; CHECK-NEXT:    ld %s1, 40(, %s0)
954; CHECK-NEXT:    ld %s1, 32(, %s0)
955; CHECK-NEXT:    ld %s1, 24(, %s0)
956; CHECK-NEXT:    ld %s1, 16(, %s0)
957; CHECK-NEXT:    ld %s1, 8(, %s0)
958; CHECK-NEXT:    ld %s0, (, %s0)
959; CHECK-NEXT:    # implicit-def: $vmp1
960; CHECK-NEXT:    ld %s16, 256(, %s17)
961; CHECK-NEXT:    lvm %vm3, 0, %s16
962; CHECK-NEXT:    ld %s16, 264(, %s17)
963; CHECK-NEXT:    lvm %vm3, 1, %s16
964; CHECK-NEXT:    ld %s16, 272(, %s17)
965; CHECK-NEXT:    lvm %vm3, 2, %s16
966; CHECK-NEXT:    ld %s16, 280(, %s17)
967; CHECK-NEXT:    lvm %vm3, 3, %s16
968; CHECK-NEXT:    ld %s16, 288(, %s17)
969; CHECK-NEXT:    lvm %vm2, 0, %s16
970; CHECK-NEXT:    ld %s16, 296(, %s17)
971; CHECK-NEXT:    lvm %vm2, 1, %s16
972; CHECK-NEXT:    ld %s16, 304(, %s17)
973; CHECK-NEXT:    lvm %vm2, 2, %s16
974; CHECK-NEXT:    ld %s16, 312(, %s17)
975; CHECK-NEXT:    lvm %vm2, 3, %s16
976; CHECK-NEXT:    or %s11, 0, %s9
977; CHECK-NEXT:    ld %s17, 40(, %s11)
978; CHECK-NEXT:    ld %s10, 8(, %s11)
979; CHECK-NEXT:    ld %s9, (, %s11)
980; CHECK-NEXT:    b.l.t (, %s10)
;;; Fixed-size mask slot, 32-byte aligned (less than its 64-byte size).
981  %2 = alloca <512 x i1>, align 32
982  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
;;; Dynamically sized area: %0 elements of <512 x i1>, 8-byte aligned.
983  %3 = alloca <512 x i1>, i64 %0, align 8
;;; NOTE(review): this load states align 64 although %3 is allocated with
;;; align 8 - confirm this is intentional. Its result %4 is not used.
984  %4 = load volatile <512 x i1>, ptr %3, align 64
;;; The load whose value is returned.
985  %5 = load volatile <512 x i1>, ptr %2, align 32
986  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
987  ret <512 x i1> %5
988}
989
990; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
;;; load__vm512_stk_dyn_align2: uses two fixed <512 x i1> slots with different
;;; alignments (%2 align 32, %3 align 64) plus a dynamically sized alloca of
;;; %0 x <512 x i1> (%4). All three are read with volatile loads; only the
;;; value read from %2 (%6) is returned.
;;;
;;; Expected-output highlights:
;;;   - the prologue lowers %s11 by 384 and masks it with (58)1;
;;;   - the incoming count in %s0 is shifted left by 6 before the call to
;;;     __ve_grow_stack;
;;;   - the first volatile load appears as eight scalar ld instructions based
;;;     at 240(%s11);
;;;   - offsets 320..376 from %s17 are loaded into %vm3 and %vm2 first, then
;;;     offsets 256..312 from %s17 are loaded into %vm5 and %vm4.
;;;
;;; The assertion lines are autogenerated (see the NOTE at the top of this
;;; file); regenerate them with utils/update_llc_test_checks.py rather than
;;; editing them by hand.
991define fastcc <512 x i1> @load__vm512_stk_dyn_align2(i64 noundef %0) {
992; CHECK-LABEL: load__vm512_stk_dyn_align2:
993; CHECK:       # %bb.0:
994; CHECK-NEXT:    st %s9, (, %s11)
995; CHECK-NEXT:    st %s10, 8(, %s11)
996; CHECK-NEXT:    st %s17, 40(, %s11)
997; CHECK-NEXT:    or %s9, 0, %s11
998; CHECK-NEXT:    lea %s11, -384(, %s11)
999; CHECK-NEXT:    and %s11, %s11, (58)1
1000; CHECK-NEXT:    or %s17, 0, %s11
1001; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB14_2
1002; CHECK-NEXT:  # %bb.1:
1003; CHECK-NEXT:    ld %s61, 24(, %s14)
1004; CHECK-NEXT:    or %s62, 0, %s0
1005; CHECK-NEXT:    lea %s63, 315
1006; CHECK-NEXT:    shm.l %s63, (%s61)
1007; CHECK-NEXT:    shm.l %s8, 8(%s61)
1008; CHECK-NEXT:    shm.l %s11, 16(%s61)
1009; CHECK-NEXT:    monc
1010; CHECK-NEXT:    or %s0, 0, %s62
1011; CHECK-NEXT:  .LBB14_2:
1012; CHECK-NEXT:    sll %s0, %s0, 6
1013; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
1014; CHECK-NEXT:    and %s1, %s1, (32)0
1015; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
1016; CHECK-NEXT:    bsic %s10, (, %s12)
1017; CHECK-NEXT:    lea %s0, 240(, %s11)
1018; CHECK-NEXT:    ld %s1, 56(, %s0)
1019; CHECK-NEXT:    ld %s1, 48(, %s0)
1020; CHECK-NEXT:    ld %s1, 40(, %s0)
1021; CHECK-NEXT:    ld %s1, 32(, %s0)
1022; CHECK-NEXT:    ld %s1, 24(, %s0)
1023; CHECK-NEXT:    ld %s1, 16(, %s0)
1024; CHECK-NEXT:    ld %s1, 8(, %s0)
1025; CHECK-NEXT:    ld %s0, (, %s0)
1026; CHECK-NEXT:    # implicit-def: $vmp1
1027; CHECK-NEXT:    ld %s16, 320(, %s17)
1028; CHECK-NEXT:    lvm %vm3, 0, %s16
1029; CHECK-NEXT:    ld %s16, 328(, %s17)
1030; CHECK-NEXT:    lvm %vm3, 1, %s16
1031; CHECK-NEXT:    ld %s16, 336(, %s17)
1032; CHECK-NEXT:    lvm %vm3, 2, %s16
1033; CHECK-NEXT:    ld %s16, 344(, %s17)
1034; CHECK-NEXT:    lvm %vm3, 3, %s16
1035; CHECK-NEXT:    ld %s16, 352(, %s17)
1036; CHECK-NEXT:    lvm %vm2, 0, %s16
1037; CHECK-NEXT:    ld %s16, 360(, %s17)
1038; CHECK-NEXT:    lvm %vm2, 1, %s16
1039; CHECK-NEXT:    ld %s16, 368(, %s17)
1040; CHECK-NEXT:    lvm %vm2, 2, %s16
1041; CHECK-NEXT:    ld %s16, 376(, %s17)
1042; CHECK-NEXT:    lvm %vm2, 3, %s16
1043; CHECK-NEXT:    # implicit-def: $vmp2
1044; CHECK-NEXT:    ld %s16, 256(, %s17)
1045; CHECK-NEXT:    lvm %vm5, 0, %s16
1046; CHECK-NEXT:    ld %s16, 264(, %s17)
1047; CHECK-NEXT:    lvm %vm5, 1, %s16
1048; CHECK-NEXT:    ld %s16, 272(, %s17)
1049; CHECK-NEXT:    lvm %vm5, 2, %s16
1050; CHECK-NEXT:    ld %s16, 280(, %s17)
1051; CHECK-NEXT:    lvm %vm5, 3, %s16
1052; CHECK-NEXT:    ld %s16, 288(, %s17)
1053; CHECK-NEXT:    lvm %vm4, 0, %s16
1054; CHECK-NEXT:    ld %s16, 296(, %s17)
1055; CHECK-NEXT:    lvm %vm4, 1, %s16
1056; CHECK-NEXT:    ld %s16, 304(, %s17)
1057; CHECK-NEXT:    lvm %vm4, 2, %s16
1058; CHECK-NEXT:    ld %s16, 312(, %s17)
1059; CHECK-NEXT:    lvm %vm4, 3, %s16
1060; CHECK-NEXT:    or %s11, 0, %s9
1061; CHECK-NEXT:    ld %s17, 40(, %s11)
1062; CHECK-NEXT:    ld %s10, 8(, %s11)
1063; CHECK-NEXT:    ld %s9, (, %s11)
1064; CHECK-NEXT:    b.l.t (, %s10)
;;; Two fixed-size mask slots with different alignment requirements.
1065  %2 = alloca <512 x i1>, align 32
1066  %3 = alloca <512 x i1>, align 64
1067  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
;;; Dynamically sized area: %0 elements of <512 x i1>, 8-byte aligned.
1068  %4 = alloca <512 x i1>, i64 %0, align 8
;;; NOTE(review): this load states align 64 although %4 is allocated with
;;; align 8 - confirm this is intentional. Its result %5 is not used.
1069  %5 = load volatile <512 x i1>, ptr %4, align 64
;;; The load whose value is returned.
1070  %6 = load volatile <512 x i1>, ptr %2, align 32
;;; The lifetime of %3 is nested inside that of %2; its loaded value %7 is
;;; not used.
1071  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
1072  %7 = load volatile <512 x i1>, ptr %3, align 64
1073  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
1074  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
1075  ret <512 x i1> %6
1076}
1077
1078; Function Attrs: nounwind
;;; load__vm512_stk_dyn_align_spill: like the dynamic-alloca tests with a
;;; 32-byte-aligned fixed <512 x i1> slot (%2) and a dynamically sized alloca
;;; of %0 x <512 x i1> (%3), but the loaded mask %5 must stay live across
;;; calls to @dummy and @pass before it is returned.
;;;
;;; Expected-output highlights:
;;;   - the prologue lowers %s11 by 384 and masks it with (59)1;
;;;   - %s18 is saved to 48(%s9), holds the incoming argument across the
;;;     calls, and is reloaded before returning;
;;;   - the mask is loaded from offsets 320..376 of %s17 into %vm3 and %vm2,
;;;     stored with svm/st to offsets 256..312 of %s17 (marked as a 64-byte
;;;     folded spill) before the calls, and read back with ld/lvm from the
;;;     same offsets (marked as a 64-byte folded reload) afterwards;
;;;   - both IR calls are marked `tail`, yet each appears as a bsic through
;;;     %s12.
;;;
;;; The assertion lines are autogenerated (see the NOTE at the top of this
;;; file); regenerate them with utils/update_llc_test_checks.py rather than
;;; editing them by hand.
1079define fastcc <512 x i1> @load__vm512_stk_dyn_align_spill(i64 noundef %0) {
1080; CHECK-LABEL: load__vm512_stk_dyn_align_spill:
1081; CHECK:       # %bb.0:
1082; CHECK-NEXT:    st %s9, (, %s11)
1083; CHECK-NEXT:    st %s10, 8(, %s11)
1084; CHECK-NEXT:    st %s17, 40(, %s11)
1085; CHECK-NEXT:    or %s9, 0, %s11
1086; CHECK-NEXT:    lea %s11, -384(, %s11)
1087; CHECK-NEXT:    and %s11, %s11, (59)1
1088; CHECK-NEXT:    or %s17, 0, %s11
1089; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB15_2
1090; CHECK-NEXT:  # %bb.1:
1091; CHECK-NEXT:    ld %s61, 24(, %s14)
1092; CHECK-NEXT:    or %s62, 0, %s0
1093; CHECK-NEXT:    lea %s63, 315
1094; CHECK-NEXT:    shm.l %s63, (%s61)
1095; CHECK-NEXT:    shm.l %s8, 8(%s61)
1096; CHECK-NEXT:    shm.l %s11, 16(%s61)
1097; CHECK-NEXT:    monc
1098; CHECK-NEXT:    or %s0, 0, %s62
1099; CHECK-NEXT:  .LBB15_2:
1100; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
1101; CHECK-NEXT:    or %s18, 0, %s0
1102; CHECK-NEXT:    sll %s0, %s0, 6
1103; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
1104; CHECK-NEXT:    and %s1, %s1, (32)0
1105; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
1106; CHECK-NEXT:    bsic %s10, (, %s12)
1107; CHECK-NEXT:    lea %s0, 240(, %s11)
1108; CHECK-NEXT:    ld %s1, 56(, %s0)
1109; CHECK-NEXT:    ld %s1, 48(, %s0)
1110; CHECK-NEXT:    ld %s1, 40(, %s0)
1111; CHECK-NEXT:    ld %s1, 32(, %s0)
1112; CHECK-NEXT:    ld %s1, 24(, %s0)
1113; CHECK-NEXT:    ld %s1, 16(, %s0)
1114; CHECK-NEXT:    ld %s1, 8(, %s0)
1115; CHECK-NEXT:    ld %s0, (, %s0)
1116; CHECK-NEXT:    # implicit-def: $vmp1
1117; CHECK-NEXT:    ld %s16, 320(, %s17)
1118; CHECK-NEXT:    lvm %vm3, 0, %s16
1119; CHECK-NEXT:    ld %s16, 328(, %s17)
1120; CHECK-NEXT:    lvm %vm3, 1, %s16
1121; CHECK-NEXT:    ld %s16, 336(, %s17)
1122; CHECK-NEXT:    lvm %vm3, 2, %s16
1123; CHECK-NEXT:    ld %s16, 344(, %s17)
1124; CHECK-NEXT:    lvm %vm3, 3, %s16
1125; CHECK-NEXT:    ld %s16, 352(, %s17)
1126; CHECK-NEXT:    lvm %vm2, 0, %s16
1127; CHECK-NEXT:    ld %s16, 360(, %s17)
1128; CHECK-NEXT:    lvm %vm2, 1, %s16
1129; CHECK-NEXT:    ld %s16, 368(, %s17)
1130; CHECK-NEXT:    lvm %vm2, 2, %s16
1131; CHECK-NEXT:    ld %s16, 376(, %s17)
1132; CHECK-NEXT:    lvm %vm2, 3, %s16
1133; CHECK-NEXT:    svm %s16, %vm3, 0
1134; CHECK-NEXT:    st %s16, 256(, %s17)
1135; CHECK-NEXT:    svm %s16, %vm3, 1
1136; CHECK-NEXT:    st %s16, 264(, %s17)
1137; CHECK-NEXT:    svm %s16, %vm3, 2
1138; CHECK-NEXT:    st %s16, 272(, %s17)
1139; CHECK-NEXT:    svm %s16, %vm3, 3
1140; CHECK-NEXT:    st %s16, 280(, %s17)
1141; CHECK-NEXT:    svm %s16, %vm2, 0
1142; CHECK-NEXT:    st %s16, 288(, %s17)
1143; CHECK-NEXT:    svm %s16, %vm2, 1
1144; CHECK-NEXT:    st %s16, 296(, %s17)
1145; CHECK-NEXT:    svm %s16, %vm2, 2
1146; CHECK-NEXT:    st %s16, 304(, %s17)
1147; CHECK-NEXT:    svm %s16, %vm2, 3
1148; CHECK-NEXT:    st %s16, 312(, %s17) # 64-byte Folded Spill
1149; CHECK-NEXT:    lea %s0, dummy@lo
1150; CHECK-NEXT:    and %s0, %s0, (32)0
1151; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
1152; CHECK-NEXT:    bsic %s10, (, %s12)
1153; CHECK-NEXT:    lea %s0, pass@lo
1154; CHECK-NEXT:    and %s0, %s0, (32)0
1155; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
1156; CHECK-NEXT:    or %s0, 0, %s18
1157; CHECK-NEXT:    bsic %s10, (, %s12)
1158; CHECK-NEXT:    # implicit-def: $vmp1
1159; CHECK-NEXT:    ld %s16, 256(, %s17)
1160; CHECK-NEXT:    lvm %vm3, 0, %s16
1161; CHECK-NEXT:    ld %s16, 264(, %s17)
1162; CHECK-NEXT:    lvm %vm3, 1, %s16
1163; CHECK-NEXT:    ld %s16, 272(, %s17)
1164; CHECK-NEXT:    lvm %vm3, 2, %s16
1165; CHECK-NEXT:    ld %s16, 280(, %s17)
1166; CHECK-NEXT:    lvm %vm3, 3, %s16
1167; CHECK-NEXT:    ld %s16, 288(, %s17)
1168; CHECK-NEXT:    lvm %vm2, 0, %s16
1169; CHECK-NEXT:    ld %s16, 296(, %s17)
1170; CHECK-NEXT:    lvm %vm2, 1, %s16
1171; CHECK-NEXT:    ld %s16, 304(, %s17)
1172; CHECK-NEXT:    lvm %vm2, 2, %s16
1173; CHECK-NEXT:    ld %s16, 312(, %s17) # 64-byte Folded Reload
1174; CHECK-NEXT:    lvm %vm2, 3, %s16
1175; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
1176; CHECK-NEXT:    or %s11, 0, %s9
1177; CHECK-NEXT:    ld %s17, 40(, %s11)
1178; CHECK-NEXT:    ld %s10, 8(, %s11)
1179; CHECK-NEXT:    ld %s9, (, %s11)
1180; CHECK-NEXT:    b.l.t (, %s10)
;;; Fixed-size mask slot, 32-byte aligned (less than its 64-byte size).
1181  %2 = alloca <512 x i1>, align 32
1182  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
;;; Dynamically sized area: %0 elements of <512 x i1>, 8-byte aligned.
1183  %3 = alloca <512 x i1>, i64 %0, align 8
;;; NOTE(review): this load states align 64 although %3 is allocated with
;;; align 8 - confirm this is intentional. Its result %4 is not used.
1184  %4 = load volatile <512 x i1>, ptr %3, align 64
;;; The mask loaded here is returned after the two calls below, so it has to
;;; survive them.
1185  %5 = load volatile <512 x i1>, ptr %2, align 32
1186  tail call fastcc void @dummy()
1187  tail call fastcc void @pass(i64 noundef %0)
1188  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
1189  ret <512 x i1> %5
1190}
1191
;;; Metadata nodes referenced by number from the functions in this file.
;;;
;;; !2 holds the producing compiler's version string; its use is not visible
;;; in this part of the file (presumably an ident-style operand - verify).
1192!2 = !{!"clang version 15.0.0 (git@kaz7.github.com:sx-aurora-dev/llvm-project.git 50263c9e9cc3714bcd816eaea8822d3e010a0f19)"}
;;; TBAA: !3 is the access tag attached as `!tbaa !3` to the volatile i64
;;; loads in the array loops. It names type "long" (!4), whose parent is
;;; "omnipotent char" (!5) under the root "Simple C/C++ TBAA" (!6).
1193!3 = !{!4, !4, i64 0}
1194!4 = !{!"long", !5, i64 0}
1195!5 = !{!"omnipotent char", !6, i64 0}
1196!6 = !{!"Simple C/C++ TBAA"}
;;; Loop IDs: each distinct node below references itself and !8
;;; ("llvm.loop.mustprogress"). !13 is attached as `!llvm.loop` to the loop
;;; branch in load__vm512_stk_big2 and !12 to the loop branch of the function
;;; just before it; !7 and !9-!11 are presumably used by loops earlier in the
;;; file (not visible here - verify).
1197!7 = distinct !{!7, !8}
1198!8 = !{!"llvm.loop.mustprogress"}
1199!9 = distinct !{!9, !8}
1200!10 = distinct !{!10, !8}
1201!11 = distinct !{!11, !8}
1202!12 = distinct !{!12, !8}
1203!13 = distinct !{!13, !8}
1203!13 = distinct !{!13, !8}
1204