xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc --arm-memtransfer-tploop=allow -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
3
4; Check that WLSTP loop is not generated for alignment < 4
5; void test1(char* dest, char* src, int n){
6;    memcpy(dest, src, n);
7; }
8
; Intrinsics exercised by the tests in this file: memcpy with an i32 length,
; memcpy with an i64 length, and memset with an i32 length.
9declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
10declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
11declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
12
; test1: memcpy of a run-time length %n where both pointers are only align 1.
; The expected assembly is a plain call to __aeabi_memcpy with no wlstp/letp loop.
13define void @test1(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n){
14; CHECK-LABEL: test1:
15; CHECK:       @ %bb.0: @ %entry
16; CHECK-NEXT:    .save {r7, lr}
17; CHECK-NEXT:    push {r7, lr}
18; CHECK-NEXT:    bl __aeabi_memcpy
19; CHECK-NEXT:    pop {r7, pc}
20entry:
21  call void @llvm.memcpy.p0.p0.i32(ptr align 1 %X, ptr align 1 %Y, i32 %n, i1 false)
22  ret void
23}
24
25
26; Check that WLSTP loop is generated for alignment >= 4
27; void test2(int* restrict X, int* restrict Y, int n){
28;     memcpy(X, Y, n);
29; }
30
; test2: memcpy of a run-time length %n with both pointers align 4.
; The expected assembly is a wlstp.8/letp loop whose body is one vldrb.u8 and
; one vstrb.8, each post-incrementing its pointer by 16.
31define void @test2(ptr noalias %X, ptr noalias readonly %Y, i32 %n){
32; CHECK-LABEL: test2:
33; CHECK:       @ %bb.0: @ %entry
34; CHECK-NEXT:    .save {r7, lr}
35; CHECK-NEXT:    push {r7, lr}
36; CHECK-NEXT:    wlstp.8 lr, r2, .LBB1_2
37; CHECK-NEXT:  .LBB1_1: @ =>This Inner Loop Header: Depth=1
38; CHECK-NEXT:    vldrb.u8 q0, [r1], #16
39; CHECK-NEXT:    vstrb.8 q0, [r0], #16
40; CHECK-NEXT:    letp lr, .LBB1_1
41; CHECK-NEXT:  .LBB1_2: @ %entry
42; CHECK-NEXT:    pop {r7, pc}
43entry:
44  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
45  ret void
46}
47
48
49; Checks that transform handles some arithmetic on the input arguments.
50; void test3(int* restrict X, int* restrict Y, int n)
51; {
52;     memcpy(X+2, Y+3, (n*2)+10);
53; }
54
; test3: memcpy whose operands are computed first: destination X + 2 i32
; elements, source Y + 3 i32 elements, length (n << 1) + 10.
; The expected assembly computes these as adds r0, #8 / adds r1, #12 /
; add.w r2, r3, r2, lsl #1 (with r3 = 10) ahead of the wlstp.8/letp loop.
55define void @test3(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
56; CHECK-LABEL: test3:
57; CHECK:       @ %bb.0: @ %entry
58; CHECK-NEXT:    .save {r7, lr}
59; CHECK-NEXT:    push {r7, lr}
60; CHECK-NEXT:    movs r3, #10
61; CHECK-NEXT:    add.w r2, r3, r2, lsl #1
62; CHECK-NEXT:    adds r1, #12
63; CHECK-NEXT:    adds r0, #8
64; CHECK-NEXT:    wlstp.8 lr, r2, .LBB2_2
65; CHECK-NEXT:  .LBB2_1: @ =>This Inner Loop Header: Depth=1
66; CHECK-NEXT:    vldrb.u8 q0, [r1], #16
67; CHECK-NEXT:    vstrb.8 q0, [r0], #16
68; CHECK-NEXT:    letp lr, .LBB2_1
69; CHECK-NEXT:  .LBB2_2: @ %entry
70; CHECK-NEXT:    pop {r7, pc}
71entry:
72  %add.ptr = getelementptr inbounds i32, ptr %X, i32 2
73  %add.ptr1 = getelementptr inbounds i32, ptr %Y, i32 3
74  %mul = shl nsw i32 %n, 1
75  %add = add nsw i32 %mul, 10
76  call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 %add.ptr, ptr nonnull align 4 %add.ptr1, i32 %add, i1 false)
77  ret void
78}
79
80
81; Checks that transform handles for loops that are implicitly converted to memcpy
82; void test4(int* restrict X, int* restrict Y, int n){
83;     for(int i = 0; i < n; ++i){
84;         X[i] = Y[i];
85;     }
86; }
87
; test4: the align-4 memcpy sits in %for.body.preheader, which is only reached
; when %n > 0. The expected assembly keeps that guard as an early return
; (cmp r2, #1 / it lt / bxlt lr) ahead of the wlstp.8/letp loop.
88define void @test4(ptr noalias %X, ptr noalias readonly %Y, i32 %n) {
89; CHECK-LABEL: test4:
90; CHECK:       @ %bb.0: @ %entry
91; CHECK-NEXT:    cmp r2, #1
92; CHECK-NEXT:    it lt
93; CHECK-NEXT:    bxlt lr
94; CHECK-NEXT:  .LBB3_1: @ %for.body.preheader
95; CHECK-NEXT:    .save {r7, lr}
96; CHECK-NEXT:    push {r7, lr}
97; CHECK-NEXT:    wlstp.8 lr, r2, .LBB3_3
98; CHECK-NEXT:  .LBB3_2: @ =>This Inner Loop Header: Depth=1
99; CHECK-NEXT:    vldrb.u8 q0, [r1], #16
100; CHECK-NEXT:    vstrb.8 q0, [r0], #16
101; CHECK-NEXT:    letp lr, .LBB3_2
102; CHECK-NEXT:  .LBB3_3: @ %for.body.preheader
103; CHECK-NEXT:    pop.w {r7, lr}
104; CHECK-NEXT:    bx lr
105entry:
106  %cmp6 = icmp sgt i32 %n, 0
107  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
108
109for.body.preheader:                               ; preds = %entry
110  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
111  br label %for.cond.cleanup
112
113for.cond.cleanup:                                 ; preds = %for.body.preheader, %entry
114  ret void
115}
116
117; Checks that transform can handle > i32 size inputs
; test5: uses the i64-length memcpy intrinsic with align-4 pointers. The
; function body has no named entry block. The expected assembly is the same
; wlstp.8/letp loop with r2 as the element count.
; NOTE(review): r2 is presumably the low 32 bits of %n - confirm against the ABI.
118define void @test5(ptr noalias %X, ptr noalias %Y, i64 %n){
119; CHECK-LABEL: test5:
120; CHECK:       @ %bb.0:
121; CHECK-NEXT:    .save {r7, lr}
122; CHECK-NEXT:    push {r7, lr}
123; CHECK-NEXT:    wlstp.8 lr, r2, .LBB4_2
124; CHECK-NEXT:  .LBB4_1: @ =>This Inner Loop Header: Depth=1
125; CHECK-NEXT:    vldrb.u8 q0, [r1], #16
126; CHECK-NEXT:    vstrb.8 q0, [r0], #16
127; CHECK-NEXT:    letp lr, .LBB4_1
128; CHECK-NEXT:  .LBB4_2:
129; CHECK-NEXT:    pop {r7, pc}
130    call void @llvm.memcpy.p0.p0.i64(ptr align 4 %X, ptr align 4 %Y, i64 %n, i1 false)
131    ret void
132}
133
134; Checks the transform is applied for constant size inputs below a certain threshold (128 in this case)
; test6: align-4 memcpy with the constant length 127; the %n argument is unused.
; The expected assembly loads #127 into r2 and still uses the wlstp.8/letp loop.
135define void @test6(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
136; CHECK-LABEL: test6:
137; CHECK:       @ %bb.0: @ %entry
138; CHECK-NEXT:    .save {r7, lr}
139; CHECK-NEXT:    push {r7, lr}
140; CHECK-NEXT:    movs r2, #127
141; CHECK-NEXT:    wlstp.8 lr, r2, .LBB5_2
142; CHECK-NEXT:  .LBB5_1: @ =>This Inner Loop Header: Depth=1
143; CHECK-NEXT:    vldrb.u8 q0, [r1], #16
144; CHECK-NEXT:    vstrb.8 q0, [r0], #16
145; CHECK-NEXT:    letp lr, .LBB5_1
146; CHECK-NEXT:  .LBB5_2: @ %entry
147; CHECK-NEXT:    pop {r7, pc}
148entry:
149  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 4 dereferenceable(127) %X, ptr noundef nonnull align 4 dereferenceable(127) %Y, i32 127, i1 false)
150  ret void
151}
152
153; Checks the transform is NOT applied for constant size inputs at or above a certain threshold (128 in this case)
; test7: align-4 memcpy with the constant length 128; the %n argument is unused.
; The expected assembly is a call to __aeabi_memcpy4 with r2 = 128 and no loop.
154define void @test7(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
155; CHECK-LABEL: test7:
156; CHECK:       @ %bb.0: @ %entry
157; CHECK-NEXT:    .save {r7, lr}
158; CHECK-NEXT:    push {r7, lr}
159; CHECK-NEXT:    movs r2, #128
160; CHECK-NEXT:    bl __aeabi_memcpy4
161; CHECK-NEXT:    pop {r7, pc}
162entry:
163  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 128, i1 false)
164  ret void
165}
166
167; Checks the transform is NOT applied for constant size inputs below a certain threshold (64 in this case)
; test8: align-4 memcpy with the constant length 60; the %n argument is unused.
; The expected assembly is three ldm/stm pairs of five registers each
; (3 * 5 * 4 = 60 bytes), with neither a wlstp loop nor a library call.
168define void @test8(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
169; CHECK-LABEL: test8:
170; CHECK:       @ %bb.0: @ %entry
171; CHECK-NEXT:    .save {r4, lr}
172; CHECK-NEXT:    push {r4, lr}
173; CHECK-NEXT:    ldm.w r1!, {r2, r3, r4, r12, lr}
174; CHECK-NEXT:    stm.w r0!, {r2, r3, r4, r12, lr}
175; CHECK-NEXT:    ldm.w r1!, {r2, r3, r4, r12, lr}
176; CHECK-NEXT:    stm.w r0!, {r2, r3, r4, r12, lr}
177; CHECK-NEXT:    ldm.w r1, {r2, r3, r4, r12, lr}
178; CHECK-NEXT:    stm.w r0, {r2, r3, r4, r12, lr}
179; CHECK-NEXT:    pop {r4, pc}
180entry:
181  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 60, i1 false)
182  ret void
183}
184
185; Checks the transform is NOT applied (regardless of alignment) when optimizations are disabled
; test9: same IR call as the align-4 run-time-length memcpy, but the function
; carries attribute group #0. The expected assembly is a call to __aeabi_memcpy4.
; NOTE(review): #0 is defined outside this part of the file; the comment above
; implies it disables optimization - confirm against its definition.
186define void @test9(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) #0 {
187; CHECK-LABEL: test9:
188; CHECK:       @ %bb.0: @ %entry
189; CHECK-NEXT:    .save {r7, lr}
190; CHECK-NEXT:    push {r7, lr}
191; CHECK-NEXT:    bl __aeabi_memcpy4
192; CHECK-NEXT:    pop {r7, pc}
193entry:
194  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
195  ret void
196}
197
198; Checks the transform is NOT applied (regardless of alignment) when optimization for size is on (-Os or -Oz)
; test10: same IR call as the align-4 run-time-length memcpy, but the function
; carries attribute group #1. The expected assembly is a call to __aeabi_memcpy4.
; NOTE(review): #1 is defined outside this part of the file; the comment above
; implies it requests optimization for size - confirm against its definition.
199define void @test10(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) #1 {
200; CHECK-LABEL: test10:
201; CHECK:       @ %bb.0: @ %entry
202; CHECK-NEXT:    .save {r7, lr}
203; CHECK-NEXT:    push {r7, lr}
204; CHECK-NEXT:    bl __aeabi_memcpy4
205; CHECK-NEXT:    pop {r7, pc}
206entry:
207  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
208  ret void
209}
210
; test11: a memcpy followed by a scalar loop that reuses the same pointers.
; %entry branches to %prehead only when %n < 0; %prehead copies %n bytes from
; %y to %x (align 4); %for.body then copies one byte per iteration from %x to
; %y while its counter advances by 2 until it equals %n.
; The expected assembly copies r0/r1 into r4/r12 for the wlstp.8/letp loop so
; that the scalar ldrb/strb loop afterwards still starts from the original
; r0 and r1.
211define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
212; CHECK-LABEL: test11:
213; CHECK:       @ %bb.0: @ %entry
214; CHECK-NEXT:    cmp.w r2, #-1
215; CHECK-NEXT:    it gt
216; CHECK-NEXT:    bxgt lr
217; CHECK-NEXT:  .LBB10_1: @ %prehead
218; CHECK-NEXT:    .save {r4, lr}
219; CHECK-NEXT:    push {r4, lr}
220; CHECK-NEXT:    mov r12, r1
221; CHECK-NEXT:    mov r4, r0
222; CHECK-NEXT:    wlstp.8 lr, r2, .LBB10_3
223; CHECK-NEXT:  .LBB10_2: @ =>This Inner Loop Header: Depth=1
224; CHECK-NEXT:    vldrb.u8 q0, [r12], #16
225; CHECK-NEXT:    vstrb.8 q0, [r4], #16
226; CHECK-NEXT:    letp lr, .LBB10_2
227; CHECK-NEXT:  .LBB10_3: @ %for.body
228; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
229; CHECK-NEXT:    ldrb r3, [r0], #1
230; CHECK-NEXT:    subs r2, #2
231; CHECK-NEXT:    strb r3, [r1], #1
232; CHECK-NEXT:    bne .LBB10_3
233; CHECK-NEXT:  @ %bb.4:
234; CHECK-NEXT:    pop.w {r4, lr}
235; CHECK-NEXT:    bx lr
236entry:
237  %cmp6 = icmp slt i32 %n, 0
238  br i1 %cmp6, label %prehead, label %for.cond.cleanup
239
240prehead:                                          ; preds = %entry
241  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %x, ptr align 4 %y, i32 %n, i1 false)
242  br label %for.body
243
244for.body:                                         ; preds = %for.body, %prehead
245  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %prehead ]
246  %x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %prehead ]
247  %y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %prehead ]
248  %add.ptr = getelementptr inbounds i8, ptr %x.addr.08, i32 1
249  %add.ptr1 = getelementptr inbounds i8, ptr %y.addr.07, i32 1
250  %l = load i8, ptr %x.addr.08, align 1
251  store i8 %l, ptr %y.addr.07, align 1
252  %inc = add nuw nsw i32 %i.09, 2
253  %exitcond.not = icmp eq i32 %inc, %n
254  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
255
256for.cond.cleanup:                                 ; preds = %for.body, %entry
257  ret void
258}
259
260; Check that WLSTP loop is generated for simplest case of align = 1
; test12: memset of a run-time length %n with fill byte %c and align 1.
; The expected assembly splats r1 with vdup.8 q0, r1 and then runs a
; wlstp.8/letp loop containing only the vstrb.8 store.
261define void @test12(ptr %X, i8 zeroext %c, i32 %n) {
262; CHECK-LABEL: test12:
263; CHECK:       @ %bb.0: @ %entry
264; CHECK-NEXT:    .save {r7, lr}
265; CHECK-NEXT:    push {r7, lr}
266; CHECK-NEXT:    vdup.8 q0, r1
267; CHECK-NEXT:    wlstp.8 lr, r2, .LBB11_2
268; CHECK-NEXT:  .LBB11_1: @ =>This Inner Loop Header: Depth=1
269; CHECK-NEXT:    vstrb.8 q0, [r0], #16
270; CHECK-NEXT:    letp lr, .LBB11_1
271; CHECK-NEXT:  .LBB11_2: @ %entry
272; CHECK-NEXT:    pop {r7, pc}
273entry:
274  call void @llvm.memset.p0.i32(ptr align 1 %X, i8 %c, i32 %n, i1 false)
275  ret void
276}
277
278
279; Check that WLSTP loop is generated for alignment >= 4
; test13: memset of a run-time length %n with fill byte %c and align 4.
; The expected assembly has the same shape as the align-1 memset case:
; vdup.8 q0, r1 followed by a wlstp.8/letp loop around a single vstrb.8.
280define void @test13(ptr %X, i8 zeroext %c, i32 %n) {
281; CHECK-LABEL: test13:
282; CHECK:       @ %bb.0: @ %entry
283; CHECK-NEXT:    .save {r7, lr}
284; CHECK-NEXT:    push {r7, lr}
285; CHECK-NEXT:    vdup.8 q0, r1
286; CHECK-NEXT:    wlstp.8 lr, r2, .LBB12_2
287; CHECK-NEXT:  .LBB12_1: @ =>This Inner Loop Header: Depth=1
288; CHECK-NEXT:    vstrb.8 q0, [r0], #16
289; CHECK-NEXT:    letp lr, .LBB12_1
290; CHECK-NEXT:  .LBB12_2: @ %entry
291; CHECK-NEXT:    pop {r7, pc}
292entry:
293  call void @llvm.memset.p0.i32(ptr align 4 %X, i8 %c, i32 %n, i1 false)
294  ret void
295}
296
; twoloops: two back-to-back, identical memsets of %m zero bytes to %X (align 4);
; the %n argument is unused. The expected assembly contains two separate
; wlstp.8/letp loops that share one zeroed q0; the first stores through a copy
; of r0 held in r3, the second through r0 itself.
297define void @twoloops(ptr %X, i32 %n, i32 %m) {
298; CHECK-LABEL: twoloops:
299; CHECK:       @ %bb.0: @ %entry
300; CHECK-NEXT:    .save {r7, lr}
301; CHECK-NEXT:    push {r7, lr}
302; CHECK-NEXT:    vmov.i32 q0, #0x0
303; CHECK-NEXT:    mov r3, r0
304; CHECK-NEXT:    wlstp.8 lr, r2, .LBB13_2
305; CHECK-NEXT:  .LBB13_1: @ =>This Inner Loop Header: Depth=1
306; CHECK-NEXT:    vstrb.8 q0, [r3], #16
307; CHECK-NEXT:    letp lr, .LBB13_1
308; CHECK-NEXT:  .LBB13_2: @ %entry
309; CHECK-NEXT:    wlstp.8 lr, r2, .LBB13_4
310; CHECK-NEXT:  .LBB13_3: @ =>This Inner Loop Header: Depth=1
311; CHECK-NEXT:    vstrb.8 q0, [r0], #16
312; CHECK-NEXT:    letp lr, .LBB13_3
313; CHECK-NEXT:  .LBB13_4: @ %entry
314; CHECK-NEXT:    pop {r7, pc}
315entry:
316  call void @llvm.memset.p0.i32(ptr align 4 %X, i8 0, i32 %m, i1 false)
317  call void @llvm.memset.p0.i32(ptr align 4 %X, i8 0, i32 %m, i1 false)
318  ret void
319}
320
321
322; Checks that transform correctly handles input with some arithmetic on input arguments.
323; void test14(int* X, char c, int n)
324; {
325;     memset(X+2, c, (n*2)+10);
326; }
327
; test14: memset whose operands are computed first: destination X + 2 i32
; elements, length (n << 1) + 10, fill byte %c.
; The expected assembly computes these as adds r0, #8 and
; add.w r2, r3, r2, lsl #1 (with r3 = 10) ahead of the wlstp.8/letp loop.
328define void @test14(ptr %X, i8 zeroext %c, i32 %n) {
329; CHECK-LABEL: test14:
330; CHECK:       @ %bb.0: @ %entry
331; CHECK-NEXT:    .save {r7, lr}
332; CHECK-NEXT:    push {r7, lr}
333; CHECK-NEXT:    movs r3, #10
334; CHECK-NEXT:    add.w r2, r3, r2, lsl #1
335; CHECK-NEXT:    vdup.8 q0, r1
336; CHECK-NEXT:    adds r0, #8
337; CHECK-NEXT:    wlstp.8 lr, r2, .LBB14_2
338; CHECK-NEXT:  .LBB14_1: @ =>This Inner Loop Header: Depth=1
339; CHECK-NEXT:    vstrb.8 q0, [r0], #16
340; CHECK-NEXT:    letp lr, .LBB14_1
341; CHECK-NEXT:  .LBB14_2: @ %entry
342; CHECK-NEXT:    pop {r7, pc}
343entry:
344  %add.ptr = getelementptr inbounds i32, ptr %X, i32 2
345  %mul = shl nsw i32 %n, 1
346  %add = add nsw i32 %mul, 10
347  call void @llvm.memset.p0.i32(ptr nonnull align 4 %add.ptr, i8 %c, i32 %add, i1 false)
348  ret void
349}
350
351
352
353
354; Checks that transform handles for-loops (that get implicitly converted to memset)
355; void test15(int* X, char c, int n){
356;     for(int i = 0; i < n; ++i){
357;         X[i] = c;
358;     }
359; }
360
; test15: the align-4 memset sits in %for.body.preheader, which is only reached
; when %n > 0. The expected assembly keeps that guard as an early return
; (cmp r2, #1 / it lt / bxlt lr) ahead of vdup.8 and the wlstp.8/letp loop.
361define void @test15(ptr nocapture %X, i8 zeroext %c, i32 %n) {
362; CHECK-LABEL: test15:
363; CHECK:       @ %bb.0: @ %entry
364; CHECK-NEXT:    cmp r2, #1
365; CHECK-NEXT:    it lt
366; CHECK-NEXT:    bxlt lr
367; CHECK-NEXT:  .LBB15_1: @ %for.body.preheader
368; CHECK-NEXT:    .save {r7, lr}
369; CHECK-NEXT:    push {r7, lr}
370; CHECK-NEXT:    vdup.8 q0, r1
371; CHECK-NEXT:    wlstp.8 lr, r2, .LBB15_3
372; CHECK-NEXT:  .LBB15_2: @ =>This Inner Loop Header: Depth=1
373; CHECK-NEXT:    vstrb.8 q0, [r0], #16
374; CHECK-NEXT:    letp lr, .LBB15_2
375; CHECK-NEXT:  .LBB15_3: @ %for.body.preheader
376; CHECK-NEXT:    pop.w {r7, lr}
377; CHECK-NEXT:    bx lr
378entry:
379  %cmp4 = icmp sgt i32 %n, 0
380  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
381
382for.body.preheader:                               ; preds = %entry
383  call void @llvm.memset.p0.i32(ptr align 4 %X, i8 %c, i32 %n, i1 false)
384  br label %for.cond.cleanup
385
386for.cond.cleanup:                                 ; preds = %for.body.preheader, %entry
387  ret void
388}
389
390; Checks that transform handles case with 0 as src value. No difference is expected.
; test16: align-4 memset of a run-time length %n with the constant fill byte 0;
; the %c argument is unused. The expected assembly builds the zero vector with
; vmov.i32 q0, #0x0 (rather than vdup.8) and then uses the wlstp.8/letp loop.
391define void @test16(ptr %X, i8 zeroext %c, i32 %n) {
392; CHECK-LABEL: test16:
393; CHECK:       @ %bb.0: @ %entry
394; CHECK-NEXT:    .save {r7, lr}
395; CHECK-NEXT:    push {r7, lr}
396; CHECK-NEXT:    vmov.i32 q0, #0x0
397; CHECK-NEXT:    wlstp.8 lr, r2, .LBB16_2
398; CHECK-NEXT:  .LBB16_1: @ =>This Inner Loop Header: Depth=1
399; CHECK-NEXT:    vstrb.8 q0, [r0], #16
400; CHECK-NEXT:    letp lr, .LBB16_1
401; CHECK-NEXT:  .LBB16_2: @ %entry
402; CHECK-NEXT:    pop {r7, pc}
403entry:
404  call void @llvm.memset.p0.i32(ptr align 4 %X, i8 0, i32 %n, i1 false)
405  ret void
406}
407
; csprlive: an align-4 memcpy of %n bytes placed between a compare (%n > 0) and
; the branch that consumes it. Both successors (%if and %else) call @other and
; fall through to %cleanup.
; The expected assembly is the wlstp.8/letp loop followed by a single
; "bl other", with no compare or conditional branch remaining.
408define void @csprlive(ptr noalias %X, ptr noalias readonly %Y, i32 %n) {
409; CHECK-LABEL: csprlive:
410; CHECK:       @ %bb.0: @ %entry
411; CHECK-NEXT:    .save {r7, lr}
412; CHECK-NEXT:    push {r7, lr}
413; CHECK-NEXT:    wlstp.8 lr, r2, .LBB17_2
414; CHECK-NEXT:  .LBB17_1: @ =>This Inner Loop Header: Depth=1
415; CHECK-NEXT:    vldrb.u8 q0, [r1], #16
416; CHECK-NEXT:    vstrb.8 q0, [r0], #16
417; CHECK-NEXT:    letp lr, .LBB17_1
418; CHECK-NEXT:  .LBB17_2: @ %entry
419; CHECK-NEXT:    bl other
420; CHECK-NEXT:    pop {r7, pc}
421entry:
422  %cmp6 = icmp sgt i32 %n, 0
423  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
424  br i1 %cmp6, label %if, label %else
425
426if:
427  call void @other()
428  br label %cleanup
429
430else:
431  call void @other()
432  br label %cleanup
433
434cleanup:
435  ret void
436}
437
; External callee invoked from both arms of the branch in @csprlive.
438declare void @other()
439
; External byte array used as the memset destination in @multilooped_exit.
440@arr_56 = external dso_local local_unnamed_addr global [21 x [16 x [11 x i8]]], align 1
; multilooped_exit: four identical align-1 memsets of %b zero bytes into
; @arr_56 (the getelementptr uses an undef index), all inside %loop. %loop is
; entered only when %b > 0 and repeats while the pre-increment counter %p is
; below 1024.
; In the expected assembly the first three memsets are wlstp.8/letp loops; the
; fourth is a dlstp.8/letp loop guarded by an explicit cmp.w r12, #0 / beq,
; where r12 = (r0 + 15) >> 4.
441define void @multilooped_exit(i32 %b) {
442; CHECK-LABEL: multilooped_exit:
443; CHECK:       @ %bb.0: @ %entry
444; CHECK-NEXT:    cmp r0, #1
445; CHECK-NEXT:    it lt
446; CHECK-NEXT:    bxlt lr
447; CHECK-NEXT:  .LBB18_1: @ %loop.preheader
448; CHECK-NEXT:    .save {r4, lr}
449; CHECK-NEXT:    push {r4, lr}
450; CHECK-NEXT:    mov.w r4, #-1
451; CHECK-NEXT:    vmov.i32 q0, #0x0
452; CHECK-NEXT:    b .LBB18_3
453; CHECK-NEXT:  .LBB18_2: @ %loop
454; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
455; CHECK-NEXT:    adds r4, #1
456; CHECK-NEXT:    cmp.w r4, #1024
457; CHECK-NEXT:    bge .LBB18_12
458; CHECK-NEXT:  .LBB18_3: @ %loop
459; CHECK-NEXT:    @ =>This Loop Header: Depth=1
460; CHECK-NEXT:    @ Child Loop BB18_4 Depth 2
461; CHECK-NEXT:    @ Child Loop BB18_6 Depth 2
462; CHECK-NEXT:    @ Child Loop BB18_8 Depth 2
463; CHECK-NEXT:    @ Child Loop BB18_11 Depth 2
464; CHECK-NEXT:    movw r3, :lower16:arr_56
465; CHECK-NEXT:    add.w r1, r0, #15
466; CHECK-NEXT:    movt r3, :upper16:arr_56
467; CHECK-NEXT:    lsr.w r12, r1, #4
468; CHECK-NEXT:    mov r2, r3
469; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_5
470; CHECK-NEXT:  .LBB18_4: @ Parent Loop BB18_3 Depth=1
471; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
472; CHECK-NEXT:    vstrb.8 q0, [r2], #16
473; CHECK-NEXT:    letp lr, .LBB18_4
474; CHECK-NEXT:  .LBB18_5: @ %loop
475; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
476; CHECK-NEXT:    mov r2, r3
477; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_7
478; CHECK-NEXT:  .LBB18_6: @ Parent Loop BB18_3 Depth=1
479; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
480; CHECK-NEXT:    vstrb.8 q0, [r2], #16
481; CHECK-NEXT:    letp lr, .LBB18_6
482; CHECK-NEXT:  .LBB18_7: @ %loop
483; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
484; CHECK-NEXT:    mov r2, r3
485; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_9
486; CHECK-NEXT:  .LBB18_8: @ Parent Loop BB18_3 Depth=1
487; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
488; CHECK-NEXT:    vstrb.8 q0, [r2], #16
489; CHECK-NEXT:    letp lr, .LBB18_8
490; CHECK-NEXT:  .LBB18_9: @ %loop
491; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
492; CHECK-NEXT:    cmp.w r12, #0
493; CHECK-NEXT:    beq .LBB18_2
494; CHECK-NEXT:  @ %bb.10: @ %loop
495; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
496; CHECK-NEXT:    dlstp.8 lr, r0
497; CHECK-NEXT:  .LBB18_11: @ Parent Loop BB18_3 Depth=1
498; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
499; CHECK-NEXT:    vstrb.8 q0, [r3], #16
500; CHECK-NEXT:    letp lr, .LBB18_11
501; CHECK-NEXT:    b .LBB18_2
502; CHECK-NEXT:  .LBB18_12:
503; CHECK-NEXT:    pop.w {r4, lr}
504; CHECK-NEXT:    bx lr
505entry:
506  %cmp8 = icmp sgt i32 %b, 0
507  br i1 %cmp8, label %loop, label %exit
508
509loop:
510  %p = phi i32 [ 0, %entry ], [ %inc, %loop ]
511  call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
512  call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
513  call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
514  call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
515  %inc = add i32 %p, 1
516  %c = icmp slt i32 %p, 1024
517  br i1 %c, label %loop, label %exit
518
519exit:
520  ret void
521}
522
; External arrays referenced by @reverted: @arr_22 is a memset destination,
; while @arr_21 (i16 elements) and @arr_20 (i64 elements) are written by its
; store loops.
523@arr_21 = external dso_local local_unnamed_addr global [17 x [12 x [19 x i16]]], align 2
524@arr_20 = external dso_local local_unnamed_addr global [17 x [12 x [19 x i64]]], align 8
525@arr_22 = external dso_local local_unnamed_addr global [17 x [12 x [19 x i16]]], align 2
526define i32 @reverted(i1 zeroext %b) {
527; CHECK-LABEL: reverted:
528; CHECK:       @ %bb.0: @ %entry
529; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
530; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
531; CHECK-NEXT:    .pad #12
532; CHECK-NEXT:    sub sp, #12
533; CHECK-NEXT:    cmp r0, #0
534; CHECK-NEXT:    mov.w r1, #11
535; CHECK-NEXT:    cinc r1, r1, ne
536; CHECK-NEXT:    movs r0, #38
537; CHECK-NEXT:    mul r2, r1, r0
538; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
539; CHECK-NEXT:    movw r0, :lower16:arr_22
540; CHECK-NEXT:    vmov.i32 q0, #0x0
541; CHECK-NEXT:    movt r0, :upper16:arr_22
542; CHECK-NEXT:    add.w r1, r2, #15
543; CHECK-NEXT:    lsrs r3, r1, #4
544; CHECK-NEXT:    strd r3, r2, [sp] @ 8-byte Folded Spill
545; CHECK-NEXT:    wlstp.8 lr, r2, .LBB19_2
546; CHECK-NEXT:  .LBB19_1: @ =>This Inner Loop Header: Depth=1
547; CHECK-NEXT:    vstrb.8 q0, [r0], #16
548; CHECK-NEXT:    letp lr, .LBB19_1
549; CHECK-NEXT:  .LBB19_2: @ %entry
550; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
551; CHECK-NEXT:    movw r6, :lower16:arr_20
552; CHECK-NEXT:    movt r6, :upper16:arr_20
553; CHECK-NEXT:    add.w r3, r6, #80
554; CHECK-NEXT:    dls lr, r0
555; CHECK-NEXT:    movw r0, :lower16:arr_21
556; CHECK-NEXT:    movt r0, :upper16:arr_21
557; CHECK-NEXT:    add.w r5, r0, #36
558; CHECK-NEXT:    add.w r11, r6, #128
559; CHECK-NEXT:    add.w r7, r6, #112
560; CHECK-NEXT:    add.w r2, r6, #96
561; CHECK-NEXT:    add.w r4, r6, #64
562; CHECK-NEXT:    add.w r0, r6, #48
563; CHECK-NEXT:    add.w r1, r6, #32
564; CHECK-NEXT:    add.w r12, r6, #16
565; CHECK-NEXT:    adr r6, .LCPI19_0
566; CHECK-NEXT:    vldrw.u32 q0, [r6]
567; CHECK-NEXT:    movw r6, :lower16:arr_20
568; CHECK-NEXT:    mov.w r8, #327685
569; CHECK-NEXT:    mov.w r9, #5
570; CHECK-NEXT:    vmov.i16 q1, #0x5
571; CHECK-NEXT:    mov.w r10, #0
572; CHECK-NEXT:    movt r6, :upper16:arr_20
573; CHECK-NEXT:  .LBB19_3: @ %for.cond8.preheader
574; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
575; CHECK-NEXT:    str r8, [r5, #-4]
576; CHECK-NEXT:    vstrh.16 q1, [r5, #-36]
577; CHECK-NEXT:    strh.w r9, [r5]
578; CHECK-NEXT:    vstrh.16 q1, [r5, #-20]
579; CHECK-NEXT:    vstrw.32 q0, [r3]
580; CHECK-NEXT:    vstrh.16 q0, [r12], #152
581; CHECK-NEXT:    vstrh.16 q0, [r6], #152
582; CHECK-NEXT:    vstrh.16 q0, [r1], #152
583; CHECK-NEXT:    vstrh.16 q0, [r0], #152
584; CHECK-NEXT:    vstrh.16 q0, [r4], #152
585; CHECK-NEXT:    vstrh.16 q0, [r2], #152
586; CHECK-NEXT:    vstrh.16 q0, [r7], #152
587; CHECK-NEXT:    vstrh.16 q0, [r11], #152
588; CHECK-NEXT:    strd r9, r10, [r3, #64]
589; CHECK-NEXT:    adds r5, #38
590; CHECK-NEXT:    adds r3, #152
591; CHECK-NEXT:    le lr, .LBB19_3
592; CHECK-NEXT:  @ %bb.4: @ %for.cond.cleanup6
593; CHECK-NEXT:    movw r0, :lower16:arr_22
594; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
595; CHECK-NEXT:    movt r0, :upper16:arr_22
596; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
597; CHECK-NEXT:    add.w r0, r0, #1824
598; CHECK-NEXT:    vmov.i32 q1, #0x0
599; CHECK-NEXT:    wlstp.8 lr, r2, .LBB19_6
600; CHECK-NEXT:  .LBB19_5: @ =>This Inner Loop Header: Depth=1
601; CHECK-NEXT:    vstrb.8 q1, [r0], #16
602; CHECK-NEXT:    letp lr, .LBB19_5
603; CHECK-NEXT:  .LBB19_6: @ %for.cond.cleanup6
604; CHECK-NEXT:    movw r6, :lower16:arr_20
605; CHECK-NEXT:    movw r0, #7376
606; CHECK-NEXT:    movt r6, :upper16:arr_20
607; CHECK-NEXT:    adds r3, r6, r0
608; CHECK-NEXT:    movw r0, #7408
609; CHECK-NEXT:    add.w r12, r6, r0
610; CHECK-NEXT:    movw r0, #7344
611; CHECK-NEXT:    add.w r9, r6, r0
612; CHECK-NEXT:    movw r0, #7312
613; CHECK-NEXT:    adds r2, r6, r0
614; CHECK-NEXT:    movw r0, :lower16:arr_21
615; CHECK-NEXT:    add.w r1, r6, #7424
616; CHECK-NEXT:    add.w r7, r6, #7392
617; CHECK-NEXT:    add.w r4, r6, #7360
618; CHECK-NEXT:    add.w r5, r6, #7328
619; CHECK-NEXT:    add.w r8, r6, #7296
620; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
621; CHECK-NEXT:    movt r0, :upper16:arr_21
622; CHECK-NEXT:    addw r0, r0, #1860
623; CHECK-NEXT:    mov.w r10, #5
624; CHECK-NEXT:    dls lr, r6
625; CHECK-NEXT:    mov.w r6, #327685
626; CHECK-NEXT:    vmov.i16 q1, #0x5
627; CHECK-NEXT:    mov.w r11, #0
628; CHECK-NEXT:  .LBB19_7: @ %for.cond8.preheader.1
629; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
630; CHECK-NEXT:    str r6, [r0, #-4]
631; CHECK-NEXT:    vstrh.16 q1, [r0, #-36]
632; CHECK-NEXT:    strh.w r10, [r0]
633; CHECK-NEXT:    vstrh.16 q1, [r0, #-20]
634; CHECK-NEXT:    vstrw.32 q0, [r3]
635; CHECK-NEXT:    vstrh.16 q0, [r2], #152
636; CHECK-NEXT:    vstrh.16 q0, [r8], #152
637; CHECK-NEXT:    vstrh.16 q0, [r5], #152
638; CHECK-NEXT:    vstrh.16 q0, [r9], #152
639; CHECK-NEXT:    vstrh.16 q0, [r4], #152
640; CHECK-NEXT:    vstrh.16 q0, [r7], #152
641; CHECK-NEXT:    vstrh.16 q0, [r12], #152
642; CHECK-NEXT:    vstrh.16 q0, [r1], #152
643; CHECK-NEXT:    strd r10, r11, [r3, #64]
644; CHECK-NEXT:    adds r0, #38
645; CHECK-NEXT:    adds r3, #152
646; CHECK-NEXT:    le lr, .LBB19_7
647; CHECK-NEXT:  @ %bb.8: @ %for.cond.cleanup6.1
648; CHECK-NEXT:    movw r0, :lower16:arr_22
649; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
650; CHECK-NEXT:    movt r0, :upper16:arr_22
651; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
652; CHECK-NEXT:    add.w r0, r0, #3648
653; CHECK-NEXT:    vmov.i32 q1, #0x0
654; CHECK-NEXT:    wlstp.8 lr, r2, .LBB19_10
655; CHECK-NEXT:  .LBB19_9: @ =>This Inner Loop Header: Depth=1
656; CHECK-NEXT:    vstrb.8 q1, [r0], #16
657; CHECK-NEXT:    letp lr, .LBB19_9
658; CHECK-NEXT:  .LBB19_10: @ %for.cond.cleanup6.1
659; CHECK-NEXT:    movw r7, :lower16:arr_20
660; CHECK-NEXT:    movw r0, #14672
661; CHECK-NEXT:    movt r7, :upper16:arr_20
662; CHECK-NEXT:    adds r3, r7, r0
663; CHECK-NEXT:    movw r0, #14704
664; CHECK-NEXT:    add.w r12, r7, r0
665; CHECK-NEXT:    movw r0, #14688
666; CHECK-NEXT:    add.w r8, r7, r0
667; CHECK-NEXT:    movw r0, #14640
668; CHECK-NEXT:    add.w r9, r7, r0
669; CHECK-NEXT:    movw r0, #14624
670; CHECK-NEXT:    adds r2, r7, r0
671; CHECK-NEXT:    movw r0, #14608
672; CHECK-NEXT:    movw r1, :lower16:arr_21
673; CHECK-NEXT:    add r0, r7
674; CHECK-NEXT:    add.w r4, r7, #14720
675; CHECK-NEXT:    add.w r5, r7, #14656
676; CHECK-NEXT:    add.w r6, r7, #14592
677; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
678; CHECK-NEXT:    movt r1, :upper16:arr_21
679; CHECK-NEXT:    addw r1, r1, #3684
680; CHECK-NEXT:    mov.w r10, #5
681; CHECK-NEXT:    dls lr, r7
682; CHECK-NEXT:    mov.w r7, #327685
683; CHECK-NEXT:    vmov.i16 q1, #0x5
684; CHECK-NEXT:    mov.w r11, #0
685; CHECK-NEXT:  .LBB19_11: @ %for.cond8.preheader.2
686; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
687; CHECK-NEXT:    str r7, [r1, #-4]
688; CHECK-NEXT:    vstrh.16 q1, [r1, #-36]
689; CHECK-NEXT:    strh.w r10, [r1]
690; CHECK-NEXT:    vstrh.16 q1, [r1, #-20]
691; CHECK-NEXT:    vstrw.32 q0, [r3]
692; CHECK-NEXT:    vstrh.16 q0, [r0], #152
693; CHECK-NEXT:    vstrh.16 q0, [r6], #152
694; CHECK-NEXT:    vstrh.16 q0, [r2], #152
695; CHECK-NEXT:    vstrh.16 q0, [r9], #152
696; CHECK-NEXT:    vstrh.16 q0, [r5], #152
697; CHECK-NEXT:    vstrh.16 q0, [r8], #152
698; CHECK-NEXT:    vstrh.16 q0, [r12], #152
699; CHECK-NEXT:    vstrh.16 q0, [r4], #152
700; CHECK-NEXT:    strd r10, r11, [r3, #64]
701; CHECK-NEXT:    adds r1, #38
702; CHECK-NEXT:    adds r3, #152
703; CHECK-NEXT:    le lr, .LBB19_11
704; CHECK-NEXT:  @ %bb.12: @ %for.cond.cleanup6.2
705; CHECK-NEXT:    movw r0, :lower16:arr_22
706; CHECK-NEXT:    ldrd r2, r1, [sp] @ 8-byte Folded Reload
707; CHECK-NEXT:    movt r0, :upper16:arr_22
708; CHECK-NEXT:    vmov.i32 q1, #0x0
709; CHECK-NEXT:    add.w r0, r0, #5472
710; CHECK-NEXT:    wlstp.8 lr, r1, .LBB19_14
711; CHECK-NEXT:  .LBB19_13: @ =>This Inner Loop Header: Depth=1
712; CHECK-NEXT:    vstrb.8 q1, [r0], #16
713; CHECK-NEXT:    letp lr, .LBB19_13
714; CHECK-NEXT:  .LBB19_14: @ %for.cond.cleanup6.2
715; CHECK-NEXT:    movw r2, :lower16:arr_21
716; CHECK-NEXT:    movw r1, #5508
717; CHECK-NEXT:    movt r2, :upper16:arr_21
718; CHECK-NEXT:    movw r7, :lower16:arr_20
719; CHECK-NEXT:    add r2, r1
720; CHECK-NEXT:    movw r1, #22000
721; CHECK-NEXT:    movt r7, :upper16:arr_20
722; CHECK-NEXT:    add.w r12, r7, r1
723; CHECK-NEXT:    movw r1, #21984
724; CHECK-NEXT:    add.w r8, r7, r1
725; CHECK-NEXT:    movw r1, #21952
726; CHECK-NEXT:    add.w r9, r7, r1
727; CHECK-NEXT:    movw r1, #21936
728; CHECK-NEXT:    movw r0, #21968
729; CHECK-NEXT:    adds r5, r7, r1
730; CHECK-NEXT:    movw r1, #21920
731; CHECK-NEXT:    movw r3, #21904
732; CHECK-NEXT:    adds r4, r7, r3
733; CHECK-NEXT:    add r0, r7
734; CHECK-NEXT:    add r1, r7
735; CHECK-NEXT:    add.w r3, r7, #22016
736; CHECK-NEXT:    add.w r6, r7, #21888
737; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
738; CHECK-NEXT:    mov.w r10, #5
739; CHECK-NEXT:    vmov.i16 q1, #0x5
740; CHECK-NEXT:    mov.w r11, #0
741; CHECK-NEXT:    dls lr, r7
742; CHECK-NEXT:    mov.w r7, #327685
743; CHECK-NEXT:  .LBB19_15: @ %for.cond8.preheader.3
744; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
745; CHECK-NEXT:    str r7, [r2, #-4]
746; CHECK-NEXT:    vstrh.16 q1, [r2, #-36]
747; CHECK-NEXT:    strh.w r10, [r2]
748; CHECK-NEXT:    vstrh.16 q1, [r2, #-20]
749; CHECK-NEXT:    vstrw.32 q0, [r0]
750; CHECK-NEXT:    vstrh.16 q0, [r4], #152
751; CHECK-NEXT:    vstrh.16 q0, [r6], #152
752; CHECK-NEXT:    vstrh.16 q0, [r1], #152
753; CHECK-NEXT:    vstrh.16 q0, [r5], #152
754; CHECK-NEXT:    vstrh.16 q0, [r9], #152
755; CHECK-NEXT:    vstrh.16 q0, [r8], #152
756; CHECK-NEXT:    vstrh.16 q0, [r12], #152
757; CHECK-NEXT:    vstrh.16 q0, [r3], #152
758; CHECK-NEXT:    strd r10, r11, [r0, #64]
759; CHECK-NEXT:    adds r2, #38
760; CHECK-NEXT:    adds r0, #152
761; CHECK-NEXT:    le lr, .LBB19_15
762; CHECK-NEXT:  @ %bb.16: @ %for.cond.cleanup6.3
763; CHECK-NEXT:    add sp, #12
764; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
765; CHECK-NEXT:    .p2align 4
766; CHECK-NEXT:  @ %bb.17:
767; CHECK-NEXT:  .LCPI19_0:
768; CHECK-NEXT:    .long 5 @ 0x5
769; CHECK-NEXT:    .long 0 @ 0x0
770; CHECK-NEXT:    .long 5 @ 0x5
771; CHECK-NEXT:    .long 0 @ 0x0
772entry:
773  %add = select i1 %b, i32 12, i32 11
774  %0 = mul nuw nsw i32 %add, 38
775  call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) @arr_22, i8 0, i32 %0, i1 false)
776  br label %for.cond8.preheader
777
778for.cond8.preheader:                              ; preds = %entry, %for.cond8.preheader
779  %d.051 = phi i32 [ 0, %entry ], [ %inc, %for.cond8.preheader ]
780  %arrayidx16 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 0
781  %arrayidx21 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 0
782  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21, align 8
783  %arrayidx21.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 2
784  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2, align 8
785  %arrayidx21.4 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 4
786  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4, align 8
787  %arrayidx21.6 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 6
788  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16, align 2
789  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6, align 8
790  %arrayidx16.8 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 8
791  %arrayidx21.8 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 8
792  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8, align 8
793  %arrayidx21.10 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 10
794  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10, align 8
795  %arrayidx21.12 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 12
796  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12, align 8
797  %arrayidx21.14 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 14
798  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8, align 2
799  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14, align 8
800  %arrayidx16.16 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 16
801  store i16 5, ptr %arrayidx16.16, align 2
802  %arrayidx21.16 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 16
803  %arrayidx16.17 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 17
804  store i16 5, ptr %arrayidx16.17, align 2
805  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16, align 8
806  %arrayidx16.18 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 18
807  store i16 5, ptr %arrayidx16.18, align 2
808  %arrayidx21.18 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 18
809  store i64 5, ptr %arrayidx21.18, align 8
810  %inc = add nuw nsw i32 %d.051, 1
811  %exitcond.not = icmp eq i32 %inc, %add
812  br i1 %exitcond.not, label %for.cond.cleanup6, label %for.cond8.preheader
813
814for.cond.cleanup6:                                ; preds = %for.cond8.preheader
815  call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) getelementptr inbounds ([17 x [12 x [19 x i16]]], ptr @arr_22, i32 0, i32 4, i32 0, i32 0), i8 0, i32 %0, i1 false)
816  br label %for.cond8.preheader.1
817
818for.cond8.preheader.1:                            ; preds = %for.cond8.preheader.1, %for.cond.cleanup6
819  %d.051.1 = phi i32 [ 0, %for.cond.cleanup6 ], [ %inc.1, %for.cond8.preheader.1 ]
820  %arrayidx16.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 0
821  %arrayidx21.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 0
822  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.1, align 8
823  %arrayidx21.2.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 2
824  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2.1, align 8
825  %arrayidx21.4.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 4
826  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4.1, align 8
827  %arrayidx21.6.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 6
828  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.1, align 2
829  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6.1, align 8
830  %arrayidx16.8.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 8
831  %arrayidx21.8.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 8
832  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8.1, align 8
833  %arrayidx21.10.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 10
834  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10.1, align 8
835  %arrayidx21.12.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 12
836  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12.1, align 8
837  %arrayidx21.14.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 14
838  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8.1, align 2
839  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14.1, align 8
840  %arrayidx16.16.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 16
841  store i16 5, ptr %arrayidx16.16.1, align 2
842  %arrayidx21.16.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 16
843  %arrayidx16.17.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 17
844  store i16 5, ptr %arrayidx16.17.1, align 2
845  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16.1, align 8
846  %arrayidx16.18.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 18
847  store i16 5, ptr %arrayidx16.18.1, align 2
848  %arrayidx21.18.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 18
849  store i64 5, ptr %arrayidx21.18.1, align 8
850  %inc.1 = add nuw nsw i32 %d.051.1, 1
851  %exitcond.not.1 = icmp eq i32 %inc.1, %add
852  br i1 %exitcond.not.1, label %for.cond.cleanup6.1, label %for.cond8.preheader.1
853
854for.cond.cleanup6.1:                              ; preds = %for.cond8.preheader.1
855  call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) getelementptr inbounds ([17 x [12 x [19 x i16]]], ptr @arr_22, i32 0, i32 8, i32 0, i32 0), i8 0, i32 %0, i1 false)
856  br label %for.cond8.preheader.2
857
858for.cond8.preheader.2:                            ; preds = %for.cond8.preheader.2, %for.cond.cleanup6.1
859  %d.051.2 = phi i32 [ 0, %for.cond.cleanup6.1 ], [ %inc.2, %for.cond8.preheader.2 ]
860  %arrayidx16.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 0
861  %arrayidx21.254 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 0
862  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.254, align 8
863  %arrayidx21.2.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 2
864  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2.2, align 8
865  %arrayidx21.4.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 4
866  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4.2, align 8
867  %arrayidx21.6.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 6
868  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.2, align 2
869  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6.2, align 8
870  %arrayidx16.8.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 8
871  %arrayidx21.8.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 8
872  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8.2, align 8
873  %arrayidx21.10.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 10
874  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10.2, align 8
875  %arrayidx21.12.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 12
876  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12.2, align 8
877  %arrayidx21.14.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 14
878  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8.2, align 2
879  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14.2, align 8
880  %arrayidx16.16.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 16
881  store i16 5, ptr %arrayidx16.16.2, align 2
882  %arrayidx21.16.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 16
883  %arrayidx16.17.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 17
884  store i16 5, ptr %arrayidx16.17.2, align 2
885  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16.2, align 8
886  %arrayidx16.18.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 18
887  store i16 5, ptr %arrayidx16.18.2, align 2
888  %arrayidx21.18.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 18
889  store i64 5, ptr %arrayidx21.18.2, align 8
890  %inc.2 = add nuw nsw i32 %d.051.2, 1
891  %exitcond.not.2 = icmp eq i32 %inc.2, %add
892  br i1 %exitcond.not.2, label %for.cond.cleanup6.2, label %for.cond8.preheader.2
893
894for.cond.cleanup6.2:                              ; preds = %for.cond8.preheader.2
895  call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) getelementptr inbounds ([17 x [12 x [19 x i16]]], ptr @arr_22, i32 0, i32 12, i32 0, i32 0), i8 0, i32 %0, i1 false)
896  br label %for.cond8.preheader.3
897
898for.cond8.preheader.3:                            ; preds = %for.cond8.preheader.3, %for.cond.cleanup6.2
899  %d.051.3 = phi i32 [ 0, %for.cond.cleanup6.2 ], [ %inc.3, %for.cond8.preheader.3 ]
900  %arrayidx16.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 0
901  %arrayidx21.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 0
902  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.3, align 8
903  %arrayidx21.2.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 2
904  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2.3, align 8
905  %arrayidx21.4.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 4
906  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4.3, align 8
907  %arrayidx21.6.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 6
908  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.3, align 2
909  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6.3, align 8
910  %arrayidx16.8.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 8
911  %arrayidx21.8.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 8
912  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8.3, align 8
913  %arrayidx21.10.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 10
914  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10.3, align 8
915  %arrayidx21.12.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 12
916  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12.3, align 8
917  %arrayidx21.14.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 14
918  store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8.3, align 2
919  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14.3, align 8
920  %arrayidx16.16.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 16
921  store i16 5, ptr %arrayidx16.16.3, align 2
922  %arrayidx21.16.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 16
923  %arrayidx16.17.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 17
924  store i16 5, ptr %arrayidx16.17.3, align 2
925  store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16.3, align 8
926  %arrayidx16.18.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 18
927  store i16 5, ptr %arrayidx16.18.3, align 2
928  %arrayidx21.18.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 18
929  store i64 5, ptr %arrayidx21.18.3, align 8
930  %inc.3 = add nuw nsw i32 %d.051.3, 1
931  %exitcond.not.3 = icmp eq i32 %inc.3, %add
932  br i1 %exitcond.not.3, label %for.cond.cleanup6.3, label %for.cond8.preheader.3
933
934for.cond.cleanup6.3:                              ; preds = %for.cond8.preheader.3
935  ret i32 undef
936}
937
; Attribute groups shared by the function definitions in this test.
; NOTE(review): the functions that reference #0 and #1 are defined earlier in
; the file and are not visible next to these declarations - confirm the users
; before editing either group.
;
; #0: noinline keeps the function from being inlined; optnone makes most
; optimization passes skip it (the LangRef requires optnone to be paired with
; noinline).
938attributes #0 = { noinline  optnone }
; #1: optsize asks passes and code generation to keep the function's code size
; low.
939attributes #1 = { optsize }
940