; xref: /llvm-project/llvm/test/CodeGen/ARM/arm-post-indexing-opt.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - < %s | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv8-unknown-linux-gnueabihf"

; Three loads at consecutive 16-byte offsets (float indices 0, 4, 8): the first
; two fold into post-indexed vld1 ([r0]!), the last uses plain addressing.
define <4 x float> @test(ptr %A) {
; CHECK-LABEL: test:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 4
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 8
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

; Loads with a uniform 24-byte stride (float indices 0, 6, 12): the stride is
; materialized once in r1 and reused for register post-increment ([r0], r1).
define <4 x float> @test_stride(ptr %A) {
; CHECK-LABEL: test_stride:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #24
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0], r1
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0], r1
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 6
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 12
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

; Mixed strides (offsets 0, 6, 10 floats = 24 then 16 bytes): first increment
; uses a stride register, second folds into the fixed-size post-increment form.
define <4 x float> @test_stride_mixed(ptr %A) {
; CHECK-LABEL: test_stride_mixed:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #24
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0], r1
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 6
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 10
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

; Refrain from using multiple stride registers
; (offsets 0, 6, 14 floats = 24- then 32-byte strides; r1 is reloaded instead).
define <4 x float> @test_stride_noop(ptr %A) {
; CHECK-LABEL: test_stride_noop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #24
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0], r1
; CHECK-NEXT:    mov r1, #32
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0], r1
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 6
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 14
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

; First load starts at a positive offset (float index 8 = 32 bytes): the base
; is advanced once with an add, then post-indexed loads take over.
define <4 x float> @test_positive_initial_offset(ptr %A) {
; CHECK-LABEL: test_positive_initial_offset:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    add r0, r0, #32
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X.ptr.elt = getelementptr inbounds float, ptr %A, i32 8
  %X = load <4 x float>, ptr %X.ptr.elt, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 12
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 16
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

; Same as above but with a negative initial offset (float index -16 = -64
; bytes): the base is rewound with a sub before the post-indexed sequence.
define <4 x float> @test_negative_initial_offset(ptr %A) {
; CHECK-LABEL: test_negative_initial_offset:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    sub r0, r0, #64
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X.ptr.elt = getelementptr inbounds float, ptr %A, i32 -16
  %X = load <4 x float>, ptr %X.ptr.elt, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 -12
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 -8
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

@global_float_array = external global [128 x float], align 4

; Loads through constant-folded GEPs into a global: the base is materialized
; with movw/movt, advanced by 32 bytes, then the post-indexed pattern applies.
define <4 x float> @test_global() {
; CHECK-LABEL: test_global:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, :lower16:global_float_array
; CHECK-NEXT:    movt r0, :upper16:global_float_array
; CHECK-NEXT:    add r0, r0, #32
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr getelementptr inbounds ([128 x float], ptr @global_float_array, i32 0, i32 8), align 4
  %Y = load <4 x float>, ptr getelementptr inbounds ([128 x float], ptr @global_float_array, i32 0, i32 12), align 4
  %Z = load <4 x float>, ptr getelementptr inbounds ([128 x float], ptr @global_float_array, i32 0, i32 16), align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

define <4 x float> @test_stack() {
; Use huge alignment to test that ADD would not be converted to OR
; CHECK-LABEL: test_stack:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r10, r11, lr}
; CHECK-NEXT:    push {r4, r10, r11, lr}
; CHECK-NEXT:    .setfp r11, sp, #8
; CHECK-NEXT:    add r11, sp, #8
; CHECK-NEXT:    .pad #240
; CHECK-NEXT:    sub sp, sp, #240
; CHECK-NEXT:    bfc sp, #0, #7
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl external_function
; CHECK-NEXT:    vld1.32 {d16, d17}, [r4:128]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r4:128]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.64 {d18, d19}, [r4:128]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    sub sp, r11, #8
; CHECK-NEXT:    pop {r4, r10, r11, pc}
  %array = alloca [32 x float], align 128
  call void @external_function(ptr %array)
  %X = load <4 x float>, ptr %array, align 4
  %Y.ptr.elt = getelementptr inbounds [32 x float], ptr %array, i32 0, i32 4
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds [32 x float], ptr %array, i32 0, i32 8
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

; Same pattern with <2 x double> elements (double index 8 = 64-byte initial
; offset, then consecutive 16-byte post-indexed vld1.64 loads).
define <2 x double> @test_double(ptr %A) {
; CHECK-LABEL: test_double:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    add r0, r0, #64
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f64 d20, d17, d19
; CHECK-NEXT:    vadd.f64 d16, d16, d18
; CHECK-NEXT:    vld1.64 {d22, d23}, [r0]
; CHECK-NEXT:    vadd.f64 d1, d20, d23
; CHECK-NEXT:    vadd.f64 d0, d16, d22
; CHECK-NEXT:    bx lr
  %X.ptr.elt = getelementptr inbounds double, ptr %A, i32 8
  %X = load <2 x double>, ptr %X.ptr.elt, align 8
  %Y.ptr.elt = getelementptr inbounds double, ptr %A, i32 10
  %Y = load <2 x double>, ptr %Y.ptr.elt, align 8
  %Z.ptr.elt = getelementptr inbounds double, ptr %A, i32 12
  %Z = load <2 x double>, ptr %Z.ptr.elt, align 8
  %tmp.sum = fadd <2 x double> %X, %Y
  %sum = fadd <2 x double> %tmp.sum, %Z
  ret <2 x double> %sum
}

; Post-indexing also applies across a mix of a NEON vld1 intrinsic, a plain
; load, and a vst1 intrinsic on the same base pointer.
define void @test_various_instructions(ptr %A) {
; CHECK-LABEL: test_various_instructions:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %X = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0(ptr %A, i32 1)
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 4
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 8
  %Z = fadd <4 x float> %X, %Y
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %Z.ptr.elt, <4 x float> %Z, i32 4)
  ret void
}

; LSR-shaped input: repeated (base + %lsr.iv1) GEPs inside a loop still get
; merged into chained post-indexed loads/stores off a single scratch register.
define void @test_lsr_geps(ptr %a, ptr %b, i32 %n) {
; CHECK-LABEL: test_lsr_geps:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    bxlt lr
; CHECK-NEXT:  .LBB10_1: @ %for.body.preheader
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:  .LBB10_2: @ %for.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    add r3, r0, r12
; CHECK-NEXT:    subs r2, r2, #1
; CHECK-NEXT:    vld1.32 {d16, d17}, [r3]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r3]!
; CHECK-NEXT:    vld1.32 {d20, d21}, [r3]!
; CHECK-NEXT:    vld1.32 {d22, d23}, [r3]
; CHECK-NEXT:    add r3, r1, r12
; CHECK-NEXT:    add r12, r12, #64
; CHECK-NEXT:    vst1.32 {d16, d17}, [r3]!
; CHECK-NEXT:    vst1.32 {d18, d19}, [r3]!
; CHECK-NEXT:    vst1.32 {d20, d21}, [r3]!
; CHECK-NEXT:    vst1.32 {d22, d23}, [r3]
; CHECK-NEXT:    bne .LBB10_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    bx lr
entry:
  %cmp61 = icmp sgt i32 %n, 0
  br i1 %cmp61, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %lsr.iv1 = phi i32 [ 0, %for.body.preheader ], [ %lsr.iv.next2, %for.body ]
  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
  %uglygep19 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %0 = load <4 x float>, ptr %uglygep19, align 4
  %uglygep16 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %scevgep18 = getelementptr <4 x float>, ptr %uglygep16, i32 1
  %1 = load <4 x float>, ptr %scevgep18, align 4
  %uglygep13 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %scevgep15 = getelementptr <4 x float>, ptr %uglygep13, i32 2
  %2 = load <4 x float>, ptr %scevgep15, align 4
  %uglygep10 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %scevgep12 = getelementptr <4 x float>, ptr %uglygep10, i32 3
  %3 = load <4 x float>, ptr %scevgep12, align 4
  %uglygep8 = getelementptr i8, ptr %b, i32 %lsr.iv1
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr %uglygep8, <4 x float> %0, i32 4)
  %uglygep6 = getelementptr i8, ptr %b, i32 %lsr.iv1
  %scevgep7 = getelementptr i8, ptr %uglygep6, i32 16
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %scevgep7, <4 x float> %1, i32 4)
  %uglygep4 = getelementptr i8, ptr %b, i32 %lsr.iv1
  %scevgep5 = getelementptr i8, ptr %uglygep4, i32 32
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %scevgep5, <4 x float> %2, i32 4)
  %uglygep = getelementptr i8, ptr %b, i32 %lsr.iv1
  %scevgep = getelementptr i8, ptr %uglygep, i32 48
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %scevgep, <4 x float> %3, i32 4)
  %lsr.iv.next = add i32 %lsr.iv, -1
  %lsr.iv.next2 = add nuw i32 %lsr.iv1, 64
  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

declare void @external_function(ptr)
declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0(ptr, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.p0.v4f32(ptr, <4 x float>, i32) nounwind argmemonly