; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - < %s | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv8-unknown-linux-gnueabihf"

define <4 x float> @test(ptr %A) {
; CHECK-LABEL: test:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 4
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 8
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

define <4 x float> @test_stride(ptr %A) {
; CHECK-LABEL: test_stride:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #24
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0], r1
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0], r1
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 6
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 12
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

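; Mixed strides: the first 24-byte gap needs a stride register, while the
; second gap matches the 16-byte access size and can use the post-increment form.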
define <4 x float> @test_stride_mixed(ptr %A) {
; CHECK-LABEL: test_stride_mixed:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #24
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0], r1
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 6
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 10
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

; Refrain from using multiple stride registers
define <4 x float> @test_stride_noop(ptr %A) {
; CHECK-LABEL: test_stride_noop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #24
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0], r1
; CHECK-NEXT:    mov r1, #32
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0], r1
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr %A, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 6
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 14
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

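; A constant initial offset should be materialized with a single ADD, after
; which the loads chain via post-increment.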
define <4 x float> @test_positive_initial_offset(ptr %A) {
; CHECK-LABEL: test_positive_initial_offset:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    add r0, r0, #32
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X.ptr.elt = getelementptr inbounds float, ptr %A, i32 8
  %X = load <4 x float>, ptr %X.ptr.elt, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 12
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 16
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

define <4 x float> @test_negative_initial_offset(ptr %A) {
; CHECK-LABEL: test_negative_initial_offset:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    sub r0, r0, #64
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X.ptr.elt = getelementptr inbounds float, ptr %A, i32 -16
  %X = load <4 x float>, ptr %X.ptr.elt, align 4
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 -12
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 -8
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

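; The same applies when the base address is a global: materialize the address
; once, then chain the loads.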
@global_float_array = external global [128 x float], align 4
define <4 x float> @test_global() {
; CHECK-LABEL: test_global:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, :lower16:global_float_array
; CHECK-NEXT:    movt r0, :upper16:global_float_array
; CHECK-NEXT:    add r0, r0, #32
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    bx lr
  %X = load <4 x float>, ptr getelementptr inbounds ([128 x float], ptr @global_float_array, i32 0, i32 8), align 4
  %Y = load <4 x float>, ptr getelementptr inbounds ([128 x float], ptr @global_float_array, i32 0, i32 12), align 4
  %Z = load <4 x float>, ptr getelementptr inbounds ([128 x float], ptr @global_float_array, i32 0, i32 16), align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

define <4 x float> @test_stack() {
; Use a huge alignment to test that the ADD is not converted to an OR
; CHECK-LABEL: test_stack:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r10, r11, lr}
; CHECK-NEXT:    push {r4, r10, r11, lr}
; CHECK-NEXT:    .setfp r11, sp, #8
; CHECK-NEXT:    add r11, sp, #8
; CHECK-NEXT:    .pad #240
; CHECK-NEXT:    sub sp, sp, #240
; CHECK-NEXT:    bfc sp, #0, #7
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl external_function
; CHECK-NEXT:    vld1.32 {d16, d17}, [r4:128]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r4:128]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vld1.64 {d18, d19}, [r4:128]
; CHECK-NEXT:    vadd.f32 q0, q8, q9
; CHECK-NEXT:    sub sp, r11, #8
; CHECK-NEXT:    pop {r4, r10, r11, pc}
  %array = alloca [32 x float], align 128
  call void @external_function(ptr %array)
  %X = load <4 x float>, ptr %array, align 4
  %Y.ptr.elt = getelementptr inbounds [32 x float], ptr %array, i32 0, i32 4
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds [32 x float], ptr %array, i32 0, i32 8
  %Z = load <4 x float>, ptr %Z.ptr.elt, align 4
  %tmp.sum = fadd <4 x float> %X, %Y
  %sum = fadd <4 x float> %tmp.sum, %Z
  ret <4 x float> %sum
}

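; 64-bit elements are chained the same way, using vld1.64.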
define <2 x double> @test_double(ptr %A) {
; CHECK-LABEL: test_double:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    add r0, r0, #64
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f64 d20, d17, d19
; CHECK-NEXT:    vadd.f64 d16, d16, d18
; CHECK-NEXT:    vld1.64 {d22, d23}, [r0]
; CHECK-NEXT:    vadd.f64 d1, d20, d23
; CHECK-NEXT:    vadd.f64 d0, d16, d22
; CHECK-NEXT:    bx lr
  %X.ptr.elt = getelementptr inbounds double, ptr %A, i32 8
  %X = load <2 x double>, ptr %X.ptr.elt, align 8
  %Y.ptr.elt = getelementptr inbounds double, ptr %A, i32 10
  %Y = load <2 x double>, ptr %Y.ptr.elt, align 8
  %Z.ptr.elt = getelementptr inbounds double, ptr %A, i32 12
  %Z = load <2 x double>, ptr %Z.ptr.elt, align 8
  %tmp.sum = fadd <2 x double> %X, %Y
  %sum = fadd <2 x double> %tmp.sum, %Z
  ret <2 x double> %sum
}

define void @test_various_instructions(ptr %A) {
; CHECK-LABEL: test_various_instructions:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %X = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0(ptr %A, i32 1)
  %Y.ptr.elt = getelementptr inbounds float, ptr %A, i32 4
  %Y = load <4 x float>, ptr %Y.ptr.elt, align 4
  %Z.ptr.elt = getelementptr inbounds float, ptr %A, i32 8
  %Z = fadd <4 x float> %X, %Y
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %Z.ptr.elt, <4 x float> %Z, i32 4)
  ret void
}

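; GEPs rewritten by LSR in terms of a common induction variable: the loads and
; stores within one iteration should still chain via post-increment.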
define void @test_lsr_geps(ptr %a, ptr %b, i32 %n) {
; CHECK-LABEL: test_lsr_geps:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    bxlt lr
; CHECK-NEXT:  .LBB10_1: @ %for.body.preheader
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:  .LBB10_2: @ %for.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    add r3, r0, r12
; CHECK-NEXT:    subs r2, r2, #1
; CHECK-NEXT:    vld1.32 {d16, d17}, [r3]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r3]!
; CHECK-NEXT:    vld1.32 {d20, d21}, [r3]!
; CHECK-NEXT:    vld1.32 {d22, d23}, [r3]
; CHECK-NEXT:    add r3, r1, r12
; CHECK-NEXT:    add r12, r12, #64
; CHECK-NEXT:    vst1.32 {d16, d17}, [r3]!
; CHECK-NEXT:    vst1.32 {d18, d19}, [r3]!
; CHECK-NEXT:    vst1.32 {d20, d21}, [r3]!
; CHECK-NEXT:    vst1.32 {d22, d23}, [r3]
; CHECK-NEXT:    bne .LBB10_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    bx lr
entry:
  %cmp61 = icmp sgt i32 %n, 0
  br i1 %cmp61, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %lsr.iv1 = phi i32 [ 0, %for.body.preheader ], [ %lsr.iv.next2, %for.body ]
  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
  %uglygep19 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %0 = load <4 x float>, ptr %uglygep19, align 4
  %uglygep16 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %scevgep18 = getelementptr <4 x float>, ptr %uglygep16, i32 1
  %1 = load <4 x float>, ptr %scevgep18, align 4
  %uglygep13 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %scevgep15 = getelementptr <4 x float>, ptr %uglygep13, i32 2
  %2 = load <4 x float>, ptr %scevgep15, align 4
  %uglygep10 = getelementptr i8, ptr %a, i32 %lsr.iv1
  %scevgep12 = getelementptr <4 x float>, ptr %uglygep10, i32 3
  %3 = load <4 x float>, ptr %scevgep12, align 4
  %uglygep8 = getelementptr i8, ptr %b, i32 %lsr.iv1
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr %uglygep8, <4 x float> %0, i32 4)
  %uglygep6 = getelementptr i8, ptr %b, i32 %lsr.iv1
  %scevgep7 = getelementptr i8, ptr %uglygep6, i32 16
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %scevgep7, <4 x float> %1, i32 4)
  %uglygep4 = getelementptr i8, ptr %b, i32 %lsr.iv1
  %scevgep5 = getelementptr i8, ptr %uglygep4, i32 32
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %scevgep5, <4 x float> %2, i32 4)
  %uglygep = getelementptr i8, ptr %b, i32 %lsr.iv1
  %scevgep = getelementptr i8, ptr %uglygep, i32 48
  tail call void @llvm.arm.neon.vst1.p0.v4f32(ptr nonnull %scevgep, <4 x float> %3, i32 4)
  %lsr.iv.next = add i32 %lsr.iv, -1
  %lsr.iv.next2 = add nuw i32 %lsr.iv1, 64
  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

declare void @external_function(ptr)
declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0(ptr, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.p0.v4f32(ptr, <4 x float>, i32) nounwind argmemonly