; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; All three functions below share the same loop: 64 iterations, each loading a
; <4 x float> from %pSrcA and %pSrcB, adding them, storing the sum to %pDst and
; advancing every pointer by four floats. %blockSize is never read; the trip
; count is the constant 64. The functions differ only in which of the original
; (un-incremented) pointer arguments are still needed in %while.end.

; useafterloop: the original %pDst is returned after the loop.
; The expected assembly post-increments r0/r1 in place, but stores through a
; copy of the destination pointer (r3) so that r2 still holds %pDst for the
; return value.
define nonnull ptr @useafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) {
; CHECK-LABEL: useafterloop:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    mov.w lr, #64
; CHECK-NEXT:    mov r3, r2
; CHECK-NEXT:  .LBB0_1: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
; CHECK-NEXT:    vadd.f32 q0, q1, q0
; CHECK-NEXT:    vstrb.8 q0, [r3], #16
; CHECK-NEXT:    le lr, .LBB0_1
; CHECK-NEXT:  @ %bb.2: @ %while.end
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  br label %while.body

while.body:
  %pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
  %pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
  %pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
  %blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
  %0 = load <4 x float>, ptr %pSrcA.addr.012, align 4
  %1 = load <4 x float>, ptr %pSrcB.addr.011, align 4
  %2 = fadd fast <4 x float> %1, %0
  store <4 x float> %2, ptr %pDst.addr.010, align 4
  %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4
  %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4
  %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4
  %dec = add nsw i32 %blkCnt.09, -1
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end, label %while.body

while.end:
  ret ptr %pDst
}


; nouse: after the loop the function returns %pDst advanced by one float.
; NOTE(review): despite the name, the original %pDst is still read in
; %while.end (as the base of %add.ptr3); the expected assembly accordingly
; keeps r2 intact, stores through the copy in r3 and forms the result with
; "adds r0, r2, #4".
define nonnull ptr @nouse(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) {
; CHECK-LABEL: nouse:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    mov.w lr, #64
; CHECK-NEXT:    mov r3, r2
; CHECK-NEXT:  .LBB1_1: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
; CHECK-NEXT:    vadd.f32 q0, q1, q0
; CHECK-NEXT:    vstrb.8 q0, [r3], #16
; CHECK-NEXT:    le lr, .LBB1_1
; CHECK-NEXT:  @ %bb.2: @ %while.end
; CHECK-NEXT:    adds r0, r2, #4
; CHECK-NEXT:    pop {r7, pc}
entry:
  br label %while.body

while.body:
  %pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
  %pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
  %pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
  %blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
  %0 = load <4 x float>, ptr %pSrcA.addr.012, align 4
  %1 = load <4 x float>, ptr %pSrcB.addr.011, align 4
  %2 = fadd fast <4 x float> %1, %0
  store <4 x float> %2, ptr %pDst.addr.010, align 4
  %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4
  %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4
  %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4
  %dec = add nsw i32 %blkCnt.09, -1
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end, label %while.body

while.end:
  %add.ptr3 = getelementptr inbounds float, ptr %pDst, i32 1
  ret ptr %add.ptr3
}

; manyusesafterloop: all three original pointers are read again after the loop
; (one float is loaded from each of %pDst, %pSrcA and %pSrcB and the three are
; summed for the return value). The expected assembly therefore runs the loop
; on copies of all three pointers (r12, r3, r4) and leaves r0, r1 and r2
; untouched for the scalar loads in %while.end.
define nofpclass(nan inf) float @manyusesafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr nocapture noundef %pDst, i32 noundef %blockSize) {
; CHECK-LABEL: manyusesafterloop:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    mov.w lr, #64
; CHECK-NEXT:    mov r12, r0
; CHECK-NEXT:    mov r3, r1
; CHECK-NEXT:    mov r4, r2
; CHECK-NEXT:  .LBB2_1: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r12], #16
; CHECK-NEXT:    vldrw.u32 q1, [r3], #16
; CHECK-NEXT:    vadd.f32 q0, q1, q0
; CHECK-NEXT:    vstrb.8 q0, [r4], #16
; CHECK-NEXT:    le lr, .LBB2_1
; CHECK-NEXT:  @ %bb.2: @ %while.end
; CHECK-NEXT:    vldr s0, [r2]
; CHECK-NEXT:    vldr s2, [r0]
; CHECK-NEXT:    vadd.f32 s0, s2, s0
; CHECK-NEXT:    vldr s2, [r1]
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    pop {r4, pc}
entry:
  br label %while.body

while.body:
  %pSrcA.addr.016 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
  %pSrcB.addr.015 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
  %pDst.addr.014 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
  %blkCnt.013 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
  %0 = load <4 x float>, ptr %pSrcA.addr.016, align 4
  %1 = load <4 x float>, ptr %pSrcB.addr.015, align 4
  %2 = fadd fast <4 x float> %1, %0
  store <4 x float> %2, ptr %pDst.addr.014, align 4
  %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.016, i32 4
  %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.015, i32 4
  %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.014, i32 4
  %dec = add nsw i32 %blkCnt.013, -1
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end, label %while.body

while.end:
  %3 = load float, ptr %pDst, align 4
  %4 = load float, ptr %pSrcA, align 4
  %add = fadd fast float %4, %3
  %5 = load float, ptr %pSrcB, align 4
  %add5 = fadd fast float %add, %5
  ret float %add5
}