; REQUIRES: asserts
; RUN: llc -mtriple=hexagon --stats -o - 2>&1 < %s | FileCheck %s

; Regression test for the Hexagon software pipeliner's node ordering.
; llc is run with --stats and stderr is merged into stdout, so the statistics
; report and the generated assembly are both visible to the checker.
; NOTE(review): the asserts requirement is presumably there because --stats
; output is only produced by builds with assertions/statistics enabled.

; Check that the compilation succeeded and that some code was generated.
; CHECK: vadd

; Check that the loop is pipelined and that a valid node order is used.
; The negative pattern is repeated on both sides of the positive one so that
; the "node order issues" statistic is rejected whether it would be printed
; before or after the "loops software pipelined" statistic.
; CHECK-NOT: Number of node order issues found
; CHECK: Number of loops software pipelined
; CHECK-NOT: Number of node order issues found

target triple = "hexagon"

; @fred: a single-block loop of HVX (64-byte vector) intrinsic calls.
;   %p0 - base of the input data; all five input streams are derived from it.
;   %p1 - multiplied with %p4-derived values to form element offsets.
;   %p2 - not referenced in the body.
;   %p3 - output pointer; four vectors are stored through it per iteration.
;   %p4 - used in the offset computations (%p4, (%p4+1)%5, (%p4+3)%5).
; Many operands are undef/poison/zeroinitializer; NOTE(review): this looks like
; the result of test-case reduction, so the instruction order and operands
; should not be "cleaned up" without re-validating the test.
define void @fred(ptr noalias nocapture readonly %p0, i32 %p1, i32 %p2, ptr noalias nocapture %p3, i32 %p4) local_unnamed_addr #1 {
entry:
  ; Compute the starting addresses of the input streams as i16 element
  ; offsets from %p0.
  %mul = mul i32 %p4, %p1
  %add.ptr = getelementptr inbounds i16, ptr %p0, i32 %mul
  %add = add nsw i32 %p4, 1
  %rem = srem i32 %add, 5
  %mul1 = mul i32 %rem, %p1
  %add.ptr2 = getelementptr inbounds i16, ptr %p0, i32 %mul1
  %add7 = add nsw i32 %p4, 3
  %rem8 = srem i32 %add7, 5
  %mul9 = mul i32 %rem8, %p1
  %add.ptr10 = getelementptr inbounds i16, ptr %p0, i32 %mul9
  ; Advance each stream by 32 i16 elements (64 bytes, one <16 x i32> vector).
  ; %incdec.ptr18 and %incdec.ptr16 are the same address (%p0 + 32 elements).
  %incdec.ptr18 = getelementptr inbounds i16, ptr %p0, i32 32
  %incdec.ptr17 = getelementptr inbounds i16, ptr %add.ptr10, i32 32
  %incdec.ptr16 = getelementptr inbounds i16, ptr %p0, i32 32
  %incdec.ptr15 = getelementptr inbounds i16, ptr %add.ptr2, i32 32
  %incdec.ptr = getelementptr inbounds i16, ptr %add.ptr, i32 32
  ; The loop-entry condition is undef.
  br i1 undef, label %for.end.loopexit.unr-lcssa, label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; Loop-carried state: the output pointer, five input pointers, the previous
  ; iteration's %60 accumulator (%dVsumv1.096) and the remaining trip count.
  ; The back-edge value of %iptr2.099 is poison and the initial values of
  ; %dVsumv1.096 and %niter are undef.
  %optr.0102 = phi ptr [ %incdec.ptr24.3, %for.body ], [ %p3, %entry ]
  %iptr4.0101 = phi ptr [ %incdec.ptr23.3, %for.body ], [ %incdec.ptr18, %entry ]
  %iptr3.0100 = phi ptr [ %incdec.ptr22.3, %for.body ], [ %incdec.ptr17, %entry ]
  %iptr2.099 = phi ptr [ poison, %for.body ], [ %incdec.ptr16, %entry ]
  %iptr1.098 = phi ptr [ %incdec.ptr20.3, %for.body ], [ %incdec.ptr15, %entry ]
  %iptr0.097 = phi ptr [ %incdec.ptr19.3, %for.body ], [ %incdec.ptr, %entry ]
  %dVsumv1.096 = phi <32 x i32> [ %60, %for.body ], [ undef, %entry ]
  %niter = phi i32 [ %niter.nsub.3, %for.body ], [ undef, %entry ]

  ; --- Output vector 0: load one vector from each of the five streams,
  ; combine them into the vector pair %8, and store the result at %optr + 0.
  %0 = load <16 x i32>, ptr %iptr0.097, align 64, !tbaa !1
  %1 = load <16 x i32>, ptr %iptr1.098, align 64, !tbaa !1
  %2 = load <16 x i32>, ptr %iptr2.099, align 64, !tbaa !1
  %3 = load <16 x i32>, ptr %iptr3.0100, align 64, !tbaa !1
  %4 = load <16 x i32>, ptr %iptr4.0101, align 64, !tbaa !1
  %5 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %0, <16 x i32> %4)
  %6 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %5, <16 x i32> %2, i32 393222)
  %7 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %3, <16 x i32> %1)
  %8 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %6, <32 x i32> %7, i32 67372036)
  ; %9 is taken from the value carried over from the previous iteration.
  %9 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dVsumv1.096)
  %10 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %8)
  %11 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %10, <16 x i32> %9, i32 4)
  %12 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %8)
  %13 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %10, <16 x i32> %9, i32 8)
  %14 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %12, <16 x i32> undef, i32 8)
  %15 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %11, <16 x i32> %13)
  %16 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %9, <16 x i32> %13)
  %17 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %16, <16 x i32> %11, i32 101058054)
  %18 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %17, <16 x i32> zeroinitializer, i32 67372036)
  %19 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %14)
  %20 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %19, <16 x i32> undef, i32 101058054)
  %21 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %20, <16 x i32> %15, i32 67372036)
  %22 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %21, <16 x i32> %18, i32 8)
  %incdec.ptr24 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 1
  store <16 x i32> %22, ptr %optr.0102, align 64, !tbaa !1

  ; --- Output vector 1: recomputed from the same pair %8 (no new loads),
  ; stored at %optr + 1.
  %incdec.ptr19.1 = getelementptr inbounds <16 x i32>, ptr %iptr0.097, i32 2
  %incdec.ptr23.1 = getelementptr inbounds <16 x i32>, ptr %iptr4.0101, i32 2
  %23 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %8)
  %24 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %8)
  %25 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %23, i32 4)
  %26 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %24, i32 4)
  %27 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %23, i32 8)
  %28 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %24, i32 8)
  %29 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %25, <16 x i32> %27)
  %30 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %23, <16 x i32> %27)
  %31 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %30, <16 x i32> %25, i32 101058054)
  %32 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %31, <16 x i32> undef, i32 67372036)
  %33 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %24, <16 x i32> %28)
  %34 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %33, <16 x i32> %26, i32 101058054)
  %35 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %34, <16 x i32> %29, i32 67372036)
  %36 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %35, <16 x i32> %32, i32 8)
  %incdec.ptr24.1 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 2
  store <16 x i32> %36, ptr %incdec.ptr24, align 64, !tbaa !1

  ; --- Output vector 2: loads from streams 0 and 4 at vector offset 2,
  ; builds the pair %41, stored at %optr + 2.
  %incdec.ptr19.2 = getelementptr inbounds <16 x i32>, ptr %iptr0.097, i32 3
  %37 = load <16 x i32>, ptr %incdec.ptr19.1, align 64, !tbaa !1
  %incdec.ptr20.2 = getelementptr inbounds <16 x i32>, ptr %iptr1.098, i32 3
  %incdec.ptr21.2 = getelementptr inbounds <16 x i32>, ptr %iptr2.099, i32 3
  %incdec.ptr22.2 = getelementptr inbounds <16 x i32>, ptr %iptr3.0100, i32 3
  %incdec.ptr23.2 = getelementptr inbounds <16 x i32>, ptr %iptr4.0101, i32 3
  %38 = load <16 x i32>, ptr %incdec.ptr23.1, align 64, !tbaa !1
  %39 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %37, <16 x i32> %38)
  %40 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %39, <16 x i32> undef, i32 393222)
  %41 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %40, <32 x i32> undef, i32 67372036)
  %42 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %41)
  %43 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %42, <16 x i32> undef, i32 4)
  %44 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %42, <16 x i32> undef, i32 8)
  %45 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> zeroinitializer, <16 x i32> undef)
  %46 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %43, <16 x i32> %44)
  %47 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %44)
  %48 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %47, <16 x i32> %43, i32 101058054)
  %49 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %48, <16 x i32> %45, i32 67372036)
  %50 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %46, i32 67372036)
  %51 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %50, <16 x i32> %49, i32 8)
  %incdec.ptr24.2 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 3
  store <16 x i32> %51, ptr %incdec.ptr24.1, align 64, !tbaa !1

  ; --- Output vector 3: loads all five streams at vector offset 3 and builds
  ; the pair %60, which is also the value carried into the next iteration via
  ; %dVsumv1.096. The result is stored at %optr + 3.
  %incdec.ptr19.3 = getelementptr inbounds <16 x i32>, ptr %iptr0.097, i32 4
  %52 = load <16 x i32>, ptr %incdec.ptr19.2, align 64, !tbaa !1
  %incdec.ptr20.3 = getelementptr inbounds <16 x i32>, ptr %iptr1.098, i32 4
  %53 = load <16 x i32>, ptr %incdec.ptr20.2, align 64, !tbaa !1
  %54 = load <16 x i32>, ptr %incdec.ptr21.2, align 64, !tbaa !1
  %incdec.ptr22.3 = getelementptr inbounds <16 x i32>, ptr %iptr3.0100, i32 4
  %55 = load <16 x i32>, ptr %incdec.ptr22.2, align 64, !tbaa !1
  %incdec.ptr23.3 = getelementptr inbounds <16 x i32>, ptr %iptr4.0101, i32 4
  %56 = load <16 x i32>, ptr %incdec.ptr23.2, align 64, !tbaa !1
  %57 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %52, <16 x i32> %56)
  %58 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %57, <16 x i32> %54, i32 393222)
  %59 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %53)
  %60 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %58, <32 x i32> %59, i32 67372036)
  %61 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %41)
  %62 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %60)
  %63 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %62, <16 x i32> undef, i32 4)
  %64 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %60)
  %65 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %64, <16 x i32> %61, i32 4)
  %66 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %64, <16 x i32> %61, i32 8)
  %67 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %61, <16 x i32> %65)
  %68 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %63, i32 101058054)
  %69 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %68, <16 x i32> %67, i32 67372036)
  %70 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %61, <16 x i32> %66)
  %71 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %70, <16 x i32> %65, i32 101058054)
  %72 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %71, <16 x i32> undef, i32 67372036)
  %73 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %69, i32 8)
  %incdec.ptr24.3 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 4
  store <16 x i32> %73, ptr %incdec.ptr24.2, align 64, !tbaa !1

  ; The trip counter drops by 4 per iteration; the loop exits when it hits 0.
  %niter.nsub.3 = add i32 %niter, -4
  %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
  br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa, label %for.body

for.end.loopexit.unr-lcssa:                       ; preds = %for.body, %entry
  ret void
}

; HVX intrinsics used by the loop body; all are declared nounwind readnone (#0).
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32>, <16 x i32>, i32) #0

; #1 selects the hexagonv60 CPU with HVX v60 in 64-byte vector mode, which
; matches the <16 x i32> / <32 x i32> vector types used above.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }

; TBAA metadata attached to every vector load and store in the loop.
!1 = !{!2, !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}