; xref: /llvm-project/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll (revision 2208c97c1bec2512d4e47b6223db6d95a7037956)
;
; The llc invocation below passes --stats and folds stderr into stdout (2>&1)
; so that FileCheck can match the statistic lines as well as the generated
; assembly.
; REQUIRES: asserts
; RUN: llc -mtriple=hexagon --stats -o - 2>&1 < %s | FileCheck %s

; Check that the compilation succeeded and that some code was generated.
; CHECK: vadd

; Check that the loop is pipelined and that a valid node order is used.
; CHECK-NOT: Number of node order issues found
; CHECK: Number of loops software pipelined
; CHECK-NOT: Number of node order issues found

target triple = "hexagon"

; @fred: single-loop HVX kernel used as the pipelining candidate.
;   %p0  read-only input base pointer (indexed in i16 elements)
;   %p1  multiplier applied to the row index to form an element offset
;   %p2  unused in the body
;   %p3  output pointer; four <16 x i32> vectors are stored per iteration
;   %p4  row index; rows %p4, (%p4 + 1) % 5 and (%p4 + 3) % 5 are addressed
; Many operands are undef/poison and the loop is entered through a branch on
; undef. NOTE(review): this looks like an automatically reduced test case, so
; the instruction sequence is kept exactly as-is; "cleaning up" the
; placeholders would change what the scheduler sees.
; The scalar operands of the multiply intrinsics are repeating patterns:
;   393222 = 0x00060006, 67372036 = 0x04040404, 101058054 = 0x06060606.
define void @fred(ptr noalias nocapture readonly %p0, i32 %p1, i32 %p2, ptr noalias nocapture %p3, i32 %p4) local_unnamed_addr #1 {
entry:
  ; Row base pointers derived from %p0, then each input stream is started
  ; 32 i16 elements (one 64-byte vector) past its base.
  %mul = mul i32 %p4, %p1
  %add.ptr = getelementptr inbounds i16, ptr %p0, i32 %mul
  %add = add nsw i32 %p4, 1
  %rem = srem i32 %add, 5
  %mul1 = mul i32 %rem, %p1
  %add.ptr2 = getelementptr inbounds i16, ptr %p0, i32 %mul1
  %add7 = add nsw i32 %p4, 3
  %rem8 = srem i32 %add7, 5
  %mul9 = mul i32 %rem8, %p1
  %add.ptr10 = getelementptr inbounds i16, ptr %p0, i32 %mul9
  %incdec.ptr18 = getelementptr inbounds i16, ptr %p0, i32 32
  %incdec.ptr17 = getelementptr inbounds i16, ptr %add.ptr10, i32 32
  %incdec.ptr16 = getelementptr inbounds i16, ptr %p0, i32 32
  %incdec.ptr15 = getelementptr inbounds i16, ptr %add.ptr2, i32 32
  %incdec.ptr = getelementptr inbounds i16, ptr %add.ptr, i32 32
  br i1 undef, label %for.end.loopexit.unr-lcssa, label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; Loop-carried state: one output pointer, five input pointers, the previous
  ; accumulator pair and the remaining trip count. The back-edge value of
  ; %iptr2.099 is poison; %dVsumv1.096 and %niter start out as undef.
  %optr.0102 = phi ptr [ %incdec.ptr24.3, %for.body ], [ %p3, %entry ]
  %iptr4.0101 = phi ptr [ %incdec.ptr23.3, %for.body ], [ %incdec.ptr18, %entry ]
  %iptr3.0100 = phi ptr [ %incdec.ptr22.3, %for.body ], [ %incdec.ptr17, %entry ]
  %iptr2.099 = phi ptr [ poison, %for.body ], [ %incdec.ptr16, %entry ]
  %iptr1.098 = phi ptr [ %incdec.ptr20.3, %for.body ], [ %incdec.ptr15, %entry ]
  %iptr0.097 = phi ptr [ %incdec.ptr19.3, %for.body ], [ %incdec.ptr, %entry ]
  %dVsumv1.096 = phi <32 x i32> [ %60, %for.body ], [ undef, %entry ]
  %niter = phi i32 [ %niter.nsub.3, %for.body ], [ undef, %entry ]
  ; Output vector 0: load one vector from each of the five input streams,
  ; combine them into the pair %8, filter against the carried pair, and store
  ; to %optr.0102.
  %0 = load <16 x i32>, ptr %iptr0.097, align 64, !tbaa !1
  %1 = load <16 x i32>, ptr %iptr1.098, align 64, !tbaa !1
  %2 = load <16 x i32>, ptr %iptr2.099, align 64, !tbaa !1
  %3 = load <16 x i32>, ptr %iptr3.0100, align 64, !tbaa !1
  %4 = load <16 x i32>, ptr %iptr4.0101, align 64, !tbaa !1
  %5 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %0, <16 x i32> %4)
  %6 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %5, <16 x i32> %2, i32 393222)
  %7 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %3, <16 x i32> %1)
  %8 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %6, <32 x i32> %7, i32 67372036)
  %9 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dVsumv1.096)
  %10 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %8)
  %11 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %10, <16 x i32> %9, i32 4)
  %12 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %8)
  %13 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %10, <16 x i32> %9, i32 8)
  %14 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %12, <16 x i32> undef, i32 8)
  %15 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %11, <16 x i32> %13)
  %16 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %9, <16 x i32> %13)
  %17 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %16, <16 x i32> %11, i32 101058054)
  %18 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %17, <16 x i32> zeroinitializer, i32 67372036)
  %19 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %14)
  %20 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %19, <16 x i32> undef, i32 101058054)
  %21 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %20, <16 x i32> %15, i32 67372036)
  %22 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %21, <16 x i32> %18, i32 8)
  %incdec.ptr24 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 1
  store <16 x i32> %22, ptr %optr.0102, align 64, !tbaa !1
  ; Output vector 1: reuses the pair %8 (no new loads) and stores to
  ; %optr.0102 + 1.
  %incdec.ptr19.1 = getelementptr inbounds <16 x i32>, ptr %iptr0.097, i32 2
  %incdec.ptr23.1 = getelementptr inbounds <16 x i32>, ptr %iptr4.0101, i32 2
  %23 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %8)
  %24 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %8)
  %25 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %23, i32 4)
  %26 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %24, i32 4)
  %27 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %23, i32 8)
  %28 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %24, i32 8)
  %29 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %25, <16 x i32> %27)
  %30 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %23, <16 x i32> %27)
  %31 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %30, <16 x i32> %25, i32 101058054)
  %32 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %31, <16 x i32> undef, i32 67372036)
  %33 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %24, <16 x i32> %28)
  %34 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %33, <16 x i32> %26, i32 101058054)
  %35 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %34, <16 x i32> %29, i32 67372036)
  %36 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %35, <16 x i32> %32, i32 8)
  %incdec.ptr24.1 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 2
  store <16 x i32> %36, ptr %incdec.ptr24, align 64, !tbaa !1
  ; Output vector 2: loads from streams 0 and 4 at offset +2, builds the pair
  ; %41 and stores to %optr.0102 + 2.
  %incdec.ptr19.2 = getelementptr inbounds <16 x i32>, ptr %iptr0.097, i32 3
  %37 = load <16 x i32>, ptr %incdec.ptr19.1, align 64, !tbaa !1
  %incdec.ptr20.2 = getelementptr inbounds <16 x i32>, ptr %iptr1.098, i32 3
  %incdec.ptr21.2 = getelementptr inbounds <16 x i32>, ptr %iptr2.099, i32 3
  %incdec.ptr22.2 = getelementptr inbounds <16 x i32>, ptr %iptr3.0100, i32 3
  %incdec.ptr23.2 = getelementptr inbounds <16 x i32>, ptr %iptr4.0101, i32 3
  %38 = load <16 x i32>, ptr %incdec.ptr23.1, align 64, !tbaa !1
  %39 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %37, <16 x i32> %38)
  %40 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %39, <16 x i32> undef, i32 393222)
  %41 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %40, <32 x i32> undef, i32 67372036)
  %42 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %41)
  %43 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %42, <16 x i32> undef, i32 4)
  %44 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %42, <16 x i32> undef, i32 8)
  %45 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> zeroinitializer, <16 x i32> undef)
  %46 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %43, <16 x i32> %44)
  %47 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %44)
  %48 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %47, <16 x i32> %43, i32 101058054)
  %49 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %48, <16 x i32> %45, i32 67372036)
  %50 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %46, i32 67372036)
  %51 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %50, <16 x i32> %49, i32 8)
  %incdec.ptr24.2 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 3
  store <16 x i32> %51, ptr %incdec.ptr24.1, align 64, !tbaa !1
  ; Output vector 3: loads all five streams at offset +3, builds the pair %60
  ; (which also feeds %dVsumv1.096 on the back edge) and stores to
  ; %optr.0102 + 3. Pointers for the next iteration are the +4 offsets.
  %incdec.ptr19.3 = getelementptr inbounds <16 x i32>, ptr %iptr0.097, i32 4
  %52 = load <16 x i32>, ptr %incdec.ptr19.2, align 64, !tbaa !1
  %incdec.ptr20.3 = getelementptr inbounds <16 x i32>, ptr %iptr1.098, i32 4
  %53 = load <16 x i32>, ptr %incdec.ptr20.2, align 64, !tbaa !1
  %54 = load <16 x i32>, ptr %incdec.ptr21.2, align 64, !tbaa !1
  %incdec.ptr22.3 = getelementptr inbounds <16 x i32>, ptr %iptr3.0100, i32 4
  %55 = load <16 x i32>, ptr %incdec.ptr22.2, align 64, !tbaa !1
  %incdec.ptr23.3 = getelementptr inbounds <16 x i32>, ptr %iptr4.0101, i32 4
  %56 = load <16 x i32>, ptr %incdec.ptr23.2, align 64, !tbaa !1
  %57 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %52, <16 x i32> %56)
  %58 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %57, <16 x i32> %54, i32 393222)
  %59 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %53)
  %60 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %58, <32 x i32> %59, i32 67372036)
  %61 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %41)
  %62 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %60)
  %63 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %62, <16 x i32> undef, i32 4)
  %64 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %60)
  %65 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %64, <16 x i32> %61, i32 4)
  %66 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %64, <16 x i32> %61, i32 8)
  %67 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %61, <16 x i32> %65)
  %68 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %63, i32 101058054)
  %69 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %68, <16 x i32> %67, i32 67372036)
  %70 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %61, <16 x i32> %66)
  %71 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %70, <16 x i32> %65, i32 101058054)
  %72 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %71, <16 x i32> undef, i32 67372036)
  %73 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %69, i32 8)
  %incdec.ptr24.3 = getelementptr inbounds <16 x i32>, ptr %optr.0102, i32 4
  store <16 x i32> %73, ptr %incdec.ptr24.2, align 64, !tbaa !1
  ; The trip counter drops by 4 per iteration, matching the four stores above.
  %niter.nsub.3 = add i32 %niter, -4
  %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
  br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa, label %for.body

for.end.loopexit.unr-lcssa:                       ; preds = %for.body, %entry
  ret void
}

; HVX intrinsics called by @fred; all are declared nounwind readnone (#0).
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32>, <16 x i32>, i32) #0

; #0: intrinsic declarations. #1: @fred, compiled for hexagonv60 with HVX v60
; in 64-byte vector mode (a <16 x i32> value is exactly 64 bytes).
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }

; TBAA nodes referenced by the !tbaa !1 tags on the vector loads and stores.
!1 = !{!2, !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}
