; xref: /llvm-project/llvm/test/CodeGen/Hexagon/vect-regpairs.ll (revision ff9af4c43ad71eeba2cabe99609cfaa0fd54c1d0)
;RUN: llc -mtriple=hexagon -mcpu=hexagonv66 -mhvx -filetype=obj < %s -o - | llvm-objdump --mcpu=hexagonv66 --mattr=+hvx -d - | FileCheck --check-prefix=CHECK-V66 %s
;RUN: llc -mtriple=hexagon -mcpu=hexagonv67 -mhvx -filetype=obj < %s -o - | llvm-objdump --mcpu=hexagonv67 --mattr=+hvx -d - | FileCheck --check-prefix=CHECK-V67 %s

; Should not attempt to use v<even>:<odd> 'reverse' vector regpairs
; on old or new arches (should not crash).

; CHECK-V66: vcombine
; CHECK-V67: vcombine
; Hexagon HVX intrinsic declarations. <16 x i32> is a single vector value and
; <32 x i32> is a vector-pair value in these signatures.
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vd0()
declare <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32>, <16 x i32>, i32)

declare <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32>, <16 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>)


; Test kernel: reads one vector from each of seven rows located at
; %src - 3*%stride .. %src + 3*%stride, combines them with HVX multiply /
; accumulate intrinsics, and stores one result vector per loop iteration to
; %dst. The loop consumes 64 from %width per iteration and runs while more
; than 64 remain.
;
; Unnamed results (%0..%75) are numbered sequentially by LLVM, so each %N
; below is fixed by the position of the instruction that defines it.
define void @Gaussian7x7u8PerRow(ptr %src, i32 %stride, i32 %width, ptr %dst) #0 {
entry:
  ; Row pointers: %0 = src-3*stride, %1 = src-2*stride, %2 = src-stride,
  ; %3 = src, %4 = src+stride, %5 = src+2*stride, %6 = src+3*stride.
  %mul = mul i32 %stride, 3
  %idx.neg = sub i32 0, %mul
  %add.ptr = getelementptr i8, ptr %src, i32 %idx.neg
  %0 = bitcast ptr %add.ptr to ptr
  %mul1 = shl i32 %stride, 1
  %idx.neg2 = sub i32 0, %mul1
  %add.ptr3 = getelementptr i8, ptr %src, i32 %idx.neg2
  %1 = bitcast ptr %add.ptr3 to ptr
  %idx.neg5 = sub i32 0, %stride
  %add.ptr6 = getelementptr i8, ptr %src, i32 %idx.neg5
  %2 = bitcast ptr %add.ptr6 to ptr
  %3 = bitcast ptr %src to ptr
  %add.ptr10 = getelementptr i8, ptr %src, i32 %stride
  %4 = bitcast ptr %add.ptr10 to ptr
  %add.ptr12 = getelementptr i8, ptr %src, i32 %mul1
  %5 = bitcast ptr %add.ptr12 to ptr
  %add.ptr14 = getelementptr i8, ptr %src, i32 %mul
  %6 = bitcast ptr %add.ptr14 to ptr
  %7 = bitcast ptr %dst to ptr
  ; First vector of each of the seven rows.
  %8 = load <16 x i32>, ptr %0
  %9 = load <16 x i32>, ptr %1
  %10 = load <16 x i32>, ptr %2
  %11 = load <16 x i32>, ptr %3
  %12 = load <16 x i32>, ptr %4
  %13 = load <16 x i32>, ptr %5
  %14 = load <16 x i32>, ptr %6
  ; %16 is a vector pair built from two copies of the vd0 result; it seeds
  ; the %dXV0 phi. %22 is the accumulated pair for the first column and
  ; seeds the %dXV1 phi.
  %15 = call <16 x i32> @llvm.hexagon.V6.vd0()
  %16 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %15, <16 x i32> %15)
  %17 = call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %14, <16 x i32> %8)
  %18 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %13, <16 x i32> %9)
  %19 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %17, <32 x i32> %18, i32 101058054)
  %20 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %12, <16 x i32> %10)
  %21 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %19, <32 x i32> %20, i32 252645135)
  %22 = call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %21, <16 x i32> %11, i32 336860180)
  %cmp155 = icmp sgt i32 %width, 64
  br i1 %cmp155, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; Advance every row pointer past the vector already loaded in %entry.
  ; NOTE(review): the byte offset was lost in extraction; 64 is reconstructed
  ; as the size of one <16 x i32> vector, matching the per-iteration step of
  ; the loop below. Confirm against the upstream test.
  %incdec.ptr20 = getelementptr i8, ptr %add.ptr14, i32 64
  %23 = bitcast ptr %incdec.ptr20 to ptr
  %incdec.ptr19 = getelementptr i8, ptr %add.ptr12, i32 64
  %24 = bitcast ptr %incdec.ptr19 to ptr
  %incdec.ptr18 = getelementptr i8, ptr %add.ptr10, i32 64
  %25 = bitcast ptr %incdec.ptr18 to ptr
  %incdec.ptr17 = getelementptr i8, ptr %src, i32 64
  %26 = bitcast ptr %incdec.ptr17 to ptr
  %incdec.ptr16 = getelementptr i8, ptr %add.ptr6, i32 64
  %27 = bitcast ptr %incdec.ptr16 to ptr
  %incdec.ptr15 = getelementptr i8, ptr %add.ptr3, i32 64
  %28 = bitcast ptr %incdec.ptr15 to ptr
  %incdec.ptr = getelementptr i8, ptr %add.ptr, i32 64
  %29 = bitcast ptr %incdec.ptr to ptr
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.preheader
  %optr.0166 = phi ptr [ %incdec.ptr28, %for.body ], [ %7, %for.body.preheader ]
  %iptr6.0165 = phi ptr [ %incdec.ptr27, %for.body ], [ %23, %for.body.preheader ]
  %iptr5.0164 = phi ptr [ %incdec.ptr26, %for.body ], [ %24, %for.body.preheader ]
  %iptr4.0163 = phi ptr [ %incdec.ptr25, %for.body ], [ %25, %for.body.preheader ]
  %iptr3.0162 = phi ptr [ %incdec.ptr24, %for.body ], [ %26, %for.body.preheader ]
  %iptr2.0161 = phi ptr [ %incdec.ptr23, %for.body ], [ %27, %for.body.preheader ]
  %iptr1.0160 = phi ptr [ %incdec.ptr22, %for.body ], [ %28, %for.body.preheader ]
  %iptr0.0159 = phi ptr [ %incdec.ptr21, %for.body ], [ %29, %for.body.preheader ]
  ; %dXV1 carries this iteration's previous accumulated pair (%49 from the
  ; last trip, %22 on entry); %dXV0 carries the one before that.
  %dXV1.0158 = phi <32 x i32> [ %49, %for.body ], [ %22, %for.body.preheader ]
  %dXV0.0157 = phi <32 x i32> [ %dXV1.0158, %for.body ], [ %16, %for.body.preheader ]
  %i.0156 = phi i32 [ %sub, %for.body ], [ %width, %for.body.preheader ]
  ; Load the current vector of each row and step each row pointer by one
  ; vector. NOTE(review): the `i32 1` indices were lost in extraction and are
  ; reconstructed from the post-increment pattern; confirm upstream.
  %incdec.ptr21 = getelementptr <16 x i32>, ptr %iptr0.0159, i32 1
  %30 = load <16 x i32>, ptr %iptr0.0159
  %incdec.ptr22 = getelementptr <16 x i32>, ptr %iptr1.0160, i32 1
  %31 = load <16 x i32>, ptr %iptr1.0160
  %incdec.ptr23 = getelementptr <16 x i32>, ptr %iptr2.0161, i32 1
  %32 = load <16 x i32>, ptr %iptr2.0161
  %incdec.ptr24 = getelementptr <16 x i32>, ptr %iptr3.0162, i32 1
  %33 = load <16 x i32>, ptr %iptr3.0162
  %incdec.ptr25 = getelementptr <16 x i32>, ptr %iptr4.0163, i32 1
  %34 = load <16 x i32>, ptr %iptr4.0163
  %incdec.ptr26 = getelementptr <16 x i32>, ptr %iptr5.0164, i32 1
  %35 = load <16 x i32>, ptr %iptr5.0164
  %incdec.ptr27 = getelementptr <16 x i32>, ptr %iptr6.0165, i32 1
  %36 = load <16 x i32>, ptr %iptr6.0165, !tbaa !8
  ; Split the two carried pairs into halves and form byte-aligned
  ; combinations of them.
  %37 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV1.0158)
  %38 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV0.0157)
  %39 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 2)
  %40 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV1.0158)
  %41 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV0.0157)
  %42 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %40, <16 x i32> %41, i32 2)
  %43 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 4)
  ; Same accumulation sequence as in %entry, applied to this iteration's
  ; loads; %49 feeds the %dXV1 phi on the back edge.
  %44 = call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %36, <16 x i32> %30)
  %45 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %35, <16 x i32> %31)
  %46 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %44, <32 x i32> %45, i32 101058054)
  %47 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %34, <16 x i32> %32)
  %48 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %46, <32 x i32> %47, i32 252645135)
  %49 = call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %48, <16 x i32> %33, i32 336860180)
  %50 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %49)
  %51 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 2)
  %52 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %49)
  %53 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %52, <16 x i32> %37, i32 2)
  %54 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 4)
  ; Two parallel chains (%57 -> %64 and %60 -> %68) built from vaddh /
  ; vcombine / vmpahb / vmpahb.acc.
  %55 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %37, <16 x i32> %39)
  %56 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %40)
  %57 = call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %56, i32 252972820)
  %58 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %40)
  %59 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %58, <16 x i32> %37)
  %60 = call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %59, i32 252972820)
  %61 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %43)
  %62 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %42)
  %63 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %62)
  %64 = call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %57, <32 x i32> %63, i32 17170694)
  %65 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %54, <16 x i32> %42)
  %66 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %39)
  %67 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %65, <16 x i32> %66)
  %68 = call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %60, <32 x i32> %67, i32 17170694)
  ; Narrow each chain with a shift amount of 12 and merge the two results
  ; into the single output vector %75.
  %69 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %64)
  %70 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %64)
  %71 = call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %69, <16 x i32> %70, i32 12)
  %72 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %68)
  %73 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %68)
  %74 = call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %73, i32 12)
  %75 = call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %74, <16 x i32> %71)
  ; The output pointer steps from its own phi, like the seven input
  ; pointers above; %incdec.ptr28 is what the %optr.0166 phi takes on the
  ; back edge.
  %incdec.ptr28 = getelementptr <16 x i32>, ptr %optr.0166, i32 1
  store <16 x i32> %75, ptr %optr.0166
  %sub = add i32 %i.0156, -64
  %cmp = icmp sgt i32 %sub, 64
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Remaining HVX intrinsics called from @Gaussian7x7u8PerRow.
declare <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32>, <16 x i32>)

; Function attributes for @Gaussian7x7u8PerRow: targets hexagonv65 with
; 64-byte HVX vectors.
; NOTE(review): the two *-fp-math string attributes appear without a value
; in this copy; confirm whether an explicit value (e.g. ="false") was
; intended upstream.
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math" "target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls" "unsafe-fp-math" }

; TBAA access tag !8 (used by the tagged vector load) -> type node !9 -> root !10.
!8 = !{!9, !9, i64 0}
!9 = !{!"omnipotent char", !10}
!10 = !{}
; Empty nodes that no instruction in this file references.
!14 = !{}
!19 = !{}
!24 = !{}