xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/vqdmull.ll (revision ab0c5cea0b1a9a1227fea840184dd7b5983c22a5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
3
; Predicate conversion intrinsics: each takes an i32 and returns a vector of
; i1 (4 or 2 lanes). The predicated tests below feed them the zero-extended
; i16 %p argument.
4declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
5declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
6
; vqdmull intrinsics: two same-typed vector operands plus a trailing i32; the
; result has half as many lanes, each twice as wide. The tests in this file
; pass 0 for that i32 where vqdmullb is expected and 1 where vqdmullt is
; expected. The ".predicated" variants additionally take an i1 predicate
; vector and a vector of the result type (the tests pass %inactive there).
7declare <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32)
8declare <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32>, <4 x i32>, i32)
9declare <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, <4 x i1>, <4 x i32>)
10declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32>, <4 x i32>, i32, <2 x i1>, <2 x i64>)
11
; Unpredicated vector-by-vector case, 16-bit lanes, selector 0.
; Expected assembly: a single vqdmullb.s16 operating in place on q0.
12define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_s16(<8 x i16> %a, <8 x i16> %b) {
13; CHECK-LABEL: test_vqdmullbq_s16:
14; CHECK:       @ %bb.0: @ %entry
15; CHECK-NEXT:    vqdmullb.s16 q0, q0, q1
16; CHECK-NEXT:    bx lr
17entry:
18  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0)
19  ret <4 x i32> %0
20}
21
; Unpredicated vector-by-vector case, 32-bit lanes, selector 0.
; Expected assembly: vqdmullb.s32 writes a scratch register (q2) and the
; result is then moved into q0.
; NOTE(review): presumably the .s32 form cannot use a destination that
; overlaps a source register -- confirm against the MVE ISA description.
22define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_s32(<4 x i32> %a, <4 x i32> %b) {
23; CHECK-LABEL: test_vqdmullbq_s32:
24; CHECK:       @ %bb.0: @ %entry
25; CHECK-NEXT:    vqdmullb.s32 q2, q0, q1
26; CHECK-NEXT:    vmov q0, q2
27; CHECK-NEXT:    bx lr
28entry:
29  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
30  ret <2 x i64> %0
31}
32
; Predicated vector-by-vector case, 16-bit lanes, selector 0.
; %p is widened to i32, converted to a <4 x i1> predicate, and passed together
; with %inactive to the predicated intrinsic.
; Expected assembly: predicate loaded from r0 into p0, then a VPST block
; containing vqdmullbt.s16 that writes q0 (the register holding %inactive).
33define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
34; CHECK-LABEL: test_vqdmullbq_m_s16:
35; CHECK:       @ %bb.0: @ %entry
36; CHECK-NEXT:    vmsr p0, r0
37; CHECK-NEXT:    vpst
38; CHECK-NEXT:    vqdmullbt.s16 q0, q1, q2
39; CHECK-NEXT:    bx lr
40entry:
41  %0 = zext i16 %p to i32
42  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
43  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 0, <4 x i1> %1, <4 x i32> %inactive)
44  ret <4 x i32> %2
45}
46
; Predicated vector-by-vector case, 32-bit lanes, selector 0.
; %p is widened to i32, converted to a <2 x i1> predicate, and passed together
; with %inactive to the predicated intrinsic.
; Expected assembly: predicate loaded from r0 into p0, then a VPST block
; containing vqdmullbt.s32 that writes q0 (the register holding %inactive).
47define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
48; CHECK-LABEL: test_vqdmullbq_m_s32:
49; CHECK:       @ %bb.0: @ %entry
50; CHECK-NEXT:    vmsr p0, r0
51; CHECK-NEXT:    vpst
52; CHECK-NEXT:    vqdmullbt.s32 q0, q1, q2
53; CHECK-NEXT:    bx lr
54entry:
55  %0 = zext i16 %p to i32
56  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
57  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %b, i32 0, <2 x i1> %1, <2 x i64> %inactive)
58  ret <2 x i64> %2
59}
60
; Unpredicated vector-by-scalar case, 16-bit lanes, selector 0.
; The scalar %b is broadcast to all eight lanes (insertelement into lane 0,
; then shufflevector with an all-zero mask) before the intrinsic call.
; Expected assembly: the splat is folded away and vqdmullb.s16 takes the
; scalar directly from r0.
61define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_n_s16(<8 x i16> %a, i16 signext %b) {
62; CHECK-LABEL: test_vqdmullbq_n_s16:
63; CHECK:       @ %bb.0: @ %entry
64; CHECK-NEXT:    vqdmullb.s16 q0, q0, r0
65; CHECK-NEXT:    bx lr
66entry:
67  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
68  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
69  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %.splat, i32 0)
70  ret <4 x i32> %0
71}
72
; Unpredicated vector-by-scalar case, 32-bit lanes, selector 0.
; The scalar %b is broadcast to all four lanes (insertelement into lane 0,
; then shufflevector with an all-zero mask) before the intrinsic call.
; Expected assembly: the splat is folded away, vqdmullb.s32 takes the scalar
; directly from r0 and writes a scratch register (q1) that is then moved
; into q0.
; The definition carries no attribute-group reference, matching the sibling
; test_vqdmulltq_n_s32; no attribute group is defined in this file.
73define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_n_s32(<4 x i32> %a, i32 %b) {
74; CHECK-LABEL: test_vqdmullbq_n_s32:
75; CHECK:       @ %bb.0: @ %entry
76; CHECK-NEXT:    vqdmullb.s32 q1, q0, r0
77; CHECK-NEXT:    vmov q0, q1
78; CHECK-NEXT:    bx lr
79entry:
80  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
81  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
82  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %.splat, i32 0)
83  ret <2 x i64> %0
84}
85
; Predicated vector-by-scalar case, 16-bit lanes, selector 0.
; %b is splatted across eight lanes; %p is widened to i32 and converted to a
; <4 x i1> predicate; both go to the predicated intrinsic with %inactive.
; Expected assembly: predicate loaded from r1 into p0, then a VPST block
; containing vqdmullbt.s16 with the scalar operand taken from r0.
86define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 signext %b, i16 zeroext %p) {
87; CHECK-LABEL: test_vqdmullbq_m_n_s16:
88; CHECK:       @ %bb.0: @ %entry
89; CHECK-NEXT:    vmsr p0, r1
90; CHECK-NEXT:    vpst
91; CHECK-NEXT:    vqdmullbt.s16 q0, q1, r0
92; CHECK-NEXT:    bx lr
93entry:
94  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
95  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
96  %0 = zext i16 %p to i32
97  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
98  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %.splat, i32 0, <4 x i1> %1, <4 x i32> %inactive)
99  ret <4 x i32> %2
100}
101
; Predicated vector-by-scalar case, 32-bit lanes, selector 0.
; %b is splatted across four lanes; %p is widened to i32 and converted to a
; <2 x i1> predicate; both go to the predicated intrinsic with %inactive.
; Expected assembly: predicate loaded from r1 into p0, then a VPST block
; containing vqdmullbt.s32 with the scalar operand taken from r0.
102define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_n_s32(<2 x i64> %inactive, <4 x i32> %a, i32 %b, i16 zeroext %p) {
103; CHECK-LABEL: test_vqdmullbq_m_n_s32:
104; CHECK:       @ %bb.0: @ %entry
105; CHECK-NEXT:    vmsr p0, r1
106; CHECK-NEXT:    vpst
107; CHECK-NEXT:    vqdmullbt.s32 q0, q1, r0
108; CHECK-NEXT:    bx lr
109entry:
110  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
111  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
112  %0 = zext i16 %p to i32
113  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
114  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %.splat, i32 0, <2 x i1> %1, <2 x i64> %inactive)
115  ret <2 x i64> %2
116}
117
; Unpredicated vector-by-vector case, 16-bit lanes, selector 1.
; Expected assembly: a single vqdmullt.s16 operating in place on q0.
118define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_s16(<8 x i16> %a, <8 x i16> %b) {
119; CHECK-LABEL: test_vqdmulltq_s16:
120; CHECK:       @ %bb.0: @ %entry
121; CHECK-NEXT:    vqdmullt.s16 q0, q0, q1
122; CHECK-NEXT:    bx lr
123entry:
124  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1)
125  ret <4 x i32> %0
126}
127
; Unpredicated vector-by-vector case, 32-bit lanes, selector 1.
; Expected assembly: vqdmullt.s32 writes a scratch register (q2) and the
; result is then moved into q0.
; NOTE(review): presumably the .s32 form cannot use a destination that
; overlaps a source register -- confirm against the MVE ISA description.
128define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_s32(<4 x i32> %a, <4 x i32> %b) {
129; CHECK-LABEL: test_vqdmulltq_s32:
130; CHECK:       @ %bb.0: @ %entry
131; CHECK-NEXT:    vqdmullt.s32 q2, q0, q1
132; CHECK-NEXT:    vmov q0, q2
133; CHECK-NEXT:    bx lr
134entry:
135  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1)
136  ret <2 x i64> %0
137}
138
; Predicated vector-by-vector case, 16-bit lanes, selector 1.
; %p is widened to i32, converted to a <4 x i1> predicate, and passed together
; with %inactive to the predicated intrinsic.
; Expected assembly: predicate loaded from r0 into p0, then a VPST block
; containing vqdmulltt.s16 that writes q0 (the register holding %inactive).
139define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
140; CHECK-LABEL: test_vqdmulltq_m_s16:
141; CHECK:       @ %bb.0: @ %entry
142; CHECK-NEXT:    vmsr p0, r0
143; CHECK-NEXT:    vpst
144; CHECK-NEXT:    vqdmulltt.s16 q0, q1, q2
145; CHECK-NEXT:    bx lr
146entry:
147  %0 = zext i16 %p to i32
148  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
149  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 1, <4 x i1> %1, <4 x i32> %inactive)
150  ret <4 x i32> %2
151}
152
; Predicated vector-by-vector case, 32-bit lanes, selector 1.
; %p is widened to i32, converted to a <2 x i1> predicate, and passed together
; with %inactive to the predicated intrinsic.
; Expected assembly: predicate loaded from r0 into p0, then a VPST block
; containing vqdmulltt.s32 that writes q0 (the register holding %inactive).
153define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
154; CHECK-LABEL: test_vqdmulltq_m_s32:
155; CHECK:       @ %bb.0: @ %entry
156; CHECK-NEXT:    vmsr p0, r0
157; CHECK-NEXT:    vpst
158; CHECK-NEXT:    vqdmulltt.s32 q0, q1, q2
159; CHECK-NEXT:    bx lr
160entry:
161  %0 = zext i16 %p to i32
162  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
163  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %b, i32 1, <2 x i1> %1, <2 x i64> %inactive)
164  ret <2 x i64> %2
165}
166
; Unpredicated vector-by-scalar case, 16-bit lanes, selector 1.
; The scalar %b is broadcast to all eight lanes (insertelement into lane 0,
; then shufflevector with an all-zero mask) before the intrinsic call.
; Expected assembly: the splat is folded away and vqdmullt.s16 takes the
; scalar directly from r0.
167define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_n_s16(<8 x i16> %a, i16 signext %b) {
168; CHECK-LABEL: test_vqdmulltq_n_s16:
169; CHECK:       @ %bb.0: @ %entry
170; CHECK-NEXT:    vqdmullt.s16 q0, q0, r0
171; CHECK-NEXT:    bx lr
172entry:
173  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
174  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
175  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %.splat, i32 1)
176  ret <4 x i32> %0
177}
178
; Unpredicated vector-by-scalar case, 32-bit lanes, selector 1.
; The scalar %b is broadcast to all four lanes (insertelement into lane 0,
; then shufflevector with an all-zero mask) before the intrinsic call.
; Expected assembly: the splat is folded away, vqdmullt.s32 takes the scalar
; directly from r0 and writes a scratch register (q1) that is then moved
; into q0.
179define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_n_s32(<4 x i32> %a, i32 %b) {
180; CHECK-LABEL: test_vqdmulltq_n_s32:
181; CHECK:       @ %bb.0: @ %entry
182; CHECK-NEXT:    vqdmullt.s32 q1, q0, r0
183; CHECK-NEXT:    vmov q0, q1
184; CHECK-NEXT:    bx lr
185entry:
186  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
187  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
188  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %.splat, i32 1)
189  ret <2 x i64> %0
190}
191
; Predicated vector-by-scalar case, 16-bit lanes, selector 1.
; %b is splatted across eight lanes; %p is widened to i32 and converted to a
; <4 x i1> predicate; both go to the predicated intrinsic with %inactive.
; Expected assembly: predicate loaded from r1 into p0, then a VPST block
; containing vqdmulltt.s16 with the scalar operand taken from r0.
192define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 signext %b, i16 zeroext %p) {
193; CHECK-LABEL: test_vqdmulltq_m_n_s16:
194; CHECK:       @ %bb.0: @ %entry
195; CHECK-NEXT:    vmsr p0, r1
196; CHECK-NEXT:    vpst
197; CHECK-NEXT:    vqdmulltt.s16 q0, q1, r0
198; CHECK-NEXT:    bx lr
199entry:
200  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
201  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
202  %0 = zext i16 %p to i32
203  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
204  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %.splat, i32 1, <4 x i1> %1, <4 x i32> %inactive)
205  ret <4 x i32> %2
206}
207
; Predicated vector-by-scalar case, 32-bit lanes, selector 1.
; %b is splatted across four lanes; %p is widened to i32 and converted to a
; <2 x i1> predicate; both go to the predicated intrinsic with %inactive.
; Expected assembly: predicate loaded from r1 into p0, then a VPST block
; containing vqdmulltt.s32 with the scalar operand taken from r0.
208define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_m_n_s32(<2 x i64> %inactive, <4 x i32> %a, i32 %b, i16 zeroext %p) {
209; CHECK-LABEL: test_vqdmulltq_m_n_s32:
210; CHECK:       @ %bb.0: @ %entry
211; CHECK-NEXT:    vmsr p0, r1
212; CHECK-NEXT:    vpst
213; CHECK-NEXT:    vqdmulltt.s32 q0, q1, r0
214; CHECK-NEXT:    bx lr
215entry:
216  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
217  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
218  %0 = zext i16 %p to i32
219  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
220  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %.splat, i32 1, <2 x i1> %1, <2 x i64> %inactive)
221  ret <2 x i64> %2
222}
223