xref: /llvm-project/llvm/test/CodeGen/ARM/neon-v8.1a.ll (revision 82973edfb72a95b442fa6d2bb404e15a4031855e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s
3
4;-----------------------------------------------------------------------------
5; RDMA Vector
6
7declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
8declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
9declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
10declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
11
12declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
13declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
14declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
15declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
16
17declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
18declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
19declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
20declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
21
22declare <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
23declare <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
24declare <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
25declare <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
26declare <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
27declare <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
28declare <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
29declare <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
30
; The sadd.sat/ssub.sat intrinsics in this file were previously folded into
; vqrdmlah/vqrdmlsh where they shouldn't be. They should produce vqrdmulh
; followed by a separate vqadd (or vqsub for the ssub.sat tests).
33
; 64-bit (4 x i16) case: vqrdmulh of %mhs and %rhs, then a saturating add with
; %acc. Expected output keeps the two operations as vqrdmulh + vqadd.
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2
; CHECK-NEXT:    vqadd.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
  %mulh = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
  %sum = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %mulh)
  ret <4 x i16> %sum
}
44
; 128-bit (8 x i16) case: vqrdmulh of %mhs and %rhs, then a saturating add with
; %acc. Expected output keeps the two operations as vqrdmulh + vqadd on q regs.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, q2
; CHECK-NEXT:    vqadd.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
  %mulh = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
  %sum = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %mulh)
  ret <8 x i16> %sum
}
55
; 64-bit (2 x i32) case: vqrdmulh of %mhs and %rhs, then a saturating add with
; %acc. Expected output keeps the two operations as vqrdmulh + vqadd.
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2
; CHECK-NEXT:    vqadd.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
  %mulh = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
  %sum = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %mulh)
  ret <2 x i32> %sum
}
66
; 128-bit (4 x i32) case: vqrdmulh of %mhs and %rhs, then a saturating add with
; %acc. Expected output keeps the two operations as vqrdmulh + vqadd on q regs.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, q2
; CHECK-NEXT:    vqadd.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
  %mulh = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
  %sum = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %mulh)
  ret <4 x i32> %sum
}
77
; 64-bit (4 x i16) case: vqrdmulh of %mhs and %rhs, then saturating-subtracted
; from %acc. Expected output keeps the two operations as vqrdmulh + vqsub.
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2
; CHECK-NEXT:    vqsub.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
  %mulh = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
  %diff = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %mulh)
  ret <4 x i16> %diff
}
88
; 128-bit (8 x i16) case: vqrdmulh of %mhs and %rhs, then saturating-subtracted
; from %acc. Expected output keeps the two operations as vqrdmulh + vqsub.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, q2
; CHECK-NEXT:    vqsub.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
  %mulh = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
  %diff = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %mulh)
  ret <8 x i16> %diff
}
99
; 64-bit (2 x i32) case: vqrdmulh of %mhs and %rhs, then saturating-subtracted
; from %acc. Expected output keeps the two operations as vqrdmulh + vqsub.
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2
; CHECK-NEXT:    vqsub.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
  %mulh = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
  %diff = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %mulh)
  ret <2 x i32> %diff
}
110
; 128-bit (4 x i32) case: vqrdmulh of %mhs and %rhs, then saturating-subtracted
; from %acc. Expected output keeps the two operations as vqrdmulh + vqsub.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, q2
; CHECK-NEXT:    vqsub.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
  %mulh = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
  %diff = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %mulh)
  ret <4 x i32> %diff
}
121
122;-----------------------------------------------------------------------------
123; RDMA Scalar
124
; By-lane form (4 x i16): lane 3 of %v is splatted, multiplied with %x via
; vqrdmulh, and the product is saturating-added to %acc. Expected output is the
; by-element vqrdmulh followed by a separate vqadd.
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulah_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2[3]
; CHECK-NEXT:    vqadd.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  ; The mask only selects from %v, so the second operand is a pure placeholder;
  ; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
  %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod)
  ret <4 x i16> %retval
}
137
; By-lane form (8 x i16): lane 2 of the 64-bit %v is splatted to 8 elements,
; multiplied with %x via vqrdmulh, and the product is saturating-added to %acc.
; Expected output is the by-element vqrdmulh followed by a separate vqadd.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulahq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, d4[2]
; CHECK-NEXT:    vqadd.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  ; The mask only selects from %v, so the second operand is a pure placeholder;
  ; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
  %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod)
  ret <8 x i16> %retval
}
151
; By-lane form (2 x i32): lane 1 of %v is splatted, multiplied with %x via
; vqrdmulh, and the product is saturating-added to %acc. Expected output is the
; by-element vqrdmulh followed by a separate vqadd.
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulah_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2[1]
; CHECK-NEXT:    vqadd.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  ; The mask only selects from %v, so the second operand is a pure placeholder;
  ; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
  %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod)
  ret <2 x i32> %retval
}
164
; By-lane form (4 x i32): lane 0 of the 64-bit %v is splatted to 4 elements,
; multiplied with %x via vqrdmulh, and the product is saturating-added to %acc.
; Expected output is the by-element vqrdmulh followed by a separate vqadd.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulahq_lane_s32(<4 x i32> %acc, <4 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulahq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, d4[0]
; CHECK-NEXT:    vqadd.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  ; The all-zero mask only selects lane 0 of %v, so the second operand is a
  ; pure placeholder; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
  %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod)
  ret <4 x i32> %retval
}
178
; By-lane form (4 x i16): lane 3 of %v is splatted, multiplied with %x via
; vqrdmulh, and the product is saturating-subtracted from %acc. Expected output
; is the by-element vqrdmulh followed by a separate vqsub.
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulsh_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2[3]
; CHECK-NEXT:    vqsub.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  ; The mask only selects from %v, so the second operand is a pure placeholder;
  ; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
  %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod)
  ret <4 x i16> %retval
}
191
; By-lane form (8 x i16): lane 2 of the 64-bit %v is splatted to 8 elements,
; multiplied with %x via vqrdmulh, and the product is saturating-subtracted
; from %acc. Expected output is the by-element vqrdmulh followed by vqsub.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulshq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, d4[2]
; CHECK-NEXT:    vqsub.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  ; The mask only selects from %v, so the second operand is a pure placeholder;
  ; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
  %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod)
  ret <8 x i16> %retval
}
205
; By-lane form (2 x i32): lane 1 of %v is splatted, multiplied with %x via
; vqrdmulh, and the product is saturating-subtracted from %acc. Expected output
; is the by-element vqrdmulh followed by a separate vqsub.
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulsh_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2[1]
; CHECK-NEXT:    vqsub.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  ; The mask only selects from %v, so the second operand is a pure placeholder;
  ; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
  %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod)
  ret <2 x i32> %retval
}
218
; By-lane form (4 x i32): lane 0 of the 64-bit %v is splatted to 4 elements,
; multiplied with %x via vqrdmulh, and the product is saturating-subtracted
; from %acc. Expected output is the by-element vqrdmulh followed by vqsub.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulshq_lane_s32(<4 x i32> %acc, <4 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulshq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, d4[0]
; CHECK-NEXT:    vqsub.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  ; The all-zero mask only selects lane 0 of %v, so the second operand is a
  ; pure placeholder; poison is used instead of the deprecated undef.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
  %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod)
  ret <4 x i32> %retval
}
232
233
234
; Direct call of the vqrdmlah intrinsic on 4 x i16; expected to select a single
; vqrdmlah.s16 on d registers.
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlah_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlah_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s16 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #3
  ret <4 x i16> %res
}
244
; Direct call of the vqrdmlah intrinsic on 2 x i32; expected to select a single
; vqrdmlah.s32 on d registers.
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlah_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlah_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s32 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #3
  ret <2 x i32> %res
}
254
; Direct call of the vqrdmlah intrinsic on 8 x i16; expected to select a single
; vqrdmlah.s16 on q registers.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlahq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqrdmlahq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #3
  ret <8 x i16> %res
}
264
; Direct call of the vqrdmlah intrinsic on 4 x i32; expected to select a single
; vqrdmlah.s32 on q registers.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlahq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqrdmlahq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #3
  ret <4 x i32> %res
}
274
; vqrdmlah intrinsic whose last operand is a splat of lane 3 of %c (4 x i16);
; expected to select the by-element form vqrdmlah.s16 ..., d2[3].
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlah_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s16 d0, d1, d2[3]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <4 x i16> %c, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = tail call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %splat) #3
  ret <4 x i16> %res
}
285
; vqrdmlah intrinsic whose last operand is a splat of lane 1 of %c (2 x i32);
; expected to select the by-element form vqrdmlah.s32 ..., d2[1].
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlah_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s32 d0, d1, d2[1]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <2 x i32> %c, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %splat) #3
  ret <2 x i32> %res
}
296
; vqrdmlah intrinsic on 8 x i16 whose last operand is lane 3 of the 64-bit %c
; splatted to 8 elements; expected to select vqrdmlah.s16 q0, q1, d4[3].
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlahq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlah.s16 q0, q1, d4[3]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <4 x i16> %c, <4 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %res = tail call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %splat) #3
  ret <8 x i16> %res
}
308
; vqrdmlah intrinsic on 4 x i32 whose last operand is lane 1 of the 64-bit %c
; splatted to 4 elements; expected to select vqrdmlah.s32 q0, q1, d4[1].
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlahq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlah.s32 q0, q1, d4[1]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <2 x i32> %c, <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = tail call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %splat) #3
  ret <4 x i32> %res
}
320
; Direct call of the vqrdmlsh intrinsic on 4 x i16; expected to select a single
; vqrdmlsh.s16 on d registers.
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlsh_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlsh_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s16 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #3
  ret <4 x i16> %res
}
330
; Direct call of the vqrdmlsh intrinsic on 2 x i32; expected to select a single
; vqrdmlsh.s32 on d registers.
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlsh_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlsh_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s32 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #3
  ret <2 x i32> %res
}
340
; Direct call of the vqrdmlsh intrinsic on 8 x i16; expected to select a single
; vqrdmlsh.s16 on q registers.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlshq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqrdmlshq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #3
  ret <8 x i16> %res
}
350
; Direct call of the vqrdmlsh intrinsic on 4 x i32; expected to select a single
; vqrdmlsh.s32 on q registers.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlshq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqrdmlshq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #3
  ret <4 x i32> %res
}
360
; vqrdmlsh intrinsic whose last operand is a splat of lane 3 of %c (4 x i16);
; expected to select the by-element form vqrdmlsh.s16 ..., d2[3].
define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlsh_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s16 d0, d1, d2[3]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <4 x i16> %c, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = tail call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %splat) #3
  ret <4 x i16> %res
}
371
; vqrdmlsh intrinsic whose last operand is a splat of lane 1 of %c (2 x i32);
; expected to select the by-element form vqrdmlsh.s32 ..., d2[1].
define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlsh_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s32 d0, d1, d2[1]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <2 x i32> %c, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %splat) #3
  ret <2 x i32> %res
}
382
; vqrdmlsh intrinsic on 8 x i16 whose last operand is lane 3 of the 64-bit %c
; splatted to 8 elements; expected to select vqrdmlsh.s16 q0, q1, d4[3].
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlshq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlsh.s16 q0, q1, d4[3]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <4 x i16> %c, <4 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %res = tail call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %splat) #3
  ret <8 x i16> %res
}
394
; vqrdmlsh intrinsic on 4 x i32 whose last operand is lane 1 of the 64-bit %c
; splatted to 4 elements; expected to select vqrdmlsh.s32 q0, q1, d4[1].
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlshq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlsh.s32 q0, q1, d4[1]
; CHECK-NEXT:    bx lr
entry:
  %splat = shufflevector <2 x i32> %c, <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = tail call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %splat) #3
  ret <4 x i32> %res
}
406