1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-abi apcs-gnu -target-feature +neon \
3 // RUN: -emit-llvm -o - %s -disable-O0-optnone | opt -passes=mem2reg,dce -S \
4 // RUN: | FileCheck %s --check-prefix=CHECK-ARM
5
6 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
7 // RUN: -target-feature +v8.1a -emit-llvm -o - %s -disable-O0-optnone | opt -passes=mem2reg,dce -S \
8 // RUN: | FileCheck %s --check-prefix=CHECK-AARCH64
9
10 // REQUIRES: arm-registered-target,aarch64-registered-target
11
12 #include <arm_neon.h>
13
14 // CHECK-ARM-LABEL: @test_vqrdmlah_s16(
15 // CHECK-ARM-NEXT: entry:
16 // CHECK-ARM-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
17 // CHECK-ARM-NEXT: ret <4 x i16> [[VQRDMLAH_V3_I]]
18 //
19 // CHECK-AARCH64-LABEL: @test_vqrdmlah_s16(
20 // CHECK-AARCH64-NEXT: entry:
21 // CHECK-AARCH64-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
22 // CHECK-AARCH64-NEXT: ret <4 x i16> [[VQRDMLAH_V3_I]]
23 //
int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  // Plain (non-lane) form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlah (ARM) / sqrdmlah (AArch64) call on <4 x i16>.
  return vqrdmlah_s16(a, b, c);
}
28
29 // CHECK-ARM-LABEL: @test_vqrdmlah_s32(
30 // CHECK-ARM-NEXT: entry:
31 // CHECK-ARM-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
32 // CHECK-ARM-NEXT: ret <2 x i32> [[VQRDMLAH_V3_I]]
33 //
34 // CHECK-AARCH64-LABEL: @test_vqrdmlah_s32(
35 // CHECK-AARCH64-NEXT: entry:
36 // CHECK-AARCH64-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
37 // CHECK-AARCH64-NEXT: ret <2 x i32> [[VQRDMLAH_V3_I]]
38 //
int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  // Plain (non-lane) form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlah (ARM) / sqrdmlah (AArch64) call on <2 x i32>.
  return vqrdmlah_s32(a, b, c);
}
43
44 // CHECK-ARM-LABEL: @test_vqrdmlahq_s16(
45 // CHECK-ARM-NEXT: entry:
46 // CHECK-ARM-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
47 // CHECK-ARM-NEXT: ret <8 x i16> [[VQRDMLAHQ_V3_I]]
48 //
49 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_s16(
50 // CHECK-AARCH64-NEXT: entry:
51 // CHECK-AARCH64-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
52 // CHECK-AARCH64-NEXT: ret <8 x i16> [[VQRDMLAHQ_V3_I]]
53 //
int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  // Plain (non-lane) q-form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlah (ARM) / sqrdmlah (AArch64) call on <8 x i16>.
  return vqrdmlahq_s16(a, b, c);
}
58
59 // CHECK-ARM-LABEL: @test_vqrdmlahq_s32(
60 // CHECK-ARM-NEXT: entry:
61 // CHECK-ARM-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
62 // CHECK-ARM-NEXT: ret <4 x i32> [[VQRDMLAHQ_V3_I]]
63 //
64 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_s32(
65 // CHECK-AARCH64-NEXT: entry:
66 // CHECK-AARCH64-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
67 // CHECK-AARCH64-NEXT: ret <4 x i32> [[VQRDMLAHQ_V3_I]]
68 //
int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  // Plain (non-lane) q-form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlah (ARM) / sqrdmlah (AArch64) call on <4 x i32>.
  return vqrdmlahq_s32(a, b, c);
}
73
74 // CHECK-ARM-LABEL: @test_vqrdmlah_lane_s16(
75 // CHECK-ARM-NEXT: entry:
76 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
77 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
78 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
79 // CHECK-ARM-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
80 // CHECK-ARM-NEXT: ret <4 x i16> [[VQRDMLAH_V3_I]]
81 //
82 // CHECK-AARCH64-LABEL: @test_vqrdmlah_lane_s16(
83 // CHECK-AARCH64-NEXT: entry:
84 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
85 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
86 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
87 // CHECK-AARCH64-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
88 // CHECK-AARCH64-NEXT: ret <4 x i16> [[VQRDMLAH_V3_I]]
89 //
int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  // Lane form with constant lane index 3: the CHECK lines above expect lane 3
  // of c to be splatted by a shufflevector (<4 x i16>) and the splat to be
  // passed as the third operand of the vqrdmlah / sqrdmlah call.
  return vqrdmlah_lane_s16(a, b, c, 3);
}
94
95 // CHECK-ARM-LABEL: @test_vqrdmlah_lane_s32(
96 // CHECK-ARM-NEXT: entry:
97 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
98 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
99 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
100 // CHECK-ARM-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
101 // CHECK-ARM-NEXT: ret <2 x i32> [[VQRDMLAH_V3_I]]
102 //
103 // CHECK-AARCH64-LABEL: @test_vqrdmlah_lane_s32(
104 // CHECK-AARCH64-NEXT: entry:
105 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
106 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
107 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
108 // CHECK-AARCH64-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
109 // CHECK-AARCH64-NEXT: ret <2 x i32> [[VQRDMLAH_V3_I]]
110 //
int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  // Lane form with constant lane index 1: the CHECK lines above expect lane 1
  // of c to be splatted by a shufflevector (<2 x i32>) and the splat to be
  // passed as the third operand of the vqrdmlah / sqrdmlah call.
  return vqrdmlah_lane_s32(a, b, c, 1);
}
115
116 // CHECK-ARM-LABEL: @test_vqrdmlahq_lane_s16(
117 // CHECK-ARM-NEXT: entry:
118 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
119 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
120 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
121 // CHECK-ARM-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
122 // CHECK-ARM-NEXT: ret <8 x i16> [[VQRDMLAHQ_V3_I]]
123 //
124 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_lane_s16(
125 // CHECK-AARCH64-NEXT: entry:
126 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
127 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
128 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
129 // CHECK-AARCH64-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
130 // CHECK-AARCH64-NEXT: ret <8 x i16> [[VQRDMLAHQ_V3_I]]
131 //
int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  // q-form lane variant with constant lane index 3: c is a 4-element vector;
  // the CHECK lines above expect its lane 3 to be broadcast to all 8 lanes by
  // a shufflevector before the <8 x i16> vqrdmlah / sqrdmlah call.
  return vqrdmlahq_lane_s16(a, b, c, 3);
}
136
137 // CHECK-ARM-LABEL: @test_vqrdmlahq_lane_s32(
138 // CHECK-ARM-NEXT: entry:
139 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
140 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
141 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
142 // CHECK-ARM-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
143 // CHECK-ARM-NEXT: ret <4 x i32> [[VQRDMLAHQ_V3_I]]
144 //
145 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_lane_s32(
146 // CHECK-AARCH64-NEXT: entry:
147 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
148 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
149 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
150 // CHECK-AARCH64-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
151 // CHECK-AARCH64-NEXT: ret <4 x i32> [[VQRDMLAHQ_V3_I]]
152 //
int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  // q-form lane variant with constant lane index 1: c is a 2-element vector;
  // the CHECK lines above expect its lane 1 to be broadcast to all 4 lanes by
  // a shufflevector before the <4 x i32> vqrdmlah / sqrdmlah call.
  return vqrdmlahq_lane_s32(a, b, c, 1);
}
157
158 // CHECK-ARM-LABEL: @test_vqrdmlsh_s16(
159 // CHECK-ARM-NEXT: entry:
160 // CHECK-ARM-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
161 // CHECK-ARM-NEXT: ret <4 x i16> [[VQRDMLSH_V3_I]]
162 //
163 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_s16(
164 // CHECK-AARCH64-NEXT: entry:
165 // CHECK-AARCH64-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
166 // CHECK-AARCH64-NEXT: ret <4 x i16> [[VQRDMLSH_V3_I]]
167 //
int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  // Plain (non-lane) form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlsh (ARM) / sqrdmlsh (AArch64) call on <4 x i16>.
  return vqrdmlsh_s16(a, b, c);
}
172
173 // CHECK-ARM-LABEL: @test_vqrdmlsh_s32(
174 // CHECK-ARM-NEXT: entry:
175 // CHECK-ARM-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
176 // CHECK-ARM-NEXT: ret <2 x i32> [[VQRDMLSH_V3_I]]
177 //
178 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_s32(
179 // CHECK-AARCH64-NEXT: entry:
180 // CHECK-AARCH64-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
181 // CHECK-AARCH64-NEXT: ret <2 x i32> [[VQRDMLSH_V3_I]]
182 //
int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  // Plain (non-lane) form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlsh (ARM) / sqrdmlsh (AArch64) call on <2 x i32>.
  return vqrdmlsh_s32(a, b, c);
}
187
188 // CHECK-ARM-LABEL: @test_vqrdmlshq_s16(
189 // CHECK-ARM-NEXT: entry:
190 // CHECK-ARM-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
191 // CHECK-ARM-NEXT: ret <8 x i16> [[VQRDMLSHQ_V3_I]]
192 //
193 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_s16(
194 // CHECK-AARCH64-NEXT: entry:
195 // CHECK-AARCH64-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
196 // CHECK-AARCH64-NEXT: ret <8 x i16> [[VQRDMLSHQ_V3_I]]
197 //
int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  // Plain (non-lane) q-form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlsh (ARM) / sqrdmlsh (AArch64) call on <8 x i16>.
  return vqrdmlshq_s16(a, b, c);
}
202
203 // CHECK-ARM-LABEL: @test_vqrdmlshq_s32(
204 // CHECK-ARM-NEXT: entry:
205 // CHECK-ARM-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
206 // CHECK-ARM-NEXT: ret <4 x i32> [[VQRDMLSHQ_V3_I]]
207 //
208 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_s32(
209 // CHECK-AARCH64-NEXT: entry:
210 // CHECK-AARCH64-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
211 // CHECK-AARCH64-NEXT: ret <4 x i32> [[VQRDMLSHQ_V3_I]]
212 //
int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  // Plain (non-lane) q-form: a, b and c are forwarded unchanged; the CHECK lines
  // above expect a single vqrdmlsh (ARM) / sqrdmlsh (AArch64) call on <4 x i32>.
  return vqrdmlshq_s32(a, b, c);
}
217
218 // CHECK-ARM-LABEL: @test_vqrdmlsh_lane_s16(
219 // CHECK-ARM-NEXT: entry:
220 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
221 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
222 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
223 // CHECK-ARM-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
224 // CHECK-ARM-NEXT: ret <4 x i16> [[VQRDMLSH_V3_I]]
225 //
226 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_lane_s16(
227 // CHECK-AARCH64-NEXT: entry:
228 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
229 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
230 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
231 // CHECK-AARCH64-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
232 // CHECK-AARCH64-NEXT: ret <4 x i16> [[VQRDMLSH_V3_I]]
233 //
int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  // Lane form with constant lane index 3: the CHECK lines above expect lane 3
  // of c to be splatted by a shufflevector (<4 x i16>) and the splat to be
  // passed as the third operand of the vqrdmlsh / sqrdmlsh call.
  return vqrdmlsh_lane_s16(a, b, c, 3);
}
238
239 // CHECK-ARM-LABEL: @test_vqrdmlsh_lane_s32(
240 // CHECK-ARM-NEXT: entry:
241 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
242 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
243 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
244 // CHECK-ARM-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
245 // CHECK-ARM-NEXT: ret <2 x i32> [[VQRDMLSH_V3_I]]
246 //
247 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_lane_s32(
248 // CHECK-AARCH64-NEXT: entry:
249 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
250 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
251 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
252 // CHECK-AARCH64-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
253 // CHECK-AARCH64-NEXT: ret <2 x i32> [[VQRDMLSH_V3_I]]
254 //
int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  // Lane form with constant lane index 1: the CHECK lines above expect lane 1
  // of c to be splatted by a shufflevector (<2 x i32>) and the splat to be
  // passed as the third operand of the vqrdmlsh / sqrdmlsh call.
  return vqrdmlsh_lane_s32(a, b, c, 1);
}
259
260 // CHECK-ARM-LABEL: @test_vqrdmlshq_lane_s16(
261 // CHECK-ARM-NEXT: entry:
262 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
263 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
264 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
265 // CHECK-ARM-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
266 // CHECK-ARM-NEXT: ret <8 x i16> [[VQRDMLSHQ_V3_I]]
267 //
268 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_lane_s16(
269 // CHECK-AARCH64-NEXT: entry:
270 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
271 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
272 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
273 // CHECK-AARCH64-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
274 // CHECK-AARCH64-NEXT: ret <8 x i16> [[VQRDMLSHQ_V3_I]]
275 //
int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  // q-form lane variant with constant lane index 3: c is a 4-element vector;
  // the CHECK lines above expect its lane 3 to be broadcast to all 8 lanes by
  // a shufflevector before the <8 x i16> vqrdmlsh / sqrdmlsh call.
  return vqrdmlshq_lane_s16(a, b, c, 3);
}
280
281 // CHECK-ARM-LABEL: @test_vqrdmlshq_lane_s32(
282 // CHECK-ARM-NEXT: entry:
283 // CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
284 // CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
285 // CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
286 // CHECK-ARM-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
287 // CHECK-ARM-NEXT: ret <4 x i32> [[VQRDMLSHQ_V3_I]]
288 //
289 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_lane_s32(
290 // CHECK-AARCH64-NEXT: entry:
291 // CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
292 // CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
293 // CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
294 // CHECK-AARCH64-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
295 // CHECK-AARCH64-NEXT: ret <4 x i32> [[VQRDMLSHQ_V3_I]]
296 //
int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  // q-form lane variant with constant lane index 1: c is a 2-element vector;
  // the CHECK lines above expect its lane 1 to be broadcast to all 4 lanes by
  // a shufflevector before the <4 x i32> vqrdmlsh / sqrdmlsh call.
  return vqrdmlshq_lane_s32(a, b, c, 1);
}
301