xref: /llvm-project/clang/test/CodeGen/AArch64/v8.1a-neon-intrinsics.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2*207e5cccSFangrui Song // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
3*207e5cccSFangrui Song // RUN:  -target-feature +v8.1a -emit-llvm -disable-O0-optnone -o - %s | opt -passes=mem2reg,dce -S | FileCheck %s
4*207e5cccSFangrui Song 
5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target
6*207e5cccSFangrui Song 
7*207e5cccSFangrui Song  #include <arm_neon.h>
8*207e5cccSFangrui Song 
9*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlah_laneq_s16(
10*207e5cccSFangrui Song // CHECK-NEXT:  entry:
11*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
12*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
14*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
15*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x i16> [[VQRDMLAH_V3_I]]
16*207e5cccSFangrui Song //
17*207e5cccSFangrui Song int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
  // Codegen test for vqrdmlah_laneq_s16 with lane index 7 of the 8-lane
  // vector v. The expected IR above splats lane 7 into a <4 x i16> with a
  // shufflevector and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlah.v4i16.
18*207e5cccSFangrui Song   return vqrdmlah_laneq_s16(a, b, v, 7);
19*207e5cccSFangrui Song }
20*207e5cccSFangrui Song 
21*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlah_laneq_s32(
22*207e5cccSFangrui Song // CHECK-NEXT:  entry:
23*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
24*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
25*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
26*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
27*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x i32> [[VQRDMLAH_V3_I]]
28*207e5cccSFangrui Song //
29*207e5cccSFangrui Song int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
  // Codegen test for vqrdmlah_laneq_s32 with lane index 3 of the 4-lane
  // vector v. The expected IR above splats lane 3 into a <2 x i32> with a
  // shufflevector and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlah.v2i32.
30*207e5cccSFangrui Song   return vqrdmlah_laneq_s32(a, b, v, 3);
31*207e5cccSFangrui Song }
32*207e5cccSFangrui Song 
33*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahq_laneq_s16(
34*207e5cccSFangrui Song // CHECK-NEXT:  entry:
35*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
36*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
37*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
38*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
39*207e5cccSFangrui Song // CHECK-NEXT:    ret <8 x i16> [[VQRDMLAHQ_V3_I]]
40*207e5cccSFangrui Song //
41*207e5cccSFangrui Song int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
  // Codegen test for vqrdmlahq_laneq_s16 with lane index 7 of the 8-lane
  // vector v. The expected IR above splats lane 7 across all eight lanes of
  // a <8 x i16> and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlah.v8i16.
42*207e5cccSFangrui Song   return vqrdmlahq_laneq_s16(a, b, v, 7);
43*207e5cccSFangrui Song }
44*207e5cccSFangrui Song 
45*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahq_laneq_s32(
46*207e5cccSFangrui Song // CHECK-NEXT:  entry:
47*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
48*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
49*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
50*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
51*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x i32> [[VQRDMLAHQ_V3_I]]
52*207e5cccSFangrui Song //
53*207e5cccSFangrui Song int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
  // Codegen test for vqrdmlahq_laneq_s32 with lane index 3 of the 4-lane
  // vector v. The expected IR above splats lane 3 across all four lanes of
  // a <4 x i32> and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlah.v4i32.
54*207e5cccSFangrui Song   return vqrdmlahq_laneq_s32(a, b, v, 3);
55*207e5cccSFangrui Song }
56*207e5cccSFangrui Song 
57*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahh_s16(
58*207e5cccSFangrui Song // CHECK-NEXT:  entry:
59*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
60*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
61*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0
62*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
63*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0
64*207e5cccSFangrui Song // CHECK-NEXT:    ret i16 [[TMP3]]
65*207e5cccSFangrui Song //
66*207e5cccSFangrui Song int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
  // Codegen test for the scalar 16-bit form vqrdmlahh_s16. The expected IR
  // above has no scalar i16 intrinsic call: each operand is inserted into
  // lane 0 of a poison <4 x i16>, @llvm.aarch64.neon.sqrdmlah.v4i16 is
  // called, and lane 0 of the result is extracted and returned.
67*207e5cccSFangrui Song   return vqrdmlahh_s16(a, b, c);
68*207e5cccSFangrui Song }
69*207e5cccSFangrui Song 
70*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahs_s32(
71*207e5cccSFangrui Song // CHECK-NEXT:  entry:
72*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
73*207e5cccSFangrui Song // CHECK-NEXT:    ret i32 [[VQRDMLAHS_S32_I]]
74*207e5cccSFangrui Song //
75*207e5cccSFangrui Song int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
  // Codegen test for the scalar 32-bit form vqrdmlahs_s32. The expected IR
  // above is a single direct call to @llvm.aarch64.neon.sqrdmlah.i32 on the
  // three scalar arguments, with no vector wrapping.
76*207e5cccSFangrui Song   return vqrdmlahs_s32(a, b, c);
77*207e5cccSFangrui Song }
78*207e5cccSFangrui Song 
79*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahh_lane_s16(
80*207e5cccSFangrui Song // CHECK-NEXT:  entry:
81*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3
82*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
83*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
84*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
85*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
86*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0
87*207e5cccSFangrui Song // CHECK-NEXT:    ret i16 [[TMP3]]
88*207e5cccSFangrui Song //
89*207e5cccSFangrui Song int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
  // Codegen test for vqrdmlahh_lane_s16 with lane index 3 of the 4-lane
  // vector c. The expected IR above extracts lane 3 from c, inserts a, b and
  // that element into lane 0 of poison <4 x i16> vectors, calls
  // @llvm.aarch64.neon.sqrdmlah.v4i16, and returns lane 0 of the result.
90*207e5cccSFangrui Song   return vqrdmlahh_lane_s16(a, b, c, 3);
91*207e5cccSFangrui Song }
92*207e5cccSFangrui Song 
93*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahs_lane_s32(
94*207e5cccSFangrui Song // CHECK-NEXT:  entry:
95*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1
96*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]])
97*207e5cccSFangrui Song // CHECK-NEXT:    ret i32 [[VQRDMLAHS_S32_I]]
98*207e5cccSFangrui Song //
99*207e5cccSFangrui Song int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
  // Codegen test for vqrdmlahs_lane_s32 with lane index 1 of the 2-lane
  // vector c. The expected IR above extracts lane 1 from c and passes it,
  // with a and b, directly to the scalar @llvm.aarch64.neon.sqrdmlah.i32.
100*207e5cccSFangrui Song   return vqrdmlahs_lane_s32(a, b, c, 1);
101*207e5cccSFangrui Song }
102*207e5cccSFangrui Song 
103*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahh_laneq_s16(
104*207e5cccSFangrui Song // CHECK-NEXT:  entry:
105*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7
106*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
107*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
108*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
109*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
110*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0
111*207e5cccSFangrui Song // CHECK-NEXT:    ret i16 [[TMP3]]
112*207e5cccSFangrui Song //
113*207e5cccSFangrui Song int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
  // Codegen test for vqrdmlahh_laneq_s16 with lane index 7 of the 8-lane
  // vector c. The expected IR above extracts lane 7 from c, inserts a, b and
  // that element into lane 0 of poison <4 x i16> vectors, calls
  // @llvm.aarch64.neon.sqrdmlah.v4i16, and returns lane 0 of the result.
114*207e5cccSFangrui Song   return vqrdmlahh_laneq_s16(a, b, c, 7);
115*207e5cccSFangrui Song }
116*207e5cccSFangrui Song 
117*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahs_laneq_s32(
118*207e5cccSFangrui Song // CHECK-NEXT:  entry:
119*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
120*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]])
121*207e5cccSFangrui Song // CHECK-NEXT:    ret i32 [[VQRDMLAHS_S32_I]]
122*207e5cccSFangrui Song //
123*207e5cccSFangrui Song int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
  // Codegen test for vqrdmlahs_laneq_s32 with lane index 3 of the 4-lane
  // vector c. The expected IR above extracts lane 3 from c and passes it,
  // with a and b, directly to the scalar @llvm.aarch64.neon.sqrdmlah.i32.
124*207e5cccSFangrui Song   return vqrdmlahs_laneq_s32(a, b, c, 3);
125*207e5cccSFangrui Song }
126*207e5cccSFangrui Song 
127*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlsh_laneq_s16(
128*207e5cccSFangrui Song // CHECK-NEXT:  entry:
129*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
130*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
131*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
132*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
133*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x i16> [[VQRDMLSH_V3_I]]
134*207e5cccSFangrui Song //
135*207e5cccSFangrui Song int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
  // Codegen test for vqrdmlsh_laneq_s16 with lane index 7 of the 8-lane
  // vector v. The expected IR above splats lane 7 into a <4 x i16> with a
  // shufflevector and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlsh.v4i16.
136*207e5cccSFangrui Song   return vqrdmlsh_laneq_s16(a, b, v, 7);
137*207e5cccSFangrui Song }
138*207e5cccSFangrui Song 
139*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlsh_laneq_s32(
140*207e5cccSFangrui Song // CHECK-NEXT:  entry:
141*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
142*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
143*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
144*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
145*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x i32> [[VQRDMLSH_V3_I]]
146*207e5cccSFangrui Song //
147*207e5cccSFangrui Song int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
  // Codegen test for vqrdmlsh_laneq_s32 with lane index 3 of the 4-lane
  // vector v. The expected IR above splats lane 3 into a <2 x i32> with a
  // shufflevector and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlsh.v2i32.
148*207e5cccSFangrui Song   return vqrdmlsh_laneq_s32(a, b, v, 3);
149*207e5cccSFangrui Song }
150*207e5cccSFangrui Song 
151*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshq_laneq_s16(
152*207e5cccSFangrui Song // CHECK-NEXT:  entry:
153*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
154*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
155*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
156*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
157*207e5cccSFangrui Song // CHECK-NEXT:    ret <8 x i16> [[VQRDMLSHQ_V3_I]]
158*207e5cccSFangrui Song //
159*207e5cccSFangrui Song int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
  // Codegen test for vqrdmlshq_laneq_s16 with lane index 7 of the 8-lane
  // vector v. The expected IR above splats lane 7 across all eight lanes of
  // a <8 x i16> and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlsh.v8i16.
160*207e5cccSFangrui Song   return vqrdmlshq_laneq_s16(a, b, v, 7);
161*207e5cccSFangrui Song }
162*207e5cccSFangrui Song 
163*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshq_laneq_s32(
164*207e5cccSFangrui Song // CHECK-NEXT:  entry:
165*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
166*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
167*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
168*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
169*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x i32> [[VQRDMLSHQ_V3_I]]
170*207e5cccSFangrui Song //
171*207e5cccSFangrui Song int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
  // Codegen test for vqrdmlshq_laneq_s32 with lane index 3 of the 4-lane
  // vector v. The expected IR above splats lane 3 across all four lanes of
  // a <4 x i32> and passes it, with a and b, to
  // @llvm.aarch64.neon.sqrdmlsh.v4i32.
172*207e5cccSFangrui Song   return vqrdmlshq_laneq_s32(a, b, v, 3);
173*207e5cccSFangrui Song }
174*207e5cccSFangrui Song 
175*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshh_s16(
176*207e5cccSFangrui Song // CHECK-NEXT:  entry:
177*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
178*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
179*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0
180*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
181*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0
182*207e5cccSFangrui Song // CHECK-NEXT:    ret i16 [[TMP3]]
183*207e5cccSFangrui Song //
184*207e5cccSFangrui Song int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
  // Codegen test for the scalar 16-bit form vqrdmlshh_s16. The expected IR
  // above has no scalar i16 intrinsic call: each operand is inserted into
  // lane 0 of a poison <4 x i16>, @llvm.aarch64.neon.sqrdmlsh.v4i16 is
  // called, and lane 0 of the result is extracted and returned.
185*207e5cccSFangrui Song   return vqrdmlshh_s16(a, b, c);
186*207e5cccSFangrui Song }
187*207e5cccSFangrui Song 
188*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshs_s32(
189*207e5cccSFangrui Song // CHECK-NEXT:  entry:
190*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
191*207e5cccSFangrui Song // CHECK-NEXT:    ret i32 [[VQRDMLSHS_S32_I]]
192*207e5cccSFangrui Song //
193*207e5cccSFangrui Song int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
  // Codegen test for the scalar 32-bit form vqrdmlshs_s32. The expected IR
  // above is a single direct call to @llvm.aarch64.neon.sqrdmlsh.i32 on the
  // three scalar arguments, with no vector wrapping.
194*207e5cccSFangrui Song   return vqrdmlshs_s32(a, b, c);
195*207e5cccSFangrui Song }
196*207e5cccSFangrui Song 
197*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshh_lane_s16(
198*207e5cccSFangrui Song // CHECK-NEXT:  entry:
199*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3
200*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
201*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
202*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
203*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
204*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0
205*207e5cccSFangrui Song // CHECK-NEXT:    ret i16 [[TMP3]]
206*207e5cccSFangrui Song //
207*207e5cccSFangrui Song int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
  // Codegen test for vqrdmlshh_lane_s16 with lane index 3 of the 4-lane
  // vector c. The expected IR above extracts lane 3 from c, inserts a, b and
  // that element into lane 0 of poison <4 x i16> vectors, calls
  // @llvm.aarch64.neon.sqrdmlsh.v4i16, and returns lane 0 of the result.
208*207e5cccSFangrui Song   return vqrdmlshh_lane_s16(a, b, c, 3);
209*207e5cccSFangrui Song }
210*207e5cccSFangrui Song 
211*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshs_lane_s32(
212*207e5cccSFangrui Song // CHECK-NEXT:  entry:
213*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1
214*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]])
215*207e5cccSFangrui Song // CHECK-NEXT:    ret i32 [[VQRDMLSHS_S32_I]]
216*207e5cccSFangrui Song //
217*207e5cccSFangrui Song int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
  // Codegen test for vqrdmlshs_lane_s32 with lane index 1 of the 2-lane
  // vector c. The expected IR above extracts lane 1 from c and passes it,
  // with a and b, directly to the scalar @llvm.aarch64.neon.sqrdmlsh.i32.
218*207e5cccSFangrui Song   return vqrdmlshs_lane_s32(a, b, c, 1);
219*207e5cccSFangrui Song }
220*207e5cccSFangrui Song 
221*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshh_laneq_s16(
222*207e5cccSFangrui Song // CHECK-NEXT:  entry:
223*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7
224*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
225*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
226*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
227*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
228*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0
229*207e5cccSFangrui Song // CHECK-NEXT:    ret i16 [[TMP3]]
230*207e5cccSFangrui Song //
231*207e5cccSFangrui Song int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
  // Codegen test for vqrdmlshh_laneq_s16 with lane index 7 of the 8-lane
  // vector c. The expected IR above extracts lane 7 from c, inserts a, b and
  // that element into lane 0 of poison <4 x i16> vectors, calls
  // @llvm.aarch64.neon.sqrdmlsh.v4i16, and returns lane 0 of the result.
232*207e5cccSFangrui Song   return vqrdmlshh_laneq_s16(a, b, c, 7);
233*207e5cccSFangrui Song }
234*207e5cccSFangrui Song 
235*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshs_laneq_s32(
236*207e5cccSFangrui Song // CHECK-NEXT:  entry:
237*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
238*207e5cccSFangrui Song // CHECK-NEXT:    [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]])
239*207e5cccSFangrui Song // CHECK-NEXT:    ret i32 [[VQRDMLSHS_S32_I]]
240*207e5cccSFangrui Song //
241*207e5cccSFangrui Song int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
  // Codegen test for vqrdmlshs_laneq_s32 with lane index 3 of the 4-lane
  // vector c. The expected IR above extracts lane 3 from c and passes it,
  // with a and b, directly to the scalar @llvm.aarch64.neon.sqrdmlsh.i32.
242*207e5cccSFangrui Song   return vqrdmlshs_laneq_s32(a, b, c, 3);
243*207e5cccSFangrui Song }
244