xref: /llvm-project/clang/test/CodeGen/AArch64/neon-fp16fml.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +v8.2a -target-feature +neon -target-feature +fp16fml \
3*207e5cccSFangrui Song // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
4*207e5cccSFangrui Song 
5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target
6*207e5cccSFangrui Song 
7*207e5cccSFangrui Song // Test AArch64 Armv8.2-A FP16 Fused Multiply-Add Long intrinsics
8*207e5cccSFangrui Song 
9*207e5cccSFangrui Song #include <arm_neon.h>
10*207e5cccSFangrui Song 
11*207e5cccSFangrui Song // Vector form
12*207e5cccSFangrui Song 
13*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_low_f16(
14*207e5cccSFangrui Song // CHECK-NEXT:  entry:
15*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
16*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
17*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
18*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
19*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLAL_LOW3_I]]
20*207e5cccSFangrui Song //
// Vector form: forwards a, b and c unchanged to vfmlal_low_f16 and returns
// its result. The expectations above require a single call to
// @llvm.aarch64.neon.fmlal.v2f32.v4f16 taking the three arguments directly.
21*207e5cccSFangrui Song float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
22*207e5cccSFangrui Song   return vfmlal_low_f16(a, b, c);
23*207e5cccSFangrui Song }
24*207e5cccSFangrui Song 
25*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_low_f16(
26*207e5cccSFangrui Song // CHECK-NEXT:  entry:
27*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
28*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
29*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
30*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
31*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLSL_LOW3_I]]
32*207e5cccSFangrui Song //
// Vector form: forwards a, b and c unchanged to vfmlsl_low_f16 and returns
// its result. The expectations above require a single call to
// @llvm.aarch64.neon.fmlsl.v2f32.v4f16 taking the three arguments directly.
33*207e5cccSFangrui Song float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
34*207e5cccSFangrui Song   return vfmlsl_low_f16(a, b, c);
35*207e5cccSFangrui Song }
36*207e5cccSFangrui Song 
37*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_high_f16(
38*207e5cccSFangrui Song // CHECK-NEXT:  entry:
39*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
40*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
41*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
42*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
43*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLAL_HIGH3_I]]
44*207e5cccSFangrui Song //
// Vector form: forwards a, b and c unchanged to vfmlal_high_f16 and returns
// its result. The expectations above require a single call to
// @llvm.aarch64.neon.fmlal2.v2f32.v4f16 (note the "2" suffix used for the
// high variant) taking the three arguments directly.
45*207e5cccSFangrui Song float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
46*207e5cccSFangrui Song   return vfmlal_high_f16(a, b, c);
47*207e5cccSFangrui Song }
48*207e5cccSFangrui Song 
49*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_high_f16(
50*207e5cccSFangrui Song // CHECK-NEXT:  entry:
51*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
52*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
53*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
54*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
55*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLSL_HIGH3_I]]
56*207e5cccSFangrui Song //
// Vector form: forwards a, b and c unchanged to vfmlsl_high_f16 and returns
// its result. The expectations above require a single call to
// @llvm.aarch64.neon.fmlsl2.v2f32.v4f16 (note the "2" suffix used for the
// high variant) taking the three arguments directly.
57*207e5cccSFangrui Song float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
58*207e5cccSFangrui Song   return vfmlsl_high_f16(a, b, c);
59*207e5cccSFangrui Song }
60*207e5cccSFangrui Song 
61*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_low_f16(
62*207e5cccSFangrui Song // CHECK-NEXT:  entry:
63*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
64*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
65*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
66*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
67*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLAL_LOW3_I]]
68*207e5cccSFangrui Song //
// Vector form, q variant: forwards a (<4 x float> in the expected IR) and
// b, c (<8 x half>) unchanged to vfmlalq_low_f16 and returns its result. The
// expectations above require a single call to
// @llvm.aarch64.neon.fmlal.v4f32.v8f16.
69*207e5cccSFangrui Song float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
70*207e5cccSFangrui Song   return vfmlalq_low_f16(a, b, c);
71*207e5cccSFangrui Song }
72*207e5cccSFangrui Song 
73*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_low_f16(
74*207e5cccSFangrui Song // CHECK-NEXT:  entry:
75*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
76*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
77*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
78*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
79*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLSL_LOW3_I]]
80*207e5cccSFangrui Song //
// Vector form, q variant: forwards a (<4 x float> in the expected IR) and
// b, c (<8 x half>) unchanged to vfmlslq_low_f16 and returns its result. The
// expectations above require a single call to
// @llvm.aarch64.neon.fmlsl.v4f32.v8f16.
81*207e5cccSFangrui Song float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
82*207e5cccSFangrui Song   return vfmlslq_low_f16(a, b, c);
83*207e5cccSFangrui Song }
84*207e5cccSFangrui Song 
85*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_high_f16(
86*207e5cccSFangrui Song // CHECK-NEXT:  entry:
87*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
88*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
89*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
90*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
91*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLAL_HIGH3_I]]
92*207e5cccSFangrui Song //
// Vector form, q variant: forwards a (<4 x float> in the expected IR) and
// b, c (<8 x half>) unchanged to vfmlalq_high_f16 and returns its result. The
// expectations above require a single call to
// @llvm.aarch64.neon.fmlal2.v4f32.v8f16.
93*207e5cccSFangrui Song float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
94*207e5cccSFangrui Song   return vfmlalq_high_f16(a, b, c);
95*207e5cccSFangrui Song }
96*207e5cccSFangrui Song 
97*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_high_f16(
98*207e5cccSFangrui Song // CHECK-NEXT:  entry:
99*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
100*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
101*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
102*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
103*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLSL_HIGH3_I]]
104*207e5cccSFangrui Song //
// Vector form, q variant: forwards a (<4 x float> in the expected IR) and
// b, c (<8 x half>) unchanged to vfmlslq_high_f16 and returns its result. The
// expectations above require a single call to
// @llvm.aarch64.neon.fmlsl2.v4f32.v8f16.
105*207e5cccSFangrui Song float32x4_t test_vfmlslq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
106*207e5cccSFangrui Song   return vfmlslq_high_f16(a, b, c);
107*207e5cccSFangrui Song }
108*207e5cccSFangrui Song 
109*207e5cccSFangrui Song // Indexed form
110*207e5cccSFangrui Song 
111*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_lane_low_f16(
112*207e5cccSFangrui Song // CHECK-NEXT:  entry:
113*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
114*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
115*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
116*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
117*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
118*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
119*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
120*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
121*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
122*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
123*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
124*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
125*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
126*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
127*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
128*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
129*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
130*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
131*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
132*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
133*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
134*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
135*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0
136*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
137*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
138*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
139*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
140*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
141*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0
142*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
143*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
144*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
145*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
146*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
147*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
148*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
149*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLAL_LOW3_I]]
150*207e5cccSFangrui Song //
// Indexed form: calls vfmlal_lane_low_f16 with lane index 0 of c. The
// expectations above show element 0 of c being extracted four times and
// inserted into positions 0..3 of a fresh <4 x half> vector, which is then
// passed as the last operand of @llvm.aarch64.neon.fmlal.v2f32.v4f16.
151*207e5cccSFangrui Song float32x2_t test_vfmlal_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
152*207e5cccSFangrui Song   return vfmlal_lane_low_f16(a, b, c, 0);
153*207e5cccSFangrui Song }
154*207e5cccSFangrui Song 
155*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_lane_high_f16(
156*207e5cccSFangrui Song // CHECK-NEXT:  entry:
157*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
158*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
159*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
160*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
161*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
162*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
163*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
164*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
165*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
166*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
167*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
168*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
169*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
170*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
171*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
172*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
173*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
174*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
175*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
176*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
177*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
178*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
179*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1
180*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
181*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
182*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
183*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
184*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
185*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1
186*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
187*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
188*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
189*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
190*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
191*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
192*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
193*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLAL_HIGH3_I]]
194*207e5cccSFangrui Song //
// Indexed form: calls vfmlal_lane_high_f16 with lane index 1 of c. The
// expectations above show element 1 of c being extracted four times and
// inserted into positions 0..3 of a fresh <4 x half> vector, which is then
// passed as the last operand of @llvm.aarch64.neon.fmlal2.v2f32.v4f16.
195*207e5cccSFangrui Song float32x2_t test_vfmlal_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
196*207e5cccSFangrui Song   return vfmlal_lane_high_f16(a, b, c, 1);
197*207e5cccSFangrui Song }
198*207e5cccSFangrui Song 
199*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_lane_low_f16(
200*207e5cccSFangrui Song // CHECK-NEXT:  entry:
201*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
202*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
203*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
204*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
205*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
206*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
207*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
208*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
209*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
210*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84735:%.*]] = alloca i16, align 2
211*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
212*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84745:%.*]] = alloca i16, align 2
213*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
214*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84755:%.*]] = alloca i16, align 2
215*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
216*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84765:%.*]] = alloca i16, align 2
217*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
218*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
219*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
220*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
221*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
222*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
223*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
224*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
225*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
226*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
227*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
228*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
229*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
230*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
231*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
232*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
233*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
234*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
235*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
236*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
237*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2
238*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
239*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
240*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
241*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
242*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
243*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2
244*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
245*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
246*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
247*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
248*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
249*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2
250*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
251*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
252*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
253*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
254*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
255*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2
256*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
257*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
258*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
259*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
260*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
261*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2
262*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
263*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
264*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
265*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
266*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
267*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
268*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
269*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLAL_LOW3_I]]
270*207e5cccSFangrui Song //
// Indexed form, q variant: calls vfmlalq_lane_low_f16 with lane index 2 of c.
// Here c is a <4 x half> while b is <8 x half>; the expectations above show
// element 2 of c being extracted eight times and inserted into positions
// 0..7 of a fresh <8 x half> vector, which is then passed as the last operand
// of @llvm.aarch64.neon.fmlal.v4f32.v8f16.
271*207e5cccSFangrui Song float32x4_t test_vfmlalq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
272*207e5cccSFangrui Song   return vfmlalq_lane_low_f16(a, b, c, 2);
273*207e5cccSFangrui Song }
274*207e5cccSFangrui Song 
275*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_lane_high_f16(
276*207e5cccSFangrui Song // CHECK-NEXT:  entry:
277*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
278*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
279*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
280*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
281*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
282*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
283*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
284*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
285*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
286*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84735:%.*]] = alloca i16, align 2
287*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
288*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84745:%.*]] = alloca i16, align 2
289*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
290*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84755:%.*]] = alloca i16, align 2
291*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
292*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84765:%.*]] = alloca i16, align 2
293*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
294*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
295*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
296*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
297*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
298*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
299*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
300*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
301*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
302*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
303*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
304*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
305*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
306*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
307*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
308*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
309*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
310*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
311*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
312*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
313*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3
314*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
315*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
316*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
317*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
318*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
319*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3
320*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
321*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
322*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
323*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
324*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
325*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
326*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
327*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
328*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
329*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
330*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
331*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3
332*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
333*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
334*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
335*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
336*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
337*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3
338*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
339*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
340*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
341*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
342*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
343*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
344*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
345*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLAL_HIGH3_I]]
346*207e5cccSFangrui Song //
// Indexed form, q variant: calls vfmlalq_lane_high_f16 with lane index 3 of
// c. Here c is a <4 x half> while b is <8 x half>; the expectations above
// show element 3 of c being extracted eight times and inserted into positions
// 0..7 of a fresh <8 x half> vector, which is then passed as the last operand
// of @llvm.aarch64.neon.fmlal2.v4f32.v8f16.
347*207e5cccSFangrui Song float32x4_t test_vfmlalq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
348*207e5cccSFangrui Song   return vfmlalq_lane_high_f16(a, b, c, 3);
349*207e5cccSFangrui Song }
350*207e5cccSFangrui Song 
351*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_laneq_low_f16(
352*207e5cccSFangrui Song // CHECK-NEXT:  entry:
353*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
354*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
355*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
356*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
357*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
358*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
359*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
360*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
361*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
362*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
363*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 4
364*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
365*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
366*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
367*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
368*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
369*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4
370*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
371*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
372*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
373*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
374*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
375*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4
376*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
377*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
378*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
379*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
380*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
381*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4
382*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
383*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
384*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
385*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
386*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
387*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
388*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
389*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLAL_LOW3_I]]
390*207e5cccSFangrui Song //
// Calls vfmlal_laneq_low_f16 with the constant lane index 4 on the 8-lane
// vector `c`. The expected IR listed above extracts element 4 of `c` four
// times, inserts the values into lanes 0-3 of a <4 x half>, and passes that
// vector to @llvm.aarch64.neon.fmlal.v2f32.v4f16 together with `a` and `b`.
// Keep the body a single direct intrinsic call: the assertions are
// autogenerated from it.
391*207e5cccSFangrui Song float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
392*207e5cccSFangrui Song   return vfmlal_laneq_low_f16(a, b, c, 4);
393*207e5cccSFangrui Song }
394*207e5cccSFangrui Song 
395*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_laneq_high_f16(
396*207e5cccSFangrui Song // CHECK-NEXT:  entry:
397*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
398*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
399*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
400*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
401*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
402*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
403*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
404*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
405*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
406*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
407*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 5
408*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
409*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
410*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
411*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
412*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
413*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5
414*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
415*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
416*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
417*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
418*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
419*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5
420*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
421*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
422*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
423*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
424*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
425*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5
426*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
427*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
428*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
429*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
430*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
431*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
432*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
433*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLAL_HIGH3_I]]
434*207e5cccSFangrui Song //
// Calls vfmlal_laneq_high_f16 with the constant lane index 5 on the 8-lane
// vector `c`. The expected IR listed above extracts element 5 of `c` four
// times, inserts the values into lanes 0-3 of a <4 x half>, and passes that
// vector to @llvm.aarch64.neon.fmlal2.v2f32.v4f16 together with `a` and `b`.
// Keep the body a single direct intrinsic call: the assertions are
// autogenerated from it.
435*207e5cccSFangrui Song float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
436*207e5cccSFangrui Song   return vfmlal_laneq_high_f16(a, b, c, 5);
437*207e5cccSFangrui Song }
438*207e5cccSFangrui Song 
439*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_laneq_low_f16(
440*207e5cccSFangrui Song // CHECK-NEXT:  entry:
441*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
442*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
443*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
444*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
445*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
446*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
447*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
448*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
449*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
450*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85035:%.*]] = alloca i16, align 2
451*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
452*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85045:%.*]] = alloca i16, align 2
453*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
454*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85055:%.*]] = alloca i16, align 2
455*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
456*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85065:%.*]] = alloca i16, align 2
457*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
458*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
459*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 6
460*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
461*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
462*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
463*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
464*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
465*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6
466*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
467*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
468*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
469*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
470*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
471*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6
472*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
473*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
474*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
475*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
476*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
477*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6
478*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
479*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
480*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
481*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
482*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
483*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6
484*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
485*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
486*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
487*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
488*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
489*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6
490*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
491*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
492*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
493*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
494*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
495*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6
496*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
497*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
498*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
499*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
500*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
501*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6
502*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
503*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
504*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
505*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
506*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
507*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
508*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
509*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLAL_LOW3_I]]
510*207e5cccSFangrui Song //
// Calls vfmlalq_laneq_low_f16 with the constant lane index 6 on the 8-lane
// vector `c`. The expected IR listed above extracts element 6 of `c` eight
// times, inserts the values into lanes 0-7 of an <8 x half>, and passes that
// vector to @llvm.aarch64.neon.fmlal.v4f32.v8f16 together with `a` and `b`.
// Keep the body a single direct intrinsic call: the assertions are
// autogenerated from it.
511*207e5cccSFangrui Song float32x4_t test_vfmlalq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
512*207e5cccSFangrui Song   return vfmlalq_laneq_low_f16(a, b, c, 6);
513*207e5cccSFangrui Song }
514*207e5cccSFangrui Song 
515*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_laneq_high_f16(
516*207e5cccSFangrui Song // CHECK-NEXT:  entry:
517*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
518*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
519*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
520*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
521*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
522*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
523*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
524*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
525*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
526*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85035:%.*]] = alloca i16, align 2
527*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
528*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85045:%.*]] = alloca i16, align 2
529*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
530*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85055:%.*]] = alloca i16, align 2
531*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
532*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85065:%.*]] = alloca i16, align 2
533*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
534*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
535*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
536*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
537*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
538*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
539*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
540*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
541*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
542*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
543*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
544*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
545*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
546*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
547*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7
548*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
549*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
550*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
551*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
552*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
553*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7
554*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
555*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
556*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
557*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
558*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
559*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7
560*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
561*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
562*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
563*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
564*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
565*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7
566*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
567*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
568*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
569*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
570*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
571*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7
572*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
573*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
574*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
575*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
576*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
577*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7
578*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
579*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
580*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
581*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
582*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
583*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
584*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
585*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLAL_HIGH3_I]]
586*207e5cccSFangrui Song //
// Calls vfmlalq_laneq_high_f16 with the constant lane index 7, the highest
// index of the 8-lane vector `c`. The expected IR listed above extracts
// element 7 of `c` eight times, inserts the values into lanes 0-7 of an
// <8 x half>, and passes that vector to
// @llvm.aarch64.neon.fmlal2.v4f32.v8f16 together with `a` and `b`.
// Keep the body a single direct intrinsic call: the assertions are
// autogenerated from it.
587*207e5cccSFangrui Song float32x4_t test_vfmlalq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
588*207e5cccSFangrui Song   return vfmlalq_laneq_high_f16(a, b, c, 7);
589*207e5cccSFangrui Song }
590*207e5cccSFangrui Song 
591*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_lane_low_f16(
592*207e5cccSFangrui Song // CHECK-NEXT:  entry:
593*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
594*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
595*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
596*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
597*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
598*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
599*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
600*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
601*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
602*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
603*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
604*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
605*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
606*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
607*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
608*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
609*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
610*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
611*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
612*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
613*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
614*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
615*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0
616*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
617*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
618*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
619*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
620*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
621*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0
622*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
623*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
624*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
625*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
626*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
627*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
628*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
629*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLSL_LOW3_I]]
630*207e5cccSFangrui Song //
// Calls vfmlsl_lane_low_f16 with the constant lane index 0 on the 4-lane
// vector `c`. The expected IR listed above extracts element 0 of `c` four
// times, inserts the values into lanes 0-3 of a <4 x half>, and passes that
// vector to @llvm.aarch64.neon.fmlsl.v2f32.v4f16 together with `a` and `b`.
// Keep the body a single direct intrinsic call: the assertions are
// autogenerated from it.
631*207e5cccSFangrui Song float32x2_t test_vfmlsl_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
632*207e5cccSFangrui Song   return vfmlsl_lane_low_f16(a, b, c, 0);
633*207e5cccSFangrui Song }
634*207e5cccSFangrui Song 
635*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_lane_high_f16(
636*207e5cccSFangrui Song // CHECK-NEXT:  entry:
637*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
638*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
639*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
640*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
641*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
642*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
643*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
644*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
645*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
646*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
647*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
648*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
649*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
650*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
651*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
652*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
653*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
654*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
655*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
656*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
657*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
658*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
659*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1
660*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
661*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
662*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
663*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
664*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
665*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1
666*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
667*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
668*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
669*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
670*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
671*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
672*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
673*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLSL_HIGH3_I]]
674*207e5cccSFangrui Song //
// Calls vfmlsl_lane_high_f16 with the constant lane index 1 on the 4-lane
// vector `c`. The expected IR listed above extracts element 1 of `c` four
// times, inserts the values into lanes 0-3 of a <4 x half>, and passes that
// vector to @llvm.aarch64.neon.fmlsl2.v2f32.v4f16 together with `a` and `b`.
// Keep the body a single direct intrinsic call: the assertions are
// autogenerated from it.
675*207e5cccSFangrui Song float32x2_t test_vfmlsl_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
676*207e5cccSFangrui Song   return vfmlsl_lane_high_f16(a, b, c, 1);
677*207e5cccSFangrui Song }
678*207e5cccSFangrui Song 
679*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_lane_low_f16(
680*207e5cccSFangrui Song // CHECK-NEXT:  entry:
681*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
682*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
683*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
684*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
685*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
686*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
687*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
688*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
689*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
690*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84735:%.*]] = alloca i16, align 2
691*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
692*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84745:%.*]] = alloca i16, align 2
693*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
694*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84755:%.*]] = alloca i16, align 2
695*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
696*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84765:%.*]] = alloca i16, align 2
697*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
698*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
699*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
700*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
701*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
702*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
703*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
704*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
705*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
706*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
707*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
708*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
709*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
710*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
711*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
712*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
713*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
714*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
715*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
716*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
717*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2
718*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
719*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
720*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
721*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
722*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
723*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2
724*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
725*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
726*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
727*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
728*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
729*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2
730*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
731*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
732*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
733*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
734*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
735*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2
736*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
737*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
738*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
739*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
740*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
741*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2
742*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
743*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
744*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
745*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
746*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
747*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
748*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
749*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLSL_LOW3_I]]
750*207e5cccSFangrui Song //
751*207e5cccSFangrui Song float32x4_t test_vfmlslq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
  // Codegen test for vfmlslq_lane_low_f16 with the constant lane index 2.
  // The autogenerated FileCheck assertions directly above expect lane 2 of c
  // (viewed as <4 x i16>) to be extracted eight times and inserted into
  // elements 0 through 7 of an <8 x half> vector, which is then passed with
  // a and b to @llvm.aarch64.neon.fmlsl.v4f32.v8f16.
  // Changing the lane index requires regenerating those assertions.
752*207e5cccSFangrui Song   return vfmlslq_lane_low_f16(a, b, c, 2);
753*207e5cccSFangrui Song }
754*207e5cccSFangrui Song 
755*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_lane_high_f16(
756*207e5cccSFangrui Song // CHECK-NEXT:  entry:
757*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_847:%.*]] = alloca <4 x half>, align 8
758*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_847:%.*]] = alloca i16, align 2
759*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
760*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8475:%.*]] = alloca i16, align 2
761*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
762*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84715:%.*]] = alloca i16, align 2
763*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
764*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84725:%.*]] = alloca i16, align 2
765*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
766*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84735:%.*]] = alloca i16, align 2
767*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
768*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84745:%.*]] = alloca i16, align 2
769*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
770*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84755:%.*]] = alloca i16, align 2
771*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
772*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_84765:%.*]] = alloca i16, align 2
773*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
774*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
775*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
776*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
777*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
778*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
779*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
780*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
781*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
782*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
783*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
784*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
785*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
786*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
787*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
788*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
789*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
790*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
791*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
792*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
793*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3
794*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
795*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
796*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
797*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
798*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
799*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3
800*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
801*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
802*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
803*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
804*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
805*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
806*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
807*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
808*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
809*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
810*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
811*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3
812*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
813*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
814*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
815*207e5cccSFangrui Song // CHECK-NEXT:    store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
816*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
817*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3
818*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
819*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
820*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
821*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
822*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
823*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
824*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
825*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLSL_HIGH3_I]]
826*207e5cccSFangrui Song //
827*207e5cccSFangrui Song float32x4_t test_vfmlslq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
  // Codegen test for vfmlslq_lane_high_f16 with the constant lane index 3.
  // The autogenerated FileCheck assertions directly above expect lane 3 of c
  // (viewed as <4 x i16>) to be extracted eight times and inserted into
  // elements 0 through 7 of an <8 x half> vector, which is then passed with
  // a and b to @llvm.aarch64.neon.fmlsl2.v4f32.v8f16.
  // Changing the lane index requires regenerating those assertions.
828*207e5cccSFangrui Song   return vfmlslq_lane_high_f16(a, b, c, 3);
829*207e5cccSFangrui Song }
830*207e5cccSFangrui Song 
831*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_laneq_low_f16(
832*207e5cccSFangrui Song // CHECK-NEXT:  entry:
833*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
834*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
835*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
836*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
837*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
838*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
839*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
840*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
841*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
842*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
843*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 4
844*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
845*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
846*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
847*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
848*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
849*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4
850*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
851*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
852*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
853*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
854*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
855*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4
856*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
857*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
858*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
859*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
860*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
861*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4
862*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
863*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
864*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
865*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
866*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
867*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
868*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
869*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLSL_LOW3_I]]
870*207e5cccSFangrui Song //
871*207e5cccSFangrui Song float32x2_t test_vfmlsl_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
  // Codegen test for vfmlsl_laneq_low_f16 with the constant lane index 4.
  // The autogenerated FileCheck assertions directly above expect lane 4 of c
  // (viewed as <8 x i16>) to be extracted four times and inserted into
  // elements 0 through 3 of a <4 x half> vector, which is then passed with
  // a and b to @llvm.aarch64.neon.fmlsl.v2f32.v4f16.
  // Changing the lane index requires regenerating those assertions.
872*207e5cccSFangrui Song   return vfmlsl_laneq_low_f16(a, b, c, 4);
873*207e5cccSFangrui Song }
874*207e5cccSFangrui Song 
875*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_laneq_high_f16(
876*207e5cccSFangrui Song // CHECK-NEXT:  entry:
877*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
878*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
879*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
880*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
881*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
882*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
883*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
884*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
885*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
886*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
887*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 5
888*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
889*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
890*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
891*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
892*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
893*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5
894*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
895*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
896*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
897*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
898*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
899*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5
900*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
901*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
902*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
903*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
904*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
905*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5
906*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
907*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
908*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
909*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
910*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
911*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
912*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
913*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x float> [[VFMLSL_HIGH3_I]]
914*207e5cccSFangrui Song //
915*207e5cccSFangrui Song float32x2_t test_vfmlsl_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
  // Codegen test for vfmlsl_laneq_high_f16 with the constant lane index 5.
  // The autogenerated FileCheck assertions directly above expect lane 5 of c
  // (viewed as <8 x i16>) to be extracted four times and inserted into
  // elements 0 through 3 of a <4 x half> vector, which is then passed with
  // a and b to @llvm.aarch64.neon.fmlsl2.v2f32.v4f16.
  // Changing the lane index requires regenerating those assertions.
916*207e5cccSFangrui Song   return vfmlsl_laneq_high_f16(a, b, c, 5);
917*207e5cccSFangrui Song }
918*207e5cccSFangrui Song 
919*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_laneq_low_f16(
920*207e5cccSFangrui Song // CHECK-NEXT:  entry:
921*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
922*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
923*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
924*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
925*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
926*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
927*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
928*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
929*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
930*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85035:%.*]] = alloca i16, align 2
931*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
932*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85045:%.*]] = alloca i16, align 2
933*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
934*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85055:%.*]] = alloca i16, align 2
935*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
936*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85065:%.*]] = alloca i16, align 2
937*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
938*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
939*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 6
940*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
941*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
942*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
943*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
944*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
945*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6
946*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
947*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
948*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
949*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
950*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
951*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6
952*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
953*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
954*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
955*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
956*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
957*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6
958*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
959*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
960*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
961*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
962*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
963*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6
964*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
965*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
966*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
967*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
968*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
969*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6
970*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
971*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
972*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
973*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
974*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
975*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6
976*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
977*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
978*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
979*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
980*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
981*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6
982*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
983*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
984*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
985*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
986*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
987*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
988*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
989*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLSL_LOW3_I]]
990*207e5cccSFangrui Song //
991*207e5cccSFangrui Song float32x4_t test_vfmlslq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
  // Codegen test for vfmlslq_laneq_low_f16 with the constant lane index 6.
  // The autogenerated FileCheck assertions directly above expect lane 6 of c
  // (viewed as <8 x i16>) to be extracted eight times and inserted into
  // elements 0 through 7 of an <8 x half> vector, which is then passed with
  // a and b to @llvm.aarch64.neon.fmlsl.v4f32.v8f16.
  // Changing the lane index requires regenerating those assertions.
992*207e5cccSFangrui Song   return vfmlslq_laneq_low_f16(a, b, c, 6);
993*207e5cccSFangrui Song }
994*207e5cccSFangrui Song 
995*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_laneq_high_f16(
996*207e5cccSFangrui Song // CHECK-NEXT:  entry:
997*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_850:%.*]] = alloca <8 x half>, align 16
998*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_850:%.*]] = alloca i16, align 2
999*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
1000*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_8505:%.*]] = alloca i16, align 2
1001*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
1002*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85015:%.*]] = alloca i16, align 2
1003*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
1004*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85025:%.*]] = alloca i16, align 2
1005*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
1006*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85035:%.*]] = alloca i16, align 2
1007*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
1008*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85045:%.*]] = alloca i16, align 2
1009*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
1010*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85055:%.*]] = alloca i16, align 2
1011*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
1012*207e5cccSFangrui Song // CHECK-NEXT:    [[__REINT1_85065:%.*]] = alloca i16, align 2
1013*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
1014*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
1015*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
1016*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
1017*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
1018*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
1019*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
1020*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
1021*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
1022*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
1023*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
1024*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
1025*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
1026*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
1027*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7
1028*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
1029*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
1030*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
1031*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
1032*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
1033*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7
1034*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
1035*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
1036*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
1037*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
1038*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
1039*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7
1040*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
1041*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
1042*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
1043*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
1044*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
1045*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7
1046*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
1047*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
1048*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
1049*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
1050*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
1051*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7
1052*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
1053*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
1054*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
1055*207e5cccSFangrui Song // CHECK-NEXT:    store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
1056*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
1057*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7
1058*207e5cccSFangrui Song // CHECK-NEXT:    store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
1059*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
1060*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
1061*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
1062*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
1063*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
1064*207e5cccSFangrui Song // CHECK-NEXT:    [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
1065*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x float> [[VFMLSL_HIGH3_I]]
1066*207e5cccSFangrui Song //
// Test wrapper: forwards a, b and c to vfmlslq_laneq_high_f16 with the constant
// 7 as the final argument and returns the result unchanged.
// The FileCheck assertions directly above expect element 7 of c to be extracted
// eight times (via an i16 store / half reload through stack slots) and inserted
// into lanes 0 through 7 of a new <8 x half>, which is then passed together
// with a and b to @llvm.aarch64.neon.fmlsl2.v4f32.v8f16.
// Those assertions are autogenerated (utils/update_cc_test_checks.py), so any
// change to this body requires regenerating them rather than hand-editing.
1067*207e5cccSFangrui Song float32x4_t test_vfmlslq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
1068*207e5cccSFangrui Song   return vfmlslq_laneq_high_f16(a, b, c, 7);
1069*207e5cccSFangrui Song }
1070