xref: /llvm-project/clang/test/CodeGen/AArch64/neon-vcmla.c (revision ca603d2536f039194141bf3a01e9ee7f60e37406)
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64 -target-feature +neon \
// RUN:        -target-feature +v8.3a \
// RUN:        -target-feature +fullfp16 \
// RUN:        -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes="mem2reg,instsimplify" | FileCheck %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>

11*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16(
12*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0:[0-9]+]] {
13*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
14*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
15*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
16*ca603d25SDavid Green //
17207e5cccSFangrui Song float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
18207e5cccSFangrui Song   return vcmla_f16(acc, lhs, rhs);
19207e5cccSFangrui Song }
20207e5cccSFangrui Song 
21*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32(
22*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
23*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
24*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
25*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
26*ca603d25SDavid Green //
27207e5cccSFangrui Song float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
28207e5cccSFangrui Song   return vcmla_f32(acc, lhs, rhs);
29207e5cccSFangrui Song }
30207e5cccSFangrui Song 
31*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16(
32*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
33*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
34*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
35*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
36*ca603d25SDavid Green //
37207e5cccSFangrui Song float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
38207e5cccSFangrui Song   return vcmlaq_f16(acc, lhs, rhs);
39207e5cccSFangrui Song }
40207e5cccSFangrui Song 
41*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32(
42*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
43*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
44*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
45*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
46*ca603d25SDavid Green //
47207e5cccSFangrui Song float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
48207e5cccSFangrui Song   return vcmlaq_f32(acc, lhs, rhs);
49207e5cccSFangrui Song }
50207e5cccSFangrui Song 
51*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64(
52*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
53*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
54*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
55*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x double> [[VCMLAQ_F643_I]]
56*ca603d25SDavid Green //
57207e5cccSFangrui Song float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
58207e5cccSFangrui Song   return vcmlaq_f64(acc, lhs, rhs);
59207e5cccSFangrui Song }
60207e5cccSFangrui Song 
61*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16(
62*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
63*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
64*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
65*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
66*ca603d25SDavid Green //
67207e5cccSFangrui Song float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
68207e5cccSFangrui Song   return vcmla_rot90_f16(acc, lhs, rhs);
69207e5cccSFangrui Song }
70207e5cccSFangrui Song 
71*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32(
72*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
73*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
74*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
75*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
76*ca603d25SDavid Green //
77207e5cccSFangrui Song float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
78207e5cccSFangrui Song   return vcmla_rot90_f32(acc, lhs, rhs);
79207e5cccSFangrui Song }
80207e5cccSFangrui Song 
81*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16(
82*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
83*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
84*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
85*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
86*ca603d25SDavid Green //
87207e5cccSFangrui Song float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
88207e5cccSFangrui Song   return vcmlaq_rot90_f16(acc, lhs, rhs);
89207e5cccSFangrui Song }
90207e5cccSFangrui Song 
91*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32(
92*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
93*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
94*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
95*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
96*ca603d25SDavid Green //
97207e5cccSFangrui Song float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
98207e5cccSFangrui Song   return vcmlaq_rot90_f32(acc, lhs, rhs);
99207e5cccSFangrui Song }
100207e5cccSFangrui Song 
101*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64(
102*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
103*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
104*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT90_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
105*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT90_F643_I]]
106*ca603d25SDavid Green //
107207e5cccSFangrui Song float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
108207e5cccSFangrui Song   return vcmlaq_rot90_f64(acc, lhs, rhs);
109207e5cccSFangrui Song }
110207e5cccSFangrui Song 
111*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16(
112*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
113*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
114*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
115*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
116*ca603d25SDavid Green //
117207e5cccSFangrui Song float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
118207e5cccSFangrui Song   return vcmla_rot180_f16(acc, lhs, rhs);
119207e5cccSFangrui Song }
120207e5cccSFangrui Song 
121*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32(
122*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
123*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
124*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
125*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
126*ca603d25SDavid Green //
127207e5cccSFangrui Song float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
128207e5cccSFangrui Song   return vcmla_rot180_f32(acc, lhs, rhs);
129207e5cccSFangrui Song }
130207e5cccSFangrui Song 
131*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16(
132*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
133*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
134*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
135*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
136*ca603d25SDavid Green //
137207e5cccSFangrui Song float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
138207e5cccSFangrui Song   return vcmlaq_rot180_f16(acc, lhs, rhs);
139207e5cccSFangrui Song }
140207e5cccSFangrui Song 
141*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32(
142*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
143*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
144*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
145*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
146*ca603d25SDavid Green //
147207e5cccSFangrui Song float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
148207e5cccSFangrui Song   return vcmlaq_rot180_f32(acc, lhs, rhs);
149207e5cccSFangrui Song }
150207e5cccSFangrui Song 
151*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64(
152*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
153*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
154*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT180_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
155*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT180_F643_I]]
156*ca603d25SDavid Green //
157207e5cccSFangrui Song float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
158207e5cccSFangrui Song   return vcmlaq_rot180_f64(acc, lhs, rhs);
159207e5cccSFangrui Song }
160207e5cccSFangrui Song 
161*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16(
162*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
163*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
164*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
165*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
166*ca603d25SDavid Green //
167207e5cccSFangrui Song float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
168207e5cccSFangrui Song   return vcmla_rot270_f16(acc, lhs, rhs);
169207e5cccSFangrui Song }
170207e5cccSFangrui Song 
171*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32(
172*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
173*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
174*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
175*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
176*ca603d25SDavid Green //
177207e5cccSFangrui Song float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
178207e5cccSFangrui Song   return vcmla_rot270_f32(acc, lhs, rhs);
179207e5cccSFangrui Song }
180207e5cccSFangrui Song 
181*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16(
182*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
183*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
184*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
185*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
186*ca603d25SDavid Green //
187207e5cccSFangrui Song float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
188207e5cccSFangrui Song   return vcmlaq_rot270_f16(acc, lhs, rhs);
189207e5cccSFangrui Song }
190207e5cccSFangrui Song 
191*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32(
192*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
193*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
194*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
195*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
196*ca603d25SDavid Green //
197207e5cccSFangrui Song float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
198207e5cccSFangrui Song   return vcmlaq_rot270_f32(acc, lhs, rhs);
199207e5cccSFangrui Song }
200207e5cccSFangrui Song 
201*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64(
202*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
203*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
204*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT270_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
205*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT270_F643_I]]
206*ca603d25SDavid Green //
207207e5cccSFangrui Song float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
208207e5cccSFangrui Song   return vcmlaq_rot270_f64(acc, lhs, rhs);
209207e5cccSFangrui Song }
210207e5cccSFangrui Song 
211*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16(
212*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
213*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
214*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_150:%.*]] = alloca <4 x half>, align 8
215*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_150:%.*]] = alloca <2 x i32>, align 8
216*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_150]], align 8
217*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_150]], align 8
218*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
219*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
220*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_150]], align 8
221*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
222*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
223*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_150]], align 8
224*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_150]], align 8
225*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
226*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
227*ca603d25SDavid Green //
228207e5cccSFangrui Song float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
229207e5cccSFangrui Song   return vcmla_lane_f16(acc, lhs, rhs, 1);
230207e5cccSFangrui Song }
231207e5cccSFangrui Song 
232207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1.
233*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16(
234*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
235*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
236*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_154:%.*]] = alloca <8 x half>, align 16
237*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_154:%.*]] = alloca <2 x i32>, align 8
238*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_154]], align 16
239*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_154]], align 16
240*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
241*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
242*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_154]], align 16
243*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
244*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
245*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_154]], align 8
246*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_154]], align 8
247*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
248*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
249*ca603d25SDavid Green //
250207e5cccSFangrui Song float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
251207e5cccSFangrui Song   return vcmla_laneq_f16(acc, lhs, rhs, 3);
252207e5cccSFangrui Song }
253207e5cccSFangrui Song 
254*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16(
255*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
256*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
257*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_152:%.*]] = alloca <4 x half>, align 8
258*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_152:%.*]] = alloca <4 x i32>, align 16
259*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_152]], align 8
260*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
261*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
262*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
263*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
264*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
265*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
266*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
267*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
268*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
269*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
270*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
271*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
272*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_152]], align 16
273*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_152]], align 16
274*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
275*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
276*ca603d25SDavid Green //
277207e5cccSFangrui Song float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
278207e5cccSFangrui Song   return vcmlaq_lane_f16(acc, lhs, rhs, 1);
279207e5cccSFangrui Song }
280207e5cccSFangrui Song 
281*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16(
282*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
283*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
284*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_156:%.*]] = alloca <8 x half>, align 16
285*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_156:%.*]] = alloca <4 x i32>, align 16
286*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_156]], align 16
287*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
288*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
289*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
290*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
291*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
292*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
293*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
294*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
295*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
296*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
297*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
298*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
299*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_156]], align 16
300*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_156]], align 16
301*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
302*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
303*ca603d25SDavid Green //
304207e5cccSFangrui Song float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
305207e5cccSFangrui Song   return vcmlaq_laneq_f16(acc, lhs, rhs, 3);
306207e5cccSFangrui Song }
307207e5cccSFangrui Song 
308*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32(
309*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
310*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
311*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_182:%.*]] = alloca <2 x float>, align 8
312*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_182:%.*]] = alloca <1 x i64>, align 8
313*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_182]], align 8
314*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_182]], align 8
315*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
316*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
317*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_182]], align 8
318*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_182]], align 8
319*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
320*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
321*ca603d25SDavid Green //
322207e5cccSFangrui Song float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
323207e5cccSFangrui Song   return vcmla_lane_f32(acc, lhs, rhs, 0);
324207e5cccSFangrui Song }
325207e5cccSFangrui Song 
326207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1.
327*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32(
328*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
329*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
330*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_186:%.*]] = alloca <4 x float>, align 16
331*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_186:%.*]] = alloca <1 x i64>, align 8
332*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_186]], align 16
333*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_186]], align 16
334*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
335*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
336*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_186]], align 8
337*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_186]], align 8
338*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
339*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
340*ca603d25SDavid Green //
341207e5cccSFangrui Song float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
342207e5cccSFangrui Song   return vcmla_laneq_f32(acc, lhs, rhs, 1);
343207e5cccSFangrui Song }
344207e5cccSFangrui Song 
345*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32(
346*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
347*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
348*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_184:%.*]] = alloca <2 x float>, align 8
349*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_184:%.*]] = alloca <2 x i64>, align 16
350*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_184]], align 8
351*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_184]], align 8
352*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
353*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
354*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_184]], align 8
355*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
356*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
357*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_184]], align 16
358*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_184]], align 16
359*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
360*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
361*ca603d25SDavid Green //
362207e5cccSFangrui Song float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
363207e5cccSFangrui Song   return vcmlaq_lane_f32(acc, lhs, rhs, 0);
364207e5cccSFangrui Song }
365207e5cccSFangrui Song 
366*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32(
367*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
368*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
369*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_188:%.*]] = alloca <4 x float>, align 16
370*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_188:%.*]] = alloca <2 x i64>, align 16
371*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_188]], align 16
372*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_188]], align 16
373*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
374*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
375*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_188]], align 16
376*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
377*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
378*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_188]], align 16
379*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_188]], align 16
380*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
381*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
382*ca603d25SDavid Green //
// Calls vcmlaq_laneq_f32 with lane 1 of the 128-bit rhs. Per the expected IR
// above, rhs is viewed as <2 x i64>, element 1 is copied into both elements of
// a <2 x i64>, and that value (as <4 x float>) is passed to the rot0 intrinsic.
383207e5cccSFangrui Song float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
384207e5cccSFangrui Song   return vcmlaq_laneq_f32(acc, lhs, rhs, 1);
385207e5cccSFangrui Song }
386207e5cccSFangrui Song 
387*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16(
388*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
389*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
390*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_174:%.*]] = alloca <4 x half>, align 8
391*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_174:%.*]] = alloca <2 x i32>, align 8
392*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_174]], align 8
393*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_174]], align 8
394*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
395*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
396*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_174]], align 8
397*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
398*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
399*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_174]], align 8
400*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_174]], align 8
401*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
402*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
403*ca603d25SDavid Green //
// Calls vcmla_rot90_lane_f16 with lane 1 of the 64-bit rhs. Per the expected
// IR above, rhs is viewed as <2 x i32>, element 1 is copied into both elements
// of a <2 x i32>, and that value (as <4 x half>) is passed to the rot90
// intrinsic.
404207e5cccSFangrui Song float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
405207e5cccSFangrui Song   return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
406207e5cccSFangrui Song }
407207e5cccSFangrui Song 
408207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1.
409*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16(
410*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
411*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
412*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_178:%.*]] = alloca <8 x half>, align 16
413*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_178:%.*]] = alloca <2 x i32>, align 8
414*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_178]], align 16
415*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_178]], align 16
416*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
417*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
418*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_178]], align 16
419*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
420*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
421*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_178]], align 8
422*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_178]], align 8
423*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
424*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
425*ca603d25SDavid Green //
// Calls vcmla_rot90_laneq_f16 with lane 3 of the 128-bit rhs. Per the expected
// IR above, rhs is viewed as <4 x i32>, element 3 is copied into both elements
// of a <2 x i32>, and that value (as <4 x half>) is passed to the rot90
// intrinsic.
426207e5cccSFangrui Song float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
427207e5cccSFangrui Song   return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3);
428207e5cccSFangrui Song }
429207e5cccSFangrui Song 
430*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16(
431*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
432*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
433*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_176:%.*]] = alloca <4 x half>, align 8
434*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_176:%.*]] = alloca <4 x i32>, align 16
435*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_176]], align 8
436*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
437*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
438*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
439*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
440*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
441*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
442*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
443*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
444*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
445*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
446*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
447*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
448*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_176]], align 16
449*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_176]], align 16
450*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
451*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
452*ca603d25SDavid Green //
// Calls vcmlaq_rot90_lane_f16 with lane 1 of the 64-bit rhs. Per the expected
// IR above, rhs is viewed as <2 x i32>, element 1 is copied into all four
// elements of a <4 x i32>, and that value (as <8 x half>) is passed to the
// rot90 intrinsic.
453207e5cccSFangrui Song float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
454207e5cccSFangrui Song   return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1);
455207e5cccSFangrui Song }
456207e5cccSFangrui Song 
457*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16(
458*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
459*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
460*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_180:%.*]] = alloca <8 x half>, align 16
461*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_180:%.*]] = alloca <4 x i32>, align 16
462*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_180]], align 16
463*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
464*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
465*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
466*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
467*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
468*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
469*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
470*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
471*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
472*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
473*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
474*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
475*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_180]], align 16
476*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_180]], align 16
477*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
478*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
479*ca603d25SDavid Green //
// Calls vcmlaq_rot90_laneq_f16 with lane 3 of the 128-bit rhs. Per the
// expected IR above, rhs is viewed as <4 x i32>, element 3 is copied into all
// four elements of a <4 x i32>, and that value (as <8 x half>) is passed to the
// rot90 intrinsic.
480207e5cccSFangrui Song float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
481207e5cccSFangrui Song   return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3);
482207e5cccSFangrui Song }
483207e5cccSFangrui Song 
484*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32(
485*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
486*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
487*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_206:%.*]] = alloca <2 x float>, align 8
488*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_206:%.*]] = alloca <1 x i64>, align 8
489*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_206]], align 8
490*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_206]], align 8
491*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
492*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
493*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_206]], align 8
494*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_206]], align 8
495*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
496*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
497*ca603d25SDavid Green //
// Calls vcmla_rot90_lane_f32 with lane 0 of the 64-bit rhs. Per the expected
// IR above, rhs is viewed as <1 x i64>, element 0 is placed in a <1 x i64>,
// and that value (as <2 x float>) is passed to the rot90 intrinsic.
498207e5cccSFangrui Song float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
499207e5cccSFangrui Song   return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
500207e5cccSFangrui Song }
501207e5cccSFangrui Song 
502207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 0.
503*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32(
504*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
505*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
506*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_210:%.*]] = alloca <4 x float>, align 16
507*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_210:%.*]] = alloca <1 x i64>, align 8
508*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_210]], align 16
509*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_210]], align 16
510*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
511*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
512*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_210]], align 8
513*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_210]], align 8
514*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
515*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
516*ca603d25SDavid Green //
// Calls vcmla_rot90_laneq_f32 with lane 1 of the 128-bit rhs. Per the expected
// IR above, rhs is viewed as <2 x i64>, element 1 is placed in a <1 x i64>,
// and that value (as <2 x float>) is passed to the rot90 intrinsic.
517207e5cccSFangrui Song float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
518207e5cccSFangrui Song   return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1);
519207e5cccSFangrui Song }
520207e5cccSFangrui Song 
521*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32(
522*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
523*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
524*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_208:%.*]] = alloca <2 x float>, align 8
525*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_208:%.*]] = alloca <2 x i64>, align 16
526*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_208]], align 8
527*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_208]], align 8
528*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
529*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
530*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_208]], align 8
531*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
532*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
533*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_208]], align 16
534*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_208]], align 16
535*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
536*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
537*ca603d25SDavid Green //
// Calls vcmlaq_rot90_lane_f32 with lane 0 of the 64-bit rhs. Per the expected
// IR above, rhs is viewed as <1 x i64>, element 0 is copied into both elements
// of a <2 x i64>, and that value (as <4 x float>) is passed to the rot90
// intrinsic.
538207e5cccSFangrui Song float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
539207e5cccSFangrui Song   return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0);
540207e5cccSFangrui Song }
541207e5cccSFangrui Song 
542*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32(
543*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
544*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
545*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_212:%.*]] = alloca <4 x float>, align 16
546*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_212:%.*]] = alloca <2 x i64>, align 16
547*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_212]], align 16
548*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_212]], align 16
549*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
550*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
551*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_212]], align 16
552*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
553*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
554*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_212]], align 16
555*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_212]], align 16
556*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
557*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
558*ca603d25SDavid Green //
// Calls vcmlaq_rot90_laneq_f32 with lane 1 of the 128-bit rhs. Per the
// expected IR above, rhs is viewed as <2 x i64>, element 1 is copied into both
// elements of a <2 x i64>, and that value (as <4 x float>) is passed to the
// rot90 intrinsic.
559207e5cccSFangrui Song float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
560207e5cccSFangrui Song   return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1);
561207e5cccSFangrui Song }
562207e5cccSFangrui Song 
563*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16(
564*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
565*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
566*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_158:%.*]] = alloca <4 x half>, align 8
567*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_158:%.*]] = alloca <2 x i32>, align 8
568*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_158]], align 8
569*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_158]], align 8
570*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
571*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
572*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_158]], align 8
573*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
574*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
575*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_158]], align 8
576*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_158]], align 8
577*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
578*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
579*ca603d25SDavid Green //
// Calls vcmla_rot180_lane_f16 with lane 1 of the 64-bit rhs. Per the expected
// IR above, rhs is viewed as <2 x i32>, element 1 is copied into both elements
// of a <2 x i32>, and that value (as <4 x half>) is passed to the rot180
// intrinsic.
580207e5cccSFangrui Song float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
581207e5cccSFangrui Song   return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
582207e5cccSFangrui Song }
583207e5cccSFangrui Song 
584207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1.
585*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16(
586*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
587*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
588*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_162:%.*]] = alloca <8 x half>, align 16
589*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_162:%.*]] = alloca <2 x i32>, align 8
590*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_162]], align 16
591*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_162]], align 16
592*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
593*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
594*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_162]], align 16
595*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
596*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
597*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_162]], align 8
598*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_162]], align 8
599*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
600*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
601*ca603d25SDavid Green //
// Calls vcmla_rot180_laneq_f16 with lane 3 of the 128-bit rhs. Per the
// expected IR above, rhs is viewed as <4 x i32>, element 3 is copied into both
// elements of a <2 x i32>, and that value (as <4 x half>) is passed to the
// rot180 intrinsic.
602207e5cccSFangrui Song float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
603207e5cccSFangrui Song   return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3);
604207e5cccSFangrui Song }
605207e5cccSFangrui Song 
606*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16(
607*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
608*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
609*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_160:%.*]] = alloca <4 x half>, align 8
610*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_160:%.*]] = alloca <4 x i32>, align 16
611*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_160]], align 8
612*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
613*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
614*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
615*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
616*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
617*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
618*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
619*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
620*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
621*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
622*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
623*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
624*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_160]], align 16
625*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_160]], align 16
626*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
627*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
628*ca603d25SDavid Green //
// Calls vcmlaq_rot180_lane_f16 with lane 1 of the 64-bit rhs. Per the expected
// IR above, rhs is viewed as <2 x i32>, element 1 is copied into all four
// elements of a <4 x i32>, and that value (as <8 x half>) is passed to the
// rot180 intrinsic.
629207e5cccSFangrui Song float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
630207e5cccSFangrui Song   return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1);
631207e5cccSFangrui Song }
632207e5cccSFangrui Song 
633*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16(
634*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
635*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
636*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_164:%.*]] = alloca <8 x half>, align 16
637*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_164:%.*]] = alloca <4 x i32>, align 16
638*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_164]], align 16
639*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
640*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
641*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
642*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
643*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
644*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
645*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
646*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
647*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
648*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
649*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
650*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
651*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_164]], align 16
652*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_164]], align 16
653*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
654*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
655*ca603d25SDavid Green //
// Calls vcmlaq_rot180_laneq_f16 with lane 3 of the 128-bit rhs. Per the
// expected IR above, rhs is viewed as <4 x i32>, element 3 is copied into all
// four elements of a <4 x i32>, and that value (as <8 x half>) is passed to the
// rot180 intrinsic.
656207e5cccSFangrui Song float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
657207e5cccSFangrui Song   return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3);
658207e5cccSFangrui Song }
659207e5cccSFangrui Song 
660*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32(
661*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
662*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
663*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_190:%.*]] = alloca <2 x float>, align 8
664*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_190:%.*]] = alloca <1 x i64>, align 8
665*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_190]], align 8
666*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_190]], align 8
667*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
668*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
669*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_190]], align 8
670*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_190]], align 8
671*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
672*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
673*ca603d25SDavid Green //
// Calls vcmla_rot180_lane_f32 with lane 0 of the 64-bit rhs. Per the expected
// IR above, rhs is viewed as <1 x i64>, element 0 is placed in a <1 x i64>,
// and that value (as <2 x float>) is passed to the rot180 intrinsic.
674207e5cccSFangrui Song float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
675207e5cccSFangrui Song   return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
676207e5cccSFangrui Song }
677207e5cccSFangrui Song 
678207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 0.
679*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32(
680*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
681*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
682*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_194:%.*]] = alloca <4 x float>, align 16
683*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_194:%.*]] = alloca <1 x i64>, align 8
684*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_194]], align 16
685*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_194]], align 16
686*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
687*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
688*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_194]], align 8
689*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_194]], align 8
690*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
691*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
692*ca603d25SDavid Green //
// Calls vcmla_rot180_laneq_f32 with lane 1 of the 128-bit rhs. Per the
// expected IR above, rhs is viewed as <2 x i64>, element 1 is placed in a
// <1 x i64>, and that value (as <2 x float>) is passed to the rot180 intrinsic.
693207e5cccSFangrui Song float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
694207e5cccSFangrui Song   return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1);
695207e5cccSFangrui Song }
696207e5cccSFangrui Song 
697*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32(
698*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
699*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
700*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_192:%.*]] = alloca <2 x float>, align 8
701*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_192:%.*]] = alloca <2 x i64>, align 16
702*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_192]], align 8
703*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_192]], align 8
704*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
705*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
706*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_192]], align 8
707*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
708*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
709*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_192]], align 16
710*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_192]], align 16
711*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
712*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
713*ca603d25SDavid Green //
// Calls vcmlaq_rot180_lane_f32 with constant lane index 0 on a float32x2_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <1 x i64>, element 0 inserted into both elements of a <2 x i64>, and the
// result passed as <4 x float> to @llvm.aarch64.neon.vcmla.rot180.v4f32.
714207e5cccSFangrui Song float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
715207e5cccSFangrui Song   return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0);
716207e5cccSFangrui Song }
717207e5cccSFangrui Song 
718*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32(
719*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
720*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
721*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_196:%.*]] = alloca <4 x float>, align 16
722*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_196:%.*]] = alloca <2 x i64>, align 16
723*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_196]], align 16
724*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_196]], align 16
725*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
726*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
727*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_196]], align 16
728*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
729*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
730*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_196]], align 16
731*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_196]], align 16
732*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
733*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
734*ca603d25SDavid Green //
// Calls vcmlaq_rot180_laneq_f32 with constant lane index 1 on a float32x4_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <2 x i64>, element 1 inserted into both elements of a new <2 x i64>, and
// the result passed as <4 x float> to @llvm.aarch64.neon.vcmla.rot180.v4f32.
735207e5cccSFangrui Song float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
736207e5cccSFangrui Song   return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1);
737207e5cccSFangrui Song }
738207e5cccSFangrui Song 
739*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16(
740*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
741*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
742*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_166:%.*]] = alloca <4 x half>, align 8
743*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_166:%.*]] = alloca <2 x i32>, align 8
744*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_166]], align 8
745*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_166]], align 8
746*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
747*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
748*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_166]], align 8
749*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
750*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
751*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_166]], align 8
752*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_166]], align 8
753*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
754*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
755*ca603d25SDavid Green //
// Calls vcmla_rot270_lane_f16 with constant lane index 1 on a float16x4_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <2 x i32>, element 1 inserted into both elements of a new <2 x i32>, and
// the result passed as <4 x half> to @llvm.aarch64.neon.vcmla.rot270.v4f16.
756207e5cccSFangrui Song float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
757207e5cccSFangrui Song   return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
758207e5cccSFangrui Song }
759207e5cccSFangrui Song 
760207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1.
761*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16(
762*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
763*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
764*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_170:%.*]] = alloca <8 x half>, align 16
765*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_170:%.*]] = alloca <2 x i32>, align 8
766*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_170]], align 16
767*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_170]], align 16
768*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
769*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
770*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_170]], align 16
771*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
772*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
773*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i32> [[VECINIT5]], ptr [[__REINT1_170]], align 8
774*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_170]], align 8
775*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
776*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
777*ca603d25SDavid Green //
// Calls vcmla_rot270_laneq_f16 with constant lane index 3 on a float16x8_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <4 x i32>, element 3 inserted into both elements of a <2 x i32>, and the
// result passed as <4 x half> to @llvm.aarch64.neon.vcmla.rot270.v4f16.
778207e5cccSFangrui Song float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
779207e5cccSFangrui Song   return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3);
780207e5cccSFangrui Song }
781207e5cccSFangrui Song 
782*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16(
783*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
784*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
785*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_168:%.*]] = alloca <4 x half>, align 8
786*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_168:%.*]] = alloca <4 x i32>, align 16
787*ca603d25SDavid Green // CHECK-NEXT:    store <4 x half> [[RHS]], ptr [[__REINT_168]], align 8
788*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
789*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
790*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
791*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
792*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
793*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
794*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
795*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
796*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
797*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
798*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
799*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
800*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_168]], align 16
801*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_168]], align 16
802*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
803*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
804*ca603d25SDavid Green //
// Calls vcmlaq_rot270_lane_f16 with constant lane index 1 on a float16x4_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <2 x i32>, element 1 inserted into all four elements of a <4 x i32>, and
// the result passed as <8 x half> to @llvm.aarch64.neon.vcmla.rot270.v8f16.
805207e5cccSFangrui Song float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
806207e5cccSFangrui Song   return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1);
807207e5cccSFangrui Song }
808207e5cccSFangrui Song 
809*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16(
810*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
811*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
812*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_172:%.*]] = alloca <8 x half>, align 16
813*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_172:%.*]] = alloca <4 x i32>, align 16
814*ca603d25SDavid Green // CHECK-NEXT:    store <8 x half> [[RHS]], ptr [[__REINT_172]], align 16
815*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
816*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
817*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
818*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
819*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
820*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
821*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
822*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
823*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
824*ca603d25SDavid Green // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
825*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
826*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
827*ca603d25SDavid Green // CHECK-NEXT:    store <4 x i32> [[VECINIT15]], ptr [[__REINT1_172]], align 16
828*ca603d25SDavid Green // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_172]], align 16
829*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
830*ca603d25SDavid Green // CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
831*ca603d25SDavid Green //
// Calls vcmlaq_rot270_laneq_f16 with constant lane index 3 on a float16x8_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <4 x i32>, element 3 inserted into all four elements of a new <4 x i32>,
// and the result passed as <8 x half> to
// @llvm.aarch64.neon.vcmla.rot270.v8f16.
832207e5cccSFangrui Song float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
833207e5cccSFangrui Song   return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3);
834207e5cccSFangrui Song }
835207e5cccSFangrui Song 
836*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32(
837*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
838*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
839*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_198:%.*]] = alloca <2 x float>, align 8
840*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_198:%.*]] = alloca <1 x i64>, align 8
841*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_198]], align 8
842*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_198]], align 8
843*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
844*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
845*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_198]], align 8
846*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_198]], align 8
847*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
848*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
849*ca603d25SDavid Green //
// Calls vcmla_rot270_lane_f32 with constant lane index 0 on a float32x2_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <1 x i64>, element 0 re-inserted into a fresh <1 x i64>, and the result
// passed as <2 x float> to @llvm.aarch64.neon.vcmla.rot270.v2f32.
850207e5cccSFangrui Song float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
851207e5cccSFangrui Song   return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
852207e5cccSFangrui Song }
853207e5cccSFangrui Song 
854207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1.
855*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32(
856*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
857*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
858*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_202:%.*]] = alloca <4 x float>, align 16
859*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_202:%.*]] = alloca <1 x i64>, align 8
860*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_202]], align 16
861*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_202]], align 16
862*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
863*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
864*ca603d25SDavid Green // CHECK-NEXT:    store <1 x i64> [[VECINIT]], ptr [[__REINT1_202]], align 8
865*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_202]], align 8
866*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
867*ca603d25SDavid Green // CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
868*ca603d25SDavid Green //
// Calls vcmla_rot270_laneq_f32 with constant lane index 1 on a float32x4_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <2 x i64>, element 1 extracted into a <1 x i64>, and the result passed as
// <2 x float> to @llvm.aarch64.neon.vcmla.rot270.v2f32.
869207e5cccSFangrui Song float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
870207e5cccSFangrui Song   return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1);
871207e5cccSFangrui Song }
872207e5cccSFangrui Song 
873*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32(
874*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
875*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
876*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_200:%.*]] = alloca <2 x float>, align 8
877*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_200:%.*]] = alloca <2 x i64>, align 16
878*ca603d25SDavid Green // CHECK-NEXT:    store <2 x float> [[RHS]], ptr [[__REINT_200]], align 8
879*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_200]], align 8
880*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
881*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
882*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_200]], align 8
883*ca603d25SDavid Green // CHECK-NEXT:    [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
884*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
885*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_200]], align 16
886*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_200]], align 16
887*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
888*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
889*ca603d25SDavid Green //
// Calls vcmlaq_rot270_lane_f32 with constant lane index 0 on a float32x2_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <1 x i64>, element 0 inserted into both elements of a <2 x i64>, and the
// result passed as <4 x float> to @llvm.aarch64.neon.vcmla.rot270.v4f32.
890207e5cccSFangrui Song float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
891207e5cccSFangrui Song   return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0);
892207e5cccSFangrui Song }
893207e5cccSFangrui Song 
894*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32(
895*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
896*ca603d25SDavid Green // CHECK-NEXT:  [[ENTRY:.*:]]
897*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT_204:%.*]] = alloca <4 x float>, align 16
898*ca603d25SDavid Green // CHECK-NEXT:    [[__REINT1_204:%.*]] = alloca <2 x i64>, align 16
899*ca603d25SDavid Green // CHECK-NEXT:    store <4 x float> [[RHS]], ptr [[__REINT_204]], align 16
900*ca603d25SDavid Green // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_204]], align 16
901*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
902*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
903*ca603d25SDavid Green // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_204]], align 16
904*ca603d25SDavid Green // CHECK-NEXT:    [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
905*ca603d25SDavid Green // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
906*ca603d25SDavid Green // CHECK-NEXT:    store <2 x i64> [[VECINIT5]], ptr [[__REINT1_204]], align 16
907*ca603d25SDavid Green // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_204]], align 16
908*ca603d25SDavid Green // CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
909*ca603d25SDavid Green // CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
910*ca603d25SDavid Green //
// Calls vcmlaq_rot270_laneq_f32 with constant lane index 1 on a float32x4_t
// rhs. The FileCheck assertions above expect rhs to be reinterpreted as
// <2 x i64>, element 1 inserted into both elements of a new <2 x i64>, and
// the result passed as <4 x float> to @llvm.aarch64.neon.vcmla.rot270.v4f32.
911207e5cccSFangrui Song float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
912207e5cccSFangrui Song   return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1);
913207e5cccSFangrui Song }
914