; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s
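; Check that the llvm.aarch64.neon.vcmla.* intrinsics (rotations #0, #90, #180
; and #270, over half, float and double vectors) are selected to the FCMLA
; instruction, including its indexed (lane) form.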

define <4 x half> @test_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

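; In the *_lane_* tests a single complex element (a pair of fp16 values,
; reinterpreted as one i32 lane) is splatted with a bitcast + shufflevector
; before the intrinsic call; this pattern should be selected to the indexed
; FCMLA form, e.g. "fcmla v0.4h, v1.4h, v2.h[1], #0".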
define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_16x4_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[1], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x half> %c to <2 x i32>
  %c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot90_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot90_16x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #90
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x half> %c to <2 x i32>
  %c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot180_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

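; Here the splatted lane comes from a 128-bit <8 x half> operand, so the
; expected output has no "// kill" line promoting a D register to a Q register.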
define <4 x half> @test_rot180_16x4_lane_0(<4 x half> %a, <4 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #180
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot270_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <2 x float> @test_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot90_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot90_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot180_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot180_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot270_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot270_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <8 x half> @test_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_16x8_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[0], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot90_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot90_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot90_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot90_16x8_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[1], #90
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot180_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot180_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x8_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[1], #180
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot270_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot270_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot270_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot270_16x8_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[0], #270
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <4 x float> @test_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_32x4_lane_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_32x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.s[0], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x float> %c to <2 x i64>
  %c.dup = shufflevector <2 x i64> %c.cast , <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i64> %c.dup to <4 x float>
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c.res)
  ret <4 x float> %res
}

define <4 x float> @test_rot90_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot90_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot180_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot180_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot270_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot270_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <2 x double> @test_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot90_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot90_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot180_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot180_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot270_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot270_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

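; The reassoc_* tests check that a reassociable (fast) fadd of an accumulator
; into the result of a zero-accumulator vcmla intrinsic is folded into the
; accumulator operand of the FCMLA instruction itself.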
define <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_c_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #90
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define <4 x half> @reassoc_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: reassoc_f16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #180
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c)
  %res = fadd fast <4 x half> %d, %a
  ret <4 x half> %res
}

define <4 x half> @reassoc_c_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: reassoc_c_f16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c)
  %res = fadd fast <4 x half> %a, %d
  ret <4 x half> %res
}

define <2 x double> @reassoc_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) {
; CHECK-LABEL: reassoc_f64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    fcmla v0.2d, v2.2d, v3.2d, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> zeroinitializer, <2 x double> %c, <2 x double> %g)
  %res = fadd fast <2 x double> %e, %d
  ret <2 x double> %res
}

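; When the same accumulator %a feeds both intrinsic calls, the expected code
; first doubles it (fadd v0.2d, v0.2d, v0.2d) and then lets both FCMLAs
; accumulate on top of that sum.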
define <2 x double> @reassoc_c_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) {
; CHECK-LABEL: reassoc_c_f64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fadd v0.2d, v0.2d, v0.2d
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    fcmla v0.2d, v2.2d, v3.2d, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %c, <2 x double> %g)
  %res = fadd fast <2 x double> %e, %d
  ret <2 x double> %res
}

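; Without fast-math flags on the fadd the accumulator must not be folded into
; the FCMLA, so the add has to stay as a separate instruction.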
define <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_nonfast_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v3.2d, #0000000000000000
; CHECK-NEXT:    fcmla v3.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}

declare <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double>, <2 x double>, <2 x double>)