; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

declare <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32, <8 x half>, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)


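; A VCMLA intrinsic with a zeroinitializer accumulator followed by a fast
; fadd should fold into a single vcmla that uses the fadd operand as its
; accumulator. The tests below cover both fadd operand orders and each
; rotation argument (i32 0..3 selecting #0, #90, #180 and #270).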
define arm_aapcs_vfpcc <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @reassoc_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 2, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <8 x half> @reassoc_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 3, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}

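; Without the fast flag on the fadd, the fold is not performed: the zero
; accumulator is materialized and the add stays a separate vadd.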
define arm_aapcs_vfpcc <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q3, #0x0
; CHECK-NEXT:    vcmla.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}



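; A VCMUL followed by a fast fadd should likewise combine into a single
; vcmla, with the fadd operand becoming the accumulator.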
define arm_aapcs_vfpcc <4 x float> @muladd_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <4 x float> @muladd_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @muladd_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <8 x half> @muladd_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}

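; Without the fast flag, the vcmul and the add remain separate instructions.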
define arm_aapcs_vfpcc <4 x float> @muladd_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}

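; When every operand is the same register, the fold must still respect the
; instruction's overlap constraints: the f16 variant of vcmla allows the
; destination to alias its sources, while the f32 variant requires a distinct
; destination (overlap is UNPREDICTABLE), so a copy through q1 is inserted.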
define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
; CHECK-LABEL: same_register_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
; CHECK-LABEL: same_register_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vcmla.f32 q1, q0, q0, #0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}
