xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-f32-mul.ll (revision d52e2839f3b1a21d4a6090ccff6f4b7f1f89a1b3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s --mattr=+mve.fp -o - | FileCheck %s
3
4target triple = "thumbv8.1m.main-none-none-eabi"
5
6; Expected to not transform
define arm_aapcs_vfpcc <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: complex_mul_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.f32 s9, s5, s0
; CHECK-NEXT:    vmul.f32 s8, s1, s5
; CHECK-NEXT:    vfma.f32 s9, s4, s1
; CHECK-NEXT:    vfnms.f32 s8, s4, s0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Deinterleave each operand into real (even lanes) and imaginary (odd lanes)
  ; halves; here each half is a single <1 x float> lane.
  %a.real   = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 0>
  %a.imag = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 1>
  %b.real = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 0>
  %b.imag = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 1>
  ; Imaginary part of the product: b.imag*a.real + b.real*a.imag.
  %0 = fmul fast <1 x float> %b.imag, %a.real
  %1 = fmul fast <1 x float> %b.real, %a.imag
  %2 = fadd fast <1 x float> %1, %0
  ; Real part of the product: b.real*a.real - a.imag*b.imag.
  %3 = fmul fast <1 x float> %b.real, %a.real
  %4 = fmul fast <1 x float> %a.imag, %b.imag
  %5 = fsub fast <1 x float> %3, %4
  ; Re-interleave (real, imag) back into one complex value. The generated
  ; code above uses scalar vmul/vfma/vfnms rather than vcmul/vcmla, i.e. the
  ; pass leaves this single-complex-element case untransformed (see the
  ; "Expected to not transform" note on this test).
  %interleaved.vec = shufflevector <1 x float> %5, <1 x float> %2, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %interleaved.vec
}
30
31; Expected to transform
define arm_aapcs_vfpcc <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: complex_mul_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
; CHECK-NEXT:    vcmla.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Deinterleave each operand into real (even lanes) and imaginary (odd
  ; lanes) halves; two complex numbers fill one 128-bit MVE q register.
  %a.real   = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  ; Imaginary parts of the products: b.imag*a.real + b.real*a.imag.
  %0 = fmul fast <2 x float> %b.imag, %a.real
  %1 = fmul fast <2 x float> %b.real, %a.imag
  %2 = fadd fast <2 x float> %1, %0
  ; Real parts of the products: b.real*a.real - a.imag*b.imag.
  %3 = fmul fast <2 x float> %b.real, %a.real
  %4 = fmul fast <2 x float> %a.imag, %b.imag
  %5 = fsub fast <2 x float> %3, %4
  ; Re-interleave (real, imag) pairs. The whole pattern is recognized and
  ; lowered to a vcmul (#0) / vcmla (#90) pair, as the checks above show.
  %interleaved.vec = shufflevector <2 x float> %5, <2 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}
53
54; Expected to transform
define arm_aapcs_vfpcc <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: complex_mul_v8f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vcmul.f32 q4, q0, q2, #0
; CHECK-NEXT:    vcmla.f32 q4, q0, q2, #90
; CHECK-NEXT:    vcmul.f32 q2, q1, q3, #0
; CHECK-NEXT:    vcmla.f32 q2, q1, q3, #90
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vmov q1, q2
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  ; Deinterleave each operand into real (even lanes) and imaginary (odd
  ; lanes) halves; four complex numbers span two 128-bit q registers.
  %a.real   = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %a.imag = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %b.real = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b.imag = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; Imaginary parts of the products: b.imag*a.real + b.real*a.imag.
  %0 = fmul fast <4 x float> %b.imag, %a.real
  %1 = fmul fast <4 x float> %b.real, %a.imag
  %2 = fadd fast <4 x float> %1, %0
  ; Real parts of the products: b.real*a.real - a.imag*b.imag.
  %3 = fmul fast <4 x float> %b.real, %a.real
  %4 = fmul fast <4 x float> %a.imag, %b.imag
  %5 = fsub fast <4 x float> %3, %4
  ; Re-interleave (real, imag) pairs. The pattern is split per q register and
  ; lowered to one vcmul/vcmla pair for each, as the checks above show.
  %interleaved.vec = shufflevector <4 x float> %5, <4 x float> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x float> %interleaved.vec
}
82
83; Expected to transform
define arm_aapcs_vfpcc <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: complex_mul_v16f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    add r3, sp, #64
; CHECK-NEXT:    add r2, sp, #80
; CHECK-NEXT:    vldrw.u32 q5, [r3]
; CHECK-NEXT:    add r1, sp, #96
; CHECK-NEXT:    add r0, sp, #112
; CHECK-NEXT:    vcmul.f32 q4, q0, q5, #0
; CHECK-NEXT:    vcmla.f32 q4, q0, q5, #90
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vcmul.f32 q5, q1, q0, #0
; CHECK-NEXT:    vcmla.f32 q5, q1, q0, #90
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vmov q1, q5
; CHECK-NEXT:    vcmul.f32 q6, q2, q0, #0
; CHECK-NEXT:    vcmla.f32 q6, q2, q0, #90
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov q2, q6
; CHECK-NEXT:    vcmul.f32 q7, q3, q0, #0
; CHECK-NEXT:    vcmla.f32 q7, q3, q0, #90
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vmov q3, q7
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
entry:
  ; Deinterleave each operand into real (even lanes) and imaginary (odd
  ; lanes) halves; eight complex numbers span four 128-bit q registers, so
  ; %b's second half arrives on the stack (the vldrw loads in the checks).
  %a.real   = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %a.imag = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %b.real = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b.imag = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; Imaginary parts of the products: b.imag*a.real + b.real*a.imag.
  %0 = fmul fast <8 x float> %b.imag, %a.real
  %1 = fmul fast <8 x float> %b.real, %a.imag
  %2 = fadd fast <8 x float> %1, %0
  ; Real parts of the products: b.real*a.real - a.imag*b.imag.
  %3 = fmul fast <8 x float> %b.real, %a.real
  %4 = fmul fast <8 x float> %a.imag, %b.imag
  %5 = fsub fast <8 x float> %3, %4
  ; Re-interleave (real, imag) pairs. The pattern is split per q register and
  ; lowered to one vcmul/vcmla pair for each, as the checks above show.
  %interleaved.vec = shufflevector <8 x float> %5, <8 x float> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x float> %interleaved.vec
}
125