xref: /llvm-project/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll (revision 9ef15f4109890b2ad1e7ac714e4398d44b2087ff)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
3
4target triple = "aarch64"
5
6; Expected to transform: one complex multiply a * b, with each operand held in
; a <2 x float> as (real, imag). The assertions below expect a zeroed
; accumulator followed by two FCMLA instructions on .2s vectors, with
; rotations #0 and #90.
7define <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) {
8; CHECK-LABEL: complex_mul_v2f32:
9; CHECK:       // %bb.0: // %entry
10; CHECK-NEXT:    movi d2, #0000000000000000
11; CHECK-NEXT:    fcmla v2.2s, v1.2s, v0.2s, #0
12; CHECK-NEXT:    fcmla v2.2s, v1.2s, v0.2s, #90
13; CHECK-NEXT:    fmov d0, d2
14; CHECK-NEXT:    ret
15entry:
  ; Deinterleave: lane 0 is the real part, lane 1 the imaginary part.
16  %a.real   = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 0>
17  %a.imag = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 1>
18  %b.real = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 0>
19  %b.imag = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 1>
  ; Imaginary part of the product: b.real * a.imag + b.imag * a.real.
20  %0 = fmul fast <1 x float> %b.imag, %a.real
21  %1 = fmul fast <1 x float> %b.real, %a.imag
22  %2 = fadd fast <1 x float> %1, %0
  ; Real part of the product: b.real * a.real - a.imag * b.imag.
23  %3 = fmul fast <1 x float> %b.real, %a.real
24  %4 = fmul fast <1 x float> %a.imag, %b.imag
25  %5 = fsub fast <1 x float> %3, %4
  ; Re-interleave as (real, imag): index 0 selects %5, index 1 selects %2.
26  %interleaved.vec = shufflevector <1 x float> %5, <1 x float> %2, <2 x i32> <i32 0, i32 1>
27  ret <2 x float> %interleaved.vec
28}
29
30; Expected to transform: two complex multiplies, with each operand held in a
; <4 x float> as (re0, im0, re1, im1). The assertions below expect a zeroed
; accumulator followed by two FCMLA instructions on .4s vectors, with
; rotations #0 and #90.
31define <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) {
32; CHECK-LABEL: complex_mul_v4f32:
33; CHECK:       // %bb.0: // %entry
34; CHECK-NEXT:    movi v2.2d, #0000000000000000
35; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #0
36; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #90
37; CHECK-NEXT:    mov v0.16b, v2.16b
38; CHECK-NEXT:    ret
39entry:
  ; Deinterleave: even lanes are real parts, odd lanes are imaginary parts.
40  %a.real   = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
41  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
42  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
43  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  ; Imaginary parts of the products: b.real * a.imag + b.imag * a.real.
44  %0 = fmul fast <2 x float> %b.imag, %a.real
45  %1 = fmul fast <2 x float> %b.real, %a.imag
46  %2 = fadd fast <2 x float> %1, %0
  ; Real parts of the products: b.real * a.real - a.imag * b.imag.
47  %3 = fmul fast <2 x float> %b.real, %a.real
48  %4 = fmul fast <2 x float> %a.imag, %b.imag
49  %5 = fsub fast <2 x float> %3, %4
  ; Re-interleave as (re0, im0, re1, im1): indices 0-1 select from %5 (real),
  ; indices 2-3 select from %2 (imaginary).
50  %interleaved.vec = shufflevector <2 x float> %5, <2 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
51  ret <4 x float> %interleaved.vec
52}
53
54; Expected to transform: four complex multiplies, with each operand held in an
; <8 x float> of interleaved (real, imag) pairs. The assertions below expect
; two zeroed accumulators, each updated by its own pair of FCMLA instructions
; on .4s vectors with rotations #0 and #90.
55define <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) {
56; CHECK-LABEL: complex_mul_v8f32:
57; CHECK:       // %bb.0: // %entry
58; CHECK-NEXT:    movi v4.2d, #0000000000000000
59; CHECK-NEXT:    movi v5.2d, #0000000000000000
60; CHECK-NEXT:    fcmla v5.4s, v2.4s, v0.4s, #0
61; CHECK-NEXT:    fcmla v4.4s, v3.4s, v1.4s, #0
62; CHECK-NEXT:    fcmla v5.4s, v2.4s, v0.4s, #90
63; CHECK-NEXT:    fcmla v4.4s, v3.4s, v1.4s, #90
64; CHECK-NEXT:    mov v0.16b, v5.16b
65; CHECK-NEXT:    mov v1.16b, v4.16b
66; CHECK-NEXT:    ret
67entry:
  ; Deinterleave: even lanes are real parts, odd lanes are imaginary parts.
68  %a.real   = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
69  %a.imag = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
70  %b.real = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
71  %b.imag = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; Imaginary parts of the products: b.real * a.imag + b.imag * a.real.
72  %0 = fmul fast <4 x float> %b.imag, %a.real
73  %1 = fmul fast <4 x float> %b.real, %a.imag
74  %2 = fadd fast <4 x float> %1, %0
  ; Real parts of the products: b.real * a.real - a.imag * b.imag.
75  %3 = fmul fast <4 x float> %b.real, %a.real
76  %4 = fmul fast <4 x float> %a.imag, %b.imag
77  %5 = fsub fast <4 x float> %3, %4
  ; Re-interleave into (real, imag) pairs: indices 0-3 select from %5 (real),
  ; indices 4-7 select from %2 (imaginary).
78  %interleaved.vec = shufflevector <4 x float> %5, <4 x float> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
79  ret <8 x float> %interleaved.vec
80}
81
82; Expected to transform: eight complex multiplies, with each operand held in a
; <16 x float> of interleaved (real, imag) pairs. The assertions below expect
; four zeroed accumulators, each updated by its own pair of FCMLA instructions
; on .4s vectors with rotations #0 and #90.
83define <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) {
84; CHECK-LABEL: complex_mul_v16f32:
85; CHECK:       // %bb.0: // %entry
86; CHECK-NEXT:    movi v16.2d, #0000000000000000
87; CHECK-NEXT:    movi v17.2d, #0000000000000000
88; CHECK-NEXT:    movi v18.2d, #0000000000000000
89; CHECK-NEXT:    movi v19.2d, #0000000000000000
90; CHECK-NEXT:    fcmla v16.4s, v4.4s, v0.4s, #0
91; CHECK-NEXT:    fcmla v18.4s, v5.4s, v1.4s, #0
92; CHECK-NEXT:    fcmla v17.4s, v7.4s, v3.4s, #0
93; CHECK-NEXT:    fcmla v19.4s, v6.4s, v2.4s, #0
94; CHECK-NEXT:    fcmla v16.4s, v4.4s, v0.4s, #90
95; CHECK-NEXT:    fcmla v18.4s, v5.4s, v1.4s, #90
96; CHECK-NEXT:    fcmla v17.4s, v7.4s, v3.4s, #90
97; CHECK-NEXT:    fcmla v19.4s, v6.4s, v2.4s, #90
98; CHECK-NEXT:    mov v0.16b, v16.16b
99; CHECK-NEXT:    mov v1.16b, v18.16b
100; CHECK-NEXT:    mov v3.16b, v17.16b
101; CHECK-NEXT:    mov v2.16b, v19.16b
102; CHECK-NEXT:    ret
103entry:
  ; Deinterleave: even lanes are real parts, odd lanes are imaginary parts.
104  %a.real   = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
105  %a.imag = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
106  %b.real = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
107  %b.imag = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; Imaginary parts of the products: b.real * a.imag + b.imag * a.real.
108  %0 = fmul fast <8 x float> %b.imag, %a.real
109  %1 = fmul fast <8 x float> %b.real, %a.imag
110  %2 = fadd fast <8 x float> %1, %0
  ; Real parts of the products: b.real * a.real - a.imag * b.imag.
111  %3 = fmul fast <8 x float> %b.real, %a.real
112  %4 = fmul fast <8 x float> %a.imag, %b.imag
113  %5 = fsub fast <8 x float> %3, %4
  ; Re-interleave into (real, imag) pairs: indices 0-7 select from %5 (real),
  ; indices 8-15 select from %2 (imaginary).
114  %interleaved.vec = shufflevector <8 x float> %5, <8 x float> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
115  ret <16 x float> %interleaved.vec
116}
117