xref: /llvm-project/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll (revision 9ef15f4109890b2ad1e7ac714e4398d44b2087ff)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s --mattr=+complxnum,+neon -o - | FileCheck %s
3
; Generic AArch64 target; the complxnum and neon features are enabled via --mattr in the llc invocation above.
4target triple = "aarch64"
5
6; a * b + c
; Complex multiply-accumulate. Each <4 x double> argument is treated as two
; interleaved complex numbers: even lanes are used as real parts, odd lanes as
; imaginary parts. All FP operations carry the fast flag.
; Expected assembly: one fcmla #0 / #90 pair per 128-bit register, accumulating
; into v4/v5, followed by a copy into v0/v1.
7define <4 x double> @mull_add(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
8; CHECK-LABEL: mull_add:
9; CHECK:       // %bb.0: // %entry
10; CHECK-NEXT:    fcmla v4.2d, v0.2d, v2.2d, #0
11; CHECK-NEXT:    fcmla v5.2d, v1.2d, v3.2d, #0
12; CHECK-NEXT:    fcmla v4.2d, v0.2d, v2.2d, #90
13; CHECK-NEXT:    fcmla v5.2d, v1.2d, v3.2d, #90
14; CHECK-NEXT:    mov v0.16b, v4.16b
15; CHECK-NEXT:    mov v1.16b, v5.16b
16; CHECK-NEXT:    ret
17entry:
18  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; a.re (even lanes)
19  %strided.vec28 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; a.im (odd lanes)
20  %strided.vec30 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; b.re
21  %strided.vec31 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; b.im
22  %0 = fmul fast <2 x double> %strided.vec31, %strided.vec  ; b.im * a.re
23  %1 = fmul fast <2 x double> %strided.vec30, %strided.vec28  ; b.re * a.im
24  %2 = fadd fast <2 x double> %0, %1  ; im(a * b)
25  %3 = fmul fast <2 x double> %strided.vec30, %strided.vec  ; b.re * a.re
26  %strided.vec33 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; c.re
27  %strided.vec34 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; c.im
28  %4 = fadd fast <2 x double> %strided.vec33, %3  ; c.re + a.re * b.re
29  %5 = fmul fast <2 x double> %strided.vec31, %strided.vec28  ; b.im * a.im
30  %6 = fsub fast <2 x double> %4, %5  ; real part of the result: c.re + re(a * b)
31  %7 = fadd fast <2 x double> %2, %strided.vec34  ; imaginary part of the result: im(a * b) + c.im
32  %interleaved.vec = shufflevector <2 x double> %6, <2 x double> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>  ; re-interleave as [re0, im0, re1, im1]
33  ret <4 x double> %interleaved.vec
34}
35
36; a * b + c * d
; Sum of two complex products. Each <4 x double> argument is treated as two
; interleaved complex numbers: even lanes are used as real parts, odd lanes as
; imaginary parts. All FP operations carry the fast flag.
; Expected assembly: two zeroed accumulators (v16/v17) that each receive an
; fcmla #0 / #90 pair for the (v6/v7, v4/v5) operands and another pair for the
; (v0/v1, v2/v3) operands, then a copy into v0/v1.
37define <4 x double> @mul_add_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
38; CHECK-LABEL: mul_add_mull:
39; CHECK:       // %bb.0: // %entry
40; CHECK-NEXT:    movi v16.2d, #0000000000000000
41; CHECK-NEXT:    movi v17.2d, #0000000000000000
42; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #0
43; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #0
44; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
45; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
46; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #90
47; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #90
48; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
49; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
50; CHECK-NEXT:    mov v0.16b, v17.16b
51; CHECK-NEXT:    mov v1.16b, v16.16b
52; CHECK-NEXT:    ret
53entry:
54  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; a.re (even lanes)
55  %strided.vec51 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; a.im (odd lanes)
56  %strided.vec53 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; b.re
57  %strided.vec54 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; b.im
58  %0 = fmul fast <2 x double> %strided.vec54, %strided.vec  ; b.im * a.re
59  %1 = fmul fast <2 x double> %strided.vec53, %strided.vec51  ; b.re * a.im
60  %2 = fmul fast <2 x double> %strided.vec53, %strided.vec  ; b.re * a.re
61  %3 = fmul fast <2 x double> %strided.vec54, %strided.vec51  ; b.im * a.im
62  %strided.vec56 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; c.re
63  %strided.vec57 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; c.im
64  %strided.vec59 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; d.re
65  %strided.vec60 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; d.im
66  %4 = fmul fast <2 x double> %strided.vec60, %strided.vec56  ; d.im * c.re
67  %5 = fmul fast <2 x double> %strided.vec59, %strided.vec57  ; d.re * c.im
68  %6 = fmul fast <2 x double> %strided.vec59, %strided.vec56  ; d.re * c.re
69  %7 = fmul fast <2 x double> %strided.vec60, %strided.vec57  ; d.im * c.im
70  %8 = fadd fast <2 x double> %7, %3  ; c.im * d.im + a.im * b.im (terms subtracted from the real part)
71  %9 = fadd fast <2 x double> %6, %2  ; c.re * d.re + a.re * b.re (terms added to the real part)
72  %10 = fsub fast <2 x double> %9, %8  ; real part of the result: re(a * b) + re(c * d)
73  %11 = fadd fast <2 x double> %0, %1  ; im(a * b)
74  %12 = fadd fast <2 x double> %11, %5  ; + d.re * c.im
75  %13 = fadd fast <2 x double> %12, %4  ; imaginary part of the result: im(a * b) + im(c * d)
76  %interleaved.vec = shufflevector <2 x double> %10, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>  ; re-interleave as [re0, im0, re1, im1]
77  ret <4 x double> %interleaved.vec
78}
79
80; a * b - c * d
; Difference of two complex products. Each <4 x double> argument is treated as
; two interleaved complex numbers: even lanes are used as real parts, odd lanes
; as imaginary parts. All FP operations carry the fast flag.
; Expected assembly: two zeroed accumulators (v16/v17); the (v0/v1, v2/v3)
; operands use fcmla rotations #0 / #90, while the (v6/v7, v4/v5) operands use
; rotations #270 / #180, which is how the subtraction shows up in the output.
81define <4 x double> @mul_sub_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
82; CHECK-LABEL: mul_sub_mull:
83; CHECK:       // %bb.0: // %entry
84; CHECK-NEXT:    movi v16.2d, #0000000000000000
85; CHECK-NEXT:    movi v17.2d, #0000000000000000
86; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #270
87; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #270
88; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
89; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
90; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #180
91; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #180
92; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
93; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
94; CHECK-NEXT:    mov v0.16b, v17.16b
95; CHECK-NEXT:    mov v1.16b, v16.16b
96; CHECK-NEXT:    ret
97entry:
98  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; a.re (even lanes)
99  %strided.vec53 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; a.im (odd lanes)
100  %strided.vec55 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; b.re
101  %strided.vec56 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; b.im
102  %0 = fmul fast <2 x double> %strided.vec56, %strided.vec  ; b.im * a.re
103  %1 = fmul fast <2 x double> %strided.vec55, %strided.vec53  ; b.re * a.im
104  %2 = fmul fast <2 x double> %strided.vec55, %strided.vec  ; b.re * a.re
105  %3 = fmul fast <2 x double> %strided.vec56, %strided.vec53  ; b.im * a.im
106  %strided.vec58 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; c.re
107  %strided.vec59 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; c.im
108  %strided.vec61 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; d.re
109  %strided.vec62 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; d.im
110  %4 = fmul fast <2 x double> %strided.vec62, %strided.vec59  ; d.im * c.im
111  %5 = fmul fast <2 x double> %strided.vec61, %strided.vec58  ; d.re * c.re
112  %6 = fadd fast <2 x double> %5, %3  ; c.re * d.re + a.im * b.im (terms subtracted from the real part)
113  %7 = fadd fast <2 x double> %4, %2  ; c.im * d.im + a.re * b.re (terms added to the real part)
114  %8 = fsub fast <2 x double> %7, %6  ; real part of the result: re(a * b) - re(c * d)
115  %9 = fmul fast <2 x double> %strided.vec61, %strided.vec59  ; d.re * c.im
116  %10 = fmul fast <2 x double> %strided.vec62, %strided.vec58  ; d.im * c.re
117  %11 = fadd fast <2 x double> %10, %9  ; im(c * d)
118  %12 = fadd fast <2 x double> %0, %1  ; im(a * b)
119  %13 = fsub fast <2 x double> %12, %11  ; imaginary part of the result: im(a * b) - im(c * d)
120  %interleaved.vec = shufflevector <2 x double> %8, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>  ; re-interleave as [re0, im0, re1, im1]
121  ret <4 x double> %interleaved.vec
122}
123
124; a * b + conj(c) * d
; Complex product plus a product with a conjugated operand. Each <4 x double>
; argument is treated as two interleaved complex numbers: even lanes are used
; as real parts, odd lanes as imaginary parts. All FP operations carry the
; fast flag.
; Expected assembly: two zeroed accumulators (v16/v17); the (v0/v1, v2/v3)
; operands use fcmla rotations #0 / #90, and the (v4/v5, v6/v7) operands use
; rotations #0 / #270, which is how the conjugate shows up in the output.
125define <4 x double> @mul_conj_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
126; CHECK-LABEL: mul_conj_mull:
127; CHECK:       // %bb.0: // %entry
128; CHECK-NEXT:    movi v16.2d, #0000000000000000
129; CHECK-NEXT:    movi v17.2d, #0000000000000000
130; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
131; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
132; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
133; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
134; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #0
135; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #0
136; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #270
137; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #270
138; CHECK-NEXT:    mov v0.16b, v17.16b
139; CHECK-NEXT:    mov v1.16b, v16.16b
140; CHECK-NEXT:    ret
141entry:
142  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; a.re (even lanes)
143  %strided.vec59 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; a.im (odd lanes)
144  %strided.vec61 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; b.re
145  %strided.vec62 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; b.im
146  %0 = fmul fast <2 x double> %strided.vec62, %strided.vec  ; b.im * a.re
147  %1 = fmul fast <2 x double> %strided.vec61, %strided.vec59  ; b.re * a.im
148  %2 = fmul fast <2 x double> %strided.vec61, %strided.vec  ; b.re * a.re
149  %strided.vec64 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; c.re
150  %strided.vec65 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; c.im
151  %strided.vec67 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; d.re
152  %strided.vec68 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; d.im
153  %3 = fmul fast <2 x double> %strided.vec68, %strided.vec64  ; d.im * c.re
154  %4 = fmul fast <2 x double> %strided.vec67, %strided.vec64  ; d.re * c.re
155  %5 = fmul fast <2 x double> %strided.vec68, %strided.vec65  ; d.im * c.im
156  %6 = fmul fast <2 x double> %strided.vec62, %strided.vec59  ; b.im * a.im
157  %7 = fsub fast <2 x double> %2, %6  ; re(a * b)
158  %8 = fadd fast <2 x double> %7, %4  ; + c.re * d.re
159  %9 = fadd fast <2 x double> %8, %5  ; real part of the result: re(a * b) + c.re * d.re + c.im * d.im
160  %10 = fadd fast <2 x double> %0, %1  ; im(a * b)
161  %11 = fmul fast <2 x double> %strided.vec67, %strided.vec65  ; d.re * c.im
162  %12 = fsub fast <2 x double> %10, %11  ; - c.im * d.re (sign flipped relative to a plain product: conjugated c)
163  %13 = fadd fast <2 x double> %12, %3  ; imaginary part of the result: im(a * b) + c.re * d.im - c.im * d.re
164  %interleaved.vec = shufflevector <2 x double> %9, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>  ; re-interleave as [re0, im0, re1, im1]
165  ret <4 x double> %interleaved.vec
166}
167
168; a * b + 1i * c * d
; Complex product plus a second product multiplied by the imaginary unit
; (the IR below multiplies a by b; it does not add them). Each <4 x double>
; argument is treated as two interleaved complex numbers: even lanes are used
; as real parts, odd lanes as imaginary parts. All FP operations carry the
; fast flag.
; Expected assembly at this revision contains no fcmla: the operands are
; de-interleaved with zip1/zip2, combined with fmul/fmla/fmls/fneg, and
; re-interleaved with zip1/zip2.
169define <4 x double> @mul_add_rot_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
170; CHECK-LABEL: mul_add_rot_mull:
171; CHECK:       // %bb.0: // %entry
172; CHECK-NEXT:    zip2 v16.2d, v2.2d, v3.2d
173; CHECK-NEXT:    zip2 v17.2d, v0.2d, v1.2d
174; CHECK-NEXT:    zip1 v2.2d, v2.2d, v3.2d
175; CHECK-NEXT:    zip2 v18.2d, v4.2d, v5.2d
176; CHECK-NEXT:    zip1 v19.2d, v6.2d, v7.2d
177; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
178; CHECK-NEXT:    zip1 v1.2d, v4.2d, v5.2d
179; CHECK-NEXT:    zip2 v5.2d, v6.2d, v7.2d
180; CHECK-NEXT:    fmul v3.2d, v16.2d, v17.2d
181; CHECK-NEXT:    fmul v4.2d, v2.2d, v17.2d
182; CHECK-NEXT:    fmla v3.2d, v18.2d, v19.2d
183; CHECK-NEXT:    fmla v4.2d, v0.2d, v16.2d
184; CHECK-NEXT:    fmla v3.2d, v1.2d, v5.2d
185; CHECK-NEXT:    fmla v4.2d, v1.2d, v19.2d
186; CHECK-NEXT:    fneg v3.2d, v3.2d
187; CHECK-NEXT:    fmls v4.2d, v18.2d, v5.2d
188; CHECK-NEXT:    fmla v3.2d, v0.2d, v2.2d
189; CHECK-NEXT:    zip1 v0.2d, v3.2d, v4.2d
190; CHECK-NEXT:    zip2 v1.2d, v3.2d, v4.2d
191; CHECK-NEXT:    ret
192entry:
193  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; a.re (even lanes)
194  %strided.vec79 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; a.im (odd lanes)
195  %strided.vec81 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; b.re
196  %strided.vec82 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; b.im
197  %0 = fmul fast <2 x double> %strided.vec82, %strided.vec  ; b.im * a.re
198  %1 = fmul fast <2 x double> %strided.vec81, %strided.vec79  ; b.re * a.im
199  %2 = fmul fast <2 x double> %strided.vec81, %strided.vec  ; b.re * a.re
200  %3 = fmul fast <2 x double> %strided.vec82, %strided.vec79  ; b.im * a.im
201  %strided.vec84 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; c.re
202  %strided.vec85 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; c.im
203  %strided.vec87 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>  ; d.re
204  %strided.vec88 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>  ; d.im
205  %4 = fmul fast <2 x double> %strided.vec87, %strided.vec84  ; d.re * c.re
206  %5 = fmul fast <2 x double> %strided.vec87, %strided.vec85  ; d.re * c.im
207  %6 = fmul fast <2 x double> %strided.vec88, %strided.vec84  ; d.im * c.re
208  %7 = fadd fast <2 x double> %5, %3  ; c.im * d.re + a.im * b.im
209  %8 = fadd fast <2 x double> %7, %6  ; im(c * d) + a.im * b.im
210  %9 = fsub fast <2 x double> %2, %8  ; real part of the result: re(a * b) - im(c * d)
211  %10 = fadd fast <2 x double> %0, %1  ; im(a * b)
212  %11 = fadd fast <2 x double> %10, %4  ; + c.re * d.re
213  %12 = fmul fast <2 x double> %strided.vec88, %strided.vec85  ; d.im * c.im
214  %13 = fsub fast <2 x double> %11, %12  ; imaginary part of the result: im(a * b) + re(c * d)
215  %interleaved.vec = shufflevector <2 x double> %9, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>  ; re-interleave as [re0, im0, re1, im1]
216  ret <4 x double> %interleaved.vec
217}
218