xref: /llvm-project/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll (revision bfc0317153dca75137fba00b5c28758d6f720963)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s --mattr=+sve -o - | FileCheck %s
3
4target triple = "aarch64-unknown-linux-gnu"
5
6; a * b + c
; Complex multiply-add on interleaved <vscale x 4 x double> operands: each
; input is split with llvm.vector.deinterleave2 into field 0 and field 1
; (used below as the real and imaginary parts), a * b is formed from four
; scalar-style products, %c is added component-wise, and the result is
; re-interleaved. Every FP operation carries the `contract` flag.
; The autogenerated assertions expect uzp1/uzp2 + fmul/fmla/fnmsb/fadd +
; zip1/zip2 code and contain no fcmla.
7define <vscale x 4 x double> @mull_add(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c) {
8; CHECK-LABEL: mull_add:
9; CHECK:       // %bb.0: // %entry
10; CHECK-NEXT:    uzp2 z6.d, z0.d, z1.d
11; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
12; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
13; CHECK-NEXT:    uzp1 z2.d, z2.d, z3.d
14; CHECK-NEXT:    ptrue p0.d
15; CHECK-NEXT:    fmul z7.d, z0.d, z1.d
16; CHECK-NEXT:    fmul z1.d, z6.d, z1.d
17; CHECK-NEXT:    movprfx z3, z7
18; CHECK-NEXT:    fmla z3.d, p0/m, z6.d, z2.d
19; CHECK-NEXT:    fnmsb z0.d, p0/m, z2.d, z1.d
20; CHECK-NEXT:    uzp2 z1.d, z4.d, z5.d
21; CHECK-NEXT:    uzp1 z2.d, z4.d, z5.d
22; CHECK-NEXT:    fadd z2.d, z2.d, z0.d
23; CHECK-NEXT:    fadd z1.d, z3.d, z1.d
24; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
25; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
26; CHECK-NEXT:    ret
27entry:
  ; Split %a into %0 (field 0, real) and %1 (field 1, imaginary).
28  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
29  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
30  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  ; Split %b into %2 (real) and %3 (imaginary).
31  %strided.vec29 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
32  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 0
33  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 1
  ; %6 = imaginary part of a * b: a.im * b.re + a.re * b.im.
34  %4 = fmul contract <vscale x 2 x double> %0, %3
35  %5 = fmul contract <vscale x 2 x double> %1, %2
36  %6 = fadd contract <vscale x 2 x double> %5, %4
  ; %9 = real part of a * b: a.re * b.re - a.im * b.im.
37  %7 = fmul contract <vscale x 2 x double> %0, %2
38  %8 = fmul contract <vscale x 2 x double> %1, %3
39  %9 = fsub contract <vscale x 2 x double> %7, %8
  ; Split %c and add it component-wise to the product.
40  %strided.vec31 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
41  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 0
42  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 1
43  %12 = fadd contract <vscale x 2 x double> %10, %9
44  %13 = fadd contract <vscale x 2 x double> %6, %11
  ; Re-interleave the real (%12) and imaginary (%13) results.
45  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %12, <vscale x 2 x double> %13)
46  ret <vscale x 4 x double> %interleaved.vec
47}
48
49; a * b + c * d
; Sum of two complex products on interleaved <vscale x 4 x double> operands.
; Each input is deinterleaved into field 0 / field 1 (used as real / imaginary
; parts), a * b and c * d are each built from four fmul plus one fadd/fsub,
; and the two products are added component-wise before re-interleaving.
; The autogenerated assertions expect four accumulators initialised from a
; zero vector, each updated by an fcmla #0 / fcmla #90 pair, followed by two
; fadd instructions; no uzp/zip shuffles appear.
50define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
51; CHECK-LABEL: mul_add_mull:
52; CHECK:       // %bb.0: // %entry
53; CHECK-NEXT:    mov z24.d, #0 // =0x0
54; CHECK-NEXT:    ptrue p0.d
55; CHECK-NEXT:    mov z25.d, z24.d
56; CHECK-NEXT:    mov z26.d, z24.d
57; CHECK-NEXT:    mov z27.d, z24.d
58; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #0
59; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
60; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
61; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #0
62; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #90
63; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
64; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
65; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #90
66; CHECK-NEXT:    fadd z1.d, z26.d, z24.d
67; CHECK-NEXT:    fadd z0.d, z25.d, z27.d
68; CHECK-NEXT:    ret
69entry:
  ; Split %a into %0 (real) / %1 (imaginary) and %b into %2 / %3.
70  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
71  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
72  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
73  %strided.vec52 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
74  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 0
75  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 1
  ; %6 = imaginary part of a * b: a.im * b.re + a.re * b.im.
76  %4 = fmul contract <vscale x 2 x double> %0, %3
77  %5 = fmul contract <vscale x 2 x double> %1, %2
78  %6 = fadd contract <vscale x 2 x double> %5, %4
  ; %9 = real part of a * b: a.re * b.re - a.im * b.im.
79  %7 = fmul contract <vscale x 2 x double> %0, %2
80  %8 = fmul contract <vscale x 2 x double> %1, %3
81  %9 = fsub contract <vscale x 2 x double> %7, %8
  ; Split %c into %10 (real) / %11 (imaginary) and %d into %12 / %13.
82  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
83  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
84  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
85  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
86  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
87  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  ; %16 = imaginary part of c * d: c.im * d.re + c.re * d.im.
88  %14 = fmul contract <vscale x 2 x double> %10, %13
89  %15 = fmul contract <vscale x 2 x double> %11, %12
90  %16 = fadd contract <vscale x 2 x double> %15, %14
  ; %19 = real part of c * d: c.re * d.re - c.im * d.im.
91  %17 = fmul contract <vscale x 2 x double> %10, %12
92  %18 = fmul contract <vscale x 2 x double> %11, %13
93  %19 = fsub contract <vscale x 2 x double> %17, %18
  ; Add the two products: real parts into %20, imaginary parts into %21.
94  %20 = fadd contract <vscale x 2 x double> %9, %19
95  %21 = fadd contract <vscale x 2 x double> %6, %16
  ; Re-interleave real and imaginary results into the return value.
96  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
97  ret <vscale x 4 x double> %interleaved.vec
98}
99
100; a * b - c * d
; Difference of two complex products on interleaved <vscale x 4 x double>
; operands. The IR builds a * b and c * d exactly as in a sum-of-products
; test, but combines them with fsub (%9 - %19 and %6 - %16).
; The autogenerated assertions expect four accumulators initialised from a
; zero vector, each updated by an fcmla #0 / fcmla #90 pair, followed by two
; fsub instructions.
101define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
102; CHECK-LABEL: mul_sub_mull:
103; CHECK:       // %bb.0: // %entry
104; CHECK-NEXT:    mov z24.d, #0 // =0x0
105; CHECK-NEXT:    ptrue p0.d
106; CHECK-NEXT:    mov z25.d, z24.d
107; CHECK-NEXT:    mov z26.d, z24.d
108; CHECK-NEXT:    mov z27.d, z24.d
109; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #0
110; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
111; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
112; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #0
113; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #90
114; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
115; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
116; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #90
117; CHECK-NEXT:    fsub z1.d, z26.d, z24.d
118; CHECK-NEXT:    fsub z0.d, z25.d, z27.d
119; CHECK-NEXT:    ret
120entry:
  ; Split %a into %0 (real) / %1 (imaginary) and %b into %2 / %3.
121  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
122  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
123  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
124  %strided.vec52 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
125  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 0
126  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 1
  ; %6 = imaginary part of a * b: a.im * b.re + a.re * b.im.
127  %4 = fmul contract <vscale x 2 x double> %0, %3
128  %5 = fmul contract <vscale x 2 x double> %1, %2
129  %6 = fadd contract <vscale x 2 x double> %5, %4
  ; %9 = real part of a * b: a.re * b.re - a.im * b.im.
130  %7 = fmul contract <vscale x 2 x double> %0, %2
131  %8 = fmul contract <vscale x 2 x double> %1, %3
132  %9 = fsub contract <vscale x 2 x double> %7, %8
  ; Split %c into %10 (real) / %11 (imaginary) and %d into %12 / %13.
133  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
134  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
135  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
136  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
137  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
138  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  ; %16 = imaginary part of c * d: c.im * d.re + c.re * d.im.
139  %14 = fmul contract <vscale x 2 x double> %10, %13
140  %15 = fmul contract <vscale x 2 x double> %11, %12
141  %16 = fadd contract <vscale x 2 x double> %15, %14
  ; %19 = real part of c * d: c.re * d.re - c.im * d.im.
142  %17 = fmul contract <vscale x 2 x double> %10, %12
143  %18 = fmul contract <vscale x 2 x double> %11, %13
144  %19 = fsub contract <vscale x 2 x double> %17, %18
  ; Subtract c * d from a * b: real parts into %20, imaginary parts into %21.
145  %20 = fsub contract <vscale x 2 x double> %9, %19
146  %21 = fsub contract <vscale x 2 x double> %6, %16
  ; Re-interleave real and imaginary results into the return value.
147  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
148  ret <vscale x 4 x double> %interleaved.vec
149}
150
151; a * b + conj(c) * d
; Complex product a * b plus the product of conj(c) with d, on interleaved
; <vscale x 4 x double> operands. The second product differs from a plain
; complex multiply only in its signs: the imaginary part is
; c.re * d.im - c.im * d.re and the real part is c.re * d.re + c.im * d.im.
; The autogenerated assertions expect fcmla #0 / #90 pairs for the a * b
; accumulators (z25, z26) and fcmla #0 / #270 pairs for the accumulators fed
; from the %c / %d registers (z24, z27), followed by two fadd instructions.
152define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
153; CHECK-LABEL: mul_conj_mull:
154; CHECK:       // %bb.0: // %entry
155; CHECK-NEXT:    mov z24.d, #0 // =0x0
156; CHECK-NEXT:    ptrue p0.d
157; CHECK-NEXT:    mov z25.d, z24.d
158; CHECK-NEXT:    mov z26.d, z24.d
159; CHECK-NEXT:    mov z27.d, z24.d
160; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #0
161; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
162; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
163; CHECK-NEXT:    fcmla z27.d, p0/m, z4.d, z6.d, #0
164; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #270
165; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
166; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
167; CHECK-NEXT:    fcmla z27.d, p0/m, z4.d, z6.d, #270
168; CHECK-NEXT:    fadd z1.d, z26.d, z24.d
169; CHECK-NEXT:    fadd z0.d, z25.d, z27.d
170; CHECK-NEXT:    ret
171entry:
  ; Split %a into %0 (real) / %1 (imaginary) and %b into %2 / %3.
172  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
173  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
174  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
175  %strided.vec60 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
176  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 0
177  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 1
  ; %6 = imaginary part of a * b: a.im * b.re + a.re * b.im.
178  %4 = fmul contract <vscale x 2 x double> %0, %3
179  %5 = fmul contract <vscale x 2 x double> %1, %2
180  %6 = fadd contract <vscale x 2 x double> %5, %4
  ; %9 = real part of a * b: a.re * b.re - a.im * b.im.
181  %7 = fmul contract <vscale x 2 x double> %0, %2
182  %8 = fmul contract <vscale x 2 x double> %1, %3
183  %9 = fsub contract <vscale x 2 x double> %7, %8
  ; Split %c into %10 (real) / %11 (imaginary) and %d into %12 / %13.
184  %strided.vec62 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
185  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 0
186  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 1
187  %strided.vec64 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
188  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 0
189  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 1
  ; %16 = imaginary part of conj(c) * d: c.re * d.im - c.im * d.re.
190  %14 = fmul contract <vscale x 2 x double> %10, %13
191  %15 = fmul contract <vscale x 2 x double> %11, %12
192  %16 = fsub contract <vscale x 2 x double> %14, %15
  ; %19 = real part of conj(c) * d: c.re * d.re + c.im * d.im.
193  %17 = fmul contract <vscale x 2 x double> %10, %12
194  %18 = fmul contract <vscale x 2 x double> %11, %13
195  %19 = fadd contract <vscale x 2 x double> %17, %18
  ; Add the two products: real parts into %20, imaginary parts into %21.
196  %20 = fadd contract <vscale x 2 x double> %9, %19
197  %21 = fadd contract <vscale x 2 x double> %6, %16
  ; Re-interleave real and imaginary results into the return value.
198  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
199  ret <vscale x 4 x double> %interleaved.vec
200}
201
202; a * b + 1i * c * d
; Complex product a * b plus the product of a rotated c with d, on
; interleaved <vscale x 4 x double> operands. The rotation is spelled out
; with llvm.copysign on a zero vector: %13 = c.re + copysign(0, c.im) and
; %15 = copysign(0, c.re) - c.im, so apart from the signed zeros the pair
; (%15, %13) is (-c.im, c.re), i.e. c multiplied by 1i. That pair is then
; multiplied by d as an ordinary complex product and added to a * b.
; The autogenerated assertions contain no fcmla: they expect uzp1/uzp2
; shuffles, and/orr sequences for the copysign calls, fmul/fmla/fnmsb/fnmls
; arithmetic, and zip1/zip2 to rebuild the result.
203define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
204; CHECK-LABEL: mul_add_rot_mull:
205; CHECK:       // %bb.0: // %entry
206; CHECK-NEXT:    uzp2 z24.d, z4.d, z5.d
207; CHECK-NEXT:    mov z25.d, #0 // =0x0
208; CHECK-NEXT:    uzp1 z4.d, z4.d, z5.d
209; CHECK-NEXT:    ptrue p0.d
210; CHECK-NEXT:    mov z26.d, z24.d
211; CHECK-NEXT:    and z25.d, z25.d, #0x7fffffffffffffff
212; CHECK-NEXT:    and z26.d, z26.d, #0x8000000000000000
213; CHECK-NEXT:    orr z5.d, z25.d, z26.d
214; CHECK-NEXT:    fadd z5.d, z4.d, z5.d
215; CHECK-NEXT:    and z4.d, z4.d, #0x8000000000000000
216; CHECK-NEXT:    orr z4.d, z25.d, z4.d
217; CHECK-NEXT:    uzp2 z25.d, z0.d, z1.d
218; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
219; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
220; CHECK-NEXT:    uzp1 z2.d, z2.d, z3.d
221; CHECK-NEXT:    fsub z4.d, z4.d, z24.d
222; CHECK-NEXT:    uzp2 z24.d, z6.d, z7.d
223; CHECK-NEXT:    uzp1 z6.d, z6.d, z7.d
224; CHECK-NEXT:    fmul z26.d, z0.d, z1.d
225; CHECK-NEXT:    fmul z1.d, z25.d, z1.d
226; CHECK-NEXT:    fmul z3.d, z4.d, z24.d
227; CHECK-NEXT:    fmul z24.d, z5.d, z24.d
228; CHECK-NEXT:    movprfx z7, z26
229; CHECK-NEXT:    fmla z7.d, p0/m, z25.d, z2.d
230; CHECK-NEXT:    fnmsb z0.d, p0/m, z2.d, z1.d
231; CHECK-NEXT:    movprfx z1, z3
232; CHECK-NEXT:    fmla z1.d, p0/m, z6.d, z5.d
233; CHECK-NEXT:    movprfx z2, z24
234; CHECK-NEXT:    fnmls z2.d, p0/m, z4.d, z6.d
235; CHECK-NEXT:    fadd z2.d, z0.d, z2.d
236; CHECK-NEXT:    fadd z1.d, z7.d, z1.d
237; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
238; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
239; CHECK-NEXT:    ret
240entry:
  ; Split %a into %0 (real) / %1 (imaginary) and %b into %2 / %3.
241  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
242  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
243  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
244  %strided.vec78 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
245  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec78, 0
246  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec78, 1
  ; %6 = imaginary part of a * b: a.im * b.re + a.re * b.im.
247  %4 = fmul contract <vscale x 2 x double> %0, %3
248  %5 = fmul contract <vscale x 2 x double> %1, %2
249  %6 = fadd contract <vscale x 2 x double> %5, %4
  ; %9 = real part of a * b: a.re * b.re - a.im * b.im.
250  %7 = fmul contract <vscale x 2 x double> %0, %2
251  %8 = fmul contract <vscale x 2 x double> %1, %3
252  %9 = fsub contract <vscale x 2 x double> %7, %8
  ; Split %c into %10 (real) / %11 (imaginary).
253  %strided.vec80 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
254  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 0
255  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 1
  ; %13 = c.re + (zero carrying the sign of c.im).
256  %12 = tail call contract <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %11)
257  %13 = fadd contract <vscale x 2 x double> %10, %12
  ; %15 = (zero carrying the sign of c.re) - c.im.
258  %14 = tail call contract <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %10)
259  %15 = fsub contract <vscale x 2 x double> %14, %11
  ; Split %d into %16 (real) / %17 (imaginary).
260  %strided.vec82 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
261  %16 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 0
262  %17 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 1
  ; %20 = imaginary part of (%15 + 1i * %13) * d: %13 * d.re + %15 * d.im.
263  %18 = fmul contract <vscale x 2 x double> %15, %17
264  %19 = fmul contract <vscale x 2 x double> %16, %13
265  %20 = fadd contract <vscale x 2 x double> %19, %18
  ; %23 = real part of (%15 + 1i * %13) * d: %15 * d.re - %13 * d.im.
266  %21 = fmul contract <vscale x 2 x double> %15, %16
267  %22 = fmul contract <vscale x 2 x double> %13, %17
268  %23 = fsub contract <vscale x 2 x double> %21, %22
  ; Add the two products: real parts into %24, imaginary parts into %25.
269  %24 = fadd contract <vscale x 2 x double> %9, %23
270  %25 = fadd contract <vscale x 2 x double> %6, %20
  ; Re-interleave real and imaginary results into the return value.
271  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %24, <vscale x 2 x double> %25)
272  ret <vscale x 4 x double> %interleaved.vec
273}
274
; Declarations of the intrinsics called by the test functions in this file:
; deinterleave2 splits a <vscale x 4 x double> into two <vscale x 2 x double>
; halves, interleave2 merges two halves back, and copysign is used only by
; @mul_add_rot_mull.
275declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
276declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
277declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
278