xref: /llvm-project/llvm/test/CodeGen/AArch64/reassocmls.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s
3
; Scalar signed widening case: computes
;   %a - (sext(%e) * sext(%d) + sext(%c) * sext(%b))   in i64.
; The checks expect the subtraction of the summed products to be emitted as
; two chained smsubl instructions (x0 -> x8 -> x0), with no separate mul/add.
4define i64 @smlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
5; CHECK-LABEL: smlsl_i64:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    smsubl x8, w4, w3, x0
8; CHECK-NEXT:    smsubl x0, w2, w1, x8
9; CHECK-NEXT:    ret
10  %be = sext i32 %b to i64
11  %ce = sext i32 %c to i64
12  %de = sext i32 %d to i64
13  %ee = sext i32 %e to i64
14  %m1.neg = mul nsw i64 %ce, %be
15  %m2.neg = mul nsw i64 %ee, %de
16  %reass.add = add i64 %m2.neg, %m1.neg
17  %s2 = sub i64 %a, %reass.add
18  ret i64 %s2
19}
20
; Scalar unsigned widening case: computes
;   %a - (zext(%e) * zext(%d) + zext(%c) * zext(%b))   in i64.
; The checks expect two chained umsubl instructions (x0 -> x8 -> x0), with no
; separate mul/add.
21define i64 @umlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
22; CHECK-LABEL: umlsl_i64:
23; CHECK:       // %bb.0:
24; CHECK-NEXT:    umsubl x8, w4, w3, x0
25; CHECK-NEXT:    umsubl x0, w2, w1, x8
26; CHECK-NEXT:    ret
27  %be = zext i32 %b to i64
28  %ce = zext i32 %c to i64
29  %de = zext i32 %d to i64
30  %ee = zext i32 %e to i64
31  %m1.neg = mul nuw i64 %ce, %be
32  %m2.neg = mul nuw i64 %ee, %de
33  %reass.add = add i64 %m2.neg, %m1.neg
34  %s2 = sub i64 %a, %reass.add
35  ret i64 %s2
36}
37
; Plain i64 case (no extensions): computes %a - (%e * %d + %c * %b).
; The checks expect two chained msub instructions (x0 -> x8 -> x0) rather
; than a mul/madd pair followed by a sub.
38define i64 @mls_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
39; CHECK-LABEL: mls_i64:
40; CHECK:       // %bb.0:
41; CHECK-NEXT:    msub x8, x4, x3, x0
42; CHECK-NEXT:    msub x0, x2, x1, x8
43; CHECK-NEXT:    ret
44  %m1.neg = mul i64 %c, %b
45  %m2.neg = mul i64 %e, %d
46  %reass.add = add i64 %m2.neg, %m1.neg
47  %s2 = sub i64 %a, %reass.add
48  ret i64 %s2
49}
50
; Narrow scalar case: the same %a - (%e * %d + %c * %b) expression on i16.
; The checks expect two chained msub instructions operating on the 32-bit
; w registers (w0 -> w8 -> w0).
51define i16 @mls_i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e) {
52; CHECK-LABEL: mls_i16:
53; CHECK:       // %bb.0:
54; CHECK-NEXT:    msub w8, w4, w3, w0
55; CHECK-NEXT:    msub w0, w2, w1, w8
56; CHECK-NEXT:    ret
57  %m1.neg = mul i16 %c, %b
58  %m2.neg = mul i16 %e, %d
59  %reass.add = add i16 %m2.neg, %m1.neg
60  %s2 = sub i16 %a, %reass.add
61  ret i16 %s2
62}
63
; Addition counterpart: computes (%c * %b + %e * %d) + %a.
; The checks expect the sum of products to stay as mul + madd, followed by a
; separate add of %a (x0); it is not turned into two chained madd instructions.
; NOTE(review): presumably this pins that only the subtract form is rewritten;
; confirm against the combine this file covers.
64define i64 @mla_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
65; CHECK-LABEL: mla_i64:
66; CHECK:       // %bb.0:
67; CHECK-NEXT:    mul x8, x4, x3
68; CHECK-NEXT:    madd x8, x2, x1, x8
69; CHECK-NEXT:    add x0, x8, x0
70; CHECK-NEXT:    ret
71  %m1 = mul i64 %c, %b
72  %m2 = mul i64 %e, %d
73  %s1 = add i64 %m1, %m2
74  %s2 = add i64 %s1, %a
75  ret i64 %s2
76}
77
; Constant minuend: computes 10 - (%e * %d + %c * %b); %a is unused.
; The checks expect the sum of products to be kept as mul + madd, with the
; constant materialised by mov and a single sub at the end (no chained msub).
78define i64 @mls_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
79; CHECK-LABEL: mls_i64_C:
80; CHECK:       // %bb.0:
81; CHECK-NEXT:    mul x8, x2, x1
82; CHECK-NEXT:    mov w9, #10 // =0xa
83; CHECK-NEXT:    madd x8, x4, x3, x8
84; CHECK-NEXT:    sub x0, x9, x8
85; CHECK-NEXT:    ret
86  %m1.neg = mul i64 %c, %b
87  %m2.neg = mul i64 %e, %d
88  %reass.add = add i64 %m2.neg, %m1.neg
89  %s2 = sub i64 10, %reass.add
90  ret i64 %s2
91}
92
; Minuend is itself a product: computes
;   zext(%c) * zext(%d) - (zext(%e) * zext(%d) + zext(%c) * zext(%b)).
; %a is unused. The checks expect one umull for the minuend (%m3) followed by
; two chained umsubl instructions that subtract the other two products.
93define i64 @umlsl_i64_muls(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
94; CHECK-LABEL: umlsl_i64_muls:
95; CHECK:       // %bb.0:
96; CHECK-NEXT:    umull x8, w2, w3
97; CHECK-NEXT:    umsubl x8, w4, w3, x8
98; CHECK-NEXT:    umsubl x0, w2, w1, x8
99; CHECK-NEXT:    ret
100  %be = zext i32 %b to i64
101  %ce = zext i32 %c to i64
102  %de = zext i32 %d to i64
103  %ee = zext i32 %e to i64
104  %m1.neg = mul nuw i64 %ce, %be
105  %m2.neg = mul nuw i64 %ee, %de
106  %m3 = mul nuw i64 %ce, %de
107  %reass.add = add i64 %m2.neg, %m1.neg
108  %s2 = sub i64 %m3, %reass.add
109  ret i64 %s2
110}
111
; Extra-use case: the sum of products (%reass.add) feeds both the sub from %a
; and the final `and`, so it has two users.
; The checks expect the sum to be computed once (umull + umaddl) and then used
; by a plain sub and an and; no umsubl chain is expected here.
112define i64 @umlsl_i64_uses(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
113; CHECK-LABEL: umlsl_i64_uses:
114; CHECK:       // %bb.0:
115; CHECK-NEXT:    umull x8, w4, w3
116; CHECK-NEXT:    umaddl x8, w2, w1, x8
117; CHECK-NEXT:    sub x9, x0, x8
118; CHECK-NEXT:    and x0, x8, x9
119; CHECK-NEXT:    ret
120  %be = zext i32 %b to i64
121  %ce = zext i32 %c to i64
122  %de = zext i32 %d to i64
123  %ee = zext i32 %e to i64
124  %m1.neg = mul nuw i64 %ce, %be
125  %m2.neg = mul nuw i64 %ee, %de
126  %reass.add = add i64 %m2.neg, %m1.neg
127  %s2 = sub i64 %a, %reass.add
128  %o = and i64 %reass.add, %s2
129  ret i64 %o
130}
131
; Addition with a constant: computes 10 + (%e * %d + %c * %b); %a is unused.
; The checks expect mul + madd for the sum of products and then an add with
; the immediate #10.
132define i64 @mla_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
133; CHECK-LABEL: mla_i64_C:
134; CHECK:       // %bb.0:
135; CHECK-NEXT:    mul x8, x2, x1
136; CHECK-NEXT:    madd x8, x4, x3, x8
137; CHECK-NEXT:    add x0, x8, #10
138; CHECK-NEXT:    ret
139  %m1.neg = mul i64 %c, %b
140  %m2.neg = mul i64 %e, %d
141  %reass.add = add i64 %m2.neg, %m1.neg
142  %s2 = add i64 10, %reass.add
143  ret i64 %s2
144}
145
; Addition form with an extra use: the sum of products (%reass.add) feeds both
; the add with %a and the final xor.
; The checks expect the sum to be computed once (mul + madd), then a plain add
; and an eor.
146define i64 @mla_i64_uses(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
147; CHECK-LABEL: mla_i64_uses:
148; CHECK:       // %bb.0:
149; CHECK-NEXT:    mul x8, x2, x1
150; CHECK-NEXT:    madd x8, x4, x3, x8
151; CHECK-NEXT:    add x9, x0, x8
152; CHECK-NEXT:    eor x0, x8, x9
153; CHECK-NEXT:    ret
154  %m1.neg = mul i64 %c, %b
155  %m2.neg = mul i64 %e, %d
156  %reass.add = add i64 %m2.neg, %m1.neg
157  %s2 = add i64 %a, %reass.add
158  %o = xor i64 %reass.add, %s2
159  ret i64 %o
160}
161
; Addition form where one product is reused: %m1.neg is an operand of the sum
; of products and is also added to that sum. %a is unused.
; The checks expect a single mul for %m1.neg (x8), a madd producing the sum
; into x9, and a final add of x8 and x9.
162define i64 @mla_i64_mul(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
163; CHECK-LABEL: mla_i64_mul:
164; CHECK:       // %bb.0:
165; CHECK-NEXT:    mul x8, x2, x1
166; CHECK-NEXT:    madd x9, x4, x3, x8
167; CHECK-NEXT:    add x0, x8, x9
168; CHECK-NEXT:    ret
169  %m1.neg = mul i64 %c, %b
170  %m2.neg = mul i64 %e, %d
171  %reass.add = add i64 %m2.neg, %m1.neg
172  %s2 = add i64 %m1.neg, %reass.add
173  ret i64 %s2
174}
175
176
; Fixed-width vector, signed widening: computes
;   %a - (sext(%e) * sext(%d) + sext(%c) * sext(%b))   on <8 x i16>,
; with the multiplicands sign-extended from <8 x i8>.
; The checks expect two smlsl instructions accumulating directly into v0.
177define <8 x i16> @smlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {
178; CHECK-LABEL: smlsl_v8i16:
179; CHECK:       // %bb.0:
180; CHECK-NEXT:    smlsl v0.8h, v4.8b, v3.8b
181; CHECK-NEXT:    smlsl v0.8h, v2.8b, v1.8b
182; CHECK-NEXT:    ret
183  %be = sext <8 x i8> %b to <8 x i16>
184  %ce = sext <8 x i8> %c to <8 x i16>
185  %de = sext <8 x i8> %d to <8 x i16>
186  %ee = sext <8 x i8> %e to <8 x i16>
187  %m1.neg = mul nsw <8 x i16> %ce, %be
188  %m2.neg = mul nsw <8 x i16> %ee, %de
189  %reass.add = add <8 x i16> %m2.neg, %m1.neg
190  %s2 = sub <8 x i16> %a, %reass.add
191  ret <8 x i16> %s2
192}
193
; Fixed-width vector, unsigned widening: computes
;   %a - (zext(%e) * zext(%d) + zext(%c) * zext(%b))   on <8 x i16>,
; with the multiplicands zero-extended from <8 x i8>.
; The checks expect two umlsl instructions accumulating directly into v0.
194define <8 x i16> @umlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {
195; CHECK-LABEL: umlsl_v8i16:
196; CHECK:       // %bb.0:
197; CHECK-NEXT:    umlsl v0.8h, v4.8b, v3.8b
198; CHECK-NEXT:    umlsl v0.8h, v2.8b, v1.8b
199; CHECK-NEXT:    ret
200  %be = zext <8 x i8> %b to <8 x i16>
201  %ce = zext <8 x i8> %c to <8 x i16>
202  %de = zext <8 x i8> %d to <8 x i16>
203  %ee = zext <8 x i8> %e to <8 x i16>
204  %m1.neg = mul nuw <8 x i16> %ce, %be
205  %m2.neg = mul nuw <8 x i16> %ee, %de
206  %reass.add = add <8 x i16> %m2.neg, %m1.neg
207  %s2 = sub <8 x i16> %a, %reass.add
208  ret <8 x i16> %s2
209}
210
; Fixed-width vector, no extensions: computes %a - (%e * %d + %c * %b) on
; <8 x i16>.
; The checks expect two mls instructions accumulating directly into v0.
211define <8 x i16> @mls_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
212; CHECK-LABEL: mls_v8i16:
213; CHECK:       // %bb.0:
214; CHECK-NEXT:    mls v0.8h, v4.8h, v3.8h
215; CHECK-NEXT:    mls v0.8h, v2.8h, v1.8h
216; CHECK-NEXT:    ret
217  %m1.neg = mul <8 x i16> %c, %b
218  %m2.neg = mul <8 x i16> %e, %d
219  %reass.add = add <8 x i16> %m2.neg, %m1.neg
220  %s2 = sub <8 x i16> %a, %reass.add
221  ret <8 x i16> %s2
222}
223
; Fixed-width vector addition counterpart: computes (%c * %b + %e * %d) + %a
; on <8 x i16>.
; The checks expect mul + mla for the sum of products and then a separate add
; with v0, not two mla instructions accumulating into v0.
224define <8 x i16> @mla_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
225; CHECK-LABEL: mla_v8i16:
226; CHECK:       // %bb.0:
227; CHECK-NEXT:    mul v3.8h, v4.8h, v3.8h
228; CHECK-NEXT:    mla v3.8h, v2.8h, v1.8h
229; CHECK-NEXT:    add v0.8h, v3.8h, v0.8h
230; CHECK-NEXT:    ret
231  %m1 = mul <8 x i16> %c, %b
232  %m2 = mul <8 x i16> %e, %d
233  %s1 = add <8 x i16> %m1, %m2
234  %s2 = add <8 x i16> %s1, %a
235  ret <8 x i16> %s2
236}
237
; Fixed-width vector with a constant minuend: computes
;   splat(10) - (%e * %d + %c * %b)   on <8 x i16>; %a is unused.
; The checks expect the splat to be materialised with movi into v0 and then
; two mls instructions accumulating into it.
238define <8 x i16> @mls_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
239; CHECK-LABEL: mls_v8i16_C:
240; CHECK:       // %bb.0:
241; CHECK-NEXT:    movi v0.8h, #10
242; CHECK-NEXT:    mls v0.8h, v4.8h, v3.8h
243; CHECK-NEXT:    mls v0.8h, v2.8h, v1.8h
244; CHECK-NEXT:    ret
245  %m1.neg = mul <8 x i16> %c, %b
246  %m2.neg = mul <8 x i16> %e, %d
247  %reass.add = add <8 x i16> %m2.neg, %m1.neg
248  %s2 = sub <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>, %reass.add
249  ret <8 x i16> %s2
250}
251
; Fixed-width vector addition with a constant: computes
;   splat(10) + (%e * %d + %c * %b)   on <8 x i16>; %a is unused.
; The checks expect mul + mla for the sum of products, a movi for the splat,
; and a separate add combining the two.
252define <8 x i16> @mla_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
253; CHECK-LABEL: mla_v8i16_C:
254; CHECK:       // %bb.0:
255; CHECK-NEXT:    mul v1.8h, v2.8h, v1.8h
256; CHECK-NEXT:    movi v0.8h, #10
257; CHECK-NEXT:    mla v1.8h, v4.8h, v3.8h
258; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
259; CHECK-NEXT:    ret
260  %m1.neg = mul <8 x i16> %c, %b
261  %m2.neg = mul <8 x i16> %e, %d
262  %reass.add = add <8 x i16> %m2.neg, %m1.neg
263  %s2 = add <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>, %reass.add
264  ret <8 x i16> %s2
265}
266
267
; Scalable vector, signed extension: computes
;   %a - (sext(%e) * sext(%d) + sext(%c) * sext(%b))   on <vscale x 8 x i16>,
; with the multiplicands sign-extended from <vscale x 8 x i8>.
; The checks expect each sext to be emitted as a predicated sxtb on .h
; elements, followed by two predicated mls instructions accumulating into z0.
268define <vscale x 8 x i16> @smlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
269; CHECK-LABEL: smlsl_nxv8i16:
270; CHECK:       // %bb.0:
271; CHECK-NEXT:    ptrue p0.h
272; CHECK-NEXT:    sxtb z3.h, p0/m, z3.h
273; CHECK-NEXT:    sxtb z4.h, p0/m, z4.h
274; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
275; CHECK-NEXT:    sxtb z2.h, p0/m, z2.h
276; CHECK-NEXT:    mls z0.h, p0/m, z4.h, z3.h
277; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
278; CHECK-NEXT:    ret
279  %be = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
280  %ce = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
281  %de = sext <vscale x 8 x i8> %d to <vscale x 8 x i16>
282  %ee = sext <vscale x 8 x i8> %e to <vscale x 8 x i16>
283  %m1.neg = mul nsw <vscale x 8 x i16> %ce, %be
284  %m2.neg = mul nsw <vscale x 8 x i16> %ee, %de
285  %reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
286  %s2 = sub <vscale x 8 x i16> %a, %reass.add
287  ret <vscale x 8 x i16> %s2
288}
289
; Scalable vector, unsigned extension: computes
;   %a - (zext(%e) * zext(%d) + zext(%c) * zext(%b))   on <vscale x 8 x i16>,
; with the multiplicands zero-extended from <vscale x 8 x i8>.
; The checks expect each zext to be emitted as an `and` with #0xff on .h
; elements, followed by two predicated mls instructions accumulating into z0.
290define <vscale x 8 x i16> @umlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
291; CHECK-LABEL: umlsl_nxv8i16:
292; CHECK:       // %bb.0:
293; CHECK-NEXT:    and z3.h, z3.h, #0xff
294; CHECK-NEXT:    and z4.h, z4.h, #0xff
295; CHECK-NEXT:    ptrue p0.h
296; CHECK-NEXT:    and z1.h, z1.h, #0xff
297; CHECK-NEXT:    and z2.h, z2.h, #0xff
298; CHECK-NEXT:    mls z0.h, p0/m, z4.h, z3.h
299; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
300; CHECK-NEXT:    ret
301  %be = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
302  %ce = zext <vscale x 8 x i8> %c to <vscale x 8 x i16>
303  %de = zext <vscale x 8 x i8> %d to <vscale x 8 x i16>
304  %ee = zext <vscale x 8 x i8> %e to <vscale x 8 x i16>
305  %m1.neg = mul nuw <vscale x 8 x i16> %ce, %be
306  %m2.neg = mul nuw <vscale x 8 x i16> %ee, %de
307  %reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
308  %s2 = sub <vscale x 8 x i16> %a, %reass.add
309  ret <vscale x 8 x i16> %s2
310}
311
; Scalable vector, no extensions: computes %a - (%e * %d + %c * %b) on
; <vscale x 8 x i16>.
; The checks expect a ptrue followed by two predicated mls instructions
; accumulating into z0.
312define <vscale x 8 x i16> @mls_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e) {
313; CHECK-LABEL: mls_nxv8i16:
314; CHECK:       // %bb.0:
315; CHECK-NEXT:    ptrue p0.h
316; CHECK-NEXT:    mls z0.h, p0/m, z4.h, z3.h
317; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
318; CHECK-NEXT:    ret
319  %m1.neg = mul <vscale x 8 x i16> %c, %b
320  %m2.neg = mul <vscale x 8 x i16> %e, %d
321  %reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
322  %s2 = sub <vscale x 8 x i16> %a, %reass.add
323  ret <vscale x 8 x i16> %s2
324}
325
; Scalable vector addition counterpart: computes (%c * %b + %e * %d) + %a on
; <vscale x 8 x i16>.
; The checks expect an unpredicated mul, a predicated mla producing the sum of
; products in z1, and a separate add with z0.
326define <vscale x 8 x i16> @mla_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e) {
327; CHECK-LABEL: mla_nxv8i16:
328; CHECK:       // %bb.0:
329; CHECK-NEXT:    mul z1.h, z2.h, z1.h
330; CHECK-NEXT:    ptrue p0.h
331; CHECK-NEXT:    mla z1.h, p0/m, z4.h, z3.h
332; CHECK-NEXT:    add z0.h, z1.h, z0.h
333; CHECK-NEXT:    ret
334  %m1 = mul <vscale x 8 x i16> %c, %b
335  %m2 = mul <vscale x 8 x i16> %e, %d
336  %s1 = add <vscale x 8 x i16> %m1, %m2
337  %s2 = add <vscale x 8 x i16> %s1, %a
338  ret <vscale x 8 x i16> %s2
339}
340