; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \
; RUN:     -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
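; Informally speaking (see MachineCombiner.cpp for the authoritative flag
; descriptions): -machine-combiner-inc-threshold=0 forces the incremental
; depth-update path even for these small blocks, and
; -machine-combiner-verify-pattern-order=true asks the combiner to double-check
; the order in which candidate patterns are evaluated.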

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
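; For example, under -enable-unsafe-fp-math the serial chain
;   ((x0 + x1) + x2) + x3
; is expected to be rebalanced into two independent adds,
;   (x0 + x1) + (x2 + x3)
; which is exactly the shape the CHECK-UNSAFE lines below match.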

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds1:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds1:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds1_fast(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1_fast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    fadd s1, s2, s3
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %t0 = fadd fast float %x0, %x1
  %t1 = fadd fast float %t0, %x2
  %t2 = fadd fast float %t1, %x3
  ret float %t2
}

define float @reassociate_adds1_reassoc(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds1_reassoc:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds1_reassoc:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd reassoc float %x0, %x1
  %t1 = fadd reassoc float %t0, %x2
  %t2 = fadd reassoc float %t1, %x3
  ret float %t2
}
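
; With only the 'reassoc' flag the CHECK-STD run above keeps the serial chain;
; reassociation at the MI level appears to also require the 'nsz' flag (or the
; global -enable-unsafe-fp-math), so only the CHECK-UNSAFE output changes. This
; is an inference from the expected output, not from the pass documentation.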

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds2:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s2, s0
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds2:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds3:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds3:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s3, s2
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds4:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s2, s0
; CHECK-STD-NEXT:    fadd s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds4:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s3, s2
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
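; Judging from the CHECK-UNSAFE lines below, the combiner produces
;   (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7
; with a critical path of four adds, rather than the fully balanced tree
;   ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
; which would need only three.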

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-STD-LABEL: reassociate_adds5:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    fadd s0, s0, s4
; CHECK-STD-NEXT:    fadd s0, s0, s5
; CHECK-STD-NEXT:    fadd s0, s0, s6
; CHECK-STD-NEXT:    fadd s0, s0, s7
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds5:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s4, s5
; CHECK-UNSAFE-NEXT:    fadd s1, s1, s6
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s7
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
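; In other words, the preferred shape is (x3 + x2) + (x0 / x1): the two cheap
; adds can execute while the fdiv is still in flight, and the division result
; feeds only the final add.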

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds6:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s2, s0
; CHECK-STD-NEXT:    fadd s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds6:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s3, s2
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_muls1:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv s0, s0, s1
; CHECK-STD-NEXT:    fmul s0, s2, s0
; CHECK-STD-NEXT:    fmul s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls1:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv s0, s0, s1
; CHECK-UNSAFE-NEXT:    fmul s1, s3, s2
; CHECK-UNSAFE-NEXT:    fmul s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-STD-LABEL: reassociate_adds_double:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv d0, d0, d1
; CHECK-STD-NEXT:    fadd d0, d2, d0
; CHECK-STD-NEXT:    fadd d0, d3, d0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_double:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv d0, d0, d1
; CHECK-UNSAFE-NEXT:    fadd d1, d3, d2
; CHECK-UNSAFE-NEXT:    fadd d0, d1, d0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-STD-LABEL: reassociate_muls_double:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv d0, d0, d1
; CHECK-STD-NEXT:    fmul d0, d2, d0
; CHECK-STD-NEXT:    fmul d0, d3, d0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_double:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv d0, d0, d1
; CHECK-UNSAFE-NEXT:    fmul d1, d3, d2
; CHECK-UNSAFE-NEXT:    fmul d0, d1, d0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that scalar half-precision adds are reassociated.

define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-STD-LABEL: reassociate_adds_half:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv h0, h0, h1
; CHECK-STD-NEXT:    fadd h0, h2, h0
; CHECK-STD-NEXT:    fadd h0, h3, h0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_half:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT:    fadd h2, h3, h2
; CHECK-UNSAFE-NEXT:    fadd h0, h2, h0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv half %x0, %x1
  %t1 = fadd half %x2, %t0
  %t2 = fadd half %x3, %t1
  ret half %t2
}

; Verify that scalar half-precision multiplies are reassociated.

define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-STD-LABEL: reassociate_muls_half:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv h0, h0, h1
; CHECK-STD-NEXT:    fmul h0, h2, h0
; CHECK-STD-NEXT:    fmul h0, h3, h0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_half:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT:    fmul h2, h3, h2
; CHECK-UNSAFE-NEXT:    fmul h0, h2, h0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv half %x0, %x1
  %t1 = fmul half %x2, %t0
  %t2 = fmul half %x3, %t1
  ret half %t2
}

; Verify that scalar integer adds are reassociated.
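; Integer addition is associative regardless of fast-math flags, so a single
; set of CHECK lines covers both RUN configurations; the udiv plays the same
; long-latency role as the fdiv in the tests above.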

define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_adds_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    udiv w8, w0, w1
; CHECK-NEXT:    add w9, w3, w2
; CHECK-NEXT:    add w0, w9, w8
; CHECK-NEXT:    ret
  %t0 = udiv i32 %x0, %x1
  %t1 = add i32 %x2, %t0
  %t2 = add i32 %x3, %t1
  ret i32 %t2
}

define i64 @reassociate_adds_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_adds_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    udiv x8, x0, x1
; CHECK-NEXT:    add x9, x3, x2
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %t0 = udiv i64 %x0, %x1
  %t1 = add i64 %x2, %t0
  %t2 = add i64 %x3, %t1
  ret i64 %t2
}

; Verify that scalar bitwise operations are reassociated.
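; Likewise, and/or/xor are associative, so these reassociations need no
; fast-math flags either.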

define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_ands_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, w1
; CHECK-NEXT:    and w9, w2, w3
; CHECK-NEXT:    and w0, w8, w9
; CHECK-NEXT:    ret
  %t0 = and i32 %x0, %x1
  %t1 = and i32 %t0, %x2
  %t2 = and i32 %t1, %x3
  ret i32 %t2
}

define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_ors_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr x8, x0, x1
; CHECK-NEXT:    orr x9, x2, x3
; CHECK-NEXT:    orr x0, x8, x9
; CHECK-NEXT:    ret
  %t0 = or i64 %x0, %x1
  %t1 = or i64 %t0, %x2
  %t2 = or i64 %t1, %x3
  ret i64 %t2
}

define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_xors_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor w8, w0, w1
; CHECK-NEXT:    eor w9, w2, w3
; CHECK-NEXT:    eor w0, w8, w9
; CHECK-NEXT:    ret
  %t0 = xor i32 %x0, %x1
  %t1 = xor i32 %t0, %x2
  %t2 = xor i32 %t1, %x3
  ret i32 %t2
}

; Verify that we reassociate vector instructions too.

define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds1:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v2.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v3.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds1:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v2.4s, v3.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds2:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v3.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds2:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v2.4s, v3.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds3:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v2.4s
; CHECK-STD-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds3:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds4:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds4:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that 64-bit vector half-precision adds are reassociated.

define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
; CHECK-STD-LABEL: reassociate_adds_v4f16:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4h, v0.4h, v1.4h
; CHECK-STD-NEXT:    fadd v0.4h, v2.4h, v0.4h
; CHECK-STD-NEXT:    fadd v0.4h, v3.4h, v0.4h
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4h, v0.4h, v1.4h
; CHECK-UNSAFE-NEXT:    fadd v1.4h, v3.4h, v2.4h
; CHECK-UNSAFE-NEXT:    fadd v0.4h, v1.4h, v0.4h
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x half> %x0, %x1
  %t1 = fadd <4 x half> %x2, %t0
  %t2 = fadd <4 x half> %x3, %t1
  ret <4 x half> %t2
}

; Verify that 128-bit vector half-precision multiplies are reassociated.

define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v8f16:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-STD-NEXT:    fmul v0.8h, v2.8h, v0.8h
; CHECK-STD-NEXT:    fmul v0.8h, v3.8h, v0.8h
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-UNSAFE-NEXT:    fmul v1.8h, v3.8h, v2.8h
; CHECK-UNSAFE-NEXT:    fmul v0.8h, v1.8h, v0.8h
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <8 x half> %x0, %x1
  %t1 = fmul <8 x half> %x2, %t0
  %t2 = fmul <8 x half> %x3, %t1
  ret <8 x half> %t2
}

; Verify that 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v4f32:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fmul v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT:    fmul v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fmul v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v2f64:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.2d, v0.2d, v1.2d
; CHECK-STD-NEXT:    fmul v0.2d, v2.2d, v0.2d
; CHECK-STD-NEXT:    fmul v0.2d, v3.2d, v0.2d
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.2d, v0.2d, v1.2d
; CHECK-UNSAFE-NEXT:    fmul v1.2d, v3.2d, v2.2d
; CHECK-UNSAFE-NEXT:    fmul v0.2d, v1.2d, v0.2d
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that vector integer arithmetic operations are reassociated.

define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) {
; CHECK-LABEL: reassociate_muls_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    mul v1.2s, v3.2s, v2.2s
; CHECK-NEXT:    mul v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    ret
  %t0 = mul <2 x i32> %x0, %x1
  %t1 = mul <2 x i32> %x2, %t0
  %t2 = mul <2 x i32> %x3, %t1
  ret <2 x i32> %t2
}

define <2 x i64> @reassociate_adds_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: reassociate_adds_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    add v1.2d, v3.2d, v2.2d
; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    ret
  %t0 = add <2 x i64> %x0, %x1
  %t1 = add <2 x i64> %x2, %t0
  %t2 = add <2 x i64> %x3, %t1
  ret <2 x i64> %t2
}

; Verify that vector bitwise operations are reassociated.

define <16 x i8> @reassociate_ands_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
; CHECK-LABEL: reassociate_ands_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v1.16b, v2.16b, v3.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %t0 = or <16 x i8> %x0, %x1
  %t1 = and <16 x i8> %t0, %x2
  %t2 = and <16 x i8> %t1, %x3
  ret <16 x i8> %t2
}

define <4 x i16> @reassociate_ors_v4i16(<4 x i16> %x0, <4 x i16> %x1, <4 x i16> %x2, <4 x i16> %x3) {
; CHECK-LABEL: reassociate_ors_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    orr v1.8b, v2.8b, v3.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %t0 = xor <4 x i16> %x0, %x1
  %t1 = or <4 x i16> %t0, %x2
  %t2 = or <4 x i16> %t1, %x3
  ret <4 x i16> %t2
}

define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: reassociate_xors_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    eor v1.16b, v2.16b, v3.16b
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %t0 = and <4 x i32> %x0, %x1
  %t1 = xor <4 x i32> %t0, %x2
  %t2 = xor <4 x i32> %t1, %x3
  ret <4 x i32> %t2
}

; Verify that scalable vector FP arithmetic operations are reassociated.
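; The scalable-vector FP tests below carry the 'reassoc' flag in the IR; as
; with reassociate_adds1_reassoc above, only the CHECK-UNSAFE run is expected
; to reassociate them.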

define <vscale x 8 x half> @reassociate_adds_nxv4f16(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) {
; CHECK-STD-LABEL: reassociate_adds_nxv4f16:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd z0.h, z0.h, z1.h
; CHECK-STD-NEXT:    fadd z0.h, z2.h, z0.h
; CHECK-STD-NEXT:    fadd z0.h, z3.h, z0.h
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f16:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd z0.h, z0.h, z1.h
; CHECK-UNSAFE-NEXT:    fadd z1.h, z3.h, z2.h
; CHECK-UNSAFE-NEXT:    fadd z0.h, z1.h, z0.h
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd reassoc <vscale x 8 x half> %x0, %x1
  %t1 = fadd reassoc <vscale x 8 x half> %x2, %t0
  %t2 = fadd reassoc <vscale x 8 x half> %x3, %t1
  ret <vscale x 8 x half> %t2
}

define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
; CHECK-STD-LABEL: reassociate_adds_nxv4f32:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-STD-NEXT:    fadd z0.s, z2.s, z0.s
; CHECK-STD-NEXT:    fadd z0.s, z3.s, z0.s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-UNSAFE-NEXT:    fadd z1.s, z3.s, z2.s
; CHECK-UNSAFE-NEXT:    fadd z0.s, z1.s, z0.s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd reassoc <vscale x 4 x float> %x0, %x1
  %t1 = fadd reassoc <vscale x 4 x float> %x2, %t0
  %t2 = fadd reassoc <vscale x 4 x float> %x3, %t1
  ret <vscale x 4 x float> %t2
}

define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
; CHECK-STD-LABEL: reassociate_muls_nxv2f64:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-STD-NEXT:    fmul z0.d, z2.d, z0.d
; CHECK-STD-NEXT:    fmul z0.d, z3.d, z0.d
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-UNSAFE-NEXT:    fmul z1.d, z3.d, z2.d
; CHECK-UNSAFE-NEXT:    fmul z0.d, z1.d, z0.d
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fmul reassoc <vscale x 2 x double> %x0, %x1
  %t1 = fmul reassoc <vscale x 2 x double> %x2, %t0
  %t2 = fmul reassoc <vscale x 2 x double> %x3, %t1
  ret <vscale x 2 x double> %t2
}

; Verify that scalable vector integer arithmetic operations are reassociated.

define <vscale x 16 x i8> @reassociate_muls_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
; CHECK-LABEL: reassociate_muls_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.b, z0.b, z1.b
; CHECK-NEXT:    mul z1.b, z3.b, z2.b
; CHECK-NEXT:    mul z0.b, z1.b, z0.b
; CHECK-NEXT:    ret
  %t0 = mul <vscale x 16 x i8> %x0, %x1
  %t1 = mul <vscale x 16 x i8> %x2, %t0
  %t2 = mul <vscale x 16 x i8> %x3, %t1
  ret <vscale x 16 x i8> %t2
}

define <vscale x 8 x i16> @reassociate_adds_nxv8i16(<vscale x 8 x i16> %x0, <vscale x 8 x i16> %x1, <vscale x 8 x i16> %x2, <vscale x 8 x i16> %x3) {
; CHECK-LABEL: reassociate_adds_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    add z1.h, z3.h, z2.h
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    ret
  %t0 = add <vscale x 8 x i16> %x0, %x1
  %t1 = add <vscale x 8 x i16> %x2, %t0
  %t2 = add <vscale x 8 x i16> %x3, %t1
  ret <vscale x 8 x i16> %t2
}

define <vscale x 4 x i32> @reassociate_muls_nxv4i32(<vscale x 4 x i32> %x0, <vscale x 4 x i32> %x1, <vscale x 4 x i32> %x2, <vscale x 4 x i32> %x3) {
; CHECK-LABEL: reassociate_muls_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, z1.s
; CHECK-NEXT:    mul z1.s, z3.s, z2.s
; CHECK-NEXT:    mul z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
  %t0 = mul <vscale x 4 x i32> %x0, %x1
  %t1 = mul <vscale x 4 x i32> %x2, %t0
  %t2 = mul <vscale x 4 x i32> %x3, %t1
  ret <vscale x 4 x i32> %t2
}

define <vscale x 2 x i64> @reassociate_adds_nxv2i64(<vscale x 2 x i64> %x0, <vscale x 2 x i64> %x1, <vscale x 2 x i64> %x2, <vscale x 2 x i64> %x3) {
; CHECK-LABEL: reassociate_adds_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    add z1.d, z3.d, z2.d
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
  %t0 = add <vscale x 2 x i64> %x0, %x1
  %t1 = add <vscale x 2 x i64> %x2, %t0
  %t2 = add <vscale x 2 x i64> %x3, %t1
  ret <vscale x 2 x i64> %t2
}

; Verify that scalable vector bitwise operations are reassociated.

define <vscale x 16 x i8> @reassociate_ands_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
; CHECK-LABEL: reassociate_ands_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    and z1.d, z2.d, z3.d
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %t0 = or <vscale x 16 x i8> %x0, %x1
  %t1 = and <vscale x 16 x i8> %t0, %x2
  %t2 = and <vscale x 16 x i8> %t1, %x3
  ret <vscale x 16 x i8> %t2
}

define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vscale x 8 x i16> %x1, <vscale x 8 x i16> %x2, <vscale x 8 x i16> %x3) {
; CHECK-LABEL: reassociate_ors_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    orr z1.d, z2.d, z3.d
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %t0 = xor <vscale x 8 x i16> %x0, %x1
  %t1 = or <vscale x 8 x i16> %t0, %x2
  %t2 = or <vscale x 8 x i16> %t1, %x3
  ret <vscale x 8 x i16> %t2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.
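; In reassociate_adds_from_calls the operands come from four separate calls;
; the CHECK-UNSAFE run may still rebalance the dependent adds, while
; already_reassociated below is already a balanced tree and its expected
; output is unchanged.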

declare double @bar()

define double @reassociate_adds_from_calls() {
; CHECK-STD-LABEL: reassociate_adds_from_calls:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-STD-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-STD-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-STD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-STD-NEXT:    .cfi_offset w30, -8
; CHECK-STD-NEXT:    .cfi_offset b8, -16
; CHECK-STD-NEXT:    .cfi_offset b9, -24
; CHECK-STD-NEXT:    .cfi_offset b10, -32
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fmov d8, d0
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fmov d9, d0
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fmov d10, d0
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fadd d1, d8, d9
; CHECK-STD-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-STD-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-STD-NEXT:    fadd d1, d1, d10
; CHECK-STD-NEXT:    fadd d0, d1, d0
; CHECK-STD-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-UNSAFE-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-UNSAFE-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-UNSAFE-NEXT:    .cfi_def_cfa_offset 32
; CHECK-UNSAFE-NEXT:    .cfi_offset w30, -8
; CHECK-UNSAFE-NEXT:    .cfi_offset b8, -16
; CHECK-UNSAFE-NEXT:    .cfi_offset b9, -24
; CHECK-UNSAFE-NEXT:    .cfi_offset b10, -32
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fmov d8, d0
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fmov d9, d0
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fmov d10, d0
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fadd d1, d8, d9
; CHECK-UNSAFE-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-UNSAFE-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT:    fadd d0, d10, d0
; CHECK-UNSAFE-NEXT:    fadd d0, d1, d0
; CHECK-UNSAFE-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT:    ret
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; CHECK-LABEL: already_reassociated:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset b8, -16
; CHECK-NEXT:    .cfi_offset b9, -24
; CHECK-NEXT:    .cfi_offset b10, -32
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fmov d8, d0
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fmov d9, d0
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fmov d10, d0
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fadd d1, d8, d9
; CHECK-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT:    fadd d0, d10, d0
; CHECK-NEXT:    fadd d0, d1, d0
; CHECK-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}