xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll (revision 1c55cc600e99a963233d6f467373c8f16a1b8826)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -O3 -S                                        | FileCheck %s
3; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
4
; x86-64 triple with the vendor and OS components left empty.
5target triple = "x86_64--"
; Data layout fields: little-endian (e), ELF name mangling (m:e), i64 aligned
; to 64 bits, f80 aligned to 128 bits, native integer widths 8/16/32/64, and a
; 128-bit natural stack alignment (S128).
6target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
7
8; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
9; That may require some coordination between VectorCombine, SLP, and other passes.
10; The end goal is to get a single "vaddsubps" instruction for x86 with AVX.
11
; @PR45015: builds a <4 x float> result one lane at a time from %arg and
; %arg1, subtracting in lanes 0 and 2 and adding in lanes 1 and 3.
; The autogenerated assertions below expect the pipeline to reduce this to one
; vector fsub, one vector fadd, and a shufflevector that takes lanes 0/2 from
; the difference and lanes 1/3 from the sum (mask <0, 5, 2, 7>).
12define <4 x float> @PR45015(<4 x float> %arg, <4 x float> %arg1) {
13; CHECK-LABEL: @PR45015(
14; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[ARG:%.*]], [[ARG1:%.*]]
15; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]]
16; CHECK-NEXT:    [[T16:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
17; CHECK-NEXT:    ret <4 x float> [[T16]]
18;
  ; Lane 0: %arg[0] - %arg1[0], inserted into an undef vector.
19  %t = extractelement <4 x float> %arg, i32 0
20  %t2 = extractelement <4 x float> %arg1, i32 0
21  %t3 = fsub float %t, %t2
22  %t4 = insertelement <4 x float> undef, float %t3, i32 0
  ; Lane 1: %arg[1] + %arg1[1].
23  %t5 = extractelement <4 x float> %arg, i32 1
24  %t6 = extractelement <4 x float> %arg1, i32 1
25  %t7 = fadd float %t5, %t6
26  %t8 = insertelement <4 x float> %t4, float %t7, i32 1
  ; Lane 2: %arg[2] - %arg1[2].
27  %t9 = extractelement <4 x float> %arg, i32 2
28  %t10 = extractelement <4 x float> %arg1, i32 2
29  %t11 = fsub float %t9, %t10
30  %t12 = insertelement <4 x float> %t8, float %t11, i32 2
  ; Lane 3: %arg[3] + %arg1[3]; this insert completes the result vector.
31  %t13 = extractelement <4 x float> %arg, i32 3
32  %t14 = extractelement <4 x float> %arg1, i32 3
33  %t15 = fadd float %t13, %t14
34  %t16 = insertelement <4 x float> %t12, float %t15, i32 3
35  ret <4 x float> %t16
36}
37
38; PR42022 - https://bugs.llvm.org/show_bug.cgi?id=42022
39
; Four consecutive float fields; @add_aggregate_store uses this type in its
; getelementptr instructions to address fields 1-3 of the output pointer.
40%struct.Vector4 = type { float, float, float, float }
41
; @add_aggregate: adds %a0 to %b0 and %a1 to %b1 element by element, rebuilds
; each pair of scalar sums into a <2 x float>, and returns both vectors in a
; two-field aggregate.
; The autogenerated assertions below expect two <2 x float> fadds feeding the
; two insertvalue instructions, with no scalar extract/insert left in between.
42define { <2 x float>, <2 x float> } @add_aggregate(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1) {
43; CHECK-LABEL: @add_aggregate(
44; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
45; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
46; CHECK-NEXT:    [[FCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP1]], 0
47; CHECK-NEXT:    [[FCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[FCA_0_INSERT]], <2 x float> [[TMP2]], 1
48; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[FCA_1_INSERT]]
49;
  ; First result vector, element 0: %a0[0] + %b0[0].
50  %a00 = extractelement <2 x float> %a0, i32 0
51  %b00 = extractelement <2 x float> %b0, i32 0
52  %add = fadd float %a00, %b00
53  %retval.0.0.insert = insertelement <2 x float> undef, float %add, i32 0
  ; First result vector, element 1: %a0[1] + %b0[1].
54  %a01 = extractelement <2 x float> %a0, i32 1
55  %b01 = extractelement <2 x float> %b0, i32 1
56  %add4 = fadd float %a01, %b01
57  %retval.0.1.insert = insertelement <2 x float> %retval.0.0.insert, float %add4, i32 1
  ; Second result vector, element 0: %a1[0] + %b1[0].
58  %a10 = extractelement <2 x float> %a1, i32 0
59  %b10 = extractelement <2 x float> %b1, i32 0
60  %add7 = fadd float %a10, %b10
61  %retval.1.0.insert = insertelement <2 x float> undef, float %add7, i32 0
  ; Second result vector, element 1: %a1[1] + %b1[1].
62  %a11 = extractelement <2 x float> %a1, i32 1
63  %b11 = extractelement <2 x float> %b1, i32 1
64  %add10 = fadd float %a11, %b11
65  %retval.1.1.insert = insertelement <2 x float> %retval.1.0.insert, float %add10, i32 1
  ; Pack the two <2 x float> sums into the returned aggregate (fields 0 and 1).
66  %fca.0.insert = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> %retval.0.1.insert, 0
67  %fca.1.insert = insertvalue { <2 x float>, <2 x float> } %fca.0.insert, <2 x float> %retval.1.1.insert, 1
68  ret { <2 x float>, <2 x float> } %fca.1.insert
69}
70
; @add_aggregate_store: adds %a0 to %b0 and %a1 to %b1 element by element and
; stores each scalar sum to one float field of the %struct.Vector4 addressed
; by %r (field 0 through %r itself, fields 1-3 through getelementptr).
; The autogenerated assertions below expect two <2 x float> fadds, a
; shufflevector that concatenates them into a <4 x float>, and a single vector
; store to %r that keeps the 4-byte alignment of the scalar stores.
71define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1, ptr nocapture dereferenceable(16) %r) {
72; CHECK-LABEL: @add_aggregate_store(
73; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
74; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
75; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
76; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[R:%.*]], align 4
77; CHECK-NEXT:    ret void
78;
  ; Field 0: %a0[0] + %b0[0], stored directly through %r.
79  %a00 = extractelement <2 x float> %a0, i32 0
80  %b00 = extractelement <2 x float> %b0, i32 0
81  %add = fadd float %a00, %b00
82  store float %add, ptr %r, align 4
  ; Field 1: %a0[1] + %b0[1].
83  %a01 = extractelement <2 x float> %a0, i32 1
84  %b01 = extractelement <2 x float> %b0, i32 1
85  %add4 = fadd float %a01, %b01
86  %r1 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 1
87  store float %add4, ptr %r1, align 4
  ; Field 2: %a1[0] + %b1[0].
88  %a10 = extractelement <2 x float> %a1, i32 0
89  %b10 = extractelement <2 x float> %b1, i32 0
90  %add7 = fadd float %a10, %b10
91  %r2 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 2
92  store float %add7, ptr %r2, align 4
  ; Field 3: %a1[1] + %b1[1].
93  %a11 = extractelement <2 x float> %a1, i32 1
94  %b11 = extractelement <2 x float> %b1, i32 1
95  %add10 = fadd float %a11, %b11
96  %r3 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 3
97  store float %add10, ptr %r3, align 4
98  ret void
99}
100