xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
3
4target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
5target triple = "i386-apple-macosx10.8.0"
6
; @foo: a 100-iteration loop that, on each trip, loads the two adjacent doubles
; D[2*i] and D[2*i + 1], raises each to the fourth power (two successive
; squarings), adds the pair together and accumulates the result into a running
; sum that is returned on exit.
; The assertions below expect the two loads and both levels of fmul to be
; merged into <2 x double> operations, while the final pair-wise fadd and the
; accumulation stay scalar on extracted lanes.
7define double @foo(ptr nocapture %D) {
8; CHECK-LABEL: @foo(
9; CHECK-NEXT:    br label [[TMP1:%.*]]
10; CHECK:       1:
11; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP11:%.*]], [[TMP1]] ]
12; CHECK-NEXT:    [[SUM_01:%.*]] = phi double [ 0.000000e+00, [[TMP0]] ], [ [[TMP10:%.*]], [[TMP1]] ]
13; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw i32 [[I_02]], 1
14; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[D:%.*]], i32 [[TMP2]]
15; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[TMP3]], align 4
16; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP4]]
17; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP5]]
18; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
19; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
20; CHECK-NEXT:    [[TMP9:%.*]] = fadd double [[TMP7]], [[TMP8]]
21; CHECK-NEXT:    [[TMP10]] = fadd double [[SUM_01]], [[TMP9]]
22; CHECK-NEXT:    [[TMP11]] = add nsw i32 [[I_02]], 1
23; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP11]], 100
24; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP12:%.*]], label [[TMP1]]
25; CHECK:       12:
26; CHECK-NEXT:    ret double [[TMP10]]
27;
28  br label %1
29
30; <label>:1                                       ; preds = %1, %0
  ; %i.02 is the induction variable; %sum.01 is the running sum (starts at 0.0).
31  %i.02 = phi i32 [ 0, %0 ], [ %10, %1 ]
32  %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
  ; Even element D[2*i]: %A42 = (x*x)*(x*x).
33  %2 = shl nsw i32 %i.02, 1
34  %3 = getelementptr inbounds double, ptr %D, i32 %2
35  %4 = load double, ptr %3, align 4
36  %A4 = fmul double %4, %4
37  %A42 = fmul double %A4, %A4
  ; Odd element D[2*i + 1]: %2 is a left shift by one, so its low bit is clear
  ; and 'or disjoint ... 1' yields the next index. Same fourth-power chain.
38  %5 = or disjoint i32 %2, 1
39  %6 = getelementptr inbounds double, ptr %D, i32 %5
40  %7 = load double, ptr %6, align 4
41  %A7 = fmul double %7, %7
42  %A72 = fmul double %A7, %A7
  ; Add the pair, then fold it into the running sum.
43  %8 = fadd double %A42, %A72
44  %9 = fadd double %sum.01, %8
  ; Advance the counter and leave the loop once it reaches 100.
45  %10 = add nsw i32 %i.02, 1
46  %exitcond = icmp eq i32 %10, 100
47  br i1 %exitcond, label %11, label %1
48
49; <label>:11                                      ; preds = %1
50  ret double %9
51}
52
; @two_wide_fcmp_reduction: the input is already a <2 x double> compare
; (each lane 'ogt' 1.0) whose two i1 lanes are extracted and combined with a
; scalar 'and'. The assertions below expect the same four instructions to come
; out of the pass pipeline, i.e. the two-lane 'and' is not rewritten; the only
; textual difference is that the constant vector is printed in 'splat' form.
53define i1 @two_wide_fcmp_reduction(<2 x double> %a0) {
54; CHECK-LABEL: @two_wide_fcmp_reduction(
55; CHECK-NEXT:    [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], splat (double 1.000000e+00)
56; CHECK-NEXT:    [[B:%.*]] = extractelement <2 x i1> [[A]], i32 0
57; CHECK-NEXT:    [[C:%.*]] = extractelement <2 x i1> [[A]], i32 1
58; CHECK-NEXT:    [[D:%.*]] = and i1 [[B]], [[C]]
59; CHECK-NEXT:    ret i1 [[D]]
60;
61  %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
  ; Combine both compare lanes: the result is true only if both lanes are > 1.0.
62  %b = extractelement <2 x i1> %a, i32 0
63  %c = extractelement <2 x i1> %a, i32 1
64  %d = and i1 %b, %c
65  ret i1 %d
66}
67
; @fadd_reduction: adds 1.0 to each lane of a <2 x double> with the 'fast'
; flag, then extracts both lanes and sums them with a scalar 'fadd fast'.
; The assertions below expect the instruction sequence to be left as written
; (the two-lane add is not rewritten); the constant vector is printed in
; 'splat' form.
68define double @fadd_reduction(<2 x double> %a0) {
69; CHECK-LABEL: @fadd_reduction(
70; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x double> [[A0:%.*]], splat (double 1.000000e+00)
71; CHECK-NEXT:    [[B:%.*]] = extractelement <2 x double> [[A]], i32 0
72; CHECK-NEXT:    [[C:%.*]] = extractelement <2 x double> [[A]], i32 1
73; CHECK-NEXT:    [[D:%.*]] = fadd fast double [[B]], [[C]]
74; CHECK-NEXT:    ret double [[D]]
75;
76  %a = fadd fast <2 x double> %a0, <double 1.000000e+00, double 1.000000e+00>
  ; Horizontal sum of the two lanes.
77  %b = extractelement <2 x double> %a, i32 0
78  %c = extractelement <2 x double> %a, i32 1
79  %d = fadd fast double %b, %c
80  ret double %d
81}
82
83; PR43745 https://bugs.llvm.org/show_bug.cgi?id=43745
84
; @fcmp_lt_gt: computes two quotients that share the divisor 2*a,
;   %div  = (c - b)  / (2*a)
;   %div3 = (-b - c) / (2*a)
; and tests them in two basic blocks:
;   entry:         if both are 'olt' 0x3EB0C6F7A0B5ED8D (about 1e-6), branch to
;                  'cleanup' and return false;
;   lor.lhs.false: otherwise return whether either quotient is 'ule' 1.0.
; The assertions below expect the fsub and fdiv pairs to be merged into
; <2 x double> operations, with lane 0 holding %div3 and lane 1 holding %div.
; The two 'olt' compares in the entry block stay scalar on extracted lanes,
; while the two 'ule' compares become one vector fcmp followed by extracts and
; a scalar 'or'.
85define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
86; CHECK-LABEL: @fcmp_lt_gt(
87; CHECK-NEXT:  entry:
88; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[B:%.*]]
89; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
90; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1
91; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[FNEG]], i32 0
92; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
93; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1
94; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
95; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
96; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer
97; CHECK-NEXT:    [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
98; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
99; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D
100; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
101; CHECK-NEXT:    [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D
102; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]]
103; CHECK-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
104; CHECK:       lor.lhs.false:
105; CHECK-NEXT:    [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00)
106; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
107; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
108; CHECK-NEXT:    [[NOT_OR_COND9:%.*]] = or i1 [[TMP11]], [[TMP12]]
109; CHECK-NEXT:    ret i1 [[NOT_OR_COND9]]
110; CHECK:       cleanup:
111; CHECK-NEXT:    ret i1 false
112;
113entry:
  ; Two numerators over the common divisor %mul = 2*a.
114  %fneg = fneg double %b
115  %add = fsub double %c, %b
116  %mul = fmul double %a, 2.000000e+00
117  %div = fdiv double %add, %mul
118  %sub = fsub double %fneg, %c
119  %div3 = fdiv double %sub, %mul
  ; Both quotients below the small threshold -> take the 'cleanup' exit.
120  %cmp = fcmp olt double %div, 0x3EB0C6F7A0B5ED8D
121  %cmp4 = fcmp olt double %div3, 0x3EB0C6F7A0B5ED8D
122  %or.cond = and i1 %cmp, %cmp4
123  br i1 %or.cond, label %cleanup, label %lor.lhs.false
124
125lor.lhs.false:
  ; 'ule' is also true for unordered (NaN) operands; the result is the 'or' of
  ; the two per-quotient tests against 1.0.
126  %cmp5 = fcmp ule double %div, 1.000000e+00
127  %cmp7 = fcmp ule double %div3, 1.000000e+00
128  %not.or.cond9 = or i1 %cmp7, %cmp5
129  ret i1 %not.or.cond9
130
131cleanup:
132  ret i1 false
133}
134
; @fcmp_lt: single-block function that computes two quotients sharing the
; divisor 2*a,
;   %div  = (c - b)  / (2*a)
;   %div3 = (-b - c) / (2*a)
; and returns whether either one is 'uge' 0x3EB0C6F7A0B5ED8D (about 1e-6).
; Note that, despite the function name, the predicate used in the IR is 'uge'
; (true when greater-or-equal or unordered), not a less-than compare.
; The assertions below expect the fsub, fdiv and fcmp pairs to be merged into
; <2 x double> operations, followed by two lane extracts and a scalar 'or'.
135define i1 @fcmp_lt(double %a, double %b, double %c) {
136; CHECK-LABEL: @fcmp_lt(
137; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[B:%.*]]
138; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
139; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1
140; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[FNEG]], i32 0
141; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
142; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1
143; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
144; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
145; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer
146; CHECK-NEXT:    [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]]
147; CHECK-NEXT:    [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], splat (double 0x3EB0C6F7A0B5ED8D)
148; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
149; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
150; CHECK-NEXT:    [[NOT_OR_COND:%.*]] = or i1 [[TMP10]], [[TMP11]]
151; CHECK-NEXT:    ret i1 [[NOT_OR_COND]]
152;
  ; Two numerators over the common divisor %mul = 2*a.
153  %fneg = fneg double %b
154  %add = fsub double %c, %b
155  %mul = fmul double %a, 2.000000e+00
156  %div = fdiv double %add, %mul
157  %sub = fsub double %fneg, %c
158  %div3 = fdiv double %sub, %mul
  ; Same predicate and same constant on both quotients, combined with 'or'.
159  %cmp = fcmp uge double %div, 0x3EB0C6F7A0B5ED8D
160  %cmp4 = fcmp uge double %div3, 0x3EB0C6F7A0B5ED8D
161  %not.or.cond = or i1 %cmp4, %cmp
162  ret i1 %not.or.cond
163}
164