; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
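
; This file checks that the SLP vectorizer turns straight-line scalar calls to
; the @llvm.maximum.* / @llvm.minimum.* intrinsics into vector intrinsic calls,
; and collapses fmaximum/fminimum reduction chains into @llvm.vector.reduce.*
; calls, for both a baseline SSE target and corei7-avx.
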
declare float @llvm.maximum.f32(float, float)
declare float @llvm.minimum.f32(float, float)
declare double @llvm.maximum.f64(double, double)
declare double @llvm.minimum.f64(double, double)

@srcA64 = common global [8 x double] zeroinitializer, align 64
@srcB64 = common global [8 x double] zeroinitializer, align 64
@srcC64 = common global [8 x double] zeroinitializer, align 64
@srcA32 = common global [16 x float] zeroinitializer, align 64
@srcB32 = common global [16 x float] zeroinitializer, align 64
@srcC32 = common global [16 x float] zeroinitializer, align 64
@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64

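; Two scalar @llvm.maximum.f64 calls on consecutive elements are expected to be
; vectorized into a single <2 x double> @llvm.maximum.v2f64 call on both SSE
; and AVX.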
define void @fmaximum_2f64() {
; SSE-LABEL: define void @fmaximum_2f64() {
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
; SSE-NEXT:    ret void
;
; AVX-LABEL: define void @fmaximum_2f64
; AVX-SAME: () #[[ATTR1:[0-9]+]] {
; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
; AVX-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; AVX-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
; AVX-NEXT:    ret void
;
  %a0 = load double, ptr @srcA64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
  %b0 = load double, ptr @srcB64, align 8
  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
  %fmaximum0 = call double @llvm.maximum.f64(double %a0, double %b0)
  %fmaximum1 = call double @llvm.maximum.f64(double %a1, double %b1)
  store double %fmaximum0, ptr @dst64, align 8
  store double %fmaximum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

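; With four lanes, SSE is expected to split the work into two <2 x double>
; @llvm.maximum.v2f64 calls, while AVX vectorizes to a single <4 x double>
; @llvm.maximum.v4f64 call.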
define void @fmaximum_4f64() {
; SSE-LABEL: define void @fmaximum_4f64() {
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
; SSE-NEXT:    ret void
;
; AVX-LABEL: define void @fmaximum_4f64
; AVX-SAME: () #[[ATTR1]] {
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 8
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 8
; AVX-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.maximum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
; AVX-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 8
; AVX-NEXT:    ret void
;
  %a0 = load double, ptr @srcA64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
  %b0 = load double, ptr @srcB64, align 8
  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
  %fmaximum0 = call double @llvm.maximum.f64(double %a0, double %b0)
  %fmaximum1 = call double @llvm.maximum.f64(double %a1, double %b1)
  %fmaximum2 = call double @llvm.maximum.f64(double %a2, double %b2)
  %fmaximum3 = call double @llvm.maximum.f64(double %a3, double %b3)
  store double %fmaximum0, ptr @dst64, align 8
  store double %fmaximum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %fmaximum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
  store double %fmaximum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

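; Eight lanes with underaligned (align 4) double loads: SSE is expected to emit
; four <2 x double> @llvm.maximum.v2f64 calls and AVX two <4 x double>
; @llvm.maximum.v4f64 calls.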
define void @fmaximum_8f64() {
; SSE-LABEL: define void @fmaximum_8f64() {
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 4
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 4
; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 4
; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP9:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]])
; SSE-NEXT:    store <2 x double> [[TMP9]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
; SSE-NEXT:    [[TMP12:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP10]], <2 x double> [[TMP11]])
; SSE-NEXT:    store <2 x double> [[TMP12]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
; SSE-NEXT:    ret void
;
; AVX-LABEL: define void @fmaximum_8f64
; AVX-SAME: () #[[ATTR1]] {
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 4
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 4
; AVX-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.maximum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
; AVX-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 4
; AVX-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
; AVX-NEXT:    [[TMP5:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
; AVX-NEXT:    [[TMP6:%.*]] = call <4 x double> @llvm.maximum.v4f64(<4 x double> [[TMP4]], <4 x double> [[TMP5]])
; AVX-NEXT:    store <4 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
; AVX-NEXT:    ret void
;
  %a0 = load double, ptr @srcA64, align 4
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
  %b0 = load double, ptr @srcB64, align 4
  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
  %b4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
  %b5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
  %b6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
  %b7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
  %fmaximum0 = call double @llvm.maximum.f64(double %a0, double %b0)
  %fmaximum1 = call double @llvm.maximum.f64(double %a1, double %b1)
  %fmaximum2 = call double @llvm.maximum.f64(double %a2, double %b2)
  %fmaximum3 = call double @llvm.maximum.f64(double %a3, double %b3)
  %fmaximum4 = call double @llvm.maximum.f64(double %a4, double %b4)
  %fmaximum5 = call double @llvm.maximum.f64(double %a5, double %b5)
  %fmaximum6 = call double @llvm.maximum.f64(double %a6, double %b6)
  %fmaximum7 = call double @llvm.maximum.f64(double %a7, double %b7)
  store double %fmaximum0, ptr @dst64, align 4
  store double %fmaximum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
  store double %fmaximum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
  store double %fmaximum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
  store double %fmaximum4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
  store double %fmaximum5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
  store double %fmaximum6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
  store double %fmaximum7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
  ret void
}

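; A two-element fmaximum reduction is left alone; both prefixes check that the
; scalar @llvm.maximum.f64 call survives unvectorized.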
define double @reduction_v2f64(ptr %p) {
; SSE-LABEL: define double @reduction_v2f64
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 1
; SSE-NEXT:    [[T0:%.*]] = load double, ptr [[P]], align 4
; SSE-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
; SSE-NEXT:    [[M1:%.*]] = tail call double @llvm.maximum.f64(double [[T1]], double [[T0]])
; SSE-NEXT:    ret double [[M1]]
;
; AVX-LABEL: define double @reduction_v2f64
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 1
; AVX-NEXT:    [[T0:%.*]] = load double, ptr [[P]], align 4
; AVX-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
; AVX-NEXT:    [[M1:%.*]] = tail call double @llvm.maximum.f64(double [[T1]], double [[T0]])
; AVX-NEXT:    ret double [[M1]]
;
  %g1 = getelementptr inbounds double, ptr %p, i64 1
  %t0 = load double, ptr %p, align 4
  %t1 = load double, ptr %g1, align 4
  %m1 = tail call double @llvm.maximum.f64(double %t1, double %t0)
  ret double %m1
}

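; A four-element fmaximum reduction chain is expected to be recognized and
; rewritten as a single vector load feeding @llvm.vector.reduce.fmaximum.v4f32.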
define float @reduction_v4f32(ptr %p) {
; SSE-LABEL: define float @reduction_v4f32
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P]], align 4
; SSE-NEXT:    [[TMP2:%.*]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[TMP1]])
; SSE-NEXT:    ret float [[TMP2]]
;
; AVX-LABEL: define float @reduction_v4f32
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P]], align 4
; AVX-NEXT:    [[TMP2:%.*]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[TMP1]])
; AVX-NEXT:    ret float [[TMP2]]
;
  %g1 = getelementptr inbounds float, ptr %p, i64 1
  %g2 = getelementptr inbounds float, ptr %p, i64 2
  %g3 = getelementptr inbounds float, ptr %p, i64 3
  %t0 = load float, ptr %p, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %m1 = tail call float @llvm.maximum.f32(float %t1, float %t0)
  %m2 = tail call float @llvm.maximum.f32(float %t2, float %m1)
  %m3 = tail call float @llvm.maximum.f32(float %t3, float %m2)
  ret float %m3
}

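; The same applies to a four-element fminimum reduction over doubles, which is
; expected to become @llvm.vector.reduce.fminimum.v4f64.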
define double @reduction_v4f64_fminimum(ptr %p) {
; SSE-LABEL: define double @reduction_v4f64_fminimum
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[P]], align 4
; SSE-NEXT:    [[TMP2:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[TMP1]])
; SSE-NEXT:    ret double [[TMP2]]
;
; AVX-LABEL: define double @reduction_v4f64_fminimum
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[P]], align 4
; AVX-NEXT:    [[TMP2:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[TMP1]])
; AVX-NEXT:    ret double [[TMP2]]
;
  %g1 = getelementptr inbounds double, ptr %p, i64 1
  %g2 = getelementptr inbounds double, ptr %p, i64 2
  %g3 = getelementptr inbounds double, ptr %p, i64 3
  %t0 = load double, ptr %p, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call double @llvm.minimum.f64(double %t1, double %t0)
  %m2 = tail call double @llvm.minimum.f64(double %t2, double %m1)
  %m3 = tail call double @llvm.minimum.f64(double %t3, double %m2)
  ret double %m3
}

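; An eight-element fminimum reduction whose chain consumes %t5 and %t6 out of
; order is still expected to form @llvm.vector.reduce.fminimum.v8f32.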
define float @reduction_v8f32_fminimum(ptr %p) {
; SSE-LABEL: define float @reduction_v8f32_fminimum
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 4
; SSE-NEXT:    [[TMP2:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP1]])
; SSE-NEXT:    ret float [[TMP2]]
;
; AVX-LABEL: define float @reduction_v8f32_fminimum
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 4
; AVX-NEXT:    [[TMP2:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP1]])
; AVX-NEXT:    ret float [[TMP2]]
;
  %g1 = getelementptr inbounds float, ptr %p, i64 1
  %g2 = getelementptr inbounds float, ptr %p, i64 2
  %g3 = getelementptr inbounds float, ptr %p, i64 3
  %g4 = getelementptr inbounds float, ptr %p, i64 4
  %g5 = getelementptr inbounds float, ptr %p, i64 5
  %g6 = getelementptr inbounds float, ptr %p, i64 6
  %g7 = getelementptr inbounds float, ptr %p, i64 7
  %t0 = load float, ptr %p, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %m1 = tail call float @llvm.minimum.f32(float %t1, float %t0)
  %m2 = tail call float @llvm.minimum.f32(float %t2, float %m1)
  %m3 = tail call float @llvm.minimum.f32(float %t3, float %m2)
  %m4 = tail call float @llvm.minimum.f32(float %t4, float %m3)
  %m5 = tail call float @llvm.minimum.f32(float %m4, float %t6)
  %m6 = tail call float @llvm.minimum.f32(float %m5, float %t5)
  %m7 = tail call float @llvm.minimum.f32(float %m6, float %t7)
  ret float %m7
}