; xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmin-fmax.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \
3; RUN:   | FileCheck %s
4
5; Test vectorization and reassociation of fmin/fmax operations. Vectorization
6; is more profitable if the loads are also vectorizable.
7
8define double @fmin_double_4_nums_seq(ptr nocapture noundef readonly %x) {
9; CHECK-LABEL: define double @fmin_double_4_nums_seq(
10; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0:[0-9]+]] {
11; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
12; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP1]])
13; CHECK-NEXT:    ret double [[TMP2]]
14;
; Four consecutive double loads (offsets 0..3 from %x) feed a chain of fast
; minnum calls. Because the loads are contiguous, SLP is expected to emit a
; single <4 x double> load plus one @llvm.vector.reduce.fmin (CHECK lines above).
15  %g1 = getelementptr inbounds double, ptr %x, i64 1
16  %g2 = getelementptr inbounds double, ptr %x, i64 2
17  %g3 = getelementptr inbounds double, ptr %x, i64 3
18  %t0 = load double, ptr %x, align 4
19  %t1 = load double, ptr %g1, align 4
20  %t2 = load double, ptr %g2, align 4
21  %t3 = load double, ptr %g3, align 4
22  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
23  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
24  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
25  ret double %m3
26}
27
28define double @fmin_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
29; CHECK-LABEL: define double @fmin_double_16_nums_nonseq(
30; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
31; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
32; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
33; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
34; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
35; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
36; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
37; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
38; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
39; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
40; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
41; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
42; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
43; CHECK-NEXT:    [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
44; CHECK-NEXT:    [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
45; CHECK-NEXT:    [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
46; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[X]], align 4
47; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
48; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
49; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
50; CHECK-NEXT:    [[T4:%.*]] = load double, ptr [[G4]], align 4
51; CHECK-NEXT:    [[T5:%.*]] = load double, ptr [[G5]], align 4
52; CHECK-NEXT:    [[T6:%.*]] = load double, ptr [[G6]], align 4
53; CHECK-NEXT:    [[T7:%.*]] = load double, ptr [[G7]], align 4
54; CHECK-NEXT:    [[T8:%.*]] = load double, ptr [[G8]], align 4
55; CHECK-NEXT:    [[T9:%.*]] = load double, ptr [[G9]], align 4
56; CHECK-NEXT:    [[T10:%.*]] = load double, ptr [[G10]], align 4
57; CHECK-NEXT:    [[T11:%.*]] = load double, ptr [[G11]], align 4
58; CHECK-NEXT:    [[T12:%.*]] = load double, ptr [[G12]], align 4
59; CHECK-NEXT:    [[T13:%.*]] = load double, ptr [[G13]], align 4
60; CHECK-NEXT:    [[T14:%.*]] = load double, ptr [[G14]], align 4
61; CHECK-NEXT:    [[T15:%.*]] = load double, ptr [[G15]], align 4
62; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
63; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
64; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
65; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
66; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
67; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
68; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
69; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
70; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
71; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
72; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
73; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
74; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
75; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
76; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
77; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
78; CHECK-NEXT:    [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> [[TMP16]])
79; CHECK-NEXT:    ret double [[TMP17]]
80;
; Sixteen loads at a stride of two doubles, so they cannot be merged into
; wide vector loads. SLP is still expected to vectorize the fast-minnum
; reduction chain by gathering the scalars with insertelements into a
; <16 x double> and calling @llvm.vector.reduce.fmin (CHECK lines above).
81  %g1 = getelementptr inbounds double, ptr %x, i64 2
82  %g2 = getelementptr inbounds double, ptr %x, i64 4
83  %g3 = getelementptr inbounds double, ptr %x, i64 6
84  %g4 = getelementptr inbounds double, ptr %x, i64 8
85  %g5 = getelementptr inbounds double, ptr %x, i64 10
86  %g6 = getelementptr inbounds double, ptr %x, i64 12
87  %g7 = getelementptr inbounds double, ptr %x, i64 14
88  %g8 = getelementptr inbounds double, ptr %x, i64 16
89  %g9 = getelementptr inbounds double, ptr %x, i64 18
90  %g10 = getelementptr inbounds double, ptr %x, i64 20
91  %g11 = getelementptr inbounds double, ptr %x, i64 22
92  %g12 = getelementptr inbounds double, ptr %x, i64 24
93  %g13 = getelementptr inbounds double, ptr %x, i64 26
94  %g14 = getelementptr inbounds double, ptr %x, i64 28
95  %g15 = getelementptr inbounds double, ptr %x, i64 30
96  %t0 = load double, ptr %x, align 4
97  %t1 = load double, ptr %g1, align 4
98  %t2 = load double, ptr %g2, align 4
99  %t3 = load double, ptr %g3, align 4
100  %t4 = load double, ptr %g4, align 4
101  %t5 = load double, ptr %g5, align 4
102  %t6 = load double, ptr %g6, align 4
103  %t7 = load double, ptr %g7, align 4
104  %t8 = load double, ptr %g8, align 4
105  %t9 = load double, ptr %g9, align 4
106  %t10 = load double, ptr %g10, align 4
107  %t11 = load double, ptr %g11, align 4
108  %t12 = load double, ptr %g12, align 4
109  %t13 = load double, ptr %g13, align 4
110  %t14 = load double, ptr %g14, align 4
111  %t15 = load double, ptr %g15, align 4
112  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
113  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
114  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
115  %m4 = tail call fast double @llvm.minnum.f64(double %t4, double %m3)
116  %m5 = tail call fast double @llvm.minnum.f64(double %t5, double %m4)
117  %m6 = tail call fast double @llvm.minnum.f64(double %t6, double %m5)
118  %m7 = tail call fast double @llvm.minnum.f64(double %t7, double %m6)
119  %m8 = tail call fast double @llvm.minnum.f64(double %t8, double %m7)
120  %m9 = tail call fast double @llvm.minnum.f64(double %t9, double %m8)
121  %m10 = tail call fast double @llvm.minnum.f64(double %t10, double %m9)
122  %m11 = tail call fast double @llvm.minnum.f64(double %t11, double %m10)
123  %m12 = tail call fast double @llvm.minnum.f64(double %t12, double %m11)
124  %m13 = tail call fast double @llvm.minnum.f64(double %t13, double %m12)
125  %m14 = tail call fast double @llvm.minnum.f64(double %t14, double %m13)
126  %m15 = tail call fast double @llvm.minnum.f64(double %t15, double %m14)
127  ret double %m15
128}
129
130define float @fmin_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
131; CHECK-LABEL: define float @fmin_float_12_nums_nonseq(
132; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
133; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
134; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
135; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
136; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
137; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
138; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
139; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
140; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
141; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
142; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
143; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
144; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
145; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
146; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
147; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
148; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[G4]], align 4
149; CHECK-NEXT:    [[T5:%.*]] = load float, ptr [[G5]], align 4
150; CHECK-NEXT:    [[T6:%.*]] = load float, ptr [[G6]], align 4
151; CHECK-NEXT:    [[T7:%.*]] = load float, ptr [[G7]], align 4
152; CHECK-NEXT:    [[T8:%.*]] = load float, ptr [[G8]], align 4
153; CHECK-NEXT:    [[T9:%.*]] = load float, ptr [[G9]], align 4
154; CHECK-NEXT:    [[T10:%.*]] = load float, ptr [[G10]], align 4
155; CHECK-NEXT:    [[T11:%.*]] = load float, ptr [[G11]], align 4
156; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
157; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
158; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
159; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
160; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
161; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
162; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
163; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
164; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
165; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
166; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
167; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
168; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.v12f32(<12 x float> [[TMP12]])
169; CHECK-NEXT:    ret float [[TMP13]]
170;
; Same pattern as the double test, but with float and a non-power-of-two
; element count (12) at a stride of two floats: expected result is a gather
; into <12 x float> plus one @llvm.vector.reduce.fmin (CHECK lines above).
171  %g1 = getelementptr inbounds float, ptr %x, i64 2
172  %g2 = getelementptr inbounds float, ptr %x, i64 4
173  %g3 = getelementptr inbounds float, ptr %x, i64 6
174  %g4 = getelementptr inbounds float, ptr %x, i64 8
175  %g5 = getelementptr inbounds float, ptr %x, i64 10
176  %g6 = getelementptr inbounds float, ptr %x, i64 12
177  %g7 = getelementptr inbounds float, ptr %x, i64 14
178  %g8 = getelementptr inbounds float, ptr %x, i64 16
179  %g9 = getelementptr inbounds float, ptr %x, i64 18
180  %g10 = getelementptr inbounds float, ptr %x, i64 20
181  %g11 = getelementptr inbounds float, ptr %x, i64 22
182  %t0 = load float, ptr %x, align 4
183  %t1 = load float, ptr %g1, align 4
184  %t2 = load float, ptr %g2, align 4
185  %t3 = load float, ptr %g3, align 4
186  %t4 = load float, ptr %g4, align 4
187  %t5 = load float, ptr %g5, align 4
188  %t6 = load float, ptr %g6, align 4
189  %t7 = load float, ptr %g7, align 4
190  %t8 = load float, ptr %g8, align 4
191  %t9 = load float, ptr %g9, align 4
192  %t10 = load float, ptr %g10, align 4
193  %t11 = load float, ptr %g11, align 4
194  %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0)
195  %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1)
196  %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2)
197  %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3)
198  %m5 = tail call fast float @llvm.minnum.f32(float %t5, float %m4)
199  %m6 = tail call fast float @llvm.minnum.f32(float %t6, float %m5)
200  %m7 = tail call fast float @llvm.minnum.f32(float %t7, float %m6)
201  %m8 = tail call fast float @llvm.minnum.f32(float %t8, float %m7)
202  %m9 = tail call fast float @llvm.minnum.f32(float %t9, float %m8)
203  %m10 = tail call fast float @llvm.minnum.f32(float %t10, float %m9)
204  %m11 = tail call fast float @llvm.minnum.f32(float %t11, float %m10)
205  ret float %m11
206}
207
208define double @fmax_double_4_nums_seq(ptr nocapture noundef readonly %x) {
209; CHECK-LABEL: define double @fmax_double_4_nums_seq(
210; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
211; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
212; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP1]])
213; CHECK-NEXT:    ret double [[TMP2]]
214;
; fmax counterpart of fmin_double_4_nums_seq: four consecutive double loads
; chained through fast maxnum calls should become a single <4 x double> load
; and one @llvm.vector.reduce.fmax (CHECK lines above).
215  %g1 = getelementptr inbounds double, ptr %x, i64 1
216  %g2 = getelementptr inbounds double, ptr %x, i64 2
217  %g3 = getelementptr inbounds double, ptr %x, i64 3
218  %t0 = load double, ptr %x, align 4
219  %t1 = load double, ptr %g1, align 4
220  %t2 = load double, ptr %g2, align 4
221  %t3 = load double, ptr %g3, align 4
222  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
223  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
224  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
225  ret double %m3
226}
227
228define double @fmax_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
229; CHECK-LABEL: define double @fmax_double_16_nums_nonseq(
230; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
231; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
232; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
233; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
234; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
235; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
236; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
237; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
238; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
239; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
240; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
241; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
242; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
243; CHECK-NEXT:    [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
244; CHECK-NEXT:    [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
245; CHECK-NEXT:    [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
246; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[X]], align 4
247; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
248; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
249; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
250; CHECK-NEXT:    [[T4:%.*]] = load double, ptr [[G4]], align 4
251; CHECK-NEXT:    [[T5:%.*]] = load double, ptr [[G5]], align 4
252; CHECK-NEXT:    [[T6:%.*]] = load double, ptr [[G6]], align 4
253; CHECK-NEXT:    [[T7:%.*]] = load double, ptr [[G7]], align 4
254; CHECK-NEXT:    [[T8:%.*]] = load double, ptr [[G8]], align 4
255; CHECK-NEXT:    [[T9:%.*]] = load double, ptr [[G9]], align 4
256; CHECK-NEXT:    [[T10:%.*]] = load double, ptr [[G10]], align 4
257; CHECK-NEXT:    [[T11:%.*]] = load double, ptr [[G11]], align 4
258; CHECK-NEXT:    [[T12:%.*]] = load double, ptr [[G12]], align 4
259; CHECK-NEXT:    [[T13:%.*]] = load double, ptr [[G13]], align 4
260; CHECK-NEXT:    [[T14:%.*]] = load double, ptr [[G14]], align 4
261; CHECK-NEXT:    [[T15:%.*]] = load double, ptr [[G15]], align 4
262; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
263; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
264; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
265; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
266; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
267; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
268; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
269; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
270; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
271; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
272; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
273; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
274; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
275; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
276; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
277; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
278; CHECK-NEXT:    [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> [[TMP16]])
279; CHECK-NEXT:    ret double [[TMP17]]
280;
; fmax counterpart of fmin_double_16_nums_nonseq: sixteen loads at a stride
; of two doubles cannot be merged into vector loads, so SLP gathers them
; with insertelements into a <16 x double> and emits a single
; @llvm.vector.reduce.fmax (CHECK lines above).
281  %g1 = getelementptr inbounds double, ptr %x, i64 2
282  %g2 = getelementptr inbounds double, ptr %x, i64 4
283  %g3 = getelementptr inbounds double, ptr %x, i64 6
284  %g4 = getelementptr inbounds double, ptr %x, i64 8
285  %g5 = getelementptr inbounds double, ptr %x, i64 10
286  %g6 = getelementptr inbounds double, ptr %x, i64 12
287  %g7 = getelementptr inbounds double, ptr %x, i64 14
288  %g8 = getelementptr inbounds double, ptr %x, i64 16
289  %g9 = getelementptr inbounds double, ptr %x, i64 18
290  %g10 = getelementptr inbounds double, ptr %x, i64 20
291  %g11 = getelementptr inbounds double, ptr %x, i64 22
292  %g12 = getelementptr inbounds double, ptr %x, i64 24
293  %g13 = getelementptr inbounds double, ptr %x, i64 26
294  %g14 = getelementptr inbounds double, ptr %x, i64 28
295  %g15 = getelementptr inbounds double, ptr %x, i64 30
296  %t0 = load double, ptr %x, align 4
297  %t1 = load double, ptr %g1, align 4
298  %t2 = load double, ptr %g2, align 4
299  %t3 = load double, ptr %g3, align 4
300  %t4 = load double, ptr %g4, align 4
301  %t5 = load double, ptr %g5, align 4
302  %t6 = load double, ptr %g6, align 4
303  %t7 = load double, ptr %g7, align 4
304  %t8 = load double, ptr %g8, align 4
305  %t9 = load double, ptr %g9, align 4
306  %t10 = load double, ptr %g10, align 4
307  %t11 = load double, ptr %g11, align 4
308  %t12 = load double, ptr %g12, align 4
309  %t13 = load double, ptr %g13, align 4
310  %t14 = load double, ptr %g14, align 4
311  %t15 = load double, ptr %g15, align 4
312  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
313  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
314  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
315  %m4 = tail call fast double @llvm.maxnum.f64(double %t4, double %m3)
316  %m5 = tail call fast double @llvm.maxnum.f64(double %t5, double %m4)
317  %m6 = tail call fast double @llvm.maxnum.f64(double %t6, double %m5)
318  %m7 = tail call fast double @llvm.maxnum.f64(double %t7, double %m6)
319  %m8 = tail call fast double @llvm.maxnum.f64(double %t8, double %m7)
320  %m9 = tail call fast double @llvm.maxnum.f64(double %t9, double %m8)
321  %m10 = tail call fast double @llvm.maxnum.f64(double %t10, double %m9)
322  %m11 = tail call fast double @llvm.maxnum.f64(double %t11, double %m10)
323  %m12 = tail call fast double @llvm.maxnum.f64(double %t12, double %m11)
324  %m13 = tail call fast double @llvm.maxnum.f64(double %t13, double %m12)
325  %m14 = tail call fast double @llvm.maxnum.f64(double %t14, double %m13)
326  %m15 = tail call fast double @llvm.maxnum.f64(double %t15, double %m14)
327  ret double %m15
328}
329
330define float @fmax_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
331; CHECK-LABEL: define float @fmax_float_12_nums_nonseq(
332; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
333; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
334; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
335; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
336; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
337; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
338; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
339; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
340; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
341; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
342; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
343; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
344; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
345; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
346; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
347; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
348; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[G4]], align 4
349; CHECK-NEXT:    [[T5:%.*]] = load float, ptr [[G5]], align 4
350; CHECK-NEXT:    [[T6:%.*]] = load float, ptr [[G6]], align 4
351; CHECK-NEXT:    [[T7:%.*]] = load float, ptr [[G7]], align 4
352; CHECK-NEXT:    [[T8:%.*]] = load float, ptr [[G8]], align 4
353; CHECK-NEXT:    [[T9:%.*]] = load float, ptr [[G9]], align 4
354; CHECK-NEXT:    [[T10:%.*]] = load float, ptr [[G10]], align 4
355; CHECK-NEXT:    [[T11:%.*]] = load float, ptr [[G11]], align 4
356; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
357; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
358; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
359; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
360; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
361; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
362; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
363; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
364; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
365; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
366; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
367; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
368; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmax.v12f32(<12 x float> [[TMP12]])
369; CHECK-NEXT:    ret float [[TMP13]]
370;
; fmax counterpart of fmin_float_12_nums_nonseq: twelve strided float loads
; (non-power-of-two count) gathered into <12 x float> and reduced with a
; single @llvm.vector.reduce.fmax (CHECK lines above).
371  %g1 = getelementptr inbounds float, ptr %x, i64 2
372  %g2 = getelementptr inbounds float, ptr %x, i64 4
373  %g3 = getelementptr inbounds float, ptr %x, i64 6
374  %g4 = getelementptr inbounds float, ptr %x, i64 8
375  %g5 = getelementptr inbounds float, ptr %x, i64 10
376  %g6 = getelementptr inbounds float, ptr %x, i64 12
377  %g7 = getelementptr inbounds float, ptr %x, i64 14
378  %g8 = getelementptr inbounds float, ptr %x, i64 16
379  %g9 = getelementptr inbounds float, ptr %x, i64 18
380  %g10 = getelementptr inbounds float, ptr %x, i64 20
381  %g11 = getelementptr inbounds float, ptr %x, i64 22
382  %t0 = load float, ptr %x, align 4
383  %t1 = load float, ptr %g1, align 4
384  %t2 = load float, ptr %g2, align 4
385  %t3 = load float, ptr %g3, align 4
386  %t4 = load float, ptr %g4, align 4
387  %t5 = load float, ptr %g5, align 4
388  %t6 = load float, ptr %g6, align 4
389  %t7 = load float, ptr %g7, align 4
390  %t8 = load float, ptr %g8, align 4
391  %t9 = load float, ptr %g9, align 4
392  %t10 = load float, ptr %g10, align 4
393  %t11 = load float, ptr %g11, align 4
394  %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0)
395  %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1)
396  %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2)
397  %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3)
398  %m5 = tail call fast float @llvm.maxnum.f32(float %t5, float %m4)
399  %m6 = tail call fast float @llvm.maxnum.f32(float %t6, float %m5)
400  %m7 = tail call fast float @llvm.maxnum.f32(float %t7, float %m6)
401  %m8 = tail call fast float @llvm.maxnum.f32(float %t8, float %m7)
402  %m9 = tail call fast float @llvm.maxnum.f32(float %t9, float %m8)
403  %m10 = tail call fast float @llvm.maxnum.f32(float %t10, float %m9)
404  %m11 = tail call fast float @llvm.maxnum.f32(float %t11, float %m10)
405  ret float %m11
406}
407
408declare float @llvm.minnum.f32(float, float)
409declare double @llvm.minnum.f64(double, double)
410declare float @llvm.maxnum.f32(float, float)
411declare double @llvm.maxnum.f64(double, double)
412