xref: /llvm-project/llvm/test/Analysis/CostModel/SystemZ/vector-reductions.ll (revision 866b9f43a0772faee64aa421886eb101118f3167)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt -passes='print<cost-model>' -disable-output -mtriple=s390x-unknown-linux \
3; RUN:   -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15
4
5define void @fadd_reductions() {
; Cost-model checks for llvm.vector.reduce.fadd calls issued without any
; fast-math flags, over <4 x float>, <8 x float>, <2 x double>, <4 x double>
; and <4 x fp128> operands. Expected costs are matched under the Z15 prefix.
6; Z15-LABEL: 'fadd_reductions'
7; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
8; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
9; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
10; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
11; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
12; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
13;
14  %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
15  %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
16  %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
17  %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
18  %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
19  ret void
20}
21
22define void @fast_fadd_reductions(ptr %src, ptr %dst) {
; Cost-model checks for llvm.vector.reduce.fadd calls carrying the `fast`
; flag, over <4 x float>, <8 x float>, <2 x double>, <4 x double> and
; <4 x fp128> operands.
; NOTE(review): %src and %dst are never referenced in this body; they look
; like leftovers and could probably be dropped - confirm before removing.
23; Z15-LABEL: 'fast_fadd_reductions'
24; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
25; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
26; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
27; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
28; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
29; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
30;
31  %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
32  %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
33  %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
34  %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
35  %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
36  ret void
37}
38
39define void @fmul_reductions() {
; Cost-model checks for llvm.vector.reduce.fmul calls issued without any
; fast-math flags, over <4 x float>, <8 x float>, <2 x double>, <4 x double>
; and <4 x fp128> operands. Expected costs are matched under the Z15 prefix.
40; Z15-LABEL: 'fmul_reductions'
41; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
42; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
43; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
44; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
45; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
46; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
47;
48  %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
49  %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
50  %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
51  %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
52  %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
53  ret void
54}
55
56define void @fast_fmul_reductions() {
; Cost-model checks for llvm.vector.reduce.fmul calls carrying the `fast`
; flag, over <4 x float>, <8 x float>, <2 x double>, <4 x double> and
; <4 x fp128> operands. The fp128 result is named %fmul_v4f128 (it was
; previously mislabelled %fadd_v4f128); the check line prints the value name,
; so the instruction and its expectation are renamed together.
57; Z15-LABEL: 'fast_fmul_reductions'
58; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
59; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
60; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
61; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
62; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
63; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
64;
65  %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
66  %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
67  %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
68  %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
69  %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
70
71  ret void
72}
73
74define void @fmin_reductions() {
; Cost-model checks for llvm.vector.reduce.fmin over <4 x float>,
; <8 x float>, <2 x double>, <4 x double> and <4 x fp128> operands.
; Expected costs are matched under the Z15 prefix.
75; Z15-LABEL: 'fmin_reductions'
76; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
77; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
78; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
79; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
80; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
81; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
82;
83  %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
84  %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
85  %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
86  %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
87  %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
88  ret void
89}
90
91define void @fmax_reductions() {
; Cost-model checks for llvm.vector.reduce.fmax over <4 x float>,
; <8 x float>, <2 x double>, <4 x double> and <4 x fp128> operands.
; Expected costs are matched under the Z15 prefix.
92; Z15-LABEL: 'fmax_reductions'
93; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
94; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
95; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
96; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
97; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
98; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
99;
100  %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
101  %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
102  %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
103  %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
104  %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
105  ret void
106}
107
108define void @reduceumin() {
; Cost-model checks for llvm.vector.reduce.umin over <2 x i64>, <4 x i64>,
; <4 x i32>, <8 x i32>, <128 x i8> and <4 x i128> operands.
; Every expectation uses a well-formed "Z15-NEXT:" directive; a directive
; written without the trailing colon is not recognised by FileCheck and
; would be skipped silently.
109; Z15-LABEL: 'reduceumin'
110; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
111; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
112; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
113; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
114; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
115; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
116;
117  %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
118  %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
119  %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
120  %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
121
122  %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
123  %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
124
125  ret void
126}
127
128define void @reduceumax() {
; Cost-model checks for llvm.vector.reduce.umax over <2 x i64>, <4 x i64>,
; <4 x i32>, <8 x i32>, <128 x i8> and <4 x i128> operands.
; Every expectation uses a well-formed "Z15-NEXT:" directive; a directive
; written without the trailing colon is not recognised by FileCheck and
; would be skipped silently.
129; Z15-LABEL: 'reduceumax'
130; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
131; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
132; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
133; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
134; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
135; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
136;
137  %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
138  %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
139  %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
140  %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
141
142  %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
143  %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
144
145  ret void
146}
147
148define void @reducesmin() {
; Cost-model checks for llvm.vector.reduce.smin over <2 x i64>, <4 x i64>,
; <4 x i32>, <8 x i32>, <128 x i8> and <4 x i128> operands.
; Every expectation uses a well-formed "Z15-NEXT:" directive; a directive
; written without the trailing colon is not recognised by FileCheck and
; would be skipped silently.
149; Z15-LABEL: 'reducesmin'
150; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
151; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
152; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
153; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
154; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
155; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
156;
157  %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
158  %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
159  %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
160  %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
161
162  %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
163  %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
164
165  ret void
166}
167
168define void @reducesmax() {
; Cost-model checks for llvm.vector.reduce.smax over <2 x i64>, <4 x i64>,
; <4 x i32>, <8 x i32>, <128 x i8> and <4 x i128> operands.
; Every expectation uses a well-formed "Z15-NEXT:" directive; a directive
; written without the trailing colon is not recognised by FileCheck and
; would be skipped silently.
169; Z15-LABEL: 'reducesmax'
170; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
171; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
172; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
173; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
174; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
175; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
176;
177  %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
178  %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
179  %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
180  %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
181
182  %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
183  %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
184
185  ret void
186}
187
188define void @reduceadd() {
; Cost-model checks for llvm.vector.reduce.add over i64, i32, i16 and i8
; element types at several vector lengths, plus two outsized cases
; (<128 x i8> and <4 x i256>). Expected costs are matched under the Z15
; prefix.
189; Z15-LABEL: 'reduceadd'
190; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
191; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
192; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
193; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
194; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
195; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
196; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
197; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
198; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
199; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
200; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
201; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
202; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
203; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
204; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
205; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
206;
207; Z15-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
208; Z15-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
209
210  ; REDUCEADD64
211  %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
212  %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
213  %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
214  %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
215  ; REDUCEADD32
216  %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
217  %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
218  %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
219  %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
220  ; REDUCEADD16
221  %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
222  %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
223  %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
224  %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
225  ; REDUCEADD8
226  %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
227  %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
228  %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
229  %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
230  ; EXTREME VALUES
231  %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
232  %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
233
234  ret void
235}
236
237define void @reducemul() {
; Cost-model checks for llvm.vector.reduce.mul over i64, i32, i16 and i8
; element types at several vector lengths, plus two outsized cases
; (<128 x i8> and <4 x i256>).
; The directives use the Z15 prefix because the RUN line passes only
; --check-prefix=Z15 to FileCheck; directives under the default CHECK prefix
; would never be evaluated.
238; Z15-LABEL: 'reducemul'
239; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
240; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
241; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
242; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
243; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
244; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
245; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
246; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
247; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
248; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
249; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
250; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
251; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
252; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
253; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
254; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
255;
256; Z15-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
257; Z15-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
258
259  ; REDUCEMUL64
260  %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
261  %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
262  %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
263  %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
264  ; REDUCEMUL32
265  %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
266  %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
267  %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
268  %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
269  ; REDUCEMUL16
270  %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
271  %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
272  %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
273  %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
274  ; REDUCEMUL8
275  %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
276  %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
277  %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
278  %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
279  ; EXTREME VALUES
280  %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
281  %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
282
283  ret void
284}
285
286declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
287declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
288declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
289declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
290declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
291
292declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
293declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
294declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
295declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
296declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)
297
298declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
299declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
300declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
301declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
302declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)
303
304declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
305declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
306declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
307declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
308declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)
309
310declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
311declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
312declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
313declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
314declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
315declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>)
316
317declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
318declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
319declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
320declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
321declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
322declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>)
323
324declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
325declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
326declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
327declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
328declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
329declare i128 @llvm.vector.reduce.smin.v4i128(<4 x i128>)
330
331declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
332declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
333declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
334declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
335declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
336declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>)
337
338declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
339declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
340declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
341declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
342declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
343declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
344declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
345declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
346declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
347declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
348declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
349declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
350declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
351declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
352declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
353declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
354
355declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
356declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>)
357
358declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
359declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
360declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
361declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
362declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
363declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
364declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
365declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
366declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
367declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
368declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
369declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
370declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
371declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
372declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
373declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
374
375declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
376declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>)
377