xref: /llvm-project/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll (revision 7bc079c85219ad6e954fb6071cd108151203c85e)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
3; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-F16
4
5target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for
; llvm.vector.reduce.umin applied to undef integer vectors:
;   i8 x {1,3,4,8,16,32,64}, i16 x {2,4,8,16}, i32 x {2,4,8}, i64 x {2,4}.
; Expected costs listed below: 2 for every vector of at most 128 bits
; (including the odd-sized <1 x i8> and <3 x i8>), 3 for the 256-bit
; v32i8/v16i16/v8i32, 4 for v4i64, and 5 for the 512-bit v64i8.
; The same expectations apply to both RUN lines (with and without +fullfp16).
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
7define void @reduce_umin() {
8; CHECK-LABEL: 'reduce_umin'
9; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
10; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
11; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
12; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
13; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
14; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
15; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
16; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
17; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
18; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
19; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
20; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
21; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
22; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
23; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
24; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
25; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
26;
27  %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
28  %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
29  %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
30  %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
31  %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
32  %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
33  %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
34  %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
35  %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
36  %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
37  %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
38  %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
39  %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
40  %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
41  %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
42  %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
43  ret void
44}
45
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for
; llvm.vector.reduce.umax applied to undef integer vectors:
;   i8 x {1,3,4,8,16,32,64}, i16 x {2,4,8,16}, i32 x {2,4,8}, i64 x {2,4}.
; Expected costs listed below: 2 for every vector of at most 128 bits,
; 3 for the 256-bit v32i8/v16i16/v8i32, 4 for v4i64, and 5 for the 512-bit
; v64i8. The same expectations apply to both RUN lines.
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
46define void @reduce_umax() {
47; CHECK-LABEL: 'reduce_umax'
48; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
49; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
50; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
51; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
52; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
53; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
54; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
55; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
56; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
57; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
58; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
59; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
60; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
61; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
62; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
63; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
64; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
65;
66  %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
67  %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
68  %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
69  %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
70  %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
71  %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
72  %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
73  %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
74  %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
75  %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
76  %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
77  %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
78  %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
79  %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
80  %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
81  %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
82  ret void
83}
84
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for
; llvm.vector.reduce.smin applied to undef integer vectors:
;   i8 x {1,3,4,8,16,32,64}, i16 x {2,4,8,16}, i32 x {2,4,8}, i64 x {2,4}.
; Expected costs listed below: 2 for every vector of at most 128 bits,
; 3 for the 256-bit v32i8/v16i16/v8i32, 4 for v4i64, and 5 for the 512-bit
; v64i8. The same expectations apply to both RUN lines.
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
85define void @reduce_smin() {
86; CHECK-LABEL: 'reduce_smin'
87; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
88; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
89; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
90; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
91; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
92; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
93; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
94; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
95; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
96; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
97; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
98; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
99; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
100; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
101; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
102; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
103; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
104;
105  %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
106  %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
107  %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
108  %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
109  %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
110  %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
111  %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
112  %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
113  %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
114  %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
115  %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
116  %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
117  %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
118  %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
119  %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
120  %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
121  ret void
122}
123
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for
; llvm.vector.reduce.smax applied to undef integer vectors:
;   i8 x {1,3,4,8,16,32,64}, i16 x {2,4,8,16}, i32 x {2,4,8}, i64 x {2,4}.
; Expected costs listed below: 2 for every vector of at most 128 bits,
; 3 for the 256-bit v32i8/v16i16/v8i32, 4 for v4i64, and 5 for the 512-bit
; v64i8. The same expectations apply to both RUN lines.
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
124define void @reduce_smax() {
125; CHECK-LABEL: 'reduce_smax'
126; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
127; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
128; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
129; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
130; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
131; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
132; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
133; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
134; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
135; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
136; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
137; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
138; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
139; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
140; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
141; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
142; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
143;
144  %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
145  %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
146  %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
147  %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
148  %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
149  %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
150  %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
151  %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
152  %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
153  %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
154  %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
155  %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
156  %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
157  %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
158  %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
159  %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
160  ret void
161}
162
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for the
; half-precision reductions llvm.vector.reduce.fmin and
; llvm.vector.reduce.fminimum on undef vectors of 2, 4, 8 and 16 halfs.
; Expectations differ between the two RUN lines, so two assertion groups follow:
;   - without +fullfp16: 11 / 50 / 174 / 240 for v2 / v4 / v8 / v16;
;   - with -mattr=+fullfp16: 2 / 2 / 2 / 3 for v2 / v4 / v8 / v16.
; In each configuration fmin and fminimum are expected to cost the same.
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
163define void @reduce_fmin16() {
164; CHECK-NOF16-LABEL: 'reduce_fmin16'
165; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
166; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
167; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
168; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
169; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
170; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
171; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
172; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
173; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
174;
175; CHECK-F16-LABEL: 'reduce_fmin16'
176; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
177; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
178; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
179; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
180; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
181; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
182; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
183; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
184; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
185;
186  %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
187  %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
188  %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
189  %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
190  %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
191  %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
192  %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
193  %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
194  ret void
195}
196
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for the
; half-precision reductions llvm.vector.reduce.fmax and
; llvm.vector.reduce.fmaximum on undef vectors of 2, 4, 8 and 16 halfs.
; Expectations differ between the two RUN lines, so two assertion groups follow:
;   - without +fullfp16: 11 / 50 / 174 / 240 for v2 / v4 / v8 / v16;
;   - with -mattr=+fullfp16: 2 / 2 / 2 / 3 for v2 / v4 / v8 / v16.
; In each configuration fmax and fmaximum are expected to cost the same.
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
197define void @reduce_fmax16() {
198; CHECK-NOF16-LABEL: 'reduce_fmax16'
199; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
200; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
201; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
202; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
203; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
204; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
205; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
206; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
207; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
208;
209; CHECK-F16-LABEL: 'reduce_fmax16'
210; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
211; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
212; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
213; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
214; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
215; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
216; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
217; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
218; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
219;
220  %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
221  %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
222  %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
223  %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
224  %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
225  %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
226  %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
227  %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
228  ret void
229}
230
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for the
; float/double reductions llvm.vector.reduce.fmin and
; llvm.vector.reduce.fminimum on undef vectors: float x {2,4,8} and
; double x {2,4}.
; Expected costs listed below: 2 for v2f32/v4f32/v2f64 (at most 128 bits) and
; 3 for the 256-bit v8f32/v4f64, identical for fmin and fminimum.
; The same expectations apply to both RUN lines (with and without +fullfp16).
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
231define void @reduce_fmin() {
232; CHECK-LABEL: 'reduce_fmin'
233; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
234; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
235; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
236; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
237; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
238; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
239; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
240; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
241; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
242; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
243; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
244;
245  %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
246  %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
247  %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
248  %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
249  %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
250  %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
251  %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
252  %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
253  %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
254  %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
255  ret void
256}
257
; Throughput cost-model expectations (aarch64-unknown-linux-gnu) for the
; float/double reductions llvm.vector.reduce.fmax and
; llvm.vector.reduce.fmaximum on undef vectors: float x {2,4,8} and
; double x {2,4}.
; Expected costs listed below: 2 for v2f32/v4f32/v2f64 (at most 128 bits) and
; 3 for the 256-bit v8f32/v4f64, identical for fmax and fmaximum.
; The same expectations apply to both RUN lines (with and without +fullfp16).
; The assertion comments are autogenerated by
; utils/update_analyze_test_checks.py; regenerate instead of hand-editing.
258define void @reduce_fmax() {
259; CHECK-LABEL: 'reduce_fmax'
260; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
261; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
262; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
263; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
264; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
265; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
266; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
267; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
268; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
269; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
270; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
271;
272  %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
273  %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
274  %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
275  %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
276  %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
277  %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
278  %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
279  %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
280  %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
281  %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
282  ret void
283}
284
285declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>)
286declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
287declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
288declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
289declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
290declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
291declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
292declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
293declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
294declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
295declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
296declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
297declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
298declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
299declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
300declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
301
302declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>)
303declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
304declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
305declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
306declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
307declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
308declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
309declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
310declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
311declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
312declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
313declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
314declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
315declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
316declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
317declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
318
319declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>)
320declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
321declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
322declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
323declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
324declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
325declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
326declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
327declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
328declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
329declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
330declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
331declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
332declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
333declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
334declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
335
336declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>)
337declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
338declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
339declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
340declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
341declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
342declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
343declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
344declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
345declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
346declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
347declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
348declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
349declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
350declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
351declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
352
353declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
354declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
355declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
356declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
357declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
358declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
359declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
360declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
361declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
362
363declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
364declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
365declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
366declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
367declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
368declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
369declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
370declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
371declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
372
373declare half @llvm.vector.reduce.fminimum.v2f16(<2 x half>)
374declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
375declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
376declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
377declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
378declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
379declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
380declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
381declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)
382
383declare half @llvm.vector.reduce.fmaximum.v2f16(<2 x half>)
384declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
385declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
386declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
387declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
388declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
389declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
390declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
391declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
392