xref: /llvm-project/llvm/test/Analysis/CostModel/X86/reduce-umin.ll (revision 63c3895327839ba5b57f5b99ec9e888abf976ac6)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
3; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
4; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
5; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
6; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1
7; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
8; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
9; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
10; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
11
12define i32 @reduce_i64(i32 %arg) {
13; SSE2-LABEL: 'reduce_i64'
14; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
15; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
16; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
17; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
18; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
19; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
20;
21; SSSE3-LABEL: 'reduce_i64'
22; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
23; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
24; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
25; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
26; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
27; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
28;
29; SSE4-LABEL: 'reduce_i64'
30; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
31; SSE4-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
32; SSE4-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
33; SSE4-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
34; SSE4-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
35; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
36;
37; AVX1-LABEL: 'reduce_i64'
38; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
39; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
40; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
41; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
42; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
43; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
44;
45; AVX2-LABEL: 'reduce_i64'
46; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
47; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
48; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
49; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
50; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
51; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
52;
53; AVX512-LABEL: 'reduce_i64'
54; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
55; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
56; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
57; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
58; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
59; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
60;
61  %V1  = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
62  %V2  = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
63  %V4  = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
64  %V8  = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
65  %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
66  ret i32 undef
67}
68
69define i32 @reduce_i32(i32 %arg) {
70; SSE2-LABEL: 'reduce_i32'
71; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
72; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
73; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
74; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
75; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
76; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
77;
78; SSSE3-LABEL: 'reduce_i32'
79; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
80; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
81; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
82; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
83; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
84; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
85;
86; SSE4-LABEL: 'reduce_i32'
87; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
88; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
89; SSE4-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
90; SSE4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
91; SSE4-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
92; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
93;
94; AVX1-LABEL: 'reduce_i32'
95; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
96; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
97; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
98; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
99; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
100; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
101;
102; AVX2-LABEL: 'reduce_i32'
103; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
104; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
105; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
106; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
107; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
108; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
109;
110; AVX512-LABEL: 'reduce_i32'
111; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
112; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
113; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
114; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
115; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
116; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
117;
118  %V2  = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
119  %V4  = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
120  %V8  = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
121  %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
122  %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
123  ret i32 undef
124}
125
126define i32 @reduce_i16(i32 %arg) {
127; SSE2-LABEL: 'reduce_i16'
128; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
129; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
130; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
131; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
132; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
133; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
134; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
135;
136; SSSE3-LABEL: 'reduce_i16'
137; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
138; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
139; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
140; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
141; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
142; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
143; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
144;
145; SSE4-LABEL: 'reduce_i16'
146; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
147; SSE4-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
148; SSE4-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
149; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
150; SSE4-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
151; SSE4-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
152; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
153;
154; AVX1-LABEL: 'reduce_i16'
155; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
156; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
157; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
158; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
159; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
160; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
161; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
162;
163; AVX2-LABEL: 'reduce_i16'
164; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
165; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
166; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
167; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
168; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
169; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
170; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
171;
172; AVX512F-LABEL: 'reduce_i16'
173; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
174; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
175; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
176; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
177; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
178; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
179; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
180;
181; AVX512BW-LABEL: 'reduce_i16'
182; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
183; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
184; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
185; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
186; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
187; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
188; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
189;
190; AVX512DQ-LABEL: 'reduce_i16'
191; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
192; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
193; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
194; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
195; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
196; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
197; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
198;
199  %V2  = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
200  %V4  = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
201  %V8  = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
202  %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
203  %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
204  %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
205  ret i32 undef
206}
207
208define i32 @reduce_i8(i32 %arg) {
209; SSE2-LABEL: 'reduce_i8'
210; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
211; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
212; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
213; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
214; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
215; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
216; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
217; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
218;
219; SSSE3-LABEL: 'reduce_i8'
220; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
221; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
222; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
223; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
224; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
225; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
226; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
227; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
228;
229; SSE4-LABEL: 'reduce_i8'
230; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
231; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
232; SSE4-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
233; SSE4-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
234; SSE4-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
235; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
236; SSE4-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
237; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
238;
239; AVX1-LABEL: 'reduce_i8'
240; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
241; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
242; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
243; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
244; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
245; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
246; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
247; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
248;
249; AVX2-LABEL: 'reduce_i8'
250; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
251; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
252; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
253; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
254; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
255; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
256; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
257; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
258;
259; AVX512F-LABEL: 'reduce_i8'
260; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
261; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
262; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
263; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
264; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
265; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
266; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
267; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
268;
269; AVX512BW-LABEL: 'reduce_i8'
270; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
271; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
272; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
273; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
274; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
275; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
276; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
277; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
278;
279; AVX512DQ-LABEL: 'reduce_i8'
280; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
281; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
282; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
283; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
284; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
285; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
286; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
287; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
288;
289  %V2   = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
290  %V4   = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
291  %V8   = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
292  %V16  = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
293  %V32  = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
294  %V64  = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
295  %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
296  ret i32 undef
297}
298
299declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
300declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
301declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
302declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
303declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)
304
305declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
306declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
307declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
308declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
309declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)
310
311declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
312declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
313declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
314declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
315declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
316declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)
317
318declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
319declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
320declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
321declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
322declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
323declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
324declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
325;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
326; SSE41: {{.*}}
327; SSE42: {{.*}}
328