xref: /llvm-project/llvm/test/Analysis/CostModel/X86/reduce-mul.ll (revision 595a74391daaff8daffa2f6e19274792d0074565)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
4; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
5; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1
6; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
7; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
8; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
9; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
10
11define i32 @reduce_i64(i32 %arg) {
12; SSE2-LABEL: 'reduce_i64'
13; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
14; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
15; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
16; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
17; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
18; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
19;
20; SSSE3-LABEL: 'reduce_i64'
21; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
22; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
23; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
24; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
25; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
26; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
27;
28; SSE42-LABEL: 'reduce_i64'
29; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
30; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
31; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
32; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
33; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
34; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
35;
36; AVX1-LABEL: 'reduce_i64'
37; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
38; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
39; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
40; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
41; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
42; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
43;
44; AVX2-LABEL: 'reduce_i64'
45; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
46; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
47; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
48; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
49; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
50; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
51;
52; AVX512F-LABEL: 'reduce_i64'
53; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
54; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
55; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
56; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
57; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
58; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
59;
60; AVX512BW-LABEL: 'reduce_i64'
61; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
62; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
63; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
64; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
65; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
66; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
67;
68; AVX512DQ-LABEL: 'reduce_i64'
69; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
70; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
71; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
72; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
73; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
74; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
75;
76  %V1  = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> undef)
77  %V2  = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
78  %V4  = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
79  %V8  = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
80  %V16 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
81  ret i32 undef
82}
83
84define i32 @reduce_i32(i32 %arg) {
85; SSE2-LABEL: 'reduce_i32'
86; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
87; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
88; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
89; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
90; SSE2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
91; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
92;
93; SSSE3-LABEL: 'reduce_i32'
94; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
95; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
96; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
97; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
98; SSSE3-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
99; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
100;
101; SSE42-LABEL: 'reduce_i32'
102; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
103; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
104; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
105; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
106; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
107; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
108;
109; AVX1-LABEL: 'reduce_i32'
110; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
111; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
112; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
113; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
114; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
115; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
116;
117; AVX2-LABEL: 'reduce_i32'
118; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
119; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
120; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
121; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
122; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
123; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
124;
125; AVX512-LABEL: 'reduce_i32'
126; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
127; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
128; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
129; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
130; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
131; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
132;
133  %V2  = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
134  %V4  = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
135  %V8  = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
136  %V16 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
137  %V32 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> undef)
138  ret i32 undef
139}
140
141define i32 @reduce_i16(i32 %arg) {
142; SSE-LABEL: 'reduce_i16'
143; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
144; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
145; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
146; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
147; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
148; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
149; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
150;
151; AVX1-LABEL: 'reduce_i16'
152; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
153; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
154; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
155; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
156; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
157; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
158; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
159;
160; AVX2-LABEL: 'reduce_i16'
161; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
162; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
163; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
164; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
165; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
166; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
167; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
168;
169; AVX512F-LABEL: 'reduce_i16'
170; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
171; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
172; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
173; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
174; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
175; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
176; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
177;
178; AVX512BW-LABEL: 'reduce_i16'
179; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
180; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
181; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
182; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
183; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
184; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
185; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
186;
187; AVX512DQ-LABEL: 'reduce_i16'
188; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
189; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
190; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
191; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
192; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
193; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
194; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
195;
196  %V2  = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
197  %V4  = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
198  %V8  = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
199  %V16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
200  %V32 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> undef)
201  %V64 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> undef)
202  ret i32 undef
203}
204
205define i32 @reduce_i8(i32 %arg) {
206; SSE-LABEL: 'reduce_i8'
207; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
208; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
209; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
210; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
211; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
212; SSE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
213; SSE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
214; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
215;
216; AVX1-LABEL: 'reduce_i8'
217; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
218; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
219; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
220; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
221; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
222; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
223; AVX1-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
224; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
225;
226; AVX2-LABEL: 'reduce_i8'
227; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
228; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
229; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
230; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
231; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
232; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
233; AVX2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
234; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
235;
236; AVX512F-LABEL: 'reduce_i8'
237; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
238; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
239; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
240; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
241; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
242; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
243; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
244; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
245;
246; AVX512BW-LABEL: 'reduce_i8'
247; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
248; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
249; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
250; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
251; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
252; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
253; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
254; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
255;
256; AVX512DQ-LABEL: 'reduce_i8'
257; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
258; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
259; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
260; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
261; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
262; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
263; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
264; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
265;
266  %V2   = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
267  %V4   = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
268  %V8   = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
269  %V16  = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
270  %V32  = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
271  %V64  = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
272  %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
273  ret i32 undef
274}
275
276declare i64 @llvm.vector.reduce.mul.v1i64(<1 x i64>)
277declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
278declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
279declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
280declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
281
282declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
283declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
284declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
285declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
286declare i32 @llvm.vector.reduce.mul.v32i32(<32 x i32>)
287
288declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
289declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
290declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
291declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
292declare i16 @llvm.vector.reduce.mul.v32i16(<32 x i16>)
293declare i16 @llvm.vector.reduce.mul.v64i16(<64 x i16>)
294
295declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
296declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
297declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
298declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
299declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
300declare i8 @llvm.vector.reduce.mul.v64i8(<64 x i8>)
301declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
302