xref: /llvm-project/llvm/test/Analysis/CostModel/X86/mul32.ll (revision c444af1c20b35555f2fdb2c1ca38d3f23b2faebd)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
3; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
4; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
5; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
6; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
7; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
8; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
9; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512F
10;
11; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
12; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
13; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
14; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512
15
16target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
17target triple = "x86_64-apple-macosx10.8.0"
18
19;
20; mul vXi8 -> mXi32
21;
22
23define void @mul_sext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64)  {
24; SSE2-LABEL: 'mul_sext_vXi8'
25; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
26; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
27; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
28; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
29; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
30; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
31; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
32; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
33; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
34; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
35; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
36; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
37; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
38; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
39; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
40; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
41;
42; SSSE3-LABEL: 'mul_sext_vXi8'
43; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
44; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
45; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
46; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
47; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
48; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
49; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
50; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
51; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
52; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
53; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
54; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
55; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
56; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
57; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
58; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
59;
60; SSE42-LABEL: 'mul_sext_vXi8'
61; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
62; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
63; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
64; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
65; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
66; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
67; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
68; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
69; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
70; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
71; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
72; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
73; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
74; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
75; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
76; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
77;
78; AVX1-LABEL: 'mul_sext_vXi8'
79; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
80; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
81; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
82; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
83; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
84; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
85; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
86; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
87; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
88; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
89; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
90; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
91; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
92; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
93; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
94; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
95;
96; AVX2-LABEL: 'mul_sext_vXi8'
97; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
98; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
99; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
100; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
101; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
102; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
103; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
104; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
105; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
106; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
107; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
108; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
109; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
110; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
111; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
112; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
113;
114; AVX512-LABEL: 'mul_sext_vXi8'
115; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
116; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
117; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
118; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
119; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
120; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
121; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
122; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
123; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
124; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
125; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
126; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
127; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
128; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
129; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
130; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
131;
132; SLM-LABEL: 'mul_sext_vXi8'
133; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
134; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
135; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
136; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
137; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
138; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
139; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
140; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
141; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
142; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
143; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
144; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
145; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
146; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
147; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
148; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
149;
150; GLM-LABEL: 'mul_sext_vXi8'
151; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
152; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
153; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
154; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
155; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
156; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
157; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
158; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
159; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
160; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
161; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
162; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
163; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
164; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
165; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
166; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
167;
168  %xa4 = sext <4 x i8> %a4 to <4 x i32>
169  %xb4 = sext <4 x i8> %b4 to <4 x i32>
170  %xa8 = sext <8 x i8> %a8 to <8 x i32>
171  %xb8 = sext <8 x i8> %b8 to <8 x i32>
172  %xa16 = sext <16 x i8> %a16 to <16 x i32>
173  %xb16 = sext <16 x i8> %b16 to <16 x i32>
174  %xa32 = sext <32 x i8> %a32 to <32 x i32>
175  %xb32 = sext <32 x i8> %b32 to <32 x i32>
176  %xa64 = sext <64 x i8> %a64 to <64 x i32>
177  %xb64 = sext <64 x i8> %b64 to <64 x i32>
178  %res4 = mul <4 x i32> %xa4, %xb4
179  %res8 = mul <8 x i32> %xa8, %xb8
180  %res16 = mul <16 x i32> %xa16, %xb16
181  %res32 = mul <32 x i32> %xa32, %xb32
182  %res64 = mul <64 x i32> %xa64, %xb64
183  ret void
184}
185
186define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64)  {
187; SSE2-LABEL: 'mul_zext_vXi8'
188; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
189; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
190; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
191; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
192; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
193; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
194; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
195; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
196; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
197; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
198; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
199; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
200; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
201; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
202; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
203; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
204;
205; SSSE3-LABEL: 'mul_zext_vXi8'
206; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
207; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
208; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
209; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
210; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
211; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
212; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
213; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
214; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
215; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
216; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
217; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
218; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
219; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
220; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
221; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
222;
223; SSE42-LABEL: 'mul_zext_vXi8'
224; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
225; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
226; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
227; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
228; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
229; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
230; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
231; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
232; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
233; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
234; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
235; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
236; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
237; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
238; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
239; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
240;
241; AVX1-LABEL: 'mul_zext_vXi8'
242; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
243; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
244; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
245; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
246; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
247; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
248; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
249; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
250; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
251; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
252; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
253; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
254; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
255; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
256; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
257; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
258;
259; AVX2-LABEL: 'mul_zext_vXi8'
260; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
261; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
262; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
263; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
264; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
265; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
266; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
267; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
268; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
269; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
270; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
271; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
272; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
273; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
274; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
275; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
276;
277; AVX512F-LABEL: 'mul_zext_vXi8'
278; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
279; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
280; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
281; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
282; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
283; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
284; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
285; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
286; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
287; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
288; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
289; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
290; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
291; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
292; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
293; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
294;
295; AVX512BW-LABEL: 'mul_zext_vXi8'
296; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
297; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
298; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
299; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
300; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
301; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
302; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
303; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
304; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
305; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
306; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
307; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
308; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
309; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
310; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
311; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
312;
313; SLM-LABEL: 'mul_zext_vXi8'
314; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
315; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
316; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
317; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
318; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
319; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
320; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
321; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
322; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
323; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
324; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
325; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
326; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
327; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
328; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
329; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
330;
331; GLM-LABEL: 'mul_zext_vXi8'
332; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
333; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
334; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
335; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
336; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
337; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
338; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
339; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
340; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
341; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
342; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
343; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
344; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
345; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
346; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
347; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
348;
349  %xa4 = zext <4 x i8> %a4 to <4 x i32>
350  %xb4 = zext <4 x i8> %b4 to <4 x i32>
351  %xa8 = zext <8 x i8> %a8 to <8 x i32>
352  %xb8 = zext <8 x i8> %b8 to <8 x i32>
353  %xa16 = zext <16 x i8> %a16 to <16 x i32>
354  %xb16 = zext <16 x i8> %b16 to <16 x i32>
355  %xa32 = zext <32 x i8> %a32 to <32 x i32>
356  %xb32 = zext <32 x i8> %b32 to <32 x i32>
357  %xa64 = zext <64 x i8> %a64 to <64 x i32>
358  %xb64 = zext <64 x i8> %b64 to <64 x i32>
359  %res4 = mul <4 x i32> %xa4, %xb4
360  %res8 = mul <8 x i32> %xa8, %xb8
361  %res16 = mul <16 x i32> %xa16, %xb16
362  %res32 = mul <32 x i32> %xa32, %xb32
363  %res64 = mul <64 x i32> %xa64, %xb64
364  ret void
365}
366
367define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64)  {
368; SSE2-LABEL: 'mul_sext_zext_vXi8'
369; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
370; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
371; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
372; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
373; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
374; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
375; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
376; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
377; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
378; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
379; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
380; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
381; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
382; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
383; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
384; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
385;
386; SSSE3-LABEL: 'mul_sext_zext_vXi8'
387; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
388; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
389; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
390; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
391; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
392; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
393; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
394; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
395; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
396; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
397; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
398; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
399; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
400; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
401; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
402; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
403;
404; SSE42-LABEL: 'mul_sext_zext_vXi8'
405; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
406; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
407; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
408; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
409; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
410; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
411; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
412; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
413; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
414; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
415; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
416; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
417; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
418; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
419; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
420; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
421;
422; AVX1-LABEL: 'mul_sext_zext_vXi8'
423; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
424; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
425; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
426; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
427; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
428; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
429; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
430; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
431; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
432; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
433; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
434; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
435; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
436; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
437; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
438; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
439;
440; AVX2-LABEL: 'mul_sext_zext_vXi8'
441; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
442; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
443; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
444; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
445; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
446; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
447; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
448; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
449; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
450; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
451; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
452; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
453; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
454; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
455; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
456; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
457;
458; AVX512F-LABEL: 'mul_sext_zext_vXi8'
459; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
460; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
461; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
462; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
463; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
464; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
465; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
466; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
467; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
468; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
469; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
470; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
471; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
472; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
473; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
474; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
475;
476; AVX512BW-LABEL: 'mul_sext_zext_vXi8'
477; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
478; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
479; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
480; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
481; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
482; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
483; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
484; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
485; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
486; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
487; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
488; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
489; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
490; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
491; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
492; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
493;
494; SLM-LABEL: 'mul_sext_zext_vXi8'
495; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
496; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
497; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
498; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
499; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
500; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
501; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
502; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
503; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
504; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
505; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
506; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
507; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
508; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
509; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
510; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
511;
512; GLM-LABEL: 'mul_sext_zext_vXi8'
513; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
514; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
515; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
516; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
517; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
518; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
519; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
520; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
521; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
522; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
523; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
524; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
525; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
526; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
527; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
528; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
529;
530  %xa4 = sext <4 x i8> %a4 to <4 x i32>
531  %xb4 = zext <4 x i8> %b4 to <4 x i32>
532  %xa8 = sext <8 x i8> %a8 to <8 x i32>
533  %xb8 = zext <8 x i8> %b8 to <8 x i32>
534  %xa16 = sext <16 x i8> %a16 to <16 x i32>
535  %xb16 = zext <16 x i8> %b16 to <16 x i32>
536  %xa32 = sext <32 x i8> %a32 to <32 x i32>
537  %xb32 = zext <32 x i8> %b32 to <32 x i32>
538  %xa64 = sext <64 x i8> %a64 to <64 x i32>
539  %xb64 = zext <64 x i8> %b64 to <64 x i32>
540  %res4 = mul <4 x i32> %xa4, %xb4
541  %res8 = mul <8 x i32> %xa8, %xb8
542  %res16 = mul <16 x i32> %xa16, %xb16
543  %res32 = mul <32 x i32> %xa32, %xb32
544  %res64 = mul <64 x i32> %xa64, %xb64
545  ret void
546}
547
548;
549; mul vXi16 -> mXi32
550;
551
552define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64)  {
553; SSE2-LABEL: 'mul_sext_vXi16'
554; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
555; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
556; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
557; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
558; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
559; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
560; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
561; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
562; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
563; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
564; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
565; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
566; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
567; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
568; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
569; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
570;
571; SSSE3-LABEL: 'mul_sext_vXi16'
572; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
573; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
574; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
575; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
576; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
577; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
578; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
579; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
580; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
581; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
582; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
583; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
584; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
585; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
586; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
587; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
588;
589; SSE42-LABEL: 'mul_sext_vXi16'
590; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
591; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
592; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
593; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
594; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
595; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
596; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
597; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
598; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
599; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
600; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
601; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
602; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
603; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
604; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
605; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
606;
607; AVX1-LABEL: 'mul_sext_vXi16'
608; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
609; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
610; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
611; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
612; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
613; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
614; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
615; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
616; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
617; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
618; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
619; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
620; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
621; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
622; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
623; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
624;
625; AVX2-LABEL: 'mul_sext_vXi16'
626; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
627; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
628; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
629; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
630; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
631; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
632; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
633; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
634; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
635; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
636; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
637; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
638; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
639; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
640; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
641; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
642;
643; AVX512F-LABEL: 'mul_sext_vXi16'
644; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
645; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
646; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
647; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
648; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
649; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
650; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
651; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
652; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
653; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
654; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
655; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
656; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
657; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
658; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
659; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
660;
661; AVX512BW-LABEL: 'mul_sext_vXi16'
662; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
663; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
664; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
665; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
666; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
667; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
668; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
669; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
670; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
671; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
672; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
673; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
674; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
675; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
676; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
677; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
678;
679; SLM-LABEL: 'mul_sext_vXi16'
680; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
681; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
682; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
683; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
684; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
685; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
686; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
687; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
688; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
689; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
690; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
691; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
692; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
693; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
694; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
695; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
696;
697; GLM-LABEL: 'mul_sext_vXi16'
698; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
699; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
700; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
701; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
702; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
703; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
704; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
705; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
706; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
707; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
708; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
709; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
710; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
711; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
712; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
713; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
714;
715  %xa4 = sext <4 x i16> %a4 to <4 x i32>
716  %xb4 = sext <4 x i16> %b4 to <4 x i32>
717  %xa8 = sext <8 x i16> %a8 to <8 x i32>
718  %xb8 = sext <8 x i16> %b8 to <8 x i32>
719  %xa16 = sext <16 x i16> %a16 to <16 x i32>
720  %xb16 = sext <16 x i16> %b16 to <16 x i32>
721  %xa32 = sext <32 x i16> %a32 to <32 x i32>
722  %xb32 = sext <32 x i16> %b32 to <32 x i32>
723  %xa64 = sext <64 x i16> %a64 to <64 x i32>
724  %xb64 = sext <64 x i16> %b64 to <64 x i32>
725  %res4 = mul <4 x i32> %xa4, %xb4
726  %res8 = mul <8 x i32> %xa8, %xb8
727  %res16 = mul <16 x i32> %xa16, %xb16
728  %res32 = mul <32 x i32> %xa32, %xb32
729  %res64 = mul <64 x i32> %xa64, %xb64
730  ret void
731}
732
733define void @mul_zext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64)  {
734; SSE2-LABEL: 'mul_zext_vXi16'
735; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
736; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
737; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
738; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
739; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
740; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
741; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
742; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
743; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
744; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
745; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
746; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
747; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
748; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
749; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
750; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
751;
752; SSSE3-LABEL: 'mul_zext_vXi16'
753; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
754; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
755; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
756; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
757; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
758; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
759; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
760; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
761; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
762; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
763; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
764; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
765; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
766; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
767; SSSE3-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
768; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
769;
770; SSE42-LABEL: 'mul_zext_vXi16'
771; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
772; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
773; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
774; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
775; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
776; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
777; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
778; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
779; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
780; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
781; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
782; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
783; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
784; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
785; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
786; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
787;
788; AVX1-LABEL: 'mul_zext_vXi16'
789; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
790; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
791; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
792; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
793; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
794; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
795; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
796; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
797; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
798; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
799; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
800; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
801; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
802; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
803; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
804; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
805;
806; AVX2-LABEL: 'mul_zext_vXi16'
807; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
808; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
809; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
810; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
811; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
812; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
813; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
814; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
815; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
816; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
817; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
818; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
819; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
820; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
821; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
822; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
823;
824; AVX512-LABEL: 'mul_zext_vXi16'
825; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
826; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
827; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
828; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
829; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
830; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
831; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
832; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
833; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
834; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
835; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
836; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
837; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
838; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
839; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
840; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
841;
842; SLM-LABEL: 'mul_zext_vXi16'
843; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
844; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
845; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
846; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
847; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
848; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
849; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
850; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
851; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
852; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
853; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
854; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
855; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
856; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
857; SLM-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
858; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
859;
860; GLM-LABEL: 'mul_zext_vXi16'
861; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
862; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
863; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
864; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
865; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
866; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
867; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
868; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
869; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
870; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
871; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
872; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
873; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
874; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
875; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
876; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
877;
878  %xa4 = zext <4 x i16> %a4 to <4 x i32>
879  %xb4 = zext <4 x i16> %b4 to <4 x i32>
880  %xa8 = zext <8 x i16> %a8 to <8 x i32>
881  %xb8 = zext <8 x i16> %b8 to <8 x i32>
882  %xa16 = zext <16 x i16> %a16 to <16 x i32>
883  %xb16 = zext <16 x i16> %b16 to <16 x i32>
884  %xa32 = zext <32 x i16> %a32 to <32 x i32>
885  %xb32 = zext <32 x i16> %b32 to <32 x i32>
886  %xa64 = zext <64 x i16> %a64 to <64 x i32>
887  %xb64 = zext <64 x i16> %b64 to <64 x i32>
888  %res4 = mul <4 x i32> %xa4, %xb4
889  %res8 = mul <8 x i32> %xa8, %xb8
890  %res16 = mul <16 x i32> %xa16, %xb16
891  %res32 = mul <32 x i32> %xa32, %xb32
892  %res64 = mul <64 x i32> %xa64, %xb64
893  ret void
894}
895
896define void @mul_sext_zext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64)  {
897; SSE2-LABEL: 'mul_sext_zext_vXi16'
898; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
899; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
900; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
901; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
902; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
903; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
904; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
905; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
906; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
907; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
908; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
909; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
910; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
911; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
912; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
913; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
914;
915; SSSE3-LABEL: 'mul_sext_zext_vXi16'
916; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
917; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
918; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
919; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
920; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
921; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
922; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
923; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
924; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
925; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
926; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
927; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
928; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
929; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
930; SSSE3-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
931; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
932;
933; SSE42-LABEL: 'mul_sext_zext_vXi16'
934; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
935; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
936; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
937; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
938; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
939; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
940; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
941; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
942; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
943; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
944; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
945; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
946; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
947; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
948; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
949; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
950;
951; AVX1-LABEL: 'mul_sext_zext_vXi16'
952; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
953; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
954; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
955; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
956; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
957; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
958; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
959; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
960; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
961; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
962; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
963; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
964; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
965; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
966; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
967; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
968;
969; AVX2-LABEL: 'mul_sext_zext_vXi16'
970; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
971; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
972; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
973; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
974; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
975; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
976; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
977; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
978; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
979; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
980; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
981; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
982; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
983; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
984; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
985; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
986;
987; AVX512-LABEL: 'mul_sext_zext_vXi16'
988; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
989; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
990; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
991; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
992; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
993; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
994; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
995; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
996; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
997; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
998; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
999; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
1000; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
1001; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
1002; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
1003; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1004;
1005; SLM-LABEL: 'mul_sext_zext_vXi16'
1006; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
1007; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
1008; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
1009; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
1010; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
1011; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
1012; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
1013; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
1014; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
1015; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
1016; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
1017; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
1018; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
1019; SLM-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
1020; SLM-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
1021; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1022;
1023; GLM-LABEL: 'mul_sext_zext_vXi16'
1024; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
1025; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
1026; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
1027; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
1028; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
1029; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
1030; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
1031; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
1032; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
1033; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
1034; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
1035; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
1036; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
1037; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
1038; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
1039; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1040;
1041  %xa4 = sext <4 x i16> %a4 to <4 x i32>
1042  %xb4 = zext <4 x i16> %b4 to <4 x i32>
1043  %xa8 = sext <8 x i16> %a8 to <8 x i32>
1044  %xb8 = zext <8 x i16> %b8 to <8 x i32>
1045  %xa16 = sext <16 x i16> %a16 to <16 x i32>
1046  %xb16 = zext <16 x i16> %b16 to <16 x i32>
1047  %xa32 = sext <32 x i16> %a32 to <32 x i32>
1048  %xb32 = zext <32 x i16> %b32 to <32 x i32>
1049  %xa64 = sext <64 x i16> %a64 to <64 x i32>
1050  %xb64 = zext <64 x i16> %b64 to <64 x i32>
1051  %res4 = mul <4 x i32> %xa4, %xb4
1052  %res8 = mul <8 x i32> %xa8, %xb8
1053  %res16 = mul <16 x i32> %xa16, %xb16
1054  %res32 = mul <32 x i32> %xa32, %xb32
1055  %res64 = mul <64 x i32> %xa64, %xb64
1056  ret void
1057}
1058