xref: /llvm-project/llvm/test/Analysis/CostModel/X86/load-broadcast.ll (revision f89f670d92b5726fc61bf8252a6ecc273086c501)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
2; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2
3; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3
4; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42
5; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1
6; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2
7; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
8; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
9; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512
10;
11; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42
12; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42
13; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1
14
15;
16; vXf64
17;
18
; Loads a full <2 x double> vector from %src and splats its element 0 across a
; <2 x double> result (the shuffle mask is all zeros). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
19define <2 x double> @broadcast_load_v2f64_v2f64(ptr %src) {
20; SSE2-LABEL: 'broadcast_load_v2f64_v2f64'
21; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
22; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
23; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
24;
25; SSSE3-LABEL: 'broadcast_load_v2f64_v2f64'
26; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
27; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
28; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
29;
30; SSE42-LABEL: 'broadcast_load_v2f64_v2f64'
31; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
32; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
33; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
34;
35; AVX-LABEL: 'broadcast_load_v2f64_v2f64'
36; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
37; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
38; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
39;
40  %load = load <2 x double>, ptr %src
41  %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
42  ret <2 x double> %bcst
43}
44
; Loads a <2 x double> vector from %src and splats its element 0 into a wider
; <4 x double> result (4-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
45define <4 x double> @broadcast_load_v2f64_v4f64(ptr %src) {
46; SSE2-LABEL: 'broadcast_load_v2f64_v4f64'
47; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
48; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
49; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
50;
51; SSSE3-LABEL: 'broadcast_load_v2f64_v4f64'
52; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
53; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
54; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
55;
56; SSE42-LABEL: 'broadcast_load_v2f64_v4f64'
57; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
58; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
59; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
60;
61; AVX-LABEL: 'broadcast_load_v2f64_v4f64'
62; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
63; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
64; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
65;
66  %load = load <2 x double>, ptr %src
67  %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
68  ret <4 x double> %bcst
69}
70
; Loads a <2 x double> vector from %src and splats its element 0 into a wider
; <8 x double> result (8-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
71define <8 x double> @broadcast_load_v2f64_v8f64(ptr %src) {
72; SSE2-LABEL: 'broadcast_load_v2f64_v8f64'
73; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
74; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
75; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
76;
77; SSSE3-LABEL: 'broadcast_load_v2f64_v8f64'
78; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
79; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
80; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
81;
82; SSE42-LABEL: 'broadcast_load_v2f64_v8f64'
83; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
84; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
85; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
86;
87; AVX-LABEL: 'broadcast_load_v2f64_v8f64'
88; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
89; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
90; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
91;
92  %load = load <2 x double>, ptr %src
93  %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
94  ret <8 x double> %bcst
95}
96
; Scalar variant. Loads a single double from %src, inserts it into lane 0 of a
; poison <2 x double>, then splats lane 0 with an all-zero shuffle mask. The
; autogenerated assertions below pin the cost reported for each instruction.
97define <2 x double> @broadcast_load_f64_v2f64(ptr %src) {
98; SSE-LABEL: 'broadcast_load_f64_v2f64'
99; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
100; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <2 x double> poison, double %load, i32 0
101; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
102; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
103;
104; AVX-LABEL: 'broadcast_load_f64_v2f64'
105; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
106; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <2 x double> poison, double %load, i32 0
107; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
108; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
109;
110  %load = load double, ptr %src
111  %insert = insertelement <2 x double> poison, double %load, i32 0
112  %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
113  ret <2 x double> %bcst
114}
115
; Scalar variant. Loads a single double from %src, inserts it into lane 0 of a
; poison <4 x double>, then splats lane 0 with an all-zero shuffle mask. The
; autogenerated assertions below pin the cost reported for each instruction.
116define <4 x double> @broadcast_load_f64_v4f64(ptr %src) {
117; SSE-LABEL: 'broadcast_load_f64_v4f64'
118; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
119; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
120; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
121; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
122;
123; AVX1-LABEL: 'broadcast_load_f64_v4f64'
124; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
125; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
126; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
127; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
128;
129; AVX2-LABEL: 'broadcast_load_f64_v4f64'
130; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
131; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
132; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
133; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
134;
135; AVX512-LABEL: 'broadcast_load_f64_v4f64'
136; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
137; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
138; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
139; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
140;
141  %load = load double, ptr %src
142  %insert = insertelement <4 x double> poison, double %load, i32 0
143  %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
144  ret <4 x double> %bcst
145}
146
; Scalar variant. Loads a single double from %src, inserts it into lane 0 of a
; poison <8 x double>, then splats lane 0 with an all-zero shuffle mask. The
; autogenerated assertions below pin the cost reported for each instruction.
147define <8 x double> @broadcast_load_f64_v8f64(ptr %src) {
148; SSE-LABEL: 'broadcast_load_f64_v8f64'
149; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
150; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
151; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
152; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
153;
154; AVX1-LABEL: 'broadcast_load_f64_v8f64'
155; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
156; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
157; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
158; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
159;
160; AVX2-LABEL: 'broadcast_load_f64_v8f64'
161; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
162; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
163; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
164; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
165;
166; AVX512-LABEL: 'broadcast_load_f64_v8f64'
167; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
168; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
169; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
170; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
171;
172  %load = load double, ptr %src
173  %insert = insertelement <8 x double> poison, double %load, i32 0
174  %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
175  ret <8 x double> %bcst
176}
177
178;
179; vXf32
180;
181
; Loads a full <4 x float> vector from %src and splats its element 0 across a
; <4 x float> result (the shuffle mask is all zeros). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
182define <4 x float> @broadcast_load_v4f32_v4f32(ptr %src) {
183; SSE-LABEL: 'broadcast_load_v4f32_v4f32'
184; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
185; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
186; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %bcst
187;
188; AVX-LABEL: 'broadcast_load_v4f32_v4f32'
189; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
190; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
191; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %bcst
192;
193  %load = load <4 x float>, ptr %src
194  %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
195  ret <4 x float> %bcst
196}
197
; Loads a <4 x float> vector from %src and splats its element 0 into a wider
; <8 x float> result (8-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
198define <8 x float> @broadcast_load_v4f32_v8f32(ptr %src) {
199; SSE-LABEL: 'broadcast_load_v4f32_v8f32'
200; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
201; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
202; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %bcst
203;
204; AVX-LABEL: 'broadcast_load_v4f32_v8f32'
205; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
206; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
207; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %bcst
208;
209  %load = load <4 x float>, ptr %src
210  %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
211  ret <8 x float> %bcst
212}
213
; Loads a <4 x float> vector from %src and splats its element 0 into a wider
; <16 x float> result (16-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
214define <16 x float> @broadcast_load_v4f32_v16f32(ptr %src) {
215; SSE-LABEL: 'broadcast_load_v4f32_v16f32'
216; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
217; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
218; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %bcst
219;
220; AVX-LABEL: 'broadcast_load_v4f32_v16f32'
221; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
222; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
223; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %bcst
224;
225  %load = load <4 x float>, ptr %src
226  %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
227  ret <16 x float> %bcst
228}
229
; Loads a full <2 x i64> vector from %src and splats its element 0 across a
; <2 x i64> result (the shuffle mask is all zeros). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
; Review remark - this i64 test is defined before the vXi64 section banner that
; follows it, so it looks misplaced relative to that banner; confirm and move
; the banner above this function if that was the intent.
230define <2 x i64> @broadcast_load_v2i64_v2i64(ptr %src) {
231; SSE-LABEL: 'broadcast_load_v2i64_v2i64'
232; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
233; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
234; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bcst
235;
236; AVX-LABEL: 'broadcast_load_v2i64_v2i64'
237; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
238; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
239; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bcst
240;
241  %load = load <2 x i64>, ptr %src
242  %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
243  ret <2 x i64> %bcst
244}
245
246;
247; vXi64
248;
249
; Loads a <2 x i64> vector from %src and splats its element 0 into a wider
; <4 x i64> result (4-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
250define <4 x i64> @broadcast_load_v2i64_v4i64(ptr %src) {
251; SSE-LABEL: 'broadcast_load_v2i64_v4i64'
252; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
253; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
254; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bcst
255;
256; AVX-LABEL: 'broadcast_load_v2i64_v4i64'
257; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
258; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
259; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bcst
260;
261  %load = load <2 x i64>, ptr %src
262  %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
263  ret <4 x i64> %bcst
264}
265
; Loads a <2 x i64> vector from %src and splats its element 0 into a wider
; <8 x i64> result (8-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
266define <8 x i64> @broadcast_load_v2i64_v8i64(ptr %src) {
267; SSE-LABEL: 'broadcast_load_v2i64_v8i64'
268; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
269; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
270; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bcst
271;
272; AVX-LABEL: 'broadcast_load_v2i64_v8i64'
273; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
274; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
275; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bcst
276;
277  %load = load <2 x i64>, ptr %src
278  %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
279  ret <8 x i64> %bcst
280}
281
282;
283; vXi32
284;
285
; Loads a full <4 x i32> vector from %src and splats its element 0 across a
; <4 x i32> result (the shuffle mask is all zeros). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
286define <4 x i32> @broadcast_load_v4i32_v4i32(ptr %src) {
287; SSE-LABEL: 'broadcast_load_v4i32_v4i32'
288; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
289; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
290; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bcst
291;
292; AVX-LABEL: 'broadcast_load_v4i32_v4i32'
293; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
294; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
295; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bcst
296;
297  %load = load <4 x i32>, ptr %src
298  %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
299  ret <4 x i32> %bcst
300}
301
; Loads a <4 x i32> vector from %src and splats its element 0 into a wider
; <8 x i32> result (8-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
302define <8 x i32> @broadcast_load_v4i32_v8i32(ptr %src) {
303; SSE-LABEL: 'broadcast_load_v4i32_v8i32'
304; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
305; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
306; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bcst
307;
308; AVX-LABEL: 'broadcast_load_v4i32_v8i32'
309; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
310; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
311; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bcst
312;
313  %load = load <4 x i32>, ptr %src
314  %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
315  ret <8 x i32> %bcst
316}
317
; Loads a <4 x i32> vector from %src and splats its element 0 into a wider
; <16 x i32> result (16-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
318define <16 x i32> @broadcast_load_v4i32_v16i32(ptr %src) {
319; SSE-LABEL: 'broadcast_load_v4i32_v16i32'
320; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
321; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
322; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bcst
323;
324; AVX-LABEL: 'broadcast_load_v4i32_v16i32'
325; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
326; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
327; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bcst
328;
329  %load = load <4 x i32>, ptr %src
330  %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
331  ret <16 x i32> %bcst
332}
333
334;
335; vXi16
336;
337
; Loads a full <8 x i16> vector from %src and splats its element 0 across an
; <8 x i16> result (the shuffle mask is all zeros). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
338define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) {
339; SSE2-LABEL: 'broadcast_load_v8i16_v8i16'
340; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
341; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
342; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
343;
344; SSSE3-LABEL: 'broadcast_load_v8i16_v8i16'
345; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
346; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
347; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
348;
349; SSE42-LABEL: 'broadcast_load_v8i16_v8i16'
350; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
351; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
352; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
353;
354; AVX1-LABEL: 'broadcast_load_v8i16_v8i16'
355; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
356; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
357; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
358;
359; AVX2-LABEL: 'broadcast_load_v8i16_v8i16'
360; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
361; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
362; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
363;
364; AVX512-LABEL: 'broadcast_load_v8i16_v8i16'
365; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
366; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
367; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
368;
369  %load = load <8 x i16>, ptr %src
370  %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
371  ret <8 x i16> %bcst
372}
373
; Loads an <8 x i16> vector from %src and splats its element 0 into a wider
; <16 x i16> result (16-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
374define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) {
375; SSE2-LABEL: 'broadcast_load_v8i16_v16i16'
376; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
377; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
378; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
379;
380; SSSE3-LABEL: 'broadcast_load_v8i16_v16i16'
381; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
382; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
383; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
384;
385; SSE42-LABEL: 'broadcast_load_v8i16_v16i16'
386; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
387; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
388; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
389;
390; AVX1-LABEL: 'broadcast_load_v8i16_v16i16'
391; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
392; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
393; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
394;
395; AVX2-LABEL: 'broadcast_load_v8i16_v16i16'
396; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
397; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
398; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
399;
400; AVX512-LABEL: 'broadcast_load_v8i16_v16i16'
401; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
402; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
403; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
404;
405  %load = load <8 x i16>, ptr %src
406  %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
407  ret <16 x i16> %bcst
408}
409
; Loads an <8 x i16> vector from %src and splats its element 0 into a wider
; <32 x i16> result (32-lane all-zero shuffle mask). The autogenerated
; assertions below pin the cost reported for each instruction per prefix group.
410define <32 x i16> @broadcast_load_v8i16_v32i16(ptr %src) {
411; SSE2-LABEL: 'broadcast_load_v8i16_v32i16'
412; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
413; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
414; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
415;
416; SSSE3-LABEL: 'broadcast_load_v8i16_v32i16'
417; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
418; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
419; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
420;
421; SSE42-LABEL: 'broadcast_load_v8i16_v32i16'
422; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
423; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
424; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
425;
426; AVX1-LABEL: 'broadcast_load_v8i16_v32i16'
427; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
428; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
429; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
430;
431; AVX2-LABEL: 'broadcast_load_v8i16_v32i16'
432; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
433; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
434; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
435;
436; AVX512-LABEL: 'broadcast_load_v8i16_v32i16'
437; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
438; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
439; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
440;
441  %load = load <8 x i16>, ptr %src
442  %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
443  ret <32 x i16> %bcst
444}
445
446;
447; vXi8
448;
449
; Cost-model test: load a <16 x i8> from %src and splat its element 0 across a
; <16 x i8> result (same width as the loaded vector).
; The checks below expect the load to cost 1 under every prefix, and the
; shufflevector to cost 3 (SSE2), 1 (SSSE3, SSE42, AVX1) and 0 (AVX2, AVX512).
; NOTE(review): the 0 cost on AVX2/AVX512 presumably models the splat being
; folded into a broadcast-from-memory instruction - confirm against the X86 TTI.
; The CHECK lines are autogenerated by utils/update_analyze_test_checks.py;
; regenerate them with that script rather than editing them by hand.
450define <16 x i8> @broadcast_load_v16i8_v16i8(ptr %src) {
451; SSE2-LABEL: 'broadcast_load_v16i8_v16i8'
452; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
453; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
454; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
455;
456; SSSE3-LABEL: 'broadcast_load_v16i8_v16i8'
457; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
458; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
459; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
460;
461; SSE42-LABEL: 'broadcast_load_v16i8_v16i8'
462; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
463; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
464; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
465;
466; AVX1-LABEL: 'broadcast_load_v16i8_v16i8'
467; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
468; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
469; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
470;
471; AVX2-LABEL: 'broadcast_load_v16i8_v16i8'
472; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
473; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
474; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
475;
476; AVX512-LABEL: 'broadcast_load_v16i8_v16i8'
477; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
478; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
479; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
480;
; The load carries no explicit alignment; the cost-model output above prints it
; as "align 16". The all-zero shuffle mask makes every result lane select
; element 0 of %load.
481  %load = load <16 x i8>, ptr %src
482  %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
483  ret <16 x i8> %bcst
484}
485
; Cost-model test: load a <16 x i8> from %src and splat its element 0 across a
; <32 x i8> result (twice as many lanes as the loaded vector).
; The checks below expect the load to cost 1 under every prefix, and the
; widening shufflevector to cost 3 (SSE2), 1 (SSSE3, SSE42), 2 (AVX1) and
; 0 (AVX2, AVX512).
; NOTE(review): the 0 cost on AVX2/AVX512 presumably models the splat being
; folded into a broadcast-from-memory instruction - confirm against the X86 TTI.
; The CHECK lines are autogenerated by utils/update_analyze_test_checks.py;
; regenerate them with that script rather than editing them by hand.
486define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) {
487; SSE2-LABEL: 'broadcast_load_v16i8_v32i8'
488; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
489; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
490; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
491;
492; SSSE3-LABEL: 'broadcast_load_v16i8_v32i8'
493; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
494; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
495; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
496;
497; SSE42-LABEL: 'broadcast_load_v16i8_v32i8'
498; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
499; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
500; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
501;
502; AVX1-LABEL: 'broadcast_load_v16i8_v32i8'
503; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
504; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
505; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
506;
507; AVX2-LABEL: 'broadcast_load_v16i8_v32i8'
508; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
509; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
510; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
511;
512; AVX512-LABEL: 'broadcast_load_v16i8_v32i8'
513; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
514; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
515; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
516;
; The load carries no explicit alignment; the cost-model output above prints it
; as "align 16". The 32-entry all-zero mask makes every result lane select
; element 0 of the 16-element %load, so the shuffle both splats and widens.
517  %load = load <16 x i8>, ptr %src
518  %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
519  ret <32 x i8> %bcst
520}
521
; Cost-model test: load a <16 x i8> from %src and splat its element 0 across a
; <64 x i8> result (four times as many lanes as the loaded vector).
; The checks below expect the load to cost 1 under every prefix, and the
; widening shufflevector to cost 3 (SSE2), 1 (SSSE3, SSE42), 2 (AVX1) and
; 0 (AVX2, AVX512) - the same per-prefix costs as the <32 x i8> variant.
; NOTE(review): the 0 cost on AVX2/AVX512 presumably models the splat being
; folded into a broadcast-from-memory instruction - confirm against the X86 TTI.
; The CHECK lines are autogenerated by utils/update_analyze_test_checks.py;
; regenerate them with that script rather than editing them by hand.
522define <64 x i8> @broadcast_load_v16i8_v64i8(ptr %src) {
523; SSE2-LABEL: 'broadcast_load_v16i8_v64i8'
524; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
525; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
526; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
527;
528; SSSE3-LABEL: 'broadcast_load_v16i8_v64i8'
529; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
530; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
531; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
532;
533; SSE42-LABEL: 'broadcast_load_v16i8_v64i8'
534; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
535; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
536; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
537;
538; AVX1-LABEL: 'broadcast_load_v16i8_v64i8'
539; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
540; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
541; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
542;
543; AVX2-LABEL: 'broadcast_load_v16i8_v64i8'
544; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
545; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
546; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
547;
548; AVX512-LABEL: 'broadcast_load_v16i8_v64i8'
549; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
550; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
551; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
552;
; The load carries no explicit alignment; the cost-model output above prints it
; as "align 16". The 64-entry all-zero mask makes every result lane select
; element 0 of the 16-element %load, so the shuffle both splats and widens.
553  %load = load <16 x i8>, ptr %src
554  %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
555  ret <64 x i8> %bcst
556}
557