xref: /llvm-project/llvm/test/Analysis/CostModel/X86/shuffle-load-latency.ll (revision 2dfe76e989877d3992bf52971f27ad4ae5064a6d)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE
3; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2
4; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3
5; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX
6; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX2
7; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX512
; FIXME(review): The SSE and SSE2 runs above both pass -mattr=+sse2, and the
; AVX, AVX2 and AVX512 runs all pass -mattr=+sse3, so no RUN line in this file
; enables an AVX feature. This looks like a copy-paste slip; confirm the
; intended -mattr for each prefix (presumably +avx, +avx2 and +avx512f) and
; then regenerate the assertions with utils/update_analyze_test_checks.py
; rather than editing the flags by hand.
8
9; This test checks that the cost model recognizes a splat-load shuffle, i.e. a
10; broadcast shuffle whose operand is a load. When the target has a combined
11; load+broadcast instruction, such as `movddup`, the shuffle's cost should be 0.
12;
13; TODO: AVX `vbroadcast*` seems to support more types than the
14;       2xdouble type of `movddup`:
15;       - `vbroadcastss` supports 4xfloat, 8xfloat
16;       - `vbroadcastsd` supports 4xdouble
17
18; NOTE: The code in this test is a hack. Since TTI cannot currently detect a
19; proper broadcast pattern from a scalar load (like the one that follows),
20; we use a vector load as the shuffle's operand to trigger the pattern.
21;
22;  %load = load double, ptr %ptr
23;  %insert = insertelement <2 x double> poison, double %load, i32 0
24;  %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
25
26define void @shuffle_load() {
27; SSE-LABEL: 'shuffle_load'
28; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
29; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
30; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
31; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
32; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
33; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
34; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
35; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
36; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
37; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
38; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
39; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
40; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
41; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
42; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
43; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
44; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
45; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
46; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
47; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
48; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
49; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
50; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
51; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
52; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
53; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer
54; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32
55; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer
56; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64
57; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer
58; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16
59; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer
60; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32
61; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer
62; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64
63; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer
64; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4
65; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer
66; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8
67; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
68; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16
69; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
70; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32
71; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
72; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64
73; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
74; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8
75; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
76; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16
77; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer
78; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32
79; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer
80; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64
81; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer
82; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
83; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
84; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32
85; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer
86; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64
87; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer
88; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
89;
90; SSE2-LABEL: 'shuffle_load'
91; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
92; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
93; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
94; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
95; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
96; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
97; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
98; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
99; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
100; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
101; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
102; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
103; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
104; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
105; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
106; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
107; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
108; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
109; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
110; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
111; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
112; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
113; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
114; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
115; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
116; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer
117; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32
118; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer
119; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64
120; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer
121; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16
122; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer
123; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32
124; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer
125; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64
126; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer
127; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4
128; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer
129; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8
130; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
131; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16
132; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
133; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32
134; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
135; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64
136; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
137; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8
138; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
139; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16
140; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer
141; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32
142; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer
143; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64
144; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer
145; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
146; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
147; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32
148; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer
149; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64
150; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer
151; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
152;
153; SSE3-LABEL: 'shuffle_load'
154; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
155; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
156; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
157; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
158; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
159; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
160; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
161; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
162; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
163; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
164; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
165; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
166; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
167; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
168; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
169; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
170; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
171; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
172; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
173; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
174; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
175; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
176; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
177; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
178; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
179; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer
180; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32
181; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer
182; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64
183; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer
184; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16
185; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer
186; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32
187; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer
188; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64
189; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer
190; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4
191; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer
192; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8
193; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
194; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16
195; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
196; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32
197; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
198; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64
199; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
200; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8
201; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
202; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16
203; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer
204; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32
205; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer
206; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64
207; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer
208; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
209; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
210; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32
211; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer
212; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64
213; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer
214; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
215;
216; AVX-LABEL: 'shuffle_load'
217; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
218; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
219; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
220; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
221; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
222; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
223; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
224; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
225; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
226; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
227; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
228; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
229; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
230; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
231; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
232; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
233; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
234; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
235; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
236; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
237; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
238; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
239; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
240; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
241; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
242; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer
243; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32
244; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer
245; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64
246; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer
247; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16
248; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer
249; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32
250; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer
251; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64
252; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer
253; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4
254; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer
255; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8
256; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
257; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16
258; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
259; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32
260; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
261; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64
262; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
263; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8
264; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
265; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16
266; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer
267; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32
268; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer
269; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64
270; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer
271; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
272; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
273; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32
274; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer
275; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64
276; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer
277; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
278;
279; AVX2-LABEL: 'shuffle_load'
280; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
281; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
282; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
283; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
284; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
285; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
286; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
287; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
288; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
289; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
290; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
291; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
292; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
293; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
294; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
295; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
296; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
297; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
298; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
299; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
300; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
301; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
302; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
303; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
304; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
305; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer
306; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32
307; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer
308; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64
309; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer
310; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16
311; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer
312; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32
313; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer
314; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64
315; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer
316; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4
317; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer
318; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8
319; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
320; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16
321; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
322; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32
323; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
324; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64
325; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
326; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8
327; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
328; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16
329; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer
330; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32
331; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer
332; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64
333; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer
334; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
335; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
336; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32
337; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer
338; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64
339; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer
340; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
341;
342; AVX512-LABEL: 'shuffle_load'
343; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
344; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
345; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
346; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
347; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
348; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
349; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
350; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
351; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
352; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
353; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
354; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
355; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
356; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
357; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
358; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
359; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
360; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
361; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
362; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
363; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
364; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
365; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
366; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
367; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
368; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer
369; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32
370; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer
371; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64
372; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer
373; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16
374; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer
375; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32
376; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer
377; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64
378; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer
379; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4
380; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer
381; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8
382; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
383; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16
384; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
385; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32
386; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
387; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64
388; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
389; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8
390; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
391; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16
392; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer
393; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32
394; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer
395; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64
396; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer
397; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
398; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
399; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32
400; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer
401; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64
402; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer
403; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
404;
405  %ld_2xi8 = load <2 x i8>, ptr undef
406  %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
407  %ld_4xi8 = load <4 x i8>, ptr undef
408  %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
409  %ld_8xi8 = load <8 x i8>, ptr undef
410  %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
411  %ld_16xi8 = load <16 x i8>, ptr undef
412  %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
413  %ld_32xi8 = load <32 x i8>, ptr undef
414  %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
415  %ld_64xi8 = load <64 x i8>, ptr undef
416  %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
417
418  %ld_2xi16 = load <2 x i16>, ptr undef
419  %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
420  %ld_4xi16 = load <4 x i16>, ptr undef
421  %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
422  %ld_8xi16 = load <8 x i16>, ptr undef
423  %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
424  %ld_16xi16 = load <16 x i16>, ptr undef
425  %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
426  %ld_32xi16 = load <32 x i16>, ptr undef
427  %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
428
429  %ld_2xi32 = load <2 x i32>, ptr undef
430  %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
431  %ld_4xi32 = load <4 x i32>, ptr undef
432  %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer
433  %ld_8xi32 = load <8 x i32>, ptr undef
434  %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer
435  %ld_16xi32 = load <16 x i32>, ptr undef
436  %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer
437
438  %ld_2xi64 = load <2 x i64>, ptr undef
439  %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer
440  %ld_4xi64 = load <4 x i64>, ptr undef
441  %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer
442  %ld_8xi64 = load <8 x i64>, ptr undef
443  %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer
444
445  %ld_2xf16 = load <2 x half>, ptr undef
446  %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer
447  %ld_4xf16 = load <4 x half>, ptr undef
448  %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
449  %ld_8xf16 = load <8 x half>, ptr undef
450  %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
451  %ld_16xf16 = load <16 x half>, ptr undef
452  %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
453  %ld_32xf16 = load <32 x half>, ptr undef
454  %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
455
456  %ld_2xf32 = load <2 x float>, ptr undef
457  %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
458  %ld_4xf32 = load <4 x float>, ptr undef
459  %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer
460  %ld_8xf32 = load <8 x float>, ptr undef
461  %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer
462  %ld_16xf32 = load <16 x float>, ptr undef
463  %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer
464
465  %ld_2xf64 = load <2 x double>, ptr undef
466  %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
467  %ld_4xf64 = load <4 x double>, ptr undef
468  %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer
469  %ld_8xf64 = load <8 x double>, ptr undef
470  %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer
471
472  ret void
473}
474