; xref: /llvm-project/llvm/test/CodeGen/X86/widened-broadcast.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512

; Widened shuffle broadcast loads

; Splats the low two floats (mask 0,1,0,1) of a loaded <4 x float>.
; Per the checks below, SSE4.2 and AVX fold the load into a single (v)movddup,
; while plain SSE2 loads the full vector and duplicates it with movlhps.
; Marked readonly (not readnone) because the body loads through %ptr.
define <4 x float> @load_splat_4f32_4f32_0101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_4f32_4f32_0101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_4f32_4f32_0101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_4f32_0101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
entry:
  %ld = load <4 x float>, ptr %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %ret
}

; Repeats elements 0,1 of a loaded <4 x float> across an <8 x float> result.
; Per the checks below, AVX targets use one vbroadcastsd into ymm0; SSE targets
; build the 128-bit splat in xmm0 and copy it to xmm1 for the upper half.
; Marked readonly (not readnone) because the body loads through %ptr.
define <8 x float> @load_splat_8f32_4f32_01010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_4f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_4f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_4f32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <4 x float>, ptr %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}

; Repeats elements 0,1 of a loaded <8 x float> across an <8 x float> result;
; only the first 64 bits of the wide load are referenced by the mask.
; Per the checks below, AVX targets use one vbroadcastsd into ymm0; SSE targets
; build the 128-bit splat in xmm0 and copy it to xmm1.
; Marked readonly (not readnone) because the body loads through %ptr.
define <8 x float> @load_splat_8f32_8f32_01010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_8f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_8f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_8f32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, ptr %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}

; Integer variant: splats the low two i32 elements (mask 0,1,0,1) of a loaded
; <4 x i32>. Per the checks below, SSE uses pshufd with a memory operand, AVX1
; uses vpermilps from memory, and AVX2/AVX512 use a vmovddup load-broadcast.
; Marked readonly (not readnone) because the body loads through %ptr.
define <4 x i32> @load_splat_4i32_4i32_0101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_4i32_4i32_0101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_4i32_4i32_0101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_4i32_4i32_0101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_4i32_4i32_0101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX512-NEXT:    retq
entry:
  %ld = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %ret
}

; Repeats elements 0,1 of a loaded <4 x i32> across an <8 x i32> result.
; Per the checks below, SSE shuffles from memory with pshufd and copies xmm0 to
; xmm1; AVX targets use a single vbroadcastsd into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <8 x i32> @load_splat_8i32_4i32_01010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i32_4i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_4i32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}

; Repeats elements 0,1 of a loaded <8 x i32> across an <8 x i32> result; the
; mask only references the first 64 bits of the wide load.
; Per the checks below, SSE shuffles from memory with pshufd and copies xmm0 to
; xmm1; AVX targets use a single vbroadcastsd into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <8 x i32> @load_splat_8i32_8i32_01010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i32_8i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_8i32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i32>, ptr %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}

; Repeats the first pair of i16 elements (a 32-bit group) of a loaded <8 x i16>.
; Per the checks below this is treated as a 32-bit splat: pshufd [0,0,0,0] from
; memory on SSE, vpermilps from memory on AVX1, vbroadcastss on AVX2/AVX512.
; Marked readonly (not readnone) because the body loads through %ptr.
define <8 x i16> @load_splat_8i16_8i16_01010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i16_8i16_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i16> %ret
}

; Repeats the first four i16 elements (a 64-bit group) of a loaded <8 x i16>.
; Per the checks below this is treated as a 64-bit splat: pshufd [0,1,0,1] from
; memory on SSE, vpermilps from memory on AVX1, vmovddup on AVX2/AVX512.
; Marked readonly (not readnone) because the body loads through %ptr.
define <8 x i16> @load_splat_8i16_8i16_01230123(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i16_8i16_01230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %ret
}

; Repeats the first pair of i16 elements of a loaded <8 x i16> across a
; <16 x i16> result. Per the checks below, SSE uses pshufd [0,0,0,0] from memory
; plus a copy to xmm1; AVX targets use a single vbroadcastss into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <16 x i16> @load_splat_16i16_8i16_0101010101010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_8i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}

; Repeats the first four i16 elements of a loaded <8 x i16> across a
; <16 x i16> result. Per the checks below, SSE uses pshufd [0,1,0,1] from memory
; plus a copy to xmm1; AVX targets use a single vbroadcastsd into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <16 x i16> @load_splat_16i16_8i16_0123012301230123(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_8i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}

; Repeats the first pair of i16 elements of a loaded <16 x i16> across a
; <16 x i16> result; the mask only references the first 32 bits of the load.
; Per the checks below, SSE uses pshufd [0,0,0,0] from memory plus a copy to
; xmm1; AVX targets use a single vbroadcastss into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <16 x i16> @load_splat_16i16_16i16_0101010101010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_16i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i16>, ptr %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}

; Repeats the first four i16 elements of a loaded <16 x i16> across a
; <16 x i16> result; the mask only references the first 64 bits of the load.
; Per the checks below, SSE uses pshufd [0,1,0,1] from memory plus a copy to
; xmm1; AVX targets use a single vbroadcastsd into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <16 x i16> @load_splat_16i16_16i16_0123012301230123(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_16i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i16>, ptr %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}

; Repeats the first pair of bytes (a 16-bit group) of a loaded <16 x i8>.
; Per the checks below, SSE and AVX1 need a two-step (v)pshuflw + (v)pshufd
; sequence, while AVX2/AVX512 use a single vpbroadcastw from memory.
; Marked readonly (not readnone) because the body loads through %ptr.
define <16 x i8> @load_splat_16i8_16i8_0101010101010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %ret
}

; Repeats the first four bytes (a 32-bit group) of a loaded <16 x i8>.
; Per the checks below this is treated as a 32-bit splat: pshufd [0,0,0,0] from
; memory on SSE, vpermilps from memory on AVX1, vbroadcastss on AVX2/AVX512.
; Marked readonly (not readnone) because the body loads through %ptr.
define <16 x i8> @load_splat_16i8_16i8_0123012301230123(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %ret
}

; Repeats the first eight bytes (a 64-bit group) of a loaded <16 x i8>.
; Per the checks below this is treated as a 64-bit splat: pshufd [0,1,0,1] from
; memory on SSE, vpermilps from memory on AVX1, vmovddup on AVX2/AVX512.
; Marked readonly (not readnone) because the body loads through %ptr.
define <16 x i8> @load_splat_16i8_16i8_0123456701234567(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %ret
}

; Repeats the first pair of bytes of a loaded <16 x i8> across a <32 x i8>
; result. Per the checks below, SSE uses pshuflw + pshufd and copies xmm0 to
; xmm1; AVX1 uses vpshuflw + vpshufd and widens with vinsertf128; AVX2/AVX512
; use a single vpbroadcastw into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <32 x i8> @load_splat_32i8_16i8_01010101010101010101010101010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}

; Repeats the first four bytes of a loaded <16 x i8> across a <32 x i8> result.
; Per the checks below, SSE uses pshufd [0,0,0,0] from memory plus a copy to
; xmm1; AVX targets use a single vbroadcastss into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <32 x i8> @load_splat_32i8_16i8_01230123012301230123012301230123(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}

; Repeats the first eight bytes of a loaded <16 x i8> across a <32 x i8> result.
; Per the checks below, SSE uses pshufd [0,1,0,1] from memory plus a copy to
; xmm1; AVX targets use a single vbroadcastsd into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}

; Repeats the first pair of bytes of a loaded <32 x i8> across a <32 x i8>
; result; the mask only references the first 16 bits of the load.
; Per the checks below, SSE uses pshuflw + pshufd and copies xmm0 to xmm1; AVX1
; uses vpshuflw + vpshufd and widens with vinsertf128; AVX2/AVX512 use a single
; vpbroadcastw into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <32 x i8>, ptr %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}

; Repeats the first four bytes of a loaded <32 x i8> across a <32 x i8> result;
; the mask only references the first 32 bits of the load.
; Per the checks below, SSE uses pshufd [0,0,0,0] from memory plus a copy to
; xmm1; AVX targets use a single vbroadcastss into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, ptr %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}

; Repeats the first eight bytes of a loaded <32 x i8> across a <32 x i8> result;
; the mask only references the first 64 bits of the load.
; Per the checks below, SSE uses pshufd [0,1,0,1] from memory plus a copy to
; xmm1; AVX targets use a single vbroadcastsd into ymm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <32 x i8> @load_splat_32i8_32i8_01234567012345670123456701234567(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, ptr %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}

; Narrowing case: splats element 0 of a loaded <8 x float> into a <4 x float>
; result (all-zero mask). Per the checks below, SSE loads the low 128 bits and
; uses shufps [0,0,0,0]; AVX targets use a single vbroadcastss into xmm0.
; Marked readonly (not readnone) because the body loads through %ptr.
define <4 x float> @load_splat_4f32_8f32_0000(ptr %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_4f32_8f32_0000:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_8f32_0000:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, ptr %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %ret
}

; Repeats elements 8,9 of a loaded <16 x float> across an <8 x float> result, so
; the splat source sits at a non-zero offset inside the load. The SSE2 and AVX
; checks below show that offset as a 32(%rdi) address; SSE4.2 folds the load
; into movddup. SSE targets also copy xmm0 to xmm1 for the upper half.
; Marked readonly (not readnone) because the body loads through %ptr.
define <8 x float> @load_splat_8f32_16f32_89898989(ptr %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_16f32_89898989:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps 32(%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_16f32_89898989:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_16f32_89898989:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd 32(%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x float>, ptr %ptr
  %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
  ret <8 x float> %ret
}

; PR34394
; Widening case: a 64-bit <2 x i32> load is repeated to fill a <4 x i32> result.
; Per the checks below, SSE loads the 64 bits with movq and duplicates them with
; pshufd [0,1,0,1]; AVX targets use a single vmovddup from memory.
define <4 x i32> @load_splat_4i32_2i32_0101(ptr %vp) {
; SSE-LABEL: load_splat_4i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_4i32_2i32_0101:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %res
}

; Widening case: a 64-bit <2 x i32> load is repeated to fill an <8 x i32>
; result. Per the checks below, SSE uses movq + pshufd [0,1,0,1] and copies
; xmm0 to xmm1; AVX targets use a single vbroadcastsd into ymm0.
define <8 x i32> @load_splat_8i32_2i32_0101(ptr %vp) {
; SSE-LABEL: load_splat_8i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_2i32_0101:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %res
}

; Widening case: a 64-bit <2 x i32> load is repeated to fill a <16 x i32>
; result. Per the checks below, SSE uses movq + pshufd [0,1,0,1] and copies
; xmm0 into xmm1..xmm3; AVX1 and AVX2 use vbroadcastsd into ymm0 and copy it to
; ymm1; AVX512 uses a single vbroadcastsd into zmm0.
define <16 x i32> @load_splat_16i32_2i32_0101(ptr %vp) {
; SSE-LABEL: load_splat_16i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    vmovaps %ymm0, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    vmovaps %ymm0, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastsd (%rdi), %zmm0
; AVX512-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i32> %res
}
573