xref: /llvm-project/llvm/test/CodeGen/X86/avx512-extract-subvector.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
3
4
; Returns elements 16-23 of a <32 x i16>, i.e. the 128-bit subvector at index 2.
; The autogenerated checks expect a single vextractf32x4 $2 from zmm0 into xmm0.
5define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
6; SKX-LABEL: extract_subvector128_v32i16:
7; SKX:       ## %bb.0:
8; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
9; SKX-NEXT:    vzeroupper
10; SKX-NEXT:    retq
11  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
12  ret <8 x i16> %r1
13}
14
; Returns elements 0-7 of a <32 x i16>, i.e. the lowest 128 bits of the input.
; The autogenerated checks expect no extract instruction here, only a
; subregister kill annotation on xmm0/zmm0 followed by vzeroupper and retq.
15define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
16; SKX-LABEL: extract_subvector128_v32i16_first_element:
17; SKX:       ## %bb.0:
18; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
19; SKX-NEXT:    vzeroupper
20; SKX-NEXT:    retq
21  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22  ret <8 x i16> %r1
23}
24
; Returns elements 32-47 of a <64 x i8>, i.e. the 128-bit subvector at index 2.
; The autogenerated checks expect a single vextractf32x4 $2 from zmm0 into xmm0.
25define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
26; SKX-LABEL: extract_subvector128_v64i8:
27; SKX:       ## %bb.0:
28; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
29; SKX-NEXT:    vzeroupper
30; SKX-NEXT:    retq
31  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
32  ret <16 x i8> %r1
33}
34
; Returns elements 0-15 of a <64 x i8>, i.e. the lowest 128 bits of the input.
; The autogenerated checks expect no extract instruction here, only a
; subregister kill annotation on xmm0/zmm0 followed by vzeroupper and retq.
35define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
36; SKX-LABEL: extract_subvector128_v64i8_first_element:
37; SKX:       ## %bb.0:
38; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
39; SKX-NEXT:    vzeroupper
40; SKX-NEXT:    retq
41  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
42  ret <16 x i8> %r1
43}
44
45
; Returns elements 16-31 of a <32 x i16>, i.e. the upper 256 bits of the input.
; The autogenerated checks expect a single vextractf64x4 $1 from zmm0 into ymm0.
46define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
47; SKX-LABEL: extract_subvector256_v32i16:
48; SKX:       ## %bb.0:
49; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
50; SKX-NEXT:    retq
51  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
52  ret <16 x i16> %r1
53}
54
; Returns elements 32-63 of a <64 x i8>, i.e. the upper 256 bits of the input.
; The autogenerated checks expect a single vextractf64x4 $1 from zmm0 into ymm0.
55define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
56; SKX-LABEL: extract_subvector256_v64i8:
57; SKX:       ## %bb.0:
58; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
59; SKX-NEXT:    retq
60  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
61  ret <32 x i8> %r1
62}
63
; Stores the upper half (elements 2-3) of a <4 x double> to %addr with align 1.
; The autogenerated checks expect the extract and store to be a single
; vextractf128 $1 with a memory destination.
; NOTE(review): the name says "v8f64" but the source operand is <4 x double>.
64define void @extract_subvector256_v8f64_store(ptr nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
65; SKX-LABEL: extract_subvector256_v8f64_store:
66; SKX:       ## %bb.0: ## %entry
67; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
68; SKX-NEXT:    vzeroupper
69; SKX-NEXT:    retq
70entry:
71  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
72  store <2 x double> %0, ptr %addr, align 1
73  ret void
74}
75
; Stores the upper half (elements 4-7) of an <8 x float> to %addr with align 1.
; The autogenerated checks expect the extract and store to be a single
; vextractf128 $1 with a memory destination.
76define void @extract_subvector256_v8f32_store(ptr nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
77; SKX-LABEL: extract_subvector256_v8f32_store:
78; SKX:       ## %bb.0: ## %entry
79; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
80; SKX-NEXT:    vzeroupper
81; SKX-NEXT:    retq
82entry:
83  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
84  store <4 x float> %0, ptr %addr, align 1
85  ret void
86}
87
; Stores the upper half (elements 2-3) of a <4 x i64> to %addr with align 1.
; The autogenerated checks expect a single vextractf128 $1 with a memory
; destination, even though the element type is integer.
88define void @extract_subvector256_v4i64_store(ptr nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
89; SKX-LABEL: extract_subvector256_v4i64_store:
90; SKX:       ## %bb.0: ## %entry
91; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
92; SKX-NEXT:    vzeroupper
93; SKX-NEXT:    retq
94entry:
95  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
96  store <2 x i64> %0, ptr %addr, align 1
97  ret void
98}
99
; Stores the upper half (elements 4-7) of an <8 x i32> to %addr with align 1.
; The autogenerated checks expect a single vextractf128 $1 with a memory
; destination, even though the element type is integer.
100define void @extract_subvector256_v8i32_store(ptr nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
101; SKX-LABEL: extract_subvector256_v8i32_store:
102; SKX:       ## %bb.0: ## %entry
103; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
104; SKX-NEXT:    vzeroupper
105; SKX-NEXT:    retq
106entry:
107  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
108  store <4 x i32> %0, ptr %addr, align 1
109  ret void
110}
111
; Stores the upper half (elements 8-15) of a <16 x i16> to %addr with align 1.
; The autogenerated checks expect a single vextractf128 $1 with a memory
; destination, even though the element type is integer.
112define void @extract_subvector256_v16i16_store(ptr nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
113; SKX-LABEL: extract_subvector256_v16i16_store:
114; SKX:       ## %bb.0: ## %entry
115; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
116; SKX-NEXT:    vzeroupper
117; SKX-NEXT:    retq
118entry:
119  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
120  store <8 x i16> %0, ptr %addr, align 1
121  ret void
122}
123
; Stores the upper half (elements 16-31) of a <32 x i8> to %addr with align 1.
; The autogenerated checks expect a single vextractf128 $1 with a memory
; destination, even though the element type is integer.
124define void @extract_subvector256_v32i8_store(ptr nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
125; SKX-LABEL: extract_subvector256_v32i8_store:
126; SKX:       ## %bb.0: ## %entry
127; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
128; SKX-NEXT:    vzeroupper
129; SKX-NEXT:    retq
130entry:
131  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
132  store <16 x i8> %0, ptr %addr, align 1
133  ret void
134}
135
; Stores the low half (elements 0-1) of a <4 x double> to %addr with align 1.
; The autogenerated checks expect a plain unaligned vmovups of xmm0 and no
; extract instruction.
136define void @extract_subvector256_v4f64_store_lo(ptr nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
137; SKX-LABEL: extract_subvector256_v4f64_store_lo:
138; SKX:       ## %bb.0: ## %entry
139; SKX-NEXT:    vmovups %xmm0, (%rdi)
140; SKX-NEXT:    vzeroupper
141; SKX-NEXT:    retq
142entry:
143  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
144  store <2 x double> %0, ptr %addr, align 1
145  ret void
146}
147
; Stores the low half (elements 0-1) of a <4 x double> to %addr with align 16.
; With the 16-byte alignment the autogenerated checks expect the aligned
; vmovaps of xmm0 and no extract instruction.
148define void @extract_subvector256_v4f64_store_lo_align_16(ptr nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
149; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
150; SKX:       ## %bb.0: ## %entry
151; SKX-NEXT:    vmovaps %xmm0, (%rdi)
152; SKX-NEXT:    vzeroupper
153; SKX-NEXT:    retq
154entry:
155  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
156  store <2 x double> %0, ptr %addr, align 16
157  ret void
158}
159
; Stores the low half (elements 0-3) of an <8 x float> to %addr with align 1.
; The autogenerated checks expect a plain unaligned vmovups of xmm0 and no
; extract instruction.
160define void @extract_subvector256_v4f32_store_lo(ptr nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
161; SKX-LABEL: extract_subvector256_v4f32_store_lo:
162; SKX:       ## %bb.0: ## %entry
163; SKX-NEXT:    vmovups %xmm0, (%rdi)
164; SKX-NEXT:    vzeroupper
165; SKX-NEXT:    retq
166entry:
167  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
168  store <4 x float> %0, ptr %addr, align 1
169  ret void
170}
171
; Stores the low half (elements 0-3) of an <8 x float> to %addr with align 16.
; With the 16-byte alignment the autogenerated checks expect the aligned
; vmovaps of xmm0 and no extract instruction.
172define void @extract_subvector256_v4f32_store_lo_align_16(ptr nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
173; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
174; SKX:       ## %bb.0: ## %entry
175; SKX-NEXT:    vmovaps %xmm0, (%rdi)
176; SKX-NEXT:    vzeroupper
177; SKX-NEXT:    retq
178entry:
179  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
180  store <4 x float> %0, ptr %addr, align 16
181  ret void
182}
183
; Stores the low half (elements 0-1) of a <4 x i64> to %addr with align 1.
; The autogenerated checks expect a plain unaligned vmovups of xmm0, even
; though the element type is integer, and no extract instruction.
184define void @extract_subvector256_v2i64_store_lo(ptr nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
185; SKX-LABEL: extract_subvector256_v2i64_store_lo:
186; SKX:       ## %bb.0: ## %entry
187; SKX-NEXT:    vmovups %xmm0, (%rdi)
188; SKX-NEXT:    vzeroupper
189; SKX-NEXT:    retq
190entry:
191  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
192  store <2 x i64> %0, ptr %addr, align 1
193  ret void
194}
195
; Stores the low half (elements 0-1) of a <4 x i64> to %addr with align 16.
; With the 16-byte alignment the autogenerated checks expect the aligned
; vmovaps of xmm0 and no extract instruction.
196define void @extract_subvector256_v2i64_store_lo_align_16(ptr nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
197; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
198; SKX:       ## %bb.0: ## %entry
199; SKX-NEXT:    vmovaps %xmm0, (%rdi)
200; SKX-NEXT:    vzeroupper
201; SKX-NEXT:    retq
202entry:
203  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
204  store <2 x i64> %0, ptr %addr, align 16
205  ret void
206}
207
; Stores the low half (elements 0-3) of an <8 x i32> to %addr with align 1.
; The autogenerated checks expect a plain unaligned vmovups of xmm0, even
; though the element type is integer, and no extract instruction.
208define void @extract_subvector256_v4i32_store_lo(ptr nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
209; SKX-LABEL: extract_subvector256_v4i32_store_lo:
210; SKX:       ## %bb.0: ## %entry
211; SKX-NEXT:    vmovups %xmm0, (%rdi)
212; SKX-NEXT:    vzeroupper
213; SKX-NEXT:    retq
214entry:
215  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
216  store <4 x i32> %0, ptr %addr, align 1
217  ret void
218}
219
; Stores the low half (elements 0-3) of an <8 x i32> to %addr with align 16.
; With the 16-byte alignment the autogenerated checks expect the aligned
; vmovaps of xmm0 and no extract instruction.
220define void @extract_subvector256_v4i32_store_lo_align_16(ptr nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
221; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
222; SKX:       ## %bb.0: ## %entry
223; SKX-NEXT:    vmovaps %xmm0, (%rdi)
224; SKX-NEXT:    vzeroupper
225; SKX-NEXT:    retq
226entry:
227  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
228  store <4 x i32> %0, ptr %addr, align 16
229  ret void
230}
231
; Stores the low half (elements 0-7) of a <16 x i16> to %addr with align 1.
; The autogenerated checks expect a plain unaligned vmovups of xmm0, even
; though the element type is integer, and no extract instruction.
232define void @extract_subvector256_v8i16_store_lo(ptr nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
233; SKX-LABEL: extract_subvector256_v8i16_store_lo:
234; SKX:       ## %bb.0: ## %entry
235; SKX-NEXT:    vmovups %xmm0, (%rdi)
236; SKX-NEXT:    vzeroupper
237; SKX-NEXT:    retq
238entry:
239  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
240  store <8 x i16> %0, ptr %addr, align 1
241  ret void
242}
243
; Stores the low half (elements 0-7) of a <16 x i16> to %addr with align 16.
; With the 16-byte alignment the autogenerated checks expect the aligned
; vmovaps of xmm0 and no extract instruction.
244define void @extract_subvector256_v8i16_store_lo_align_16(ptr nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
245; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
246; SKX:       ## %bb.0: ## %entry
247; SKX-NEXT:    vmovaps %xmm0, (%rdi)
248; SKX-NEXT:    vzeroupper
249; SKX-NEXT:    retq
250entry:
251  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
252  store <8 x i16> %0, ptr %addr, align 16
253  ret void
254}
255
; Stores the low half (elements 0-15) of a <32 x i8> to %addr with align 1.
; The autogenerated checks expect a plain unaligned vmovups of xmm0, even
; though the element type is integer, and no extract instruction.
256define void @extract_subvector256_v16i8_store_lo(ptr nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
257; SKX-LABEL: extract_subvector256_v16i8_store_lo:
258; SKX:       ## %bb.0: ## %entry
259; SKX-NEXT:    vmovups %xmm0, (%rdi)
260; SKX-NEXT:    vzeroupper
261; SKX-NEXT:    retq
262entry:
263  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
264  store <16 x i8> %0, ptr %addr, align 1
265  ret void
266}
267
; Stores the low half (elements 0-15) of a <32 x i8> to %addr with align 16.
; With the 16-byte alignment the autogenerated checks expect the aligned
; vmovaps of xmm0 and no extract instruction.
268define void @extract_subvector256_v16i8_store_lo_align_16(ptr nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
269; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
270; SKX:       ## %bb.0: ## %entry
271; SKX-NEXT:    vmovaps %xmm0, (%rdi)
272; SKX-NEXT:    vzeroupper
273; SKX-NEXT:    retq
274entry:
275  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
276  store <16 x i8> %0, ptr %addr, align 16
277  ret void
278}
279
; Stores the lowest 128 bits (elements 0-1) of an <8 x double> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of xmm0
; and no extract instruction.
280define void @extract_subvector512_v2f64_store_lo(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
281; SKX-LABEL: extract_subvector512_v2f64_store_lo:
282; SKX:       ## %bb.0: ## %entry
283; SKX-NEXT:    vmovups %xmm0, (%rdi)
284; SKX-NEXT:    vzeroupper
285; SKX-NEXT:    retq
286entry:
287  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
288  store <2 x double> %0, ptr %addr, align 1
289  ret void
290}
291
; Stores the lowest 128 bits (elements 0-1) of an <8 x double> to %addr with
; align 16. With the 16-byte alignment the autogenerated checks expect the
; aligned vmovaps of xmm0 and no extract instruction.
292define void @extract_subvector512_v2f64_store_lo_align_16(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
293; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
294; SKX:       ## %bb.0: ## %entry
295; SKX-NEXT:    vmovaps %xmm0, (%rdi)
296; SKX-NEXT:    vzeroupper
297; SKX-NEXT:    retq
298entry:
299  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
300  store <2 x double> %0, ptr %addr, align 16
301  ret void
302}
303
; Stores the lowest 128 bits (elements 0-3) of a <16 x float> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of xmm0
; and no extract instruction.
304define void @extract_subvector512_v4f32_store_lo(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
305; SKX-LABEL: extract_subvector512_v4f32_store_lo:
306; SKX:       ## %bb.0: ## %entry
307; SKX-NEXT:    vmovups %xmm0, (%rdi)
308; SKX-NEXT:    vzeroupper
309; SKX-NEXT:    retq
310entry:
311  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
312  store <4 x float> %0, ptr %addr, align 1
313  ret void
314}
315
; Stores the lowest 128 bits (elements 0-3) of a <16 x float> to %addr with
; align 16. With the 16-byte alignment the autogenerated checks expect the
; aligned vmovaps of xmm0 and no extract instruction.
316define void @extract_subvector512_v4f32_store_lo_align_16(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
317; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
318; SKX:       ## %bb.0: ## %entry
319; SKX-NEXT:    vmovaps %xmm0, (%rdi)
320; SKX-NEXT:    vzeroupper
321; SKX-NEXT:    retq
322entry:
323  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
324  store <4 x float> %0, ptr %addr, align 16
325  ret void
326}
327
; Stores the lowest 128 bits (elements 0-1) of an <8 x i64> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of xmm0,
; even though the element type is integer, and no extract instruction.
328define void @extract_subvector512_v2i64_store_lo(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
329; SKX-LABEL: extract_subvector512_v2i64_store_lo:
330; SKX:       ## %bb.0: ## %entry
331; SKX-NEXT:    vmovups %xmm0, (%rdi)
332; SKX-NEXT:    vzeroupper
333; SKX-NEXT:    retq
334entry:
335  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
336  store <2 x i64> %0, ptr %addr, align 1
337  ret void
338}
339
; Stores the lowest 128 bits (elements 0-1) of an <8 x i64> to %addr with
; align 16. With the 16-byte alignment the autogenerated checks expect the
; aligned vmovaps of xmm0 and no extract instruction.
340define void @extract_subvector512_v2i64_store_lo_align_16(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
341; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
342; SKX:       ## %bb.0: ## %entry
343; SKX-NEXT:    vmovaps %xmm0, (%rdi)
344; SKX-NEXT:    vzeroupper
345; SKX-NEXT:    retq
346entry:
347  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
348  store <2 x i64> %0, ptr %addr, align 16
349  ret void
350}
351
; Stores the lowest 128 bits (elements 0-3) of a <16 x i32> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of xmm0,
; even though the element type is integer, and no extract instruction.
352define void @extract_subvector512_v4i32_store_lo(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
353; SKX-LABEL: extract_subvector512_v4i32_store_lo:
354; SKX:       ## %bb.0: ## %entry
355; SKX-NEXT:    vmovups %xmm0, (%rdi)
356; SKX-NEXT:    vzeroupper
357; SKX-NEXT:    retq
358entry:
359  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
360  store <4 x i32> %0, ptr %addr, align 1
361  ret void
362}
363
; Stores the lowest 128 bits (elements 0-3) of a <16 x i32> to %addr with
; align 16. With the 16-byte alignment the autogenerated checks expect the
; aligned vmovaps of xmm0 and no extract instruction.
364define void @extract_subvector512_v4i32_store_lo_align_16(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
365; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
366; SKX:       ## %bb.0: ## %entry
367; SKX-NEXT:    vmovaps %xmm0, (%rdi)
368; SKX-NEXT:    vzeroupper
369; SKX-NEXT:    retq
370entry:
371  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
372  store <4 x i32> %0, ptr %addr, align 16
373  ret void
374}
375
; Stores the lowest 128 bits (elements 0-7) of a <32 x i16> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of xmm0,
; even though the element type is integer, and no extract instruction.
376define void @extract_subvector512_v8i16_store_lo(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
377; SKX-LABEL: extract_subvector512_v8i16_store_lo:
378; SKX:       ## %bb.0: ## %entry
379; SKX-NEXT:    vmovups %xmm0, (%rdi)
380; SKX-NEXT:    vzeroupper
381; SKX-NEXT:    retq
382entry:
383  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
384  store <8 x i16> %0, ptr %addr, align 1
385  ret void
386}
387
; Stores the lowest 128 bits (elements 0-15) of a <64 x i8> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of xmm0,
; even though the element type is integer, and no extract instruction.
388define void @extract_subvector512_v16i8_store_lo(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
389; SKX-LABEL: extract_subvector512_v16i8_store_lo:
390; SKX:       ## %bb.0: ## %entry
391; SKX-NEXT:    vmovups %xmm0, (%rdi)
392; SKX-NEXT:    vzeroupper
393; SKX-NEXT:    retq
394entry:
395  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
396  store <16 x i8> %0, ptr %addr, align 1
397  ret void
398}
399
; Stores the lowest 128 bits (elements 0-15) of a <64 x i8> to %addr with
; align 16. With the 16-byte alignment the autogenerated checks expect the
; aligned vmovaps of xmm0 and no extract instruction.
400define void @extract_subvector512_v16i8_store_lo_align_16(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
401; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
402; SKX:       ## %bb.0: ## %entry
403; SKX-NEXT:    vmovaps %xmm0, (%rdi)
404; SKX-NEXT:    vzeroupper
405; SKX-NEXT:    retq
406entry:
407  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
408  store <16 x i8> %0, ptr %addr, align 16
409  ret void
410}
411
; Stores the low 256 bits (elements 0-3) of an <8 x double> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of ymm0
; and no extract instruction.
412define void @extract_subvector512_v4f64_store_lo(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
413; SKX-LABEL: extract_subvector512_v4f64_store_lo:
414; SKX:       ## %bb.0: ## %entry
415; SKX-NEXT:    vmovups %ymm0, (%rdi)
416; SKX-NEXT:    vzeroupper
417; SKX-NEXT:    retq
418entry:
419  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
420  store <4 x double> %0, ptr %addr, align 1
421  ret void
422}
423
; Stores the low 256 bits (elements 0-3) of an <8 x double> to %addr with
; align 16. The alignment is smaller than the 32-byte store, and the
; autogenerated checks still expect the unaligned vmovups of ymm0.
424define void @extract_subvector512_v4f64_store_lo_align_16(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
425; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
426; SKX:       ## %bb.0: ## %entry
427; SKX-NEXT:    vmovups %ymm0, (%rdi)
428; SKX-NEXT:    vzeroupper
429; SKX-NEXT:    retq
430entry:
431  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
432  store <4 x double> %0, ptr %addr, align 16
433  ret void
434}
435
; Stores the low 256 bits (elements 0-3) of an <8 x double> to %addr with
; align 32. With the 32-byte alignment the autogenerated checks expect the
; aligned vmovaps of ymm0 and no extract instruction.
436define void @extract_subvector512_v4f64_store_lo_align_32(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
437; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
438; SKX:       ## %bb.0: ## %entry
439; SKX-NEXT:    vmovaps %ymm0, (%rdi)
440; SKX-NEXT:    vzeroupper
441; SKX-NEXT:    retq
442entry:
443  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
444  store <4 x double> %0, ptr %addr, align 32
445  ret void
446}
447
; Stores the low 256 bits (elements 0-7) of a <16 x float> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of ymm0
; and no extract instruction.
448define void @extract_subvector512_v8f32_store_lo(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
449; SKX-LABEL: extract_subvector512_v8f32_store_lo:
450; SKX:       ## %bb.0: ## %entry
451; SKX-NEXT:    vmovups %ymm0, (%rdi)
452; SKX-NEXT:    vzeroupper
453; SKX-NEXT:    retq
454entry:
455  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
456  store <8 x float> %0, ptr %addr, align 1
457  ret void
458}
459
; Stores the low 256 bits (elements 0-7) of a <16 x float> to %addr with
; align 16. The alignment is smaller than the 32-byte store, and the
; autogenerated checks still expect the unaligned vmovups of ymm0.
460define void @extract_subvector512_v8f32_store_lo_align_16(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
461; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
462; SKX:       ## %bb.0: ## %entry
463; SKX-NEXT:    vmovups %ymm0, (%rdi)
464; SKX-NEXT:    vzeroupper
465; SKX-NEXT:    retq
466entry:
467  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
468  store <8 x float> %0, ptr %addr, align 16
469  ret void
470}
471
; Stores the low 256 bits (elements 0-7) of a <16 x float> to %addr with
; align 32. With the 32-byte alignment the autogenerated checks expect the
; aligned vmovaps of ymm0 and no extract instruction.
472define void @extract_subvector512_v8f32_store_lo_align_32(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
473; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
474; SKX:       ## %bb.0: ## %entry
475; SKX-NEXT:    vmovaps %ymm0, (%rdi)
476; SKX-NEXT:    vzeroupper
477; SKX-NEXT:    retq
478entry:
479  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
480  store <8 x float> %0, ptr %addr, align 32
481  ret void
482}
483
; Stores the low 256 bits (elements 0-3) of an <8 x i64> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of ymm0,
; even though the element type is integer, and no extract instruction.
484define void @extract_subvector512_v4i64_store_lo(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
485; SKX-LABEL: extract_subvector512_v4i64_store_lo:
486; SKX:       ## %bb.0: ## %entry
487; SKX-NEXT:    vmovups %ymm0, (%rdi)
488; SKX-NEXT:    vzeroupper
489; SKX-NEXT:    retq
490entry:
491  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
492  store <4 x i64> %0, ptr %addr, align 1
493  ret void
494}
495
; Stores the low 256 bits (elements 0-3) of an <8 x i64> to %addr with
; align 16. The alignment is smaller than the 32-byte store, and the
; autogenerated checks still expect the unaligned vmovups of ymm0.
496define void @extract_subvector512_v4i64_store_lo_align_16(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
497; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
498; SKX:       ## %bb.0: ## %entry
499; SKX-NEXT:    vmovups %ymm0, (%rdi)
500; SKX-NEXT:    vzeroupper
501; SKX-NEXT:    retq
502entry:
503  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
504  store <4 x i64> %0, ptr %addr, align 16
505  ret void
506}
507
; Stores the low 256 bits (elements 0-3) of an <8 x i64> to %addr with
; align 32. With the 32-byte alignment the autogenerated checks expect the
; aligned vmovaps of ymm0 and no extract instruction.
508define void @extract_subvector512_v4i64_store_lo_align_32(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
509; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
510; SKX:       ## %bb.0: ## %entry
511; SKX-NEXT:    vmovaps %ymm0, (%rdi)
512; SKX-NEXT:    vzeroupper
513; SKX-NEXT:    retq
514entry:
515  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
516  store <4 x i64> %0, ptr %addr, align 32
517  ret void
518}
519
; Stores the low 256 bits (elements 0-7) of a <16 x i32> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of ymm0,
; even though the element type is integer, and no extract instruction.
520define void @extract_subvector512_v8i32_store_lo(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
521; SKX-LABEL: extract_subvector512_v8i32_store_lo:
522; SKX:       ## %bb.0: ## %entry
523; SKX-NEXT:    vmovups %ymm0, (%rdi)
524; SKX-NEXT:    vzeroupper
525; SKX-NEXT:    retq
526entry:
527  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
528  store <8 x i32> %0, ptr %addr, align 1
529  ret void
530}
531
; Stores the low 256 bits (elements 0-7) of a <16 x i32> to %addr with
; align 16. The alignment is smaller than the 32-byte store, and the
; autogenerated checks still expect the unaligned vmovups of ymm0.
532define void @extract_subvector512_v8i32_store_lo_align_16(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
533; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
534; SKX:       ## %bb.0: ## %entry
535; SKX-NEXT:    vmovups %ymm0, (%rdi)
536; SKX-NEXT:    vzeroupper
537; SKX-NEXT:    retq
538entry:
539  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
540  store <8 x i32> %0, ptr %addr, align 16
541  ret void
542}
543
; Stores the low 256 bits (elements 0-7) of a <16 x i32> to %addr with
; align 32. With the 32-byte alignment the autogenerated checks expect the
; aligned vmovaps of ymm0 and no extract instruction.
544define void @extract_subvector512_v8i32_store_lo_align_32(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
545; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
546; SKX:       ## %bb.0: ## %entry
547; SKX-NEXT:    vmovaps %ymm0, (%rdi)
548; SKX-NEXT:    vzeroupper
549; SKX-NEXT:    retq
550entry:
551  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
552  store <8 x i32> %0, ptr %addr, align 32
553  ret void
554}
555
; Stores the low 256 bits (elements 0-15) of a <32 x i16> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of ymm0,
; even though the element type is integer, and no extract instruction.
556define void @extract_subvector512_v16i16_store_lo(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
557; SKX-LABEL: extract_subvector512_v16i16_store_lo:
558; SKX:       ## %bb.0: ## %entry
559; SKX-NEXT:    vmovups %ymm0, (%rdi)
560; SKX-NEXT:    vzeroupper
561; SKX-NEXT:    retq
562entry:
563  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
564  store <16 x i16> %0, ptr %addr, align 1
565  ret void
566}
567
; Stores the low 256 bits (elements 0-15) of a <32 x i16> to %addr with
; align 16. The alignment is smaller than the 32-byte store, and the
; autogenerated checks still expect the unaligned vmovups of ymm0.
568define void @extract_subvector512_v16i16_store_lo_align_16(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
569; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
570; SKX:       ## %bb.0: ## %entry
571; SKX-NEXT:    vmovups %ymm0, (%rdi)
572; SKX-NEXT:    vzeroupper
573; SKX-NEXT:    retq
574entry:
575  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
576  store <16 x i16> %0, ptr %addr, align 16
577  ret void
578}
579
; Stores the low 256 bits (elements 0-15) of a <32 x i16> to %addr with
; align 32. With the 32-byte alignment the autogenerated checks expect the
; aligned vmovaps of ymm0 and no extract instruction.
580define void @extract_subvector512_v16i16_store_lo_align_32(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
581; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
582; SKX:       ## %bb.0: ## %entry
583; SKX-NEXT:    vmovaps %ymm0, (%rdi)
584; SKX-NEXT:    vzeroupper
585; SKX-NEXT:    retq
586entry:
587  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
588  store <16 x i16> %0, ptr %addr, align 32
589  ret void
590}
591
; Stores the low 256 bits (elements 0-31) of a <64 x i8> to %addr with
; align 1. The autogenerated checks expect a plain unaligned vmovups of ymm0,
; even though the element type is integer, and no extract instruction.
592define void @extract_subvector512_v32i8_store_lo(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
593; SKX-LABEL: extract_subvector512_v32i8_store_lo:
594; SKX:       ## %bb.0: ## %entry
595; SKX-NEXT:    vmovups %ymm0, (%rdi)
596; SKX-NEXT:    vzeroupper
597; SKX-NEXT:    retq
598entry:
599  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
600  store <32 x i8> %0, ptr %addr, align 1
601  ret void
602}
603
; Stores the low 256 bits (elements 0-31) of a <64 x i8> to %addr with
; align 16. The alignment is smaller than the 32-byte store, and the
; autogenerated checks still expect the unaligned vmovups of ymm0.
604define void @extract_subvector512_v32i8_store_lo_align_16(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
605; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
606; SKX:       ## %bb.0: ## %entry
607; SKX-NEXT:    vmovups %ymm0, (%rdi)
608; SKX-NEXT:    vzeroupper
609; SKX-NEXT:    retq
610entry:
611  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
612  store <32 x i8> %0, ptr %addr, align 16
613  ret void
614}
615
; Stores the low 256 bits (elements 0-31) of a <64 x i8> to %addr with
; align 32. With the 32-byte alignment the autogenerated checks expect the
; aligned vmovaps of ymm0 and no extract instruction.
616define void @extract_subvector512_v32i8_store_lo_align_32(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
617; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
618; SKX:       ## %bb.0: ## %entry
619; SKX-NEXT:    vmovaps %ymm0, (%rdi)
620; SKX-NEXT:    vzeroupper
621; SKX-NEXT:    retq
622entry:
623  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
624  store <32 x i8> %0, ptr %addr, align 32
625  ret void
626}
627
; Merge-masked extract: takes the upper 256 bits (elements 4-7) of %__A and
; selects per lane between that and the passthrough %__W, using the low 4 bits
; of the i8 mask %__U. The autogenerated checks expect the select to fold into
; a single vextractf64x4 $1 with a {%k1} write mask.
628define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
629; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
630; SKX:       ## %bb.0: ## %entry
631; SKX-NEXT:    kmovd %edi, %k1
632; SKX-NEXT:    vextractf64x4 $1, %zmm1, %ymm0 {%k1}
633; SKX-NEXT:    retq
634entry:
635  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
636  %0 = bitcast i8 %__U to <8 x i1>
637  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
638  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W
639  ret <4 x double> %1
640}
641
; Zero-masked extract: takes the upper 256 bits (elements 4-7) of %__A and
; selects per lane between that and zero, using the low 4 bits of the i8 mask
; %__U. The autogenerated checks expect the select to fold into a single
; vextractf64x4 $1 with {%k1} {z}.
642define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
643; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
644; SKX:       ## %bb.0: ## %entry
645; SKX-NEXT:    kmovd %edi, %k1
646; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
647; SKX-NEXT:    retq
648entry:
649  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
650  %0 = bitcast i8 %__U to <8 x i1>
651  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
652  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer
653  ret <4 x double> %1
654}
655
; Merge-masked extract: %__A arrives as <8 x double> and is bitcast to
; <16 x float>; elements 4-7 (the 128-bit subvector at index 1) are then
; selected per lane against the passthrough %__W using the low 4 bits of the
; i8 mask %__U. The autogenerated checks expect a single vextractf32x4 $1
; with a {%k1} write mask.
656define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
657; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
658; SKX:       ## %bb.0: ## %entry
659; SKX-NEXT:    kmovd %edi, %k1
660; SKX-NEXT:    vextractf32x4 $1, %zmm1, %xmm0 {%k1}
661; SKX-NEXT:    vzeroupper
662; SKX-NEXT:    retq
663entry:
664  %0 = bitcast <8 x double> %__A to <16 x float>
665  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
666  %1 = bitcast i8 %__U to <8 x i1>
667  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
668  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
669  ret <4 x float> %2
670}
671
; Zero-masked extract: %__A arrives as <8 x double> and is bitcast to
; <16 x float>; elements 4-7 (the 128-bit subvector at index 1) are then
; selected per lane against zero using the low 4 bits of the i8 mask %__U.
; The autogenerated checks expect a single vextractf32x4 $1 with {%k1} {z}.
672define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
673; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
674; SKX:       ## %bb.0: ## %entry
675; SKX-NEXT:    kmovd %edi, %k1
676; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
677; SKX-NEXT:    vzeroupper
678; SKX-NEXT:    retq
679entry:
680  %0 = bitcast <8 x double> %__A to <16 x float>
681  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
682  %1 = bitcast i8 %__U to <8 x i1>
683  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
684  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
685  ret <4 x float> %2
686}
687
; Merge-masked extract: takes the upper half (elements 2-3) of the
; <4 x double> %__A and selects per lane between that and the passthrough
; %__W, using the low 2 bits of the i8 mask %__U. The autogenerated checks
; expect a single vextractf64x2 $1 with a {%k1} write mask.
688define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
689; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
690; SKX:       ## %bb.0: ## %entry
691; SKX-NEXT:    kmovd %edi, %k1
692; SKX-NEXT:    vextractf64x2 $1, %ymm1, %xmm0 {%k1}
693; SKX-NEXT:    vzeroupper
694; SKX-NEXT:    retq
695entry:
696  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
697  %0 = bitcast i8 %__U to <8 x i1>
698  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
699  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
700  ret <2 x double> %1
701}
702
; Zero-masked extract: takes the upper half (elements 2-3) of the
; <4 x double> %__A and selects per lane between that and zero, using the low
; 2 bits of the i8 mask %__U. The autogenerated checks expect a single
; vextractf64x2 $1 with {%k1} {z}.
703define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
704; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
705; SKX:       ## %bb.0: ## %entry
706; SKX-NEXT:    kmovd %edi, %k1
707; SKX-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
708; SKX-NEXT:    vzeroupper
709; SKX-NEXT:    retq
710entry:
711  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
712  %0 = bitcast i8 %__U to <8 x i1>
713  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
714  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
715  ret <2 x double> %1
716}
717
; Merge-masked integer extract: takes the upper half (elements 2-3) of the
; <4 x i64> %__A and selects per lane between that and the passthrough %__W,
; using the low 2 bits of the i8 mask %__U. The autogenerated checks expect a
; single vextracti64x2 $1 with a {%k1} write mask.
718define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
719; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
720; SKX:       ## %bb.0: ## %entry
721; SKX-NEXT:    kmovd %edi, %k1
722; SKX-NEXT:    vextracti64x2 $1, %ymm1, %xmm0 {%k1}
723; SKX-NEXT:    vzeroupper
724; SKX-NEXT:    retq
725entry:
726  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
727  %0 = bitcast i8 %__U to <8 x i1>
728  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
729  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W
730  ret <2 x i64> %1
731}
732
; Zero-masked integer extract: takes the upper half (elements 2-3) of the
; <4 x i64> %__A and selects per lane between that and zero, using the low
; 2 bits of the i8 mask %__U. The autogenerated checks expect a single
; vextracti64x2 $1 with {%k1} {z}.
733define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
734; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
735; SKX:       ## %bb.0: ## %entry
736; SKX-NEXT:    kmovd %edi, %k1
737; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
738; SKX-NEXT:    vzeroupper
739; SKX-NEXT:    retq
740entry:
741  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
742  %0 = bitcast i8 %__U to <8 x i1>
743  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
744  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
745  ret <2 x i64> %1
746}
747
; Merge-masked extract of the upper half of an <8 x float>.
; Elements 4-7 of %__A are taken; the low four bits of the i8 mask %__U choose,
; per lane, between the extracted element and the pass-through vector %__W.
; The expected assembly is a single merge-masked vextractf32x4 with immediate 1.
748define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
749; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
750; SKX:       ## %bb.0: ## %entry
751; SKX-NEXT:    kmovd %edi, %k1
752; SKX-NEXT:    vextractf32x4 $1, %ymm1, %xmm0 {%k1}
753; SKX-NEXT:    vzeroupper
754; SKX-NEXT:    retq
755entry:
756  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
757  %0 = bitcast i8 %__U to <8 x i1>
758  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
759  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
760  ret <4 x float> %1
761}
762
; Zero-masked extract of the upper half of an <8 x float>.
; Elements 4-7 of %__A are taken; the low four bits of the i8 mask %__U choose,
; per lane, between the extracted element and zero. The expected assembly is a
; single zero-masked vextractf32x4 with immediate 1.
763define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
764; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
765; SKX:       ## %bb.0: ## %entry
766; SKX-NEXT:    kmovd %edi, %k1
767; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
768; SKX-NEXT:    vzeroupper
769; SKX-NEXT:    retq
770entry:
771  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
772  %0 = bitcast i8 %__U to <8 x i1>
773  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
774  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
775  ret <4 x float> %1
776}
777
; Merge-masked extract of the upper four i32 elements of a 256-bit vector.
; The arguments and result are typed as i64 vectors, so %__A and %__W are
; bitcast to i32 vectors first and the selected result is bitcast back to
; <2 x i64>. Elements 4-7 are taken; the low four bits of the i8 mask %__U
; choose, per lane, between the extracted element and the pass-through %__W.
; The expected assembly is a single merge-masked vextracti32x4 with immediate 1.
778define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
779; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
780; SKX:       ## %bb.0: ## %entry
781; SKX-NEXT:    kmovd %edi, %k1
782; SKX-NEXT:    vextracti32x4 $1, %ymm1, %xmm0 {%k1}
783; SKX-NEXT:    vzeroupper
784; SKX-NEXT:    retq
785entry:
786  %0 = bitcast <4 x i64> %__A to <8 x i32>
787  %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
788  %1 = bitcast <2 x i64> %__W to <4 x i32>
789  %2 = bitcast i8 %__U to <8 x i1>
790  %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
791  %3 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> %1
792  %4 = bitcast <4 x i32> %3 to <2 x i64>
793  ret <2 x i64> %4
794}
795
; Zero-masked extract of the upper four i32 elements of a 256-bit vector.
; The argument and result are typed as i64 vectors, so %__A is bitcast to
; <8 x i32> first and the selected result is bitcast back to <2 x i64>.
; Elements 4-7 are taken; the low four bits of the i8 mask %__U choose, per
; lane, between the extracted element and zero. The expected assembly is a
; single zero-masked vextracti32x4 with immediate 1.
796define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
797; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
798; SKX:       ## %bb.0: ## %entry
799; SKX-NEXT:    kmovd %edi, %k1
800; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
801; SKX-NEXT:    vzeroupper
802; SKX-NEXT:    retq
803entry:
804  %0 = bitcast <4 x i64> %__A to <8 x i32>
805  %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
806  %1 = bitcast i8 %__U to <8 x i1>
807  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
808  %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
809  %3 = bitcast <4 x i32> %2 to <2 x i64>
810  ret <2 x i64> %3
811}
812
; Merge-masked extract of the upper half of a <16 x float>.
; Elements 8-15 of %__A are taken; all eight bits of the i8 mask %__U are used
; directly as the <8 x i1> select condition, choosing per lane between the
; extracted element and the pass-through vector %__W. The expected assembly is
; a single merge-masked vextractf32x8 with immediate 1.
813define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
814; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
815; SKX:       ## %bb.0: ## %entry
816; SKX-NEXT:    kmovd %edi, %k1
817; SKX-NEXT:    vextractf32x8 $1, %zmm1, %ymm0 {%k1}
818; SKX-NEXT:    retq
819entry:
820  %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
821  %0 = bitcast i8 %__U to <8 x i1>
822  %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W
823  ret <8 x float> %1
824}
825
; Zero-masked extract of the upper half of a <16 x float>.
; Elements 8-15 of %__A are taken; all eight bits of the i8 mask %__U are used
; directly as the <8 x i1> select condition, choosing per lane between the
; extracted element and zero. The expected assembly is a single zero-masked
; vextractf32x8 with immediate 1.
826define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
827; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
828; SKX:       ## %bb.0: ## %entry
829; SKX-NEXT:    kmovd %edi, %k1
830; SKX-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
831; SKX-NEXT:    retq
832entry:
833  %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
834  %0 = bitcast i8 %__U to <8 x i1>
835  %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer
836  ret <8 x float> %1
837}
838
; Merge-masked extract of the last two elements of an <8 x double>.
; Elements 6-7 of %__A are taken; the low two bits of the i8 mask %__U choose,
; per lane, between the extracted element and the pass-through vector %__W.
; The expected assembly is a single merge-masked vextractf64x2 with immediate 3.
839define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
840; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
841; SKX:       ## %bb.0: ## %entry
842; SKX-NEXT:    kmovd %edi, %k1
843; SKX-NEXT:    vextractf64x2 $3, %zmm1, %xmm0 {%k1}
844; SKX-NEXT:    vzeroupper
845; SKX-NEXT:    retq
846entry:
847  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
848  %0 = bitcast i8 %__U to <8 x i1>
849  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
850  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
851  ret <2 x double> %1
852}
853
; Zero-masked extract of the last two elements of an <8 x double>.
; Elements 6-7 of %__A are taken; the low two bits of the i8 mask %__U choose,
; per lane, between the extracted element and zero. The expected assembly is a
; single zero-masked vextractf64x2 with immediate 3.
854define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
855; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
856; SKX:       ## %bb.0: ## %entry
857; SKX-NEXT:    kmovd %edi, %k1
858; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
859; SKX-NEXT:    vzeroupper
860; SKX-NEXT:    retq
861entry:
862  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
863  %0 = bitcast i8 %__U to <8 x i1>
864  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
865  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
866  ret <2 x double> %1
867}
868