xref: /llvm-project/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle %s -o - | FileCheck %s
3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+fast-variable-perlane-shuffle %s -o - | FileCheck %s
4
; Unmasked register case: the shuffle repeats elements 0 and 1 of %vec so the
; <4 x i32> result is [v0, v1, v0, v1].
; The CHECK lines expect this to lower to a single vmovddup on xmm0.
5define <4 x i32> @test_2xi32_to_4xi32(<4 x i32> %vec) {
6; CHECK-LABEL: test_2xi32_to_4xi32:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
9; CHECK-NEXT:    retq
10  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
11  ret <4 x i32> %res
12}
; Merge-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (xmm1), then a move of that register to xmm0.
13define <4 x i32> @test_masked_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
14; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask0:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
17; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
18; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
19; CHECK-NEXT:    retq
20  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
21  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
22  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
23  ret <4 x i32> %res
24}
25
; Zero-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
26define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %mask) {
27; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask0:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
30; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
31; CHECK-NEXT:    retq
32  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
33  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
34  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
35  ret <4 x i32> %res
36}
; Merge-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (xmm1), then a move of that register to xmm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_4xi32_mask0;
; only the function name differs.
37define <4 x i32> @test_masked_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
38; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask1:
39; CHECK:       # %bb.0:
40; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
41; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
42; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
43; CHECK-NEXT:    retq
44  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
45  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
46  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
47  ret <4 x i32> %res
48}
49
; Zero-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in test_masked_z_2xi32_to_4xi32_mask0;
; only the function name differs.
50define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %mask) {
51; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask1:
52; CHECK:       # %bb.0:
53; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
54; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
55; CHECK-NEXT:    retq
56  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
57  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
58  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
59  ret <4 x i32> %res
60}
; Merge-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (xmm1), then a move of that register to xmm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_4xi32_mask0;
; only the function name differs.
61define <4 x i32> @test_masked_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
62; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask2:
63; CHECK:       # %bb.0:
64; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
65; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
66; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
67; CHECK-NEXT:    retq
68  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
69  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
70  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
71  ret <4 x i32> %res
72}
73
; Zero-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in test_masked_z_2xi32_to_4xi32_mask0;
; only the function name differs.
74define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %mask) {
75; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask2:
76; CHECK:       # %bb.0:
77; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
78; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
79; CHECK-NEXT:    retq
80  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
81  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
82  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
83  ret <4 x i32> %res
84}
; Merge-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (xmm1), then a move of that register to xmm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_4xi32_mask0;
; only the function name differs.
85define <4 x i32> @test_masked_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
86; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask3:
87; CHECK:       # %bb.0:
88; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
89; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
90; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
91; CHECK-NEXT:    retq
92  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
93  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
94  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
95  ret <4 x i32> %res
96}
97
; Zero-masked register case: each result lane takes the [v0, v1] broadcast of
; %vec where the matching %mask element equals zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in test_masked_z_2xi32_to_4xi32_mask0;
; only the function name differs.
98define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %mask) {
99; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask3:
100; CHECK:       # %bb.0:
101; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
102; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
103; CHECK-NEXT:    retq
104  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
105  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
106  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
107  ret <4 x i32> %res
108}
; Unmasked register case: the shuffle repeats elements 0 and 1 of %vec across
; all eight lanes of the <8 x i32> result.
; The CHECK lines expect this to lower to a single vbroadcastsd from xmm0 to ymm0.
109define <8 x i32> @test_2xi32_to_8xi32(<8 x i32> %vec) {
110; CHECK-LABEL: test_2xi32_to_8xi32:
111; CHECK:       # %bb.0:
112; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
113; CHECK-NEXT:    retq
114  %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
115  ret <8 x i32> %res
116}
; Merge-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and %default
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (ymm1), then a move of that register to ymm0.
117define <8 x i32> @test_masked_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
118; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask0:
119; CHECK:       # %bb.0:
120; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
121; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
122; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
123; CHECK-NEXT:    retq
124  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
125  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
126  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
127  ret <8 x i32> %res
128}
129
; Zero-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and is zero
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
130define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %mask) {
131; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask0:
132; CHECK:       # %bb.0:
133; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
134; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
135; CHECK-NEXT:    retq
136  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
137  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
138  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
139  ret <8 x i32> %res
140}
; Merge-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and %default
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (ymm1), then a move of that register to ymm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_8xi32_mask0;
; only the function name differs.
141define <8 x i32> @test_masked_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
142; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask1:
143; CHECK:       # %bb.0:
144; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
145; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
146; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
147; CHECK-NEXT:    retq
148  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
149  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
150  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
151  ret <8 x i32> %res
152}
153
; Zero-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and is zero
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in test_masked_z_2xi32_to_8xi32_mask0;
; only the function name differs.
154define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %mask) {
155; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask1:
156; CHECK:       # %bb.0:
157; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
158; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
159; CHECK-NEXT:    retq
160  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
161  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
162  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
163  ret <8 x i32> %res
164}
; Merge-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and %default
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (ymm1), then a move of that register to ymm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_8xi32_mask0;
; only the function name differs.
165define <8 x i32> @test_masked_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
166; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask2:
167; CHECK:       # %bb.0:
168; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
169; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
170; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
171; CHECK-NEXT:    retq
172  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
173  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
174  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
175  ret <8 x i32> %res
176}
177
; Zero-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and is zero
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in test_masked_z_2xi32_to_8xi32_mask0;
; only the function name differs.
178define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %mask) {
179; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask2:
180; CHECK:       # %bb.0:
181; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
182; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
183; CHECK-NEXT:    retq
184  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
185  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
186  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
187  ret <8 x i32> %res
188}
; Merge-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and %default
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (ymm1), then a move of that register to ymm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_8xi32_mask0;
; only the function name differs.
189define <8 x i32> @test_masked_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
190; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask3:
191; CHECK:       # %bb.0:
192; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
193; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
194; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
195; CHECK-NEXT:    retq
196  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
197  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
198  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
199  ret <8 x i32> %res
200}
201
; Zero-masked register case: each of the eight result lanes takes the [v0, v1]
; broadcast of %vec where the matching %mask element equals zero, and is zero
; otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in test_masked_z_2xi32_to_8xi32_mask0;
; only the function name differs.
202define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %mask) {
203; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask3:
204; CHECK:       # %bb.0:
205; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
206; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
207; CHECK-NEXT:    retq
208  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
209  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
210  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
211  ret <8 x i32> %res
212}
; Unmasked register case: the shuffle repeats elements 0 and 1 of %vec across
; all sixteen lanes of the <16 x i32> result.
; The CHECK lines expect this to lower to a single vbroadcastsd from xmm0 to zmm0.
213define <16 x i32> @test_2xi32_to_16xi32(<16 x i32> %vec) {
214; CHECK-LABEL: test_2xi32_to_16xi32:
215; CHECK:       # %bb.0:
216; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
217; CHECK-NEXT:    retq
218  %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
219  ret <16 x i32> %res
220}
; Merge-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (zmm1), then a move of that register to zmm0.
221define <16 x i32> @test_masked_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
222; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask0:
223; CHECK:       # %bb.0:
224; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
225; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
226; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
227; CHECK-NEXT:    retq
228  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
229  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
230  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
231  ret <16 x i32> %res
232}
233
; Zero-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
234define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32> %mask) {
235; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask0:
236; CHECK:       # %bb.0:
237; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
238; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
239; CHECK-NEXT:    retq
240  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
241  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
242  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
243  ret <16 x i32> %res
244}
; Merge-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (zmm1), then a move of that register to zmm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_16xi32_mask0;
; only the function name differs.
245define <16 x i32> @test_masked_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
246; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask1:
247; CHECK:       # %bb.0:
248; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
249; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
250; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
251; CHECK-NEXT:    retq
252  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
253  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
254  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
255  ret <16 x i32> %res
256}
257
; Zero-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_16xi32_mask0; only the function name differs.
258define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32> %mask) {
259; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask1:
260; CHECK:       # %bb.0:
261; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
262; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
263; CHECK-NEXT:    retq
264  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
265  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
266  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
267  ret <16 x i32> %res
268}
; Merge-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (zmm1), then a move of that register to zmm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_16xi32_mask0;
; only the function name differs.
269define <16 x i32> @test_masked_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
270; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask2:
271; CHECK:       # %bb.0:
272; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
273; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
274; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
275; CHECK-NEXT:    retq
276  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
277  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
278  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
279  ret <16 x i32> %res
280}
281
; Zero-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_16xi32_mask0; only the function name differs.
282define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32> %mask) {
283; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask2:
284; CHECK:       # %bb.0:
285; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
286; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
287; CHECK-NEXT:    retq
288  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
289  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
290  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
291  ret <16 x i32> %res
292}
; Merge-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, a {%k1}-masked vbroadcasti32x2
; into the %default register (zmm1), then a move of that register to zmm0.
; NOTE(review): the IR body is the same as in test_masked_2xi32_to_16xi32_mask0;
; only the function name differs.
293define <16 x i32> @test_masked_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
294; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask3:
295; CHECK:       # %bb.0:
296; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
297; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
298; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
299; CHECK-NEXT:    retq
300  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
301  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
302  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
303  ret <16 x i32> %res
304}
305
; Zero-masked register case: each of the sixteen result lanes takes the
; [v0, v1] broadcast of %vec where the matching %mask element equals zero, and
; is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z} vbroadcasti32x2.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_16xi32_mask0; only the function name differs.
306define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32> %mask) {
307; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask3:
308; CHECK:       # %bb.0:
309; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
310; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
311; CHECK-NEXT:    retq
312  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
313  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
314  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
315  ret <16 x i32> %res
316}
; Unmasked memory case: loads a <2 x i32> from %vp and repeats its two elements
; so the <4 x i32> result is [m0, m1, m0, m1].
; The CHECK lines expect a single vmovddup with a memory source operand.
317define <4 x i32> @test_2xi32_to_4xi32_mem(ptr %vp) {
318; CHECK-LABEL: test_2xi32_to_4xi32_mem:
319; CHECK:       # %bb.0:
320; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
321; CHECK-NEXT:    retq
322  %vec = load <2 x i32>, ptr %vp
323  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
324  ret <4 x i32> %res
325}
; Merge-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (xmm0).
326define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask0(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
327; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask0:
328; CHECK:       # %bb.0:
329; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
330; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
331; CHECK-NEXT:    retq
332  %vec = load <2 x i32>, ptr %vp
333  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
334  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
335  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
336  ret <4 x i32> %res
337}
338
; Zero-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and is
; zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
339define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask0(ptr %vp, <4 x i32> %mask) {
340; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask0:
341; CHECK:       # %bb.0:
342; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
343; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
344; CHECK-NEXT:    retq
345  %vec = load <2 x i32>, ptr %vp
346  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
347  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
348  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
349  ret <4 x i32> %res
350}
; Merge-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (xmm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_4xi32_mem_mask0; only the function name differs.
351define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask1(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
352; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask1:
353; CHECK:       # %bb.0:
354; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
355; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
356; CHECK-NEXT:    retq
357  %vec = load <2 x i32>, ptr %vp
358  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
359  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
360  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
361  ret <4 x i32> %res
362}
363
; Zero-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and is
; zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_4xi32_mem_mask0; only the function name differs.
364define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask1(ptr %vp, <4 x i32> %mask) {
365; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask1:
366; CHECK:       # %bb.0:
367; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
368; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
369; CHECK-NEXT:    retq
370  %vec = load <2 x i32>, ptr %vp
371  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
372  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
373  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
374  ret <4 x i32> %res
375}
; Merge-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (xmm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_4xi32_mem_mask0; only the function name differs.
376define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask2(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
377; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask2:
378; CHECK:       # %bb.0:
379; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
380; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
381; CHECK-NEXT:    retq
382  %vec = load <2 x i32>, ptr %vp
383  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
384  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
385  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
386  ret <4 x i32> %res
387}
388
; Zero-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and is
; zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_4xi32_mem_mask0; only the function name differs.
389define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask2(ptr %vp, <4 x i32> %mask) {
390; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask2:
391; CHECK:       # %bb.0:
392; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
393; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
394; CHECK-NEXT:    retq
395  %vec = load <2 x i32>, ptr %vp
396  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
397  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
398  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
399  ret <4 x i32> %res
400}
; Merge-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and
; %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (xmm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_4xi32_mem_mask0; only the function name differs.
401define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask3(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
402; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask3:
403; CHECK:       # %bb.0:
404; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
405; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
406; CHECK-NEXT:    retq
407  %vec = load <2 x i32>, ptr %vp
408  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
409  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
410  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
411  ret <4 x i32> %res
412}
413
; Zero-masked memory case: loads a <2 x i32> from %vp; each result lane takes
; the [m0, m1] broadcast where the matching %mask element equals zero, and is
; zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_4xi32_mem_mask0; only the function name differs.
414define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask3(ptr %vp, <4 x i32> %mask) {
415; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask3:
416; CHECK:       # %bb.0:
417; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
418; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
419; CHECK-NEXT:    retq
420  %vec = load <2 x i32>, ptr %vp
421  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
422  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
423  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
424  ret <4 x i32> %res
425}
; Unmasked memory case: loads a <2 x i32> from %vp and repeats its two elements
; across all eight lanes of the <8 x i32> result.
; The CHECK lines expect a single vbroadcastsd from (%rdi) into ymm0.
426define <8 x i32> @test_2xi32_to_8xi32_mem(ptr %vp) {
427; CHECK-LABEL: test_2xi32_to_8xi32_mem:
428; CHECK:       # %bb.0:
429; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
430; CHECK-NEXT:    retq
431  %vec = load <2 x i32>, ptr %vp
432  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
433  ret <8 x i32> %res
434}
; Merge-masked memory case: loads a <2 x i32> from %vp; each of the eight
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (ymm0).
435define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
436; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask0:
437; CHECK:       # %bb.0:
438; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
439; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
440; CHECK-NEXT:    retq
441  %vec = load <2 x i32>, ptr %vp
442  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
443  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
444  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
445  ret <8 x i32> %res
446}
447
; Zero-masked memory case: loads a <2 x i32> from %vp; each of the eight result
; lanes takes the [m0, m1] broadcast where the matching %mask element equals
; zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
448define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %mask) {
449; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask0:
450; CHECK:       # %bb.0:
451; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
452; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
453; CHECK-NEXT:    retq
454  %vec = load <2 x i32>, ptr %vp
455  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
456  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
457  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
458  ret <8 x i32> %res
459}
; Merge-masked memory case: loads a <2 x i32> from %vp; each of the eight
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (ymm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_8xi32_mem_mask0; only the function name differs.
460define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
461; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask1:
462; CHECK:       # %bb.0:
463; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
464; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
465; CHECK-NEXT:    retq
466  %vec = load <2 x i32>, ptr %vp
467  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
468  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
469  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
470  ret <8 x i32> %res
471}
472
; Zero-masked memory case: loads a <2 x i32> from %vp; each of the eight result
; lanes takes the [m0, m1] broadcast where the matching %mask element equals
; zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_8xi32_mem_mask0; only the function name differs.
473define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %mask) {
474; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask1:
475; CHECK:       # %bb.0:
476; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
477; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
478; CHECK-NEXT:    retq
479  %vec = load <2 x i32>, ptr %vp
480  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
481  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
482  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
483  ret <8 x i32> %res
484}
; Merge-masked memory case: loads a <2 x i32> from %vp; each of the eight
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (ymm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_8xi32_mem_mask0; only the function name differs.
485define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
486; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask2:
487; CHECK:       # %bb.0:
488; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
489; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
490; CHECK-NEXT:    retq
491  %vec = load <2 x i32>, ptr %vp
492  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
493  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
494  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
495  ret <8 x i32> %res
496}
497
; Zero-masked memory case: loads a <2 x i32> from %vp; each of the eight result
; lanes takes the [m0, m1] broadcast where the matching %mask element equals
; zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_8xi32_mem_mask0; only the function name differs.
498define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %mask) {
499; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask2:
500; CHECK:       # %bb.0:
501; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
502; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
503; CHECK-NEXT:    retq
504  %vec = load <2 x i32>, ptr %vp
505  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
506  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
507  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
508  ret <8 x i32> %res
509}
; Merge-masked memory case: loads a <2 x i32> from %vp; each of the eight
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (ymm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_8xi32_mem_mask0; only the function name differs.
510define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
511; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask3:
512; CHECK:       # %bb.0:
513; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
514; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
515; CHECK-NEXT:    retq
516  %vec = load <2 x i32>, ptr %vp
517  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
518  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
519  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
520  ret <8 x i32> %res
521}
522
; Zero-masked memory case: loads a <2 x i32> from %vp; each of the eight result
; lanes takes the [m0, m1] broadcast where the matching %mask element equals
; zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_8xi32_mem_mask0; only the function name differs.
523define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %mask) {
524; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask3:
525; CHECK:       # %bb.0:
526; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
527; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
528; CHECK-NEXT:    retq
529  %vec = load <2 x i32>, ptr %vp
530  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
531  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
532  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
533  ret <8 x i32> %res
534}
; Unmasked memory case: loads a <2 x i32> from %vp and repeats its two elements
; across all sixteen lanes of the <16 x i32> result.
; The CHECK lines expect a single vbroadcastsd from (%rdi) into zmm0.
535define <16 x i32> @test_2xi32_to_16xi32_mem(ptr %vp) {
536; CHECK-LABEL: test_2xi32_to_16xi32_mem:
537; CHECK:       # %bb.0:
538; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
539; CHECK-NEXT:    retq
540  %vec = load <2 x i32>, ptr %vp
541  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
542  ret <16 x i32> %res
543}
; Merge-masked memory case: loads a <2 x i32> from %vp; each of the sixteen
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (zmm0).
544define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
545; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask0:
546; CHECK:       # %bb.0:
547; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
548; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
549; CHECK-NEXT:    retq
550  %vec = load <2 x i32>, ptr %vp
551  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
552  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
553  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
554  ret <16 x i32> %res
555}
556
; Zero-masked memory case: loads a <2 x i32> from %vp; each of the sixteen
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
557define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) {
558; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask0:
559; CHECK:       # %bb.0:
560; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
561; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
562; CHECK-NEXT:    retq
563  %vec = load <2 x i32>, ptr %vp
564  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
565  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
566  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
567  ret <16 x i32> %res
568}
; Merge-masked memory case: loads a <2 x i32> from %vp; each of the sixteen
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (zmm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_16xi32_mem_mask0; only the function name differs.
569define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
570; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask1:
571; CHECK:       # %bb.0:
572; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
573; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
574; CHECK-NEXT:    retq
575  %vec = load <2 x i32>, ptr %vp
576  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
577  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
578  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
579  ret <16 x i32> %res
580}
581
; Zero-masked memory case: loads a <2 x i32> from %vp; each of the sixteen
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and is zero otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1} {z}
; vbroadcasti32x2 with a memory source.
; NOTE(review): the IR body is the same as in
; test_masked_z_2xi32_to_16xi32_mem_mask0; only the function name differs.
582define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) {
583; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask1:
584; CHECK:       # %bb.0:
585; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
586; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
587; CHECK-NEXT:    retq
588  %vec = load <2 x i32>, ptr %vp
589  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
590  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
591  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
592  ret <16 x i32> %res
593}
; Merge-masked memory case: loads a <2 x i32> from %vp; each of the sixteen
; result lanes takes the [m0, m1] broadcast where the matching %mask element
; equals zero, and %default otherwise.
; The CHECK lines expect vptestnmd to form %k1, then a {%k1}-masked
; vbroadcasti32x2 with a memory source written over the %default register (zmm0).
; NOTE(review): the IR body is the same as in
; test_masked_2xi32_to_16xi32_mem_mask0; only the function name differs.
594define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
595; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask2:
596; CHECK:       # %bb.0:
597; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
598; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
599; CHECK-NEXT:    retq
600  %vec = load <2 x i32>, ptr %vp
601  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
602  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
603  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
604  ret <16 x i32> %res
605}
606
; Zero-masked memory broadcast. Loads <2 x i32> from %vp and repeats the pair eight times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x2 from memory into zmm0 under {%k1} {z}.
607define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) {
608; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask2:
609; CHECK:       # %bb.0:
610; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
611; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
612; CHECK-NEXT:    retq
613  %vec = load <2 x i32>, ptr %vp
614  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
615  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
616  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
617  ret <16 x i32> %res
618}
; Merge-masked memory broadcast. Loads <2 x i32> from %vp and repeats the pair eight times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x2 from memory into zmm0 under {%k1}.
619define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
620; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask3:
621; CHECK:       # %bb.0:
622; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
623; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
624; CHECK-NEXT:    retq
625  %vec = load <2 x i32>, ptr %vp
626  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
627  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
628  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
629  ret <16 x i32> %res
630}
631
; Zero-masked memory broadcast. Loads <2 x i32> from %vp and repeats the pair eight times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x2 from memory into zmm0 under {%k1} {z}.
632define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) {
633; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask3:
634; CHECK:       # %bb.0:
635; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
636; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
637; CHECK-NEXT:    retq
638  %vec = load <2 x i32>, ptr %vp
639  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
640  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
641  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
642  ret <16 x i32> %res
643}
; Unmasked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; The assertions expect a single vbroadcasti128 from memory into ymm0, with no mask register involved.
644define <8 x i32> @test_4xi32_to_8xi32_mem(ptr %vp) {
645; CHECK-LABEL: test_4xi32_to_8xi32_mem:
646; CHECK:       # %bb.0:
647; CHECK-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
648; CHECK-NEXT:    retq
649  %vec = load <4 x i32>, ptr %vp
650  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
651  ret <8 x i32> %res
652}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1}.
653define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
654; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask0:
655; CHECK:       # %bb.0:
656; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
657; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
658; CHECK-NEXT:    retq
659  %vec = load <4 x i32>, ptr %vp
660  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
661  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
662  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
663  ret <8 x i32> %res
664}
665
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1} {z}.
666define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %mask) {
667; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask0:
668; CHECK:       # %bb.0:
669; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
670; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
671; CHECK-NEXT:    retq
672  %vec = load <4 x i32>, ptr %vp
673  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
674  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
675  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
676  ret <8 x i32> %res
677}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1}.
678define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
679; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask1:
680; CHECK:       # %bb.0:
681; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
682; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
683; CHECK-NEXT:    retq
684  %vec = load <4 x i32>, ptr %vp
685  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
686  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
687  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
688  ret <8 x i32> %res
689}
690
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1} {z}.
691define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %mask) {
692; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask1:
693; CHECK:       # %bb.0:
694; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
695; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
696; CHECK-NEXT:    retq
697  %vec = load <4 x i32>, ptr %vp
698  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
699  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
700  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
701  ret <8 x i32> %res
702}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1}.
703define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
704; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask2:
705; CHECK:       # %bb.0:
706; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
707; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
708; CHECK-NEXT:    retq
709  %vec = load <4 x i32>, ptr %vp
710  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
711  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
712  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
713  ret <8 x i32> %res
714}
715
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1} {z}.
716define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %mask) {
717; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask2:
718; CHECK:       # %bb.0:
719; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
720; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
721; CHECK-NEXT:    retq
722  %vec = load <4 x i32>, ptr %vp
723  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
724  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
725  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
726  ret <8 x i32> %res
727}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1}.
728define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
729; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask3:
730; CHECK:       # %bb.0:
731; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
732; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
733; CHECK-NEXT:    retq
734  %vec = load <4 x i32>, ptr %vp
735  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
736  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
737  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
738  ret <8 x i32> %res
739}
740
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it twice to fill <8 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into ymm0 under {%k1} {z}.
741define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %mask) {
742; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask3:
743; CHECK:       # %bb.0:
744; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
745; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
746; CHECK-NEXT:    retq
747  %vec = load <4 x i32>, ptr %vp
748  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
749  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
750  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
751  ret <8 x i32> %res
752}
; Unmasked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; The assertions expect a single vbroadcasti32x4 from memory into zmm0, with no mask register involved.
753define <16 x i32> @test_4xi32_to_16xi32_mem(ptr %vp) {
754; CHECK-LABEL: test_4xi32_to_16xi32_mem:
755; CHECK:       # %bb.0:
756; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
757; CHECK-NEXT:    retq
758  %vec = load <4 x i32>, ptr %vp
759  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
760  ret <16 x i32> %res
761}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1}.
762define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
763; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask0:
764; CHECK:       # %bb.0:
765; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
766; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
767; CHECK-NEXT:    retq
768  %vec = load <4 x i32>, ptr %vp
769  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
770  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
771  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
772  ret <16 x i32> %res
773}
774
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1} {z}.
775define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) {
776; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask0:
777; CHECK:       # %bb.0:
778; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
779; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
780; CHECK-NEXT:    retq
781  %vec = load <4 x i32>, ptr %vp
782  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
783  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
784  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
785  ret <16 x i32> %res
786}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1}.
787define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
788; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask1:
789; CHECK:       # %bb.0:
790; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
791; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
792; CHECK-NEXT:    retq
793  %vec = load <4 x i32>, ptr %vp
794  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
795  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
796  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
797  ret <16 x i32> %res
798}
799
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1} {z}.
800define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) {
801; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask1:
802; CHECK:       # %bb.0:
803; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
804; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
805; CHECK-NEXT:    retq
806  %vec = load <4 x i32>, ptr %vp
807  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
808  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
809  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
810  ret <16 x i32> %res
811}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1}.
812define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
813; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask2:
814; CHECK:       # %bb.0:
815; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
816; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
817; CHECK-NEXT:    retq
818  %vec = load <4 x i32>, ptr %vp
819  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
820  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
821  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
822  ret <16 x i32> %res
823}
824
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1} {z}.
825define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) {
826; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask2:
827; CHECK:       # %bb.0:
828; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
829; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
830; CHECK-NEXT:    retq
831  %vec = load <4 x i32>, ptr %vp
832  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
833  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
834  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
835  ret <16 x i32> %res
836}
; Merge-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1}.
837define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
838; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask3:
839; CHECK:       # %bb.0:
840; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
841; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
842; CHECK-NEXT:    retq
843  %vec = load <4 x i32>, ptr %vp
844  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
845  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
846  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
847  ret <16 x i32> %res
848}
849
; Zero-masked memory broadcast. Loads <4 x i32> from %vp and repeats it four times to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x4 from memory into zmm0 under {%k1} {z}.
850define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) {
851; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask3:
852; CHECK:       # %bb.0:
853; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
854; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
855; CHECK-NEXT:    retq
856  %vec = load <4 x i32>, ptr %vp
857  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
858  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
859  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
860  ret <16 x i32> %res
861}
; Unmasked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; The assertions expect a single vbroadcasti128 from memory into ymm0, with no mask register involved.
862define <4 x i64> @test_2xi64_to_4xi64_mem(ptr %vp) {
863; CHECK-LABEL: test_2xi64_to_4xi64_mem:
864; CHECK:       # %bb.0:
865; CHECK-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
866; CHECK-NEXT:    retq
867  %vec = load <2 x i64>, ptr %vp
868  %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
869  ret <4 x i64> %res
870}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1}.
871define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask0(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
872; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask0:
873; CHECK:       # %bb.0:
874; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
875; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
876; CHECK-NEXT:    retq
877  %vec = load <2 x i64>, ptr %vp
878  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
879  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
880  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
881  ret <4 x i64> %res
882}
883
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1} {z}.
884define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask0(ptr %vp, <4 x i64> %mask) {
885; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask0:
886; CHECK:       # %bb.0:
887; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
888; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
889; CHECK-NEXT:    retq
890  %vec = load <2 x i64>, ptr %vp
891  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
892  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
893  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
894  ret <4 x i64> %res
895}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1}.
896define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask1(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
897; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask1:
898; CHECK:       # %bb.0:
899; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
900; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
901; CHECK-NEXT:    retq
902  %vec = load <2 x i64>, ptr %vp
903  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
904  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
905  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
906  ret <4 x i64> %res
907}
908
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1} {z}.
909define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask1(ptr %vp, <4 x i64> %mask) {
910; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask1:
911; CHECK:       # %bb.0:
912; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
913; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
914; CHECK-NEXT:    retq
915  %vec = load <2 x i64>, ptr %vp
916  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
917  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
918  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
919  ret <4 x i64> %res
920}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1}.
921define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask2(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
922; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask2:
923; CHECK:       # %bb.0:
924; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
925; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
926; CHECK-NEXT:    retq
927  %vec = load <2 x i64>, ptr %vp
928  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
929  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
930  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
931  ret <4 x i64> %res
932}
933
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1} {z}.
934define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask2(ptr %vp, <4 x i64> %mask) {
935; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask2:
936; CHECK:       # %bb.0:
937; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
938; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
939; CHECK-NEXT:    retq
940  %vec = load <2 x i64>, ptr %vp
941  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
942  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
943  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
944  ret <4 x i64> %res
945}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1}.
946define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask3(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
947; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask3:
948; CHECK:       # %bb.0:
949; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
950; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
951; CHECK-NEXT:    retq
952  %vec = load <2 x i64>, ptr %vp
953  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
954  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
955  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
956  ret <4 x i64> %res
957}
958
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it twice to fill <4 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into ymm0 under {%k1} {z}.
959define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask3(ptr %vp, <4 x i64> %mask) {
960; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask3:
961; CHECK:       # %bb.0:
962; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
963; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
964; CHECK-NEXT:    retq
965  %vec = load <2 x i64>, ptr %vp
966  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
967  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
968  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
969  ret <4 x i64> %res
970}
; Unmasked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; The assertions expect a single vbroadcasti32x4 from memory into zmm0, with no mask register involved.
; Note the expected mnemonic is the 32x4 form although the IR element type is i64.
971define <8 x i64> @test_2xi64_to_8xi64_mem(ptr %vp) {
972; CHECK-LABEL: test_2xi64_to_8xi64_mem:
973; CHECK:       # %bb.0:
974; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
975; CHECK-NEXT:    retq
976  %vec = load <2 x i64>, ptr %vp
977  %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
978  ret <8 x i64> %res
979}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1}.
980define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
981; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask0:
982; CHECK:       # %bb.0:
983; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
984; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
985; CHECK-NEXT:    retq
986  %vec = load <2 x i64>, ptr %vp
987  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
988  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
989  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
990  ret <8 x i64> %res
991}
992
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1} {z}.
993define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %mask) {
994; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask0:
995; CHECK:       # %bb.0:
996; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
997; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
998; CHECK-NEXT:    retq
999  %vec = load <2 x i64>, ptr %vp
1000  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1001  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1002  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
1003  ret <8 x i64> %res
1004}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1}.
1005define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1006; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask1:
1007; CHECK:       # %bb.0:
1008; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1009; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1010; CHECK-NEXT:    retq
1011  %vec = load <2 x i64>, ptr %vp
1012  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1013  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1014  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
1015  ret <8 x i64> %res
1016}
1017
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1} {z}.
1018define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %mask) {
1019; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask1:
1020; CHECK:       # %bb.0:
1021; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
1022; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1023; CHECK-NEXT:    retq
1024  %vec = load <2 x i64>, ptr %vp
1025  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1026  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1027  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
1028  ret <8 x i64> %res
1029}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1}.
1030define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1031; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask2:
1032; CHECK:       # %bb.0:
1033; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1034; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1035; CHECK-NEXT:    retq
1036  %vec = load <2 x i64>, ptr %vp
1037  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1038  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1039  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
1040  ret <8 x i64> %res
1041}
1042
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1} {z}.
1043define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %mask) {
1044; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask2:
1045; CHECK:       # %bb.0:
1046; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
1047; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1048; CHECK-NEXT:    retq
1049  %vec = load <2 x i64>, ptr %vp
1050  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1051  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1052  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
1053  ret <8 x i64> %res
1054}
; Merge-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1}.
1055define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1056; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask3:
1057; CHECK:       # %bb.0:
1058; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1059; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1060; CHECK-NEXT:    retq
1061  %vec = load <2 x i64>, ptr %vp
1062  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1063  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1064  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
1065  ret <8 x i64> %res
1066}
1067
; Zero-masked memory broadcast. Loads <2 x i64> from %vp and repeats it four times to fill <8 x i64>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmq to build %k1, then a vbroadcasti64x2 from memory into zmm0 under {%k1} {z}.
1068define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %mask) {
1069; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask3:
1070; CHECK:       # %bb.0:
1071; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
1072; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1073; CHECK-NEXT:    retq
1074  %vec = load <2 x i64>, ptr %vp
1075  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1076  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1077  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
1078  ret <8 x i64> %res
1079}
; Unmasked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; The assertions expect a single vbroadcasti64x4 from memory into zmm0, with no mask register involved.
; Note the expected mnemonic is the 64x4 form although the IR element type is i32.
1080define <16 x i32> @test_8xi32_to_16xi32_mem(ptr %vp) {
1081; CHECK-LABEL: test_8xi32_to_16xi32_mem:
1082; CHECK:       # %bb.0:
1083; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
1084; CHECK-NEXT:    retq
1085  %vec = load <8 x i32>, ptr %vp
1086  %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1087  ret <16 x i32> %res
1088}
; Merge-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1}.
1089define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1090; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask0:
1091; CHECK:       # %bb.0:
1092; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1093; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1094; CHECK-NEXT:    retq
1095  %vec = load <8 x i32>, ptr %vp
1096  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1097  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1098  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
1099  ret <16 x i32> %res
1100}
1101
; Zero-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1} {z}.
1102define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) {
1103; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask0:
1104; CHECK:       # %bb.0:
1105; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1106; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1107; CHECK-NEXT:    retq
1108  %vec = load <8 x i32>, ptr %vp
1109  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1110  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1111  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
1112  ret <16 x i32> %res
1113}
; Merge-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1}.
1114define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1115; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask1:
1116; CHECK:       # %bb.0:
1117; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1118; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1119; CHECK-NEXT:    retq
1120  %vec = load <8 x i32>, ptr %vp
1121  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1122  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1123  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
1124  ret <16 x i32> %res
1125}
1126
; Zero-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1} {z}.
1127define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) {
1128; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask1:
1129; CHECK:       # %bb.0:
1130; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1131; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1132; CHECK-NEXT:    retq
1133  %vec = load <8 x i32>, ptr %vp
1134  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1135  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1136  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
1137  ret <16 x i32> %res
1138}
; Merge-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1}.
1139define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1140; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask2:
1141; CHECK:       # %bb.0:
1142; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1143; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1144; CHECK-NEXT:    retq
1145  %vec = load <8 x i32>, ptr %vp
1146  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1147  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1148  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
1149  ret <16 x i32> %res
1150}
1151
; Zero-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1} {z}.
1152define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) {
1153; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask2:
1154; CHECK:       # %bb.0:
1155; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1156; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1157; CHECK-NEXT:    retq
1158  %vec = load <8 x i32>, ptr %vp
1159  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1160  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1161  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
1162  ret <16 x i32> %res
1163}
; Merge-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes keep %default.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1}.
1164define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1165; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask3:
1166; CHECK:       # %bb.0:
1167; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1168; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1169; CHECK-NEXT:    retq
1170  %vec = load <8 x i32>, ptr %vp
1171  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1172  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1173  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
1174  ret <16 x i32> %res
1175}
1176
; Zero-masked memory broadcast. Loads <8 x i32> from %vp and repeats it twice to fill <16 x i32>.
; Lanes whose %mask element equals zero take the broadcast value, all other lanes are zero.
; The assertions expect vptestnmd to build %k1, then a vbroadcasti32x8 from memory into zmm0 under {%k1} {z}.
1177define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) {
1178; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask3:
1179; CHECK:       # %bb.0:
1180; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1181; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1182; CHECK-NEXT:    retq
1183  %vec = load <8 x i32>, ptr %vp
1184  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1185  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1186  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
1187  ret <16 x i32> %res
1188}
; Unmasked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. The expected lowering is a single vbroadcasti64x4 with a
; memory operand. The second shuffle operand is never selected, so poison is
; used as the placeholder instead of the deprecated undef.
define <8 x i64> @test_4xi64_to_8xi64_mem(ptr %vp) {
; CHECK-LABEL: test_4xi64_to_8xi64_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %res = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i64> %res
}
; Merge-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane keeps the matching element of %default. The expected
; lowering is one merge-masked vbroadcasti64x4 with a memory operand. The second
; shuffle operand is never selected, so poison replaces the deprecated undef.
define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}
1210
; Zero-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane is zeroed. The expected lowering is one zero-masked
; vbroadcasti64x4 with a memory operand. The second shuffle operand is never
; selected, so poison is used as the placeholder instead of deprecated undef.
define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}
; Merge-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane keeps the matching element of %default. The expected
; lowering is one merge-masked vbroadcasti64x4 with a memory operand. The second
; shuffle operand is never selected, so poison replaces the deprecated undef.
define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}
1235
; Zero-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane is zeroed. The expected lowering is one zero-masked
; vbroadcasti64x4 with a memory operand. The second shuffle operand is never
; selected, so poison is used as the placeholder instead of deprecated undef.
define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}
; Merge-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane keeps the matching element of %default. The expected
; lowering is one merge-masked vbroadcasti64x4 with a memory operand. The second
; shuffle operand is never selected, so poison replaces the deprecated undef.
define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}
1260
; Zero-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane is zeroed. The expected lowering is one zero-masked
; vbroadcasti64x4 with a memory operand. The second shuffle operand is never
; selected, so poison is used as the placeholder instead of deprecated undef.
define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}
; Merge-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane keeps the matching element of %default. The expected
; lowering is one merge-masked vbroadcasti64x4 with a memory operand. The second
; shuffle operand is never selected, so poison replaces the deprecated undef.
define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
  ret <8 x i64> %res
}
1285
; Zero-masked broadcast from memory: the loaded <4 x i64> is repeated twice to
; fill an <8 x i64>. Lanes whose %mask element is zero receive the broadcast
; value; every other lane is zeroed. The expected lowering is one zero-masked
; vbroadcasti64x4 with a memory operand. The second shuffle operand is never
; selected, so poison is used as the placeholder instead of deprecated undef.
define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, ptr %vp
  %shuf = shufflevector <4 x i64> %vec, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}
1298