xref: /llvm-project/llvm/test/CodeGen/X86/vselect-pcmp.ll (revision c47f3e8c7027fbf13495dc865c28d852bf77836d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx       | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2      | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f   | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop       | FileCheck %s --check-prefixes=CHECK,XOP
7
8; The condition vector for BLENDV* only cares about the sign bit of each element.
9; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
10
11; Test 128-bit vectors for all legal element types.
12
13define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
14; CHECK-LABEL: signbit_sel_v16i8:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
17; CHECK-NEXT:    retq
; The icmp-slt-zero compare is folded away on all targets: vpblendvb keys on each byte's sign bit directly.
18  %tr = icmp slt <16 x i8> %mask, zeroinitializer
19  %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
20  ret <16 x i8> %z
21}
22
23; Sorry 16-bit, you're not important enough to support?
24
25define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
26; AVX12-LABEL: signbit_sel_v8i16:
27; AVX12:       # %bb.0:
28; AVX12-NEXT:    vpxor %xmm3, %xmm3, %xmm3
29; AVX12-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
30; AVX12-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
31; AVX12-NEXT:    retq
32;
33; AVX512F-LABEL: signbit_sel_v8i16:
34; AVX512F:       # %bb.0:
35; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
36; AVX512F-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
37; AVX512F-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
38; AVX512F-NEXT:    retq
39;
40; AVX512VL-LABEL: signbit_sel_v8i16:
41; AVX512VL:       # %bb.0:
42; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
43; AVX512VL-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
44; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
45; AVX512VL-NEXT:    retq
46;
47; XOP-LABEL: signbit_sel_v8i16:
48; XOP:       # %bb.0:
49; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
50; XOP-NEXT:    vpcomltw %xmm3, %xmm2, %xmm2
51; XOP-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
52; XOP-NEXT:    retq
; No 16-bit blendv exists, so every target materializes the compare against zero
; (vpcmpgtw / vpcomltw) before blending; AVX512VL selects with vpternlogq instead.
53  %tr = icmp slt <8 x i16> %mask, zeroinitializer
54  %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
55  ret <8 x i16> %z
56}
57
58define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
59; AVX12-LABEL: signbit_sel_v4i32:
60; AVX12:       # %bb.0:
61; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
62; AVX12-NEXT:    retq
63;
64; AVX512F-LABEL: signbit_sel_v4i32:
65; AVX512F:       # %bb.0:
66; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
67; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
68; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
69; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
70; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
71; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
72; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
73; AVX512F-NEXT:    vzeroupper
74; AVX512F-NEXT:    retq
75;
76; AVX512VL-LABEL: signbit_sel_v4i32:
77; AVX512VL:       # %bb.0:
78; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
79; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
80; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
81; AVX512VL-NEXT:    retq
82;
83; XOP-LABEL: signbit_sel_v4i32:
84; XOP:       # %bb.0:
85; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
86; XOP-NEXT:    retq
; AVX/XOP fold the compare into vblendvps; AVX512 prefers a mask-register
; lowering (vpcmpgtd + vpblendmd), widening to zmm on AVX512F-only.
87  %tr = icmp slt <4 x i32> %mask, zeroinitializer
88  %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
89  ret <4 x i32> %z
90}
91
92define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
93; AVX12-LABEL: signbit_sel_v2i64:
94; AVX12:       # %bb.0:
95; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
96; AVX12-NEXT:    retq
97;
98; AVX512F-LABEL: signbit_sel_v2i64:
99; AVX512F:       # %bb.0:
100; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
101; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
102; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
103; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
104; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
105; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
106; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
107; AVX512F-NEXT:    vzeroupper
108; AVX512F-NEXT:    retq
109;
110; AVX512VL-LABEL: signbit_sel_v2i64:
111; AVX512VL:       # %bb.0:
112; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
113; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
114; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
115; AVX512VL-NEXT:    retq
116;
117; XOP-LABEL: signbit_sel_v2i64:
118; XOP:       # %bb.0:
119; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
120; XOP-NEXT:    retq
; AVX/XOP fold the compare into vblendvpd; AVX512 uses a mask register
; (vpcmpgtq + vpblendmq) instead.
121  %tr = icmp slt <2 x i64> %mask, zeroinitializer
122  %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
123  ret <2 x i64> %z
124}
125
126define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
127; AVX12-LABEL: signbit_sel_v4f32:
128; AVX12:       # %bb.0:
129; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
130; AVX12-NEXT:    retq
131;
132; AVX512F-LABEL: signbit_sel_v4f32:
133; AVX512F:       # %bb.0:
134; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
135; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
136; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
137; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
138; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
139; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
140; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
141; AVX512F-NEXT:    vzeroupper
142; AVX512F-NEXT:    retq
143;
144; AVX512VL-LABEL: signbit_sel_v4f32:
145; AVX512VL:       # %bb.0:
146; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
147; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
148; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
149; AVX512VL-NEXT:    retq
150;
151; XOP-LABEL: signbit_sel_v4f32:
152; XOP:       # %bb.0:
153; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
154; XOP-NEXT:    retq
; FP select driven by an integer sign-bit mask: vblendvps on AVX/XOP,
; k-masked vblendmps on AVX512.
155  %tr = icmp slt <4 x i32> %mask, zeroinitializer
156  %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
157  ret <4 x float> %z
158}
159
160define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
161; AVX12-LABEL: signbit_sel_v2f64:
162; AVX12:       # %bb.0:
163; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
164; AVX12-NEXT:    retq
165;
166; AVX512F-LABEL: signbit_sel_v2f64:
167; AVX512F:       # %bb.0:
168; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
169; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
170; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
171; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
172; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
173; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
174; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
175; AVX512F-NEXT:    vzeroupper
176; AVX512F-NEXT:    retq
177;
178; AVX512VL-LABEL: signbit_sel_v2f64:
179; AVX512VL:       # %bb.0:
180; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
181; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
182; AVX512VL-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
183; AVX512VL-NEXT:    retq
184;
185; XOP-LABEL: signbit_sel_v2f64:
186; XOP:       # %bb.0:
187; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
188; XOP-NEXT:    retq
; FP select driven by an integer sign-bit mask: vblendvpd on AVX/XOP,
; k-masked vblendmpd on AVX512.
189  %tr = icmp slt <2 x i64> %mask, zeroinitializer
190  %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
191  ret <2 x double> %z
192}
193
194; Test 256-bit vectors to see differences between AVX1 and AVX2.
195
196define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
197; AVX1-LABEL: signbit_sel_v32i8:
198; AVX1:       # %bb.0:
199; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
200; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
201; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
202; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
203; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
204; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
205; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
206; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
207; AVX1-NEXT:    retq
208;
209; AVX2-LABEL: signbit_sel_v32i8:
210; AVX2:       # %bb.0:
211; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
212; AVX2-NEXT:    retq
213;
214; AVX512-LABEL: signbit_sel_v32i8:
215; AVX512:       # %bb.0:
216; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
217; AVX512-NEXT:    retq
218;
219; XOP-LABEL: signbit_sel_v32i8:
220; XOP:       # %bb.0:
221; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
222; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
223; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
224; XOP-NEXT:    vpcomltb %xmm4, %xmm2, %xmm2
225; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
226; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
227; XOP-NEXT:    retq
; AVX1/XOP lack 256-bit integer compares, so the mask is built per 128-bit half;
; AVX2 and AVX512 fold everything into a single 256-bit vpblendvb.
228  %tr = icmp slt <32 x i8> %mask, zeroinitializer
229  %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
230  ret <32 x i8> %z
231}
232
233; Sorry 16-bit, you'll never be important enough to support?
234
235define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
236; AVX1-LABEL: signbit_sel_v16i16:
237; AVX1:       # %bb.0:
238; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
239; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
240; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm4, %xmm3
241; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm4, %xmm2
242; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
243; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
244; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
245; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
246; AVX1-NEXT:    retq
247;
248; AVX2-LABEL: signbit_sel_v16i16:
249; AVX2:       # %bb.0:
250; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
251; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
252; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
253; AVX2-NEXT:    retq
254;
255; AVX512F-LABEL: signbit_sel_v16i16:
256; AVX512F:       # %bb.0:
257; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
258; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
259; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
260; AVX512F-NEXT:    retq
261;
262; AVX512VL-LABEL: signbit_sel_v16i16:
263; AVX512VL:       # %bb.0:
264; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
265; AVX512VL-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
266; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm2 & (ymm0 ^ ymm1))
267; AVX512VL-NEXT:    retq
268;
269; XOP-LABEL: signbit_sel_v16i16:
270; XOP:       # %bb.0:
271; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
272; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
273; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
274; XOP-NEXT:    vpcomltw %xmm4, %xmm2, %xmm2
275; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
276; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
277; XOP-NEXT:    retq
; 16-bit elements again need an explicit compare on every target; AVX1/XOP do it
; per 128-bit half, AVX2/AVX512F compare then vpblendvb, AVX512VL uses vpternlogq.
278  %tr = icmp slt <16 x i16> %mask, zeroinitializer
279  %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
280  ret <16 x i16> %z
281}
282
283define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
284; AVX12-LABEL: signbit_sel_v8i32:
285; AVX12:       # %bb.0:
286; AVX12-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
287; AVX12-NEXT:    retq
288;
289; AVX512F-LABEL: signbit_sel_v8i32:
290; AVX512F:       # %bb.0:
291; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
292; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
293; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
294; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
295; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
296; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
297; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
298; AVX512F-NEXT:    retq
299;
300; AVX512VL-LABEL: signbit_sel_v8i32:
301; AVX512VL:       # %bb.0:
302; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
303; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
304; AVX512VL-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
305; AVX512VL-NEXT:    retq
306;
307; XOP-LABEL: signbit_sel_v8i32:
308; XOP:       # %bb.0:
309; XOP-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
310; XOP-NEXT:    retq
; vblendvps consumes the sign bit directly on AVX/XOP; AVX512 routes the
; compare through a k-register and uses vpblendmd.
311  %tr = icmp slt <8 x i32> %mask, zeroinitializer
312  %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
313  ret <8 x i32> %z
314}
315
316define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
317; AVX12-LABEL: signbit_sel_v4i64:
318; AVX12:       # %bb.0:
319; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
320; AVX12-NEXT:    retq
321;
322; AVX512F-LABEL: signbit_sel_v4i64:
323; AVX512F:       # %bb.0:
324; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
325; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
326; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
327; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
328; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
329; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
330; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
331; AVX512F-NEXT:    retq
332;
333; AVX512VL-LABEL: signbit_sel_v4i64:
334; AVX512VL:       # %bb.0:
335; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
336; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
337; AVX512VL-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
338; AVX512VL-NEXT:    retq
339;
340; XOP-LABEL: signbit_sel_v4i64:
341; XOP:       # %bb.0:
342; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
343; XOP-NEXT:    retq
; vblendvpd consumes the sign bit directly on AVX/XOP; AVX512 uses
; vpcmpgtq into a k-register plus vpblendmq.
344  %tr = icmp slt <4 x i64> %mask, zeroinitializer
345  %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
346  ret <4 x i64> %z
347}
348
349define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
350; AVX12-LABEL: signbit_sel_v4f64:
351; AVX12:       # %bb.0:
352; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
353; AVX12-NEXT:    retq
354;
355; AVX512F-LABEL: signbit_sel_v4f64:
356; AVX512F:       # %bb.0:
357; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
358; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
359; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
360; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
361; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
362; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
363; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
364; AVX512F-NEXT:    retq
365;
366; AVX512VL-LABEL: signbit_sel_v4f64:
367; AVX512VL:       # %bb.0:
368; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
369; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
370; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
371; AVX512VL-NEXT:    retq
372;
373; XOP-LABEL: signbit_sel_v4f64:
374; XOP:       # %bb.0:
375; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
376; XOP-NEXT:    retq
; FP select with an i64 sign-bit mask: single vblendvpd on AVX/XOP,
; k-masked vblendmpd on AVX512.
377  %tr = icmp slt <4 x i64> %mask, zeroinitializer
378  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
379  ret <4 x double> %z
380}
381
382; Try a condition with a different type than the select operands.
383
384define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
385; AVX1-LABEL: signbit_sel_v4f64_small_mask:
386; AVX1:       # %bb.0:
387; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
388; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
389; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
390; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
391; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
392; AVX1-NEXT:    retq
393;
394; AVX2-LABEL: signbit_sel_v4f64_small_mask:
395; AVX2:       # %bb.0:
396; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
397; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
398; AVX2-NEXT:    retq
399;
400; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
401; AVX512F:       # %bb.0:
402; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
403; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
404; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
405; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
406; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
407; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
408; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
409; AVX512F-NEXT:    retq
410;
411; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
412; AVX512VL:       # %bb.0:
413; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
414; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
415; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
416; AVX512VL-NEXT:    retq
417;
418; XOP-LABEL: signbit_sel_v4f64_small_mask:
419; XOP:       # %bb.0:
420; XOP-NEXT:    vpmovsxdq %xmm2, %xmm3
421; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
422; XOP-NEXT:    vpmovsxdq %xmm2, %xmm2
423; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
424; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
425; XOP-NEXT:    retq
; The i32 condition is narrower than the f64 select elements, so non-AVX512
; targets sign-extend it to i64 lanes (vpmovsxdq) before blending; AVX512
; compares the narrow mask directly into a k-register.
426  %tr = icmp slt <4 x i32> %mask, zeroinitializer
427  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
428  ret <4 x double> %z
429}
430
431; Try a 512-bit vector to make sure AVX-512 is handled as expected.
432
433define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
434; AVX12-LABEL: signbit_sel_v8f64:
435; AVX12:       # %bb.0:
436; AVX12-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
437; AVX12-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
438; AVX12-NEXT:    retq
439;
440; AVX512-LABEL: signbit_sel_v8f64:
441; AVX512:       # %bb.0:
442; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
443; AVX512-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
444; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
445; AVX512-NEXT:    retq
446;
447; XOP-LABEL: signbit_sel_v8f64:
448; XOP:       # %bb.0:
449; XOP-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
450; XOP-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
451; XOP-NEXT:    retq
; 512-bit case: AVX/XOP split into two 256-bit vblendvpd ops; AVX512 handles
; the whole vector with one k-masked vblendmpd.
452  %tr = icmp slt <8 x i64> %mask, zeroinitializer
453  %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
454  ret <8 x double> %z
455}
456
457; If we have a floating-point compare:
458; (1) Don't die.
459; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.
460
461define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
462; AVX12-LABEL: signbit_sel_v4f32_fcmp:
463; AVX12:       # %bb.0:
464; AVX12-NEXT:    vxorps %xmm2, %xmm2, %xmm2
465; AVX12-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
466; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
467; AVX12-NEXT:    retq
468;
469; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
470; AVX512F:       # %bb.0:
471; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
472; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
473; AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
474; AVX512F-NEXT:    vcmpltps %zmm2, %zmm0, %k1
475; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
476; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
477; AVX512F-NEXT:    vzeroupper
478; AVX512F-NEXT:    retq
479;
480; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
481; AVX512VL:       # %bb.0:
482; AVX512VL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
483; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
484; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
485; AVX512VL-NEXT:    retq
486;
487; XOP-LABEL: signbit_sel_v4f32_fcmp:
488; XOP:       # %bb.0:
489; XOP-NEXT:    vxorps %xmm2, %xmm2, %xmm2
490; XOP-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
491; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
492; XOP-NEXT:    retq
; The fcmp olt against zero is NOT folded into the blend (see the FIXME above
; this test): every target still emits an explicit vcmpltps.
493  %cmp = fcmp olt <4 x float> %x, zeroinitializer
494  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
495  ret <4 x float> %sel
496}
497
498define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
499; AVX1-LABEL: blend_splat1_mask_cond_v4i64:
500; AVX1:       # %bb.0:
501; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm3
502; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
503; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
504; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
505; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
506; AVX1-NEXT:    retq
507;
508; AVX2-LABEL: blend_splat1_mask_cond_v4i64:
509; AVX2:       # %bb.0:
510; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
511; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
512; AVX2-NEXT:    retq
513;
514; AVX512F-LABEL: blend_splat1_mask_cond_v4i64:
515; AVX512F:       # %bb.0:
516; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
517; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
518; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
519; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
520; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
521; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
522; AVX512F-NEXT:    retq
523;
524; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64:
525; AVX512VL:       # %bb.0:
526; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
527; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
528; AVX512VL-NEXT:    retq
529;
530; XOP-LABEL: blend_splat1_mask_cond_v4i64:
531; XOP:       # %bb.0:
532; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
533; XOP-NEXT:    vpsllq $63, %xmm3, %xmm3
534; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
535; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
536; XOP-NEXT:    vpsllq $63, %xmm0, %xmm0
537; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
538; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
539; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
540; XOP-NEXT:    retq
; The and-with-1 / icmp-eq-0 condition reduces to testing bit 0: AVX shifts that
; bit into the sign position (vpsllq $63) and blends; AVX512 uses vptestnmq.
541  %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
542  %c = icmp eq <4 x i64> %a, zeroinitializer
543  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
544  ret <4 x i64> %r
545}
546
547define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
548; AVX12-LABEL: blend_splat1_mask_cond_v4i32:
549; AVX12:       # %bb.0:
550; AVX12-NEXT:    vpslld $31, %xmm0, %xmm0
551; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
552; AVX12-NEXT:    retq
553;
554; AVX512F-LABEL: blend_splat1_mask_cond_v4i32:
555; AVX512F:       # %bb.0:
556; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
557; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
558; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
559; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
560; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
561; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
562; AVX512F-NEXT:    vzeroupper
563; AVX512F-NEXT:    retq
564;
565; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32:
566; AVX512VL:       # %bb.0:
567; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
568; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
569; AVX512VL-NEXT:    retq
570;
571; XOP-LABEL: blend_splat1_mask_cond_v4i32:
572; XOP:       # %bb.0:
573; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
574; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
575; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
576; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
577; XOP-NEXT:    retq
; Testing bit 0: AVX moves it into the sign position with vpslld $31 so
; vblendvps can key on it; AVX512 uses vptestnmd; XOP keeps an explicit compare.
578  %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
579  %c = icmp eq <4 x i32> %a, zeroinitializer
580  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
581  ret <4 x i32> %r
582}
583
584define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
585; AVX1-LABEL: blend_splat1_mask_cond_v16i16:
586; AVX1:       # %bb.0:
587; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm3
588; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
589; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
590; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
591; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
592; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
593; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
594; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
595; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
596; AVX1-NEXT:    retq
597;
598; AVX2-LABEL: blend_splat1_mask_cond_v16i16:
599; AVX2:       # %bb.0:
600; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
601; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
602; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
603; AVX2-NEXT:    retq
604;
605; AVX512F-LABEL: blend_splat1_mask_cond_v16i16:
606; AVX512F:       # %bb.0:
607; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
608; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
609; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
610; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
611; AVX512F-NEXT:    retq
612;
613; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16:
614; AVX512VL:       # %bb.0:
615; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
616; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
617; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
618; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
619; AVX512VL-NEXT:    retq
620;
621; XOP-LABEL: blend_splat1_mask_cond_v16i16:
622; XOP:       # %bb.0:
623; XOP-NEXT:    vpsllw $15, %xmm0, %xmm3
624; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
625; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
626; XOP-NEXT:    vpsllw $15, %xmm0, %xmm0
627; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
628; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
629; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
630; XOP-NEXT:    retq
; i16 has no blendv, so AVX1/AVX2/XOP shift bit 0 up (vpsllw $15) and
; arithmetic-shift back (vpsraw $15) to build a full-width mask; AVX512
; keeps the and + vpcmpeqw sequence.
631  %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
632  %c = icmp eq <16 x i16> %a, zeroinitializer
633  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
634  ret <16 x i16> %r
635}
636
637define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
638; AVX12-LABEL: blend_splat1_mask_cond_v16i8:
639; AVX12:       # %bb.0:
640; AVX12-NEXT:    vpsllw $7, %xmm0, %xmm0
641; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
642; AVX12-NEXT:    retq
643;
644; AVX512F-LABEL: blend_splat1_mask_cond_v16i8:
645; AVX512F:       # %bb.0:
646; AVX512F-NEXT:    vpbroadcastb {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
647; AVX512F-NEXT:    vpand %xmm3, %xmm0, %xmm0
648; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
649; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
650; AVX512F-NEXT:    retq
651;
652; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
653; AVX512VL:       # %bb.0:
654; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
655; AVX512VL-NEXT:    vpand %xmm3, %xmm0, %xmm0
656; AVX512VL-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
657; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1))
658; AVX512VL-NEXT:    retq
659;
660; XOP-LABEL: blend_splat1_mask_cond_v16i8:
661; XOP:       # %bb.0:
662; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
663; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
664; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
665; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
666; XOP-NEXT:    retq
; Byte elements: AVX12 shifts bit 0 into the sign bit with vpsllw $7 (there is
; no per-byte shift) and lets vpblendvb read it; AVX512/XOP compare explicitly.
667  %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
668  %c = icmp eq <16 x i8> %a, zeroinitializer
669  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
670  ret <16 x i8> %r
671}
672
673define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
674; AVX12-LABEL: blend_splatmax_mask_cond_v2i64:
675; AVX12:       # %bb.0:
676; AVX12-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
677; AVX12-NEXT:    retq
678;
679; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64:
680; AVX512F:       # %bb.0:
681; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
682; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
683; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
684; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
685; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
686; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
687; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
688; AVX512F-NEXT:    vzeroupper
689; AVX512F-NEXT:    retq
690;
691; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64:
692; AVX512VL:       # %bb.0:
693; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
694; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
695; AVX512VL-NEXT:    retq
696;
697; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
698; XOP:       # %bb.0:
699; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
700; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
701; XOP-NEXT:    vpcomneqq %xmm3, %xmm0, %xmm0
702; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
703; XOP-NEXT:    retq
; The tested constant IS the sign bit (0x8000000000000000), so AVX12 needs no
; shift or compare at all — vblendvpd reads the sign bit of %x directly.
704  %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
705  %c = icmp eq <2 x i64> %a, zeroinitializer
706  %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
707  ret <2 x i64> %r
708}
709
710define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
711; AVX12-LABEL: blend_splatmax_mask_cond_v8i32:
712; AVX12:       # %bb.0:
713; AVX12-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
714; AVX12-NEXT:    retq
715;
716; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32:
717; AVX512F:       # %bb.0:
718; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
719; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
720; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
721; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
722; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
723; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
724; AVX512F-NEXT:    retq
725;
726; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32:
727; AVX512VL:       # %bb.0:
728; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
729; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
730; AVX512VL-NEXT:    retq
731;
732; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
733; XOP:       # %bb.0:
734; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
735; XOP-NEXT:    retq
; The tested constant is the i32 sign bit (0x80000000), so AVX/XOP reduce the
; whole and/icmp/select to one vblendvps; AVX512 uses vptestnmd + vpblendmd.
736  %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
737  %c = icmp eq <8 x i32> %a, zeroinitializer
738  %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
739  ret <8 x i32> %r
740}
741
742define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
743; AVX12-LABEL: blend_splatmax_mask_cond_v8i16:
744; AVX12:       # %bb.0:
745; AVX12-NEXT:    vpsraw $15, %xmm0, %xmm0
746; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
747; AVX12-NEXT:    retq
748;
749; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16:
750; AVX512F:       # %bb.0:
751; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
752; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
753; AVX512F-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
754; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
755; AVX512F-NEXT:    retq
756;
757; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16:
758; AVX512VL:       # %bb.0:
759; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
760; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
761; AVX512VL-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
762; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
763; AVX512VL-NEXT:    retq
764;
765; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
766; XOP:       # %bb.0:
767; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
768; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
769; XOP-NEXT:    vpcomneqw %xmm3, %xmm0, %xmm0
770; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
771; XOP-NEXT:    retq
; i16 sign-bit test (0x8000): AVX12 broadcasts the sign bit across each word
; with vpsraw $15 and feeds it to vpblendvb; AVX512/XOP compare explicitly.
772  %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
773  %c = icmp eq <8 x i16> %a, zeroinitializer
774  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
775  ret <8 x i16> %r
776}
777
; Splat sign-bit mask on v32i8: (x & 0x80) == 0 selects %y, else %z. AVX2 uses
; the mask register directly in vpblendvb with no compare at all; AVX1/XOP must
; split the 256-bit compare into two 128-bit vpcmpgtb halves.
define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; XOP-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
  %c = icmp eq <32 x i8> %a, zeroinitializer
  %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
  ret <32 x i8> %r
}
826
; Splat non-sign-bit mask: tests bit 1 (i64 2) of each element. AVX1/2 shift
; the tested bit into the sign position (vpsllq $62) so vblendvpd can consume
; it; AVX512 folds the test into vptestnmq + a masked blend.
define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat_mask_cond_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $62, %ymm0, %ymm0
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpsllq $62, %xmm3, %xmm3
; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpsllq $62, %xmm0, %xmm0
; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  %c = icmp eq <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
  ret <4 x i64> %r
}
875
; Splat non-sign-bit mask: tests bit 16 (i32 65536). AVX1/2 hoist the bit to
; the sign position with vpslld $15 for vblendvps; AVX512 uses vptestnmd with a
; broadcast constant; XOP keeps the explicit and + vpcomneqd compare.
define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpslld $15, %xmm0, %xmm0
; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}
912
; Splat non-sign-bit mask on v16i16: tests bit 10 (i16 1024). With no i16
; blendv, AVX1/2/XOP shift the bit up (vpsllw $5) and broadcast it through the
; lane with vpsraw $15 to build a byte-wise mask for vpblendvb/vpcmov.
define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm3
; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat_mask_cond_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $5, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsllw $5, %xmm0, %xmm3
; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpsllw $5, %xmm0, %xmm0
; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
  %c = icmp eq <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
  ret <16 x i16> %r
}
965
; Splat non-sign-bit mask on v16i8: tests bit 2 (i8 4). AVX1/2 shift the bit
; into the byte's sign position (vpsllw $5) for vpblendvb; AVX512 and XOP keep
; the and + equality compare before blending.
define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsllw $5, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; AVX512F-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; AVX512VL-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %c = icmp eq <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
  ret <16 x i8> %r
}
1001
; Per-element (non-splat) single-bit mask <1, 4>. AVX2/XOP move each tested bit
; into the sign position with a variable shift (vpsllvq / vpshlq) to feed
; vblendvpd; AVX1 has no variable shift, so it keeps the and + vpcmpeqq.
define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; AVX1-LABEL: blend_mask_cond_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [1,4]
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [1,4]
; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <2 x i64> %x, <i64 1, i64 4>
  %c = icmp eq <2 x i64> %a, zeroinitializer
  %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
  ret <2 x i64> %r
}
1045
; Per-element single-bit mask <65536, 512, 2, 1>. AVX1 synthesizes the variable
; shift as a vpmulld by powers of two; AVX2/XOP use vpsllvd/vpshld; AVX512 uses
; vptestnmd + masked blend.
define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [65536,512,2,1]
; AVX512F-NEXT:    vptestnmd %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}
1087
; Per-element single-bit mask on v8i16. AVX/AVX512 keep the and + vpcmpeqw
; before the blend; only XOP, which has a per-element word shift (vpshlw), can
; move each bit to the sign position and test it with vpcomltw.
define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; AVX12-LABEL: blend_mask_cond_v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX12-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpcomltw %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096>
  %c = icmp eq <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
  ret <8 x i16> %r
}
1125
; Per-element single-bit mask on v16i8 (varied bits incl. the sign bit 128).
; AVX/AVX512 use and + vpcmpeqb; XOP collapses everything to one vpshlb that
; places each tested bit into the sign position for vpblendvb.
define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_mask_cond_v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2]
; AVX12-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX12-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2]
; AVX512F-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2]
; AVX512VL-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
  %c = icmp eq <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
  ret <16 x i8> %r
}
1161
; Per-element single-bit mask on v4i64 <2, 4, 32768, 1>. AVX2 uses one 256-bit
; vpsllvq; AVX1 falls back to and + vpcmpeqq on split halves; XOP splits into
; two vpshlq shifts; AVX512 folds to vptestnmq + masked blend.
define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_mask_cond_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovzxwq {{.*#+}} ymm3 = [2,4,32768,1]
; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1>
  %c = icmp eq <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
  ret <4 x i64> %r
}
1210
; Per-element single-bit mask on v8i32. AVX1 uses vpmulld by powers of two as a
; per-element shift on each half; AVX2 uses one vpsllvd; XOP uses two vpshld;
; AVX512 folds to vptestnmd + masked blend.
define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096]
; AVX512F-NEXT:    vptestnmd %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096>
  %c = icmp eq <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
  ret <8 x i32> %r
}
1257
; Per-element single-bit mask on v16i16. No i16 variable shift on AVX, so
; AVX1/2/AVX512 keep and + vpcmpeqw; XOP splits into vpshlw + vpcomltw per
; 128-bit half before the vpcmov blend.
define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_mask_cond_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpcomltw %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
  %c = icmp eq <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
  ret <16 x i16> %r
}
1312
; Per-element single-bit mask on v32i8. AVX2/AVX512 keep and + vpcmpeqb against
; the constant mask; XOP shifts each bit into the byte sign position with
; vpshlb per half and tests with vpcomltb before vpcmov.
define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: blend_mask_cond_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16]
; AVX2-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16]
; AVX512F-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16]
; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpcomltb %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16>
  %c = icmp eq <32 x i8> %a, zeroinitializer
  %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
  ret <32 x i8> %r
}
1367
; Unsigned-compare blend with memory operands: select v0 where v0 ugt 7, else
; v1, and store. AVX1/2 bias both sides by 0x8000... to emulate unsigned
; compare with signed vpcmpgtq; XOP compares unsigned directly (vpcomltuq);
; AVX512 folds to vpcmpnleuq + masked move.
define void @store_blend_load_v4i64(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovapd (%rsi), %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpxor 16(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775815,9223372036854775815]
; AVX1-NEXT:    # xmm3 = mem[0,0]
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor (%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    vmovapd %ymm0, (%rdx)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovapd (%rsi), %ymm1
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [9223372036854775815,9223372036854775815,9223372036854775815,9223372036854775815]
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovapd %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512VL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovapd (%rsi), %ymm0
; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [7,7]
; XOP-NEXT:    vpcomltuq 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltuq (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    vmovapd %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <4 x i64>, ptr %a0
  %v1 = load <4 x i64>, ptr %a1
  %cmp = icmp ugt <4 x i64> %v0, <i64 7, i64 7, i64 7, i64 7>
  %res = select <4 x i1> %cmp, <4 x i64> %v0, <4 x i64> %v1
  store <4 x i64> %res, ptr %a2
  ret void
}
1437
; Unsigned-compare blend with memory operands on v8i32: select v0 where
; v0 ugt 7, else v1, and store. AVX1/2 emulate the unsigned compare via
; vpmaxud + vpcmpeqd; XOP uses vpcomltud directly; AVX512 folds to
; vpcmpnleud + masked move.
define void @store_blend_load_v8i32(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps (%rsi), %ymm0
; AVX1-NEXT:    vmovdqa (%rdi), %xmm1
; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm2
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [8,8,8,8]
; AVX1-NEXT:    vpmaxud %xmm3, %xmm2, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpmaxud %xmm3, %xmm1, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vblendvps %ymm1, (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, (%rdx)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovaps (%rsi), %ymm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8]
; AVX2-NEXT:    vpmaxud %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovaps %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512VL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovaps (%rsi), %ymm0
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
; XOP-NEXT:    vpcomltud 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltud (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vblendvps %ymm1, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    vmovaps %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <8 x i32>, ptr %a0
  %v1 = load <8 x i32>, ptr %a1
  %cmp = icmp ugt <8 x i32> %v0, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %res = select <8 x i1> %cmp, <8 x i32> %v0, <8 x i32> %v1
  store <8 x i32> %res, ptr %a2
  ret void
}
1505
; Load two <16 x i16> vectors, keep each lane of %v0 that is unsigned-greater
; than 7 (otherwise take the lane from %v1), and store the result. There is no
; direct unsigned word compare before AVX512, so the ugt-vs-7 condition is
; matched as vpmaxuw+vpcmpeqw feeding a byte blend (vpblendvb / vpternlogq);
; XOP has a true unsigned compare and uses vpcomltuw + vpcmov instead.
define void @store_blend_load_v16i16(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8]
; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vmovdqa (%rsi), %xmm4
; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm5
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vmovdqa %xmm0, (%rdx)
; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdx)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
; AVX2-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem))
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm0
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
; XOP-NEXT:    vpcomltuw 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltuw (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; XOP-NEXT:    vmovdqa %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <16 x i16>, ptr %a0
  %v1 = load <16 x i16>, ptr %a1
  %cmp = icmp ugt <16 x i16> %v0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %res = select <16 x i1> %cmp, <16 x i16> %v0, <16 x i16> %v1
  store <16 x i16> %res, ptr %a2
  ret void
}
1574
; Byte-element variant of the test above: keep each <32 x i8> lane of %v0 that
; is unsigned-greater than 7, otherwise take %v1's lane, and store. The ugt
; compare against the splat constant is matched as vpmaxub+vpcmpeqb feeding a
; byte blend (vpblendvb / vpternlogq); XOP uses vpcomltub + vpcmov instead.
define void @store_blend_load_v32i8(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vmovdqa (%rsi), %xmm4
; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm5
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vmovdqa %xmm0, (%rdx)
; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdx)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem))
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm0
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOP-NEXT:    vpcomltub 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltub (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; XOP-NEXT:    vmovdqa %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <32 x i8>, ptr %a0
  %v1 = load <32 x i8>, ptr %a1
  %cmp = icmp ugt <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %res = select <32 x i1> %cmp, <32 x i8> %v0, <32 x i8> %v1
  store <32 x i8> %res, ptr %a2
  ret void
}
1643
; Regression test for PR46531: a select between (b | a) and (b ^ a) keyed on
; the low bit of %b ((b & 1) == 0). The AVX1/AVX2 lowering turns the bit test
; into a shift of the bit to the sign position (vpslld $31) feeding vblendvps;
; AVX512 uses vptestnmd into a mask register; XOP keeps an explicit
; and/compare (vpcomneqd) before the blend.
define void @PR46531(ptr %x, ptr %y, ptr %z) {
; AVX12-LABEL: PR46531:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovdqu (%rsi), %xmm0
; AVX12-NEXT:    vmovdqu (%rdx), %xmm1
; AVX12-NEXT:    vpor %xmm0, %xmm1, %xmm2
; AVX12-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX12-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; AVX12-NEXT:    vmovups %xmm0, (%rdi)
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: PR46531:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqu (%rsi), %xmm0
; AVX512F-NEXT:    vmovdqu (%rdx), %xmm1
; AVX512F-NEXT:    vpor %xmm0, %xmm1, %xmm2
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: PR46531:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqu (%rsi), %xmm0
; AVX512VL-NEXT:    vmovdqu (%rdx), %xmm1
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
; AVX512VL-NEXT:    vpxor %xmm0, %xmm1, %xmm2
; AVX512VL-NEXT:    vpord %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT:    vmovdqu %xmm2, (%rdi)
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: PR46531:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqu (%rsi), %xmm0
; XOP-NEXT:    vmovdqu (%rdx), %xmm1
; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm2
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomneqd %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; XOP-NEXT:    vblendvps %xmm3, %xmm0, %xmm2, %xmm0
; XOP-NEXT:    vmovups %xmm0, (%rdi)
; XOP-NEXT:    retq
  %a = load <4 x i32>, ptr %y, align 4
  %b = load <4 x i32>, ptr %z, align 4
  %or = or <4 x i32> %b, %a
  %and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp eq <4 x i32> %and, zeroinitializer
  %xor = xor <4 x i32> %b, %a
  %sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor
  store <4 x i32> %sel, ptr %x, align 4
  ret void
}
1700
; Regression test for PR110875: a 64-byte bit-mask expansion. %a2 is splatted,
; byte-shuffled, and ANDed with a per-lane power-of-two pattern; lanes whose
; selected bit is clear keep the concatenated %a0/%a1 input, others take the
; constant splat 20. Checks the shuffle + and + eq-zero + select chain lowers
; to per-target compare/blend sequences (vpblendvb, vpternlogd, vpcmov)
; without scalarizing.
define <64 x i8> @PR110875(<32 x i8> %a0, <32 x i8> %a1, i64 %a2) {
; AVX1-LABEL: PR110875:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm2[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:    vshufps {{.*#+}} ymm3 = ymm3[2,2,3,3,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm2[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
; AVX1-NEXT:    vshufps {{.*#+}} ymm2 = ymm2[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm4, %ymm2, %ymm2
; AVX1-NEXT:    vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm4 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
; AVX1-NEXT:    vandnps %ymm4, %ymm2, %ymm5
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm5, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm4, %ymm3, %ymm2
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR110875:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpshufb {{.*#+}} ymm3 = ymm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm3, %ymm3
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm5 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
; AVX2-NEXT:    vpblendvb %ymm3, %ymm5, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm5, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: PR110875:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-NEXT:    vmovq %rdi, %xmm0
; AVX512F-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem))
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: PR110875:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512VL-NEXT:    vpbroadcastq %rdi, %ymm0
; AVX512VL-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX512VL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT:    vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: PR110875:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm2
; XOP-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; XOP-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,4,5,5]
; XOP-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm2[0,1,2,3,6,6,7,7]
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; XOP-NEXT:    vshufps {{.*#+}} ymm3 = ymm3[2,2,3,3,6,6,7,7]
; XOP-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm2[0,0,1,1,4,5,6,7]
; XOP-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[2,2,3,3,4,5,6,7]
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
; XOP-NEXT:    vshufps {{.*#+}} ymm2 = ymm2[0,0,1,1,4,4,5,5]
; XOP-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; XOP-NEXT:    vandps %ymm4, %ymm2, %ymm2
; XOP-NEXT:    vandps %ymm4, %ymm3, %ymm3
; XOP-NEXT:    vextractf128 $1, %ymm3, %xmm4
; XOP-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; XOP-NEXT:    vpcomeqb %xmm5, %xmm4, %xmm4
; XOP-NEXT:    vpcomeqb %xmm5, %xmm3, %xmm3
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm4
; XOP-NEXT:    vpcomeqb %xmm5, %xmm4, %xmm4
; XOP-NEXT:    vpcomeqb %xmm5, %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; XOP-NEXT:    vbroadcastss {{.*#+}} ymm4 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
; XOP-NEXT:    vpcmov %ymm2, %ymm4, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm3, %ymm4, %ymm1, %ymm1
; XOP-NEXT:    retq
  %concat = shufflevector <32 x i8> %a0, <32 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %scl = insertelement <1 x i64> poison, i64 %a2, i64 0
  %splat = shufflevector <1 x i64> %scl, <1 x i64> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison>
  %ref = bitcast <8 x i64> %splat to <64 x i8>
  %shuf = shufflevector <64 x i8> %ref, <64 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55>
  %mask = and <64 x i8> %shuf, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %concat, <64 x i8> <i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20>
  ret <64 x i8> %res
}
1824
1825attributes #0 = { "no-nans-fp-math"="true" }
1826;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1827; AVX: {{.*}}
1828