; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop  | FileCheck %s --check-prefix=XOP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL

;
; 128-bit vectors
;
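
; Each test below builds the generic bitselect idiom, e.g.
;   %lhs = and <2 x i64> %x, <mask>
;   %rhs = and <2 x i64> %y, <inverted mask>
;   %res = or <2 x i64> %rhs, %lhs
; SSE/AVX are expected to keep the and/and/or chain, XOP to fold it into a
; single vpcmov, and AVX512 to fold it into vpternlogq, whose imm8 (e.g.
; 216 or 226) is the 3-input truth table of the select.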
define <2 x i64> @bitselect_v2i64_rr(<2 x i64>, <2 x i64>) {
; SSE-LABEL: bitselect_v2i64_rr:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_rr:
; XOP:       # %bb.0:
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_rr:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_rr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [4294967295,4294967294,4294967293,4294967292]
; AVX512F-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_rr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = and <2 x i64> %0, <i64 4294967296, i64 12884901890>
  %4 = and <2 x i64> %1, <i64 -4294967297, i64 -12884901891>
  %5 = or <2 x i64> %4, %3
  ret <2 x i64> %5
}

define <2 x i64> @bitselect_v2i64_rm(<2 x i64>, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v2i64_rm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_rm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %xmm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_rm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_rm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [4294967294,4294967293,4294967292,4294967295]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_rm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = load <2 x i64>, ptr %1
  %4 = and <2 x i64> %0, <i64 8589934593, i64 3>
  %5 = and <2 x i64> %3, <i64 -8589934594, i64 -4>
  %6 = or <2 x i64> %5, %4
  ret <2 x i64> %6
}

define <2 x i64> @bitselect_v2i64_mr(ptr nocapture readonly, <2 x i64>) {
; SSE-LABEL: bitselect_v2i64_mr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_mr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %xmm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_mr:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_mr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [2,3,0,1]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_mr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = load <2 x i64>, ptr %0
  %4 = and <2 x i64> %3, <i64 12884901890, i64 4294967296>
  %5 = and <2 x i64> %1, <i64 -12884901891, i64 -4294967297>
  %6 = or <2 x i64> %4, %5
  ret <2 x i64> %6
}

define <2 x i64> @bitselect_v2i64_mm(ptr nocapture readonly, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v2i64_mm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps (%rsi), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_mm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rsi), %xmm0
; XOP-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4294967292,4294967295,4294967294,4294967293]
; XOP-NEXT:    vpcmov %xmm1, (%rdi), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_mm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vmovaps (%rsi), %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_mm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512F-NEXT:    vmovdqa (%rsi), %xmm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [4294967292,4294967295,4294967294,4294967293]
; AVX512F-NEXT:    vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_mm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rsi), %xmm1
; AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967292,4294967295,4294967294,4294967293]
; AVX512VL-NEXT:    vpternlogq $202, (%rdi), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = load <2 x i64>, ptr %0
  %4 = load <2 x i64>, ptr %1
  %5 = and <2 x i64> %3, <i64 3, i64 8589934593>
  %6 = and <2 x i64> %4, <i64 -4, i64 -8589934594>
  %7 = or <2 x i64> %6, %5
  ret <2 x i64> %7
}
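
; The broadcast variants splat a runtime i64 mask. AVX512VL is expected to
; fold a loaded mask straight into vpternlogq's embedded broadcast operand,
; e.g. (%rdi){1to2}, instead of materializing the splat first.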
define <2 x i64> @bitselect_v2i64_broadcast_rrr(<2 x i64> %a0, <2 x i64> %a1, i64 %a2) {
; SSE-LABEL: bitselect_v2i64_broadcast_rrr:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_broadcast_rrr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm2
; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq %rdi, %xmm2
; AVX512F-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpandn %xmm1, %xmm2, %xmm1
; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %rdi, %xmm2
; AVX512VL-NEXT:    vpternlogq $226, %xmm1, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
  %1 = insertelement <2 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
  %3 = xor <2 x i64> %1, <i64 -1, i64 undef>
  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
  %5 = and <2 x i64> %a0, %2
  %6 = and <2 x i64> %a1, %4
  %7 = or <2 x i64> %5, %6
  ret <2 x i64> %7
}

define <2 x i64> @bitselect_v2i64_broadcast_rrm(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) {
; SSE-LABEL: bitselect_v2i64_broadcast_rrm:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_broadcast_rrm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_broadcast_rrm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vandnps %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_broadcast_rrm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX512F-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vandnps %xmm1, %xmm2, %xmm1
; AVX512F-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_broadcast_rrm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $228, (%rdi){1to2}, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %a2 = load i64, ptr %p2
  %1 = insertelement <2 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
  %3 = xor <2 x i64> %1, <i64 -1, i64 undef>
  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
  %5 = and <2 x i64> %a0, %2
  %6 = and <2 x i64> %a1, %4
  %7 = or <2 x i64> %5, %6
  ret <2 x i64> %7
}

;
; 256-bit vectors
;
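
; Without AVX, the 256-bit tests are expected to split into two 128-bit
; halves on xmm register pairs; XOP and AVX512VL should still handle each
; test with a single ymm vpcmov/vpternlogq.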
define <4 x i64> @bitselect_v4i64_rr(<4 x i64>, <4 x i64>) {
; SSE-LABEL: bitselect_v4i64_rr:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE-NEXT:    orps %xmm3, %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_rr:
; XOP:       # %bb.0:
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_rr:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_rr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [4294967295,4294967294,4294967293,4294967292,4294967293,4294967292,4294967293,4294967292]
; AVX512F-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_rr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = and <4 x i64> %0, <i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890>
  %4 = and <4 x i64> %1, <i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891>
  %5 = or <4 x i64> %4, %3
  ret <4 x i64> %5
}

define <4 x i64> @bitselect_v4i64_rm(<4 x i64>, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v4i64_rm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [18446744065119617022,18446744073709551612]
; SSE-NEXT:    movaps 16(%rdi), %xmm4
; SSE-NEXT:    andps %xmm2, %xmm4
; SSE-NEXT:    movaps (%rdi), %xmm5
; SSE-NEXT:    andps %xmm2, %xmm5
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andnps %xmm0, %xmm3
; SSE-NEXT:    orps %xmm5, %xmm3
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_rm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_rm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_rm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [4294967294,4294967293,4294967292,4294967295,4294967294,4294967293,4294967292,4294967295]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_rm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = load <4 x i64>, ptr %1
  %4 = and <4 x i64> %0, <i64 8589934593, i64 3, i64 8589934593, i64 3>
  %5 = and <4 x i64> %3, <i64 -8589934594, i64 -4, i64 -8589934594, i64 -4>
  %6 = or <4 x i64> %5, %4
  ret <4 x i64> %6
}

define <4 x i64> @bitselect_v4i64_mr(ptr nocapture readonly, <4 x i64>) {
; SSE-LABEL: bitselect_v4i64_mr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [12884901890,4294967296]
; SSE-NEXT:    movaps 16(%rdi), %xmm4
; SSE-NEXT:    andps %xmm2, %xmm4
; SSE-NEXT:    movaps (%rdi), %xmm5
; SSE-NEXT:    andps %xmm2, %xmm5
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andnps %xmm0, %xmm3
; SSE-NEXT:    orps %xmm5, %xmm3
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_mr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_mr:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_mr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [2,3,0,1,2,3,0,1]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_mr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = load <4 x i64>, ptr %0
  %4 = and <4 x i64> %3, <i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296>
  %5 = and <4 x i64> %1, <i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297>
  %6 = or <4 x i64> %4, %5
  ret <4 x i64> %6
}

define <4 x i64> @bitselect_v4i64_mm(ptr nocapture readonly, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v4i64_mm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [18446744073709551612,18446744065119617022]
; SSE-NEXT:    movaps 16(%rsi), %xmm2
; SSE-NEXT:    andps %xmm1, %xmm2
; SSE-NEXT:    movaps (%rsi), %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    andnps (%rdi), %xmm0
; SSE-NEXT:    orps %xmm3, %xmm0
; SSE-NEXT:    andnps 16(%rdi), %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_mm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rsi), %ymm0
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; XOP-NEXT:    # ymm1 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm1, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_mm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    vmovaps (%rsi), %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_mm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [4294967292,4294967295,4294967294,4294967293,4294967292,4294967295,4294967294,4294967293]
; AVX512F-NEXT:    vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_mm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [4294967292,4294967295,4294967294,4294967293,4294967292,4294967295,4294967294,4294967293]
; AVX512VL-NEXT:    vpternlogq $202, (%rdi), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = load <4 x i64>, ptr %0
  %4 = load <4 x i64>, ptr %1
  %5 = and <4 x i64> %3, <i64 3, i64 8589934593, i64 3, i64 8589934593>
  %6 = and <4 x i64> %4, <i64 -4, i64 -8589934594, i64 -4, i64 -8589934594>
  %7 = or <4 x i64> %6, %5
  ret <4 x i64> %7
}

define <4 x i64> @bitselect_v4i64_broadcast_rrr(<4 x i64> %a0, <4 x i64> %a1, i64 %a2) {
; SSE-LABEL: bitselect_v4i64_broadcast_rrr:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm4, %xmm5
; SSE-NEXT:    pandn %xmm3, %xmm5
; SSE-NEXT:    por %xmm5, %xmm1
; SSE-NEXT:    pandn %xmm2, %xmm4
; SSE-NEXT:    por %xmm4, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_broadcast_rrr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm2
; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq %rdi, %xmm2
; AVX512F-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpandn %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %rdi, %ymm2
; AVX512VL-NEXT:    vpternlogq $226, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %1 = insertelement <4 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <4 x i32> zeroinitializer
  %3 = xor <4 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
  %5 = and <4 x i64> %a0, %2
  %6 = and <4 x i64> %a1, %4
  %7 = or <4 x i64> %5, %6
  ret <4 x i64> %7
}

define <4 x i64> @bitselect_v4i64_broadcast_rrm(<4 x i64> %a0, <4 x i64> %a1, ptr %p2) {
; SSE-LABEL: bitselect_v4i64_broadcast_rrm:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm4 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm4, %xmm5
; SSE-NEXT:    pandn %xmm3, %xmm5
; SSE-NEXT:    por %xmm5, %xmm1
; SSE-NEXT:    pandn %xmm2, %xmm4
; SSE-NEXT:    por %xmm4, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_broadcast_rrm:
; XOP:       # %bb.0:
; XOP-NEXT:    vbroadcastsd (%rdi), %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm2
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastsd (%rdi), %ymm2
; AVX512F-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $228, (%rdi){1to4}, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %a2 = load i64, ptr %p2
  %1 = insertelement <4 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <4 x i32> zeroinitializer
  %3 = xor <4 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
  %5 = and <4 x i64> %a0, %2
  %6 = and <4 x i64> %a1, %4
  %7 = or <4 x i64> %5, %6
  ret <4 x i64> %7
}

;
; 512-bit vectors
;
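
; At 512 bits AVX512F and AVX512VL share the AVX512 prefixes: each test is
; expected to become a single zmm vpternlogq, while SSE splits into four
; xmm halves and XOP/AVX into two ymm halves.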
define <8 x i64> @bitselect_v8i64_rr(<8 x i64>, <8 x i64>) {
; SSE-LABEL: bitselect_v8i64_rr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm8 = [18446744060824649725,18446744060824649725]
; SSE-NEXT:    andps %xmm8, %xmm7
; SSE-NEXT:    movaps {{.*#+}} xmm9 = [18446744069414584319,18446744060824649725]
; SSE-NEXT:    andps %xmm9, %xmm6
; SSE-NEXT:    andps %xmm8, %xmm5
; SSE-NEXT:    andps %xmm9, %xmm4
; SSE-NEXT:    movaps %xmm9, %xmm10
; SSE-NEXT:    andnps %xmm0, %xmm10
; SSE-NEXT:    orps %xmm4, %xmm10
; SSE-NEXT:    movaps %xmm8, %xmm4
; SSE-NEXT:    andnps %xmm1, %xmm4
; SSE-NEXT:    orps %xmm5, %xmm4
; SSE-NEXT:    andnps %xmm2, %xmm9
; SSE-NEXT:    orps %xmm6, %xmm9
; SSE-NEXT:    andnps %xmm3, %xmm8
; SSE-NEXT:    orps %xmm7, %xmm8
; SSE-NEXT:    movaps %xmm10, %xmm0
; SSE-NEXT:    movaps %xmm4, %xmm1
; SSE-NEXT:    movaps %xmm9, %xmm2
; SSE-NEXT:    movaps %xmm8, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_rr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa {{.*#+}} ymm4 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725]
; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_rr:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm4 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725]
; AVX-NEXT:    vandps %ymm4, %ymm3, %ymm3
; AVX-NEXT:    vandps %ymm4, %ymm2, %ymm2
; AVX-NEXT:    vandnps %ymm0, %ymm4, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm4, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_rr:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = and <8 x i64> %0, <i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890, i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890>
  %4 = and <8 x i64> %1, <i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891>
  %5 = or <8 x i64> %4, %3
  ret <8 x i64> %5
}

define <8 x i64> @bitselect_v8i64_rm(<8 x i64>, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v8i64_rm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [18446744065119617022,18446744073709551612]
; SSE-NEXT:    movaps 48(%rdi), %xmm8
; SSE-NEXT:    andps %xmm4, %xmm8
; SSE-NEXT:    movaps 32(%rdi), %xmm9
; SSE-NEXT:    andps %xmm4, %xmm9
; SSE-NEXT:    movaps 16(%rdi), %xmm7
; SSE-NEXT:    andps %xmm4, %xmm7
; SSE-NEXT:    movaps (%rdi), %xmm6
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andnps %xmm0, %xmm5
; SSE-NEXT:    orps %xmm6, %xmm5
; SSE-NEXT:    movaps %xmm4, %xmm6
; SSE-NEXT:    andnps %xmm1, %xmm6
; SSE-NEXT:    orps %xmm7, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm7
; SSE-NEXT:    andnps %xmm2, %xmm7
; SSE-NEXT:    orps %xmm9, %xmm7
; SSE-NEXT:    andnps %xmm3, %xmm4
; SSE-NEXT:    orps %xmm8, %xmm4
; SSE-NEXT:    movaps %xmm5, %xmm0
; SSE-NEXT:    movaps %xmm6, %xmm1
; SSE-NEXT:    movaps %xmm7, %xmm2
; SSE-NEXT:    movaps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_rm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm2
; XOP-NEXT:    vmovdqa 32(%rdi), %ymm3
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
; XOP-NEXT:    # ymm4 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_rm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
; AVX-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
; AVX-NEXT:    vandps (%rdi), %ymm2, %ymm4
; AVX-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm4, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_rm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm1
; AVX512-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = load <8 x i64>, ptr %1
  %4 = and <8 x i64> %0, <i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3>
  %5 = and <8 x i64> %3, <i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4>
  %6 = or <8 x i64> %5, %4
  ret <8 x i64> %6
}

define <8 x i64> @bitselect_v8i64_mr(ptr nocapture readonly, <8 x i64>) {
; SSE-LABEL: bitselect_v8i64_mr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [12884901890,4294967296]
; SSE-NEXT:    movaps 48(%rdi), %xmm8
; SSE-NEXT:    andps %xmm4, %xmm8
; SSE-NEXT:    movaps 32(%rdi), %xmm9
; SSE-NEXT:    andps %xmm4, %xmm9
; SSE-NEXT:    movaps 16(%rdi), %xmm7
; SSE-NEXT:    andps %xmm4, %xmm7
; SSE-NEXT:    movaps (%rdi), %xmm6
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andnps %xmm0, %xmm5
; SSE-NEXT:    orps %xmm6, %xmm5
; SSE-NEXT:    movaps %xmm4, %xmm6
; SSE-NEXT:    andnps %xmm1, %xmm6
; SSE-NEXT:    orps %xmm7, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm7
; SSE-NEXT:    andnps %xmm2, %xmm7
; SSE-NEXT:    orps %xmm9, %xmm7
; SSE-NEXT:    andnps %xmm3, %xmm4
; SSE-NEXT:    orps %xmm8, %xmm4
; SSE-NEXT:    movaps %xmm5, %xmm0
; SSE-NEXT:    movaps %xmm6, %xmm1
; SSE-NEXT:    movaps %xmm7, %xmm2
; SSE-NEXT:    movaps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_mr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm2
; XOP-NEXT:    vmovdqa 32(%rdi), %ymm3
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [12884901890,4294967296,12884901890,4294967296]
; XOP-NEXT:    # ymm4 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_mr:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296]
; AVX-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
; AVX-NEXT:    vandps (%rdi), %ymm2, %ymm4
; AVX-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm4, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_mr:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm1
; AVX512-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = load <8 x i64>, ptr %0
  %4 = and <8 x i64> %3, <i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296>
  %5 = and <8 x i64> %1, <i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297>
  %6 = or <8 x i64> %4, %5
  ret <8 x i64> %6
}

define <8 x i64> @bitselect_v8i64_mm(ptr nocapture readonly, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v8i64_mm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm3 = [18446744073709551612,18446744065119617022]
; SSE-NEXT:    movaps 48(%rsi), %xmm4
; SSE-NEXT:    andps %xmm3, %xmm4
; SSE-NEXT:    movaps 32(%rsi), %xmm5
; SSE-NEXT:    andps %xmm3, %xmm5
; SSE-NEXT:    movaps 16(%rsi), %xmm2
; SSE-NEXT:    andps %xmm3, %xmm2
; SSE-NEXT:    movaps (%rsi), %xmm1
; SSE-NEXT:    andps %xmm3, %xmm1
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    andnps (%rdi), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    andnps 16(%rdi), %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    andnps 32(%rdi), %xmm2
; SSE-NEXT:    orps %xmm5, %xmm2
; SSE-NEXT:    andnps 48(%rdi), %xmm3
; SSE-NEXT:    orps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_mm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rsi), %ymm0
; XOP-NEXT:    vmovdqa 32(%rsi), %ymm1
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; XOP-NEXT:    # ymm2 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm2, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm2, 32(%rdi), %ymm1, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_mm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; AVX-NEXT:    # ymm1 = mem[0,1,0,1]
; AVX-NEXT:    vandps 32(%rsi), %ymm1, %ymm2
; AVX-NEXT:    vandps (%rsi), %ymm1, %ymm0
; AVX-NEXT:    vandnps (%rdi), %ymm1, %ymm3
; AVX-NEXT:    vorps %ymm3, %ymm0, %ymm0
; AVX-NEXT:    vandnps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_mm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rsi), %zmm1
; AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; AVX512-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT:    vpternlogq $202, (%rdi), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = load <8 x i64>, ptr %0
  %4 = load <8 x i64>, ptr %1
  %5 = and <8 x i64> %3, <i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593>
  %6 = and <8 x i64> %4, <i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594>
  %7 = or <8 x i64> %6, %5
  ret <8 x i64> %7
}

define <8 x i64> @bitselect_v8i64_broadcast_rrr(<8 x i64> %a0, <8 x i64> %a1, i64 %a2) {
; SSE-LABEL: bitselect_v8i64_broadcast_rrr:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm8
; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm8[0,1,0,1]
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    movdqa %xmm8, %xmm9
; SSE-NEXT:    pandn %xmm7, %xmm9
; SSE-NEXT:    por %xmm9, %xmm3
; SSE-NEXT:    movdqa %xmm8, %xmm7
; SSE-NEXT:    pandn %xmm6, %xmm7
; SSE-NEXT:    por %xmm7, %xmm2
; SSE-NEXT:    movdqa %xmm8, %xmm6
; SSE-NEXT:    pandn %xmm5, %xmm6
; SSE-NEXT:    por %xmm6, %xmm1
; SSE-NEXT:    pandn %xmm4, %xmm8
; SSE-NEXT:    por %xmm8, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_broadcast_rrr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm4
; XOP-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm4, %ymm4
; XOP-NEXT:    vpcmov %ymm4, %ymm2, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm3, %ymm1, %ymm1
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v8i64_broadcast_rrr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm4, %ymm4
; AVX1-NEXT:    vandps %ymm4, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm3, %ymm4, %ymm3
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandnps %ymm2, %ymm4, %ymm2
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v8i64_broadcast_rrr:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm4
; AVX2-NEXT:    vpbroadcastq %xmm4, %ymm4
; AVX2-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm3, %ymm4, %ymm3
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpandn %ymm2, %ymm4, %ymm2
; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_broadcast_rrr:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %zmm2
; AVX512-NEXT:    vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %1 = insertelement <8 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> zeroinitializer
  %3 = xor <8 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> zeroinitializer
  %5 = and <8 x i64> %a0, %2
  %6 = and <8 x i64> %a1, %4
  %7 = or <8 x i64> %5, %6
  ret <8 x i64> %7
}

define <8 x i64> @bitselect_v8i64_broadcast_rrm(<8 x i64> %a0, <8 x i64> %a1, ptr %p2) {
; SSE-LABEL: bitselect_v8i64_broadcast_rrm:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm8 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm8[0,1,0,1]
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    movdqa %xmm8, %xmm9
; SSE-NEXT:    pandn %xmm7, %xmm9
; SSE-NEXT:    por %xmm9, %xmm3
; SSE-NEXT:    movdqa %xmm8, %xmm7
; SSE-NEXT:    pandn %xmm6, %xmm7
; SSE-NEXT:    por %xmm7, %xmm2
; SSE-NEXT:    movdqa %xmm8, %xmm6
; SSE-NEXT:    pandn %xmm5, %xmm6
; SSE-NEXT:    por %xmm6, %xmm1
; SSE-NEXT:    pandn %xmm4, %xmm8
; SSE-NEXT:    por %xmm8, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_broadcast_rrm:
; XOP:       # %bb.0:
; XOP-NEXT:    vbroadcastsd (%rdi), %ymm4
; XOP-NEXT:    vpcmov %ymm4, %ymm2, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm3, %ymm1, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_broadcast_rrm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm4
; AVX-NEXT:    vandps %ymm4, %ymm1, %ymm1
; AVX-NEXT:    vandps %ymm4, %ymm0, %ymm0
; AVX-NEXT:    vandnps %ymm3, %ymm4, %ymm3
; AVX-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vandnps %ymm2, %ymm4, %ymm2
; AVX-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_broadcast_rrm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $228, (%rdi){1to8}, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a2 = load i64, ptr %p2
  %1 = insertelement <8 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> zeroinitializer
  %3 = xor <8 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> zeroinitializer
  %5 = and <8 x i64> %a0, %2
  %6 = and <8 x i64> %a1, %4
  %7 = or <8 x i64> %5, %6
  ret <8 x i64> %7
}

; Check that mask registers don't get canonicalized.
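; On AVX512 the <4 x i1> select should stay in k-registers: the compares
; are masked by the condition and combined with korw rather than widened
; back into a vector bitselect.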
define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: bitselect_v4i1_loop:
; SSE:       # %bb.0: # %bb
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [12,12,12,12]
; SSE-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i1_loop:
; XOP:       # %bb.0: # %bb
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpcomneqd %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpcomeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; XOP-NEXT:    vpcomeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v4i1_loop:
; AVX1:       # %bb.0: # %bb
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v4i1_loop:
; AVX2:       # %bb.0: # %bb
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12]
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [15,15,15,15]
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i1_loop:
; AVX512F:       # %bb.0: # %bb
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
; AVX512F-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k2
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT:    korw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i1_loop:
; AVX512VL:       # %bb.0: # %bb
; AVX512VL-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
; AVX512VL-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k2
; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0 {%k2}
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1 {%k1}
; AVX512VL-NEXT:    korw %k0, %k1, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    retq
bb:
  %tmp = icmp ne <4 x i32> %a0, zeroinitializer
  %tmp2 = icmp eq <4 x i32> %a1, <i32 12, i32 12, i32 12, i32 12>
  %tmp3 = icmp eq <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %tmp4 = select <4 x i1> %tmp, <4 x i1> %tmp2, <4 x i1> %tmp3
  ret <4 x i1> %tmp4
}

; Regression reported on 057db2002bb3d79429db3c5fe436c8cefc50cb25
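; (The IR below appears to be MemorySanitizer-style instrumented code: note
; the %_msret shadow load and the shadow-address xor with 0x500000000000.)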
@d = external global <2 x i64>, align 16
define void @constantfold_andn_mask() nounwind {
; SSE-LABEL: constantfold_andn_mask:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    callq use@PLT
; SSE-NEXT:    movdqu (%rax), %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pavgb %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pandn %xmm0, %xmm2
; SSE-NEXT:    por %xmm1, %xmm2
; SSE-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; SSE-NEXT:    xorq d@GOTPCREL(%rip), %rax
; SSE-NEXT:    movdqa %xmm2, (%rax)
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; XOP-LABEL: constantfold_andn_mask:
; XOP:       # %bb.0: # %entry
; XOP-NEXT:    pushq %rax
; XOP-NEXT:    callq use@PLT
; XOP-NEXT:    vmovdqu (%rax), %xmm1
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; XOP-NEXT:    vpand %xmm2, %xmm1, %xmm3
; XOP-NEXT:    vpand %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; XOP-NEXT:    vpandn %xmm0, %xmm2, %xmm0
; XOP-NEXT:    vpor %xmm0, %xmm3, %xmm0
; XOP-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; XOP-NEXT:    xorq d@GOTPCREL(%rip), %rax
; XOP-NEXT:    vmovdqa %xmm0, (%rax)
; XOP-NEXT:    popq %rax
; XOP-NEXT:    retq
;
; AVX1-LABEL: constantfold_andn_mask:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    pushq %rax
; AVX1-NEXT:    callq use@PLT
; AVX1-NEXT:    vmovdqu (%rax), %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX1-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX1-NEXT:    vmovdqa %xmm0, (%rax)
; AVX1-NEXT:    popq %rax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constantfold_andn_mask:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    pushq %rax
; AVX2-NEXT:    callq use@PLT
; AVX2-NEXT:    vmovdqu (%rax), %xmm1
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX2-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX2-NEXT:    vmovdqa %xmm0, (%rax)
; AVX2-NEXT:    popq %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: constantfold_andn_mask:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    pushq %rax
; AVX512F-NEXT:    callq use@PLT
; AVX512F-NEXT:    vmovdqu (%rax), %xmm1
; AVX512F-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX512F-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX512F-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX512F-NEXT:    vmovdqa %xmm0, (%rax)
; AVX512F-NEXT:    popq %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constantfold_andn_mask:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    pushq %rax
; AVX512VL-NEXT:    callq use@PLT
; AVX512VL-NEXT:    vmovdqu (%rax), %xmm1
; AVX512VL-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX512VL-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $216, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX512VL-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX512VL-NEXT:    vmovdqa %xmm0, (%rax)
; AVX512VL-NEXT:    popq %rax
; AVX512VL-NEXT:    retq
entry:
  %call = call noundef <2 x i64> @use()
  %_msret = load <2 x i64>, ptr undef, align 8
  %i = bitcast <2 x i64> %_msret to <16 x i8>
  %i1 = bitcast <2 x i64> %call to <16 x i8>
  %i2 = and <16 x i8> %i, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
  %i3 = and <16 x i8> %i1, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
  %i4 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>, <16 x i8> %i3)
  %i5 = bitcast <16 x i8> %i2 to <2 x i64>
  %i6 = bitcast <16 x i8> %i4 to <2 x i64>
  %i7 = and <2 x i64> %_msret, <i64 567462211834873824, i64 567462211834873824>
  %i8 = xor <2 x i64> zeroinitializer, <i64 -1, i64 -1>
  %i9 = xor <2 x i64> %i6, <i64 -1, i64 -1>
  %i10 = and <2 x i64> %i8, %i5
  %i11 = and <2 x i64> %i7, %i9
  %i12 = or <2 x i64> zeroinitializer, %i10
  %i13 = or <2 x i64> %i12, %i11
  store <2 x i64> %i13, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @d to i64), i64 87960930222080) to ptr), align 16
  ret void
}

declare <2 x i64> @use()
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)