xref: /llvm-project/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll (revision 69ffa7be3bda5547d7a41233f86b88539616e386)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512
3; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
4; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
5; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
6
; Overloads of the llvm.abs intrinsic declared for the tests in this file.
; The trailing i1 operand is the is_int_min_poison flag (see the LLVM LangRef).
7declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
8declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
9declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
10declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
11
; abs of a <4 x i8> value followed by an equality compare against a splat of
; i8 129 (bit pattern 0x81). In every RUN configuration the expected output
; keeps a byte-wise abs (pabsb, or psubb + pminub in the SSE2 run), performs a
; single pcmpeqb against a constant-pool operand, and then widens the byte
; mask to 32-bit lanes for the <4 x i1> return value.
12define <4 x i1> @illegal_abs_unchanged(<4 x i8> %x) {
13; AVX512-LABEL: illegal_abs_unchanged:
14; AVX512:       # %bb.0:
15; AVX512-NEXT:    vpabsb %xmm0, %xmm0
16; AVX512-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17; AVX512-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
18; AVX512-NEXT:    retq
19;
20; AVX2-LABEL: illegal_abs_unchanged:
21; AVX2:       # %bb.0:
22; AVX2-NEXT:    vpabsb %xmm0, %xmm0
23; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
25; AVX2-NEXT:    retq
26;
27; SSE41-LABEL: illegal_abs_unchanged:
28; SSE41:       # %bb.0:
29; SSE41-NEXT:    pabsb %xmm0, %xmm0
30; SSE41-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
32; SSE41-NEXT:    retq
33;
34; SSE2-LABEL: illegal_abs_unchanged:
35; SSE2:       # %bb.0:
36; SSE2-NEXT:    pxor %xmm1, %xmm1
37; SSE2-NEXT:    psubb %xmm0, %xmm1
38; SSE2-NEXT:    pminub %xmm1, %xmm0
39; SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
40; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
41; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
42; SSE2-NEXT:    retq
43  %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true)
44  %cmp = icmp eq <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129>
45  ret <4 x i1> %cmp
46}
47
; Inequality counterpart of illegal_abs_unchanged: abs of a <4 x i8> value
; compared with icmp ne against a splat of i8 129. The expected output still
; does abs + pcmpeqb and then inverts the mask (vpternlogq $15 in the AVX512
; run, an all-ones register plus xor in the other runs) before widening it to
; 32-bit lanes.
48define <4 x i1> @illegal_abs_unchanged2(<4 x i8> %x) {
49; AVX512-LABEL: illegal_abs_unchanged2:
50; AVX512:       # %bb.0:
51; AVX512-NEXT:    vpabsb %xmm0, %xmm0
52; AVX512-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
53; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
54; AVX512-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
55; AVX512-NEXT:    retq
56;
57; AVX2-LABEL: illegal_abs_unchanged2:
58; AVX2:       # %bb.0:
59; AVX2-NEXT:    vpabsb %xmm0, %xmm0
60; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
61; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
62; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
63; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
64; AVX2-NEXT:    retq
65;
66; SSE41-LABEL: illegal_abs_unchanged2:
67; SSE41:       # %bb.0:
68; SSE41-NEXT:    pabsb %xmm0, %xmm0
69; SSE41-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
70; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
71; SSE41-NEXT:    pxor %xmm0, %xmm1
72; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
73; SSE41-NEXT:    retq
74;
75; SSE2-LABEL: illegal_abs_unchanged2:
76; SSE2:       # %bb.0:
77; SSE2-NEXT:    pxor %xmm1, %xmm1
78; SSE2-NEXT:    psubb %xmm0, %xmm1
79; SSE2-NEXT:    pminub %xmm1, %xmm0
80; SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
81; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
82; SSE2-NEXT:    pxor %xmm1, %xmm0
83; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
84; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
85; SSE2-NEXT:    retq
86  %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true)
87  %cmp = icmp ne <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129>
88  ret <4 x i1> %cmp
89}
90
; abs of a <4 x i64> value compared for equality with a splat of 129,
; returned as <4 x i1>. The AVX512 run uses vpabsq and compares into a mask
; register; the AVX2 and SSE4.1 runs expand abs as (0 - x) blended on the sign
; bit, and the SSE2 run uses a sign-mask xor/sub sequence with a 64-bit
; equality built from 32-bit compares.
; NOTE(review): the name suggests a rewrite into two compares joined by "or",
; but the checks below still show abs followed by one compare in every run.
91define <4 x i1> @illegal_abs_to_eq_or(<4 x i64> %x) {
92; AVX512-LABEL: illegal_abs_to_eq_or:
93; AVX512:       # %bb.0:
94; AVX512-NEXT:    vpabsq %ymm0, %ymm0
95; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
96; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
97; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
98; AVX512-NEXT:    vzeroupper
99; AVX512-NEXT:    retq
100;
101; AVX2-LABEL: illegal_abs_to_eq_or:
102; AVX2:       # %bb.0:
103; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
104; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
105; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
106; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
107; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
108; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
109; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
110; AVX2-NEXT:    vzeroupper
111; AVX2-NEXT:    retq
112;
113; SSE41-LABEL: illegal_abs_to_eq_or:
114; SSE41:       # %bb.0:
115; SSE41-NEXT:    movdqa %xmm0, %xmm2
116; SSE41-NEXT:    pxor %xmm3, %xmm3
117; SSE41-NEXT:    pxor %xmm4, %xmm4
118; SSE41-NEXT:    psubq %xmm0, %xmm4
119; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
120; SSE41-NEXT:    psubq %xmm1, %xmm3
121; SSE41-NEXT:    movdqa %xmm1, %xmm0
122; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
123; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = [129,129]
124; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
125; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
126; SSE41-NEXT:    packssdw %xmm1, %xmm2
127; SSE41-NEXT:    movdqa %xmm2, %xmm0
128; SSE41-NEXT:    retq
129;
130; SSE2-LABEL: illegal_abs_to_eq_or:
131; SSE2:       # %bb.0:
132; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
133; SSE2-NEXT:    psrad $31, %xmm2
134; SSE2-NEXT:    pxor %xmm2, %xmm0
135; SSE2-NEXT:    psubq %xmm2, %xmm0
136; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
137; SSE2-NEXT:    psrad $31, %xmm2
138; SSE2-NEXT:    pxor %xmm2, %xmm1
139; SSE2-NEXT:    psubq %xmm2, %xmm1
140; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
141; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
142; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
143; SSE2-NEXT:    movdqa %xmm0, %xmm2
144; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
145; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
146; SSE2-NEXT:    andps %xmm2, %xmm0
147; SSE2-NEXT:    retq
148  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
149  %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
150  ret <4 x i1> %cmp
151}
152
; Same abs + icmp eq 129 pattern on <4 x i64> as illegal_abs_to_eq_or, but the
; <4 x i1> result is sign-extended back to <4 x i64>. Because the result keeps
; 64-bit lanes, the expected output uses the vector compare result directly
; (no mask register in the AVX512 run and no narrowing pack in the other runs).
153define <4 x i64> @illegal_abs_to_eq_or_sext(<4 x i64> %x) {
154; AVX512-LABEL: illegal_abs_to_eq_or_sext:
155; AVX512:       # %bb.0:
156; AVX512-NEXT:    vpabsq %ymm0, %ymm0
157; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
158; AVX512-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
159; AVX512-NEXT:    retq
160;
161; AVX2-LABEL: illegal_abs_to_eq_or_sext:
162; AVX2:       # %bb.0:
163; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
164; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
165; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
166; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
167; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
168; AVX2-NEXT:    retq
169;
170; SSE41-LABEL: illegal_abs_to_eq_or_sext:
171; SSE41:       # %bb.0:
172; SSE41-NEXT:    movdqa %xmm0, %xmm2
173; SSE41-NEXT:    pxor %xmm3, %xmm3
174; SSE41-NEXT:    pxor %xmm4, %xmm4
175; SSE41-NEXT:    psubq %xmm1, %xmm4
176; SSE41-NEXT:    movdqa %xmm1, %xmm0
177; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm1
178; SSE41-NEXT:    psubq %xmm2, %xmm3
179; SSE41-NEXT:    movdqa %xmm2, %xmm0
180; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm2
181; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = [129,129]
182; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
183; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
184; SSE41-NEXT:    movdqa %xmm2, %xmm0
185; SSE41-NEXT:    retq
186;
187; SSE2-LABEL: illegal_abs_to_eq_or_sext:
188; SSE2:       # %bb.0:
189; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
190; SSE2-NEXT:    psrad $31, %xmm2
191; SSE2-NEXT:    pxor %xmm2, %xmm1
192; SSE2-NEXT:    psubq %xmm2, %xmm1
193; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
194; SSE2-NEXT:    psrad $31, %xmm2
195; SSE2-NEXT:    pxor %xmm2, %xmm0
196; SSE2-NEXT:    psubq %xmm2, %xmm0
197; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
198; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
199; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2]
200; SSE2-NEXT:    pand %xmm3, %xmm0
201; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
202; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
203; SSE2-NEXT:    pand %xmm2, %xmm1
204; SSE2-NEXT:    retq
205  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
206  %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
207  %r = sext <4 x i1> %cmp to <4 x i64>
208  ret <4 x i64> %r
209}
210
; abs of a <4 x i64> value compared with icmp ne against a splat of 129,
; returned as <4 x i1>. The AVX512 run uses vpabsq + vpcmpneqq into a mask
; register; the other runs expand abs manually, compare for equality, and
; invert the result by xor-ing with an all-ones register.
; NOTE(review): the name suggests a rewrite into two compares joined by "and",
; but the checks below still show abs followed by one compare in every run.
211define <4 x i1> @illegal_abs_to_ne_and(<4 x i64> %x) {
212; AVX512-LABEL: illegal_abs_to_ne_and:
213; AVX512:       # %bb.0:
214; AVX512-NEXT:    vpabsq %ymm0, %ymm0
215; AVX512-NEXT:    vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
216; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
217; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
218; AVX512-NEXT:    vzeroupper
219; AVX512-NEXT:    retq
220;
221; AVX2-LABEL: illegal_abs_to_ne_and:
222; AVX2:       # %bb.0:
223; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
224; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
225; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
226; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
227; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
228; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
229; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
230; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
231; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
232; AVX2-NEXT:    vzeroupper
233; AVX2-NEXT:    retq
234;
235; SSE41-LABEL: illegal_abs_to_ne_and:
236; SSE41:       # %bb.0:
237; SSE41-NEXT:    movdqa %xmm0, %xmm2
238; SSE41-NEXT:    pxor %xmm3, %xmm3
239; SSE41-NEXT:    pxor %xmm4, %xmm4
240; SSE41-NEXT:    psubq %xmm0, %xmm4
241; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
242; SSE41-NEXT:    psubq %xmm1, %xmm3
243; SSE41-NEXT:    movdqa %xmm1, %xmm0
244; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
245; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = [129,129]
246; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
247; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
248; SSE41-NEXT:    packssdw %xmm1, %xmm2
249; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
250; SSE41-NEXT:    pxor %xmm2, %xmm0
251; SSE41-NEXT:    retq
252;
253; SSE2-LABEL: illegal_abs_to_ne_and:
254; SSE2:       # %bb.0:
255; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
256; SSE2-NEXT:    psrad $31, %xmm2
257; SSE2-NEXT:    pxor %xmm2, %xmm0
258; SSE2-NEXT:    psubq %xmm2, %xmm0
259; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
260; SSE2-NEXT:    psrad $31, %xmm2
261; SSE2-NEXT:    pxor %xmm2, %xmm1
262; SSE2-NEXT:    psubq %xmm2, %xmm1
263; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
264; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
265; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
266; SSE2-NEXT:    movdqa %xmm0, %xmm2
267; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
268; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
269; SSE2-NEXT:    andps %xmm2, %xmm0
270; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
271; SSE2-NEXT:    xorps %xmm1, %xmm0
272; SSE2-NEXT:    retq
273  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
274  %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
275  ret <4 x i1> %cmp
276}
277
; Same abs + icmp ne 129 pattern on <4 x i64> as illegal_abs_to_ne_and, with
; the <4 x i1> result sign-extended back to <4 x i64>. Every run computes abs,
; compares for equality and then inverts the full-width result (vpternlogq $15
; in the AVX512 run, xor with an all-ones register in the other runs).
278define <4 x i64> @illegal_abs_to_ne_and_sext(<4 x i64> %x) {
279; AVX512-LABEL: illegal_abs_to_ne_and_sext:
280; AVX512:       # %bb.0:
281; AVX512-NEXT:    vpabsq %ymm0, %ymm0
282; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
283; AVX512-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
284; AVX512-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
285; AVX512-NEXT:    retq
286;
287; AVX2-LABEL: illegal_abs_to_ne_and_sext:
288; AVX2:       # %bb.0:
289; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
290; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
291; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
292; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
293; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
294; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
295; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
296; AVX2-NEXT:    retq
297;
298; SSE41-LABEL: illegal_abs_to_ne_and_sext:
299; SSE41:       # %bb.0:
300; SSE41-NEXT:    movdqa %xmm0, %xmm2
301; SSE41-NEXT:    pxor %xmm3, %xmm3
302; SSE41-NEXT:    pxor %xmm4, %xmm4
303; SSE41-NEXT:    psubq %xmm1, %xmm4
304; SSE41-NEXT:    movdqa %xmm1, %xmm0
305; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm1
306; SSE41-NEXT:    psubq %xmm2, %xmm3
307; SSE41-NEXT:    movdqa %xmm2, %xmm0
308; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm2
309; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = [129,129]
310; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
311; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
312; SSE41-NEXT:    pxor %xmm3, %xmm2
313; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
314; SSE41-NEXT:    pxor %xmm3, %xmm1
315; SSE41-NEXT:    movdqa %xmm2, %xmm0
316; SSE41-NEXT:    retq
317;
318; SSE2-LABEL: illegal_abs_to_ne_and_sext:
319; SSE2:       # %bb.0:
320; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
321; SSE2-NEXT:    psrad $31, %xmm2
322; SSE2-NEXT:    pxor %xmm2, %xmm1
323; SSE2-NEXT:    psubq %xmm2, %xmm1
324; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
325; SSE2-NEXT:    psrad $31, %xmm2
326; SSE2-NEXT:    pxor %xmm2, %xmm0
327; SSE2-NEXT:    psubq %xmm2, %xmm0
328; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
329; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
330; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2]
331; SSE2-NEXT:    pand %xmm3, %xmm0
332; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
333; SSE2-NEXT:    pxor %xmm3, %xmm0
334; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
335; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
336; SSE2-NEXT:    pand %xmm2, %xmm1
337; SSE2-NEXT:    pxor %xmm3, %xmm1
338; SSE2-NEXT:    retq
339  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
340  %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
341  %r = sext <4 x i1> %cmp to <4 x i64>
342  ret <4 x i64> %r
343}
344
; abs of a <4 x i32> value compared for equality with a splat of 129,
; returned as <4 x i1>. The AVX512, AVX2 and SSE4.1 runs use pabsd/vpabsd
; followed by one pcmpeqd; the SSE2 run expands abs as a sign-mask
; (psrad $31) xor/sub sequence before the same single compare.
345define <4 x i1> @legal_abs_eq_unchanged(<4 x i32> %x) {
346; AVX512-LABEL: legal_abs_eq_unchanged:
347; AVX512:       # %bb.0:
348; AVX512-NEXT:    vpabsd %xmm0, %xmm0
349; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
350; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
351; AVX512-NEXT:    retq
352;
353; AVX2-LABEL: legal_abs_eq_unchanged:
354; AVX2:       # %bb.0:
355; AVX2-NEXT:    vpabsd %xmm0, %xmm0
356; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
357; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
358; AVX2-NEXT:    retq
359;
360; SSE41-LABEL: legal_abs_eq_unchanged:
361; SSE41:       # %bb.0:
362; SSE41-NEXT:    pabsd %xmm0, %xmm0
363; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
364; SSE41-NEXT:    retq
365;
366; SSE2-LABEL: legal_abs_eq_unchanged:
367; SSE2:       # %bb.0:
368; SSE2-NEXT:    movdqa %xmm0, %xmm1
369; SSE2-NEXT:    psrad $31, %xmm1
370; SSE2-NEXT:    pxor %xmm1, %xmm0
371; SSE2-NEXT:    psubd %xmm1, %xmm0
372; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
373; SSE2-NEXT:    retq
374  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
375  %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
376  ret <4 x i1> %cmp
377}
378
; Same abs + icmp eq 129 pattern on <4 x i32> as legal_abs_eq_unchanged, with
; the <4 x i1> result sign-extended to <4 x i32>. The expected instruction
; sequences are the same as for the <4 x i1>-returning variant in every run.
379define <4 x i32> @legal_abs_eq_unchanged_sext(<4 x i32> %x) {
380; AVX512-LABEL: legal_abs_eq_unchanged_sext:
381; AVX512:       # %bb.0:
382; AVX512-NEXT:    vpabsd %xmm0, %xmm0
383; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
384; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
385; AVX512-NEXT:    retq
386;
387; AVX2-LABEL: legal_abs_eq_unchanged_sext:
388; AVX2:       # %bb.0:
389; AVX2-NEXT:    vpabsd %xmm0, %xmm0
390; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
391; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
392; AVX2-NEXT:    retq
393;
394; SSE41-LABEL: legal_abs_eq_unchanged_sext:
395; SSE41:       # %bb.0:
396; SSE41-NEXT:    pabsd %xmm0, %xmm0
397; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
398; SSE41-NEXT:    retq
399;
400; SSE2-LABEL: legal_abs_eq_unchanged_sext:
401; SSE2:       # %bb.0:
402; SSE2-NEXT:    movdqa %xmm0, %xmm1
403; SSE2-NEXT:    psrad $31, %xmm1
404; SSE2-NEXT:    pxor %xmm1, %xmm0
405; SSE2-NEXT:    psubd %xmm1, %xmm0
406; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
407; SSE2-NEXT:    retq
408  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
409  %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
410  %r = sext <4 x i1> %cmp to <4 x i32>
411  ret <4 x i32> %r
412}
413
; abs of a <4 x i32> value compared with icmp ne against a splat of 129,
; returned as <4 x i1>. Every run computes abs, performs one equality compare
; and inverts it (vpternlogq $15 in the AVX512 run, xor with an all-ones
; register in the other runs).
414define <4 x i1> @legal_abs_ne_unchangedd(<4 x i32> %x) {
415; AVX512-LABEL: legal_abs_ne_unchangedd:
416; AVX512:       # %bb.0:
417; AVX512-NEXT:    vpabsd %xmm0, %xmm0
418; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
419; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
420; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
421; AVX512-NEXT:    retq
422;
423; AVX2-LABEL: legal_abs_ne_unchangedd:
424; AVX2:       # %bb.0:
425; AVX2-NEXT:    vpabsd %xmm0, %xmm0
426; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
427; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
428; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
429; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
430; AVX2-NEXT:    retq
431;
432; SSE41-LABEL: legal_abs_ne_unchangedd:
433; SSE41:       # %bb.0:
434; SSE41-NEXT:    pabsd %xmm0, %xmm1
435; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
436; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
437; SSE41-NEXT:    pxor %xmm1, %xmm0
438; SSE41-NEXT:    retq
439;
440; SSE2-LABEL: legal_abs_ne_unchangedd:
441; SSE2:       # %bb.0:
442; SSE2-NEXT:    movdqa %xmm0, %xmm1
443; SSE2-NEXT:    psrad $31, %xmm1
444; SSE2-NEXT:    pxor %xmm1, %xmm0
445; SSE2-NEXT:    psubd %xmm1, %xmm0
446; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
447; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
448; SSE2-NEXT:    pxor %xmm1, %xmm0
449; SSE2-NEXT:    retq
450  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
451  %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
452  ret <4 x i1> %cmp
453}
454
; Same abs + icmp ne 129 pattern on <4 x i32> as legal_abs_ne_unchangedd, with
; the <4 x i1> result sign-extended to <4 x i32>. The expected instruction
; sequences are the same as for the <4 x i1>-returning variant in every run.
455define <4 x i32> @legal_abs_ne_unchangedd_sext(<4 x i32> %x) {
456; AVX512-LABEL: legal_abs_ne_unchangedd_sext:
457; AVX512:       # %bb.0:
458; AVX512-NEXT:    vpabsd %xmm0, %xmm0
459; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
460; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
461; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
462; AVX512-NEXT:    retq
463;
464; AVX2-LABEL: legal_abs_ne_unchangedd_sext:
465; AVX2:       # %bb.0:
466; AVX2-NEXT:    vpabsd %xmm0, %xmm0
467; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
468; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
469; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
470; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
471; AVX2-NEXT:    retq
472;
473; SSE41-LABEL: legal_abs_ne_unchangedd_sext:
474; SSE41:       # %bb.0:
475; SSE41-NEXT:    pabsd %xmm0, %xmm1
476; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
477; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
478; SSE41-NEXT:    pxor %xmm1, %xmm0
479; SSE41-NEXT:    retq
480;
481; SSE2-LABEL: legal_abs_ne_unchangedd_sext:
482; SSE2:       # %bb.0:
483; SSE2-NEXT:    movdqa %xmm0, %xmm1
484; SSE2-NEXT:    psrad $31, %xmm1
485; SSE2-NEXT:    pxor %xmm1, %xmm0
486; SSE2-NEXT:    psubd %xmm1, %xmm0
487; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
488; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
489; SSE2-NEXT:    pxor %xmm1, %xmm0
490; SSE2-NEXT:    retq
491  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
492  %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
493  %r = sext <4 x i1> %cmp to <4 x i32>
494  ret <4 x i32> %r
495}
496
; (x == 129) | (x == -129) on <4 x i64>, returned as <4 x i1>. The AVX512 run
; is expected to fold the pair into vpabsq plus one compare against 129. The
; AVX2, SSE4.1 and SSE2 runs keep two separate equality compares (against 129
; and 18446744073709551487, i.e. -129 as an unsigned 64-bit value) joined by
; an or.
497define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) {
498; AVX512-LABEL: eq_or_to_abs_vec4x64:
499; AVX512:       # %bb.0:
500; AVX512-NEXT:    vpabsq %ymm0, %ymm0
501; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
502; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
503; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
504; AVX512-NEXT:    vzeroupper
505; AVX512-NEXT:    retq
506;
507; AVX2-LABEL: eq_or_to_abs_vec4x64:
508; AVX2:       # %bb.0:
509; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
510; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm1
511; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
512; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
513; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
514; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
515; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
516; AVX2-NEXT:    vzeroupper
517; AVX2-NEXT:    retq
518;
519; SSE41-LABEL: eq_or_to_abs_vec4x64:
520; SSE41:       # %bb.0:
521; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = [129,129]
522; SSE41-NEXT:    movdqa %xmm0, %xmm3
523; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
524; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
525; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
526; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
527; SSE41-NEXT:    por %xmm3, %xmm0
528; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
529; SSE41-NEXT:    por %xmm2, %xmm1
530; SSE41-NEXT:    packssdw %xmm1, %xmm0
531; SSE41-NEXT:    retq
532;
533; SSE2-LABEL: eq_or_to_abs_vec4x64:
534; SSE2:       # %bb.0:
535; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
536; SSE2-NEXT:    movdqa %xmm1, %xmm3
537; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
538; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
539; SSE2-NEXT:    movdqa %xmm2, %xmm4
540; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
541; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
542; SSE2-NEXT:    andps %xmm4, %xmm2
543; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
544; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
545; SSE2-NEXT:    pcmpeqd %xmm3, %xmm0
546; SSE2-NEXT:    movdqa %xmm0, %xmm3
547; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3]
548; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
549; SSE2-NEXT:    andps %xmm3, %xmm0
550; SSE2-NEXT:    orps %xmm2, %xmm0
551; SSE2-NEXT:    retq
552  %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
553  %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
554  %cmp = or <4 x i1> %cmp1, %cmp2
555  ret <4 x i1> %cmp
556}
557
; (x == 129) | (x == -129) on <4 x i64>, with the <4 x i1> result
; sign-extended to <4 x i64>. The AVX512 run is expected to use vpabsq plus
; one vpcmpeqq; the AVX2 run keeps two compares joined by vpor. The SSE4.1 and
; SSE2 runs additionally narrow the combined mask to 32-bit lanes and then
; re-extend it to 64-bit lanes for the return value.
558define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
559; AVX512-LABEL: eq_or_to_abs_vec4x64_sext:
560; AVX512:       # %bb.0:
561; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
562; AVX512-NEXT:    vpabsq %ymm0, %ymm0
563; AVX512-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
564; AVX512-NEXT:    retq
565;
566; AVX2-LABEL: eq_or_to_abs_vec4x64_sext:
567; AVX2:       # %bb.0:
568; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
569; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm1
570; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
571; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
572; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
573; AVX2-NEXT:    retq
574;
575; SSE41-LABEL: eq_or_to_abs_vec4x64_sext:
576; SSE41:       # %bb.0:
577; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = [129,129]
578; SSE41-NEXT:    movdqa %xmm0, %xmm3
579; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
580; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
581; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
582; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
583; SSE41-NEXT:    por %xmm3, %xmm0
584; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
585; SSE41-NEXT:    por %xmm2, %xmm1
586; SSE41-NEXT:    packssdw %xmm1, %xmm0
587; SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
588; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
589; SSE41-NEXT:    pslld $31, %xmm1
590; SSE41-NEXT:    psrad $31, %xmm1
591; SSE41-NEXT:    movdqa %xmm2, %xmm0
592; SSE41-NEXT:    retq
593;
594; SSE2-LABEL: eq_or_to_abs_vec4x64_sext:
595; SSE2:       # %bb.0:
596; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
597; SSE2-NEXT:    movdqa %xmm1, %xmm3
598; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
599; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
600; SSE2-NEXT:    movdqa %xmm2, %xmm4
601; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
602; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
603; SSE2-NEXT:    andps %xmm4, %xmm2
604; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
605; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
606; SSE2-NEXT:    pcmpeqd %xmm3, %xmm0
607; SSE2-NEXT:    movdqa %xmm0, %xmm3
608; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3]
609; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
610; SSE2-NEXT:    andps %xmm3, %xmm0
611; SSE2-NEXT:    orps %xmm2, %xmm0
612; SSE2-NEXT:    xorps %xmm2, %xmm2
613; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
614; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
615; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
616; SSE2-NEXT:    pslld $31, %xmm1
617; SSE2-NEXT:    psrad $31, %xmm1
618; SSE2-NEXT:    retq
619  %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
620  %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
621  %cmp = or <4 x i1> %cmp1, %cmp2
622  %r = sext <4 x i1> %cmp to <4 x i64>
623  ret <4 x i64> %r
624}
625
; (x != 129) & (x != -129) on <4 x i64>, returned as <4 x i1>. The AVX512 run
; is expected to fold the pair into vpabsq plus one vpcmpneqq against 129. The
; AVX2, SSE4.1 and SSE2 runs compute the two equality compares, or them
; together and invert the result by xor-ing with an all-ones register.
626define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
627; AVX512-LABEL: ne_and_to_abs_vec4x64:
628; AVX512:       # %bb.0:
629; AVX512-NEXT:    vpabsq %ymm0, %ymm0
630; AVX512-NEXT:    vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
631; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
632; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
633; AVX512-NEXT:    vzeroupper
634; AVX512-NEXT:    retq
635;
636; AVX2-LABEL: ne_and_to_abs_vec4x64:
637; AVX2:       # %bb.0:
638; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
639; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm1
640; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
641; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
642; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm0, %ymm0
643; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
644; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
645; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
646; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
647; AVX2-NEXT:    vzeroupper
648; AVX2-NEXT:    retq
649;
650; SSE41-LABEL: ne_and_to_abs_vec4x64:
651; SSE41:       # %bb.0:
652; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = [129,129]
653; SSE41-NEXT:    movdqa %xmm0, %xmm3
654; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
655; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
656; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
657; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
658; SSE41-NEXT:    pcmpeqq %xmm5, %xmm0
659; SSE41-NEXT:    por %xmm3, %xmm0
660; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
661; SSE41-NEXT:    por %xmm2, %xmm1
662; SSE41-NEXT:    packssdw %xmm1, %xmm0
663; SSE41-NEXT:    pxor %xmm4, %xmm0
664; SSE41-NEXT:    retq
665;
666; SSE2-LABEL: ne_and_to_abs_vec4x64:
667; SSE2:       # %bb.0:
668; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
669; SSE2-NEXT:    movdqa %xmm1, %xmm3
670; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
671; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
672; SSE2-NEXT:    movdqa %xmm2, %xmm4
673; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
674; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
675; SSE2-NEXT:    andps %xmm4, %xmm2
676; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
677; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
678; SSE2-NEXT:    pcmpeqd %xmm4, %xmm1
679; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
680; SSE2-NEXT:    movdqa %xmm0, %xmm4
681; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
682; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
683; SSE2-NEXT:    andps %xmm4, %xmm0
684; SSE2-NEXT:    orps %xmm2, %xmm0
685; SSE2-NEXT:    xorps %xmm3, %xmm0
686; SSE2-NEXT:    retq
687  %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
688  %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
689  %cmp = and <4 x i1> %cmp1, %cmp2
690  ret <4 x i1> %cmp
691}
692
; (x != 129) & (x != -129) on <4 x i64>, with the <4 x i1> result
; sign-extended to <4 x i64>. The AVX512 run is expected to use vpabsq, one
; vpcmpeqq and a vpternlogq $15 inversion; the AVX2 run keeps two compares, an
; or and an xor with all-ones. The SSE4.1 and SSE2 runs additionally narrow
; the inverted mask to 32-bit lanes and then re-extend it to 64-bit lanes.
693define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
694; AVX512-LABEL: ne_and_to_abs_vec4x64_sext:
695; AVX512:       # %bb.0:
696; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
697; AVX512-NEXT:    vpabsq %ymm0, %ymm0
698; AVX512-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
699; AVX512-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
700; AVX512-NEXT:    retq
701;
702; AVX2-LABEL: ne_and_to_abs_vec4x64_sext:
703; AVX2:       # %bb.0:
704; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
705; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm1
706; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
707; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
708; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm0, %ymm0
709; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
710; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
711; AVX2-NEXT:    retq
712;
713; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
714; SSE41:       # %bb.0:
715; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = [129,129]
716; SSE41-NEXT:    movdqa %xmm0, %xmm3
717; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
718; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
719; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
720; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
721; SSE41-NEXT:    pcmpeqq %xmm5, %xmm0
722; SSE41-NEXT:    por %xmm3, %xmm0
723; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
724; SSE41-NEXT:    por %xmm2, %xmm1
725; SSE41-NEXT:    packssdw %xmm1, %xmm0
726; SSE41-NEXT:    pxor %xmm4, %xmm0
727; SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
728; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
729; SSE41-NEXT:    pslld $31, %xmm1
730; SSE41-NEXT:    psrad $31, %xmm1
731; SSE41-NEXT:    movdqa %xmm2, %xmm0
732; SSE41-NEXT:    retq
733;
734; SSE2-LABEL: ne_and_to_abs_vec4x64_sext:
735; SSE2:       # %bb.0:
736; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
737; SSE2-NEXT:    movdqa %xmm1, %xmm3
738; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
739; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
740; SSE2-NEXT:    movdqa %xmm2, %xmm4
741; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
742; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
743; SSE2-NEXT:    andps %xmm4, %xmm2
744; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
745; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
746; SSE2-NEXT:    pcmpeqd %xmm4, %xmm1
747; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
748; SSE2-NEXT:    movdqa %xmm0, %xmm4
749; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
750; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
751; SSE2-NEXT:    andps %xmm4, %xmm0
752; SSE2-NEXT:    orps %xmm2, %xmm0
753; SSE2-NEXT:    xorps %xmm3, %xmm0
754; SSE2-NEXT:    xorps %xmm2, %xmm2
755; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
756; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
757; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
758; SSE2-NEXT:    pslld $31, %xmm1
759; SSE2-NEXT:    psrad $31, %xmm1
760; SSE2-NEXT:    retq
761  %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
762  %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
763  %cmp = and <4 x i1> %cmp1, %cmp2
764  %r = sext <4 x i1> %cmp to <4 x i64>
765  ret <4 x i64> %r
766}
767
; (x == 1) | (x == -1) on <4 x i32>, returned as <4 x i1>. The AVX512, AVX2
; and SSE4.1 runs are expected to fold the pair into pabsd/vpabsd plus one
; pcmpeqd against 1. The SSE2 run keeps two compares (against 1 and against an
; all-ones register) joined by por.
768define <4 x i1> @eq_or_to_abs_vec4x32(<4 x i32> %x) {
769; AVX512-LABEL: eq_or_to_abs_vec4x32:
770; AVX512:       # %bb.0:
771; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
772; AVX512-NEXT:    vpabsd %xmm0, %xmm0
773; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
774; AVX512-NEXT:    retq
775;
776; AVX2-LABEL: eq_or_to_abs_vec4x32:
777; AVX2:       # %bb.0:
778; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
779; AVX2-NEXT:    vpabsd %xmm0, %xmm0
780; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
781; AVX2-NEXT:    retq
782;
783; SSE41-LABEL: eq_or_to_abs_vec4x32:
784; SSE41:       # %bb.0:
785; SSE41-NEXT:    pabsd %xmm0, %xmm0
786; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
787; SSE41-NEXT:    retq
788;
789; SSE2-LABEL: eq_or_to_abs_vec4x32:
790; SSE2:       # %bb.0:
791; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,1,1]
792; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
793; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
794; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
795; SSE2-NEXT:    por %xmm1, %xmm0
796; SSE2-NEXT:    retq
797  %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
798  %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
799  %cmp = or <4 x i1> %cmp1, %cmp2
800  ret <4 x i1> %cmp
801}
802
; Test: same (x == 1) | (x == -1) pattern on <4 x i32>, but the <4 x i1>
; result is sign-extended to <4 x i32> before being returned.
; Expected output: abs + one equality compare on the AVX512, AVX2 and SSE4.1
; runs; two pcmpeqd joined by por on the SSE2 run. No extra instruction for
; the sext appears in any of the four expected sequences.
803define <4 x i32> @eq_or_to_abs_vec4x32_sext(<4 x i32> %x) {
804; AVX512-LABEL: eq_or_to_abs_vec4x32_sext:
805; AVX512:       # %bb.0:
806; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
807; AVX512-NEXT:    vpabsd %xmm0, %xmm0
808; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
809; AVX512-NEXT:    retq
810;
811; AVX2-LABEL: eq_or_to_abs_vec4x32_sext:
812; AVX2:       # %bb.0:
813; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
814; AVX2-NEXT:    vpabsd %xmm0, %xmm0
815; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
816; AVX2-NEXT:    retq
817;
818; SSE41-LABEL: eq_or_to_abs_vec4x32_sext:
819; SSE41:       # %bb.0:
820; SSE41-NEXT:    pabsd %xmm0, %xmm0
821; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
822; SSE41-NEXT:    retq
823;
824; SSE2-LABEL: eq_or_to_abs_vec4x32_sext:
825; SSE2:       # %bb.0:
826; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,1,1]
827; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
828; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
829; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
830; SSE2-NEXT:    por %xmm1, %xmm0
831; SSE2-NEXT:    retq
832  %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> ; lanes equal to +1
833  %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; lanes equal to -1
834  %cmp = or <4 x i1> %cmp1, %cmp2 ; lane is true if either compare matched
835  %r = sext <4 x i1> %cmp to <4 x i32> ; true lanes become all-ones, false lanes zero
836  ret <4 x i32> %r
837}
838
; Test: (x != 1) & (x != -1) on <4 x i32>, returned as a <4 x i1> mask.
; The AVX512, AVX2 and SSE4.1 expectations show abs + an equality compare
; against 1 whose result is then inverted (vpternlogq $15 on the AVX512 run,
; xor with an all-ones register on the AVX2 and SSE4.1 runs).
; The SSE2 expectation contains no abs instruction; it ORs two equality
; compares and inverts the result with pxor against all-ones.
839define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
840; AVX512-LABEL: ne_and_to_abs_vec4x32:
841; AVX512:       # %bb.0:
842; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
843; AVX512-NEXT:    vpabsd %xmm0, %xmm0
844; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
845; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
846; AVX512-NEXT:    retq
847;
848; AVX2-LABEL: ne_and_to_abs_vec4x32:
849; AVX2:       # %bb.0:
850; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
851; AVX2-NEXT:    vpabsd %xmm0, %xmm0
852; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
853; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
854; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
855; AVX2-NEXT:    retq
856;
857; SSE41-LABEL: ne_and_to_abs_vec4x32:
858; SSE41:       # %bb.0:
859; SSE41-NEXT:    pabsd %xmm0, %xmm1
860; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
861; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
862; SSE41-NEXT:    pxor %xmm1, %xmm0
863; SSE41-NEXT:    retq
864;
865; SSE2-LABEL: ne_and_to_abs_vec4x32:
866; SSE2:       # %bb.0:
867; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,1,1]
868; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
869; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
870; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
871; SSE2-NEXT:    por %xmm1, %xmm0
872; SSE2-NEXT:    pxor %xmm2, %xmm0
873; SSE2-NEXT:    retq
874  %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> ; lanes different from +1
875  %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; lanes different from -1
876  %cmp = and <4 x i1> %cmp1, %cmp2 ; lane is true only if it differs from both
877  ret <4 x i1> %cmp
878}
879
; Test: same (x != 1) & (x != -1) pattern on <4 x i32>, but the <4 x i1>
; result is sign-extended to <4 x i32> before being returned.
; Expected output: abs + equality compare + inversion on the AVX512, AVX2 and
; SSE4.1 runs; two pcmpeqd, por and an inverting pxor on the SSE2 run. No
; extra instruction for the sext appears in any of the four sequences.
880define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) {
881; AVX512-LABEL: ne_and_to_abs_vec4x32_sext:
882; AVX512:       # %bb.0:
883; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
884; AVX512-NEXT:    vpabsd %xmm0, %xmm0
885; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
886; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
887; AVX512-NEXT:    retq
888;
889; AVX2-LABEL: ne_and_to_abs_vec4x32_sext:
890; AVX2:       # %bb.0:
891; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
892; AVX2-NEXT:    vpabsd %xmm0, %xmm0
893; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
894; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
895; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
896; AVX2-NEXT:    retq
897;
898; SSE41-LABEL: ne_and_to_abs_vec4x32_sext:
899; SSE41:       # %bb.0:
900; SSE41-NEXT:    pabsd %xmm0, %xmm1
901; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
902; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
903; SSE41-NEXT:    pxor %xmm1, %xmm0
904; SSE41-NEXT:    retq
905;
906; SSE2-LABEL: ne_and_to_abs_vec4x32_sext:
907; SSE2:       # %bb.0:
908; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,1,1]
909; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
910; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
911; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
912; SSE2-NEXT:    por %xmm1, %xmm0
913; SSE2-NEXT:    pxor %xmm2, %xmm0
914; SSE2-NEXT:    retq
915  %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> ; lanes different from +1
916  %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; lanes different from -1
917  %cmp = and <4 x i1> %cmp1, %cmp2 ; lane is true only if it differs from both
918  %r = sext <4 x i1> %cmp to <4 x i32> ; true lanes become all-ones, false lanes zero
919  ret <4 x i32> %r
920}
921
; Test: (x == 88) | (x == -88) on <4 x i16>, returned as a <4 x i1> mask.
; None of the four expected sequences contains an abs instruction; each one
; performs two pcmpeqw/vpcmpeqw compares and ORs the results (through mask
; registers with korw on the AVX512 run, with por/vpor on the others).
; NOTE(review): presumably this pins the current, unfolded codegen for a
; vector type that is not legal on x86 - confirm against the commit that
; introduced the test.
922define <4 x i1> @eq_or_to_abs_vec4x16(<4 x i16> %x) {
923; AVX512-LABEL: eq_or_to_abs_vec4x16:
924; AVX512:       # %bb.0:
925; AVX512-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
926; AVX512-NEXT:    vpmovsxwd %xmm1, %ymm1
927; AVX512-NEXT:    vptestmd %ymm1, %ymm1, %k0
928; AVX512-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
929; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
930; AVX512-NEXT:    vptestmd %ymm0, %ymm0, %k1
931; AVX512-NEXT:    korw %k1, %k0, %k1
932; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
933; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
934; AVX512-NEXT:    vzeroupper
935; AVX512-NEXT:    retq
936;
937; AVX2-LABEL: eq_or_to_abs_vec4x16:
938; AVX2:       # %bb.0:
939; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
940; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
941; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
942; AVX2-NEXT:    vpmovsxwd %xmm0, %xmm0
943; AVX2-NEXT:    retq
944;
945; SSE41-LABEL: eq_or_to_abs_vec4x16:
946; SSE41:       # %bb.0:
947; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm1 = [88,88,88,88,88,88,88,88]
948; SSE41-NEXT:    pcmpeqw %xmm0, %xmm1
949; SSE41-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
950; SSE41-NEXT:    por %xmm1, %xmm0
951; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
952; SSE41-NEXT:    retq
953;
954; SSE2-LABEL: eq_or_to_abs_vec4x16:
955; SSE2:       # %bb.0:
956; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u]
957; SSE2-NEXT:    pcmpeqw %xmm0, %xmm1
958; SSE2-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
959; SSE2-NEXT:    por %xmm1, %xmm0
960; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
961; SSE2-NEXT:    retq
962  %cmp1 = icmp eq <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88> ; lanes equal to +88
963  %cmp2 = icmp eq <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88> ; lanes equal to -88
964  %cmp = or <4 x i1> %cmp1, %cmp2 ; lane is true if either compare matched
965  ret <4 x i1> %cmp
966}
967
; Test: (x == 88) | (x == -88) on <4 x i8>, with the <4 x i1> result
; sign-extended to <4 x i8> before being returned.
; None of the four expected sequences contains an abs instruction. The AVX2,
; SSE4.1 and SSE2 runs expect two byte compares joined by por/vpor; the AVX512
; run expects the compares to be widened to zmm, combined through mask
; registers with korw, and narrowed back with vpmovdb.
; NOTE(review): presumably this pins the current, unfolded codegen for a
; vector type that is not legal on x86 - confirm against the commit that
; introduced the test.
968define <4 x i8> @eq_or_to_abs_vec4x8_sext(<4 x i8> %x) {
969; AVX512-LABEL: eq_or_to_abs_vec4x8_sext:
970; AVX512:       # %bb.0:
971; AVX512-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
972; AVX512-NEXT:    vpmovsxbd %xmm1, %zmm1
973; AVX512-NEXT:    vptestmd %zmm1, %zmm1, %k0
974; AVX512-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
975; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
976; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
977; AVX512-NEXT:    korw %k1, %k0, %k1
978; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
979; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
980; AVX512-NEXT:    vzeroupper
981; AVX512-NEXT:    retq
982;
983; AVX2-LABEL: eq_or_to_abs_vec4x8_sext:
984; AVX2:       # %bb.0:
985; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
986; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
987; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
988; AVX2-NEXT:    retq
989;
990; SSE41-LABEL: eq_or_to_abs_vec4x8_sext:
991; SSE41:       # %bb.0:
992; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u]
993; SSE41-NEXT:    pcmpeqb %xmm0, %xmm1
994; SSE41-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
995; SSE41-NEXT:    por %xmm1, %xmm0
996; SSE41-NEXT:    retq
997;
998; SSE2-LABEL: eq_or_to_abs_vec4x8_sext:
999; SSE2:       # %bb.0:
1000; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u]
1001; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1002; SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1003; SSE2-NEXT:    por %xmm1, %xmm0
1004; SSE2-NEXT:    retq
1005  %cmp1 = icmp eq <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88> ; lanes equal to +88
1006  %cmp2 = icmp eq <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88> ; lanes equal to -88
1007  %cmp = or <4 x i1> %cmp1, %cmp2 ; lane is true if either compare matched
1008  %r = sext <4 x i1> %cmp to <4 x i8> ; true lanes become all-ones, false lanes zero
1009  ret <4 x i8> %r
1010}
1011
; Test: (x != 88) & (x != -88) on <4 x i8>, returned as a <4 x i1> mask.
; None of the four expected sequences contains an abs instruction. The AVX512
; run expects each equality compare to be inverted separately (vpternlogq $15)
; and the two masks combined with kandw; the AVX2, SSE4.1 and SSE2 runs expect
; the two equality compares to be ORed and the result inverted by an xor with
; an all-ones register.
; NOTE(review): presumably this pins the current, unfolded codegen for a
; vector type that is not legal on x86 - confirm against the commit that
; introduced the test.
1012define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
1013; AVX512-LABEL: ne_and_to_abs_vec4x8:
1014; AVX512:       # %bb.0:
1015; AVX512-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1016; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
1017; AVX512-NEXT:    vpmovsxbd %xmm1, %zmm1
1018; AVX512-NEXT:    vptestmd %zmm1, %zmm1, %k0
1019; AVX512-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1020; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
1021; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
1022; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
1023; AVX512-NEXT:    kandw %k1, %k0, %k1
1024; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
1025; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1026; AVX512-NEXT:    vzeroupper
1027; AVX512-NEXT:    retq
1028;
1029; AVX2-LABEL: ne_and_to_abs_vec4x8:
1030; AVX2:       # %bb.0:
1031; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1032; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1033; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1034; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
1035; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
1036; AVX2-NEXT:    vpmovsxbd %xmm0, %xmm0
1037; AVX2-NEXT:    retq
1038;
1039; SSE41-LABEL: ne_and_to_abs_vec4x8:
1040; SSE41:       # %bb.0:
1041; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u]
1042; SSE41-NEXT:    pcmpeqb %xmm0, %xmm1
1043; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
1044; SSE41-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1045; SSE41-NEXT:    por %xmm1, %xmm0
1046; SSE41-NEXT:    pxor %xmm2, %xmm0
1047; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
1048; SSE41-NEXT:    retq
1049;
1050; SSE2-LABEL: ne_and_to_abs_vec4x8:
1051; SSE2:       # %bb.0:
1052; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u]
1053; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1054; SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1055; SSE2-NEXT:    por %xmm1, %xmm0
1056; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1057; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1058; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1059; SSE2-NEXT:    pxor %xmm1, %xmm0
1060; SSE2-NEXT:    retq
1061  %cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88> ; lanes different from +88
1062  %cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88> ; lanes different from -88
1063  %cmp = and <4 x i1> %cmp1, %cmp2 ; lane is true only if it differs from both
1064  ret <4 x i1> %cmp
1065}
1066
; Test: (x != 88) & (x != -88) on <4 x i16>, with the <4 x i1> result
; sign-extended to <4 x i16> before being returned.
; None of the four expected sequences contains an abs instruction. The AVX512
; run expects each equality compare to be inverted separately (vpternlogq $15),
; combined with kandw, and narrowed back with vpmovdw; the AVX2, SSE4.1 and
; SSE2 runs expect the two equality compares to be ORed and the result
; inverted by an xor with an all-ones register.
; NOTE(review): presumably this pins the current, unfolded codegen for a
; vector type that is not legal on x86 - confirm against the commit that
; introduced the test.
1067define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
1068; AVX512-LABEL: ne_and_to_abs_vec4x16_sext:
1069; AVX512:       # %bb.0:
1070; AVX512-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1071; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
1072; AVX512-NEXT:    vpmovsxwd %xmm1, %ymm1
1073; AVX512-NEXT:    vptestmd %ymm1, %ymm1, %k0
1074; AVX512-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1075; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
1076; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
1077; AVX512-NEXT:    vptestmd %ymm0, %ymm0, %k1
1078; AVX512-NEXT:    kandw %k1, %k0, %k1
1079; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
1080; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1081; AVX512-NEXT:    vpmovdw %ymm0, %xmm0
1082; AVX512-NEXT:    vzeroupper
1083; AVX512-NEXT:    retq
1084;
1085; AVX2-LABEL: ne_and_to_abs_vec4x16_sext:
1086; AVX2:       # %bb.0:
1087; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1088; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1089; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1090; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
1091; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
1092; AVX2-NEXT:    retq
1093;
1094; SSE41-LABEL: ne_and_to_abs_vec4x16_sext:
1095; SSE41:       # %bb.0:
1096; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm1 = [88,88,88,88,88,88,88,88]
1097; SSE41-NEXT:    pcmpeqw %xmm0, %xmm1
1098; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
1099; SSE41-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1100; SSE41-NEXT:    por %xmm1, %xmm0
1101; SSE41-NEXT:    pxor %xmm2, %xmm0
1102; SSE41-NEXT:    retq
1103;
1104; SSE2-LABEL: ne_and_to_abs_vec4x16_sext:
1105; SSE2:       # %bb.0:
1106; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u]
1107; SSE2-NEXT:    pcmpeqw %xmm0, %xmm1
1108; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
1109; SSE2-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1110; SSE2-NEXT:    por %xmm1, %xmm0
1111; SSE2-NEXT:    pxor %xmm2, %xmm0
1112; SSE2-NEXT:    retq
1113  %cmp1 = icmp ne <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88> ; lanes different from +88
1114  %cmp2 = icmp ne <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88> ; lanes different from -88
1115  %cmp = and <4 x i1> %cmp1, %cmp2 ; lane is true only if it differs from both
1116  %r = sext <4 x i1> %cmp to <4 x i16> ; true lanes become all-ones, false lanes zero
1117  ret <4 x i16> %r
1118}
1119