; xref: /llvm-project/llvm/test/CodeGen/X86/viabs.ll (revision 11276563c81987791a2326950dbc3315a32dd709)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2     | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3    | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1   | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx      | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2     | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=AVX512,AVX512BW

; abs(v4i32) written as select(a > -1, a, 0-a); expected to lower to (v)pabsd.
define <4 x i32> @test_abs_gt_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_gt_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_gt_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

; abs(v4i32) written as select(a >= 0, a, 0-a); expected to lower to (v)pabsd.
define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_ge_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_ge_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_ge_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_ge_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_ge_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sge <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

; abs(v8i16) written as select(a > 0, a, 0-a); expected to lower to (v)pabsw.
define <8 x i16> @test_abs_gt_v8i16(<8 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    psubw %xmm0, %xmm1
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsw %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_gt_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsw %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsw %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_gt_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i16> zeroinitializer, %a
  %b = icmp sgt <8 x i16> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
  ret <8 x i16> %abs
}

; abs(v16i8) written as select(a < 0, 0-a, a); expected to lower to (v)pabsb.
define <16 x i8> @test_abs_lt_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    psubb %xmm0, %xmm1
; SSE2-NEXT:    pminub %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_lt_v16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsb %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_lt_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsb %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_lt_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsb %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_lt_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsb %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_lt_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsb %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i8> zeroinitializer, %a
  %b = icmp slt <16 x i8> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
  ret <16 x i8> %abs
}

; abs(v4i32) written as select(a <= 0, 0-a, a); expected to lower to (v)pabsd.
define <4 x i32> @test_abs_le_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_le_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sle <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
  ret <4 x i32> %abs
}

; abs(v8i32): 256-bit version; AVX1 splits into two 128-bit pabsd, AVX2+ uses vpabsd ymm.
define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    psubd %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v8i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_gt_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsd %xmm0, %xmm0
; SSE41-NEXT:    pabsd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_gt_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
  ret <8 x i32> %abs
}

; abs(v8i32) via sge-zero select; same expected lowering as the sgt variant.
define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    psubd %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_ge_v8i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_ge_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsd %xmm0, %xmm0
; SSE41-NEXT:    pabsd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_ge_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_ge_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_ge_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sge <8 x i32> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
  ret <8 x i32> %abs
}

; abs(v16i16): 256-bit i16 version; expected pabsw per 128-bit half pre-AVX2.
define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    psubw %xmm0, %xmm3
; SSE2-NEXT:    pmaxsw %xmm3, %xmm0
; SSE2-NEXT:    psubw %xmm1, %xmm2
; SSE2-NEXT:    pmaxsw %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v16i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsw %xmm0, %xmm0
; SSSE3-NEXT:    pabsw %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_gt_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsw %xmm0, %xmm0
; SSE41-NEXT:    pabsw %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsw %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsw %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsw %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_gt_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsw %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i16> zeroinitializer, %a
  %b = icmp sgt <16 x i16> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg
  ret <16 x i16> %abs
}

; abs(v32i8): 256-bit i8 version via slt-zero select; expected (v)pabsb.
define <32 x i8> @test_abs_lt_v32i8(<32 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    psubb %xmm0, %xmm3
; SSE2-NEXT:    pminub %xmm3, %xmm0
; SSE2-NEXT:    psubb %xmm1, %xmm2
; SSE2-NEXT:    pminub %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_lt_v32i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsb %xmm0, %xmm0
; SSSE3-NEXT:    pabsb %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_lt_v32i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsb %xmm0, %xmm0
; SSE41-NEXT:    pabsb %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_lt_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsb %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsb %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_lt_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsb %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_lt_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsb %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <32 x i8> zeroinitializer, %a
  %b = icmp slt <32 x i8> %a, zeroinitializer
  %abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a
  ret <32 x i8> %abs
}

; abs(v8i32) via sle-zero select; same expected lowering as the other v8i32 variants.
define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    psubd %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_v8i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_le_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsd %xmm0, %xmm0
; SSE41-NEXT:    pabsd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sle <8 x i32> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a
  ret <8 x i32> %abs
}

; abs(v16i32): 512-bit version; AVX512 should emit a single vpabsd zmm.
define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_16i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    psubd %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    psubd %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    psubd %xmm4, %xmm2
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    psubd %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_16i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsd %xmm0, %xmm0
; SSSE3-NEXT:    pabsd %xmm1, %xmm1
; SSSE3-NEXT:    pabsd %xmm2, %xmm2
; SSSE3-NEXT:    pabsd %xmm3, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_le_16i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsd %xmm0, %xmm0
; SSE41-NEXT:    pabsd %xmm1, %xmm1
; SSE41-NEXT:    pabsd %xmm2, %xmm2
; SSE41-NEXT:    pabsd %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsd %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vpabsd %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpabsd %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    vpabsd %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsd %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i32> zeroinitializer, %a
  %b = icmp sle <16 x i32> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i32> %tmp1neg, <16 x i32> %a
  ret <16 x i32> %abs
}

; abs(v2i64): no pabsq before AVX512; pre-AVX512 targets use sign-mask xor/sub
; or a blendv-based select; AVX512VL should emit vpabsq xmm.
define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_ge_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSSE3-NEXT:    psubq %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_ge_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    psubq %xmm0, %xmm1
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_ge_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm1
; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_ge_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm1
; AVX2-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_ge_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <2 x i64> zeroinitializer, %a
  %b = icmp sge <2 x i64> %a, zeroinitializer
  %abs = select <2 x i1> %b, <2 x i64> %a, <2 x i64> %tmp1neg
  ret <2 x i64> %abs
}

; abs(v4i64): 256-bit i64 version; AVX512VL should emit vpabsq ymm.
define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    psubq %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v4i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSSE3-NEXT:    psubq %xmm2, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    pxor %xmm2, %xmm1
; SSSE3-NEXT:    psubq %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_gt_v4i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pxor %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    psubq %xmm0, %xmm4
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    psubq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_gt_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i64> zeroinitializer, %a
  %b = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  %abs = select <4 x i1> %b, <4 x i64> %a, <4 x i64> %tmp1neg
  ret <4 x i64> %abs
}

; abs(v8i64): 512-bit i64 version; AVX512 should emit a single vpabsq zmm.
define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
; SSE2-LABEL: test_abs_le_v8i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    psubq %xmm4, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    psubq %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    psubq %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    psubq %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_v8i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSSE3-NEXT:    psubq %xmm4, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSSE3-NEXT:    psubq %xmm4, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSSE3-NEXT:    psubq %xmm4, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSSE3-NEXT:    psubq %xmm4, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_le_v8i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    pxor %xmm5, %xmm5
; SSE41-NEXT:    pxor %xmm6, %xmm6
; SSE41-NEXT:    psubq %xmm0, %xmm6
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm4
; SSE41-NEXT:    pxor %xmm6, %xmm6
; SSE41-NEXT:    psubq %xmm1, %xmm6
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm1
; SSE41-NEXT:    pxor %xmm6, %xmm6
; SSE41-NEXT:    psubq %xmm2, %xmm6
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm2
; SSE41-NEXT:    psubq %xmm3, %xmm5
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm3
; SSE41-NEXT:    movapd %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubq %xmm0, %xmm3, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsubq %ymm0, %ymm2, %ymm3
; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i64> zeroinitializer, %a
  %b = icmp sle <8 x i64> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
  ret <8 x i64> %abs
}

; abs(v8i64) of a loaded value: checks the load folds into the abs lowering
; (AVX512 should emit vpabsq with a memory operand).
define <8 x i64> @test_abs_le_v8i64_fold(ptr %a.ptr) nounwind {
; SSE2-LABEL: test_abs_le_v8i64_fold:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqu (%rdi), %xmm0
; SSE2-NEXT:    movdqu 16(%rdi), %xmm1
; SSE2-NEXT:    movdqu 32(%rdi), %xmm2
; SSE2-NEXT:    movdqu 48(%rdi), %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    psubq %xmm4, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    psubq %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    psubq %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    psubq %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_le_v8i64_fold:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqu (%rdi), %xmm0
; SSSE3-NEXT:    movdqu 16(%rdi), %xmm1
; SSSE3-NEXT:    movdqu 32(%rdi), %xmm2
; SSSE3-NEXT:    movdqu 48(%rdi), %xmm3
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSSE3-NEXT:    psubq %xmm4, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSSE3-NEXT:    psubq %xmm4, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSSE3-NEXT:    psubq %xmm4, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSSE3-NEXT:    psubq %xmm4, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_le_v8i64_fold:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqu (%rdi), %xmm1
; SSE41-NEXT:    movdqu 16(%rdi), %xmm2
; SSE41-NEXT:    movdqu 32(%rdi), %xmm3
; SSE41-NEXT:    movdqu 48(%rdi), %xmm4
; SSE41-NEXT:    pxor %xmm5, %xmm5
; SSE41-NEXT:    pxor %xmm6, %xmm6
; SSE41-NEXT:    psubq %xmm1, %xmm6
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm1
; SSE41-NEXT:    pxor %xmm6, %xmm6
; SSE41-NEXT:    psubq %xmm2, %xmm6
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm2
; SSE41-NEXT:    pxor %xmm6, %xmm6
; SSE41-NEXT:    psubq %xmm3, %xmm6
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm3
; SSE41-NEXT:    psubq %xmm4, %xmm5
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm4
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    movapd %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm3, %xmm2
; SSE41-NEXT:    movapd %xmm4, %xmm3
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_le_v8i64_fold:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
; AVX1-NEXT:    vmovdqu 32(%rdi), %ymm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq 16(%rdi), %xmm2, %xmm3
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; AVX1-NEXT:    vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
; AVX1-NEXT:    vpsubq 48(%rdi), %xmm2, %xmm3
; AVX1-NEXT:    vpsubq %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_le_v8i64_fold:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
; AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsubq %ymm0, %ymm2, %ymm3
; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_abs_le_v8i64_fold:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpabsq (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a = load <8 x i64>, ptr %a.ptr, align 8
  %tmp1neg = sub <8 x i64> zeroinitializer, %a
  %b = icmp sle <8 x i64> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
  ret <8 x i64> %abs
}

; abs(v64i8): 512-bit i8 version; vpabsb zmm needs AVX512BW, so AVX512F splits
; into two 256-bit vpabsb halves.
define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v64i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    psubb %xmm0, %xmm5
; SSE2-NEXT:    pminub %xmm5, %xmm0
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    psubb %xmm1, %xmm5
; SSE2-NEXT:    pminub %xmm5, %xmm1
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    psubb %xmm2, %xmm5
; SSE2-NEXT:    pminub %xmm5, %xmm2
; SSE2-NEXT:    psubb %xmm3, %xmm4
; SSE2-NEXT:    pminub %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_lt_v64i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsb %xmm0, %xmm0
; SSSE3-NEXT:    pabsb %xmm1, %xmm1
; SSSE3-NEXT:    pabsb %xmm2, %xmm2
; SSSE3-NEXT:    pabsb %xmm3, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_lt_v64i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsb %xmm0, %xmm0
; SSE41-NEXT:    pabsb %xmm1, %xmm1
; SSE41-NEXT:    pabsb %xmm2, %xmm2
; SSE41-NEXT:    pabsb %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_lt_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsb %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsb %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vpabsb %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpabsb %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_lt_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsb %ymm0, %ymm0
; AVX2-NEXT:    vpabsb %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_lt_v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsb %ymm0, %ymm1 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc8]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
; AVX512F-NEXT:    vpabsb %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x3a,0xc0,0x01]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsb %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <64 x i8> zeroinitializer, %a
  %b = icmp slt <64 x i8> %a, zeroinitializer
  %abs = select <64 x i1> %b, <64 x i8> %tmp1neg, <64 x i8> %a
  ret <64 x i8> %abs
}

; abs(v32i16): 512-bit i16 version; vpabsw zmm needs AVX512BW, so AVX512F splits
; into two 256-bit vpabsw halves.
define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v32i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    psubw %xmm0, %xmm5
; SSE2-NEXT:    pmaxsw %xmm5, %xmm0
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    psubw %xmm1, %xmm5
; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    psubw %xmm2, %xmm5
; SSE2-NEXT:    pmaxsw %xmm5, %xmm2
; SSE2-NEXT:    psubw %xmm3, %xmm4
; SSE2-NEXT:    pmaxsw %xmm4, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: test_abs_gt_v32i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pabsw %xmm0, %xmm0
; SSSE3-NEXT:    pabsw %xmm1, %xmm1
; SSSE3-NEXT:    pabsw %xmm2, %xmm2
; SSSE3-NEXT:    pabsw %xmm3, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: test_abs_gt_v32i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pabsw %xmm0, %xmm0
; SSE41-NEXT:    pabsw %xmm1, %xmm1
; SSE41-NEXT:    pabsw %xmm2, %xmm2
; SSE41-NEXT:    pabsw %xmm3, %xmm3
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_abs_gt_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpabsw %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpabsw %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vpabsw %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpabsw %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_abs_gt_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpabsw %ymm0, %ymm0
; AVX2-NEXT:    vpabsw %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_abs_gt_v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpabsw %ymm0, %ymm1 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc8]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
; AVX512F-NEXT:    vpabsw %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x3a,0xc0,0x01]
; AVX512F-NEXT:    retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpabsw %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xc0]
; AVX512BW-NEXT:    retq # encoding: [0xc3]
  %tmp1neg = sub <32 x i16> zeroinitializer, %a
  %b = icmp sgt <32 x i16> %a, zeroinitializer
  %abs = select <32 x i1> %b, <32 x i16> %a, <32 x i16> %tmp1neg
  ret <32 x i16> %abs
}
