; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux                  | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.2   | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx      | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2     | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

;
; trunc(abs(sub(zext(a),zext(b)))) -> abdu(a,b)
;
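; Note: because both operands are zero-extended, the wide subtraction cannot
; wrap, so trunc(abs(zext(a) - zext(b))) is exactly sub(umax(a,b), umin(a,b))
; on the source element type (e.g. for i8, |200 - 50| = 200 - 50 = 150),
; which is the unsigned min/max lowering the checks below expect.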

define <16 x i8> @abd_ext_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: abd_ext_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminub %xmm1, %xmm2
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    psubb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_ext_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = zext <16 x i8> %a to <16 x i64>
  %bext = zext <16 x i8> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 false)
  %trunc = trunc <16 x i64> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <16 x i8> @abd_ext_v16i8_undef(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: abd_ext_v16i8_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminub %xmm1, %xmm2
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    psubb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_ext_v16i8_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = zext <16 x i8> %a to <16 x i64>
  %bext = zext <16 x i8> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 true)
  %trunc = trunc <16 x i64> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

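; SSE2 has no unsigned i16 min/max, so abdu is built from saturating
; subtracts instead: psubusw computes usubsat, one of usubsat(a,b) and
; usubsat(b,a) is always zero, and their por is therefore |a-b|.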
define <8 x i16> @abd_ext_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: abd_ext_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psubusw %xmm0, %xmm2
; SSE2-NEXT:    psubusw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminuw %xmm1, %xmm2
; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; SSE42-NEXT:    psubw %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = zext <8 x i16> %a to <8 x i64>
  %bext = zext <8 x i16> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 false)
  %trunc = trunc <8 x i64> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @abd_ext_v8i16_undef(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: abd_ext_v8i16_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psubusw %xmm0, %xmm2
; SSE2-NEXT:    psubusw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v8i16_undef:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminuw %xmm1, %xmm2
; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; SSE42-NEXT:    psubw %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v8i16_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = zext <8 x i16> %a to <8 x i64>
  %bext = zext <8 x i16> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 true)
  %trunc = trunc <8 x i64> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

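; SSE2 also lacks unsigned i32 min/max (pminud/pmaxud are SSE4.1), so it
; biases both operands by 0x80000000 (pxor with 2147483648) to turn signed
; pcmpgtd into an unsigned compare, then uses the all-ones/all-zeros mask
; to conditionally negate the difference: mask - ((a-b) ^ mask) yields
; a-b when a > b and b-a otherwise.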
define <4 x i32> @abd_ext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_ext_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminud %xmm1, %xmm2
; SSE42-NEXT:    pmaxud %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = zext <4 x i32> %a to <4 x i64>
  %bext = zext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %aext, %bext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 false)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @abd_ext_v4i32_undef(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_ext_v4i32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v4i32_undef:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminud %xmm1, %xmm2
; SSE42-NEXT:    pmaxud %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v4i32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = zext <4 x i32> %a to <4 x i64>
  %bext = zext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %aext, %bext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

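; There is no 128-bit unsigned i64 min/max or compare before AVX512: SSE2
; synthesizes the i64 ugt predicate from 32-bit pcmpgtd/pcmpeqd on halves
; biased with 0x8000000080000000 (9223372039002259456), SSE4.2/AVX bias
; with 0x8000000000000000 and use signed pcmpgtq, and AVX512VL can use
; vpminuq/vpmaxuq directly. The conditional-negate idiom then matches the
; v4i32 case.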
define <2 x i64> @abd_ext_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_ext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_ext_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    # xmm2 = mem[0,0]
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %aext = zext <2 x i64> %a to <2 x i128>
  %bext = zext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %aext, %bext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 false)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @abd_ext_v2i64_undef(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_ext_v2i64_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v2i64_undef:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_ext_v2i64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    # xmm2 = mem[0,0]
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v2i64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v2i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %aext = zext <2 x i64> %a to <2 x i128>
  %bext = zext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %aext, %bext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

;
; sub(umax(a,b),umin(a,b)) -> abdu(a,b)
;
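; Note: umax(a,b) >= umin(a,b) always holds, so the subtraction below can
; never wrap and the pattern maps directly onto abdu.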

define <16 x i8> @abd_minmax_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: abd_minmax_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminub %xmm1, %xmm2
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    psubb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_minmax_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %min = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
  %max = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
  %sub = sub <16 x i8> %max, %min
  ret <16 x i8> %sub
}

define <8 x i16> @abd_minmax_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: abd_minmax_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psubusw %xmm0, %xmm2
; SSE2-NEXT:    psubusw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_minmax_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminuw %xmm1, %xmm2
; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; SSE42-NEXT:    psubw %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_minmax_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %min = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
  %max = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
  %sub = sub <8 x i16> %max, %min
  ret <8 x i16> %sub
}

define <4 x i32> @abd_minmax_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_minmax_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_minmax_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminud %xmm1, %xmm2
; SSE42-NEXT:    pmaxud %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_minmax_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %min = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
  %max = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
  %sub = sub <4 x i32> %max, %min
  ret <4 x i32> %sub
}

define <2 x i64> @abd_minmax_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_minmax_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_minmax_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_minmax_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    # xmm2 = mem[0,0]
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_minmax_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_minmax_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %min = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
  %max = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
  %sub = sub <2 x i64> %max, %min
  ret <2 x i64> %sub
}

;
; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
;
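; These tests vary the predicate (ugt, uge, ult) and swap the select arms
; to match, so every select still computes |a-b| and folds to the same
; abdu lowering as above.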

define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: abd_cmp_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminub %xmm1, %xmm2
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    psubb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp ugt <16 x i8> %a, %b
  %ab = sub <16 x i8> %a, %b
  %ba = sub <16 x i8> %b, %a
  %sel = select <16 x i1> %cmp, <16 x i8> %ab, <16 x i8> %ba
  ret <16 x i8> %sel
}

define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: abd_cmp_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psubusw %xmm0, %xmm2
; SSE2-NEXT:    psubusw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminuw %xmm1, %xmm2
; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; SSE42-NEXT:    psubw %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp uge <8 x i16> %a, %b
  %ab = sub <8 x i16> %a, %b
  %ba = sub <8 x i16> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
  ret <8 x i16> %sel
}

define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_cmp_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminud %xmm1, %xmm2
; SSE42-NEXT:    pmaxud %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp ult <4 x i32> %a, %b
  %ab = sub <4 x i32> %a, %b
  %ba = sub <4 x i32> %b, %a
  %sel = select <4 x i1> %cmp, <4 x i32> %ba, <4 x i32> %ab
  ret <4 x i32> %sel
}

define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_cmp_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_cmp_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    # xmm2 = mem[0,0]
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cmp = icmp uge <2 x i64> %a, %b
  %ab = sub <2 x i64> %a, %b
  %ba = sub <2 x i64> %b, %a
  %sel = select <2 x i1> %cmp, <2 x i64> %ab, <2 x i64> %ba
  ret <2 x i64> %sel
}

;
; Special cases
;
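; The multiuse tests verify that an extra use of the compare (reused via
; sext for the add) or of one subtraction does not stop the select from
; being recognised as an absolute difference; on AVX512 the compare mask
; in k1 is even reused to apply the sext(cmp) addend as a masked vpaddq
; of all-ones (-1).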

define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_cmp_v2i64_multiuse_cmp:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm3, %xmm1
; SSE2-NEXT:    pxor %xmm3, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psubq %xmm2, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm2
; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm3, %xmm1
; SSE42-NEXT:    pxor %xmm3, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    psubq %xmm2, %xmm1
; SSE42-NEXT:    paddq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    # xmm3 = mem[0,0]
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm0, %xmm2, %xmm1
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm0, %xmm2, %xmm1
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k1
; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT:    retq
  %cmp = icmp ugt <2 x i64> %a, %b
  %ab = sub <2 x i64> %a, %b
  %ba = sub <2 x i64> %b, %a
  %sel = select <2 x i1> %cmp, <2 x i64> %ab, <2 x i64> %ba
  %ext = sext <2 x i1> %cmp to <2 x i64>
  %res = add <2 x i64> %ext, %sel
  ret <2 x i64> %res
}

define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: abd_cmp_v8i16_multiuse_sub:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psubw %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    psubusw %xmm0, %xmm3
; SSE2-NEXT:    psubusw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v8i16_multiuse_sub:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    psubw %xmm1, %xmm2
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    pminuw %xmm1, %xmm3
; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; SSE42-NEXT:    psubw %xmm3, %xmm0
; SSE42-NEXT:    paddw %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v8i16_multiuse_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp uge <8 x i16> %a, %b
  %ab = sub <8 x i16> %a, %b
  %ba = sub <8 x i16> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
  %res = add <8 x i16> %ab, %sel
  ret <8 x i16> %res
}

declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)

declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)

declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
