; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
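;
; 512-bit byte/word min/max (vpminub/vpmaxub, vpminuw/vpmaxuw on zmm) needs
; AVX512BW, so the AVX512DQ-only run is expected to split the i8/i16 cases
; into two 256-bit ymm halves via vextracti64x4/vinserti64x4.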

;
; trunc(abs(sub(zext(a),zext(b)))) -> abdu(a,b)
;
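; The zexts make the wide subtraction exact, so e.g. for i8 values a = 1
; and b = 3: sub = -2, abs = 2, trunc = 2 = umax(a,b) - umin(a,b). The
; _undef variants pass i1 true to llvm.abs (poison on INT_MIN input),
; which is harmless here because a zext'd difference can never reach the
; wide type's minimum, so both forms should lower identically.
;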

define <64 x i8> @abd_ext_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminub %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxub %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminub %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = zext <64 x i8> %a to <64 x i64>
  %bext = zext <64 x i8> %b to <64 x i64>
  %sub = sub <64 x i64> %aext, %bext
  %abs = call <64 x i64> @llvm.abs.v64i64(<64 x i64> %sub, i1 false)
  %trunc = trunc <64 x i64> %abs to <64 x i8>
  ret <64 x i8> %trunc
}

define <64 x i8> @abd_ext_v64i8_undef(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v64i8_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v64i8_undef:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminub %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxub %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminub %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = zext <64 x i8> %a to <64 x i64>
  %bext = zext <64 x i8> %b to <64 x i64>
  %sub = sub <64 x i64> %aext, %bext
  %abs = call <64 x i64> @llvm.abs.v64i64(<64 x i64> %sub, i1 true)
  %trunc = trunc <64 x i64> %abs to <64 x i8>
  ret <64 x i8> %trunc
}

define <32 x i16> @abd_ext_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminuw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxuw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminuw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = zext <32 x i16> %a to <32 x i64>
  %bext = zext <32 x i16> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 false)
  %trunc = trunc <32 x i64> %abs to <32 x i16>
  ret <32 x i16> %trunc
}

define <32 x i16> @abd_ext_v32i16_undef(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v32i16_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v32i16_undef:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminuw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxuw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminuw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = zext <32 x i16> %a to <32 x i64>
  %bext = zext <32 x i16> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 true)
  %trunc = trunc <32 x i64> %abs to <32 x i16>
  ret <32 x i16> %trunc
}

define <16 x i32> @abd_ext_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_ext_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = zext <16 x i32> %a to <16 x i64>
  %bext = zext <16 x i32> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 false)
  %trunc = trunc <16 x i64> %abs to <16 x i32>
  ret <16 x i32> %trunc
}

define <16 x i32> @abd_ext_v16i32_undef(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_ext_v16i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = zext <16 x i32> %a to <16 x i64>
  %bext = zext <16 x i32> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 true)
  %trunc = trunc <16 x i64> %abs to <16 x i32>
  ret <16 x i32> %trunc
}

define <8 x i64> @abd_ext_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_ext_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = zext <8 x i64> %a to <8 x i128>
  %bext = zext <8 x i64> %b to <8 x i128>
  %sub = sub <8 x i128> %aext, %bext
  %abs = call <8 x i128> @llvm.abs.v8i128(<8 x i128> %sub, i1 false)
  %trunc = trunc <8 x i128> %abs to <8 x i64>
  ret <8 x i64> %trunc
}

define <8 x i64> @abd_ext_v8i64_undef(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_ext_v8i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = zext <8 x i64> %a to <8 x i128>
  %bext = zext <8 x i64> %b to <8 x i128>
  %sub = sub <8 x i128> %aext, %bext
  %abs = call <8 x i128> @llvm.abs.v8i128(<8 x i128> %sub, i1 true)
  %trunc = trunc <8 x i128> %abs to <8 x i64>
  ret <8 x i64> %trunc
}

;
; sub(umax(a,b),umin(a,b)) -> abdu(a,b)
;
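; umax(a,b) >= umin(a,b) holds element-wise, so the unsigned subtraction
; never wraps and max - min is exactly the absolute difference, e.g. for
; i8 values a = 200, b = 50: 200 - 50 = 150 = abdu(a,b).
;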

define <64 x i8> @abd_minmax_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_minmax_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_minmax_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminub %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxub %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminub %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %min = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %a, <64 x i8> %b)
  %max = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %a, <64 x i8> %b)
  %sub = sub <64 x i8> %max, %min
  ret <64 x i8> %sub
}

define <32 x i16> @abd_minmax_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_minmax_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_minmax_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminuw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxuw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminuw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %min = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %a, <32 x i16> %b)
  %max = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %a, <32 x i16> %b)
  %sub = sub <32 x i16> %max, %min
  ret <32 x i16> %sub
}

define <16 x i32> @abd_minmax_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_minmax_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %min = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %a, <16 x i32> %b)
  %max = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %a, <16 x i32> %b)
  %sub = sub <16 x i32> %max, %min
  ret <16 x i32> %sub
}

define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_minmax_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %min = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %a, <8 x i64> %b)
  %max = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %a, <8 x i64> %b)
  %sub = sub <8 x i64> %max, %min
  ret <8 x i64> %sub
}

;
; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
;
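; The fold is recognized for either ordering of the compare: ugt/uge
; select sub(a,b) on the a >= b side, while abd_cmp_v16i32 uses ult with
; the select arms swapped (taking sub(b,a) when a < b), which computes
; the same absolute difference.
;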

define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_cmp_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminub %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxub %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminub %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %cmp = icmp ugt <64 x i8> %a, %b
  %ab = sub <64 x i8> %a, %b
  %ba = sub <64 x i8> %b, %a
  %sel = select <64 x i1> %cmp, <64 x i8> %ab, <64 x i8> %ba
  ret <64 x i8> %sel
}

define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_cmp_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminuw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxuw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminuw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %cmp = icmp uge <32 x i16> %a, %b
  %ab = sub <32 x i16> %a, %b
  %ba = sub <32 x i16> %b, %a
  %sel = select <32 x i1> %cmp, <32 x i16> %ab, <32 x i16> %ba
  ret <32 x i16> %sel
}

define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_cmp_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %cmp = icmp ult <16 x i32> %a, %b
  %ab = sub <16 x i32> %a, %b
  %ba = sub <16 x i32> %b, %a
  %sel = select <16 x i1> %cmp, <16 x i32> %ba, <16 x i32> %ab
  ret <16 x i32> %sel
}

define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_cmp_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %cmp = icmp uge <8 x i64> %a, %b
  %ab = sub <8 x i64> %a, %b
  %ba = sub <8 x i64> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i64> %ab, <8 x i64> %ba
  ret <8 x i64> %sel
}

declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1)
declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1)
declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
declare <32 x i64> @llvm.abs.v32i64(<32 x i64>, i1)
declare <64 x i64> @llvm.abs.v64i64(<64 x i64>, i1)
declare <8 x i128> @llvm.abs.v8i128(<8 x i128>, i1)

declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>)
declare <32 x i16> @llvm.umax.v32i16(<32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>)

declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>)
declare <32 x i16> @llvm.umin.v32i16(<32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>)