xref: /llvm-project/llvm/test/CodeGen/AArch64/abdu.ll (revision 854ded9b24ea41ae10e884294b19bf3f80ca49f6)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
3
4;
5; trunc(abs(sub(zext(a),zext(b)))) -> abdu(a,b)
6;
7
; abd_ext_i8: zero-extends both i8 arguments to i64, subtracts them, applies
; llvm.abs.i64 (i1 flag false) and truncates the result to i8.
; Expected output: w0 masked to 8 bits, minus w1 extended with uxtb, then a
; compare against zero and cneg on the "mi" condition.
8define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
9; CHECK-LABEL: abd_ext_i8:
10; CHECK:       // %bb.0:
11; CHECK-NEXT:    and w8, w0, #0xff
12; CHECK-NEXT:    sub w8, w8, w1, uxtb
13; CHECK-NEXT:    cmp w8, #0
14; CHECK-NEXT:    cneg w0, w8, mi
15; CHECK-NEXT:    ret
16  %aext = zext i8 %a to i64
17  %bext = zext i8 %b to i64
18  %sub = sub i64 %aext, %bext
19  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
20  %trunc = trunc i64 %abs to i8
21  ret i8 %trunc
22}
23
; abd_ext_i8_i16: mixed-width variant; %a is i8 and %b is i16. Both are
; zero-extended to i64, subtracted, passed through llvm.abs.i64 (flag false)
; and the result is truncated to i8.
; Expected output: w0 masked to 8 bits, minus w1 extended with uxth, then
; cmp/cneg on "mi".
24define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
25; CHECK-LABEL: abd_ext_i8_i16:
26; CHECK:       // %bb.0:
27; CHECK-NEXT:    and w8, w0, #0xff
28; CHECK-NEXT:    sub w8, w8, w1, uxth
29; CHECK-NEXT:    cmp w8, #0
30; CHECK-NEXT:    cneg w0, w8, mi
31; CHECK-NEXT:    ret
32  %aext = zext i8 %a to i64
33  %bext = zext i16 %b to i64
34  %sub = sub i64 %aext, %bext
35  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
36  %trunc = trunc i64 %abs to i8
37  ret i8 %trunc
38}
39
; abd_ext_i8_undef: same IR shape as abd_ext_i8, except that the second
; llvm.abs.i64 operand (is_int_min_poison in the LangRef) is true.
; The expected instruction sequence is the same as for abd_ext_i8.
40define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
41; CHECK-LABEL: abd_ext_i8_undef:
42; CHECK:       // %bb.0:
43; CHECK-NEXT:    and w8, w0, #0xff
44; CHECK-NEXT:    sub w8, w8, w1, uxtb
45; CHECK-NEXT:    cmp w8, #0
46; CHECK-NEXT:    cneg w0, w8, mi
47; CHECK-NEXT:    ret
48  %aext = zext i8 %a to i64
49  %bext = zext i8 %b to i64
50  %sub = sub i64 %aext, %bext
51  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
52  %trunc = trunc i64 %abs to i8
53  ret i8 %trunc
54}
55
; abd_ext_i16: zero-extends both i16 arguments to i64, subtracts them, applies
; llvm.abs.i64 (flag false) and truncates the result to i16.
; Expected output: w0 masked to 16 bits, minus w1 extended with uxth, then
; cmp/cneg on "mi".
56define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
57; CHECK-LABEL: abd_ext_i16:
58; CHECK:       // %bb.0:
59; CHECK-NEXT:    and w8, w0, #0xffff
60; CHECK-NEXT:    sub w8, w8, w1, uxth
61; CHECK-NEXT:    cmp w8, #0
62; CHECK-NEXT:    cneg w0, w8, mi
63; CHECK-NEXT:    ret
64  %aext = zext i16 %a to i64
65  %bext = zext i16 %b to i64
66  %sub = sub i64 %aext, %bext
67  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
68  %trunc = trunc i64 %abs to i16
69  ret i16 %trunc
70}
71
; abd_ext_i16_i32: mixed-width variant; %a is i16 and %b is i32. Both are
; zero-extended to i64, subtracted, passed through llvm.abs.i64 (flag false)
; and the result is truncated to i16.
; Expected output: w0 masked to 16 bits, both subtraction orders computed
; (the subs form sets the flags), and csel picks one on the unsigned "hi"
; condition.
72define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
73; CHECK-LABEL: abd_ext_i16_i32:
74; CHECK:       // %bb.0:
75; CHECK-NEXT:    and w8, w0, #0xffff
76; CHECK-NEXT:    sub w9, w1, w8
77; CHECK-NEXT:    subs w8, w8, w1
78; CHECK-NEXT:    csel w0, w8, w9, hi
79; CHECK-NEXT:    ret
80  %aext = zext i16 %a to i64
81  %bext = zext i32 %b to i64
82  %sub = sub i64 %aext, %bext
83  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
84  %trunc = trunc i64 %abs to i16
85  ret i16 %trunc
86}
87
; abd_ext_i16_undef: same IR shape as abd_ext_i16, except that the second
; llvm.abs.i64 operand (is_int_min_poison in the LangRef) is true.
; The expected instruction sequence is the same as for abd_ext_i16.
88define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
89; CHECK-LABEL: abd_ext_i16_undef:
90; CHECK:       // %bb.0:
91; CHECK-NEXT:    and w8, w0, #0xffff
92; CHECK-NEXT:    sub w8, w8, w1, uxth
93; CHECK-NEXT:    cmp w8, #0
94; CHECK-NEXT:    cneg w0, w8, mi
95; CHECK-NEXT:    ret
96  %aext = zext i16 %a to i64
97  %bext = zext i16 %b to i64
98  %sub = sub i64 %aext, %bext
99  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
100  %trunc = trunc i64 %abs to i16
101  ret i16 %trunc
102}
103
; abd_ext_i32: zero-extends both i32 arguments to i64, subtracts them, applies
; llvm.abs.i64 (flag false) and truncates the result to i32.
; Expected output: w1-w0 and w0-w1 are both computed (subs sets the flags)
; and csel chooses w0-w1 on the unsigned "hi" condition, w1-w0 otherwise.
104define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
105; CHECK-LABEL: abd_ext_i32:
106; CHECK:       // %bb.0:
107; CHECK-NEXT:    sub w8, w1, w0
108; CHECK-NEXT:    subs w9, w0, w1
109; CHECK-NEXT:    csel w0, w9, w8, hi
110; CHECK-NEXT:    ret
111  %aext = zext i32 %a to i64
112  %bext = zext i32 %b to i64
113  %sub = sub i64 %aext, %bext
114  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
115  %trunc = trunc i64 %abs to i32
116  ret i32 %trunc
117}
118
; abd_ext_i32_i16: mixed-width variant; %a is i32 and %b is i16. Both are
; zero-extended to i64, subtracted, passed through llvm.abs.i64 (flag false)
; and the result is truncated to i32.
; Expected output: w1 masked to 16 bits, both subtraction orders computed,
; and csel on the unsigned "hi" condition.
119define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
120; CHECK-LABEL: abd_ext_i32_i16:
121; CHECK:       // %bb.0:
122; CHECK-NEXT:    and w8, w1, #0xffff
123; CHECK-NEXT:    sub w9, w8, w0
124; CHECK-NEXT:    subs w8, w0, w8
125; CHECK-NEXT:    csel w0, w8, w9, hi
126; CHECK-NEXT:    ret
127  %aext = zext i32 %a to i64
128  %bext = zext i16 %b to i64
129  %sub = sub i64 %aext, %bext
130  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
131  %trunc = trunc i64 %abs to i32
132  ret i32 %trunc
133}
134
; abd_ext_i32_undef: same IR shape as abd_ext_i32, except that the second
; llvm.abs.i64 operand (is_int_min_poison in the LangRef) is true.
; The expected instruction sequence is the same as for abd_ext_i32.
135define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
136; CHECK-LABEL: abd_ext_i32_undef:
137; CHECK:       // %bb.0:
138; CHECK-NEXT:    sub w8, w1, w0
139; CHECK-NEXT:    subs w9, w0, w1
140; CHECK-NEXT:    csel w0, w9, w8, hi
141; CHECK-NEXT:    ret
142  %aext = zext i32 %a to i64
143  %bext = zext i32 %b to i64
144  %sub = sub i64 %aext, %bext
145  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
146  %trunc = trunc i64 %abs to i32
147  ret i32 %trunc
148}
149
; abd_ext_i64: zero-extends both i64 arguments to i128, subtracts them,
; applies llvm.abs.i128 (flag false) and truncates the result to i64.
; Expected output: the 64-bit form of the sub/subs/csel sequence, selecting
; on the unsigned "hi" condition.
150define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
151; CHECK-LABEL: abd_ext_i64:
152; CHECK:       // %bb.0:
153; CHECK-NEXT:    sub x8, x1, x0
154; CHECK-NEXT:    subs x9, x0, x1
155; CHECK-NEXT:    csel x0, x9, x8, hi
156; CHECK-NEXT:    ret
157  %aext = zext i64 %a to i128
158  %bext = zext i64 %b to i128
159  %sub = sub i128 %aext, %bext
160  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
161  %trunc = trunc i128 %abs to i64
162  ret i64 %trunc
163}
164
; abd_ext_i64_undef: same IR shape as abd_ext_i64, except that the second
; llvm.abs.i128 operand (is_int_min_poison in the LangRef) is true.
; The expected instruction sequence is the same as for abd_ext_i64.
165define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
166; CHECK-LABEL: abd_ext_i64_undef:
167; CHECK:       // %bb.0:
168; CHECK-NEXT:    sub x8, x1, x0
169; CHECK-NEXT:    subs x9, x0, x1
170; CHECK-NEXT:    csel x0, x9, x8, hi
171; CHECK-NEXT:    ret
172  %aext = zext i64 %a to i128
173  %bext = zext i64 %b to i128
174  %sub = sub i128 %aext, %bext
175  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
176  %trunc = trunc i128 %abs to i64
177  ret i64 %trunc
178}
179
; abd_ext_i128: zero-extends both i128 arguments to i256, subtracts them,
; applies llvm.abs.i256 (flag false) and truncates the result to i128.
; Expected output: a two-register subtract (subs on x0/x2, sbcs on x1/x3),
; then cset on "lo" plus sbfx of bit 0 builds a 0 / all-ones mask in x10.
; Both halves are xor-ed with the mask and the mask is subtracted (subs/sbc),
; leaving the result in x0/x1.
180define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
181; CHECK-LABEL: abd_ext_i128:
182; CHECK:       // %bb.0:
183; CHECK-NEXT:    subs x8, x0, x2
184; CHECK-NEXT:    sbcs x9, x1, x3
185; CHECK-NEXT:    cset w10, lo
186; CHECK-NEXT:    sbfx x10, x10, #0, #1
187; CHECK-NEXT:    eor x8, x8, x10
188; CHECK-NEXT:    eor x9, x9, x10
189; CHECK-NEXT:    subs x0, x8, x10
190; CHECK-NEXT:    sbc x1, x9, x10
191; CHECK-NEXT:    ret
192  %aext = zext i128 %a to i256
193  %bext = zext i128 %b to i256
194  %sub = sub i256 %aext, %bext
195  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
196  %trunc = trunc i256 %abs to i128
197  ret i128 %trunc
198}
199
; abd_ext_i128_undef: same IR shape as abd_ext_i128, except that the second
; llvm.abs.i256 operand (is_int_min_poison in the LangRef) is true.
; The expected instruction sequence is the same as for abd_ext_i128.
200define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
201; CHECK-LABEL: abd_ext_i128_undef:
202; CHECK:       // %bb.0:
203; CHECK-NEXT:    subs x8, x0, x2
204; CHECK-NEXT:    sbcs x9, x1, x3
205; CHECK-NEXT:    cset w10, lo
206; CHECK-NEXT:    sbfx x10, x10, #0, #1
207; CHECK-NEXT:    eor x8, x8, x10
208; CHECK-NEXT:    eor x9, x9, x10
209; CHECK-NEXT:    subs x0, x8, x10
210; CHECK-NEXT:    sbc x1, x9, x10
211; CHECK-NEXT:    ret
212  %aext = zext i128 %a to i256
213  %bext = zext i128 %b to i256
214  %sub = sub i256 %aext, %bext
215  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
216  %trunc = trunc i256 %abs to i128
217  ret i128 %trunc
218}
219
220;
221; sub(umax(a,b),umin(a,b)) -> abdu(a,b)
222;
223
; abd_minmax_i8: returns llvm.umax.i8(a, b) minus llvm.umin.i8(a, b).
; Expected output: w0 masked to 8 bits, minus w1 extended with uxtb, then
; cmp/cneg on "mi"; no separate min/max instructions appear.
224define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
225; CHECK-LABEL: abd_minmax_i8:
226; CHECK:       // %bb.0:
227; CHECK-NEXT:    and w8, w0, #0xff
228; CHECK-NEXT:    sub w8, w8, w1, uxtb
229; CHECK-NEXT:    cmp w8, #0
230; CHECK-NEXT:    cneg w0, w8, mi
231; CHECK-NEXT:    ret
232  %min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
233  %max = call i8 @llvm.umax.i8(i8 %a, i8 %b)
234  %sub = sub i8 %max, %min
235  ret i8 %sub
236}
237
; abd_minmax_i16: returns llvm.umax.i16(a, b) minus llvm.umin.i16(a, b).
; Expected output: w0 masked to 16 bits, minus w1 extended with uxth, then
; cmp/cneg on "mi".
238define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
239; CHECK-LABEL: abd_minmax_i16:
240; CHECK:       // %bb.0:
241; CHECK-NEXT:    and w8, w0, #0xffff
242; CHECK-NEXT:    sub w8, w8, w1, uxth
243; CHECK-NEXT:    cmp w8, #0
244; CHECK-NEXT:    cneg w0, w8, mi
245; CHECK-NEXT:    ret
246  %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
247  %max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
248  %sub = sub i16 %max, %min
249  ret i16 %sub
250}
251
; abd_minmax_i32: returns llvm.umax.i32(a, b) minus llvm.umin.i32(a, b).
; Expected output: both subtraction orders are computed (subs sets the flags)
; and csel picks w0-w1 on the unsigned "hi" condition, w1-w0 otherwise.
252define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
253; CHECK-LABEL: abd_minmax_i32:
254; CHECK:       // %bb.0:
255; CHECK-NEXT:    sub w8, w1, w0
256; CHECK-NEXT:    subs w9, w0, w1
257; CHECK-NEXT:    csel w0, w9, w8, hi
258; CHECK-NEXT:    ret
259  %min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
260  %max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
261  %sub = sub i32 %max, %min
262  ret i32 %sub
263}
264
; abd_minmax_i64: returns llvm.umax.i64(a, b) minus llvm.umin.i64(a, b).
; Expected output: the 64-bit sub/subs/csel sequence, selecting on the
; unsigned "hi" condition.
265define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
266; CHECK-LABEL: abd_minmax_i64:
267; CHECK:       // %bb.0:
268; CHECK-NEXT:    sub x8, x1, x0
269; CHECK-NEXT:    subs x9, x0, x1
270; CHECK-NEXT:    csel x0, x9, x8, hi
271; CHECK-NEXT:    ret
272  %min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
273  %max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
274  %sub = sub i64 %max, %min
275  ret i64 %sub
276}
277
; abd_minmax_i128: returns llvm.umax.i128(a, b) minus llvm.umin.i128(a, b).
; Expected output: a two-register subtract (subs/sbcs), a 0 / all-ones mask
; built from the "lo" condition with cset + sbfx, then xor with the mask and
; subtraction of the mask (subs/sbc) to produce the result in x0/x1.
278define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
279; CHECK-LABEL: abd_minmax_i128:
280; CHECK:       // %bb.0:
281; CHECK-NEXT:    subs x8, x0, x2
282; CHECK-NEXT:    sbcs x9, x1, x3
283; CHECK-NEXT:    cset w10, lo
284; CHECK-NEXT:    sbfx x10, x10, #0, #1
285; CHECK-NEXT:    eor x8, x8, x10
286; CHECK-NEXT:    eor x9, x9, x10
287; CHECK-NEXT:    subs x0, x8, x10
288; CHECK-NEXT:    sbc x1, x9, x10
289; CHECK-NEXT:    ret
290  %min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
291  %max = call i128 @llvm.umax.i128(i128 %a, i128 %b)
292  %sub = sub i128 %max, %min
293  ret i128 %sub
294}
295
296;
297; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
298;
299
; abd_cmp_i8: compares with icmp ugt and selects a-b when a > b (unsigned),
; otherwise b-a.
; Expected output: w0 masked to 8 bits, minus w1 extended with uxtb, then
; cmp/cneg on "mi".
300define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
301; CHECK-LABEL: abd_cmp_i8:
302; CHECK:       // %bb.0:
303; CHECK-NEXT:    and w8, w0, #0xff
304; CHECK-NEXT:    sub w8, w8, w1, uxtb
305; CHECK-NEXT:    cmp w8, #0
306; CHECK-NEXT:    cneg w0, w8, mi
307; CHECK-NEXT:    ret
308  %cmp = icmp ugt i8 %a, %b
309  %ab = sub i8 %a, %b
310  %ba = sub i8 %b, %a
311  %sel = select i1 %cmp, i8 %ab, i8 %ba
312  ret i8 %sel
313}
314
; abd_cmp_i16: compares with icmp uge and selects a-b when a >= b (unsigned),
; otherwise b-a.
; Expected output: w0 masked to 16 bits, minus w1 extended with uxth, then
; cmp/cneg on "mi".
315define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
316; CHECK-LABEL: abd_cmp_i16:
317; CHECK:       // %bb.0:
318; CHECK-NEXT:    and w8, w0, #0xffff
319; CHECK-NEXT:    sub w8, w8, w1, uxth
320; CHECK-NEXT:    cmp w8, #0
321; CHECK-NEXT:    cneg w0, w8, mi
322; CHECK-NEXT:    ret
323  %cmp = icmp uge i16 %a, %b
324  %ab = sub i16 %a, %b
325  %ba = sub i16 %b, %a
326  %sel = select i1 %cmp, i16 %ab, i16 %ba
327  ret i16 %sel
328}
329
; abd_cmp_i32: uses the inverted predicate (icmp ult) with the select arms
; swapped accordingly: b-a when a < b (unsigned), otherwise a-b.
; Expected output: both subtraction orders computed and csel on the unsigned
; "hi" condition.
330define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
331; CHECK-LABEL: abd_cmp_i32:
332; CHECK:       // %bb.0:
333; CHECK-NEXT:    sub w8, w1, w0
334; CHECK-NEXT:    subs w9, w0, w1
335; CHECK-NEXT:    csel w0, w9, w8, hi
336; CHECK-NEXT:    ret
337  %cmp = icmp ult i32 %a, %b
338  %ab = sub i32 %a, %b
339  %ba = sub i32 %b, %a
340  %sel = select i1 %cmp, i32 %ba, i32 %ab
341  ret i32 %sel
342}
343
; abd_cmp_i64: compares with icmp uge and selects a-b when a >= b (unsigned),
; otherwise b-a.
; Expected output: the 64-bit sub/subs/csel sequence, selecting on the
; unsigned "hi" condition.
344define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
345; CHECK-LABEL: abd_cmp_i64:
346; CHECK:       // %bb.0:
347; CHECK-NEXT:    sub x8, x1, x0
348; CHECK-NEXT:    subs x9, x0, x1
349; CHECK-NEXT:    csel x0, x9, x8, hi
350; CHECK-NEXT:    ret
351  %cmp = icmp uge i64 %a, %b
352  %ab = sub i64 %a, %b
353  %ba = sub i64 %b, %a
354  %sel = select i1 %cmp, i64 %ab, i64 %ba
355  ret i64 %sel
356}
357
; abd_cmp_i128: compares with icmp uge and selects a-b when a >= b (unsigned),
; otherwise b-a.
; Expected output: a two-register subtract (subs/sbcs), a 0 / all-ones mask
; built from the "lo" condition with cset + sbfx, then xor with the mask and
; subtraction of the mask (subs/sbc) to produce the result in x0/x1.
358define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
359; CHECK-LABEL: abd_cmp_i128:
360; CHECK:       // %bb.0:
361; CHECK-NEXT:    subs x8, x0, x2
362; CHECK-NEXT:    sbcs x9, x1, x3
363; CHECK-NEXT:    cset w10, lo
364; CHECK-NEXT:    sbfx x10, x10, #0, #1
365; CHECK-NEXT:    eor x8, x8, x10
366; CHECK-NEXT:    eor x9, x9, x10
367; CHECK-NEXT:    subs x0, x8, x10
368; CHECK-NEXT:    sbc x1, x9, x10
369; CHECK-NEXT:    ret
370  %cmp = icmp uge i128 %a, %b
371  %ab = sub i128 %a, %b
372  %ba = sub i128 %b, %a
373  %sel = select i1 %cmp, i128 %ab, i128 %ba
374  ret i128 %sel
375}
376
377;
378; negative tests
379;
380
; vector_legalized: computes abs(zext(a) - zext(b)) in i32 (llvm.abs.i32, flag
; false), zero-extends it to i64 and adds it to
; llvm.vector.reduce.add.v32i64 of a zeroinitializer vector. Unlike the other
; functions in this file it carries no nounwind attribute.
; Expected output: the scalar part is the and/sub(uxth)/cmp/cneg sequence;
; the reduction appears as movi #0 + addp + fmov, followed by a final add.
381define i64 @vector_legalized(i16 %a, i16 %b) {
382; CHECK-LABEL: vector_legalized:
383; CHECK:       // %bb.0:
384; CHECK-NEXT:    movi v0.2d, #0000000000000000
385; CHECK-NEXT:    and w8, w0, #0xffff
386; CHECK-NEXT:    sub w8, w8, w1, uxth
387; CHECK-NEXT:    cmp w8, #0
388; CHECK-NEXT:    addp d0, v0.2d
389; CHECK-NEXT:    cneg w8, w8, mi
390; CHECK-NEXT:    fmov x9, d0
391; CHECK-NEXT:    add x0, x9, x8
392; CHECK-NEXT:    ret
393  %ea = zext i16 %a to i32
394  %eb = zext i16 %b to i32
395  %s = sub i32 %ea, %eb
396  %ab = call i32 @llvm.abs.i32(i32 %s, i1 false)
397  %e = zext i32 %ab to i64
398  %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer)
399  %z = add i64 %red, %e
400  ret i64 %z
401}
402
403;
404; sub(select(icmp(a,b),a,b),select(icmp(a,b),b,a)) -> abdu(a,b)
405;
406
; abd_select_i8: with %cmp = icmp ult a, b, %ab selects the smaller operand
; and %ba the larger one; the function returns %ba - %ab.
; Expected output: w0 masked to 8 bits, minus w1 extended with uxtb, then
; cmp/cneg on "mi".
407define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
408; CHECK-LABEL: abd_select_i8:
409; CHECK:       // %bb.0:
410; CHECK-NEXT:    and w8, w0, #0xff
411; CHECK-NEXT:    sub w8, w8, w1, uxtb
412; CHECK-NEXT:    cmp w8, #0
413; CHECK-NEXT:    cneg w0, w8, mi
414; CHECK-NEXT:    ret
415  %cmp = icmp ult i8 %a, %b
416  %ab = select i1 %cmp, i8 %a, i8 %b
417  %ba = select i1 %cmp, i8 %b, i8 %a
418  %sub = sub i8 %ba, %ab
419  ret i8 %sub
420}
421
; abd_select_i16: with %cmp = icmp ule a, b, %ab selects the smaller operand
; and %ba the larger one; the function returns %ba - %ab.
; Expected output: w0 masked to 16 bits, minus w1 extended with uxth, then
; cmp/cneg on "mi".
422define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
423; CHECK-LABEL: abd_select_i16:
424; CHECK:       // %bb.0:
425; CHECK-NEXT:    and w8, w0, #0xffff
426; CHECK-NEXT:    sub w8, w8, w1, uxth
427; CHECK-NEXT:    cmp w8, #0
428; CHECK-NEXT:    cneg w0, w8, mi
429; CHECK-NEXT:    ret
430  %cmp = icmp ule i16 %a, %b
431  %ab = select i1 %cmp, i16 %a, i16 %b
432  %ba = select i1 %cmp, i16 %b, i16 %a
433  %sub = sub i16 %ba, %ab
434  ret i16 %sub
435}
436
; abd_select_i32: with %cmp = icmp ugt a, b, %ab selects the larger operand
; and %ba the smaller one; the function returns %ab - %ba.
; Expected output: both subtraction orders computed and csel on the unsigned
; "hi" condition.
437define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
438; CHECK-LABEL: abd_select_i32:
439; CHECK:       // %bb.0:
440; CHECK-NEXT:    sub w8, w1, w0
441; CHECK-NEXT:    subs w9, w0, w1
442; CHECK-NEXT:    csel w0, w9, w8, hi
443; CHECK-NEXT:    ret
444  %cmp = icmp ugt i32 %a, %b
445  %ab = select i1 %cmp, i32 %a, i32 %b
446  %ba = select i1 %cmp, i32 %b, i32 %a
447  %sub = sub i32 %ab, %ba
448  ret i32 %sub
449}
450
; abd_select_i64: with %cmp = icmp uge a, b, %ab selects the larger operand
; and %ba the smaller one; the function returns %ab - %ba.
; Expected output: the 64-bit sub/subs/csel sequence, selecting on the
; unsigned "hi" condition.
451define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
452; CHECK-LABEL: abd_select_i64:
453; CHECK:       // %bb.0:
454; CHECK-NEXT:    sub x8, x1, x0
455; CHECK-NEXT:    subs x9, x0, x1
456; CHECK-NEXT:    csel x0, x9, x8, hi
457; CHECK-NEXT:    ret
458  %cmp = icmp uge i64 %a, %b
459  %ab = select i1 %cmp, i64 %a, i64 %b
460  %ba = select i1 %cmp, i64 %b, i64 %a
461  %sub = sub i64 %ab, %ba
462  ret i64 %sub
463}
464
; abd_select_i128: with %cmp = icmp ult a, b, %ab selects the smaller operand
; and %ba the larger one; the function returns %ba - %ab.
; Expected output: a two-register subtract (subs/sbcs), a 0 / all-ones mask
; built from the "lo" condition with cset + sbfx, then xor with the mask and
; subtraction of the mask (subs/sbc) to produce the result in x0/x1.
465define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
466; CHECK-LABEL: abd_select_i128:
467; CHECK:       // %bb.0:
468; CHECK-NEXT:    subs x8, x0, x2
469; CHECK-NEXT:    sbcs x9, x1, x3
470; CHECK-NEXT:    cset w10, lo
471; CHECK-NEXT:    sbfx x10, x10, #0, #1
472; CHECK-NEXT:    eor x8, x8, x10
473; CHECK-NEXT:    eor x9, x9, x10
474; CHECK-NEXT:    subs x0, x8, x10
475; CHECK-NEXT:    sbc x1, x9, x10
476; CHECK-NEXT:    ret
477  %cmp = icmp ult i128 %a, %b
478  %ab = select i1 %cmp, i128 %a, i128 %b
479  %ba = select i1 %cmp, i128 %b, i128 %a
480  %sub = sub i128 %ba, %ab
481  ret i128 %sub
482}
483
; Declarations for the llvm.abs, llvm.umax and llvm.umin intrinsics, grouped
; by intrinsic and ordered by bit width.
; NOTE(review): the functions in this file also call @llvm.abs.i256,
; @llvm.umin.i128, @llvm.umax.i128 and @llvm.vector.reduce.add.v32i64, none
; of which is declared in this list - presumably they rely on the IR parser
; declaring intrinsics automatically; confirm before running this test with
; an older toolchain.
484declare i8 @llvm.abs.i8(i8, i1)
485declare i16 @llvm.abs.i16(i16, i1)
486declare i32 @llvm.abs.i32(i32, i1)
487declare i64 @llvm.abs.i64(i64, i1)
488declare i128 @llvm.abs.i128(i128, i1)
489
490declare i8 @llvm.umax.i8(i8, i8)
491declare i16 @llvm.umax.i16(i16, i16)
492declare i32 @llvm.umax.i32(i32, i32)
493declare i64 @llvm.umax.i64(i64, i64)
494
495declare i8 @llvm.umin.i8(i8, i8)
496declare i16 @llvm.umin.i16(i16, i16)
497declare i32 @llvm.umin.i32(i32, i32)
498declare i64 @llvm.umin.i64(i64, i64)
499