; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK
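; The masks 0x7fffffff and 0x80000000 are bitwise complements, so
; (a & 0x7fffffff) | (b & 0x80000000) is expected to select to a single BSL
; with the mask materialised in z2.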
define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: bsl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
  %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
  %c = or <vscale x 4 x i32> %1, %2
  ret <vscale x 4 x i32> %c
}
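; For any c, (0 - c) and (c - 1) are bitwise complements, so
; ((0 - c) & l) | ((c - 1) & r) should still be recognised as a BSL, with the
; negated condition used as the mask.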
define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: bsl_add_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
  %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
  ret <vscale x 4 x i32> %bsl0000
}
; No BSL is expected here: the two constants are not bitwise complements, so
; the and/and/or pattern cannot fold to a BSL.
define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: no_bsl_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    and z1.s, z1.s, #0x7ffffffe
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
  %2 = and <vscale x 4 x i32> %b, splat(i32 2147483646)
  %c = or <vscale x 4 x i32> %1, %2
  ret <vscale x 4 x i32> %c
}
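; In the nbsl_* tests below the or result is additionally inverted (xor with
; -1); a BSL followed by a bitwise not is expected to combine into a single
; NBSL for each element type.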
define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: nbsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.b, #127 // =0x7f
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = and <vscale x 16 x i8> %a, splat(i8 127)
  %2 = and <vscale x 16 x i8> %b, splat(i8 -128)
  %3 = or <vscale x 16 x i8> %1, %2
  %4 = xor <vscale x 16 x i8> %3, splat(i8 -1)
  ret <vscale x 16 x i8> %4
}

define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: nbsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = and <vscale x 8 x i16> %a, splat(i16 32767)
  %2 = and <vscale x 8 x i16> %b, splat(i16 -32768)
  %3 = or <vscale x 8 x i16> %1, %2
  %4 = xor <vscale x 8 x i16> %3, splat(i16 -1)
  ret <vscale x 8 x i16> %4
}

define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: nbsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
  %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
  %3 = or <vscale x 4 x i32> %1, %2
  %4 = xor <vscale x 4 x i32> %3, splat(i32 -1)
  ret <vscale x 4 x i32> %4
}

define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: nbsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
  %2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
  %3 = or <vscale x 2 x i64> %1, %2
  %4 = xor <vscale x 2 x i64> %3, splat(i64 -1)
  ret <vscale x 2 x i64> %4
}