xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll (revision 2238363a5f3081274b54dd0d1b0ff297e6940ba5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s
3
4; This is the analogue of AArch64's file of the same name. It mostly tests
5; that some form of correct lowering occurs. The tests are a little artificial,
6; but I strongly suspect there's room for improved CodeGen (FIXME).
7
; Signed >= compare of two <1 x i64> values; lane 0 of the <1 x i1> result is
; extracted and sign-extended to i64 (so the returned value is 0 or -1).
; The assertions expect a scalar cmp/cset followed by sbfx.
8define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
9; CHECK-LABEL: test_sext_extr_cmp_0:
10; CHECK:       // %bb.0:
11; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
12; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
13; CHECK-NEXT:    fmov x8, d1
14; CHECK-NEXT:    fmov x9, d0
15; CHECK-NEXT:    cmp x9, x8
16; CHECK-NEXT:    cset w8, ge
17; CHECK-NEXT:    sbfx x0, x8, #0, #1
18; CHECK-NEXT:    ret
19  %1 = icmp sge <1 x i64> %v1, %v2
20  %2 = extractelement <1 x i1> %1, i32 0
21  %vget_lane = sext i1 %2 to i64
22  ret i64 %vget_lane
23}
24
; Floating-point variant of test_sext_extr_cmp_0: an ordered-equal compare of
; two <1 x double> values, with the single i1 lane extracted and sign-extended
; to i64. The assertions expect a scalar fcmp/cset followed by sbfx.
25define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
26; CHECK-LABEL: test_sext_extr_cmp_1:
27; CHECK:       // %bb.0:
28; CHECK-NEXT:    fcmp d0, d1
29; CHECK-NEXT:    cset w8, eq
30; CHECK-NEXT:    sbfx x0, x8, #0, #1
31; CHECK-NEXT:    ret
32  %1 = fcmp oeq <1 x double> %v1, %v2
33  %2 = extractelement <1 x i1> %1, i32 0
34  %vget_lane = sext i1 %2 to i64
35  ret i64 %vget_lane
36}
37
; Vector select driven by a <1 x i1> mask from an integer equality compare:
; the result is zero when %v1 == %v2 and %v3 otherwise. The assertions expect
; a vector cmeq producing the mask and a bic clearing %v3 under that mask.
38define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
39; CHECK-LABEL: test_select_v1i1_0:
40; CHECK:       // %bb.0:
41; CHECK-NEXT:    cmeq d0, d0, d1
42; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
43; CHECK-NEXT:    ret
44  %1 = icmp eq <1 x i64> %v1, %v2
45  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
46  ret <1 x i64> %res
47}
48
; Same select as test_select_v1i1_0, but the <1 x i1> mask comes from an
; ordered-equal compare of <1 x double> operands while the selected values are
; <1 x i64>. The assertions expect fcmeq for the mask followed by bic.
49define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
50; CHECK-LABEL: test_select_v1i1_1:
51; CHECK:       // %bb.0:
52; CHECK-NEXT:    fcmeq d0, d0, d1
53; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
54; CHECK-NEXT:    ret
55  %1 = fcmp oeq <1 x double> %v1, %v2
56  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
57  ret <1 x i64> %res
58}
59
; Mirror of test_select_v1i1_1: the <1 x i1> mask comes from an integer
; equality compare while the selected values are <1 x double> (zero when the
; operands are equal, %v3 otherwise). The assertions expect cmeq plus bic.
60define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
61; CHECK-LABEL: test_select_v1i1_2:
62; CHECK:       // %bb.0:
63; CHECK-NEXT:    cmeq d0, d0, d1
64; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
65; CHECK-NEXT:    ret
66  %1 = icmp eq <1 x i64> %v1, %v2
67  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
68  %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
68  ret <1 x double> %res
69}
70
71; For v1i64, it's not clear which of the vector or scalar compare is better.
72; Let's stick to the vector form, like for all other vector selects fed by a
73; scalar setcc.  If anything, it exposes more ILP.
; The select mask here is a scalar i64 equality result inserted into lane 0 of
; a <1 x i1>. The assertions expect both scalars to be moved into vector
; registers (fmov) so that cmeq/bic can be used.
; NOTE(review): the insertelement placeholder is undef; LLVM's LangRef now
; recommends poison instead -- consider migrating and regenerating assertions.
74define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
75; CHECK-LABEL: test_select_v1i1_3:
76; CHECK:       // %bb.0:
77; CHECK-NEXT:    fmov d1, x1
78; CHECK-NEXT:    fmov d2, x0
79; CHECK-NEXT:    cmeq d1, d2, d1
80; CHECK-NEXT:    bic v0.8b, v0.8b, v1.8b
81; CHECK-NEXT:    ret
82  %tst = icmp eq i64 %lhs, %rhs
83  %evil = insertelement <1 x i1> undef, i1 %tst, i32 0
84  %res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
85  ret <1 x i64> %res
86}
87
; Branches on lane 0 of a <1 x i1> integer equality compare: returns 1 when
; %v1 == %v2 (if.end) and 0 otherwise (if.then). The assertions expect the two
; returns to be merged into a single block that computes the result with
; cmp/cset, leaving no conditional branch.
88define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
89; CHECK-LABEL: test_br_extr_cmp:
90; CHECK:       // %bb.0: // %common.ret
91; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
92; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
93; CHECK-NEXT:    fmov x8, d1
94; CHECK-NEXT:    fmov x9, d0
95; CHECK-NEXT:    cmp x9, x8
96; CHECK-NEXT:    cset w0, eq
97; CHECK-NEXT:    ret
98  %1 = icmp eq <1 x i64> %v1, %v2
99  %2 = extractelement <1 x i1> %1, i32 0
100  br i1 %2, label %if.end, label %if.then
101
102if.then:
103  ret i32 0;
104
105if.end:
106  ret i32 1;
107}
108
109
; Vector select on <1 x float> whose mask is an unordered compare of %i105
; against zero; since the other operand is a constant zero, the mask is true
; only when %i105 is NaN, in which case %in is returned instead of %i105.
; The assertions expect a scalar fcmp/cset whose bit is widened to a lane mask
; (fmov, shl #31, cmlt) and then applied with bit.
110define <1 x float> @test_vselect_f32(<1 x float> %i105, <1 x float> %in) {
111; CHECK-LABEL: test_vselect_f32:
112; CHECK:       // %bb.0:
113; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
114; CHECK-NEXT:    fcmp s0, s0
115; CHECK-NEXT:    cset w8, vs
116; CHECK-NEXT:    fmov s2, w8
117; CHECK-NEXT:    shl v2.2s, v2.2s, #31
118; CHECK-NEXT:    cmlt v2.2s, v2.2s, #0
119; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
120; CHECK-NEXT:    ret
121  %i179 = fcmp uno <1 x float> %i105, zeroinitializer
122  %i180 = select <1 x i1> %i179, <1 x float> %in, <1 x float> %i105
123  ret <1 x float> %i180
124}
125
; Half-precision counterpart of test_vselect_f32: returns %in when %i105 is
; unordered with zero (i.e. NaN) and %i105 otherwise. The assertions expect
; the half to be converted to single precision for the compare (fcvt + fcmp)
; and the result picked with a scalar fcsel.
126define <1 x half> @test_vselect_f16(<1 x half> %i105, <1 x half> %in) {
127; CHECK-LABEL: test_vselect_f16:
128; CHECK:       // %bb.0:
129; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
130; CHECK-NEXT:    fcvt s2, h0
131; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
132; CHECK-NEXT:    fcmp s2, s2
133; CHECK-NEXT:    fcsel s0, s1, s0, vs
134; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
135; CHECK-NEXT:    ret
136  %i179 = fcmp uno <1 x half> %i105, zeroinitializer
137  %i180 = select <1 x i1> %i179, <1 x half> %in, <1 x half> %i105
138  ret <1 x half> %i180
139}
140
; Scalar (i1) select between two <1 x half> vectors, controlled by an
; ordered-equal compare of two scalar half values: %c when %a == %b, else %d.
; The assertions expect both compare operands to be converted to single
; precision (fcvt) before fcmp, with the result chosen by fcsel.
141define <1 x half> @test_select_f16(half %a, half %b, <1 x half> %c, <1 x half> %d ) {
142; CHECK-LABEL: test_select_f16:
143; CHECK:       // %bb.0:
144; CHECK-NEXT:    fcvt s1, h1
145; CHECK-NEXT:    fcvt s0, h0
146; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
147; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
148; CHECK-NEXT:    fcmp s0, s1
149; CHECK-NEXT:    fcsel s0, s2, s3, eq
150; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
151; CHECK-NEXT:    ret
152  %cmp31 = fcmp oeq half %a, %b
153  %e = select i1 %cmp31, <1 x half> %c, <1 x half> %d
154  ret <1 x half> %e
155}
156
; Vector select of <1 x i16> values under a <1 x i1> mask produced by an
; unordered compare of <1 x half> %i105 against zero (%x when %i105 is NaN,
; else %y). %in is not used by the body. The assertions expect fcvt/fcmp/cset,
; a 16-bit lane mask built with shl #15 + cmlt, and a bsl.
157define <1 x i16> @test_vselect_f16_i16(<1 x half> %i105, <1 x half> %in, <1 x i16> %x, <1 x i16> %y) {
158; CHECK-LABEL: test_vselect_f16_i16:
159; CHECK:       // %bb.0:
160; CHECK-NEXT:    fcvt s0, h0
161; CHECK-NEXT:    fcmp s0, s0
162; CHECK-NEXT:    cset w8, vs
163; CHECK-NEXT:    fmov s0, w8
164; CHECK-NEXT:    shl v0.4h, v0.4h, #15
165; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
166; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
167; CHECK-NEXT:    ret
168  %i179 = fcmp uno <1 x half> %i105, zeroinitializer
169  %i180 = select <1 x i1> %i179, <1 x i16> %x, <1 x i16> %y
170  ret <1 x i16> %i180
171}
172
; Scalar-condition version of test_vselect_f16_i16: the compare operates on a
; scalar half and the resulting i1 selects between whole <1 x i16> vectors.
; %in is not used by the body. The assertions expect the all-ones/all-zeros
; mask to come directly from csetm and be broadcast with dup before the bsl.
173define <1 x i16> @test_select_f16_i16(half %i105, half %in, <1 x i16> %x, <1 x i16> %y) {
174; CHECK-LABEL: test_select_f16_i16:
175; CHECK:       // %bb.0:
176; CHECK-NEXT:    fcvt s0, h0
177; CHECK-NEXT:    fcmp s0, s0
178; CHECK-NEXT:    csetm w8, vs
179; CHECK-NEXT:    dup v0.4h, w8
180; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
181; CHECK-NEXT:    ret
182  %i179 = fcmp uno half %i105, zeroinitializer
183  %i180 = select i1 %i179, <1 x i16> %x, <1 x i16> %y
184  ret <1 x i16> %i180
185}
186
; Like test_vselect_f16_i16 but selecting <1 x i32> values, so the mask
; element (from a half compare) is narrower than the selected element.
; %in is not used by the body. The assertions expect fcvt/fcmp/cset, a 32-bit
; lane mask built with shl #31 + cmlt, and a bsl.
187define <1 x i32> @test_vselect_f16_i32(<1 x half> %i105, <1 x half> %in, <1 x i32> %x, <1 x i32> %y) {
188; CHECK-LABEL: test_vselect_f16_i32:
189; CHECK:       // %bb.0:
190; CHECK-NEXT:    fcvt s0, h0
191; CHECK-NEXT:    fcmp s0, s0
192; CHECK-NEXT:    cset w8, vs
193; CHECK-NEXT:    fmov s0, w8
194; CHECK-NEXT:    shl v0.2s, v0.2s, #31
195; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
196; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
197; CHECK-NEXT:    ret
198  %i179 = fcmp uno <1 x half> %i105, zeroinitializer
199  %i180 = select <1 x i1> %i179, <1 x i32> %x, <1 x i32> %y
200  ret <1 x i32> %i180
201}
202
; Half-precision version of test_sext_extr_cmp_1: ordered-equal compare of two
; <1 x half> values, lane 0 extracted and sign-extended to i64. The assertions
; expect both operands to be converted to single precision (fcvt) before the
; scalar fcmp/cset/sbfx sequence.
203define i64 @test_sext_extr_cmp_half(<1 x half> %v1, <1 x half> %v2) {
204; CHECK-LABEL: test_sext_extr_cmp_half:
205; CHECK:       // %bb.0:
206; CHECK-NEXT:    fcvt s1, h1
207; CHECK-NEXT:    fcvt s0, h0
208; CHECK-NEXT:    fcmp s0, s1
209; CHECK-NEXT:    cset w8, eq
210; CHECK-NEXT:    sbfx x0, x8, #0, #1
211; CHECK-NEXT:    ret
212  %1 = fcmp oeq <1 x half> %v1, %v2
213  %2 = extractelement <1 x i1> %1, i32 0
214  %vget_lane = sext i1 %2 to i64
215  ret i64 %vget_lane
216}
217
; Half-precision version of test_select_v1i1_3: a scalar ordered-equal half
; compare is inserted into lane 0 of a <1 x i1> and used as the mask of a
; <1 x i64> select (zero when equal, %v3 otherwise). The assertions expect a
; scalar fcmp, a 64-bit mask from csetm moved into a vector register, and bic.
; NOTE(review): the insertelement placeholder is undef; LLVM's LangRef now
; recommends poison instead -- consider migrating and regenerating assertions.
218define <1 x i64> @test_select_v1i1_half(half %lhs, half %rhs, <1 x i64> %v3) {
219; CHECK-LABEL: test_select_v1i1_half:
220; CHECK:       // %bb.0:
221; CHECK-NEXT:    fcvt s1, h1
222; CHECK-NEXT:    fcvt s0, h0
223; CHECK-NEXT:    fcmp s0, s1
224; CHECK-NEXT:    csetm x8, eq
225; CHECK-NEXT:    fmov d0, x8
226; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
227; CHECK-NEXT:    ret
228  %tst = fcmp oeq half %lhs, %rhs
229  %evil = insertelement <1 x i1> undef, i1 %tst, i32 0
230  %res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
231  ret <1 x i64> %res
232}
233
; Half-precision version of test_br_extr_cmp: branches on lane 0 of an
; ordered-equal <1 x half> compare, returning 1 when equal (if.end) and 0
; otherwise (if.then). The assertions expect the returns to be merged into one
; block computing the result with fcvt/fcmp/cset and no conditional branch.
234define i32 @test_br_extr_cmp_half(<1 x half> %v1, <1 x half> %v2) {
235; CHECK-LABEL: test_br_extr_cmp_half:
236; CHECK:       // %bb.0: // %common.ret
237; CHECK-NEXT:    fcvt s1, h1
238; CHECK-NEXT:    fcvt s0, h0
239; CHECK-NEXT:    fcmp s0, s1
240; CHECK-NEXT:    cset w0, eq
241; CHECK-NEXT:    ret
242  %1 = fcmp oeq <1 x half> %v1, %v2
243  %2 = extractelement <1 x i1> %1, i32 0
244  br i1 %2, label %if.end, label %if.then
245
246if.then:
247  ret i32 0;
248
249if.end:
250  ret i32 1;
251}
252