; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mattr=+sve < %s | FileCheck %s
; Origin: /llvm-project/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll (revision 50bfa85d7968690aa305ae1b30f0214ced64c4d7)

; The tests below extract a single lane from the result of a vector compare
; and pin the AArch64 code that llc is expected to produce for it.
target triple = "aarch64-unknown-linux-gnu"


; Lane 1 of `icmp ult %a, splat(5)`. The expected output moves lane 1 into a
; general-purpose register and compares it against the immediate there
; (mov + cmp + cset lo); no vector compare instruction is expected.
define i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) {
; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    cmp w8, #5
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, splat (i32 5)
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; Same as the rhs variant, but the constant splat is the left-hand operand:
; `icmp ult splat(7), %a`. The expected scalar compare keeps the lane as the
; first cmp operand and therefore uses the swapped condition (cset hi).
define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) {
; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    cmp w8, #7
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> splat(i32 7), %a
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; The right-hand operand is a non-splat constant vector. Only lane 1 is
; extracted, so the expected scalar compare uses that lane's constant (234).
define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) {
; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    cmp w8, #234
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, <i32 5, i32 234, i32 -1, i32 7>
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; Floating-point variant: lane 1 of `fcmp ult %a, splat(4.0)`. The expected
; output extracts the lane into s0, materializes 4.0 with fmov and performs a
; scalar fcmp followed by cset lt.
define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) {
; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    fmov s1, #4.00000000
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    cset w0, lt
; CHECK-NEXT:    ret
  %fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0)
  %ext = extractelement <4 x i1> %fcmp, i32 1
  ret i1 %ext
}

; Tests the code in ExpandIntRes_SETCC
; A <1 x i128> loaded from %p is compared with zero, the i1 result is
; sign-extended back to i128 and element 0 is returned. The expected output
; ORs the two 64-bit halves, compares the result with zero and sign-extends
; the resulting bit into both halves of the return value (x0 and x1).
define i128 @extract_icmp_v1i128(ptr %p) {
; CHECK-LABEL: extract_icmp_v1i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp x9, x8, [x0]
; CHECK-NEXT:    orr x8, x9, x8
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x0, x8, #0, #1
; CHECK-NEXT:    mov x1, x0
; CHECK-NEXT:    ret
  %load = load <1 x i128>, ptr %p, align 16
  %cmp = icmp eq <1 x i128> %load, zeroinitializer
  %sext = sext <1 x i1> %cmp to <1 x i128>
  %res = extractelement <1 x i128> %sext, i32 0
  ret i128 %res
}

; Loop with a <2 x i64> induction vector that is compared `ult 15`; each lane
; of the compare result guards its own conditional store of 1 into %dest.
; The expected output keeps the induction vector in v0 but tests each lane
; separately in a general-purpose register (fmov/mov to x11, cmp #14, b.hi)
; rather than emitting a vector compare.
define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: vector_loop_with_icmp:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    index z0.d, #0, #1
; CHECK-NEXT:    mov w8, #2 // =0x2
; CHECK-NEXT:    mov w9, #16 // =0x10
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    mov w10, #1 // =0x1
; CHECK-NEXT:    b .LBB5_2
; CHECK-NEXT:  .LBB5_1: // %pred.store.continue6
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    subs x9, x9, #2
; CHECK-NEXT:    add x8, x8, #8
; CHECK-NEXT:    b.eq .LBB5_6
; CHECK-NEXT:  .LBB5_2: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    fmov x11, d0
; CHECK-NEXT:    cmp x11, #14
; CHECK-NEXT:    b.hi .LBB5_4
; CHECK-NEXT:  // %bb.3: // %pred.store.if
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    stur w10, [x8, #-4]
; CHECK-NEXT:  .LBB5_4: // %pred.store.continue
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    mov x11, v0.d[1]
; CHECK-NEXT:    cmp x11, #14
; CHECK-NEXT:    b.hi .LBB5_1
; CHECK-NEXT:  // %bb.5: // %pred.store.if5
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    str w10, [x8]
; CHECK-NEXT:    b .LBB5_1
; CHECK-NEXT:  .LBB5_6: // %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue6 ]
  %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %entry ], [ %vec.ind.next, %pred.store.continue6 ]
  %vec.cmp = icmp ult <2 x i64> %vec.ind, <i64 15, i64 15>
  %c0 = extractelement <2 x i1> %vec.cmp, i64 0
  br i1 %c0, label %pred.store.if, label %pred.store.continue

pred.store.if:
  %arrayidx = getelementptr inbounds i32, ptr %dest, i64 %index
  store i32 1, ptr %arrayidx, align 4
  br label %pred.store.continue

pred.store.continue:
  %c1 = extractelement <2 x i1> %vec.cmp, i64 1
  br i1 %c1, label %pred.store.if5, label %pred.store.continue6

pred.store.if5:
  %indexp1 = or disjoint i64 %index, 1
  %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %indexp1
  store i32 1, ptr %arrayidx2, align 4
  br label %pred.store.continue6

pred.store.continue6:
  %index.next = add i64 %index, 2
  %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
  %index.cmp = icmp eq i64 %index.next, 16
  br i1 %index.cmp, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}


; Chain of two vector compares with a sign-extension in between
; (icmp ule -> sext -> icmp sge), of which only lane 0 is extracted and then
; sign-extended to i32. The expected output does all of this on scalars.
; NOTE(review): the function name presumably refers to an LLVM issue number;
; confirm against the issue tracker.
; TODO: Combine the sbfx(cset) into a csetm
define i32 @issue_121372(<4 x i32> %v) {
; CHECK-LABEL: issue_121372:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx w8, w8, #0, #1
; CHECK-NEXT:    cmp w8, #1
; CHECK-NEXT:    csetm w0, lt
; CHECK-NEXT:    ret
  %cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
  %sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
  %cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
  %ext = extractelement <4 x i1> %cmp_sge, i32 0
  %res = sext i1 %ext to i32
  ret i32 %res
}


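; Negative tests: in the functions below the vector compare (cmhi) is expected
; to remain in the output instead of being replaced by a scalar compare.
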
; The right-hand operand is a splat of the non-constant argument %b (built
; with insertelement + shufflevector). The expected output keeps the vector
; form: dup, cmhi, narrow with xtn, then extract lane 1 and mask to one bit.
define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v1.4s, w0
; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %ins = insertelement <4 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %icmp = icmp ult <4 x i32> %a, %splat
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; The compare against a constant splat (235) has two uses: lane 1 is
; extracted and the whole <4 x i1> result is also stored to %p. The expected
; output computes the vector compare once (cmhi) and derives both the
; returned bit and the stored byte from it.
define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #235
; CHECK-NEXT:    adrp x9, .LCPI8_0
; CHECK-NEXT:    mov x8, x0
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI8_0]
; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v1.4h, v0.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    umov w9, v1.h[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    and w0, w9, #0x1
; CHECK-NEXT:    strb w10, [x8]
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, splat(i32 235)
  %ext = extractelement <4 x i1> %icmp, i32 1
  store <4 x i1> %icmp, ptr %p, align 4
  ret i1 %ext
}

; The extracted lane index %c is not a constant. The expected output performs
; the vector compare (cmhi), narrows it, spills the result to the stack and
; loads the requested halfword using an address formed from %c (bfi + ldrh).
define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movi v1.4s, #127
; CHECK-NEXT:    add x8, sp, #8
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    bfi x8, x0, #1, #2
; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    str d0, [sp, #8]
; CHECK-NEXT:    ldrh w8, [x8]
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, splat(i32 127)
  %ext = extractelement <4 x i1> %icmp, i32 %c
  ret i1 %ext
}
