; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mattr=+sve < %s | FileCheck %s

; Code generation tests for extracting a single lane from the result of a
; vector compare.
;
; The first group of functions compares against a constant operand and extracts
; a constant lane; their expected assembly moves the lane into a scalar register
; and performs a scalar compare. The functions after the "Negative tests"
; marker have expected assembly that still performs the full vector compare.
;
; The expected-assembly comments are generated by the script named on the first
; line; regenerate them with that script rather than editing them by hand.
target triple = "aarch64-unknown-linux-gnu"


; Unsigned less-than against a constant splat on the right-hand side, lane 1.
; Expected: scalar compare of lane 1 with the immediate 5, condition `lo`.
define i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) {
; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    cmp w8, #5
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, splat (i32 5)
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; Same as above but with the constant splat as the left-hand operand.
; Expected: the scalar compare keeps %a's lane on the left and uses the
; swapped condition `hi`.
define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) {
; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    cmp w8, #7
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> splat(i32 7), %a
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; Right-hand side is a constant vector that is not a splat.
; Expected: only the constant of the extracted lane (index 1, value 234)
; appears in the scalar compare.
define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) {
; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    cmp w8, #234
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, <i32 5, i32 234, i32 -1, i32 7>
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; Floating-point variant: `fcmp ult` against a constant splat of 4.0, lane 1.
; Expected: lane 1 is moved to s0, 4.0 is materialised in s1, and a scalar
; fcmp with condition `lt` produces the result.
define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) {
; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    fmov s1, #4.00000000
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    cset w0, lt
; CHECK-NEXT:    ret
  %fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0)
  %ext = extractelement <4 x i1> %fcmp, i32 1
  ret i1 %ext
}

; Tests the code in ExpandIntRes_SETCC
; A <1 x i128> loaded from %p is compared for equality with zero, the i1
; result is sign-extended back to i128 and element 0 is returned.
; Expected: the two 64-bit halves are OR-ed together and compared with zero;
; the sign-extended result is returned in x0 and copied to x1.
define i128 @extract_icmp_v1i128(ptr %p) {
; CHECK-LABEL: extract_icmp_v1i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp x9, x8, [x0]
; CHECK-NEXT:    orr x8, x9, x8
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x0, x8, #0, #1
; CHECK-NEXT:    mov x1, x0
; CHECK-NEXT:    ret
  %load = load <1 x i128>, ptr %p, align 16
  %cmp = icmp eq <1 x i128> %load, zeroinitializer
  %sext = sext <1 x i1> %cmp to <1 x i128>
  %res = extractelement <1 x i128> %sext, i32 0
  ret i128 %res
}

; A loop with a <2 x i64> induction vector whose compare result guards two
; conditional stores, one per lane. Both lanes of the same compare are
; extracted.
; Expected: each lane is moved to a scalar register and compared with #14
; (branching on `hi`); no vector compare instruction appears in the loop.
define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: vector_loop_with_icmp:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    index z0.d, #0, #1
; CHECK-NEXT:    mov w8, #2 // =0x2
; CHECK-NEXT:    mov w9, #16 // =0x10
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    mov w10, #1 // =0x1
; CHECK-NEXT:    b .LBB5_2
; CHECK-NEXT:  .LBB5_1: // %pred.store.continue6
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    subs x9, x9, #2
; CHECK-NEXT:    add x8, x8, #8
; CHECK-NEXT:    b.eq .LBB5_6
; CHECK-NEXT:  .LBB5_2: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    fmov x11, d0
; CHECK-NEXT:    cmp x11, #14
; CHECK-NEXT:    b.hi .LBB5_4
; CHECK-NEXT:  // %bb.3: // %pred.store.if
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    stur w10, [x8, #-4]
; CHECK-NEXT:  .LBB5_4: // %pred.store.continue
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    mov x11, v0.d[1]
; CHECK-NEXT:    cmp x11, #14
; CHECK-NEXT:    b.hi .LBB5_1
; CHECK-NEXT:  // %bb.5: // %pred.store.if5
; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    str w10, [x8]
; CHECK-NEXT:    b .LBB5_1
; CHECK-NEXT:  .LBB5_6: // %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  br label %vector.body

vector.body:
  ; %index is the scalar counter; %vec.ind holds the per-lane indices
  ; (<0, 1> on entry, advanced by <2, 2> each iteration).
  %index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue6 ]
  %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %entry ], [ %vec.ind.next, %pred.store.continue6 ]
  %vec.cmp = icmp ult <2 x i64> %vec.ind, <i64 15, i64 15>
  ; Lane 0 of the compare guards the first store.
  %c0 = extractelement <2 x i1> %vec.cmp, i64 0
  br i1 %c0, label %pred.store.if, label %pred.store.continue

pred.store.if:
  %arrayidx = getelementptr inbounds i32, ptr %dest, i64 %index
  store i32 1, ptr %arrayidx, align 4
  br label %pred.store.continue

pred.store.continue:
  ; Lane 1 of the same compare guards the second store.
  %c1 = extractelement <2 x i1> %vec.cmp, i64 1
  br i1 %c1, label %pred.store.if5, label %pred.store.continue6

pred.store.if5:
  %indexp1 = or disjoint i64 %index, 1
  %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %indexp1
  store i32 1, ptr %arrayidx2, align 4
  br label %pred.store.continue6

pred.store.continue6:
  ; Advance by two elements per iteration; exit once 16 have been processed.
  %index.next = add i64 %index, 2
  %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
  %index.cmp = icmp eq i64 %index.next, 16
  br i1 %index.cmp, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}


; Two vector compares chained through a sign-extension, with lane 0 of the
; second compare extracted and sign-extended to i32.
; NOTE(review): presumably a regression test for the issue number in the
; function name - confirm against the issue tracker.
; TODO: Combine the sbfx(cset) into a csetm
define i32 @issue_121372(<4 x i32> %v) {
; CHECK-LABEL: issue_121372:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx w8, w8, #0, #1
; CHECK-NEXT:    cmp w8, #1
; CHECK-NEXT:    csetm w0, lt
; CHECK-NEXT:    ret
  %cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
  %sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
  %cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
  %ext = extractelement <4 x i1> %cmp_sge, i32 0
  %res = sext i1 %ext to i32
  ret i32 %res
}


; Negative tests

; The right-hand side is a splat of the non-constant argument %b.
; Expected: the splat is built with dup and the vector compare (cmhi) is
; kept; the lane is extracted from the narrowed compare result.
define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v1.4s, w0
; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %ins = insertelement <4 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %icmp = icmp ult <4 x i32> %a, %splat
  %ext = extractelement <4 x i1> %icmp, i32 1
  ret i1 %ext
}

; The compare result has a second use: besides the lane extract, the whole
; <4 x i1> value is stored to %p.
; Expected: the vector compare (cmhi) is kept and feeds both the extracted
; lane and the stored byte.
define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #235
; CHECK-NEXT:    adrp x9, .LCPI8_0
; CHECK-NEXT:    mov x8, x0
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI8_0]
; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v1.4h, v0.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    umov w9, v1.h[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    and w0, w9, #0x1
; CHECK-NEXT:    strb w10, [x8]
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, splat(i32 235)
  %ext = extractelement <4 x i1> %icmp, i32 1
  store <4 x i1> %icmp, ptr %p, align 4
  ret i1 %ext
}

; The extracted lane index is the non-constant argument %c.
; Expected: the vector compare (cmhi) is kept; its narrowed result is written
; to a stack slot and the requested lane is loaded back from it.
define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movi v1.4s, #127
; CHECK-NEXT:    add x8, sp, #8
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    bfi x8, x0, #1, #2
; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    str d0, [sp, #8]
; CHECK-NEXT:    ldrh w8, [x8]
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %icmp = icmp ult <4 x i32> %a, splat(i32 127)
  %ext = extractelement <4 x i1> %icmp, i32 %c
  ret i1 %ext
}
