; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

; Ensure we use the CC result of SVE compare instructions when branching.
; The IR passes the compare result to ptest.any using the same governing
; predicate (%pg) as the compare itself. The expected assembly branches
; directly after the cmplt, with no separate ptest instruction in between.
define void @sve_cmplt_setcc(ptr %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: sve_cmplt_setcc:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, #0
; CHECK-NEXT: b.eq .LBB0_2
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: .LBB0_2: // %if.end
; CHECK-NEXT: ret
entry:
  ; Wide compare of %in against an all-zero <vscale x 2 x i64> operand, governed by %pg.
  %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
  ; Test the compare result under the same predicate %pg.
  %1 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
  ; Take the store path only when the ptest.any result is true.
  br i1 %1, label %if.then, label %if.end

if.then:
  ; Masked store of %in to %out under %pg.
  tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> %in, ptr %out, i32 2, <vscale x 8 x i1> %pg)
  br label %if.end

if.end:
  ret void
}

; Ensure we use the inverted CC result of SVE compare instructions when branching.
; Same compare + ptest.any sequence as @sve_cmplt_setcc, but the two branch
; targets are swapped, so the store runs when the ptest.any result is false.
; The expected assembly branches with b.ne instead of b.eq.
define void @sve_cmplt_setcc_inverted(ptr %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: sve_cmplt_setcc_inverted:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, #0
; CHECK-NEXT: b.ne .LBB1_2
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: .LBB1_2: // %if.end
; CHECK-NEXT: ret
entry:
  %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
  %1 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
  ; Note the swapped successors: a true result skips the store.
  br i1 %1, label %if.end, label %if.then

if.then:
  tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> %in, ptr %out, i32 2, <vscale x 8 x i1> %pg)
  br label %if.end

if.end:
  ret void
}

; Ensure we combine setcc and csel so as to not end up with an extra compare
; This variant tests the compare result with ptest.last rather than ptest.any.
; NOTE(review): the expected output below still contains an explicit
; ptrue/and/ptest sequence before the b.hs branch -- confirm that this is the
; intended codegen for this case.
define void @sve_cmplt_setcc_hslo(ptr %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: sve_cmplt_setcc_hslo:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: cmplt p2.h, p0/z, z0.h, #0
; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ptest p1, p2.b
; CHECK-NEXT: b.hs .LBB2_2
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: .LBB2_2: // %if.end
; CHECK-NEXT: ret
entry:
  %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
  %1 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
  br i1 %1, label %if.then, label %if.end

if.then:
  tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> %in, ptr %out, i32 2, <vscale x 8 x i1> %pg)
  br label %if.end

if.end:
  ret void
}

; Fold away the redundant setcc:
; setcc(ne, <all ones>, sext(nxvNi1 ...), splat(0))
; -> nxvNi1 ...
; The expected output is a bare ret: no compare instruction is emitted.
define <vscale x 16 x i1> @sve_cmpne_setcc_all_true_sext(<vscale x 16 x i8> %vec, <vscale x 16 x i1> %pg) {
; CHECK-LABEL: sve_cmpne_setcc_all_true_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  ; Build an all-true predicate by splatting i1 true (insertelement + shufflevector).
  %alltrue.ins = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
  %alltrue = shufflevector <vscale x 16 x i1> %alltrue.ins, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
  ; Sign-extend the incoming predicate to i8 lanes and compare it against zero.
  %pg.sext = sext <vscale x 16 x i1> %pg to <vscale x 16 x i8>
  %cmp2 = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %alltrue, <vscale x 16 x i8> %pg.sext, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i1> %cmp2
}

; Fold away the redundant setcc:
; setcc(ne, pred, sext(setcc(ne, pred, ..., splat(0))), splat(0))
; -> setcc(ne, pred, ..., splat(0))
; Both compares in the IR are governed by the same predicate %pg; the expected
; output contains a single cmpne.
define <vscale x 16 x i1> @sve_cmpne_setcc_equal_pred(<vscale x 16 x i8> %vec, <vscale x 16 x i1> %pg) {
; CHECK-LABEL: sve_cmpne_setcc_equal_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: ret
  %cmp1 = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer)
  %cmp1.sext = sext <vscale x 16 x i1> %cmp1 to <vscale x 16 x i8>
  %cmp2 = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %cmp1.sext, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i1> %cmp2
}

; Combine:
; setcc(ne, pred1, sext(setcc(ne, pred2, ..., splat(0))), splat(0))
; -> setcc(ne, and(pred1, pred2), ..., splat(0))
; In the IR below the inner compare is governed by %pg1 and the outer compare
; by %pg2. The expected output is one cmpne followed by a predicate and.
define <vscale x 16 x i1> @sve_cmpne_setcc_different_pred(<vscale x 16 x i8> %vec, <vscale x 16 x i1> %pg1, <vscale x 16 x i1> %pg2) {
; CHECK-LABEL: sve_cmpne_setcc_different_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
  %cmp1 = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg1, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer)
  %cmp1.sext = sext <vscale x 16 x i1> %cmp1 to <vscale x 16 x i8>
  %cmp2 = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg2, <vscale x 16 x i8> %cmp1.sext, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i1> %cmp2
}

; Intrinsic declarations used by the tests in this file.
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

declare i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare i1 @llvm.aarch64.sve.ptest.last.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)

declare void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, i32, <vscale x 8 x i1>)