; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S \
; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S \
; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s --check-prefix=CHECK-VF4IC4

; Loop-vectorizer test for loops whose result is a select, driven by a compare,
; between the running value and a loop-invariant value. Every loop carries
; metadata requesting scalable vectorization and every function enables SVE.
; The file is checked twice: once with an interleave count of 1 and once with
; an interleave count of 4, both at a forced vector width of 4.
;
; Where the loop is vectorized, the checks expect the reduction to be carried
; as a vector of i1, combined with 'or', reduced with llvm.vector.reduce.or in
; the middle block, frozen, and finally turned back into the scalar result by
; a select.

target triple = "aarch64-linux-gnu"

; Integer compare; both select values are constants (3 and 7).
define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp
; CHECK-VF4IC1:      vector.body:
; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3)
; CHECK-VF4IC1-NEXT:   [[NOT:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true)
; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1:      middle.block:
; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT:   [[FR:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT:   {{.*}} = select i1 [[FR]], i32 7, i32 3
; CHECK-VF4IC1:        %cmp.n = icmp eq i64 %n, %n.vec

; CHECK-VF4IC4-LABEL: @select_const_i32_from_icmp
; CHECK-VF4IC4:      vector.body:
; CHECK-VF4IC4:        [[VEC_PHI1:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
; CHECK-VF4IC4:        [[VEC_PHI2:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
; CHECK-VF4IC4:        [[VEC_PHI3:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
; CHECK-VF4IC4:        [[VEC_PHI4:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
; CHECK-VF4IC4:        [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
; CHECK-VF4IC4-NEXT:   [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
; CHECK-VF4IC4-NEXT:   [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
; CHECK-VF4IC4-NEXT:   [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
; CHECK-VF4IC4-NEXT:   [[NOT1:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP1]], splat (i1 true)
; CHECK-VF4IC4-NEXT:   [[NOT2:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP2]], splat (i1 true)
; CHECK-VF4IC4-NEXT:   [[NOT3:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP3]], splat (i1 true)
; CHECK-VF4IC4-NEXT:   [[NOT4:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP4]], splat (i1 true)
; CHECK-VF4IC4-NEXT:   [[VEC_SEL1:%.*]] = or <vscale x 4 x i1> [[VEC_PHI1]], [[NOT1]]
; CHECK-VF4IC4-NEXT:   [[VEC_SEL2:%.*]] = or <vscale x 4 x i1> [[VEC_PHI2]], [[NOT2]]
; CHECK-VF4IC4-NEXT:   [[VEC_SEL3:%.*]] = or <vscale x 4 x i1> [[VEC_PHI3]], [[NOT3]]
; CHECK-VF4IC4-NEXT:   [[VEC_SEL4:%.*]] = or <vscale x 4 x i1> [[VEC_PHI4]], [[NOT4]]
; CHECK-VF4IC4:      middle.block:
; CHECK-VF4IC4-NEXT:   [[OR1:%.*]] = or <vscale x 4 x i1> [[VEC_SEL2]], [[VEC_SEL1]]
; CHECK-VF4IC4-NEXT:   [[OR2:%.*]] = or <vscale x 4 x i1> [[VEC_SEL3]], [[OR1]]
; CHECK-VF4IC4-NEXT:   [[OR3:%.*]] = or <vscale x 4 x i1> [[VEC_SEL4]], [[OR2]]
; CHECK-VF4IC4-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[OR3]])
; CHECK-VF4IC4-NEXT:   [[FR:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC4-NEXT:   {{.*}} = select i1 [[FR]], i32 7, i32 3
; CHECK-VF4IC4-NEXT:   %cmp.n = icmp eq i64 %n, %n.vec
entry:
  br label %for.body

for.body:                                      ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds i32, ptr %v, i64 %0
  %3 = load i32, ptr %2, align 4
  %4 = icmp eq i32 %3, 3
  %5 = select i1 %4, i32 %1, i32 7
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body, !llvm.loop !0

exit:                                          ; preds = %for.body
  ret i32 %5
}

; Integer compare; the select values are the function arguments %a and %b.
; The preheader is expected to contain no shufflevector of <vscale x 4 x i32>
; (presumably meaning that neither argument gets broadcast -- confirm).
define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_i32_from_icmp
; CHECK-VF4IC1:      vector.ph:
; CHECK-VF4IC1-NOT:    shufflevector <vscale x 4 x i32>
; CHECK-VF4IC1-NOT:    shufflevector <vscale x 4 x i32>
; CHECK-VF4IC1:      vector.body:
; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3)
; CHECK-VF4IC1-NEXT:   [[NOT:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true)
; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1:      middle.block:
; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT:   [[FR:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT:   {{.*}} = select i1 [[FR]], i32 %b, i32 %a
; CHECK-VF4IC1:        %cmp.n = icmp eq i64 %n, %n.vec

; CHECK-VF4IC4-LABEL: @select_i32_from_icmp
; CHECK-VF4IC4:      vector.body:
entry:
  br label %for.body

for.body:                                      ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi i32 [ %a, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds i32, ptr %v, i64 %0
  %3 = load i32, ptr %2, align 4
  %4 = icmp eq i32 %3, 3
  %5 = select i1 %4, i32 %1, i32 %b
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body, !llvm.loop !0

exit:                                          ; preds = %for.body
  ret i32 %5
}

; Floating-point compare (fast ueq) selecting between integer constants 2 and 1.
define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp
; CHECK-VF4IC1:      vector.body:
; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x float>
; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC1-NEXT:   [[NOT:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true)
; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1:      middle.block:
; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT:   [[FR:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT:   {{.*}} = select i1 [[FR]], i32 1, i32 2
; CHECK-VF4IC1:        %cmp.n = icmp eq i64 %n, %n.vec

; CHECK-VF4IC4-LABEL: @select_const_i32_from_fcmp
; CHECK-VF4IC4:      vector.body:
entry:
  br label %for.body

for.body:                                      ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds float, ptr %v, i64 %0
  %3 = load float, ptr %2, align 4
  %4 = fcmp fast ueq float %3, 3.0
  %5 = select i1 %4, i32 %1, i32 1
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body, !llvm.loop !0

exit:                                          ; preds = %for.body
  ret i32 %5
}

; Integer compare selecting between float values. Both configurations expect
; no vector.body to be emitted for this function.
define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_const_f32_from_icmp
; CHECK-VF4IC1-NOT: vector.body
; CHECK-VF4IC4-LABEL: @select_const_f32_from_icmp
; CHECK-VF4IC4-NOT: vector.body
entry:
  br label %for.body

for.body:                                      ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds i32, ptr %v, i64 %0
  %3 = load i32, ptr %2, align 4
  %4 = icmp eq i32 %3, 3
  %5 = select fast i1 %4, float %1, float 7.0
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body, !llvm.loop !0

exit:                                          ; preds = %for.body
  ret float %5
}

; The select lives in a block that only runs when the value loaded from %src1
; is greater than 35. The checks expect the second load to become a masked
; load and the updated reduction value to be selected under the same mask.
define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp
; CHECK-VF4IC1:      vector.body:
; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
; CHECK-VF4IC1:        [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 35)
; CHECK-VF4IC1:        [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison)
; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], splat (i32 2)
; CHECK-VF4IC1-NEXT:   [[VEC_SEL_TMP:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
; CHECK-VF4IC1:        [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i1> [[VEC_SEL_TMP]], <vscale x 4 x i1> [[VEC_PHI]]
; CHECK-VF4IC1:      middle.block:
; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT:   [[FR:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT:   {{.*}} = select i1 [[FR]], i32 1, i32 0
; CHECK-VF4IC1:        %cmp.n = icmp eq i64 %n, %n.vec

; CHECK-VF4IC4-LABEL: @pred_select_const_i32_from_icmp
; CHECK-VF4IC4:      vector.body:
entry:
  br label %for.body

for.body:                                      ; preds = %entry, %for.inc
  %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
  %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %i.013
  %0 = load i32, ptr %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 35
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                       ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %i.013
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp3 = icmp eq i32 %1, 2
  %spec.select = select i1 %cmp3, i32 1, i32 %r.012
  br label %for.inc

for.inc:                                       ; preds = %if.then, %for.body
  %r.1 = phi i32 [ %r.012, %for.body ], [ %spec.select, %if.then ]
  %inc = add nuw nsw i64 %i.013, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0

for.end.loopexit:                              ; preds = %for.inc
  %r.1.lcssa = phi i32 [ %r.1, %for.inc ]
  ret i32 %r.1.lcssa
}


attributes #0 = { "target-features"="+sve" }

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}