; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -prefer-predicate-over-epilogue=scalar-epilogue -mtriple aarch64-unknown-linux-gnu \
; RUN:   -mattr=+sve -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s

; Tests that conditional (predicated) floating-point reductions are vectorized
; as in-loop reductions (-prefer-inloop-reductions) using scalable vectors
; (<vscale x 4 x float>, forced scalable via the llvm.loop metadata below).
; In both loops the reduction operand is only loaded when a condition on a
; second array holds, so the vector body must use a masked load and neutralize
; the inactive lanes before reducing.

; Conditional fadd reduction with initial value 1.0: lanes failing the
; fcmp-une-2.0 test are replaced with 0.0 (the fadd identity) via a select,
; then the partial vector is folded with llvm.vector.reduce.fadd and
; accumulated into the scalar reduction phi.
define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %cond, i64 %N){
; CHECK-LABEL: @cond_fadd(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
; CHECK-NEXT:    [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP12]])
; CHECK-NEXT:    [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDVARS]]
; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP16]], 2.000000e+00
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK:       if.then:
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS]]
; CHECK-NEXT:    [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[FADD:%.*]] = fadd fast float [[RDX]], [[TMP17]]
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    [[RES]] = phi float [ [[FADD]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[RES_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %for.inc ]
  %rdx = phi float [ 1.000000e+00, %entry ], [ %res, %for.inc ]
  %arrayidx = getelementptr inbounds float, ptr %cond, i64 %indvars
  %0 = load float, ptr %arrayidx
  %tobool = fcmp une float %0, 2.000000e+00
  br i1 %tobool, label %if.then, label %for.inc

if.then:
  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars
  %1 = load float, ptr %arrayidx2
  %fadd = fadd fast float %rdx, %1
  br label %for.inc

for.inc:
  %res = phi float [ %fadd, %if.then ], [ %rdx, %for.body ]
  %indvars.next = add nuw nsw i64 %indvars, 1
  %exitcond.not = icmp eq i64 %indvars.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %res
}

; Conditional fmin reduction written as fcmp olt + select (initial value 1.0):
; inactive lanes of the masked load are filled with FLT_MAX
; (0x47EFFFFFE0000000, the fmin identity), the vector is folded with
; llvm.vector.reduce.fmin, and the scalar result is merged into the reduction
; phi with a cmp/select pair.
define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
; CHECK-LABEL: @cond_cmp_sel(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
; CHECK-NEXT:    [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float 0x47EFFFFFE0000000)
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP12]])
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi float [ [[RES:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[IV]]
; CHECK-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP15]], 3.000000e+00
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK:       if.then:
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[FCMP:%.*]] = fcmp fast olt float [[RDX]], [[TMP16]]
; CHECK-NEXT:    [[FSEL:%.*]] = select fast i1 [[FCMP]], float [[RDX]], float [[TMP16]]
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    [[RES]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[FSEL]], [[IF_THEN]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[RES_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
  %rdx = phi float [ %res, %for.inc ], [ 1.000000e+00, %entry ]
  %arrayidx = getelementptr inbounds float, ptr %cond, i64 %iv
  %0 = load float, ptr %arrayidx
  %tobool = fcmp une float %0, 3.000000e+00
  br i1 %tobool, label %if.then, label %for.inc

if.then:
  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %iv
  %1 = load float, ptr %arrayidx2
  %fcmp = fcmp fast olt float %rdx, %1
  %fsel = select fast i1 %fcmp, float %rdx, float %1
  br label %for.inc

for.inc:
  %res = phi float [ %rdx, %for.body ], [ %fsel, %if.then ]
  %iv.next = add i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %res
}

; Force scalable vectorization for both loops above.
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}